From fc028b5d21727da66dc8e146b3dbcfc870c64f90 Mon Sep 17 00:00:00 2001
From: Tristan Gingold <tgingold@free.fr>
Date: Sat, 4 May 2019 16:49:19 +0200
Subject: vhdl: move scanner under vhdl hierarchy.

---
 src/vhdl/vhdl-scanner.adb | 2332 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 2332 insertions(+)
 create mode 100644 src/vhdl/vhdl-scanner.adb

(limited to 'src/vhdl/vhdl-scanner.adb')

diff --git a/src/vhdl/vhdl-scanner.adb b/src/vhdl/vhdl-scanner.adb
new file mode 100644
index 000000000..734b0c7ce
--- /dev/null
+++ b/src/vhdl/vhdl-scanner.adb
@@ -0,0 +1,2332 @@
+--  VHDL lexical scanner.
+--  Copyright (C) 2002 - 2014 Tristan Gingold
+--
+--  GHDL is free software; you can redistribute it and/or modify it under
+--  the terms of the GNU General Public License as published by the Free
+--  Software Foundation; either version 2, or (at your option) any later
+--  version.
+--
+--  GHDL is distributed in the hope that it will be useful, but WITHOUT ANY
+--  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+--  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+--  for more details.
+--
+--  You should have received a copy of the GNU General Public License
+--  along with GHDL; see the file COPYING.  If not, write to the Free
+--  Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+--  02111-1307, USA.
+with Ada.Characters.Latin_1; use Ada.Characters.Latin_1;
+with Errorout; use Errorout;
+with Name_Table;
+with Files_Map; use Files_Map;
+with Std_Names;
+with Str_Table;
+with Flags; use Flags;
+
+package body Vhdl.Scanner is
+
+   -- This classification is a simplification of the categories of LRM93 13.1
+   -- LRM93 13.1
+   -- The only characters allowed in the text of a VHDL description are the
+   -- graphic characters and format effector.
+
+   type Character_Kind_Type is
+      (
+       -- Neither a format effector nor a graphic character.
+       Invalid,
+       Format_Effector,
+       Lower_Case_Letter,
+       Upper_Case_Letter,
+       Digit,
+       Special_Character,
+       Space_Character,
+       Other_Special_Character
+      );
+
+   --  LRM93 13.1
+   --  basic_graphic_character ::=
+   --    upper_case_letter | digit | special_character | space_character
+   --
+   --subtype Basic_Graphic_Character is
+   --  Character_Kind_Type range Upper_Case_Letter .. Space_Character;
+
+   --  LRM93 13.1
+   --  graphic_character ::=
+   --    basic_graphic_character | lower_case_letter | other_special_character
+   --
+   --  Note: There are 191 graphic characters.
+   subtype Graphic_Character is
+     Character_Kind_Type range Lower_Case_Letter .. Other_Special_Character;
+
+   --  letter ::= upper_case_letter | lower_case_letter
+   subtype Letter is
+     Character_Kind_Type range Lower_Case_Letter .. Upper_Case_Letter;
+
+   -- LRM93 13.1
+   -- The characters included in each of the categories of basic graphic
+   -- characters are defined as follows:
+   type Character_Array is array (Character) of Character_Kind_Type;
+   pragma Suppress_Initialization (Character_Array);
+   Characters_Kind : constant Character_Array :=
+     (NUL .. BS => Invalid,
+
+      -- Format effectors are the ISO (and ASCII) characters called horizontal
+      -- tabulation, vertical tabulation, carriage return, line feed, and form
+      -- feed.
+      HT | LF | VT | FF | CR => Format_Effector,
+
+      SO .. US => Invalid,
+
+      -- 1. upper case letters
+      'A' .. 'Z' | UC_A_Grave .. UC_O_Diaeresis |
+      UC_O_Oblique_Stroke .. UC_Icelandic_Thorn => Upper_Case_Letter,
+
+      -- 2. digits
+      '0' .. '9' => Digit,
+
+      -- 3. special characters
+      '"' | '#' | '&' | ''' | '(' | ')' | '+' | ',' | '-' | '.' | '/'
+        | ':' | ';' | '<' | '=' | '>' | '[' | ']'
+        | '_' | '|' | '*' => Special_Character,
+
+      -- 4. the space characters
+      ' ' | NBSP => Space_Character,
+
+      -- 5. lower case letters
+      'a' .. 'z' | LC_German_Sharp_S .. LC_O_Diaeresis |
+      LC_O_Oblique_Stroke .. LC_Y_Diaeresis => Lower_Case_Letter,
+
+      -- 6. other special characters
+      '!' | '$' | '%' | '@' | '?' | '\' | '^' | '`' | '{' | '}' | '~'
+        | Inverted_Exclamation .. Inverted_Question | Multiplication_Sign |
+        Division_Sign => Other_Special_Character,
+
+      --  '¡'    -- INVERTED EXCLAMATION MARK
+      --  '¢'    -- CENT SIGN
+      --  '£'    -- POUND SIGN
+      --  '¤'    -- CURRENCY SIGN
+      --  '¥'    -- YEN SIGN
+      --  '¦'    -- BROKEN BAR
+      --  '§'    -- SECTION SIGN
+      --  '¨'    -- DIAERESIS
+      --  '©'    -- COPYRIGHT SIGN
+      --  'ª'    -- FEMININE ORDINAL INDICATOR
+      --  '«'    -- LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+      --  '¬'    -- NOT SIGN
+      --  '­'    -- SOFT HYPHEN
+      --  '®'    -- REGISTERED SIGN
+      --  '¯'    -- MACRON
+      --  '°'    -- DEGREE SIGN
+      --  '±'    -- PLUS-MINUS SIGN
+      --  '²'    -- SUPERSCRIPT TWO
+      --  '³'    -- SUPERSCRIPT THREE
+      --  '´'    -- ACUTE ACCENT
+      --  'µ'    -- MICRO SIGN
+      --  '¶'    -- PILCROW SIGN
+      --  '·'    -- MIDDLE DOT
+      --  '¸'    -- CEDILLA
+      --  '¹'    -- SUPERSCRIPT ONE
+      --  'º'    -- MASCULINE ORDINAL INDICATOR
+      --  '»'    -- RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+      --  '¼'    -- VULGAR FRACTION ONE QUARTER
+      --  '½'    -- VULGAR FRACTION ONE HALF
+      --  '¾'    -- VULGAR FRACTION THREE QUARTERS
+      --  '¿'    -- INVERTED QUESTION MARK
+      --  '×'    -- MULTIPLICATION SIGN
+      --  '÷'    -- DIVISION SIGN
+
+      DEL .. APC => Invalid);
+
+   -- The context contains the whole internal state of the scanner, ie
+   -- it can be used to push/pop a lexical analysis, to restart the
+   -- scanner from a context marking a previous point.
+   type Scan_Context is record
+      Source : File_Buffer_Acc;
+      Source_File : Source_File_Entry;
+      Line_Number : Natural;
+      Line_Pos : Source_Ptr;
+      Prev_Pos : Source_Ptr;
+      Token_Pos : Source_Ptr;
+      Pos : Source_Ptr;
+      File_Len : Source_Ptr;
+      Token : Token_Type;
+      Prev_Token : Token_Type;
+
+      --  Additional values for the current token.
+      Bit_Str_Base : Character;
+      Bit_Str_Sign : Character;
+      Str_Id : String8_Id;
+      Str_Len : Nat32;
+      Identifier: Name_Id;
+      Int64 : Iir_Int64;
+      Fp64 : Iir_Fp64;
+   end record;
+   pragma Suppress_Initialization (Scan_Context);
+
+   -- Disp a message during scan.
+   -- The current location is automatically displayed before the message.
+   -- Disp a message during scan.
+   procedure Error_Msg_Scan (Msg: String) is
+   begin
+      Report_Msg (Msgid_Error, Scan, No_Location, Msg);
+   end Error_Msg_Scan;
+
+   procedure Error_Msg_Scan (Loc : Location_Type; Msg: String) is
+   begin
+      Report_Msg (Msgid_Error, Scan, Loc, Msg);
+   end Error_Msg_Scan;
+
+   procedure Error_Msg_Scan (Msg: String; Arg1 : Earg_Type) is
+   begin
+      Report_Msg (Msgid_Error, Scan, No_Location, Msg, (1 => Arg1));
+   end Error_Msg_Scan;
+
+   -- Disp a message during scan.
+   procedure Warning_Msg_Scan (Id : Msgid_Warnings; Msg: String) is
+   begin
+      Report_Msg (Id, Scan, No_Location, Msg);
+   end Warning_Msg_Scan;
+
+   procedure Warning_Msg_Scan (Id : Msgid_Warnings;
+                               Msg: String;
+                               Arg1 : Earg_Type;
+                               Cont : Boolean := False) is
+   begin
+      Report_Msg (Id, Scan, No_Location, Msg, (1 => Arg1), Cont);
+   end Warning_Msg_Scan;
+
+   -- The current context.
+   -- Default value is an invalid context.
+   Current_Context: Scan_Context := (Source => null,
+                                     Source_File => No_Source_File_Entry,
+                                     Line_Number => 0,
+                                     Line_Pos => 0,
+                                     Pos => 0,
+                                     Prev_Pos => 0,
+                                     Token_Pos => 0,
+                                     File_Len => 0,
+                                     Token => Tok_Invalid,
+                                     Prev_Token => Tok_Invalid,
+                                     Identifier => Null_Identifier,
+                                     Bit_Str_Base => ' ',
+                                     Bit_Str_Sign => ' ',
+                                     Str_Id => Null_String8,
+                                     Str_Len => 0,
+                                     Int64 => 0,
+                                     Fp64 => 0.0);
+
+   Source: File_Buffer_Acc renames Current_Context.Source;
+   Pos: Source_Ptr renames Current_Context.Pos;
+
+   -- When CURRENT_TOKEN is an identifier, its name_id is stored into
+   -- this global variable.
+   -- Function current_text can be used to convert it into an iir.
+   function Current_Identifier return Name_Id is
+   begin
+      return Current_Context.Identifier;
+   end Current_Identifier;
+
+   procedure Invalidate_Current_Identifier is
+   begin
+      Current_Context.Identifier := Null_Identifier;
+   end Invalidate_Current_Identifier;
+
+   procedure Invalidate_Current_Token is
+   begin
+      if Current_Token /= Tok_Invalid then
+         Current_Context.Prev_Token := Current_Token;
+         Current_Token := Tok_Invalid;
+      end if;
+   end Invalidate_Current_Token;
+
+   function Current_String_Id return String8_Id is
+   begin
+      return Current_Context.Str_Id;
+   end Current_String_Id;
+
+   function Current_String_Length return Nat32 is
+   begin
+      return Current_Context.Str_Len;
+   end Current_String_Length;
+
+   function Get_Bit_String_Base return Character is
+   begin
+      return Current_Context.Bit_Str_Base;
+   end Get_Bit_String_Base;
+
+   function Get_Bit_String_Sign return Character is
+   begin
+      return Current_Context.Bit_Str_Sign;
+   end Get_Bit_String_Sign;
+
+   function Current_Iir_Int64 return Iir_Int64 is
+   begin
+      return Current_Context.Int64;
+   end Current_Iir_Int64;
+
+   function Current_Iir_Fp64 return Iir_Fp64 is
+   begin
+      return Current_Context.Fp64;
+   end Current_Iir_Fp64;
+
+   function Get_Current_Source_File return Source_File_Entry is
+   begin
+      return Current_Context.Source_File;
+   end Get_Current_Source_File;
+
+   function Get_Current_Line return Natural is
+   begin
+      return Current_Context.Line_Number;
+   end Get_Current_Line;
+
+   function Get_Current_Offset return Natural is
+   begin
+      return Natural (Current_Context.Pos - Current_Context.Line_Pos);
+   end Get_Current_Offset;
+
+   function Get_Token_Offset return Natural is
+   begin
+      return Natural (Current_Context.Token_Pos - Current_Context.Line_Pos);
+   end Get_Token_Offset;
+
+   function Get_Token_Position return Source_Ptr is
+   begin
+      return Current_Context.Token_Pos;
+   end Get_Token_Position;
+
+   function Get_Position return Source_Ptr is
+   begin
+      return Current_Context.Pos;
+   end Get_Position;
+
+   function Get_Token_Location return Location_Type is
+   begin
+      return File_Pos_To_Location
+        (Current_Context.Source_File, Current_Context.Token_Pos);
+   end Get_Token_Location;
+
+   function Get_Prev_Location return Location_Type is
+   begin
+      return File_Pos_To_Location
+        (Current_Context.Source_File, Current_Context.Prev_Pos);
+   end Get_Prev_Location;
+
+   procedure Set_File (Source_File : Source_File_Entry)
+   is
+      N_Source: File_Buffer_Acc;
+   begin
+      pragma Assert (Current_Context.Source = null);
+      pragma Assert (Source_File /= No_Source_File_Entry);
+      N_Source := Get_File_Source (Source_File);
+      Current_Context := (Source => N_Source,
+                          Source_File => Source_File,
+                          Line_Number => 1,
+                          Line_Pos => 0,
+                          Prev_Pos => N_Source'First,
+                          Pos => N_Source'First,
+                          Token_Pos => 0, -- should be invalid,
+                          File_Len => Get_File_Length (Source_File),
+                          Token => Tok_Invalid,
+                          Prev_Token => Tok_Invalid,
+                          Identifier => Null_Identifier,
+                          Bit_Str_Base => ' ',
+                          Bit_Str_Sign => ' ',
+                          Str_Id => Null_String8,
+                          Str_Len => 0,
+                          Int64 => -1,
+                          Fp64 => 0.0);
+      Current_Token := Tok_Invalid;
+   end Set_File;
+
+   function Detect_Encoding_Errors return Boolean
+   is
+      C : constant Character := Source (Pos);
+   begin
+      --  No need to check further if first character is plain ASCII-7
+      if C >= ' ' and C < Character'Val (127) then
+         return False;
+      end if;
+
+      --  UTF-8 BOM is EF BB BF
+      if Source (Pos + 0) = Character'Val (16#ef#)
+        and then Source (Pos + 1) = Character'Val (16#bb#)
+        and then Source (Pos + 2) = Character'Val (16#bf#)
+      then
+         Error_Msg_Scan
+           ("source encoding must be latin-1 (UTF-8 BOM detected)");
+         return True;
+      end if;
+
+      --  UTF-16 BE BOM is FE FF
+      if Source (Pos + 0) = Character'Val (16#fe#)
+        and then Source (Pos + 1) = Character'Val (16#ff#)
+      then
+         Error_Msg_Scan
+           ("source encoding must be latin-1 (UTF-16 BE BOM detected)");
+         return True;
+      end if;
+
+      --  UTF-16 LE BOM is FF FE
+      if Source (Pos + 0) = Character'Val (16#ff#)
+        and then Source (Pos + 1) = Character'Val (16#fe#)
+      then
+         Error_Msg_Scan
+           ("source encoding must be latin-1 (UTF-16 LE BOM detected)");
+         return True;
+      end if;
+
+      --  Certainly weird, but scanner/parser will catch it.
+      return False;
+   end Detect_Encoding_Errors;
+
+   procedure Set_Current_Position (Position: Source_Ptr)
+   is
+      Loc : Location_Type;
+      Offset: Natural;
+      File_Entry : Source_File_Entry;
+   begin
+      --  Scanner must have been initialized.
+      pragma Assert (Current_Context.Source /= null);
+
+      Current_Token := Tok_Invalid;
+      Current_Context.Pos := Position;
+      Loc := File_Pos_To_Location (Current_Context.Source_File,
+                                   Current_Context.Pos);
+      Location_To_Coord (Loc,
+                         File_Entry, Current_Context.Line_Pos,
+                         Current_Context.Line_Number, Offset);
+   end Set_Current_Position;
+
+   procedure Close_File is
+   begin
+      Current_Context.Source := null;
+   end Close_File;
+
+   -- Emit an error when a character above 128 was found.
+   -- This must be called only in vhdl87.
+   procedure Error_8bit is
+   begin
+      Error_Msg_Scan ("8 bits characters not allowed in vhdl87");
+   end Error_8bit;
+
+   -- Emit an error when a separator is expected.
+   procedure Error_Separator is
+   begin
+      Error_Msg_Scan ("a separator is required here");
+   end Error_Separator;
+
+   -- scan a decimal literal or a based literal.
+   --
+   -- LRM93 13.4.1
+   -- DECIMAL_LITERAL ::= INTEGER [ . INTEGER ] [ EXPONENT ]
+   -- EXPONENT ::= E [ + ] INTEGER | E - INTEGER
+   --
+   -- LRM93 13.4.2
+   -- BASED_LITERAL ::= BASE # BASED_INTEGER [ . BASED_INTEGER ] # EXPONENT
+   -- BASE ::= INTEGER
+   procedure Scan_Literal is separate;
+
+   --  Scan a string literal.
+   --
+   --  LRM93 13.6 / LRM08 15.7
+   --  A string literal is formed by a sequence of graphic characters
+   --  (possibly none) enclosed between two quotation marks used as string
+   --  brackets.
+   --  STRING_LITERAL ::= " { GRAPHIC_CHARACTER } "
+   --
+   --  IN: for a string, at the call of this procedure, the current character
+   --  must be either '"' or '%'.
+   procedure Scan_String
+   is
+      -- The quotation character (can be " or %).
+      Mark: Character;
+      -- Current character.
+      C : Character;
+      --  Current length.
+      Length : Nat32;
+   begin
+      --  String delimiter.
+      Mark := Source (Pos);
+      pragma Assert (Mark = '"' or else Mark = '%');
+
+      Pos := Pos + 1;
+      Length := 0;
+      Current_Context.Str_Id := Str_Table.Create_String8;
+      loop
+         C := Source (Pos);
+         if C = Mark then
+            --  LRM93 13.6
+            --  If a quotation mark value is to be represented in the sequence
+            --  of character values, then a pair of adjacent quoatation
+            --  characters marks must be written at the corresponding place
+            --  within the string literal.
+            --  LRM93 13.10
+            --  Any pourcent sign within the sequence of characters must then
+            --  be doubled, and each such doubled percent sign is interpreted
+            --  as a single percent sign value.
+            --  The same replacement is allowed for a bit string literal,
+            --  provieded that both bit string brackets are replaced.
+            Pos := Pos + 1;
+            exit when Source (Pos) /= Mark;
+         end if;
+
+         case Characters_Kind (C) is
+            when Format_Effector =>
+               if Mark = '%' then
+                  --  No matching '%' has been found.  Consider '%' was used
+                  --  as the remainder operator, instead of 'rem'.  This will
+                  --  improve the error message.
+                  Error_Msg_Scan
+                    (Get_Token_Location,
+                     "'%%' is not a vhdl operator, use 'rem'");
+                  Current_Token := Tok_Rem;
+                  Pos := Current_Context.Token_Pos + 1;
+                  return;
+               end if;
+               if C = CR or C = LF then
+                  Error_Msg_Scan
+                    ("string cannot be multi-line, use concatenation");
+               else
+                  Error_Msg_Scan ("format effector not allowed in a string");
+               end if;
+               exit;
+            when Invalid =>
+               if C = Files_Map.EOT
+                 and then Pos >= Current_Context.File_Len
+               then
+                  Error_Msg_Scan ("string not terminated at end of file");
+                  exit;
+               end if;
+
+               Error_Msg_Scan
+                 ("invalid character not allowed, even in a string");
+            when Graphic_Character =>
+               if Vhdl_Std = Vhdl_87 and then C > Character'Val (127) then
+                  Error_8bit;
+               end if;
+         end case;
+
+         if C = '"' and Mark = '%' then
+            --  LRM93 13.10
+            --  The quotation marks (") used as string brackets at both ends of
+            --  a string literal can be replaced by percent signs (%), provided
+            --  that the enclosed sequence of characters constains no quotation
+            --  marks, and provided that both string brackets are replaced.
+            Error_Msg_Scan
+              ("'""' cannot be used in a string delimited with '%%'");
+         end if;
+
+         Length := Length + 1;
+         Str_Table.Append_String8 (Character'Pos (C));
+         Pos := Pos + 1;
+      end loop;
+
+      Current_Token := Tok_String;
+      Current_Context.Str_Len := Length;
+   end Scan_String;
+
+   --  Scan a bit string literal.
+   --
+   --  LRM93 13.7
+   --  A bit string literal is formed by a sequence of extended digits
+   --  (possibly none) enclosed between two quotations used as bit string
+   --  brackets, preceded by a base specifier.
+   --  BIT_STRING_LITERAL ::= BASE_SPECIFIER " [ BIT_VALUE ] "
+   --  BIT_VALUE ::= EXTENDED_DIGIT { [ UNDERLINE ] EXTENDED_DIGIT }
+   --
+   --  The current character must be a base specifier, followed by '"' or '%'.
+   --  The base must be valid.
+   procedure Scan_Bit_String (Base_Log : Nat32)
+   is
+      --  Position of character '0'.
+      Pos_0 : constant Nat8 := Character'Pos ('0');
+
+      --  Used for the base.
+      subtype Nat4 is Natural range 1 .. 4;
+      Base : constant Nat32 := 2 ** Nat4 (Base_Log);
+
+      -- The quotation character (can be " or %).
+      Orig_Pos : constant Source_Ptr := Pos;
+      Mark     : constant Character := Source (Orig_Pos);
+      -- Current character.
+      C : Character;
+      --  Current length.
+      Length : Nat32;
+      --  Digit value.
+      V, D : Nat8;
+      --  True if invalid character already found, to avoid duplicate message.
+      Has_Invalid : Boolean;
+   begin
+      pragma Assert (Mark = '"' or else Mark = '%');
+      Pos := Pos + 1;
+      Length := 0;
+      Has_Invalid := False;
+      Current_Context.Str_Id := Str_Table.Create_String8;
+      loop
+         << Again >> null;
+         C := Source (Pos);
+         Pos := Pos + 1;
+         exit when C = Mark;
+
+         -- LRM93 13.7
+         -- If the base specifier is 'B', the extended digits in the bit
+         -- value are restricted to 0 and 1.
+         -- If the base specifier is 'O', the extended digits int the bit
+         -- value are restricted to legal digits in the octal number
+         -- system, ie, the digits 0 through 7.
+         -- If the base specifier is 'X', the extended digits are all digits
+         -- together with the letters A through F.
+         case C is
+            when '0' .. '9' =>
+               V := Character'Pos (C) - Character'Pos ('0');
+            when 'A' .. 'F' =>
+               V := Character'Pos (C) - Character'Pos ('A') + 10;
+            when 'a' .. 'f' =>
+               --  LRM93 13.7
+               --  A letter in a bit string literal (...) can be written either
+               --  in lowercase or in upper case, with the same meaning.
+               V := Character'Pos (C) - Character'Pos ('a') + 10;
+            when '_' =>
+               if Source (Pos) = '_' then
+                  Error_Msg_Scan
+                    ("double underscore not allowed in a bit string");
+               end if;
+               if Source (Pos - 2) = Mark then
+                  Error_Msg_Scan
+                    ("underscore not allowed at the start of a bit string");
+               elsif Source (Pos) = Mark then
+                  Error_Msg_Scan
+                    ("underscore not allowed at the end of a bit string");
+               end if;
+               goto Again;
+            when '"' =>
+               pragma Assert (Mark = '%');
+               Error_Msg_Scan
+                 ("'""' cannot close a bit string opened by '%%'");
+               exit;
+            when '%' =>
+               pragma Assert (Mark = '"');
+               Error_Msg_Scan
+                 ("'%%' cannot close a bit string opened by '""'");
+               exit;
+            when others =>
+               if Characters_Kind (C) in Graphic_Character then
+                  if Vhdl_Std >= Vhdl_08 then
+                     V := Nat8'Last;
+                  else
+                     if not Has_Invalid then
+                        Error_Msg_Scan ("invalid character in bit string");
+                        Has_Invalid := True;
+                     end if;
+                     --  Continue the bit string
+                     V := 0;
+                  end if;
+               else
+                  if Mark = '%' then
+                     Error_Msg_Scan
+                       (File_Pos_To_Location
+                          (Current_Context.Source_File, Orig_Pos),
+                        "'%%' is not a vhdl operator, use 'rem'");
+                     Current_Token := Tok_Rem;
+                     Pos := Orig_Pos + 1;
+                     return;
+                  else
+                     Error_Msg_Scan ("bit string not terminated");
+                     Pos := Pos - 1;
+                  end if;
+                  exit;
+               end if;
+         end case;
+
+         --  Expand bit value.
+         if Vhdl_Std >= Vhdl_08 and V > Base then
+            --  Expand as graphic character.
+            for I in 1 .. Base_Log loop
+               Str_Table.Append_String8_Char (C);
+            end loop;
+         else
+            --  Expand as extended digits.
+            case Base_Log is
+               when 1 =>
+                  if V > 1 then
+                     Error_Msg_Scan
+                       ("invalid character in a binary bit string");
+                     V := 1;
+                  end if;
+                  Str_Table.Append_String8 (Pos_0 + V);
+               when 3 =>
+                  if V > 7 then
+                     Error_Msg_Scan
+                       ("invalid character in a octal bit string");
+                     V := 7;
+                  end if;
+                  for I in 1 .. 3 loop
+                     D := V / 4;
+                     Str_Table.Append_String8 (Pos_0 + D);
+                     V := (V - 4 * D) * 2;
+                  end loop;
+               when 4 =>
+                  for I in 1 .. 4 loop
+                     D := V / 8;
+                     Str_Table.Append_String8 (Pos_0 + D);
+                     V := (V - 8 * D) * 2;
+                  end loop;
+               when others =>
+                  raise Internal_Error;
+            end case;
+         end if;
+
+         Length := Length + Base_Log;
+      end loop;
+
+      --  Note: the length of the bit string may be 0.
+
+      Current_Token := Tok_Bit_String;
+      Current_Context.Str_Len := Length;
+   end Scan_Bit_String;
+
+   --  Scan a decimal bit string literal.  For base specifier D the algorithm
+   --  is rather different: all the graphic characters shall be digits, and we
+   --  need to use a (not very efficient) arbitrary precision multiplication.
+   procedure Scan_Dec_Bit_String
+   is
+      use Str_Table;
+
+      Id : String8_Id;
+
+      --  Position of character '0'.
+      Pos_0 : constant Nat8 := Character'Pos ('0');
+
+      -- Current character.
+      C : Character;
+      --  Current length.
+      Length : Nat32;
+      --  Digit value.
+      V, D : Nat8;
+
+      type Carries_Type is array (0 .. 3) of Nat8;
+      Carries : Carries_Type;
+      No_Carries : constant Carries_Type := (others => Pos_0);
+
+      --  Shift right carries.  Note the Carries (0) is the LSB.
+      procedure Shr_Carries is
+      begin
+         Carries := (Carries (1), Carries (2), Carries (3), Pos_0);
+      end Shr_Carries;
+
+      procedure Append_Carries is
+      begin
+         --  Expand the bit string.  Note that position 1 of the string8 is
+         --  the MSB.
+         while Carries /= No_Carries loop
+            Append_String8 (Pos_0);
+            Length := Length + 1;
+            for I in reverse 2 .. Length loop
+               Set_Element_String8 (Id, I, Element_String8 (Id, I - 1));
+            end loop;
+            Set_Element_String8 (Id, 1, Carries (0));
+            Shr_Carries;
+         end loop;
+      end Append_Carries;
+
+      --  Add 1 to Carries.  Overflow is not allowed and should be prevented by
+      --  construction.
+      procedure Add_One_To_Carries is
+      begin
+         for I in Carries'Range loop
+            if Carries (I) = Pos_0 then
+               Carries (I) := Pos_0 + 1;
+               --  End of propagation.
+               exit;
+            else
+               Carries (I) := Pos_0;
+               --  Continue propagation.
+               pragma Assert (I < Carries'Last);
+            end if;
+         end loop;
+      end Add_One_To_Carries;
+   begin
+      pragma Assert (Source (Pos) = '"');
+      Pos := Pos + 1;
+      Length := 0;
+      Id := Create_String8;
+      Current_Context.Str_Id := Id;
+      loop
+         << Again >> null;
+         C := Source (Pos);
+         Pos := Pos + 1;
+         exit when C = '"';
+
+         if C in '0' .. '9' then
+            V := Character'Pos (C) - Character'Pos ('0');
+         elsif C = '_' then
+            if Source (Pos) = '_' then
+               Error_Msg_Scan
+                 ("double underscore not allowed in a bit string");
+            end if;
+            if Source (Pos - 2) = '"' then
+               Error_Msg_Scan
+                 ("underscore not allowed at the start of a bit string");
+            elsif Source (Pos) = '"' then
+               Error_Msg_Scan
+                 ("underscore not allowed at the end of a bit string");
+            end if;
+            goto Again;
+         else
+            if Characters_Kind (C) in Graphic_Character then
+               Error_Msg_Scan
+                 ("graphic character not allowed in decimal bit string");
+               --  Continue the bit string
+               V := 0;
+            else
+               Error_Msg_Scan ("bit string not terminated");
+               Pos := Pos - 1;
+               exit;
+            end if;
+         end if;
+
+         --  Multiply by 10.
+         Carries := (others => Pos_0);
+         for I in reverse 1 .. Length loop
+            --  Shift by 1 (*2).
+            D := Element_String8 (Id, I);
+            Set_Element_String8 (Id, I, Carries (0));
+            Shr_Carries;
+            --  Add D and D * 4.
+            if D /= Pos_0 then
+               Add_One_To_Carries;
+               --  Add_Four_To_Carries:
+               for I in 2 .. 3 loop
+                  if Carries (I) = Pos_0 then
+                     Carries (I) := Pos_0 + 1;
+                     --  End of propagation.
+                     exit;
+                  else
+                     Carries (I) := Pos_0;
+                     --  Continue propagation.
+                  end if;
+               end loop;
+            end if;
+         end loop;
+         Append_Carries;
+
+         --  Add V.
+         for I in Carries'Range loop
+            D := V / 2;
+            Carries (I) := Pos_0 + (V - 2 * D);
+            V := D;
+         end loop;
+         for I in reverse 1 .. Length loop
+            D := Element_String8 (Id, I);
+            if D /= Pos_0 then
+               Add_One_To_Carries;
+            end if;
+            Set_Element_String8 (Id, I, Carries (0));
+            Shr_Carries;
+            exit when Carries = No_Carries;
+         end loop;
+         Append_Carries;
+      end loop;
+
+      Current_Token := Tok_Bit_String;
+      Current_Context.Str_Len := Length;
+   end Scan_Dec_Bit_String;
+
+   --  LRM08 15.2 Character set
+   --  For each uppercase letter, there is a corresponding lowercase letter;
+   --  and for each lowercase letter except [y diaeresis] and [german sharp s],
+   --  there is a corresponding uppercase letter.
+   type Character_Map is array (Character) of Character;
+   To_Lower_Map : constant Character_Map :=
+     (
+      --  Uppercase ASCII letters.
+      'A' => 'a',
+      'B' => 'b',
+      'C' => 'c',
+      'D' => 'd',
+      'E' => 'e',
+      'F' => 'f',
+      'G' => 'g',
+      'H' => 'h',
+      'I' => 'i',
+      'J' => 'j',
+      'K' => 'k',
+      'L' => 'l',
+      'M' => 'm',
+      'N' => 'n',
+      'O' => 'o',
+      'P' => 'p',
+      'Q' => 'q',
+      'R' => 'r',
+      'S' => 's',
+      'T' => 't',
+      'U' => 'u',
+      'V' => 'v',
+      'W' => 'w',
+      'X' => 'x',
+      'Y' => 'y',
+      'Z' => 'z',
+
+      --  Lowercase ASCII letters.
+      'a' => 'a',
+      'b' => 'b',
+      'c' => 'c',
+      'd' => 'd',
+      'e' => 'e',
+      'f' => 'f',
+      'g' => 'g',
+      'h' => 'h',
+      'i' => 'i',
+      'j' => 'j',
+      'k' => 'k',
+      'l' => 'l',
+      'm' => 'm',
+      'n' => 'n',
+      'o' => 'o',
+      'p' => 'p',
+      'q' => 'q',
+      'r' => 'r',
+      's' => 's',
+      't' => 't',
+      'u' => 'u',
+      'v' => 'v',
+      'w' => 'w',
+      'x' => 'x',
+      'y' => 'y',
+      'z' => 'z',
+
+      --  Uppercase Latin-1 letters.
+      UC_A_Grave          => LC_A_Grave,
+      UC_A_Acute          => LC_A_Acute,
+      UC_A_Circumflex     => LC_A_Circumflex,
+      UC_A_Tilde          => LC_A_Tilde,
+      UC_A_Diaeresis      => LC_A_Diaeresis,
+      UC_A_Ring           => LC_A_Ring,
+      UC_AE_Diphthong     => LC_AE_Diphthong,
+      UC_C_Cedilla        => LC_C_Cedilla,
+      UC_E_Grave          => LC_E_Grave,
+      UC_E_Acute          => LC_E_Acute,
+      UC_E_Circumflex     => LC_E_Circumflex,
+      UC_E_Diaeresis      => LC_E_Diaeresis,
+      UC_I_Grave          => LC_I_Grave,
+      UC_I_Acute          => LC_I_Acute,
+      UC_I_Circumflex     => LC_I_Circumflex,
+      UC_I_Diaeresis      => LC_I_Diaeresis,
+      UC_Icelandic_Eth    => LC_Icelandic_Eth,
+      UC_N_Tilde          => LC_N_Tilde,
+      UC_O_Grave          => LC_O_Grave,
+      UC_O_Acute          => LC_O_Acute,
+      UC_O_Circumflex     => LC_O_Circumflex,
+      UC_O_Tilde          => LC_O_Tilde,
+      UC_O_Diaeresis      => LC_O_Diaeresis,
+      UC_O_Oblique_Stroke => LC_O_Oblique_Stroke,
+      UC_U_Grave          => LC_U_Grave,
+      UC_U_Acute          => LC_U_Acute,
+      UC_U_Circumflex     => LC_U_Circumflex,
+      UC_U_Diaeresis      => LC_U_Diaeresis,
+      UC_Y_Acute          => LC_Y_Acute,
+      UC_Icelandic_Thorn  => LC_Icelandic_Thorn,
+
+      --  Lowercase Latin-1 letters.
+      LC_A_Grave          => LC_A_Grave,
+      LC_A_Acute          => LC_A_Acute,
+      LC_A_Circumflex     => LC_A_Circumflex,
+      LC_A_Tilde          => LC_A_Tilde,
+      LC_A_Diaeresis      => LC_A_Diaeresis,
+      LC_A_Ring           => LC_A_Ring,
+      LC_AE_Diphthong     => LC_AE_Diphthong,
+      LC_C_Cedilla        => LC_C_Cedilla,
+      LC_E_Grave          => LC_E_Grave,
+      LC_E_Acute          => LC_E_Acute,
+      LC_E_Circumflex     => LC_E_Circumflex,
+      LC_E_Diaeresis      => LC_E_Diaeresis,
+      LC_I_Grave          => LC_I_Grave,
+      LC_I_Acute          => LC_I_Acute,
+      LC_I_Circumflex     => LC_I_Circumflex,
+      LC_I_Diaeresis      => LC_I_Diaeresis,
+      LC_Icelandic_Eth    => LC_Icelandic_Eth,
+      LC_N_Tilde          => LC_N_Tilde,
+      LC_O_Grave          => LC_O_Grave,
+      LC_O_Acute          => LC_O_Acute,
+      LC_O_Circumflex     => LC_O_Circumflex,
+      LC_O_Tilde          => LC_O_Tilde,
+      LC_O_Diaeresis      => LC_O_Diaeresis,
+      LC_O_Oblique_Stroke => LC_O_Oblique_Stroke,
+      LC_U_Grave          => LC_U_Grave,
+      LC_U_Acute          => LC_U_Acute,
+      LC_U_Circumflex     => LC_U_Circumflex,
+      LC_U_Diaeresis      => LC_U_Diaeresis,
+      LC_Y_Acute          => LC_Y_Acute,
+      LC_Icelandic_Thorn  => LC_Icelandic_Thorn,
+
+      --  Lowercase latin-1 characters without corresponding uppercase one.
+      LC_Y_Diaeresis      => LC_Y_Diaeresis,
+      LC_German_Sharp_S   => LC_German_Sharp_S,
+
+      --  Not a letter.
+      others => NUL);
+
+   procedure Error_Too_Long is
+   begin
+      Error_Msg_Scan ("identifier is too long (>"
+                        & Natural'Image (Max_Name_Length - 1) & ")");
+   end Error_Too_Long;
+
+   -- LRM93 13.3.1
+   -- Basic Identifiers
+   -- A basic identifier consists only of letters, digits, and underlines.
+   -- BASIC_IDENTIFIER ::= LETTER { [ UNDERLINE ] LETTER_OR_DIGIT }
+   -- LETTER_OR_DIGIT ::= LETTER | DIGIT
+   -- LETTER ::= UPPER_CASE_LETTER | LOWER_CASE_LETTER
+   --
+   -- NB: At the call of this procedure, the current character must be a legal
+   -- character for a basic identifier.
+   procedure Scan_Identifier (Allow_PSL : Boolean)
+   is
+      use Name_Table;
+      Buffer : String (1 .. Max_Name_Length);
+      C : Character;
+      Len : Natural;
+   begin
+      -- This is an identifier or a key word.
+      Len := 0;
+      loop
+         --  Source (pos) is correct.
+         --  LRM93 13.3.1
+         --   All characters if a basic identifier are signifiant, including
+         --   any underline character inserted between a letter or digit and
+         --   an adjacent letter or digit.
+         --   Basic identifiers differing only in the use of the corresponding
+         --   upper and lower case letters are considered as the same.
+         --
+         --  GHDL: This is achieved by converting all upper case letters into
+         --  equivalent lower case letters.
+         --  The opposite (converting to upper lower case letters) is not
+         --  possible because two characters have no upper-case equivalent.
+         C := Source (Pos);
+         case C is
+            when 'A' .. 'Z' =>
+               C := Character'Val
+                 (Character'Pos (C)
+                    + Character'Pos ('a') - Character'Pos ('A'));
+            when 'a' .. 'z' | '0' .. '9' =>
+               null;
+            when '_' =>
+               if Source (Pos + 1) = '_' then
+                  Error_Msg_Scan ("two underscores can't be consecutive");
+               end if;
+            when ' ' | ')' | '.' | ';' | ':' =>
+               exit;
+            when others =>
+               --  Non common case.
+               case Characters_Kind (C) is
+                  when Upper_Case_Letter | Lower_Case_Letter =>
+                     if Vhdl_Std = Vhdl_87 then
+                        Error_8bit;
+                     end if;
+                     C := To_Lower_Map (C);
+                     pragma Assert (C /= NUL);
+                  when Digit =>
+                     raise Internal_Error;
+                  when others =>
+                     exit;
+               end case;
+         end case;
+
+         --  Put character in name buffer.  FIXME: compute the hash at the same
+         --  time ?
+         if Len >= Max_Name_Length - 1 then
+            if Len = Max_Name_Length -1 then
+               Error_Msg_Scan ("identifier is too long (>"
+                                 & Natural'Image (Max_Name_Length - 1) & ")");
+               --  Accept this last one character, so that no error for the
+               --  following characters.
+               Len := Len + 1;
+               Buffer (Len) := C;
+            end if;
+         else
+            Len := Len + 1;
+            Buffer (Len) := C;
+         end if;
+
+         --  Next character.
+         Pos := Pos + 1;
+      end loop;
+
+      if Source (Pos - 1) = '_' then
+         if Allow_PSL then
+            --  Some PSL reserved words finish with '_'.  This case is handled
+            --  later by Scan_Underscore and Scan_Exclam_Mark.
+            Pos := Pos - 1;
+            Len := Len - 1;
+            C := '_';
+         else
+            --  Eat the trailing underscore.
+            Error_Msg_Scan ("an identifier cannot finish with '_'");
+         end if;
+      end if;
+
+      -- LRM93 13.2
+      -- At least one separator is required between an identifier or an
+      -- abstract literal and an adjacent identifier or abstract literal.
+      case Characters_Kind (C) is
+         when Digit
+           | Upper_Case_Letter
+           | Lower_Case_Letter =>
+            raise Internal_Error;
+         when Other_Special_Character | Special_Character =>
+            if (C = '"' or C = '%') and then Len <= 2 then
+               if C = '%' and Vhdl_Std >= Vhdl_08 then
+                  Error_Msg_Scan ("'%%' not allowed in vhdl 2008 "
+                                    & "(was replacement character)");
+                  --  Continue as a bit string.
+               end if;
+
+               --  Good candidate for bit string.
+
+               --  LRM93 13.7
+               --  BASE_SPECIFIER ::= B | O | X
+               --
+               --  A letter in a bit string literal (either an extended digit
+               --  or the base specifier) can be written either in lower case
+               --  or in upper case, with the same meaning.
+               --
+               --  LRM08 15.8 Bit string literals
+               --  BASE_SPECICIER ::=
+               --     B | O | X | UB | UO | UX | SB | SO | SX | D
+               --
+               --  An extended digit and the base specifier in a bit string
+               --  literal can be written either in lowercase or in uppercase,
+               --  with the same meaning.
+               declare
+                  Base : Nat32;
+                  Cl : constant Character := Buffer (Len);
+                  Cf : constant Character := Buffer (1);
+               begin
+                  Current_Context.Bit_Str_Base := Cl;
+                  if Cl = 'b' then
+                     Base := 1;
+                  elsif Cl = 'o' then
+                     Base := 3;
+                  elsif Cl = 'x' then
+                     Base := 4;
+                  elsif Vhdl_Std >= Vhdl_08 and Len = 1 and Cf = 'd' then
+                     Current_Context.Bit_Str_Sign := ' ';
+                     Scan_Dec_Bit_String;
+                     return;
+                  else
+                     Base := 0;
+                  end if;
+                  if Base > 0 then
+                     if Len = 1 then
+                        Current_Context.Bit_Str_Sign := ' ';
+                        Scan_Bit_String (Base);
+                        return;
+                     elsif Vhdl_Std >= Vhdl_08
+                       and then (Cf = 's' or Cf = 'u')
+                     then
+                        Current_Context.Bit_Str_Sign := Cf;
+                        Scan_Bit_String (Base);
+                        return;
+                     end if;
+                  end if;
+               end;
+            elsif Vhdl_Std > Vhdl_87 and then C = '\' then
+               --  Start of extended identifier.  Cannot follow an identifier.
+               Error_Separator;
+            end if;
+
+         when Invalid =>
+            --  Improve error message for use of UTF-8 quote marks.
+            --  It's possible because in the sequence of UTF-8 bytes for the
+            --  quote marks, there are invalid character (in the 128-160
+            --  range).
+            if C = Character'Val (16#80#)
+              and then Buffer (Len) = Character'Val (16#e2#)
+              and then (Source (Pos + 1) = Character'Val (16#98#)
+                          or else Source (Pos + 1) = Character'Val (16#99#))
+            then
+               --  UTF-8 left or right single quote mark.
+               if Len > 1 then
+                  --  The first byte (0xe2) is part of the identifier.  An
+                  --  error will be detected as the next byte (0x80) is
+                  --  invalid.  Remove the first byte from the identifier, and
+                  --  let's catch the error later.
+                  Len := Len - 1;
+                  Pos := Pos - 1;
+               else
+                  Error_Msg_Scan ("invalid use of UTF8 character for '");
+                  Pos := Pos + 2;
+
+                  --  Distinguish between character literal and tick.  Don't
+                  --  care about possible invalid character literal, as in any
+                  --  case we have already emitted an error message.
+                  if Current_Context.Prev_Token /= Tok_Identifier
+                    and then Current_Context.Prev_Token /= Tok_Character
+                    and then
+                    (Source (Pos + 1) = '''
+                       or else
+                       (Source (Pos + 1) = Character'Val (16#e2#)
+                          and then Source (Pos + 2) = Character'Val (16#80#)
+                          and then Source (Pos + 3) = Character'Val (16#99#)))
+                  then
+                     Current_Token := Tok_Character;
+                     Current_Context.Identifier :=
+                       Name_Table.Get_Identifier (Source (Pos));
+                     if Source (Pos + 1) = ''' then
+                        Pos := Pos + 2;
+                     else
+                        Pos := Pos + 4;
+                     end if;
+                  else
+                     Current_Token := Tok_Tick;
+                  end if;
+                  return;
+               end if;
+            end if;
+         when Format_Effector
+           | Space_Character =>
+            null;
+      end case;
+
+      -- Hash it.
+      Current_Context.Identifier := Get_Identifier (Buffer (1 .. Len));
+      Current_Token := Tok_Identifier;
+   end Scan_Identifier;
+
+   procedure Identifier_To_Token is
+   begin
+      if Current_Identifier in Std_Names.Name_Id_Keywords then
+         -- LRM93 13.9
+         --   The identifiers listed below are called reserved words and are
+         --   reserved for signifiances in the language.
+         -- IN: this is also achieved in packages std_names and tokens.
+         Current_Token := Token_Type'Val
+           (Token_Type'Pos (Tok_First_Keyword)
+              + Current_Identifier - Std_Names.Name_First_Keyword);
+         case Current_Identifier is
+            when Std_Names.Name_Id_AMS_Reserved_Words =>
+               if not AMS_Vhdl then
+                  if Is_Warning_Enabled (Warnid_Reserved_Word) then
+                     Warning_Msg_Scan
+                       (Warnid_Reserved_Word,
+                        "using %i AMS-VHDL reserved word as an identifier",
+                        +Current_Identifier);
+                  end if;
+                  Current_Token := Tok_Identifier;
+               end if;
+            when Std_Names.Name_Id_Vhdl08_Reserved_Words =>
+               if Vhdl_Std < Vhdl_08 then
+                  if Is_Warning_Enabled (Warnid_Reserved_Word) then
+                     Warning_Msg_Scan
+                       (Warnid_Reserved_Word,
+                        "using %i vhdl-2008 reserved word as an identifier",
+                        +Current_Identifier);
+                  end if;
+                  Current_Token := Tok_Identifier;
+               end if;
+            when Std_Names.Name_Id_Vhdl00_Reserved_Words =>
+               if Vhdl_Std < Vhdl_00 then
+                  if Is_Warning_Enabled (Warnid_Reserved_Word) then
+                     Warning_Msg_Scan
+                       (Warnid_Reserved_Word,
+                        "using %i vhdl-2000 reserved word as an identifier",
+                        +Current_Identifier);
+                  end if;
+                  Current_Token := Tok_Identifier;
+               end if;
+            when Std_Names.Name_Id_Vhdl93_Reserved_Words =>
+               if Vhdl_Std = Vhdl_87 then
+                  if Is_Warning_Enabled (Warnid_Reserved_Word) then
+                     Warning_Msg_Scan
+                       (Warnid_Reserved_Word,
+                        "using %i vhdl93 reserved word as a vhdl87 identifier",
+                        +Current_Identifier, True);
+                     Warning_Msg_Scan
+                       (Warnid_Reserved_Word,
+                        "(use option --std=93 to compile as vhdl93)");
+                  end if;
+                  Current_Token := Tok_Identifier;
+               end if;
+            when Std_Names.Name_Id_Vhdl87_Reserved_Words =>
+               null;
+            when others =>
+               raise Program_Error;
+         end case;
+      elsif Flag_Psl then
+         case Current_Identifier is
+            when Std_Names.Name_Clock =>
+               Current_Token := Tok_Psl_Clock;
+            when Std_Names.Name_Const =>
+               Current_Token := Tok_Psl_Const;
+            when Std_Names.Name_Boolean =>
+               Current_Token := Tok_Psl_Boolean;
+            when Std_Names.Name_Sequence =>
+               Current_Token := Tok_Psl_Sequence;
+            when Std_Names.Name_Property =>
+               Current_Token := Tok_Psl_Property;
+            when Std_Names.Name_Endpoint =>
+               Current_Token := Tok_Psl_Endpoint;
+            when Std_Names.Name_Cover =>
+               Current_Token := Tok_Psl_Cover;
+            when Std_Names.Name_Default =>
+               Current_Token := Tok_Psl_Default;
+            when Std_Names.Name_Inf =>
+               Current_Token := Tok_Inf;
+            when Std_Names.Name_Within =>
+               Current_Token := Tok_Within;
+            when Std_Names.Name_Abort =>
+               Current_Token := Tok_Abort;
+            when Std_Names.Name_Before =>
+               Current_Token := Tok_Before;
+            when Std_Names.Name_Always =>
+               Current_Token := Tok_Always;
+            when Std_Names.Name_Never =>
+               Current_Token := Tok_Never;
+            when Std_Names.Name_Eventually =>
+               Current_Token := Tok_Eventually;
+            when Std_Names.Name_Next_A =>
+               Current_Token := Tok_Next_A;
+            when Std_Names.Name_Next_E =>
+               Current_Token := Tok_Next_E;
+            when Std_Names.Name_Next_Event =>
+               Current_Token := Tok_Next_Event;
+            when Std_Names.Name_Next_Event_A =>
+               Current_Token := Tok_Next_Event_A;
+            when Std_Names.Name_Next_Event_E =>
+               Current_Token := Tok_Next_Event_E;
+            when Std_Names.Name_Until =>
+               Current_Token := Tok_Until;
+            when others =>
+               Current_Token := Tok_Identifier;
+               if Source (Pos - 1) = '_' then
+                  Error_Msg_Scan ("identifiers cannot finish with '_'");
+               end if;
+         end case;
+      end if;
+   end Identifier_To_Token;
+
+   --  LRM93 13.3.2
+   --  EXTENDED_IDENTIFIER ::= \ GRAPHIC_CHARACTER { GRAPHIC_CHARACTER } \
+   --
+   --  Create an (extended) indentifier.
+   --  Extended identifiers are stored as they appear (leading and tailing
+   --  backslashes, doubling backslashes inside).
+   procedure Scan_Extended_Identifier
+   is
+      use Name_Table;
+      Buffer : String (1 .. Max_Name_Length);
+      Len : Natural;
+      C : Character;
+   begin
+      --  LRM93 13.3.2
+      --  Moreover, every extended identifiers is distinct from any basic
+      --  identifier.
+      --  GHDL: This is satisfied by storing '\' in the name table.
+      Len := 1;
+      Buffer (1) := '\';
+      loop
+         --  Next character.
+         Pos := Pos + 1;
+         C := Source (Pos);
+
+         if C = '\' then
+            --  LRM93 13.3.2
+            --  If a backslash is to be used as one of the graphic characters
+            --  of an extended literal, it must be doubled.
+            --  LRM93 13.3.2
+            --  (a doubled backslash couting as one character)
+            if Len >= Max_Name_Length - 1 then
+               if Len = Max_Name_Length - 1 then
+                  Error_Too_Long;
+                  --  Accept this last one.
+                  Len := Len + 1;
+                  Buffer (Len) := C;
+               end if;
+            else
+               Len := Len + 1;
+               Buffer (Len) := C;
+            end if;
+
+            Pos := Pos + 1;
+            C := Source (Pos);
+
+            exit when C /= '\';
+         end if;
+
+         case Characters_Kind (C) is
+            when Format_Effector =>
+               Error_Msg_Scan ("format effector in extended identifier");
+               exit;
+            when Graphic_Character =>
+               null;
+            when Invalid =>
+               if C = Files_Map.EOT
+                 and then Pos >= Current_Context.File_Len
+               then
+                  Error_Msg_Scan
+                    ("extended identifier not terminated at end of file");
+               elsif C = LF or C = CR then
+                  Error_Msg_Scan
+                    ("extended identifier not terminated at end of line");
+               else
+                  Error_Msg_Scan ("invalid character in extended identifier");
+               end if;
+               exit;
+         end case;
+
+         --  LRM93 13.3.2
+         --  Extended identifiers differing only in the use of corresponding
+         --  upper and lower case letters are distinct.
+         if Len >= Max_Name_Length - 1 then
+            if Len = Max_Name_Length - 1 then
+               Error_Too_Long;
+               --  Accept this last one.
+               Len := Len + 1;
+               Buffer (Len) := C;
+            end if;
+         else
+            Len := Len + 1;
+            Buffer (Len) := C;
+         end if;
+      end loop;
+
+      if Len <= 2 then
+         Error_Msg_Scan ("empty extended identifier is not allowed");
+      end if;
+
+      --  LRM93 13.2
+      --  At least one separator is required between an identifier or an
+      --  abstract literal and an adjacent identifier or abstract literal.
+      case Characters_Kind (C) is
+         when Digit
+           | Upper_Case_Letter
+           | Lower_Case_Letter =>
+            Error_Separator;
+         when Invalid
+           | Format_Effector
+           | Space_Character
+           | Special_Character
+           | Other_Special_Character =>
+            null;
+      end case;
+
+      -- Hash it.
+      Current_Context.Identifier := Get_Identifier (Buffer (1 .. Len));
+      Current_Token := Tok_Identifier;
+   end Scan_Extended_Identifier;
+
+   procedure Convert_Identifier (Str : in out String)
+   is
+      procedure Error_Bad is
+      begin
+         Error_Msg_Option ("bad character in identifier");
+      end Error_Bad;
+
+      procedure Error_8bit is
+      begin
+         Error_Msg_Option ("8 bits characters not allowed in vhdl87");
+      end Error_8bit;
+
+      C : Character;
+      subtype Id_Subtype is String (1 .. Str'Length);
+      Id : Id_Subtype renames Str;
+   begin
+      if Id'Length = 0 then
+         Error_Msg_Option ("identifier required");
+         return;
+      end if;
+
+      if Id (1) = '\' then
+         --  Extended identifier.
+         if Vhdl_Std = Vhdl_87 then
+            Error_Msg_Option ("extended identifiers not allowed in vhdl87");
+            return;
+         end if;
+
+         if Id'Length < 3 then
+            Error_Msg_Option ("extended identifier is too short");
+            return;
+         end if;
+         if Id (Id'Last) /= '\' then
+            Error_Msg_Option ("extended identifier must finish with a '\'");
+            return;
+         end if;
+         for I in 2 .. Id'Last - 1 loop
+            C := Id (I);
+            case Characters_Kind (C) is
+               when Format_Effector =>
+                  Error_Msg_Option ("format effector in extended identifier");
+                  return;
+               when Graphic_Character =>
+                  if C = '\' then
+                     if Id (I + 1) /= '\'
+                       or else I = Id'Last - 1
+                     then
+                        Error_Msg_Option ("anti-slash must be doubled "
+                                            & "in extended identifier");
+                        return;
+                     end if;
+                  end if;
+               when Invalid =>
+                  Error_Bad;
+            end case;
+         end loop;
+      else
+         --  Identifier
+         for I in 1 .. Id'Length loop
+            C := Id (I);
+            case Characters_Kind (C) is
+               when Upper_Case_Letter =>
+                  if Vhdl_Std = Vhdl_87 and C > 'Z' then
+                     Error_8bit;
+                  end if;
+                  Id (I) := To_Lower_Map (C);
+               when Lower_Case_Letter | Digit =>
+                  if Vhdl_Std = Vhdl_87 and C > 'z' then
+                     Error_8bit;
+                  end if;
+               when Special_Character =>
+                  -- The current character is legal in an identifier.
+                  if C = '_' then
+                     if I = 1 then
+                        Error_Msg_Option
+                          ("an identifier cannot start with an underscore");
+                        return;
+                     end if;
+                     if Id (I - 1) = '_' then
+                        Error_Msg_Option
+                          ("two underscores can't be consecutive");
+                        return;
+                     end if;
+                     if I = Id'Last then
+                        Error_Msg_Option
+                          ("an identifier cannot finish with an underscore");
+                        return;
+                     end if;
+                  else
+                     Error_Bad;
+                  end if;
+               when others =>
+                  Error_Bad;
+            end case;
+         end loop;
+      end if;
+   end Convert_Identifier;
+
+   --  Internal scanner function: return True if C must be considered as a line
+   --  terminator.  This also includes EOT (which terminates the file or is
+   --  invalid).
+   function Is_EOL (C : Character) return Boolean is
+   begin
+      case C is
+         when CR | LF | VT | FF | Files_Map.EOT =>
+            return True;
+         when others =>
+            return False;
+      end case;
+   end Is_EOL;
+
+   --  Advance scanner till the first non-space character.
+   procedure Skip_Spaces is
+   begin
+      while Source (Pos) = ' ' or Source (Pos) = HT loop
+         Pos := Pos + 1;
+      end loop;
+   end Skip_Spaces;
+
+   --  Eat all characters until end-of-line (not included).
+   procedure Skip_Until_EOL is
+   begin
+      while not Is_EOL (Source (Pos)) loop
+         --  Don't warn about invalid character, it's somewhat out of the
+         --  scope.
+         Pos := Pos + 1;
+      end loop;
+   end Skip_Until_EOL;
+
+   --  Scan an identifier within a comment.  Only lower case letters are
+   --  allowed.
+   procedure Scan_Comment_Identifier (Id : out Name_Id)
+   is
+      use Name_Table;
+      Buffer : String (1 .. Max_Name_Length);
+      Len : Natural;
+      C : Character;
+   begin
+      Id := Null_Identifier;
+      Skip_Spaces;
+
+      --  The identifier shall start with a lower case letter.
+      if Source (Pos) not in 'a' .. 'z' then
+         return;
+      end if;
+
+      --  Scan the identifier (in lower cases).
+      Len := 0;
+      loop
+         C := Source (Pos);
+         exit when C not in 'a' .. 'z' and C /= '_';
+         Len := Len + 1;
+         Buffer (Len) := C;
+         Pos := Pos + 1;
+      end loop;
+
+      --  Shall be followed by a space or a new line.
+      if not (C = ' ' or else C = HT or else Is_EOL (C)) then
+         return;
+      end if;
+
+      Id := Get_Identifier (Buffer (1 .. Len));
+   end Scan_Comment_Identifier;
+
+   package Directive_Protect is
+      --  Called to scan a protect tool directive.
+      procedure Scan_Protect_Directive;
+   end Directive_Protect;
+
+   --  Body is put in a separate file to avoid pollution.
+   package body Directive_Protect is separate;
+
+   --  Called to scan a tool directive.
+   procedure Scan_Tool_Directive
+   is
+      procedure Error_Missing_Directive is
+      begin
+         Error_Msg_Scan ("tool directive required after '`'");
+         Skip_Until_EOL;
+      end Error_Missing_Directive;
+
+      C : Character;
+   begin
+      --  The current character is '`'.
+      Pos := Pos + 1;
+      Skip_Spaces;
+
+      --  Check and scan identifier.
+      C := Source (Pos);
+      if Characters_Kind (C) not in Letter then
+         Error_Missing_Directive;
+         return;
+      end if;
+
+      Scan_Identifier (False);
+
+      if Current_Token /= Tok_Identifier then
+         Error_Missing_Directive;
+         return;
+      end if;
+
+      Skip_Spaces;
+
+      --  Dispatch according to the identifier.
+      if Current_Identifier = Std_Names.Name_Protect then
+         Directive_Protect.Scan_Protect_Directive;
+      else
+         Error_Msg_Scan
+           ("unknown tool directive %i ignored", +Current_Identifier);
+         Skip_Until_EOL;
+      end if;
+   end Scan_Tool_Directive;
+
+   --  Scan tokens within a comment.  Return TRUE if Current_Token was set,
+   --  return FALSE to discard the comment (ie treat it like a real comment).
+   function Scan_Comment return Boolean
+   is
+      use Std_Names;
+      Id : Name_Id;
+   begin
+      Scan_Comment_Identifier (Id);
+
+      if Id = Null_Identifier then
+         return False;
+      end if;
+
+      case Id is
+         when Name_Psl =>
+            --  Accept tokens after '-- psl'.
+            if Flag_Psl_Comment then
+               Flag_Psl := True;
+               Flag_Scan_In_Comment := True;
+               return True;
+            end if;
+         when others =>
+            null;
+      end case;
+      return False;
+   end Scan_Comment;
+
+   function Scan_Exclam_Mark return Boolean is
+   begin
+      if Source (Pos) = '!' then
+         Pos := Pos + 1;
+         return True;
+      else
+         return False;
+      end if;
+   end Scan_Exclam_Mark;
+
+   function Scan_Underscore return Boolean is
+   begin
+      if Source (Pos) = '_' then
+         Pos := Pos + 1;
+         return True;
+      else
+         return False;
+      end if;
+   end Scan_Underscore;
+
+   --  The Scan_Next_Line procedure must be called after each end-of-line to
+   --  register to next line number.  This is called by Scan_CR_Newline and
+   --  Scan_LF_Newline.
+   procedure Scan_Next_Line is
+   begin
+      Files_Map.Skip_Gap (Current_Context.Source_File, Pos);
+      Current_Context.Line_Number := Current_Context.Line_Number + 1;
+      Current_Context.Line_Pos := Pos;
+      File_Add_Line_Number
+        (Current_Context.Source_File, Current_Context.Line_Number, Pos);
+   end Scan_Next_Line;
+
+   --  Scan a CR end-of-line.
+   procedure Scan_CR_Newline is
+   begin
+      -- Accept CR or CR+LF as line separator.
+      if Source (Pos + 1) = LF then
+         Pos := Pos + 2;
+      else
+         Pos := Pos + 1;
+      end if;
+      Scan_Next_Line;
+   end Scan_CR_Newline;
+
+   --  Scan a LF end-of-line.
+   procedure Scan_LF_Newline is
+   begin
+      -- Accept LF or LF+CR as line separator.
+      if Source (Pos + 1) = CR then
+         Pos := Pos + 2;
+      else
+         Pos := Pos + 1;
+      end if;
+      Scan_Next_Line;
+   end Scan_LF_Newline;
+
+   --  Emit an error message for an invalid character.
+   procedure Error_Bad_Character is
+   begin
+      --  Technically character literals, string literals, extended
+      --  identifiers and comments.
+      Error_Msg_Scan ("character %c can only be used in strings or comments",
+                      +Source (Pos));
+   end Error_Bad_Character;
+
+   -- Get a new token.
+   procedure Scan is
+   begin
+      if Current_Token /= Tok_Invalid then
+         Current_Context.Prev_Token := Current_Token;
+      end if;
+
+      Current_Context.Prev_Pos := Pos;
+
+      << Again >> null;
+
+      --  Skip commonly used separators.
+      --  (Like Skip_Spaces but manually inlined for speed).
+      while Source (Pos) = ' ' or Source (Pos) = HT loop
+         Pos := Pos + 1;
+      end loop;
+
+      Current_Context.Token_Pos := Pos;
+      Current_Context.Identifier := Null_Identifier;
+
+      case Source (Pos) is
+         when HT | ' ' =>
+            --  Must have already been skipped just above.
+            raise Internal_Error;
+         when NBSP =>
+            if Vhdl_Std = Vhdl_87 then
+               Error_Msg_Scan ("NBSP character not allowed in vhdl87");
+            end if;
+            Pos := Pos + 1;
+            goto Again;
+         when VT | FF =>
+            Pos := Pos + 1;
+            goto Again;
+         when LF =>
+            Scan_LF_Newline;
+            if Flag_Newline then
+               Current_Token := Tok_Newline;
+               return;
+            end if;
+            goto Again;
+         when CR =>
+            Scan_CR_Newline;
+            if Flag_Newline then
+               Current_Token := Tok_Newline;
+               return;
+            end if;
+            goto Again;
+         when '-' =>
+            if Source (Pos + 1) = '-' then
+               -- This is a comment.
+               -- LRM93 13.8
+               --   A comment starts with two adjacent hyphens and extends up
+               --   to the end of the line.
+               --   A comment can appear on any line line of a VHDL
+               --   description.
+               --   The presence or absence of comments has no influence on
+               --   whether a description is legal or illegal.
+               --   Futhermore, comments do not influence the execution of a
+               --   simulation module; their sole purpose is the enlightenment
+               --   of the human reader.
+               -- GHDL note: As a consequence, an obfruscating comment
+               --  is out of purpose, and a warning could be reported :-)
+               Pos := Pos + 2;
+
+               --  Scan inside a comment.  So we just ignore the two dashes.
+               if Flag_Scan_In_Comment then
+                  goto Again;
+               end if;
+
+               --  Handle keywords in comment (PSL).
+               if Flag_Comment_Keyword and then Scan_Comment then
+                  goto Again;
+               end if;
+
+               --  LRM93 13.2
+               --  In any case, a sequence of one or more format
+               --  effectors other than horizontal tabulation must
+               --  cause at least one end of line.
+               while not Is_EOL (Source (Pos)) loop
+                  --  LRM93 13.1
+                  --  The only characters allowed in the text of a VHDL
+                  --  description are the graphic characters and the format
+                  --  effectors.
+
+                  --  LRM02 13.1 Character set
+                  --  The only characters allowed in the text of a VHDL
+                  --  description (except within comments -- see 13.8) [...]
+                  --
+                  --  LRM02 13.8 Comments
+                  --  A comment [...] may contain any character except the
+                  --  format effectors vertical tab, carriage return, line
+                  --  feed and form feed.
+                  if not (Flags.Mb_Comment or Vhdl_Std >= Vhdl_02)
+                    and then Characters_Kind (Source (Pos)) = Invalid
+                  then
+                     Error_Msg_Scan ("invalid character, even in a comment");
+                  end if;
+                  Pos := Pos + 1;
+               end loop;
+               if Flag_Comment then
+                  Current_Token := Tok_Comment;
+                  return;
+               end if;
+               goto Again;
+            elsif Flag_Psl and then Source (Pos + 1) = '>' then
+               Current_Token := Tok_Minus_Greater;
+               Pos := Pos + 2;
+               return;
+            else
+               Current_Token := Tok_Minus;
+               Pos := Pos + 1;
+               return;
+            end if;
+         when '+' =>
+            Current_Token := Tok_Plus;
+            Pos := Pos + 1;
+            return;
+         when '*' =>
+            if Source (Pos + 1) = '*' then
+               Current_Token := Tok_Double_Star;
+               Pos := Pos + 2;
+            else
+               Current_Token := Tok_Star;
+               Pos := Pos + 1;
+            end if;
+            return;
+         when '/' =>
+            if Source (Pos + 1) = '=' then
+               Current_Token := Tok_Not_Equal;
+               Pos := Pos + 2;
+            elsif Source (Pos + 1) = '*' then
+               --  LRM08 15.9 Comments
+               --  A delimited comment start with a solidus (slash) character
+               --  immediately followed by an asterisk character and extends up
+               --  to the first subsequent occurrence of an asterisk character
+               --  immediately followed by a solidus character.
+               if Vhdl_Std < Vhdl_08 then
+                  Error_Msg_Scan
+                    ("block comment are not allowed before vhdl 2008");
+               end if;
+
+               --  Skip '/*'.
+               Pos := Pos + 2;
+
+               loop
+                  case Source (Pos) is
+                     when '/' =>
+                        --  LRM08 15.9
+                        --  Moreover, an occurrence of a solidus character
+                        --  immediately followed by an asterisk character
+                        --  within a delimited comment is not interpreted as
+                        --  the start of a nested delimited comment.
+                        if Source (Pos + 1) = '*' then
+                           Warning_Msg_Scan
+                             (Warnid_Nested_Comment,
+                              "'/*' found within a block comment");
+                        end if;
+                        Pos := Pos + 1;
+                     when '*' =>
+                        if Source (Pos + 1) = '/' then
+                           Pos := Pos + 2;
+                           exit;
+                        else
+                           Pos := Pos + 1;
+                        end if;
+                     when CR =>
+                        Scan_CR_Newline;
+                     when LF =>
+                        Scan_LF_Newline;
+                     when Files_Map.EOT =>
+                        if Pos >= Current_Context.File_Len then
+                           --  Point at the start of the comment.
+                           Error_Msg_Scan
+                             (Get_Token_Location,
+                              "block comment not terminated at end of file");
+                           exit;
+                        end if;
+                        Pos := Pos + 1;
+                     when others =>
+                        Pos := Pos + 1;
+                  end case;
+               end loop;
+               if Flag_Comment then
+                  Current_Token := Tok_Comment;
+                  return;
+               end if;
+               goto Again;
+            else
+               Current_Token := Tok_Slash;
+               Pos := Pos + 1;
+            end if;
+            return;
+         when '(' =>
+            Current_Token := Tok_Left_Paren;
+            Pos := Pos + 1;
+            return;
+         when ')' =>
+            Current_Token := Tok_Right_Paren;
+            Pos := Pos + 1;
+            return;
+         when '|' =>
+            if Flag_Psl then
+               if Source (Pos + 1) = '|' then
+                  Current_Token := Tok_Bar_Bar;
+                  Pos := Pos + 2;
+               elsif Source (Pos + 1) = '-'
+                 and then Source (Pos + 2) = '>'
+               then
+                  Current_Token := Tok_Bar_Arrow;
+                  Pos := Pos + 3;
+               elsif Source (Pos + 1) = '='
+                 and then Source (Pos + 2) = '>'
+               then
+                  Current_Token := Tok_Bar_Double_Arrow;
+                  Pos := Pos + 3;
+               else
+                  Current_Token := Tok_Bar;
+                  Pos := Pos + 1;
+               end if;
+            else
+               Current_Token := Tok_Bar;
+               Pos := Pos + 1;
+            end if;
+            return;
+         when '!' =>
+            if Flag_Psl then
+               Current_Token := Tok_Exclam_Mark;
+            else
+               if Source (Pos + 1) = '=' then
+                  --  != is not allowed in VHDL, but be friendly with C users.
+                  Error_Msg_Scan
+                    (Get_Token_Location, "Use '/=' for inequality in vhdl");
+                  Current_Token := Tok_Not_Equal;
+                  Pos := Pos + 1;
+               else
+                  --  LRM93 13.10
+                  --  A vertical line (|) can be replaced by an exclamation
+                  --  mark (!) where used as a delimiter.
+                  Current_Token := Tok_Bar;
+               end if;
+            end if;
+            Pos := Pos + 1;
+            return;
+         when ':' =>
+            if Source (Pos + 1) = '=' then
+               Current_Token := Tok_Assign;
+               Pos := Pos + 2;
+            else
+               Current_Token := Tok_Colon;
+               Pos := Pos + 1;
+            end if;
+            return;
+         when ';' =>
+            Current_Token := Tok_Semi_Colon;
+            Pos := Pos + 1;
+            return;
+         when ',' =>
+            Current_Token := Tok_Comma;
+            Pos := Pos + 1;
+            return;
+         when '.' =>
+            if Source (Pos + 1) = '.' then
+               --  Be Ada friendly...
+               Error_Msg_Scan ("'..' is invalid in vhdl, replaced by 'to'");
+               Current_Token := Tok_To;
+               Pos := Pos + 2;
+               return;
+            end if;
+            Current_Token := Tok_Dot;
+            Pos := Pos + 1;
+            return;
+         when '&' =>
+            if Flag_Psl and then Source (Pos + 1) = '&' then
+               Current_Token := Tok_And_And;
+               Pos := Pos + 2;
+            else
+               Current_Token := Tok_Ampersand;
+               Pos := Pos + 1;
+            end if;
+            return;
+         when '<' =>
+            case Source (Pos + 1) is
+               when '=' =>
+                  Current_Token := Tok_Less_Equal;
+                  Pos := Pos + 2;
+               when '>' =>
+                  Current_Token := Tok_Box;
+                  Pos := Pos + 2;
+               when '<' =>
+                  Current_Token := Tok_Double_Less;
+                  Pos := Pos + 2;
+               when others =>
+                  Current_Token := Tok_Less;
+                  Pos := Pos + 1;
+            end case;
+            return;
+         when '>' =>
+            case Source (Pos + 1) is
+               when '=' =>
+                  Current_Token := Tok_Greater_Equal;
+                  Pos := Pos + 2;
+               when '>' =>
+                  Current_Token := Tok_Double_Greater;
+                  Pos := Pos + 2;
+               when others =>
+                  Current_Token := Tok_Greater;
+                  Pos := Pos + 1;
+            end case;
+            return;
+         when '=' =>
+            if Source (Pos + 1) = '=' then
+               if AMS_Vhdl then
+                  Current_Token := Tok_Equal_Equal;
+               else
+                  Error_Msg_Scan
+                    ("'==' is not the vhdl equality, replaced by '='");
+                  Current_Token := Tok_Equal;
+               end if;
+               Pos := Pos + 2;
+            elsif Source (Pos + 1) = '>' then
+               Current_Token := Tok_Double_Arrow;
+               Pos := Pos + 2;
+            else
+               Current_Token := Tok_Equal;
+               Pos := Pos + 1;
+            end if;
+            return;
+         when ''' =>
+            -- Handle cases such as character'('a')
+            -- FIXME: what about f ()'length ? or .all'length
+            if Current_Context.Prev_Token /= Tok_Identifier
+              and then Current_Context.Prev_Token /= Tok_Character
+              and then Source (Pos + 2) = '''
+            then
+               -- LRM93 13.5
+               -- A character literal is formed by enclosing one of the 191
+               -- graphic character (...) between two apostrophe characters.
+               -- CHARACTER_LITERAL ::= ' GRAPHIC_CHARACTER '
+               if Characters_Kind (Source (Pos + 1)) not in Graphic_Character
+               then
+                  Error_Msg_Scan
+                    ("a character literal can only be a graphic character");
+               elsif Vhdl_Std = Vhdl_87
+                 and then Source (Pos + 1) > Character'Val (127)
+               then
+                  Error_8bit;
+               end if;
+               Current_Token := Tok_Character;
+               Current_Context.Identifier :=
+                 Name_Table.Get_Identifier (Source (Pos + 1));
+               Pos := Pos + 3;
+               return;
+            else
+               Current_Token := Tok_Tick;
+               Pos := Pos + 1;
+            end if;
+            return;
+         when '0' .. '9' =>
+            Scan_Literal;
+
+            --  LRM93 13.2
+            --  At least one separator is required between an identifier or
+            --  an abstract literal and an adjacent identifier or abstract
+            --  literal.
+            case Characters_Kind (Source (Pos)) is
+               when Digit =>
+                  --  Happen if d#ddd# is followed by a number.
+                  Error_Msg_Scan ("space is required between numbers");
+               when Upper_Case_Letter
+                 | Lower_Case_Letter =>
+                  --  Could call Error_Separator, but use a clearer message
+                  --  for this common case.
+                  --  Note: the term "unit name" is not correct here, since
+                  --  it can be any identifier or even a keyword; however it
+                  --  is probably the most common case (eg 10ns).
+                  if Vhdl_Std >= Vhdl_08 and then Current_Token = Tok_Integer
+                  then
+                     Current_Token := Tok_Integer_Letter;
+                  else
+                     Error_Msg_Scan
+                       ("space is required between number and unit name");
+                  end if;
+               when Other_Special_Character =>
+                  if Vhdl_Std > Vhdl_87 and then Source (Pos) = '\' then
+                     --  Start of extended identifier.
+                     Error_Separator;
+                  end if;
+               when Invalid
+                 | Format_Effector
+                 | Space_Character
+                 | Special_Character =>
+                  null;
+            end case;
+            return;
+         when '#' =>
+            Error_Msg_Scan ("'#' is used for based literals and "
+                              & "must be preceded by a base");
+            --  Skip.
+            Pos := Pos + 1;
+            goto Again;
+         when '"' =>
+            Scan_String;
+            return;
+         when '%' =>
+            if Vhdl_Std >= Vhdl_08 then
+               Error_Msg_Scan
+                 ("'%%' not allowed in vhdl 2008 (was replacement character)");
+               --  Continue as a string.
+            end if;
+            Scan_String;
+            return;
+         when '[' =>
+            if Flag_Psl then
+               if Source (Pos + 1) = '*' then
+                  Current_Token := Tok_Brack_Star;
+                  Pos := Pos + 2;
+               elsif Source (Pos + 1) = '+'
+                 and then Source (Pos + 2) = ']'
+               then
+                  Current_Token := Tok_Brack_Plus_Brack;
+                  Pos := Pos + 3;
+               elsif Source (Pos + 1) = '-'
+                 and then Source (Pos + 2) = '>'
+               then
+                  Current_Token := Tok_Brack_Arrow;
+                  Pos := Pos + 3;
+               elsif Source (Pos + 1) = '=' then
+                  Current_Token := Tok_Brack_Equal;
+                  Pos := Pos + 2;
+               else
+                  Current_Token := Tok_Left_Bracket;
+                  Pos := Pos + 1;
+               end if;
+            else
+               if Vhdl_Std = Vhdl_87 then
+                  Error_Msg_Scan
+                    ("'[' is an invalid character in vhdl87, replaced by '('");
+                  Current_Token := Tok_Left_Paren;
+               else
+                  Current_Token := Tok_Left_Bracket;
+               end if;
+               Pos := Pos + 1;
+            end if;
+            return;
+         when ']' =>
+            if Vhdl_Std = Vhdl_87 and not Flag_Psl then
+               Error_Msg_Scan
+                 ("']' is an invalid character in vhdl87, replaced by ')'");
+               Current_Token := Tok_Right_Paren;
+            else
+               Current_Token := Tok_Right_Bracket;
+            end if;
+            Pos := Pos + 1;
+            return;
+         when '{' =>
+            if Flag_Psl then
+               Current_Token := Tok_Left_Curly;
+            else
+               Error_Msg_Scan ("'{' is an invalid character, replaced by '('");
+               Current_Token := Tok_Left_Paren;
+            end if;
+            Pos := Pos + 1;
+            return;
+         when '}' =>
+            if Flag_Psl then
+               Current_Token := Tok_Right_Curly;
+            else
+               Error_Msg_Scan ("'}' is an invalid character, replaced by ')'");
+               Current_Token := Tok_Right_Paren;
+            end if;
+            Pos := Pos + 1;
+            return;
+         when '\' =>
+            if Vhdl_Std = Vhdl_87 then
+               Error_Msg_Scan
+                 ("extended identifiers are not allowed in vhdl87");
+            end if;
+            Scan_Extended_Identifier;
+            return;
+         when '^' =>
+            if Vhdl_Std >= Vhdl_08 then
+               Current_Token := Tok_Caret;
+            else
+               Current_Token := Tok_Xor;
+               Error_Msg_Scan ("'^' is not a VHDL operator, use 'xor'");
+            end if;
+            Pos := Pos + 1;
+            return;
+         when '~' =>
+            Error_Msg_Scan ("'~' is not a VHDL operator, use 'not'");
+            Pos := Pos + 1;
+            Current_Token := Tok_Not;
+            return;
+         when '?' =>
+            if Vhdl_Std < Vhdl_08 then
+               Error_Bad_Character;
+               Pos := Pos + 1;
+               goto Again;
+            else
+               if Source (Pos + 1) = '<' then
+                  if Source (Pos + 2) = '=' then
+                     Current_Token := Tok_Match_Less_Equal;
+                     Pos := Pos + 3;
+                  else
+                     Current_Token := Tok_Match_Less;
+                     Pos := Pos + 2;
+                  end if;
+               elsif Source (Pos + 1) = '>' then
+                  if Source (Pos + 2) = '=' then
+                     Current_Token := Tok_Match_Greater_Equal;
+                     Pos := Pos + 3;
+                  else
+                     Current_Token := Tok_Match_Greater;
+                     Pos := Pos + 2;
+                  end if;
+               elsif Source (Pos + 1) = '?' then
+                  Current_Token := Tok_Condition;
+                  Pos := Pos + 2;
+               elsif Source (Pos + 1) = '=' then
+                  Current_Token := Tok_Match_Equal;
+                  Pos := Pos + 2;
+               elsif Source (Pos + 1) = '/'
+                 and then Source (Pos + 2) = '='
+               then
+                  Current_Token := Tok_Match_Not_Equal;
+                  Pos := Pos + 3;
+               else
+                  Error_Msg_Scan ("unknown matching operator");
+                  Pos := Pos + 1;
+                  goto Again;
+               end if;
+            end if;
+            return;
+         when '`' =>
+            if Vhdl_Std >= Vhdl_08 then
+               Scan_Tool_Directive;
+            else
+               Error_Bad_Character;
+               Skip_Until_EOL;
+            end if;
+            goto Again;
+         when '$'
+           | Inverted_Exclamation .. Inverted_Question
+           | Multiplication_Sign | Division_Sign =>
+            Error_Bad_Character;
+            Pos := Pos + 1;
+            goto Again;
+         when '@' =>
+            if Vhdl_Std >= Vhdl_08 or Flag_Psl then
+               Current_Token := Tok_Arobase;
+               Pos := Pos + 1;
+               return;
+            else
+               Error_Bad_Character;
+               Pos := Pos + 1;
+               goto Again;
+            end if;
+         when '_' =>
+            Error_Msg_Scan ("an identifier can't start with '_'");
+            Scan_Identifier (Flag_Psl);
+            --  Cannot be a reserved word.
+            return;
+         when 'A' .. 'Z' | 'a' .. 'z' =>
+            Scan_Identifier (Flag_Psl);
+            Identifier_To_Token;
+            return;
+         when UC_A_Grave .. UC_O_Diaeresis
+           | UC_O_Oblique_Stroke .. UC_Icelandic_Thorn
+           | LC_German_Sharp_S .. LC_O_Diaeresis
+           | LC_O_Oblique_Stroke .. LC_Y_Diaeresis =>
+            if Vhdl_Std = Vhdl_87 then
+               Error_Msg_Scan
+                 ("non 7-bit latin-1 letters are not allowed in vhdl87");
+            end if;
+            Scan_Identifier (False);
+            --  Not a reserved word.
+            return;
+         when NUL .. ETX | ENQ .. BS | SO .. US | DEL .. APC =>
+            Error_Msg_Scan
+              ("control character that is not CR, LF, FF, HT or VT " &
+               "is not allowed");
+            Pos := Pos + 1;
+            goto Again;
+         when Files_Map.EOT =>
+            if Pos >= Current_Context.File_Len then
+               --  FIXME: should conditionnaly emit a warning if the file
+               --   is not terminated by an end of line.
+               Current_Token := Tok_Eof;
+            else
+               Error_Msg_Scan ("EOT is not allowed inside the file");
+               Pos := Pos + 1;
+               goto Again;
+            end if;
+            return;
+      end case;
+      --  Not reachable: all case should use goto Again or return.
+   end Scan;
+
+   function Is_Whitespace (C : Character) return Boolean is
+   begin
+      if C = ' ' then
+         return True;
+      elsif Vhdl_Std > Vhdl_87 and C = NBSP then
+         return True;
+      else
+         return False;
+      end if;
+   end Is_Whitespace;
+end Vhdl.Scanner;
-- 
cgit v1.2.3