--  VHDL lexical scanner.
--  Copyright (C) 2002 - 2014 Tristan Gingold
--
--  GHDL is free software; you can redistribute it and/or modify it under
--  the terms of the GNU General Public License as published by the Free
--  Software Foundation; either version 2, or (at your option) any later
--  version.
--
--  GHDL is distributed in the hope that it will be useful, but WITHOUT ANY
--  WARRANTY; without even the implied warranty of MERCHANTABILITY or
--  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
--  for more details.
--
--  You should have received a copy of the GNU General Public License
--  along with GHDL; see the file COPYING.  If not, write to the Free
--  Software Foundation, 59 Temple Place - Suite 330, Boston, MA
--  02111-1307, USA.
with Ada.Characters.Latin_1; use Ada.Characters.Latin_1;
with Ada.Characters.Handling;
with Errorout; use Errorout;
with Name_Table;
with Files_Map; use Files_Map;
with Std_Names;
with Str_Table;
with Flags; use Flags;

package body Scanner is

   -- This classification is a simplification of the categories of LRM93 13.1
   -- LRM93 13.1
   -- The only characters allowed in the text of a VHDL description are the
   -- graphic characters and format effector.

   type Character_Kind_Type is
      (
   -- Neither a format effector nor a graphic character.
       Invalid,
       Format_Effector,
       Upper_Case_Letter,
       Digit,
       Special_Character,
       Space_Character,
       Lower_Case_Letter,
       Other_Special_Character);

   -- LRM93 13.1
   -- BASIC_GRAPHIC_CHARACTER ::=
   --   UPPER_CASE_LETTER | DIGIT | SPECIAL_CHARACTER | SPACE_CHARACTER
   --subtype Basic_Graphic_Character is
   --  Character_Kind_Type range Upper_Case_Letter .. Space_Character;

   -- LRM93 13.1
   -- GRAPHIC_CHARACTER ::=
   --   BASIC_GRAPHIC_CHARACTER | LOWER_CASE_LETTER | OTHER_SPECIAL_CHARACTER
   -- Note: There is 191 graphic character.
   subtype Graphic_Character is
     Character_Kind_Type range Upper_Case_Letter .. Other_Special_Character;

   -- LRM93 13.1
   -- The characters included in each of the categories of basic graphic
   -- characters are defined as follows:
   type Character_Array is array (Character) of Character_Kind_Type;
   pragma Suppress_Initialization (Character_Array);
   Characters_Kind : constant Character_Array :=
     (NUL .. BS => Invalid,

      -- Format effectors are the ISO (and ASCII) characters called horizontal
      -- tabulation, vertical tabulation, carriage return, line feed, and form
      -- feed.
      HT | LF | VT | FF | CR => Format_Effector,

      SO .. US => Invalid,

      -- 1. upper case letters
      'A' .. 'Z' | UC_A_Grave .. UC_O_Diaeresis |
      UC_O_Oblique_Stroke .. UC_Icelandic_Thorn => Upper_Case_Letter,

      -- 2. digits
      '0' .. '9' => Digit,

      -- 3. special characters
      '"' | '#' | '&' | ''' | '(' | ')' | '+' | ',' | '-' | '.' | '/'
        | ':' | ';' | '<' | '=' | '>' | '[' | ']'
        | '_' | '|' | '*' => Special_Character,

      -- 4. the space characters
      ' ' | NBSP => Space_Character,

      -- 5. lower case letters
      'a' .. 'z' | LC_German_Sharp_S .. LC_O_Diaeresis |
      LC_O_Oblique_Stroke .. LC_Y_Diaeresis => Lower_Case_Letter,

      -- 6. other special characters
      '!' | '$' | '%' | '@' | '?' | '\' | '^' | '`' | '{' | '}' | '~'
        | Inverted_Exclamation .. Inverted_Question | Multiplication_Sign |
        Division_Sign => Other_Special_Character,

      --  '¡'    -- INVERTED EXCLAMATION MARK
      --  '¢'    -- CENT SIGN
      --  '£'    -- POUND SIGN
      --  '¤'    -- CURRENCY SIGN
      --  '¥'    -- YEN SIGN
      --  '¦'    -- BROKEN BAR
      --  '§'    -- SECTION SIGN
      --  '¨'    -- DIAERESIS
      --  '©'    -- COPYRIGHT SIGN
      --  'ª'    -- FEMININE ORDINAL INDICATOR
      --  '«'    -- LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
      --  '¬'    -- NOT SIGN
      --  '­'    -- SOFT HYPHEN
      --  '®'    -- REGISTERED SIGN
      --  '¯'    -- MACRON
      --  '°'    -- DEGREE SIGN
      --  '±'    -- PLUS-MINUS SIGN
      --  '²'    -- SUPERSCRIPT TWO
      --  '³'    -- SUPERSCRIPT THREE
      --  '´'    -- ACUTE ACCENT
      --  'µ'    -- MICRO SIGN
      --  '¶'    -- PILCROW SIGN
      --  '·'    -- MIDDLE DOT
      --  '¸'    -- CEDILLA
      --  '¹'    -- SUPERSCRIPT ONE
      --  'º'    -- MASCULINE ORDINAL INDICATOR
      --  '»'    -- RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
      --  '¼'    -- VULGAR FRACTION ONE QUARTER
      --  '½'    -- VULGAR FRACTION ONE HALF
      --  '¾'    -- VULGAR FRACTION THREE QUARTERS
      --  '¿'    -- INVERTED QUESTION MARK
      --  '×'    -- MULTIPLICATION SIGN
      --  '÷'    -- DIVISION SIGN

      DEL .. APC => Invalid);

   -- The context contains the whole internal state of the scanner, ie
   -- it can be used to push/pop a lexical analysis, to restart the
   -- scanner from a context marking a previous point.
   type Scan_Context is record
      Source: File_Buffer_Acc;
      Source_File: Source_File_Entry;
      Line_Number: Natural;
      Line_Pos: Source_Ptr;
      Pos: Source_Ptr;
      Token_Pos: Source_Ptr;
      File_Len: Source_Ptr;
      Token: Token_Type;
      Prev_Token: Token_Type;
      Str_Id : String8_Id;
      Str_Len : Nat32;
      Identifier: Name_Id;
      Int64: Iir_Int64;
      Fp64: Iir_Fp64;
   end record;
   pragma Suppress_Initialization (Scan_Context);

   -- The current context.
   -- Default value is an invalid context.
   Current_Context: Scan_Context := (Source => null,
                                     Source_File => No_Source_File_Entry,
                                     Line_Number => 0,
                                     Line_Pos => 0,
                                     Pos => 0,
                                     Token_Pos => 0,
                                     File_Len => 0,
                                     Token => Tok_Invalid,
                                     Prev_Token => Tok_Invalid,
                                     Identifier => Null_Identifier,
                                     Str_Id => Null_String8,
                                     Str_Len => 0,
                                     Int64 => 0,
                                     Fp64 => 0.0);

   Source: File_Buffer_Acc renames Current_Context.Source;
   Pos: Source_Ptr renames Current_Context.Pos;

   -- When CURRENT_TOKEN is an identifier, its name_id is stored into
   -- this global variable.
   -- Function current_text can be used to convert it into an iir.
   function Current_Identifier return Name_Id is
   begin
      return Current_Context.Identifier;
   end Current_Identifier;

   procedure Invalidate_Current_Identifier is
   begin
      Current_Context.Identifier := Null_Identifier;
   end Invalidate_Current_Identifier;

   procedure Invalidate_Current_Token is
   begin
      if Current_Token /= Tok_Invalid then
         Current_Context.Prev_Token := Current_Token;
         Current_Token := Tok_Invalid;
      end if;
   end Invalidate_Current_Token;

   function Current_String_Id return String8_Id is
   begin
      return Current_Context.Str_Id;
   end Current_String_Id;

   function Current_String_Length return Nat32 is
   begin
      return Current_Context.Str_Len;
   end Current_String_Length;

   function Current_Iir_Int64 return Iir_Int64 is
   begin
      return Current_Context.Int64;
   end Current_Iir_Int64;

   function Current_Iir_Fp64 return Iir_Fp64 is
   begin
      return Current_Context.Fp64;
   end Current_Iir_Fp64;

   function Get_Current_Source_File return Source_File_Entry is
   begin
      return Current_Context.Source_File;
   end Get_Current_Source_File;

   function Get_Current_Line return Natural is
   begin
      return Current_Context.Line_Number;
   end Get_Current_Line;

   function Get_Current_Column return Natural
   is
      Col : Natural;
      Name : Name_Id;
   begin
      Coord_To_Position
        (Current_Context.Source_File,
         Current_Context.Line_Pos,
         Integer (Current_Context.Pos - Current_Context.Line_Pos),
         Name, Col);
      return Col;
   end Get_Current_Column;

   function Get_Token_Column return Natural
   is
      Col : Natural;
      Name : Name_Id;
   begin
      Coord_To_Position
        (Current_Context.Source_File,
         Current_Context.Line_Pos,
         Integer (Current_Context.Token_Pos - Current_Context.Line_Pos),
         Name, Col);
      return Col;
   end Get_Token_Column;

   function Get_Token_Position return Source_Ptr is
   begin
      return Current_Context.Token_Pos;
   end Get_Token_Position;

   function Get_Position return Source_Ptr is
   begin
      return Current_Context.Pos;
   end Get_Position;

   procedure Set_File (Source_File : Source_File_Entry)
   is
      N_Source: File_Buffer_Acc;
   begin
      pragma Assert (Current_Context.Source = null);
      pragma Assert (Source_File /= No_Source_File_Entry);
      N_Source := Get_File_Source (Source_File);
      Current_Context := (Source => N_Source,
                          Source_File => Source_File,
                          Line_Number => 1,
                          Line_Pos => 0,
                          Pos => N_Source'First,
                          Token_Pos => 0, -- should be invalid,
                          File_Len => Get_File_Length (Source_File),
                          Token => Tok_Invalid,
                          Prev_Token => Tok_Invalid,
                          Identifier => Null_Identifier,
                          Str_Id => Null_String8,
                          Str_Len => 0,
                          Int64 => -1,
                          Fp64 => 0.0);
      Current_Token := Tok_Invalid;
   end Set_File;

   function Detect_Encoding_Errors return Boolean
   is
      C : constant Character := Source (Pos);
   begin
      --  No need to check further if first character is plain ASCII-7
      if C >= ' ' and C < Character'Val (127) then
         return False;
      end if;

      --  UTF-8 BOM is EF BB BF
      if Source (Pos + 0) = Character'Val (16#ef#)
        and then Source (Pos + 1) = Character'Val (16#bb#)
        and then Source (Pos + 2) = Character'Val (16#bf#)
      then
         Error_Msg_Scan
           ("source encoding must be latin-1 (UTF-8 BOM detected)");
         return True;
      end if;

      --  UTF-16 BE BOM is FE FF
      if Source (Pos + 0) = Character'Val (16#fe#)
        and then Source (Pos + 1) = Character'Val (16#ff#)
      then
         Error_Msg_Scan
           ("source encoding must be latin-1 (UTF-16 BE BOM detected)");
         return True;
      end if;

      --  UTF-16 LE BOM is FF FE
      if Source (Pos + 0) = Character'Val (16#ff#)
        and then Source (Pos + 1) = Character'Val (16#fe#)
      then
         Error_Msg_Scan
           ("source encoding must be latin-1 (UTF-16 LE BOM detected)");
         return True;
      end if;

      --  Certainly weird, but scanner/parser will catch it.
      return False;
   end Detect_Encoding_Errors;

   procedure Set_Current_Position (Position: Source_Ptr)
   is
      Loc : Location_Type;
      Offset: Natural;
      File_Entry : Source_File_Entry;
   begin
      --  Scanner must have been initialized.
      pragma Assert (Current_Context.Source /= null);

      Current_Token := Tok_Invalid;
      Current_Context.Pos := Position;
      Loc := File_Pos_To_Location (Current_Context.Source_File,
                                   Current_Context.Pos);
      Location_To_Coord (Loc,
                         File_Entry, Current_Context.Line_Pos,
                         Current_Context.Line_Number, Offset);
   end Set_Current_Position;

   procedure Close_File is
   begin
      Current_Context.Source := null;
   end Close_File;

   -- Emit an error when a character above 128 was found.
   -- This must be called only in vhdl87.
   procedure Error_8bit is
   begin
      Error_Msg_Scan ("8 bits characters not allowed in vhdl87");
   end Error_8bit;

   -- Emit an error when a separator is expected.
   procedure Error_Separator is
   begin
      Error_Msg_Scan ("a separator is required here");
   end Error_Separator;

   -- scan a decimal literal or a based literal.
   --
   -- LRM93 13.4.1
   -- DECIMAL_LITERAL ::= INTEGER [ . INTEGER ] [ EXPONENT ]
   -- EXPONENT ::= E [ + ] INTEGER | E - INTEGER
   --
   -- LRM93 13.4.2
   -- BASED_LITERAL ::= BASE # BASED_INTEGER [ . BASED_INTEGER ] # EXPONENT
   -- BASE ::= INTEGER
   procedure Scan_Literal is separate;

   --  Scan a string literal.
   --
   --  LRM93 13.6 / LRM08 15.7
   --  A string literal is formed by a sequence of graphic characters
   --  (possibly none) enclosed between two quotation marks used as string
   --  brackets.
   --  STRING_LITERAL ::= " { GRAPHIC_CHARACTER } "
   --
   --  IN: for a string, at the call of this procedure, the current character
   --  must be either '"' or '%'.
   procedure Scan_String
   is
      -- The quotation character (can be " or %).
      Mark: Character;
      -- Current character.
      C : Character;
      --  Current length.
      Length : Nat32;
   begin
      --  String delimiter.
      Mark := Source (Pos);
      pragma Assert (Mark = '"' or else Mark = '%');

      Pos := Pos + 1;
      Length := 0;
      Current_Context.Str_Id := Str_Table.Create_String8;
      loop
         C := Source (Pos);
         if C = Mark then
            --  LRM93 13.6
            --  If a quotation mark value is to be represented in the sequence
            --  of character values, then a pair of adjacent quoatation
            --  characters marks must be written at the corresponding place
            --  within the string literal.
            --  LRM93 13.10
            --  Any pourcent sign within the sequence of characters must then
            --  be doubled, and each such doubled percent sign is interpreted
            --  as a single percent sign value.
            --  The same replacement is allowed for a bit string literal,
            --  provieded that both bit string brackets are replaced.
            Pos := Pos + 1;
            exit when Source (Pos) /= Mark;
         end if;

         case Characters_Kind (C) is
            when Format_Effector =>
               if Mark = '%' then
                  --  No matching '%' has been found.  Consider '%' was used
                  --  as the remainder operator, instead of 'rem'.  This will
                  --  improve the error message.
                  Error_Msg_Scan
                    (Get_Token_Location,
                     "'%%' is not a vhdl operator, use 'rem'");
                  Current_Token := Tok_Rem;
                  Pos := Current_Context.Token_Pos + 1;
                  return;
               end if;
               Error_Msg_Scan ("format effector not allowed in a string");
               exit;
            when Invalid =>
               Error_Msg_Scan
                 ("invalid character not allowed, even in a string");
            when Graphic_Character =>
               if Vhdl_Std = Vhdl_87 and then C > Character'Val (127) then
                  Error_8bit;
               end if;
         end case;

         if C = '"' and Mark = '%' then
            --  LRM93 13.10
            --  The quotation marks (") used as string brackets at both ends of
            --  a string literal can be replaced by percent signs (%), provided
            --  that the enclosed sequence of characters constains no quotation
            --  marks, and provided that both string brackets are replaced.
            Error_Msg_Scan
              ("'""' cannot be used in a string delimited with '%%'");
         end if;

         Length := Length + 1;
         Str_Table.Append_String8 (Character'Pos (C));
         Pos := Pos + 1;
      end loop;

      Current_Token := Tok_String;
      Current_Context.Str_Len := Length;
   end Scan_String;

   --  Scan a bit string literal.
   --
   --  LRM93 13.7
   --  A bit string literal is formed by a sequence of extended digits
   --  (possibly none) enclosed between two quotations used as bit string
   --  brackets, preceded by a base specifier.
   --  BIT_STRING_LITERAL ::= BASE_SPECIFIER " [ BIT_VALUE ] "
   --  BIT_VALUE ::= EXTENDED_DIGIT { [ UNDERLINE ] EXTENDED_DIGIT }
   --
   --  The current character must be a base specifier, followed by '"' or '%'.
   --  The base must be valid.
   procedure Scan_Bit_String (Base_Log : Nat32)
   is
      --  Position of character '0'.
      Pos_0 : constant Nat8 := Character'Pos ('0');

      --  Used for the base.
      subtype Nat4 is Natural range 1 .. 4;
      Base : constant Nat32 := 2 ** Nat4 (Base_Log);

      -- The quotation character (can be " or %).
      Orig_Pos : constant Source_Ptr := Pos;
      Mark     : constant Character := Source (Orig_Pos);
      -- Current character.
      C : Character;
      --  Current length.
      Length : Nat32;
      --  Digit value.
      V, D : Nat8;
      --  True if invalid character already found, to avoid duplicate message.
      Has_Invalid : Boolean;
   begin
      pragma Assert (Mark = '"' or else Mark = '%');
      Pos := Pos + 1;
      Length := 0;
      Has_Invalid := False;
      Current_Context.Str_Id := Str_Table.Create_String8;
      loop
         << Again >> null;
         C := Source (Pos);
         Pos := Pos + 1;
         exit when C = Mark;

         -- LRM93 13.7
         -- If the base specifier is 'B', the extended digits in the bit
         -- value are restricted to 0 and 1.
         -- If the base specifier is 'O', the extended digits int the bit
         -- value are restricted to legal digits in the octal number
         -- system, ie, the digits 0 through 7.
         -- If the base specifier is 'X', the extended digits are all digits
         -- together with the letters A through F.
         case C is
            when '0' .. '9' =>
               V := Character'Pos (C) - Character'Pos ('0');
            when 'A' .. 'F' =>
               V := Character'Pos (C) - Character'Pos ('A') + 10;
            when 'a' .. 'f' =>
               --  LRM93 13.7
               --  A letter in a bit string literal (...) can be written either
               --  in lowercase or in upper case, with the same meaning.
               V := Character'Pos (C) - Character'Pos ('a') + 10;
            when '_' =>
               if Source (Pos) = '_' then
                  Error_Msg_Scan
                    ("double underscore not allowed in a bit string");
               end if;
               if Source (Pos - 2) = Mark then
                  Error_Msg_Scan
                    ("underscore not allowed at the start of a bit string");
               elsif Source (Pos) = Mark then
                  Error_Msg_Scan
                    ("underscore not allowed at the end of a bit string");
               end if;
               goto Again;
            when '"' =>
               pragma Assert (Mark = '%');
               Error_Msg_Scan
                 ("'""' cannot close a bit string opened by '%%'");
               exit;
            when '%' =>
               pragma Assert (Mark = '"');
               Error_Msg_Scan
                 ("'%%' cannot close a bit string opened by '""'");
               exit;
            when others =>
               if Characters_Kind (C) in Graphic_Character then
                  if Vhdl_Std >= Vhdl_08 then
                     V := Nat8'Last;
                  else
                     if not Has_Invalid then
                        Error_Msg_Scan ("invalid character in bit string");
                        Has_Invalid := True;
                     end if;
                     --  Continue the bit string
                     V := 0;
                  end if;
               else
                  if Mark = '%' then
                     Error_Msg_Scan
                       (File_Pos_To_Location
                          (Current_Context.Source_File, Orig_Pos),
                        "'%%' is not a vhdl operator, use 'rem'");
                     Current_Token := Tok_Rem;
                     Pos := Orig_Pos + 1;
                     return;
                  else
                     Error_Msg_Scan ("bit string not terminated");
                     Pos := Pos - 1;
                  end if;
                  exit;
               end if;
         end case;

         --  Expand bit value.
         if Vhdl_Std >= Vhdl_08 and V > Base then
            --  Expand as graphic character.
            for I in 1 .. Base_Log loop
               Str_Table.Append_String8_Char (C);
            end loop;
         else
            --  Expand as extended digits.
            case Base_Log is
               when 1 =>
                  if V > 1 then
                     Error_Msg_Scan
                       ("invalid character in a binary bit string");
                     V := 1;
                  end if;
                  Str_Table.Append_String8 (Pos_0 + V);
               when 3 =>
                  if V > 7 then
                     Error_Msg_Scan
                       ("invalid character in a octal bit string");
                     V := 7;
                  end if;
                  for I in 1 .. 3 loop
                     D := V / 4;
                     Str_Table.Append_String8 (Pos_0 + D);
                     V := (V - 4 * D) * 2;
                  end loop;
               when 4 =>
                  for I in 1 .. 4 loop
                     D := V / 8;
                     Str_Table.Append_String8 (Pos_0 + D);
                     V := (V - 8 * D) * 2;
                  end loop;
               when others =>
                  raise Internal_Error;
            end case;
         end if;

         Length := Length + Base_Log;
      end loop;

      --  Note: the length of the bit string may be 0.

      Current_Token := Tok_Bit_String;
      Current_Context.Str_Len := Length;
   end Scan_Bit_String;

   --  Scan a decimal bit string literal.  For base specifier D the algorithm
   --  is rather different: all the graphic characters shall be digits, and we
   --  need to use a (not very efficient) arbitrary precision multiplication.
   procedure Scan_Dec_Bit_String
   is
      use Str_Table;

      Id : String8_Id;

      --  Position of character '0'.
      Pos_0 : constant Nat8 := Character'Pos ('0');

      -- Current character.
      C : Character;
      --  Current length.
      Length : Nat32;
      --  Digit value.
      V, D : Nat8;

      type Carries_Type is array (0 .. 3) of Nat8;
      Carries : Carries_Type;
      No_Carries : constant Carries_Type := (others => Pos_0);

      --  Shift right carries.  Note the Carries (0) is the LSB.
      procedure Shr_Carries is
      begin
         Carries := (Carries (1), Carries (2), Carries (3), Pos_0);
      end Shr_Carries;

      procedure Append_Carries is
      begin
         --  Expand the bit string.  Note that position 1 of the string8 is
         --  the MSB.
         while Carries /= No_Carries loop
            Append_String8 (Pos_0);
            Length := Length + 1;
            for I in reverse 2 .. Length loop
               Set_Element_String8 (Id, I, Element_String8 (Id, I - 1));
            end loop;
            Set_Element_String8 (Id, 1, Carries (0));
            Shr_Carries;
         end loop;
      end Append_Carries;

      --  Add 1 to Carries.  Overflow is not allowed and should be prevented by
      --  construction.
      procedure Add_One_To_Carries is
      begin
         for I in Carries'Range loop
            if Carries (I) = Pos_0 then
               Carries (I) := Pos_0 + 1;
               --  End of propagation.
               exit;
            else
               Carries (I) := Pos_0;
               --  Continue propagation.
               pragma Assert (I < Carries'Last);
            end if;
         end loop;
      end Add_One_To_Carries;
   begin
      pragma Assert (Source (Pos) = '"');
      Pos := Pos + 1;
      Length := 0;
      Id := Create_String8;
      Current_Context.Str_Id := Id;
      loop
         << Again >> null;
         C := Source (Pos);
         Pos := Pos + 1;
         exit when C = '"';

         if C in '0' .. '9' then
            V := Character'Pos (C) - Character'Pos ('0');
         elsif C = '_' then
            if Source (Pos) = '_' then
               Error_Msg_Scan
                 ("double underscore not allowed in a bit string");
            end if;
            if Source (Pos - 2) = '"' then
               Error_Msg_Scan
                 ("underscore not allowed at the start of a bit string");
            elsif Source (Pos) = '"' then
               Error_Msg_Scan
                 ("underscore not allowed at the end of a bit string");
            end if;
            goto Again;
         else
            if Characters_Kind (C) in Graphic_Character then
               Error_Msg_Scan
                 ("graphic character not allowed in decimal bit string");
               --  Continue the bit string
               V := 0;
            else
               Error_Msg_Scan ("bit string not terminated");
               Pos := Pos - 1;
               exit;
            end if;
         end if;

         --  Multiply by 10.
         Carries := (others => Pos_0);
         for I in reverse 1 .. Length loop
            --  Shift by 1 (*2).
            D := Element_String8 (Id, I);
            Set_Element_String8 (Id, I, Carries (0));
            Shr_Carries;
            --  Add D and D * 4.
            if D /= Pos_0 then
               Add_One_To_Carries;
               --  Add_Four_To_Carries:
               for I in 2 .. 3 loop
                  if Carries (I) = Pos_0 then
                     Carries (I) := Pos_0 + 1;
                     --  End of propagation.
                     exit;
                  else
                     Carries (I) := Pos_0;
                     --  Continue propagation.
                  end if;
               end loop;
            end if;
         end loop;
         Append_Carries;

         --  Add V.
         for I in Carries'Range loop
            D := V / 2;
            Carries (I) := Pos_0 + (V - 2 * D);
            V := D;
         end loop;
         for I in reverse 1 .. Length loop
            D := Element_String8 (Id, I);
            if D /= Pos_0 then
               Add_One_To_Carries;
            end if;
            Set_Element_String8 (Id, I, Carries (0));
            Shr_Carries;
            exit when Carries = No_Carries;
         end loop;
         Append_Carries;
      end loop;

      Current_Token := Tok_Bit_String;
      Current_Context.Str_Len := Length;
   end Scan_Dec_Bit_String;

   -- LRM93 13.3.1
   -- Basic Identifiers
   -- A basic identifier consists only of letters, digits, and underlines.
   -- BASIC_IDENTIFIER ::= LETTER { [ UNDERLINE ] LETTER_OR_DIGIT }
   -- LETTER_OR_DIGIT ::= LETTER | DIGIT
   -- LETTER ::= UPPER_CASE_LETTER | LOWER_CASE_LETTER
   --
   -- NB: At the call of this procedure, the current character must be a legal
   -- character for a basic identifier.
   procedure Scan_Identifier
   is
      use Name_Table;
      C : Character;
      Len : Natural;
   begin
      -- This is an identifier or a key word.
      Len := 0;
      loop
         --  Source (pos) is correct.
         --  LRM93 13.3.1
         --   All characters if a basic identifier are signifiant, including
         --   any underline character inserted between a letter or digit and
         --   an adjacent letter or digit.
         --   Basic identifiers differing only in the use of the corresponding
         --   upper and lower case letters are considered as the same.
         -- This is achieved by converting all upper case letters into
         -- equivalent lower case letters.
         -- The opposite (converting in lower case letters) is not possible,
         -- because two characters have no upper-case equivalent.
         C := Source (Pos);
         case C is
            when 'A' .. 'Z' =>
               C := Character'Val
                 (Character'Pos (C)
                    + Character'Pos ('a') - Character'Pos ('A'));
            when 'a' .. 'z' | '0' .. '9' =>
               null;
            when '_' =>
               if Source (Pos + 1) = '_' then
                  Error_Msg_Scan ("two underscores can't be consecutive");
               end if;
            when ' ' | ')' | '.' | ';' | ':' =>
               exit;
            when others =>
               --  Non common case.
               case Characters_Kind (C) is
                  when Upper_Case_Letter | Lower_Case_Letter =>
                     if Vhdl_Std = Vhdl_87 then
                        Error_8bit;
                     end if;
                     C := Ada.Characters.Handling.To_Lower (C);
                  when Digit =>
                     raise Internal_Error;
                  when others =>
                     exit;
               end case;
         end case;

         --  Put character in name buffer.  FIXME: compute the hash at the same
         --  time ?
         Len := Len + 1;
         Nam_Buffer (Len) := C;

         --  Next character.
         Pos := Pos + 1;
      end loop;

      if Source (Pos - 1) = '_' then
         if not Flag_Psl then
            --  Some PSL reserved words finish with '_'.  This case is handled
            --  later.
            Error_Msg_Scan ("identifier cannot finish with '_'");
         end if;
         Pos := Pos - 1;
         Len := Len - 1;
         C := '_';
      end if;
      Nam_Length := Len;

      -- LRM93 13.2
      -- At least one separator is required between an identifier or an
      -- abstract literal and an adjacent identifier or abstract literal.
      case Characters_Kind (C) is
         when Digit
           | Upper_Case_Letter
           | Lower_Case_Letter =>
            raise Internal_Error;
         when Other_Special_Character | Special_Character =>
            if (C = '"' or C = '%') and then Len <= 2 then
               if C = '%' and Vhdl_Std >= Vhdl_08 then
                  Error_Msg_Scan ("'%%' not allowed in vhdl 2008 "
                                    & "(was replacement character)");
                  --  Continue as a bit string.
               end if;

               --  Good candidate for bit string.

               --  LRM93 13.7
               --  BASE_SPECIFIER ::= B | O | X
               --
               --  A letter in a bit string literal (either an extended digit
               --  or the base specifier) can be written either in lower case
               --  or in upper case, with the same meaning.
               --
               --  LRM08 15.8 Bit string literals
               --  BASE_SPECICIER ::=
               --     B | O | X | UB | UO | UX | SB | SO | SX | D
               --
               --  An extended digit and the base specifier in a bit string
               --  literal can be written either in lowercase or in uppercase,
               --  with the same meaning.
               declare
                  Base : Nat32;
                  Cl : constant Character := Nam_Buffer (Len);
                  Cf : constant Character := Nam_Buffer (1);
               begin
                  if Cl = 'b' then
                     Base := 1;
                  elsif Cl = 'o' then
                     Base := 3;
                  elsif Cl = 'x' then
                     Base := 4;
                  elsif Vhdl_Std >= Vhdl_08 and Len = 1 and Cf = 'd' then
                     Scan_Dec_Bit_String;
                     return;
                  else
                     Base := 0;
                  end if;
                  if Base > 0 then
                     if Len = 1 then
                        Scan_Bit_String (Base);
                        return;
                     elsif Vhdl_Std >= Vhdl_08
                       and then (Cf = 's' or Cf = 'u')
                     then
                        Scan_Bit_String (Base);
                        return;
                     end if;
                  end if;
               end;
            elsif Vhdl_Std > Vhdl_87 and then C = '\' then
               --  Start of extended identifier.  Cannot follow an identifier.
               Error_Separator;
            end if;

         when Invalid =>
            --  Improve error message for use of UTF-8 quote marks.
            --  It's possible because in the sequence of UTF-8 bytes for the
            --  quote marks, there are invalid character (in the 128-160
            --  range).
            if C = Character'Val (16#80#)
              and then Nam_Buffer (Len) = Character'Val (16#e2#)
              and then (Source (Pos + 1) = Character'Val (16#98#)
                          or else Source (Pos + 1) = Character'Val (16#99#))
            then
               --  UTF-8 left or right single quote mark.
               if Len > 1 then
                  --  The first byte (0xe2) is part of the identifier.  An
                  --  error will be detected as the next byte (0x80) is
                  --  invalid.  Remove the first byte from the identifier, and
                  --  let's catch the error later.
                  Nam_Length := Len - 1;
                  Pos := Pos - 1;
               else
                  Error_Msg_Scan ("invalid use of UTF8 character for '");
                  Pos := Pos + 2;

                  --  Distinguish between character literal and tick.  Don't
                  --  care about possible invalid character literal, as in any
                  --  case we have already emitted an error message.
                  if Current_Context.Prev_Token /= Tok_Identifier
                    and then Current_Context.Prev_Token /= Tok_Character
                    and then
                    (Source (Pos + 1) = '''
                       or else
                       (Source (Pos + 1) = Character'Val (16#e2#)
                          and then Source (Pos + 2) = Character'Val (16#80#)
                          and then Source (Pos + 3) = Character'Val (16#99#)))
                  then
                     Current_Token := Tok_Character;
                     Current_Context.Identifier :=
                       Name_Table.Get_Identifier (Source (Pos));
                     if Source (Pos + 1) = ''' then
                        Pos := Pos + 2;
                     else
                        Pos := Pos + 4;
                     end if;
                  else
                     Current_Token := Tok_Tick;
                  end if;
                  return;
               end if;
            end if;
         when Format_Effector
           | Space_Character =>
            null;
      end case;

      -- Hash it.
      Current_Context.Identifier := Name_Table.Get_Identifier;
      if Current_Identifier in Std_Names.Name_Id_Keywords then
         -- LRM93 13.9
         --   The identifiers listed below are called reserved words and are
         --   reserved for signifiances in the language.
         -- IN: this is also achieved in packages std_names and tokens.
         Current_Token := Token_Type'Val
           (Token_Type'Pos (Tok_First_Keyword)
              + Current_Identifier - Std_Names.Name_First_Keyword);
         case Current_Identifier is
            when Std_Names.Name_Id_AMS_Reserved_Words =>
               if not AMS_Vhdl then
                  if Is_Warning_Enabled (Warnid_Reserved_Word) then
                     Warning_Msg_Scan
                       (Warnid_Reserved_Word,
                        "using %i AMS-VHDL reserved word as an identifier",
                        +Current_Identifier);
                  end if;
                  Current_Token := Tok_Identifier;
               end if;
            when Std_Names.Name_Id_Vhdl08_Reserved_Words =>
               if Vhdl_Std < Vhdl_08 then
                  if Is_Warning_Enabled (Warnid_Reserved_Word) then
                     Warning_Msg_Scan
                       (Warnid_Reserved_Word,
                        "using %i vhdl-2008 reserved word as an identifier",
                        +Current_Identifier);
                  end if;
                  Current_Token := Tok_Identifier;
               end if;
            when Std_Names.Name_Id_Vhdl00_Reserved_Words =>
               if Vhdl_Std < Vhdl_00 then
                  if Is_Warning_Enabled (Warnid_Reserved_Word) then
                     Warning_Msg_Scan
                       (Warnid_Reserved_Word,
                        "using %i vhdl-2000 reserved word as an identifier",
                        +Current_Identifier);
                  end if;
                  Current_Token := Tok_Identifier;
               end if;
            when Std_Names.Name_Id_Vhdl93_Reserved_Words =>
               if Vhdl_Std = Vhdl_87 then
                  if Is_Warning_Enabled (Warnid_Reserved_Word) then
                     Warning_Msg_Scan
                       (Warnid_Reserved_Word,
                        "using %i vhdl93 reserved word as a vhdl87 identifier",
                        +Current_Identifier, True);
                     Warning_Msg_Scan
                       (Warnid_Reserved_Word,
                        "(use option --std=93 to compile as vhdl93)");
                  end if;
                  Current_Token := Tok_Identifier;
               end if;
            when Std_Names.Name_Id_Vhdl87_Reserved_Words =>
               null;
            when others =>
               raise Program_Error;
         end case;
      elsif Flag_Psl then
         case Current_Identifier is
            when Std_Names.Name_Clock =>
               Current_Token := Tok_Psl_Clock;
            when Std_Names.Name_Const =>
               Current_Token := Tok_Psl_Const;
            when Std_Names.Name_Boolean =>
               Current_Token := Tok_Psl_Boolean;
            when Std_Names.Name_Sequence =>
               Current_Token := Tok_Psl_Sequence;
            when Std_Names.Name_Property =>
               Current_Token := Tok_Psl_Property;
            when Std_Names.Name_Endpoint =>
               Current_Token := Tok_Psl_Endpoint;
            when Std_Names.Name_Cover =>
               Current_Token := Tok_Psl_Cover;
            when Std_Names.Name_Default =>
               Current_Token := Tok_Psl_Default;
            when Std_Names.Name_Inf =>
               Current_Token := Tok_Inf;
            when Std_Names.Name_Within =>
               Current_Token := Tok_Within;
            when Std_Names.Name_Abort =>
               Current_Token := Tok_Abort;
            when Std_Names.Name_Before =>
               Current_Token := Tok_Before;
            when Std_Names.Name_Always =>
               Current_Token := Tok_Always;
            when Std_Names.Name_Never =>
               Current_Token := Tok_Never;
            when Std_Names.Name_Eventually =>
               Current_Token := Tok_Eventually;
            when Std_Names.Name_Next_A =>
               Current_Token := Tok_Next_A;
            when Std_Names.Name_Next_E =>
               Current_Token := Tok_Next_E;
            when Std_Names.Name_Next_Event =>
               Current_Token := Tok_Next_Event;
            when Std_Names.Name_Next_Event_A =>
               Current_Token := Tok_Next_Event_A;
            when Std_Names.Name_Next_Event_E =>
               Current_Token := Tok_Next_Event_E;
            when Std_Names.Name_Until =>
               Current_Token := Tok_Until;
            when others =>
               Current_Token := Tok_Identifier;
               if C = '_' then
                  Error_Msg_Scan ("identifiers cannot finish with '_'");
               end if;
         end case;
      else
         Current_Token := Tok_Identifier;
      end if;
   end Scan_Identifier;

   --  LRM93 13.3.2
   --  EXTENDED_IDENTIFIER ::= \ GRAPHIC_CHARACTER { GRAPHIC_CHARACTER } \
   --
   -- Create an (extended) indentifier.
   -- Extended identifiers are stored as they appear (leading and tailing
   -- backslashes, doubling backslashes inside).
   procedure Scan_Extended_Identifier
   is
      use Name_Table;
   begin
      -- LRM93 13.3.2
      --   Moreover, every extended identifiers is distinct from any basic
      --   identifier.
      -- This is satisfied by storing '\' in the name table.
      Nam_Length := 1;
      Nam_Buffer (1) := '\';
      loop
         --  Next character.
         Pos := Pos + 1;

         if Source (Pos) = '\' then
            -- LRM93 13.3.2
            -- If a backslash is to be used as one of the graphic characters
            -- of an extended literal, it must be doubled.
            -- LRM93 13.3.2
            -- (a doubled backslash couting as one character)
            Nam_Length := Nam_Length + 1;
            Nam_Buffer (Nam_Length) := '\';

            Pos := Pos + 1;

            exit when Source (Pos) /= '\';
         end if;

         -- source (pos) is correct.
         case Characters_Kind (Source (Pos)) is
            when Format_Effector =>
               Error_Msg_Scan ("format effector in extended identifier");
               exit;
            when Graphic_Character =>
               null;
            when Invalid =>
               Error_Msg_Scan ("invalid character in extended identifier");
         end case;
         Nam_Length := Nam_Length + 1;
         -- LRM93 13.3.2
         -- Extended identifiers differing only in the use of corresponding
         -- upper and lower case letters are distinct.
         Nam_Buffer (Nam_Length) := Source (Pos);
      end loop;

      if Nam_Length <= 2 then
         Error_Msg_Scan ("empty extended identifier is not allowed");
      end if;

      -- LRM93 13.2
      -- At least one separator is required between an identifier or an
      -- abstract literal and an adjacent identifier or abstract literal.
      case Characters_Kind (Source (Pos)) is
         when Digit
           | Upper_Case_Letter
           | Lower_Case_Letter =>
            Error_Separator;
         when Invalid
           | Format_Effector
           | Space_Character
           | Special_Character
           | Other_Special_Character =>
            null;
      end case;

      -- Hash it.
      Current_Context.Identifier := Name_Table.Get_Identifier;
      Current_Token := Tok_Identifier;
   end Scan_Extended_Identifier;

   procedure Convert_Identifier
   is
      procedure Error_Bad is
      begin
         Error_Msg_Option ("bad character in identifier");
      end Error_Bad;

      procedure Error_8bit is
      begin
         Error_Msg_Option ("8 bits characters not allowed in vhdl87");
      end Error_8bit;

      use Name_Table;
      C : Character;
   begin
      if Nam_Length = 0 then
         Error_Msg_Option ("identifier required");
         return;
      end if;

      if Nam_Buffer (1) = '\' then
         --  Extended identifier.
         if Vhdl_Std = Vhdl_87 then
            Error_Msg_Option ("extended identifiers not allowed in vhdl87");
            return;
         end if;

         if Nam_Length < 3 then
            Error_Msg_Option ("extended identifier is too short");
            return;
         end if;
         if Nam_Buffer (Nam_Length) /= '\' then
            Error_Msg_Option ("extended identifier must finish with a '\'");
            return;
         end if;
         for I in 2 .. Nam_Length - 1 loop
            C := Nam_Buffer (I);
            case Characters_Kind (C) is
               when Format_Effector =>
                  Error_Msg_Option ("format effector in extended identifier");
                  return;
               when Graphic_Character =>
                  if C = '\' then
                     if Nam_Buffer (I + 1) /= '\'
                       or else I = Nam_Length - 1
                     then
                        Error_Msg_Option ("anti-slash must be doubled "
                                            & "in extended identifier");
                        return;
                     end if;
                  end if;
               when Invalid =>
                  Error_Bad;
            end case;
         end loop;
      else
         --  Identifier
         for I in 1 .. Nam_Length loop
            C := Nam_Buffer (I);
            case Characters_Kind (C) is
               when Upper_Case_Letter =>
                  if Vhdl_Std = Vhdl_87 and C > 'Z' then
                     Error_8bit;
                  end if;
                  Nam_Buffer (I) := Ada.Characters.Handling.To_Lower (C);
               when Lower_Case_Letter | Digit =>
                  if Vhdl_Std = Vhdl_87 and C > 'z' then
                     Error_8bit;
                  end if;
               when Special_Character =>
                  -- The current character is legal in an identifier.
                  if C = '_' then
                     if I = 1 then
                        Error_Msg_Option
                          ("identifier cannot start with an underscore");
                        return;
                     end if;
                     if Nam_Buffer (I - 1) = '_' then
                        Error_Msg_Option
                          ("two underscores can't be consecutive");
                        return;
                     end if;
                     if I = Nam_Length then
                        Error_Msg_Option
                          ("identifier cannot finish with an underscore");
                        return;
                     end if;
                  else
                     Error_Bad;
                  end if;
               when others =>
                  Error_Bad;
            end case;
         end loop;
      end if;
   end Convert_Identifier;

   --  Scan an identifier within a comment.  Only lower case letters are
   --  allowed.
   function Scan_Comment_Identifier return Boolean
   is
      use Name_Table;
      Len : Natural;
      C : Character;
   begin
      --  Skip spaces.
      while Source (Pos) = ' ' or Source (Pos) = HT loop
         Pos := Pos + 1;
      end loop;

      --  The identifier shall start with a lower case letter.
      if Source (Pos) not in 'a' .. 'z' then
         return False;
      end if;

      --  Scan the identifier (in lower cases).
      Len := 0;
      loop
         C := Source (Pos);
         exit when C not in 'a' .. 'z' and C /= '_';
         Len := Len + 1;
         Nam_Buffer (Len) := C;
         Pos := Pos + 1;
      end loop;

      --  Shall be followed by a space or a new line.
      case C is
         when ' ' | HT | LF | CR =>
            null;
         when others =>
            return False;
      end case;

      Nam_Length := Len;
      return True;
   end Scan_Comment_Identifier;

   --  Scan tokens within a comment.  Return TRUE if Current_Token was set,
   --  return FALSE to discard the comment (ie treat it like a real comment).
   function Scan_Comment return Boolean
   is
      use Std_Names;
      Id : Name_Id;
   begin
      if not Scan_Comment_Identifier then
         return False;
      end if;

      -- Hash it.
      Id := Name_Table.Get_Identifier;

      case Id is
         when Name_Psl =>
            --  Accept tokens after '-- psl'.
            if Flag_Psl_Comment then
               Flag_Psl := True;
               Flag_Scan_In_Comment := True;
               return True;
            end if;
         when others =>
            null;
      end case;
      return False;
   end Scan_Comment;

   function Scan_Exclam_Mark return Boolean is
   begin
      if Source (Pos) = '!' then
         Pos := Pos + 1;
         return True;
      else
         return False;
      end if;
   end Scan_Exclam_Mark;

   function Scan_Underscore return Boolean is
   begin
      if Source (Pos) = '_' then
         Pos := Pos + 1;
         return True;
      else
         return False;
      end if;
   end Scan_Underscore;

   --  The Scan_Next_Line procedure must be called after each end-of-line to
   --  register to next line number.  This is called by Scan_CR_Newline and
   --  Scan_LF_Newline.
   procedure Scan_Next_Line is
   begin
      Current_Context.Line_Number := Current_Context.Line_Number + 1;
      Current_Context.Line_Pos := Pos;
      File_Add_Line_Number
        (Current_Context.Source_File, Current_Context.Line_Number, Pos);
   end Scan_Next_Line;

   --  Scan a CR end-of-line.
   procedure Scan_CR_Newline is
   begin
      -- Accept CR or CR+LF as line separator.
      if Source (Pos + 1) = LF then
         Pos := Pos + 2;
      else
         Pos := Pos + 1;
      end if;
      Scan_Next_Line;
   end Scan_CR_Newline;

   --  Scan a LF end-of-line.
   procedure Scan_LF_Newline is
   begin
      -- Accept LF or LF+CR as line separator.
      if Source (Pos + 1) = CR then
         Pos := Pos + 2;
      else
         Pos := Pos + 1;
      end if;
      Scan_Next_Line;
   end Scan_LF_Newline;

   -- Get a new token.
   procedure Scan is
   begin
      if Current_Token /= Tok_Invalid then
         Current_Context.Prev_Token := Current_Token;
      end if;

      << Again >> null;

      --  Skip commonly used separators.
      while Source (Pos) = ' ' or Source (Pos) = HT loop
         Pos := Pos + 1;
      end loop;

      Current_Context.Token_Pos := Pos;
      Current_Context.Identifier := Null_Identifier;

      case Source (Pos) is
         when HT | ' ' =>
            --  Must have already been skipped just above.
            raise Internal_Error;
         when NBSP =>
            if Vhdl_Std = Vhdl_87 then
               Error_Msg_Scan ("NBSP character not allowed in vhdl87");
            end if;
            Pos := Pos + 1;
            goto Again;
         when VT | FF =>
            Pos := Pos + 1;
            goto Again;
         when LF =>
            Scan_LF_Newline;
            if Flag_Newline then
               Current_Token := Tok_Newline;
               return;
            end if;
            goto Again;
         when CR =>
            Scan_CR_Newline;
            if Flag_Newline then
               Current_Token := Tok_Newline;
               return;
            end if;
            goto Again;
         when '-' =>
            if Source (Pos + 1) = '-' then
               -- This is a comment.
               -- LRM93 13.8
               --   A comment starts with two adjacent hyphens and extends up
               --   to the end of the line.
               --   A comment can appear on any line line of a VHDL
               --   description.
               --   The presence or absence of comments has no influence on
               --   wether a description is legal or illegal.
               --   Futhermore, comments do not influence the execution of a
               --   simulation module; their sole purpose is the enlightenment
               --   of the human reader.
               -- GHDL note: As a consequence, an obfruscating comment
               --  is out of purpose, and a warning could be reported :-)
               Pos := Pos + 2;

               --  Scan inside a comment.  So we just ignore the two dashes.
               if Flag_Scan_In_Comment then
                  goto Again;
               end if;

               --  Handle keywords in comment (PSL).
               if Flag_Comment_Keyword and then Scan_Comment then
                  goto Again;
               end if;

               --  LRM93 13.2
               --  In any case, a sequence of one or more format
               --  effectors other than horizontal tabulation must
               --  cause at least one end of line.
               while Source (Pos) /= CR and Source (Pos) /= LF and
                 Source (Pos) /= VT and Source (Pos) /= FF and
                 Source (Pos) /= Files_Map.EOT
               loop
                  --  LRM93 13.1
                  --  The only characters allowed in the text of a VHDL
                  --  description are the graphic characters and the format
                  --  effectors.

                  --  LRM02 13.1 Character set
                  --  The only characters allowed in the text of a VHDL
                  --  description (except within comments -- see 13.8) [...]
                  --
                  --  LRM02 13.8 Comments
                  --  A comment [...] may contain any character except the
                  --  format effectors vertical tab, carriage return, line
                  --  feed and form feed.
                  if not (Flags.Mb_Comment or Vhdl_Std >= Vhdl_02)
                    and then Characters_Kind (Source (Pos)) = Invalid
                  then
                     Error_Msg_Scan ("invalid character, even in a comment");
                  end if;
                  Pos := Pos + 1;
               end loop;
               if Flag_Comment then
                  Current_Token := Tok_Comment;
                  return;
               end if;
               goto Again;
            elsif Flag_Psl and then Source (Pos + 1) = '>' then
               Current_Token := Tok_Minus_Greater;
               Pos := Pos + 2;
               return;
            else
               Current_Token := Tok_Minus;
               Pos := Pos + 1;
               return;
            end if;
         when '+' =>
            Current_Token := Tok_Plus;
            Pos := Pos + 1;
            return;
         when '*' =>
            if Source (Pos + 1) = '*' then
               Current_Token := Tok_Double_Star;
               Pos := Pos + 2;
            else
               Current_Token := Tok_Star;
               Pos := Pos + 1;
            end if;
            return;
         when '/' =>
            if Source (Pos + 1) = '=' then
               Current_Token := Tok_Not_Equal;
               Pos := Pos + 2;
            elsif Source (Pos + 1) = '*' then
               --  LRM08 15.9 Comments
               --  A delimited comment start with a solidus (slash) character
               --  immediately followed by an asterisk character and extends up
               --  to the first subsequent occurrence of an asterisk character
               --  immediately followed by a solidus character.
               if Vhdl_Std < Vhdl_08 then
                  Error_Msg_Scan
                    ("block comment are not allowed before vhdl 2008");
               end if;

               --  Skip '/*'.
               Pos := Pos + 2;

               loop
                  case Source (Pos) is
                     when '/' =>
                        --  LRM08 15.9
                        --  Moreover, an occurrence of a solidus character
                        --  immediately followed by an asterisk character
                        --  within a delimited comment is not interpreted as
                        --  the start of a nested delimited comment.
                        if Source (Pos + 1) = '*' then
                           Warning_Msg_Scan
                             (Warnid_Nested_Comment,
                              "'/*' found within a block comment");
                        end if;
                        Pos := Pos + 1;
                     when '*' =>
                        if Source (Pos + 1) = '/' then
                           Pos := Pos + 2;
                           exit;
                        else
                           Pos := Pos + 1;
                        end if;
                     when CR =>
                        Scan_CR_Newline;
                     when LF =>
                        Scan_LF_Newline;
                     when Files_Map.EOT =>
                        if Pos >= Current_Context.File_Len then
                           --  Point at the start of the comment.
                           Error_Msg_Scan
                             (Get_Token_Location,
                              "block comment not terminated at end of file");
                           exit;
                        end if;
                        Pos := Pos + 1;
                     when others =>
                        Pos := Pos + 1;
                  end case;
               end loop;
               if Flag_Comment then
                  Current_Token := Tok_Comment;
                  return;
               end if;
               goto Again;
            else
               Current_Token := Tok_Slash;
               Pos := Pos + 1;
            end if;
            return;
         when '(' =>
            Current_Token := Tok_Left_Paren;
            Pos := Pos + 1;
            return;
         when ')' =>
            Current_Token := Tok_Right_Paren;
            Pos := Pos + 1;
            return;
         when '|' =>
            if Flag_Psl then
               if Source (Pos + 1) = '|' then
                  Current_Token := Tok_Bar_Bar;
                  Pos := Pos + 2;
               elsif Source (Pos + 1) = '-'
                 and then Source (Pos + 2) = '>'
               then
                  Current_Token := Tok_Bar_Arrow;
                  Pos := Pos + 3;
               elsif Source (Pos + 1) = '='
                 and then Source (Pos + 2) = '>'
               then
                  Current_Token := Tok_Bar_Double_Arrow;
                  Pos := Pos + 3;
               else
                  Current_Token := Tok_Bar;
                  Pos := Pos + 1;
               end if;
            else
               Current_Token := Tok_Bar;
               Pos := Pos + 1;
            end if;
            return;
         when '!' =>
            if Flag_Psl then
               Current_Token := Tok_Exclam_Mark;
            else
               if Source (Pos + 1) = '=' then
                  --  != is not allowed in VHDL, but be friendly with C users.
                  Error_Msg_Scan
                    (Get_Token_Location, "Use '/=' for inequality in vhdl");
                  Current_Token := Tok_Not_Equal;
                  Pos := Pos + 1;
               else
                  --  LRM93 13.10
                  --  A vertical line (|) can be replaced by an exclamation
                  --  mark (!) where used as a delimiter.
                  Current_Token := Tok_Bar;
               end if;
            end if;
            Pos := Pos + 1;
            return;
         when ':' =>
            if Source (Pos + 1) = '=' then
               Current_Token := Tok_Assign;
               Pos := Pos + 2;
            else
               Current_Token := Tok_Colon;
               Pos := Pos + 1;
            end if;
            return;
         when ';' =>
            Current_Token := Tok_Semi_Colon;
            Pos := Pos + 1;
            return;
         when ',' =>
            Current_Token := Tok_Comma;
            Pos := Pos + 1;
            return;
         when '.' =>
            if Source (Pos + 1) = '.' then
               --  Be Ada friendly...
               Error_Msg_Scan ("'..' is invalid in vhdl, replaced by 'to'");
               Current_Token := Tok_To;
               Pos := Pos + 2;
               return;
            end if;
            Current_Token := Tok_Dot;
            Pos := Pos + 1;
            return;
         when '&' =>
            if Flag_Psl and then Source (Pos + 1) = '&' then
               Current_Token := Tok_And_And;
               Pos := Pos + 2;
            else
               Current_Token := Tok_Ampersand;
               Pos := Pos + 1;
            end if;
            return;
         when '<' =>
            case Source (Pos + 1) is
               when '=' =>
                  Current_Token := Tok_Less_Equal;
                  Pos := Pos + 2;
               when '>' =>
                  Current_Token := Tok_Box;
                  Pos := Pos + 2;
               when '<' =>
                  Current_Token := Tok_Double_Less;
                  Pos := Pos + 2;
               when others =>
                  Current_Token := Tok_Less;
                  Pos := Pos + 1;
            end case;
            return;
         when '>' =>
            case Source (Pos + 1) is
               when '=' =>
                  Current_Token := Tok_Greater_Equal;
                  Pos := Pos + 2;
               when '>' =>
                  Current_Token := Tok_Double_Greater;
                  Pos := Pos + 2;
               when others =>
                  Current_Token := Tok_Greater;
                  Pos := Pos + 1;
            end case;
            return;
         when '=' =>
            if Source (Pos + 1) = '=' then
               if AMS_Vhdl then
                  Current_Token := Tok_Equal_Equal;
               else
                  Error_Msg_Scan
                    ("'==' is not the vhdl equality, replaced by '='");
                  Current_Token := Tok_Equal;
               end if;
               Pos := Pos + 2;
            elsif Source (Pos + 1) = '>' then
               Current_Token := Tok_Double_Arrow;
               Pos := Pos + 2;
            else
               Current_Token := Tok_Equal;
               Pos := Pos + 1;
            end if;
            return;
         when ''' =>
            -- Handle cases such as character'('a')
            -- FIXME: what about f ()'length ? or .all'length
            if Current_Context.Prev_Token /= Tok_Identifier
              and then Current_Context.Prev_Token /= Tok_Character
              and then Source (Pos + 2) = '''
            then
               -- LRM93 13.5
               -- A character literal is formed by enclosing one of the 191
               -- graphic character (...) between two apostrophe characters.
               -- CHARACTER_LITERAL ::= ' GRAPHIC_CHARACTER '
               if Characters_Kind (Source (Pos + 1)) not in Graphic_Character
               then
                  Error_Msg_Scan
                    ("a character literal can only be a graphic character");
               elsif Vhdl_Std = Vhdl_87
                 and then Source (Pos + 1) > Character'Val (127)
               then
                  Error_8bit;
               end if;
               Current_Token := Tok_Character;
               Current_Context.Identifier :=
                 Name_Table.Get_Identifier (Source (Pos + 1));
               Pos := Pos + 3;
               return;
            else
               Current_Token := Tok_Tick;
               Pos := Pos + 1;
            end if;
            return;
         when '0' .. '9' =>
            Scan_Literal;

            --  LRM93 13.2
            --  At least one separator is required between an identifier or
            --  an abstract literal and an adjacent identifier or abstract
            --  literal.
            case Characters_Kind (Source (Pos)) is
               when Digit =>
                  raise Internal_Error;
               when Upper_Case_Letter
                 | Lower_Case_Letter =>
                  --  Could call Error_Separator, but use a clearer message
                  --  for this common case.
                  --  Note: the term "unit name" is not correct here, since
                  --  it can be any identifier or even a keyword; however it
                  --  is probably the most common case (eg 10ns).
                  if Vhdl_Std >= Vhdl_08 and then Current_Token = Tok_Integer
                  then
                     Current_Token := Tok_Integer_Letter;
                  else
                     Error_Msg_Scan
                       ("space is required between number and unit name");
                  end if;
               when Other_Special_Character =>
                  if Vhdl_Std > Vhdl_87 and then Source (Pos) = '\' then
                     --  Start of extended identifier.
                     Error_Separator;
                  end if;
               when Invalid
                 | Format_Effector
                 | Space_Character
                 | Special_Character =>
                  null;
            end case;
            return;
         when '#' =>
            Error_Msg_Scan ("'#' is used for based literals and "
                            & "must be preceded by a base");
            -- Cannot easily continue.
            raise Compilation_Error;
         when '"' =>
            Scan_String;
            return;
         when '%' =>
            if Vhdl_Std >= Vhdl_08 then
               Error_Msg_Scan
                 ("'%%' not allowed in vhdl 2008 (was replacement character)");
               --  Continue as a string.
            end if;
            Scan_String;
            return;
         when '[' =>
            if Flag_Psl then
               if Source (Pos + 1) = '*' then
                  Current_Token := Tok_Brack_Star;
                  Pos := Pos + 2;
               elsif Source (Pos + 1) = '+'
                 and then Source (Pos + 2) = ']'
               then
                  Current_Token := Tok_Brack_Plus_Brack;
                  Pos := Pos + 3;
               elsif Source (Pos + 1) = '-'
                 and then Source (Pos + 2) = '>'
               then
                  Current_Token := Tok_Brack_Arrow;
                  Pos := Pos + 3;
               elsif Source (Pos + 1) = '=' then
                  Current_Token := Tok_Brack_Equal;
                  Pos := Pos + 2;
               else
                  Current_Token := Tok_Left_Bracket;
                  Pos := Pos + 1;
               end if;
            else
               if Vhdl_Std = Vhdl_87 then
                  Error_Msg_Scan
                    ("'[' is an invalid character in vhdl87, replaced by '('");
                  Current_Token := Tok_Left_Paren;
               else
                  Current_Token := Tok_Left_Bracket;
               end if;
               Pos := Pos + 1;
            end if;
            return;
         when ']' =>
            if Vhdl_Std = Vhdl_87 and not Flag_Psl then
               Error_Msg_Scan
                 ("']' is an invalid character in vhdl87, replaced by ')'");
               Current_Token := Tok_Right_Paren;
            else
               Current_Token := Tok_Right_Bracket;
            end if;
            Pos := Pos + 1;
            return;
         when '{' =>
            if Flag_Psl then
               Current_Token := Tok_Left_Curly;
            else
               Error_Msg_Scan ("'{' is an invalid character, replaced by '('");
               Current_Token := Tok_Left_Paren;
            end if;
            Pos := Pos + 1;
            return;
         when '}' =>
            if Flag_Psl then
               Current_Token := Tok_Right_Curly;
            else
               Error_Msg_Scan ("'}' is an invalid character, replaced by ')'");
               Current_Token := Tok_Right_Paren;
            end if;
            Pos := Pos + 1;
            return;
         when '\' =>
            if Vhdl_Std = Vhdl_87 then
               Error_Msg_Scan
                 ("extended identifiers are not allowed in vhdl87");
            end if;
            Scan_Extended_Identifier;
            return;
         when '^' =>
            if Vhdl_Std >= Vhdl_08 then
               Current_Token := Tok_Caret;
            else
               Current_Token := Tok_Xor;
               Error_Msg_Scan ("'^' is not a VHDL operator, use 'xor'");
            end if;
            Pos := Pos + 1;
            return;
         when '~' =>
            Error_Msg_Scan ("'~' is not a VHDL operator, use 'not'");
            Pos := Pos + 1;
            Current_Token := Tok_Not;
            return;
         when '?' =>
            if Vhdl_Std < Vhdl_08 then
               Error_Msg_Scan ("'?' can only be used in strings or comments");
               Pos := Pos + 1;
               goto Again;
            else
               if Source (Pos + 1) = '<' then
                  if Source (Pos + 2) = '=' then
                     Current_Token := Tok_Match_Less_Equal;
                     Pos := Pos + 3;
                  else
                     Current_Token := Tok_Match_Less;
                     Pos := Pos + 2;
                  end if;
               elsif Source (Pos + 1) = '>' then
                  if Source (Pos + 2) = '=' then
                     Current_Token := Tok_Match_Greater_Equal;
                     Pos := Pos + 3;
                  else
                     Current_Token := Tok_Match_Greater;
                     Pos := Pos + 2;
                  end if;
               elsif Source (Pos + 1) = '?' then
                  Current_Token := Tok_Condition;
                  Pos := Pos + 2;
               elsif Source (Pos + 1) = '=' then
                  Current_Token := Tok_Match_Equal;
                  Pos := Pos + 2;
               elsif Source (Pos + 1) = '/'
                 and then Source (Pos + 2) = '='
               then
                  Current_Token := Tok_Match_Not_Equal;
                  Pos := Pos + 3;
               else
                  Error_Msg_Scan ("unknown matching operator");
                  Pos := Pos + 1;
                  goto Again;
               end if;
            end if;
            return;
         when '$' | '`'
           | Inverted_Exclamation .. Inverted_Question
           | Multiplication_Sign | Division_Sign =>
            Error_Msg_Scan
              ("character %c can only be used in strings or comments",
               +Source (Pos));
            Pos := Pos + 1;
            goto Again;
         when '@' =>
            if Vhdl_Std >= Vhdl_08 or Flag_Psl then
               Current_Token := Tok_Arobase;
               Pos := Pos + 1;
               return;
            else
               Error_Msg_Scan
                 ("character %c can only be used in strings or comments",
                  +Source (Pos));
               Pos := Pos + 1;
               goto Again;
            end if;
         when '_' =>
            Error_Msg_Scan ("an identifier can't start with '_'");
            Pos := Pos + 1;
            goto Again;
         when 'A' .. 'Z' | 'a' .. 'z' =>
            Scan_Identifier;
            return;
         when UC_A_Grave .. UC_O_Diaeresis
           | UC_O_Oblique_Stroke .. UC_Icelandic_Thorn =>
            if Vhdl_Std = Vhdl_87 then
               Error_Msg_Scan
                 ("upper case letters above 128 are not allowed in vhdl87");
            end if;
            Scan_Identifier;
            return;
         when LC_German_Sharp_S .. LC_O_Diaeresis
           | LC_O_Oblique_Stroke .. LC_Y_Diaeresis =>
            if Vhdl_Std = Vhdl_87 then
               Error_Msg_Scan
                 ("lower case letters above 128 are not allowed in vhdl87");
            end if;
            Scan_Identifier;
            return;
         when NUL .. ETX | ENQ .. BS | SO .. US | DEL .. APC =>
            Error_Msg_Scan
              ("control character that is not CR, LF, FF, HT or VT " &
               "is not allowed");
            Pos := Pos + 1;
            goto Again;
         when Files_Map.EOT =>
            if Pos >= Current_Context.File_Len then
               --  FIXME: should conditionnaly emit a warning if the file
               --   is not terminated by an end of line.
               Current_Token := Tok_Eof;
            else
               Error_Msg_Scan ("EOT is not allowed inside the file");
               Pos := Pos + 1;
               goto Again;
            end if;
            return;
      end case;
   end Scan;

   function Get_Token_Location return Location_Type is
   begin
      return File_Pos_To_Location
        (Current_Context.Source_File, Current_Context.Token_Pos);
   end Get_Token_Location;

   function Is_Whitespace (C : Character) return Boolean is
   begin
      if C = ' ' then
         return True;
      elsif Vhdl_Std > Vhdl_87 and C = NBSP then
         return True;
      else
         return False;
      end if;
   end Is_Whitespace;
end Scanner;