-- VHDL lexical scanner. -- Copyright (C) 2002 - 2014 Tristan Gingold -- -- GHDL is free software; you can redistribute it and/or modify it under -- the terms of the GNU General Public License as published by the Free -- Software Foundation; either version 2, or (at your option) any later -- version. -- -- GHDL is distributed in the hope that it will be useful, but WITHOUT ANY -- WARRANTY; without even the implied warranty of MERCHANTABILITY or -- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- for more details. -- -- You should have received a copy of the GNU General Public License -- along with GHDL; see the file COPYING. If not, write to the Free -- Software Foundation, 59 Temple Place - Suite 330, Boston, MA -- 02111-1307, USA. with Ada.Characters.Latin_1; use Ada.Characters.Latin_1; with Errorout; use Errorout; with Name_Table; with Files_Map; use Files_Map; with Std_Names; with Str_Table; with Flags; use Flags; package body Vhdl.Scanner is -- This classification is a simplification of the categories of LRM93 13.1 -- LRM93 13.1 -- The only characters allowed in the text of a VHDL description are the -- graphic characters and format effector. type Character_Kind_Type is ( -- Neither a format effector nor a graphic character. Invalid, Format_Effector, Lower_Case_Letter, Upper_Case_Letter, Digit, Special_Character, Space_Character, Other_Special_Character ); -- LRM93 13.1 -- basic_graphic_character ::= -- upper_case_letter | digit | special_character | space_character -- --subtype Basic_Graphic_Character is -- Character_Kind_Type range Upper_Case_Letter .. Space_Character; -- LRM93 13.1 -- graphic_character ::= -- basic_graphic_character | lower_case_letter | other_special_character -- -- Note: There are 191 graphic characters. subtype Graphic_Character is Character_Kind_Type range Lower_Case_Letter .. Other_Special_Character; -- letter ::= upper_case_letter | lower_case_letter subtype Letter is Character_Kind_Type range Lower_Case_Letter .. Upper_Case_Letter; -- LRM93 13.1 -- The characters included in each of the categories of basic graphic -- characters are defined as follows: type Character_Array is array (Character) of Character_Kind_Type; pragma Suppress_Initialization (Character_Array); Characters_Kind : constant Character_Array := (NUL .. BS => Invalid, -- Format effectors are the ISO (and ASCII) characters called horizontal -- tabulation, vertical tabulation, carriage return, line feed, and form -- feed. HT | LF | VT | FF | CR => Format_Effector, SO .. US => Invalid, -- 1. upper case letters 'A' .. 'Z' | UC_A_Grave .. UC_O_Diaeresis | UC_O_Oblique_Stroke .. UC_Icelandic_Thorn => Upper_Case_Letter, -- 2. digits '0' .. '9' => Digit, -- 3. special characters '"' | '#' | '&' | ''' | '(' | ')' | '+' | ',' | '-' | '.' | '/' | ':' | ';' | '<' | '=' | '>' | '[' | ']' | '_' | '|' | '*' => Special_Character, -- 4. the space characters ' ' | NBSP => Space_Character, -- 5. lower case letters 'a' .. 'z' | LC_German_Sharp_S .. LC_O_Diaeresis | LC_O_Oblique_Stroke .. LC_Y_Diaeresis => Lower_Case_Letter, -- 6. other special characters '!' | '$' | '%' | '@' | '?' | '\' | '^' | '`' | '{' | '}' | '~' | Inverted_Exclamation .. Inverted_Question | Multiplication_Sign | Division_Sign => Other_Special_Character, -- '¡' -- INVERTED EXCLAMATION MARK -- '¢' -- CENT SIGN -- '£' -- POUND SIGN -- '¤' -- CURRENCY SIGN -- '¥' -- YEN SIGN -- '¦' -- BROKEN BAR -- '§' -- SECTION SIGN -- '¨' -- DIAERESIS -- '©' -- COPYRIGHT SIGN -- 'ª' -- FEMININE ORDINAL INDICATOR -- '«' -- LEFT-POINTING DOUBLE ANGLE QUOTATION MARK -- '¬' -- NOT SIGN -- '­' -- SOFT HYPHEN -- '®' -- REGISTERED SIGN -- '¯' -- MACRON -- '°' -- DEGREE SIGN -- '±' -- PLUS-MINUS SIGN -- '²' -- SUPERSCRIPT TWO -- '³' -- SUPERSCRIPT THREE -- '´' -- ACUTE ACCENT -- 'µ' -- MICRO SIGN -- '¶' -- PILCROW SIGN -- '·' -- MIDDLE DOT -- '¸' -- CEDILLA -- '¹' -- SUPERSCRIPT ONE -- 'º' -- MASCULINE ORDINAL INDICATOR -- '»' -- RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK -- '¼' -- VULGAR FRACTION ONE QUARTER -- '½' -- VULGAR FRACTION ONE HALF -- '¾' -- VULGAR FRACTION THREE QUARTERS -- '¿' -- INVERTED QUESTION MARK -- '×' -- MULTIPLICATION SIGN -- '÷' -- DIVISION SIGN DEL .. APC => Invalid); -- The context contains the whole internal state of the scanner, ie -- it can be used to push/pop a lexical analysis, to restart the -- scanner from a context marking a previous point. type Scan_Context is record Source : File_Buffer_Acc; Source_File : Source_File_Entry; Line_Number : Natural; Line_Pos : Source_Ptr; Prev_Pos : Source_Ptr; Token_Pos : Source_Ptr; Pos : Source_Ptr; File_Len : Source_Ptr; Token : Token_Type; Prev_Token : Token_Type; -- Additional values for the current token. Bit_Str_Base : Character; Bit_Str_Sign : Character; Str_Id : String8_Id; Str_Len : Nat32; Identifier: Name_Id; Lit_Int64 : Int64; Lit_Fp64 : Fp64; end record; pragma Suppress_Initialization (Scan_Context); -- The current context. -- Default value is an invalid context. Current_Context: Scan_Context := (Source => null, Source_File => No_Source_File_Entry, Line_Number => 0, Line_Pos => 0, Pos => 0, Prev_Pos => 0, Token_Pos => 0, File_Len => 0, Token => Tok_Invalid, Prev_Token => Tok_Invalid, Identifier => Null_Identifier, Bit_Str_Base => ' ', Bit_Str_Sign => ' ', Str_Id => Null_String8, Str_Len => 0, Lit_Int64 => 0, Lit_Fp64 => 0.0); function Get_Current_Coord return Source_Coord_Type is begin return (File => Get_Current_Source_File, Line_Pos => Current_Context.Line_Pos, Line => Get_Current_Line, Offset => Get_Current_Offset); end Get_Current_Coord; function Get_Token_Coord return Source_Coord_Type is begin return (File => Get_Current_Source_File, Line_Pos => Current_Context.Line_Pos, Line => Get_Current_Line, Offset => Get_Token_Offset); end Get_Token_Coord; -- Disp a message during scan. -- The current location is automatically displayed before the message. -- Disp a message during scan. procedure Error_Msg_Scan (Msg: String) is begin Report_Msg (Msgid_Error, Scan, Get_Current_Coord, Msg); end Error_Msg_Scan; procedure Error_Msg_Scan (Loc : Source_Coord_Type; Msg: String) is begin Report_Msg (Msgid_Error, Scan, Loc, Msg); end Error_Msg_Scan; procedure Error_Msg_Scan (Msg: String; Arg1 : Earg_Type) is begin Report_Msg (Msgid_Error, Scan, Get_Current_Coord, Msg, (1 => Arg1)); end Error_Msg_Scan; -- Disp a message during scan. procedure Warning_Msg_Scan (Id : Msgid_Warnings; Msg: String; Arg1 : Earg_Type) is begin Report_Msg (Id, Scan, Get_Current_Coord, Msg, (1 => Arg1)); end Warning_Msg_Scan; procedure Warning_Msg_Scan (Id : Msgid_Warnings; Msg: String; Args : Earg_Arr := No_Eargs) is begin Report_Msg (Id, Scan, Get_Current_Coord, Msg, Args); end Warning_Msg_Scan; Source: File_Buffer_Acc renames Current_Context.Source; Pos: Source_Ptr renames Current_Context.Pos; -- When CURRENT_TOKEN is an identifier, its name_id is stored into -- this global variable. -- Function current_text can be used to convert it into an iir. function Current_Identifier return Name_Id is begin return Current_Context.Identifier; end Current_Identifier; procedure Invalidate_Current_Identifier is begin Current_Context.Identifier := Null_Identifier; end Invalidate_Current_Identifier; procedure Invalidate_Current_Token is begin if Current_Token /= Tok_Invalid then Current_Context.Prev_Token := Current_Token; Current_Token := Tok_Invalid; end if; end Invalidate_Current_Token; function Current_String_Id return String8_Id is begin return Current_Context.Str_Id; end Current_String_Id; function Current_String_Length return Nat32 is begin return Current_Context.Str_Len; end Current_String_Length; function Get_Bit_String_Base return Character is begin return Current_Context.Bit_Str_Base; end Get_Bit_String_Base; function Get_Bit_String_Sign return Character is begin return Current_Context.Bit_Str_Sign; end Get_Bit_String_Sign; function Current_Iir_Int64 return Int64 is begin return Current_Context.Lit_Int64; end Current_Iir_Int64; function Current_Iir_Fp64 return Fp64 is begin return Current_Context.Lit_Fp64; end Current_Iir_Fp64; function Get_Current_Source_File return Source_File_Entry is begin return Current_Context.Source_File; end Get_Current_Source_File; function Get_Current_Line return Natural is begin return Current_Context.Line_Number; end Get_Current_Line; function Get_Current_Offset return Natural is begin return Natural (Current_Context.Pos - Current_Context.Line_Pos); end Get_Current_Offset; function Get_Token_Offset return Natural is begin return Natural (Current_Context.Token_Pos - Current_Context.Line_Pos); end Get_Token_Offset; function Get_Token_Position return Source_Ptr is begin return Current_Context.Token_Pos; end Get_Token_Position; function Get_Position return Source_Ptr is begin return Current_Context.Pos; end Get_Position; function Get_Token_Location return Location_Type is begin return File_Pos_To_Location (Current_Context.Source_File, Current_Context.Token_Pos); end Get_Token_Location; function Get_Prev_Location return Location_Type is begin return File_Pos_To_Location (Current_Context.Source_File, Current_Context.Prev_Pos); end Get_Prev_Location; procedure Set_File (Source_File : Source_File_Entry) is N_Source: File_Buffer_Acc; begin pragma Assert (Current_Context.Source = null); pragma Assert (Source_File /= No_Source_File_Entry); N_Source := Get_File_Source (Source_File); Current_Context := (Source => N_Source, Source_File => Source_File, Line_Number => 1, Line_Pos => 0, Prev_Pos => N_Source'First, Pos => N_Source'First, Token_Pos => 0, -- should be invalid, File_Len => Get_File_Length (Source_File), Token => Tok_Invalid, Prev_Token => Tok_Invalid, Identifier => Null_Identifier, Bit_Str_Base => ' ', Bit_Str_Sign => ' ', Str_Id => Null_String8, Str_Len => 0, Lit_Int64 => -1, Lit_Fp64 => 0.0); Current_Token := Tok_Invalid; end Set_File; function Detect_Encoding_Errors return Boolean is C : constant Character := Source (Pos); begin -- No need to check further if first character is plain ASCII-7 if C >= ' ' and C < Character'Val (127) then return False; end if; -- UTF-8 BOM is EF BB BF if Source (Pos + 0) = Character'Val (16#ef#) and then Source (Pos + 1) = Character'Val (16#bb#) and then Source (Pos + 2) = Character'Val (16#bf#) then Error_Msg_Scan ("source encoding must be latin-1 (UTF-8 BOM detected)"); return True; end if; -- UTF-16 BE BOM is FE FF if Source (Pos + 0) = Character'Val (16#fe#) and then Source (Pos + 1) = Character'Val (16#ff#) then Error_Msg_Scan ("source encoding must be latin-1 (UTF-16 BE BOM detected)"); return True; end if; -- UTF-16 LE BOM is FF FE if Source (Pos + 0) = Character'Val (16#ff#) and then Source (Pos + 1) = Character'Val (16#fe#) then Error_Msg_Scan ("source encoding must be latin-1 (UTF-16 LE BOM detected)"); return True; end if; -- Certainly weird, but scanner/parser will catch it. return False; end Detect_Encoding_Errors; procedure Set_Current_Position (Position: Source_Ptr) is Loc : Location_Type; Offset: Natural; File_Entry : Source_File_Entry; begin -- Scanner must have been initialized. pragma Assert (Current_Context.Source /= null); Current_Token := Tok_Invalid; Current_Context.Pos := Position; Loc := File_Pos_To_Location (Current_Context.Source_File, Current_Context.Pos); Location_To_Coord (Loc, File_Entry, Current_Context.Line_Pos, Current_Context.Line_Number, Offset); end Set_Current_Position; procedure Close_File is begin Current_Context.Source := null; end Close_File; -- Emit an error when a character above 128 was found. -- This must be called only in vhdl87. procedure Error_8bit is begin Error_Msg_Scan ("8 bits characters not allowed in vhdl87"); end Error_8bit; -- Emit an error when a separator is expected. procedure Error_Separator is begin Error_Msg_Scan ("a separator is required here"); end Error_Separator; -- scan a decimal literal or a based literal. -- -- LRM93 13.4.1 -- DECIMAL_LITERAL ::= INTEGER [ . INTEGER ] [ EXPONENT ] -- EXPONENT ::= E [ + ] INTEGER | E - INTEGER -- -- LRM93 13.4.2 -- BASED_LITERAL ::= BASE # BASED_INTEGER [ . BASED_INTEGER ] # EXPONENT -- BASE ::= INTEGER procedure Scan_Literal is separate; -- Scan a string literal. -- -- LRM93 13.6 / LRM08 15.7 -- A string literal is formed by a sequence of graphic characters -- (possibly none) enclosed between two quotation marks used as string -- brackets. -- STRING_LITERAL ::= " { GRAPHIC_CHARACTER } " -- -- IN: for a string, at the call of this procedure, the current character -- must be either '"' or '%'. procedure Scan_String is -- The quotation character (can be " or %). Mark: Character; -- Current character. C : Character; -- Current length. Length : Nat32; begin -- String delimiter. Mark := Source (Pos); pragma Assert (Mark = '"' or else Mark = '%'); Pos := Pos + 1; Length := 0; Current_Context.Str_Id := Str_Table.Create_String8; loop C := Source (Pos); if C = Mark then -- LRM93 13.6 -- If a quotation mark value is to be represented in the sequence -- of character values, then a pair of adjacent quoatation -- characters marks must be written at the corresponding place -- within the string literal. -- LRM93 13.10 -- Any pourcent sign within the sequence of characters must then -- be doubled, and each such doubled percent sign is interpreted -- as a single percent sign value. -- The same replacement is allowed for a bit string literal, -- provieded that both bit string brackets are replaced. Pos := Pos + 1; exit when Source (Pos) /= Mark; end if; case Characters_Kind (C) is when Format_Effector => if Mark = '%' then -- No matching '%' has been found. Consider '%' was used -- as the remainder operator, instead of 'rem'. This will -- improve the error message. Error_Msg_Scan (+Get_Token_Location, "'%%' is not a vhdl operator, use 'rem'"); Current_Token := Tok_Rem; Pos := Current_Context.Token_Pos + 1; return; end if; if C = CR or C = LF then Error_Msg_Scan ("string cannot be multi-line, use concatenation"); else Error_Msg_Scan ("format effector not allowed in a string"); end if; exit; when Invalid => if C = Files_Map.EOT and then Pos >= Current_Context.File_Len then Error_Msg_Scan ("string not terminated at end of file"); exit; end if; Error_Msg_Scan ("invalid character not allowed, even in a string"); when Graphic_Character => if Vhdl_Std = Vhdl_87 and then C > Character'Val (127) then Error_8bit; end if; end case; if C = '"' and Mark = '%' then -- LRM93 13.10 -- The quotation marks (") used as string brackets at both ends of -- a string literal can be replaced by percent signs (%), provided -- that the enclosed sequence of characters constains no quotation -- marks, and provided that both string brackets are replaced. Error_Msg_Scan ("'""' cannot be used in a string delimited with '%%'"); end if; Length := Length + 1; Str_Table.Append_String8 (Character'Pos (C)); Pos := Pos + 1; end loop; Current_Token := Tok_String; Current_Context.Str_Len := Length; end Scan_String; -- Scan a bit string literal. -- -- LRM93 13.7 -- A bit string literal is formed by a sequence of extended digits -- (possibly none) enclosed between two quotations used as bit string -- brackets, preceded by a base specifier. -- BIT_STRING_LITERAL ::= BASE_SPECIFIER " [ BIT_VALUE ] " -- BIT_VALUE ::= EXTENDED_DIGIT { [ UNDERLINE ] EXTENDED_DIGIT } -- -- The current character must be a base specifier, followed by '"' or '%'. -- The base must be valid. procedure Scan_Bit_String (Base_Log : Nat32) is -- Position of character '0'. Pos_0 : constant Nat8 := Character'Pos ('0'); -- Used for the base. subtype Nat4 is Natural range 1 .. 4; Base : constant Nat32 := 2 ** Nat4 (Base_Log); -- The quotation character (can be " or %). Orig_Pos : constant Source_Ptr := Pos; Mark : constant Character := Source (Orig_Pos); -- Current character. C : Character; -- Current length. Length : Nat32; -- Digit value. V, D : Nat8; -- True if invalid character already found, to avoid duplicate message. Has_Invalid : Boolean; begin pragma Assert (Mark = '"' or else Mark = '%'); Pos := Pos + 1; Length := 0; Has_Invalid := False; Current_Context.Str_Id := Str_Table.Create_String8; loop << Again >> null; C := Source (Pos); Pos := Pos + 1; exit when C = Mark; -- LRM93 13.7 -- If the base specifier is 'B', the extended digits in the bit -- value are restricted to 0 and 1. -- If the base specifier is 'O', the extended digits int the bit -- value are restricted to legal digits in the octal number -- system, ie, the digits 0 through 7. -- If the base specifier is 'X', the extended digits are all digits -- together with the letters A through F. case C is when '0' .. '9' => V := Character'Pos (C) - Character'Pos ('0'); when 'A' .. 'F' => V := Character'Pos (C) - Character'Pos ('A') + 10; when 'a' .. 'f' => -- LRM93 13.7 -- A letter in a bit string literal (...) can be written either -- in lowercase or in upper case, with the same meaning. V := Character'Pos (C) - Character'Pos ('a') + 10; when '_' => if Source (Pos) = '_' then Error_Msg_Scan ("double underscore not allowed in a bit string"); end if; if Source (Pos - 2) = Mark then Error_Msg_Scan ("underscore not allowed at the start of a bit string"); elsif Source (Pos) = Mark then Error_Msg_Scan ("underscore not allowed at the end of a bit string"); end if; goto Again; when '"' => pragma Assert (Mark = '%'); Error_Msg_Scan ("'""' cannot close a bit string opened by '%%'"); exit; when '%' => pragma Assert (Mark = '"'); Error_Msg_Scan ("'%%' cannot close a bit string opened by '""'"); exit; when others => if Characters_Kind (C) in Graphic_Character then if Vhdl_Std >= Vhdl_08 then V := Nat8'Last; else if not Has_Invalid then Error_Msg_Scan ("invalid character in bit string"); Has_Invalid := True; end if; -- Continue the bit string V := 0; end if; else if Mark = '%' then Error_Msg_Scan (+File_Pos_To_Location (Current_Context.Source_File, Orig_Pos), "'%%' is not a vhdl operator, use 'rem'"); Current_Token := Tok_Rem; Pos := Orig_Pos + 1; return; else Error_Msg_Scan ("bit string not terminated"); Pos := Pos - 1; end if; exit; end if; end case; -- Expand bit value. if Vhdl_Std >= Vhdl_08 and V > Base then -- Expand as graphic character. for I in 1 .. Base_Log loop Str_Table.Append_String8_Char (C); end loop; else -- Expand as extended digits. case Base_Log is when 1 => if V > 1 then Error_Msg_Scan ("invalid character in a binary bit string"); V := 1; end if; Str_Table.Append_String8 (Pos_0 + V); when 3 => if V > 7 then Error_Msg_Scan ("invalid character in a octal bit string"); V := 7; end if; for I in 1 .. 3 loop D := V / 4; Str_Table.Append_String8 (Pos_0 + D); V := (V - 4 * D) * 2; end loop; when 4 => for I in 1 .. 4 loop D := V / 8; Str_Table.Append_String8 (Pos_0 + D); V := (V - 8 * D) * 2; end loop; when others => raise Internal_Error; end case; end if; Length := Length + Base_Log; end loop; -- Note: the length of the bit string may be 0. Current_Token := Tok_Bit_String; Current_Context.Str_Len := Length; end Scan_Bit_String; -- Scan a decimal bit string literal. For base specifier D the algorithm -- is rather different: all the graphic characters shall be digits, and we -- need to use a (not very efficient) arbitrary precision multiplication. procedure Scan_Dec_Bit_String is use Str_Table; Id : String8_Id; -- Position of character '0'. Pos_0 : constant Nat8 := Character'Pos ('0'); -- Current character. C : Character; -- Current length. Length : Nat32; -- Digit value. V, D : Nat8; type Carries_Type is array (0 .. 3) of Nat8; Carries : Carries_Type; No_Carries : constant Carries_Type := (others => Pos_0); -- Shift right carries. Note the Carries (0) is the LSB. procedure Shr_Carries is begin Carries := (Carries (1), Carries (2), Carries (3), Pos_0); end Shr_Carries; procedure Append_Carries is begin -- Expand the bit string. Note that position 1 of the string8 is -- the MSB. while Carries /= No_Carries loop Append_String8 (Pos_0); Length := Length + 1; for I in reverse 2 .. Length loop Set_Element_String8 (Id, I, Element_String8 (Id, I - 1)); end loop; Set_Element_String8 (Id, 1, Carries (0)); Shr_Carries; end loop; end Append_Carries; -- Add 1 to Carries. Overflow is not allowed and should be prevented by -- construction. procedure Add_One_To_Carries is begin for I in Carries'Range loop if Carries (I) = Pos_0 then Carries (I) := Pos_0 + 1; -- End of propagation. exit; else Carries (I) := Pos_0; -- Continue propagation. pragma Assert (I < Carries'Last); end if; end loop; end Add_One_To_Carries; begin pragma Assert (Source (Pos) = '"'); Pos := Pos + 1; Length := 0; Id := Create_String8; Current_Context.Str_Id := Id; loop << Again >> null; C := Source (Pos); Pos := Pos + 1; exit when C = '"'; if C in '0' .. '9' then V := Character'Pos (C) - Character'Pos ('0'); elsif C = '_' then if Source (Pos) = '_' then Error_Msg_Scan ("double underscore not allowed in a bit string"); end if; if Source (Pos - 2) = '"' then Error_Msg_Scan ("underscore not allowed at the start of a bit string"); elsif Source (Pos) = '"' then Error_Msg_Scan ("underscore not allowed at the end of a bit string"); end if; goto Again; else if Characters_Kind (C) in Graphic_Character then Error_Msg_Scan ("graphic character not allowed in decimal bit string"); -- Continue the bit string V := 0; else Error_Msg_Scan ("bit string not terminated"); Pos := Pos - 1; exit; end if; end if; -- Multiply by 10. Carries := (others => Pos_0); for I in reverse 1 .. Length loop -- Shift by 1 (*2). D := Element_String8 (Id, I); Set_Element_String8 (Id, I, Carries (0)); Shr_Carries; -- Add D and D * 4. if D /= Pos_0 then Add_One_To_Carries; -- Add_Four_To_Carries: for I in 2 .. 3 loop if Carries (I) = Pos_0 then Carries (I) := Pos_0 + 1; -- End of propagation. exit; else Carries (I) := Pos_0; -- Continue propagation. end if; end loop; end if; end loop; Append_Carries; -- Add V. for I in Carries'Range loop D := V / 2; Carries (I) := Pos_0 + (V - 2 * D); V := D; end loop; for I in reverse 1 .. Length loop D := Element_String8 (Id, I); if D /= Pos_0 then Add_One_To_Carries; end if; Set_Element_String8 (Id, I, Carries (0)); Shr_Carries; exit when Carries = No_Carries; end loop; Append_Carries; end loop; Current_Token := Tok_Bit_String; Current_Context.Str_Len := Length; end Scan_Dec_Bit_String; -- LRM08 15.2 Character set -- For each uppercase letter, there is a corresponding lowercase letter; -- and for each lowercase letter except [y diaeresis] and [german sharp s], -- there is a corresponding uppercase letter. type Character_Map is array (Character) of Character; To_Lower_Map : constant Character_Map := ( -- Uppercase ASCII letters. 'A' => 'a', 'B' => 'b', 'C' => 'c', 'D' => 'd', 'E' => 'e', 'F' => 'f', 'G' => 'g', 'H' => 'h', 'I' => 'i', 'J' => 'j', 'K' => 'k', 'L' => 'l', 'M' => 'm', 'N' => 'n', 'O' => 'o', 'P' => 'p', 'Q' => 'q', 'R' => 'r', 'S' => 's', 'T' => 't', 'U' => 'u', 'V' => 'v', 'W' => 'w', 'X' => 'x', 'Y' => 'y', 'Z' => 'z', -- Lowercase ASCII letters. 'a' => 'a', 'b' => 'b', 'c' => 'c', 'd' => 'd', 'e' => 'e', 'f' => 'f', 'g' => 'g', 'h' => 'h', 'i' => 'i', 'j' => 'j', 'k' => 'k', 'l' => 'l', 'm' => 'm', 'n' => 'n', 'o' => 'o', 'p' => 'p', 'q' => 'q', 'r' => 'r', 's' => 's', 't' => 't', 'u' => 'u', 'v' => 'v', 'w' => 'w', 'x' => 'x', 'y' => 'y', 'z' => 'z', -- Uppercase Latin-1 letters. UC_A_Grave => LC_A_Grave, UC_A_Acute => LC_A_Acute, UC_A_Circumflex => LC_A_Circumflex, UC_A_Tilde => LC_A_Tilde, UC_A_Diaeresis => LC_A_Diaeresis, UC_A_Ring => LC_A_Ring, UC_AE_Diphthong => LC_AE_Diphthong, UC_C_Cedilla => LC_C_Cedilla, UC_E_Grave => LC_E_Grave, UC_E_Acute => LC_E_Acute, UC_E_Circumflex => LC_E_Circumflex, UC_E_Diaeresis => LC_E_Diaeresis, UC_I_Grave => LC_I_Grave, UC_I_Acute => LC_I_Acute, UC_I_Circumflex => LC_I_Circumflex, UC_I_Diaeresis => LC_I_Diaeresis, UC_Icelandic_Eth => LC_Icelandic_Eth, UC_N_Tilde => LC_N_Tilde, UC_O_Grave => LC_O_Grave, UC_O_Acute => LC_O_Acute, UC_O_Circumflex => LC_O_Circumflex, UC_O_Tilde => LC_O_Tilde, UC_O_Diaeresis => LC_O_Diaeresis, UC_O_Oblique_Stroke => LC_O_Oblique_Stroke, UC_U_Grave => LC_U_Grave, UC_U_Acute => LC_U_Acute, UC_U_Circumflex => LC_U_Circumflex, UC_U_Diaeresis => LC_U_Diaeresis, UC_Y_Acute => LC_Y_Acute, UC_Icelandic_Thorn => LC_Icelandic_Thorn, -- Lowercase Latin-1 letters. LC_A_Grave => LC_A_Grave, LC_A_Acute => LC_A_Acute, LC_A_Circumflex => LC_A_Circumflex, LC_A_Tilde => LC_A_Tilde, LC_A_Diaeresis => LC_A_Diaeresis, LC_A_Ring => LC_A_Ring, LC_AE_Diphthong => LC_AE_Diphthong, LC_C_Cedilla => LC_C_Cedilla, LC_E_Grave => LC_E_Grave, LC_E_Acute => LC_E_Acute, LC_E_Circumflex => LC_E_Circumflex, LC_E_Diaeresis => LC_E_Diaeresis, LC_I_Grave => LC_I_Grave, LC_I_Acute => LC_I_Acute, LC_I_Circumflex => LC_I_Circumflex, LC_I_Diaeresis => LC_I_Diaeresis, LC_Icelandic_Eth => LC_Icelandic_Eth, LC_N_Tilde => LC_N_Tilde, LC_O_Grave => LC_O_Grave, LC_O_Acute => LC_O_Acute, LC_O_Circumflex => LC_O_Circumflex, LC_O_Tilde => LC_O_Tilde, LC_O_Diaeresis => LC_O_Diaeresis, LC_O_Oblique_Stroke => LC_O_Oblique_Stroke, LC_U_Grave => LC_U_Grave, LC_U_Acute => LC_U_Acute, LC_U_Circumflex => LC_U_Circumflex, LC_U_Diaeresis => LC_U_Diaeresis, LC_Y_Acute => LC_Y_Acute, LC_Icelandic_Thorn => LC_Icelandic_Thorn, -- Lowercase latin-1 characters without corresponding uppercase one. LC_Y_Diaeresis => LC_Y_Diaeresis, LC_German_Sharp_S => LC_German_Sharp_S, -- Not a letter. others => NUL); procedure Error_Too_Long is begin Error_Msg_Scan ("identifier is too long (>" & Natural'Image (Max_Name_Length - 1) & ")"); end Error_Too_Long; -- LRM93 13.3.1 -- Basic Identifiers -- A basic identifier consists only of letters, digits, and underlines. -- BASIC_IDENTIFIER ::= LETTER { [ UNDERLINE ] LETTER_OR_DIGIT } -- LETTER_OR_DIGIT ::= LETTER | DIGIT -- LETTER ::= UPPER_CASE_LETTER | LOWER_CASE_LETTER -- -- NB: At the call of this procedure, the current character must be a legal -- character for a basic identifier. procedure Scan_Identifier (Allow_PSL : Boolean) is use Name_Table; Buffer : String (1 .. Max_Name_Length); C : Character; Len : Natural; begin -- This is an identifier or a key word. Len := 0; loop -- Source (pos) is correct. -- LRM93 13.3.1 -- All characters if a basic identifier are signifiant, including -- any underline character inserted between a letter or digit and -- an adjacent letter or digit. -- Basic identifiers differing only in the use of the corresponding -- upper and lower case letters are considered as the same. -- -- GHDL: This is achieved by converting all upper case letters into -- equivalent lower case letters. -- The opposite (converting to upper lower case letters) is not -- possible because two characters have no upper-case equivalent. C := Source (Pos); case C is when 'A' .. 'Z' => C := Character'Val (Character'Pos (C) + Character'Pos ('a') - Character'Pos ('A')); when 'a' .. 'z' | '0' .. '9' => null; when '_' => if Source (Pos + 1) = '_' then Error_Msg_Scan ("two underscores can't be consecutive"); end if; when ' ' | ')' | '.' | ';' | ':' => exit; when others => -- Non common case. case Characters_Kind (C) is when Upper_Case_Letter | Lower_Case_Letter => if Vhdl_Std = Vhdl_87 then Error_8bit; end if; C := To_Lower_Map (C); pragma Assert (C /= NUL); when Digit => raise Internal_Error; when others => exit; end case; end case; -- Put character in name buffer. FIXME: compute the hash at the same -- time ? if Len >= Max_Name_Length - 1 then if Len = Max_Name_Length -1 then Error_Msg_Scan ("identifier is too long (>" & Natural'Image (Max_Name_Length - 1) & ")"); -- Accept this last one character, so that no error for the -- following characters. Len := Len + 1; Buffer (Len) := C; end if; else Len := Len + 1; Buffer (Len) := C; end if; -- Next character. Pos := Pos + 1; end loop; if Source (Pos - 1) = '_' then if Allow_PSL then -- Some PSL reserved words finish with '_'. This case is handled -- later by Scan_Underscore and Scan_Exclam_Mark. Pos := Pos - 1; Len := Len - 1; C := '_'; else -- Eat the trailing underscore. Error_Msg_Scan ("an identifier cannot finish with '_'"); end if; end if; -- LRM93 13.2 -- At least one separator is required between an identifier or an -- abstract literal and an adjacent identifier or abstract literal. case Characters_Kind (C) is when Digit | Upper_Case_Letter | Lower_Case_Letter => raise Internal_Error; when Other_Special_Character | Special_Character => if (C = '"' or C = '%') and then Len <= 2 then if C = '%' and Vhdl_Std >= Vhdl_08 then Error_Msg_Scan ("'%%' not allowed in vhdl 2008 " & "(was replacement character)"); -- Continue as a bit string. end if; -- Good candidate for bit string. -- LRM93 13.7 -- BASE_SPECIFIER ::= B | O | X -- -- A letter in a bit string literal (either an extended digit -- or the base specifier) can be written either in lower case -- or in upper case, with the same meaning. -- -- LRM08 15.8 Bit string literals -- BASE_SPECICIER ::= -- B | O | X | UB | UO | UX | SB | SO | SX | D -- -- An extended digit and the base specifier in a bit string -- literal can be written either in lowercase or in uppercase, -- with the same meaning. declare Base : Nat32; Cl : constant Character := Buffer (Len); Cf : constant Character := Buffer (1); begin Current_Context.Bit_Str_Base := Cl; if Cl = 'b' then Base := 1; elsif Cl = 'o' then Base := 3; elsif Cl = 'x' then Base := 4; elsif Vhdl_Std >= Vhdl_08 and Len = 1 and Cf = 'd' then Current_Context.Bit_Str_Sign := ' '; Scan_Dec_Bit_String; return; else Base := 0; end if; if Base > 0 then if Len = 1 then Current_Context.Bit_Str_Sign := ' '; Scan_Bit_String (Base); return; elsif Vhdl_Std >= Vhdl_08 and then (Cf = 's' or Cf = 'u') then Current_Context.Bit_Str_Sign := Cf; Scan_Bit_String (Base); return; end if; end if; end; elsif Vhdl_Std > Vhdl_87 and then C = '\' then -- Start of extended identifier. Cannot follow an identifier. Error_Separator; end if; when Invalid => -- Improve error message for use of UTF-8 quote marks. -- It's possible because in the sequence of UTF-8 bytes for the -- quote marks, there are invalid character (in the 128-160 -- range). if C = Character'Val (16#80#) and then Buffer (Len) = Character'Val (16#e2#) and then (Source (Pos + 1) = Character'Val (16#98#) or else Source (Pos + 1) = Character'Val (16#99#)) then -- UTF-8 left or right single quote mark. if Len > 1 then -- The first byte (0xe2) is part of the identifier. An -- error will be detected as the next byte (0x80) is -- invalid. Remove the first byte from the identifier, and -- let's catch the error later. Len := Len - 1; Pos := Pos - 1; else Error_Msg_Scan ("invalid use of UTF8 character for '"); Pos := Pos + 2; -- Distinguish between character literal and tick. Don't -- care about possible invalid character literal, as in any -- case we have already emitted an error message. if Current_Context.Prev_Token /= Tok_Identifier and then Current_Context.Prev_Token /= Tok_Character and then (Source (Pos + 1) = ''' or else (Source (Pos + 1) = Character'Val (16#e2#) and then Source (Pos + 2) = Character'Val (16#80#) and then Source (Pos + 3) = Character'Val (16#99#))) then Current_Token := Tok_Character; Current_Context.Identifier := Name_Table.Get_Identifier (Source (Pos)); if Source (Pos + 1) = ''' then Pos := Pos + 2; else Pos := Pos + 4; end if; else Current_Token := Tok_Tick; end if; return; end if; end if; when Format_Effector | Space_Character => null; end case; -- Hash it. Current_Context.Identifier := Get_Identifier (Buffer (1 .. Len)); Current_Token := Tok_Identifier; end Scan_Identifier; procedure Identifier_To_Token is begin if Current_Identifier in Std_Names.Name_Id_Keywords then -- LRM93 13.9 -- The identifiers listed below are called reserved words and are -- reserved for signifiances in the language. -- IN: this is also achieved in packages std_names and tokens. Current_Token := Token_Type'Val (Token_Type'Pos (Tok_First_Keyword) + Current_Identifier - Std_Names.Name_First_Keyword); case Current_Identifier is when Std_Names.Name_Id_AMS_Reserved_Words => if not AMS_Vhdl then if Is_Warning_Enabled (Warnid_Reserved_Word) then Warning_Msg_Scan (Warnid_Reserved_Word, "using %i AMS-VHDL reserved word as an identifier", +Current_Identifier); end if; Current_Token := Tok_Identifier; end if; when Std_Names.Name_Id_Vhdl08_Reserved_Words => if Vhdl_Std < Vhdl_08 then if Is_Warning_Enabled (Warnid_Reserved_Word) then Warning_Msg_Scan (Warnid_Reserved_Word, "using %i vhdl-2008 reserved word as an identifier", +Current_Identifier); end if; Current_Token := Tok_Identifier; end if; when Std_Names.Name_Id_Vhdl00_Reserved_Words => if Vhdl_Std < Vhdl_00 then if Is_Warning_Enabled (Warnid_Reserved_Word) then Warning_Msg_Scan (Warnid_Reserved_Word, "using %i vhdl-2000 reserved word as an identifier", +Current_Identifier); end if; Current_Token := Tok_Identifier; end if; when Std_Names.Name_Id_Vhdl93_Reserved_Words => if Vhdl_Std = Vhdl_87 then if Is_Warning_Enabled (Warnid_Reserved_Word) then Report_Start_Group; Warning_Msg_Scan (Warnid_Reserved_Word, "using %i vhdl93 reserved word as a vhdl87 identifier", +Current_Identifier); Warning_Msg_Scan (Warnid_Reserved_Word, "(use option --std=93 to compile as vhdl93)"); Report_End_Group; end if; Current_Token := Tok_Identifier; end if; when Std_Names.Name_Id_Vhdl87_Reserved_Words => null; when others => raise Program_Error; end case; elsif Flag_Psl then case Current_Identifier is when Std_Names.Name_Clock => Current_Token := Tok_Psl_Clock; when Std_Names.Name_Const => Current_Token := Tok_Psl_Const; when Std_Names.Name_Boolean => Current_Token := Tok_Psl_Boolean; when Std_Names.Name_Sequence => Current_Token := Tok_Psl_Sequence; when Std_Names.Name_Property => Current_Token := Tok_Psl_Property; when Std_Names.Name_Endpoint => Current_Token := Tok_Psl_Endpoint; when Std_Names.Name_Cover => Current_Token := Tok_Psl_Cover; when Std_Names.Name_Default => Current_Token := Tok_Psl_Default; when Std_Names.Name_Inf => Current_Token := Tok_Inf; when Std_Names.Name_Within => Current_Token := Tok_Within; when Std_Names.Name_Abort => Current_Token := Tok_Abort; when Std_Names.Name_Before => Current_Token := Tok_Before; when Std_Names.Name_Always => Current_Token := Tok_Always; when Std_Names.Name_Never => Current_Token := Tok_Never; when Std_Names.Name_Eventually => Current_Token := Tok_Eventually; when Std_Names.Name_Next_A => Current_Token := Tok_Next_A; when Std_Names.Name_Next_E => Current_Token := Tok_Next_E; when Std_Names.Name_Next_Event => Current_Token := Tok_Next_Event; when Std_Names.Name_Next_Event_A => Current_Token := Tok_Next_Event_A; when Std_Names.Name_Next_Event_E => Current_Token := Tok_Next_Event_E; when Std_Names.Name_Until => Current_Token := Tok_Until; when others => Current_Token := Tok_Identifier; if Source (Pos - 1) = '_' then Error_Msg_Scan ("identifiers cannot finish with '_'"); end if; end case; end if; end Identifier_To_Token; -- LRM93 13.3.2 -- EXTENDED_IDENTIFIER ::= \ GRAPHIC_CHARACTER { GRAPHIC_CHARACTER } \ -- -- Create an (extended) indentifier. -- Extended identifiers are stored as they appear (leading and tailing -- backslashes, doubling backslashes inside). procedure Scan_Extended_Identifier is use Name_Table; Buffer : String (1 .. Max_Name_Length); Len : Natural; C : Character; begin -- LRM93 13.3.2 -- Moreover, every extended identifiers is distinct from any basic -- identifier. -- GHDL: This is satisfied by storing '\' in the name table. Len := 1; Buffer (1) := '\'; loop -- Next character. Pos := Pos + 1; C := Source (Pos); if C = '\' then -- LRM93 13.3.2 -- If a backslash is to be used as one of the graphic characters -- of an extended literal, it must be doubled. -- LRM93 13.3.2 -- (a doubled backslash couting as one character) if Len >= Max_Name_Length - 1 then if Len = Max_Name_Length - 1 then Error_Too_Long; -- Accept this last one. Len := Len + 1; Buffer (Len) := C; end if; else Len := Len + 1; Buffer (Len) := C; end if; Pos := Pos + 1; C := Source (Pos); exit when C /= '\'; end if; case Characters_Kind (C) is when Format_Effector => Error_Msg_Scan ("format effector in extended identifier"); exit; when Graphic_Character => null; when Invalid => if C = Files_Map.EOT and then Pos >= Current_Context.File_Len then Error_Msg_Scan ("extended identifier not terminated at end of file"); elsif C = LF or C = CR then Error_Msg_Scan ("extended identifier not terminated at end of line"); else Error_Msg_Scan ("invalid character in extended identifier"); end if; exit; end case; -- LRM93 13.3.2 -- Extended identifiers differing only in the use of corresponding -- upper and lower case letters are distinct. if Len >= Max_Name_Length - 1 then if Len = Max_Name_Length - 1 then Error_Too_Long; -- Accept this last one. Len := Len + 1; Buffer (Len) := C; end if; else Len := Len + 1; Buffer (Len) := C; end if; end loop; if Len <= 2 then Error_Msg_Scan ("empty extended identifier is not allowed"); end if; -- LRM93 13.2 -- At least one separator is required between an identifier or an -- abstract literal and an adjacent identifier or abstract literal. case Characters_Kind (C) is when Digit | Upper_Case_Letter | Lower_Case_Letter => Error_Separator; when Invalid | Format_Effector | Space_Character | Special_Character | Other_Special_Character => null; end case; -- Hash it. Current_Context.Identifier := Get_Identifier (Buffer (1 .. Len)); Current_Token := Tok_Identifier; end Scan_Extended_Identifier; procedure Convert_Identifier (Str : in out String) is procedure Error_Bad is begin Error_Msg_Option ("bad character in identifier"); end Error_Bad; procedure Error_8bit is begin Error_Msg_Option ("8 bits characters not allowed in vhdl87"); end Error_8bit; C : Character; subtype Id_Subtype is String (1 .. Str'Length); Id : Id_Subtype renames Str; begin if Id'Length = 0 then Error_Msg_Option ("identifier required"); return; end if; if Id (1) = '\' then -- Extended identifier. if Vhdl_Std = Vhdl_87 then Error_Msg_Option ("extended identifiers not allowed in vhdl87"); return; end if; if Id'Length < 3 then Error_Msg_Option ("extended identifier is too short"); return; end if; if Id (Id'Last) /= '\' then Error_Msg_Option ("extended identifier must finish with a '\'"); return; end if; for I in 2 .. Id'Last - 1 loop C := Id (I); case Characters_Kind (C) is when Format_Effector => Error_Msg_Option ("format effector in extended identifier"); return; when Graphic_Character => if C = '\' then if Id (I + 1) /= '\' or else I = Id'Last - 1 then Error_Msg_Option ("anti-slash must be doubled " & "in extended identifier"); return; end if; end if; when Invalid => Error_Bad; end case; end loop; else -- Identifier for I in 1 .. Id'Length loop C := Id (I); case Characters_Kind (C) is when Upper_Case_Letter => if Vhdl_Std = Vhdl_87 and C > 'Z' then Error_8bit; end if; Id (I) := To_Lower_Map (C); when Lower_Case_Letter | Digit => if Vhdl_Std = Vhdl_87 and C > 'z' then Error_8bit; end if; when Special_Character => -- The current character is legal in an identifier. if C = '_' then if I = 1 then Error_Msg_Option ("an identifier cannot start with an underscore"); return; end if; if Id (I - 1) = '_' then Error_Msg_Option ("two underscores can't be consecutive"); return; end if; if I = Id'Last then Error_Msg_Option ("an identifier cannot finish with an underscore"); return; end if; else Error_Bad; end if; when others => Error_Bad; end case; end loop; end if; end Convert_Identifier; -- Internal scanner function: return True if C must be considered as a line -- terminator. This also includes EOT (which terminates the file or is -- invalid). function Is_EOL (C : Character) return Boolean is begin case C is when CR | LF | VT | FF | Files_Map.EOT => return True; when others => return False; end case; end Is_EOL; -- Advance scanner till the first non-space character. procedure Skip_Spaces is begin while Source (Pos) = ' ' or Source (Pos) = HT loop Pos := Pos + 1; end loop; end Skip_Spaces; -- Eat all characters until end-of-line (not included). procedure Skip_Until_EOL is begin while not Is_EOL (Source (Pos)) loop -- Don't warn about invalid character, it's somewhat out of the -- scope. Pos := Pos + 1; end loop; end Skip_Until_EOL; -- Scan an identifier within a comment. Only lower case letters are -- allowed. procedure Scan_Comment_Identifier (Id : out Name_Id) is use Name_Table; Buffer : String (1 .. Max_Name_Length); Len : Natural; C : Character; begin Id := Null_Identifier; Skip_Spaces; -- The identifier shall start with a lower case letter. if Source (Pos) not in 'a' .. 'z' then return; end if; -- Scan the identifier (in lower cases). Len := 0; loop C := Source (Pos); exit when C not in 'a' .. 'z' and C /= '_'; Len := Len + 1; Buffer (Len) := C; Pos := Pos + 1; end loop; -- Shall be followed by a space or a new line. if not (C = ' ' or else C = HT or else Is_EOL (C)) then return; end if; Id := Get_Identifier (Buffer (1 .. Len)); end Scan_Comment_Identifier; package Directive_Protect is -- Called to scan a protect tool directive. procedure Scan_Protect_Directive; end Directive_Protect; -- Body is put in a separate file to avoid pollution. package body Directive_Protect is separate; -- Called to scan a tool directive. procedure Scan_Tool_Directive is procedure Error_Missing_Directive is begin Error_Msg_Scan ("tool directive required after '`'"); Skip_Until_EOL; end Error_Missing_Directive; C : Character; begin -- The current character is '`'. Pos := Pos + 1; Skip_Spaces; -- Check and scan identifier. C := Source (Pos); if Characters_Kind (C) not in Letter then Error_Missing_Directive; return; end if; Scan_Identifier (False); if Current_Token /= Tok_Identifier then Error_Missing_Directive; return; end if; Skip_Spaces; -- Dispatch according to the identifier. if Current_Identifier = Std_Names.Name_Protect then Directive_Protect.Scan_Protect_Directive; else Error_Msg_Scan ("unknown tool directive %i ignored", +Current_Identifier); Skip_Until_EOL; end if; end Scan_Tool_Directive; -- Scan tokens within a comment. Return TRUE if Current_Token was set, -- return FALSE to discard the comment (ie treat it like a real comment). function Scan_Comment return Boolean is use Std_Names; Id : Name_Id; begin Scan_Comment_Identifier (Id); if Id = Null_Identifier then return False; end if; case Id is when Name_Psl => -- Accept tokens after '-- psl'. if Flag_Psl_Comment then Flag_Psl := True; Flag_Scan_In_Comment := True; return True; end if; when others => null; end case; return False; end Scan_Comment; function Scan_Exclam_Mark return Boolean is begin if Source (Pos) = '!' then Pos := Pos + 1; return True; else return False; end if; end Scan_Exclam_Mark; function Scan_Underscore return Boolean is begin if Source (Pos) = '_' then Pos := Pos + 1; return True; else return False; end if; end Scan_Underscore; -- The Scan_Next_Line procedure must be called after each end-of-line to -- register to next line number. This is called by Scan_CR_Newline and -- Scan_LF_Newline. procedure Scan_Next_Line is begin Files_Map.Skip_Gap (Current_Context.Source_File, Pos); Current_Context.Line_Number := Current_Context.Line_Number + 1; Current_Context.Line_Pos := Pos; File_Add_Line_Number (Current_Context.Source_File, Current_Context.Line_Number, Pos); end Scan_Next_Line; -- Scan a CR end-of-line. procedure Scan_CR_Newline is begin -- Accept CR or CR+LF as line separator. if Source (Pos + 1) = LF then Pos := Pos + 2; else Pos := Pos + 1; end if; Scan_Next_Line; end Scan_CR_Newline; -- Scan a LF end-of-line. procedure Scan_LF_Newline is begin -- Accept LF or LF+CR as line separator. if Source (Pos + 1) = CR then Pos := Pos + 2; else Pos := Pos + 1; end if; Scan_Next_Line; end Scan_LF_Newline; -- Emit an error message for an invalid character. procedure Error_Bad_Character is begin -- Technically character literals, string literals, extended -- identifiers and comments. Error_Msg_Scan ("character %c can only be used in strings or comments", +Source (Pos)); end Error_Bad_Character; -- Get a new token. procedure Scan is begin if Current_Token /= Tok_Invalid then Current_Context.Prev_Token := Current_Token; end if; Current_Context.Prev_Pos := Pos; << Again >> null; -- Skip commonly used separators. -- (Like Skip_Spaces but manually inlined for speed). while Source (Pos) = ' ' or Source (Pos) = HT loop Pos := Pos + 1; end loop; Current_Context.Token_Pos := Pos; Current_Context.Identifier := Null_Identifier; case Source (Pos) is when HT | ' ' => -- Must have already been skipped just above. raise Internal_Error; when NBSP => if Vhdl_Std = Vhdl_87 then Error_Msg_Scan ("NBSP character not allowed in vhdl87"); end if; Pos := Pos + 1; goto Again; when VT | FF => Pos := Pos + 1; goto Again; when LF => Scan_LF_Newline; if Flag_Newline then Current_Token := Tok_Newline; return; end if; goto Again; when CR => Scan_CR_Newline; if Flag_Newline then Current_Token := Tok_Newline; return; end if; goto Again; when '-' => if Source (Pos + 1) = '-' then -- This is a comment. -- LRM93 13.8 -- A comment starts with two adjacent hyphens and extends up -- to the end of the line. -- A comment can appear on any line line of a VHDL -- description. -- The presence or absence of comments has no influence on -- whether a description is legal or illegal. -- Futhermore, comments do not influence the execution of a -- simulation module; their sole purpose is the enlightenment -- of the human reader. -- GHDL note: As a consequence, an obfruscating comment -- is out of purpose, and a warning could be reported :-) Pos := Pos + 2; -- Scan inside a comment. So we just ignore the two dashes. if Flag_Scan_In_Comment then goto Again; end if; -- Handle keywords in comment (PSL). if Flag_Comment_Keyword and then Scan_Comment then goto Again; end if; -- LRM93 13.2 -- In any case, a sequence of one or more format -- effectors other than horizontal tabulation must -- cause at least one end of line. while not Is_EOL (Source (Pos)) loop -- LRM93 13.1 -- The only characters allowed in the text of a VHDL -- description are the graphic characters and the format -- effectors. -- LRM02 13.1 Character set -- The only characters allowed in the text of a VHDL -- description (except within comments -- see 13.8) [...] -- -- LRM02 13.8 Comments -- A comment [...] may contain any character except the -- format effectors vertical tab, carriage return, line -- feed and form feed. if not (Flags.Mb_Comment or Vhdl_Std >= Vhdl_02) and then Characters_Kind (Source (Pos)) = Invalid then Error_Msg_Scan ("invalid character, even in a comment"); end if; Pos := Pos + 1; end loop; if Flag_Comment then Current_Token := Tok_Comment; return; end if; goto Again; elsif Flag_Psl and then Source (Pos + 1) = '>' then Current_Token := Tok_Minus_Greater; Pos := Pos + 2; return; else Current_Token := Tok_Minus; Pos := Pos + 1; return; end if; when '+' => Current_Token := Tok_Plus; Pos := Pos + 1; return; when '*' => if Source (Pos + 1) = '*' then Current_Token := Tok_Double_Star; Pos := Pos + 2; else Current_Token := Tok_Star; Pos := Pos + 1; end if; return; when '/' => if Source (Pos + 1) = '=' then Current_Token := Tok_Not_Equal; Pos := Pos + 2; elsif Source (Pos + 1) = '*' then -- LRM08 15.9 Comments -- A delimited comment start with a solidus (slash) character -- immediately followed by an asterisk character and extends up -- to the first subsequent occurrence of an asterisk character -- immediately followed by a solidus character. if Vhdl_Std < Vhdl_08 then Error_Msg_Scan ("block comment are not allowed before vhdl 2008"); end if; -- Skip '/*'. Pos := Pos + 2; loop case Source (Pos) is when '/' => -- LRM08 15.9 -- Moreover, an occurrence of a solidus character -- immediately followed by an asterisk character -- within a delimited comment is not interpreted as -- the start of a nested delimited comment. if Source (Pos + 1) = '*' then Warning_Msg_Scan (Warnid_Nested_Comment, "'/*' found within a block comment"); end if; Pos := Pos + 1; when '*' => if Source (Pos + 1) = '/' then Pos := Pos + 2; exit; else Pos := Pos + 1; end if; when CR => Scan_CR_Newline; when LF => Scan_LF_Newline; when Files_Map.EOT => if Pos >= Current_Context.File_Len then -- Point at the start of the comment. Error_Msg_Scan (+Get_Token_Location, "block comment not terminated at end of file"); exit; end if; Pos := Pos + 1; when others => Pos := Pos + 1; end case; end loop; if Flag_Comment then Current_Token := Tok_Comment; return; end if; goto Again; else Current_Token := Tok_Slash; Pos := Pos + 1; end if; return; when '(' => Current_Token := Tok_Left_Paren; Pos := Pos + 1; return; when ')' => Current_Token := Tok_Right_Paren; Pos := Pos + 1; return; when '|' => if Flag_Psl then if Source (Pos + 1) = '|' then Current_Token := Tok_Bar_Bar; Pos := Pos + 2; elsif Source (Pos + 1) = '-' and then Source (Pos + 2) = '>' then Current_Token := Tok_Bar_Arrow; Pos := Pos + 3; elsif Source (Pos + 1) = '=' and then Source (Pos + 2) = '>' then Current_Token := Tok_Bar_Double_Arrow; Pos := Pos + 3; else Current_Token := Tok_Bar; Pos := Pos + 1; end if; else Current_Token := Tok_Bar; Pos := Pos + 1; end if; return; when '!' => if Flag_Psl then Current_Token := Tok_Exclam_Mark; else if Source (Pos + 1) = '=' then -- != is not allowed in VHDL, but be friendly with C users. Error_Msg_Scan (+Get_Token_Location, "Use '/=' for inequality in vhdl"); Current_Token := Tok_Not_Equal; Pos := Pos + 1; else -- LRM93 13.10 -- A vertical line (|) can be replaced by an exclamation -- mark (!) where used as a delimiter. Current_Token := Tok_Bar; end if; end if; Pos := Pos + 1; return; when ':' => if Source (Pos + 1) = '=' then Current_Token := Tok_Assign; Pos := Pos + 2; else Current_Token := Tok_Colon; Pos := Pos + 1; end if; return; when ';' => Current_Token := Tok_Semi_Colon; Pos := Pos + 1; return; when ',' => Current_Token := Tok_Comma; Pos := Pos + 1; return; when '.' => if Source (Pos + 1) = '.' then -- Be Ada friendly... Error_Msg_Scan ("'..' is invalid in vhdl, replaced by 'to'"); Current_Token := Tok_To; Pos := Pos + 2; return; end if; Current_Token := Tok_Dot; Pos := Pos + 1; return; when '&' => if Flag_Psl and then Source (Pos + 1) = '&' then Current_Token := Tok_And_And; Pos := Pos + 2; else Current_Token := Tok_Ampersand; Pos := Pos + 1; end if; return; when '<' => case Source (Pos + 1) is when '=' => Current_Token := Tok_Less_Equal; Pos := Pos + 2; when '>' => Current_Token := Tok_Box; Pos := Pos + 2; when '<' => Current_Token := Tok_Double_Less; Pos := Pos + 2; when others => Current_Token := Tok_Less; Pos := Pos + 1; end case; return; when '>' => case Source (Pos + 1) is when '=' => Current_Token := Tok_Greater_Equal; Pos := Pos + 2; when '>' => Current_Token := Tok_Double_Greater; Pos := Pos + 2; when others => Current_Token := Tok_Greater; Pos := Pos + 1; end case; return; when '=' => if Source (Pos + 1) = '=' then if AMS_Vhdl then Current_Token := Tok_Equal_Equal; else Error_Msg_Scan ("'==' is not the vhdl equality, replaced by '='"); Current_Token := Tok_Equal; end if; Pos := Pos + 2; elsif Source (Pos + 1) = '>' then Current_Token := Tok_Double_Arrow; Pos := Pos + 2; else Current_Token := Tok_Equal; Pos := Pos + 1; end if; return; when ''' => -- Handle cases such as character'('a') -- FIXME: what about f ()'length ? or .all'length if Current_Context.Prev_Token /= Tok_Identifier and then Current_Context.Prev_Token /= Tok_Character and then Source (Pos + 2) = ''' then -- LRM93 13.5 -- A character literal is formed by enclosing one of the 191 -- graphic character (...) between two apostrophe characters. -- CHARACTER_LITERAL ::= ' GRAPHIC_CHARACTER ' if Characters_Kind (Source (Pos + 1)) not in Graphic_Character then Error_Msg_Scan ("a character literal can only be a graphic character"); elsif Vhdl_Std = Vhdl_87 and then Source (Pos + 1) > Character'Val (127) then Error_8bit; end if; Current_Token := Tok_Character; Current_Context.Identifier := Name_Table.Get_Identifier (Source (Pos + 1)); Pos := Pos + 3; return; else Current_Token := Tok_Tick; Pos := Pos + 1; end if; return; when '0' .. '9' => Scan_Literal; -- LRM93 13.2 -- At least one separator is required between an identifier or -- an abstract literal and an adjacent identifier or abstract -- literal. case Characters_Kind (Source (Pos)) is when Digit => -- Happen if d#ddd# is followed by a number. Error_Msg_Scan ("space is required between numbers"); when Upper_Case_Letter | Lower_Case_Letter => -- Could call Error_Separator, but use a clearer message -- for this common case. -- Note: the term "unit name" is not correct here, since -- it can be any identifier or even a keyword; however it -- is probably the most common case (eg 10ns). if Vhdl_Std >= Vhdl_08 and then Current_Token = Tok_Integer then Current_Token := Tok_Integer_Letter; else Error_Msg_Scan ("space is required between number and unit name"); end if; when Other_Special_Character => if Vhdl_Std > Vhdl_87 and then Source (Pos) = '\' then -- Start of extended identifier. Error_Separator; end if; when Invalid | Format_Effector | Space_Character | Special_Character => null; end case; return; when '#' => Error_Msg_Scan ("'#' is used for based literals and " & "must be preceded by a base"); -- Skip. Pos := Pos + 1; goto Again; when '"' => Scan_String; return; when '%' => if Vhdl_Std >= Vhdl_08 then Error_Msg_Scan ("'%%' not allowed in vhdl 2008 (was replacement character)"); -- Continue as a string. end if; Scan_String; return; when '[' => if Flag_Psl then if Source (Pos + 1) = '*' then Current_Token := Tok_Brack_Star; Pos := Pos + 2; elsif Source (Pos + 1) = '+' and then Source (Pos + 2) = ']' then Current_Token := Tok_Brack_Plus_Brack; Pos := Pos + 3; elsif Source (Pos + 1) = '-' and then Source (Pos + 2) = '>' then Current_Token := Tok_Brack_Arrow; Pos := Pos + 3; elsif Source (Pos + 1) = '=' then Current_Token := Tok_Brack_Equal; Pos := Pos + 2; else Current_Token := Tok_Left_Bracket; Pos := Pos + 1; end if; else if Vhdl_Std = Vhdl_87 then Error_Msg_Scan ("'[' is an invalid character in vhdl87, replaced by '('"); Current_Token := Tok_Left_Paren; else Current_Token := Tok_Left_Bracket; end if; Pos := Pos + 1; end if; return; when ']' => if Vhdl_Std = Vhdl_87 and not Flag_Psl then Error_Msg_Scan ("']' is an invalid character in vhdl87, replaced by ')'"); Current_Token := Tok_Right_Paren; else Current_Token := Tok_Right_Bracket; end if; Pos := Pos + 1; return; when '{' => if Flag_Psl then Current_Token := Tok_Left_Curly; else Error_Msg_Scan ("'{' is an invalid character, replaced by '('"); Current_Token := Tok_Left_Paren; end if; Pos := Pos + 1; return; when '}' => if Flag_Psl then Current_Token := Tok_Right_Curly; else Error_Msg_Scan ("'}' is an invalid character, replaced by ')'"); Current_Token := Tok_Right_Paren; end if; Pos := Pos + 1; return; when '\' => if Vhdl_Std = Vhdl_87 then Error_Msg_Scan ("extended identifiers are not allowed in vhdl87"); end if; Scan_Extended_Identifier; return; when '^' => if Vhdl_Std >= Vhdl_08 then Current_Token := Tok_Caret; else Current_Token := Tok_Xor; Error_Msg_Scan ("'^' is not a VHDL operator, use 'xor'"); end if; Pos := Pos + 1; return; when '~' => Error_Msg_Scan ("'~' is not a VHDL operator, use 'not'"); Pos := Pos + 1; Current_Token := Tok_Not; return; when '?' => if Vhdl_Std < Vhdl_08 then Error_Bad_Character; Pos := Pos + 1; goto Again; else if Source (Pos + 1) = '<' then if Source (Pos + 2) = '=' then Current_Token := Tok_Match_Less_Equal; Pos := Pos + 3; else Current_Token := Tok_Match_Less; Pos := Pos + 2; end if; elsif Source (Pos + 1) = '>' then if Source (Pos + 2) = '=' then Current_Token := Tok_Match_Greater_Equal; Pos := Pos + 3; else Current_Token := Tok_Match_Greater; Pos := Pos + 2; end if; elsif Source (Pos + 1) = '?' then Current_Token := Tok_Condition; Pos := Pos + 2; elsif Source (Pos + 1) = '=' then Current_Token := Tok_Match_Equal; Pos := Pos + 2; elsif Source (Pos + 1) = '/' and then Source (Pos + 2) = '=' then Current_Token := Tok_Match_Not_Equal; Pos := Pos + 3; else Error_Msg_Scan ("unknown matching operator"); Pos := Pos + 1; goto Again; end if; end if; return; when '`' => if Vhdl_Std >= Vhdl_08 then Scan_Tool_Directive; else Error_Bad_Character; Skip_Until_EOL; end if; goto Again; when '$' | Inverted_Exclamation .. Inverted_Question | Multiplication_Sign | Division_Sign => Error_Bad_Character; Pos := Pos + 1; goto Again; when '@' => if Vhdl_Std >= Vhdl_08 or Flag_Psl then Current_Token := Tok_Arobase; Pos := Pos + 1; return; else Error_Bad_Character; Pos := Pos + 1; goto Again; end if; when '_' => Error_Msg_Scan ("an identifier can't start with '_'"); Scan_Identifier (Flag_Psl); -- Cannot be a reserved word. return; when 'A' .. 'Z' | 'a' .. 'z' => Scan_Identifier (Flag_Psl); Identifier_To_Token; return; when UC_A_Grave .. UC_O_Diaeresis | UC_O_Oblique_Stroke .. UC_Icelandic_Thorn | LC_German_Sharp_S .. LC_O_Diaeresis | LC_O_Oblique_Stroke .. LC_Y_Diaeresis => if Vhdl_Std = Vhdl_87 then Error_Msg_Scan ("non 7-bit latin-1 letters are not allowed in vhdl87"); end if; Scan_Identifier (False); -- Not a reserved word. return; when NUL .. ETX | ENQ .. BS | SO .. US | DEL .. APC => Error_Msg_Scan ("control character that is not CR, LF, FF, HT or VT " & "is not allowed"); Pos := Pos + 1; goto Again; when Files_Map.EOT => if Pos >= Current_Context.File_Len then -- FIXME: should conditionnaly emit a warning if the file -- is not terminated by an end of line. Current_Token := Tok_Eof; else Error_Msg_Scan ("EOT is not allowed inside the file"); Pos := Pos + 1; goto Again; end if; return; end case; -- Not reachable: all case should use goto Again or return. end Scan; function Is_Whitespace (C : Character) return Boolean is begin if C = ' ' then return True; elsif Vhdl_Std > Vhdl_87 and C = NBSP then return True; else return False; end if; end Is_Whitespace; end Vhdl.Scanner;