diff options
author | Tristan Gingold <tgingold@free.fr> | 2014-11-04 20:14:19 +0100 |
---|---|---|
committer | Tristan Gingold <tgingold@free.fr> | 2014-11-04 20:14:19 +0100 |
commit | 9c195bf5d86d67ea5eb419ccf6e48dc153e57c68 (patch) | |
tree | 575346e529b99e26382b4a06f6ff2caa0b391ab2 /scanner.adb | |
parent | 184a123f91e07c927292d67462561dc84f3a920d (diff) | |
download | ghdl-9c195bf5d86d67ea5eb419ccf6e48dc153e57c68.tar.gz ghdl-9c195bf5d86d67ea5eb419ccf6e48dc153e57c68.tar.bz2 ghdl-9c195bf5d86d67ea5eb419ccf6e48dc153e57c68.zip |
Move sources to src/ subdirectory.
Diffstat (limited to 'scanner.adb')
-rw-r--r-- | scanner.adb | 1621 |
1 files changed, 0 insertions, 1621 deletions
diff --git a/scanner.adb b/scanner.adb deleted file mode 100644 index 260bd7c8f..000000000 --- a/scanner.adb +++ /dev/null @@ -1,1621 +0,0 @@ --- VHDL lexical scanner. --- Copyright (C) 2002 - 2014 Tristan Gingold --- --- GHDL is free software; you can redistribute it and/or modify it under --- the terms of the GNU General Public License as published by the Free --- Software Foundation; either version 2, or (at your option) any later --- version. --- --- GHDL is distributed in the hope that it will be useful, but WITHOUT ANY --- WARRANTY; without even the implied warranty of MERCHANTABILITY or --- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --- for more details. --- --- You should have received a copy of the GNU General Public License --- along with GHDL; see the file COPYING. If not, write to the Free --- Software Foundation, 59 Temple Place - Suite 330, Boston, MA --- 02111-1307, USA. -with Ada.Characters.Latin_1; use Ada.Characters.Latin_1; -with Ada.Characters.Handling; -with Errorout; use Errorout; -with Name_Table; -with Files_Map; use Files_Map; -with Std_Names; -with Str_Table; -with Flags; use Flags; - -package body Scanner is - - -- This classification is a simplification of the categories of LRM93 13.1 - -- LRM93 13.1 - -- The only characters allowed in the text of a VHDL description are the - -- graphic characters and format effector. - - type Character_Kind_Type is - ( - -- Neither a format effector nor a graphic character. - Invalid, - Format_Effector, - Upper_Case_Letter, - Digit, - Special_Character, - Space_Character, - Lower_Case_Letter, - Other_Special_Character); - - -- LRM93 13.1 - -- BASIC_GRAPHIC_CHARACTER ::= - -- UPPER_CASE_LETTER | DIGIT | SPECIAL_CHARACTER | SPACE_CHARACTER - --subtype Basic_Graphic_Character is - -- Character_Kind_Type range Upper_Case_Letter .. Space_Character; - - -- LRM93 13.1 - -- GRAPHIC_CHARACTER ::= - -- BASIC_GRAPHIC_CHARACTER | LOWER_CASE_LETTER | OTHER_SPECIAL_CHARACTER - -- Note: There is 191 graphic character. - subtype Graphic_Character is - Character_Kind_Type range Upper_Case_Letter .. Other_Special_Character; - - -- LRM93 13.1 - -- The characters included in each of the categories of basic graphic - -- characters are defined as follows: - type Character_Array is array (Character) of Character_Kind_Type; - Characters_Kind : constant Character_Array := - (NUL .. BS => Invalid, - - -- Format effectors are the ISO (and ASCII) characters called horizontal - -- tabulation, vertical tabulation, carriage return, line feed, and form - -- feed. - HT | LF | VT | FF | CR => Format_Effector, - - SO .. US => Invalid, - - -- 1. upper case letters - 'A' .. 'Z' | UC_A_Grave .. UC_O_Diaeresis | - UC_O_Oblique_Stroke .. UC_Icelandic_Thorn => Upper_Case_Letter, - - -- 2. digits - '0' .. '9' => Digit, - - -- 3. special characters - Quotation | '#' | '&' | ''' | '(' | ')' | '+' | ',' | '-' | '.' | '/' - | ':' | ';' | '<' | '=' | '>' | '[' | ']' - | '_' | '|' | '*' => Special_Character, - - -- 4. the space characters - ' ' | No_Break_Space => Space_Character, - - -- 5. lower case letters - 'a' .. 'z' | LC_German_Sharp_S .. LC_O_Diaeresis | - LC_O_Oblique_Stroke .. LC_Y_Diaeresis => Lower_Case_Letter, - - -- 6. other special characters - '!' | '$' | '%' | '@' | '?' | '\' | '^' | '`' | '{' | '}' | '~' - | Inverted_Exclamation .. Inverted_Question | Multiplication_Sign | - Division_Sign => Other_Special_Character, - - -- '¡' -- INVERTED EXCLAMATION MARK - -- '¢' -- CENT SIGN - -- '£' -- POUND SIGN - -- '¤' -- CURRENCY SIGN - -- '¥' -- YEN SIGN - -- '¦' -- BROKEN BAR - -- '§' -- SECTION SIGN - -- '¨' -- DIAERESIS - -- '©' -- COPYRIGHT SIGN - -- 'ª' -- FEMININE ORDINAL INDICATOR - -- '«' -- LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - -- '¬' -- NOT SIGN - -- '' -- SOFT HYPHEN - -- '®' -- REGISTERED SIGN - -- '¯' -- MACRON - -- '°' -- DEGREE SIGN - -- '±' -- PLUS-MINUS SIGN - -- '²' -- SUPERSCRIPT TWO - -- '³' -- SUPERSCRIPT THREE - -- '´' -- ACUTE ACCENT - -- 'µ' -- MICRO SIGN - -- '¶' -- PILCROW SIGN - -- '·' -- MIDDLE DOT - -- '¸' -- CEDILLA - -- '¹' -- SUPERSCRIPT ONE - -- 'º' -- MASCULINE ORDINAL INDICATOR - -- '»' -- RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - -- '¼' -- VULGAR FRACTION ONE QUARTER - -- '½' -- VULGAR FRACTION ONE HALF - -- '¾' -- VULGAR FRACTION THREE QUARTERS - -- '¿' -- INVERTED QUESTION MARK - -- '×' -- MULTIPLICATION SIGN - -- '÷' -- DIVISION SIGN - - DEL .. APC => Invalid); - - -- The context contains the whole internal state of the scanner, ie - -- it can be used to push/pop a lexical analysis, to restart the - -- scanner from a context marking a previous point. - type Scan_Context is record - Source: File_Buffer_Acc; - Source_File: Source_File_Entry; - Line_Number: Natural; - Line_Pos: Source_Ptr; - Pos: Source_Ptr; - Token_Pos: Source_Ptr; - File_Len: Source_Ptr; - File_Name: Name_Id; - Token: Token_Type; - Prev_Token: Token_Type; - Str_Id : String_Id; - Str_Len : Nat32; - Identifier: Name_Id; - Int64: Iir_Int64; - Fp64: Iir_Fp64; - end record; - - -- The current context. - -- Default value is an invalid context. - Current_Context: Scan_Context := (Source => null, - Source_File => No_Source_File_Entry, - Line_Number => 0, - Line_Pos => 0, - Pos => 0, - Token_Pos => 0, - File_Len => 0, - File_Name => Null_Identifier, - Token => Tok_Invalid, - Prev_Token => Tok_Invalid, - Identifier => Null_Identifier, - Str_Id => Null_String, - Str_Len => 0, - Int64 => 0, - Fp64 => 0.0); - - Source: File_Buffer_Acc renames Current_Context.Source; - Pos: Source_Ptr renames Current_Context.Pos; - - -- When CURRENT_TOKEN is an identifier, its name_id is stored into - -- this global variable. - -- Function current_text can be used to convert it into an iir. - function Current_Identifier return Name_Id is - begin - return Current_Context.Identifier; - end Current_Identifier; - - procedure Invalidate_Current_Identifier is - begin - Current_Context.Identifier := Null_Identifier; - end Invalidate_Current_Identifier; - - procedure Invalidate_Current_Token is - begin - if Current_Token /= Tok_Invalid then - Current_Context.Prev_Token := Current_Token; - Current_Token := Tok_Invalid; - end if; - end Invalidate_Current_Token; - - function Current_String_Id return String_Id is - begin - return Current_Context.Str_Id; - end Current_String_Id; - - function Current_String_Length return Nat32 is - begin - return Current_Context.Str_Len; - end Current_String_Length; - - function Current_Iir_Int64 return Iir_Int64 is - begin - return Current_Context.Int64; - end Current_Iir_Int64; - - function Current_Iir_Fp64 return Iir_Fp64 is - begin - return Current_Context.Fp64; - end Current_Iir_Fp64; - - function Get_Current_File return Name_Id is - begin - return Current_Context.File_Name; - end Get_Current_File; - - function Get_Current_Source_File return Source_File_Entry is - begin - return Current_Context.Source_File; - end Get_Current_Source_File; - - function Get_Current_Line return Natural is - begin - return Current_Context.Line_Number; - end Get_Current_Line; - - function Get_Current_Column return Natural - is - Col : Natural; - Name : Name_Id; - begin - Coord_To_Position - (Current_Context.Source_File, - Current_Context.Line_Pos, - Integer (Current_Context.Pos - Current_Context.Line_Pos), - Name, Col); - return Col; - end Get_Current_Column; - - function Get_Token_Column return Natural - is - Col : Natural; - Name : Name_Id; - begin - Coord_To_Position - (Current_Context.Source_File, - Current_Context.Line_Pos, - Integer (Current_Context.Token_Pos - Current_Context.Line_Pos), - Name, Col); - return Col; - end Get_Token_Column; - - function Get_Token_Position return Source_Ptr is - begin - return Current_Context.Token_Pos; - end Get_Token_Position; - - function Get_Position return Source_Ptr is - begin - return Current_Context.Pos; - end Get_Position; - - procedure Set_File (Source_File : Source_File_Entry) - is - N_Source: File_Buffer_Acc; - begin - if Current_Context.Source /= null then - raise Internal_Error; - end if; - if Source_File = No_Source_File_Entry then - raise Internal_Error; - end if; - N_Source := Get_File_Source (Source_File); - Current_Context := - (Source => N_Source, - Source_File => Source_File, - Line_Number => 1, - Line_Pos => 0, - Pos => N_Source'First, - Token_Pos => 0, -- should be invalid, - File_Len => Get_File_Length (Source_File), - File_Name => Get_File_Name (Source_File), - Token => Tok_Invalid, - Prev_Token => Tok_Invalid, - Identifier => Null_Identifier, - Str_Id => Null_String, - Str_Len => 0, - Int64 => -1, - Fp64 => 0.0); - Current_Token := Tok_Invalid; - end Set_File; - - procedure Set_Current_Position (Position: Source_Ptr) - is - Loc : Location_Type; - Offset: Natural; - File_Entry : Source_File_Entry; - begin - if Current_Context.Source = null then - raise Internal_Error; - end if; - Current_Token := Tok_Invalid; - Current_Context.Pos := Position; - Loc := File_Pos_To_Location (Current_Context.Source_File, - Current_Context.Pos); - Location_To_Coord (Loc, - File_Entry, Current_Context.Line_Pos, - Current_Context.Line_Number, Offset); - end Set_Current_Position; - - procedure Close_File is - begin - Current_Context.Source := null; - end Close_File; - - -- Emit an error when a character above 128 was found. - -- This must be called only in vhdl87. - procedure Error_8bit is - begin - Error_Msg_Scan ("8 bits characters not allowed in vhdl87"); - end Error_8bit; - - -- Emit an error when a separator is expected. - procedure Error_Separator is - begin - Error_Msg_Scan ("a separator is required here"); - end Error_Separator; - - -- scan a decimal literal or a based literal. - -- - -- LRM93 13.4.1 - -- DECIMAL_LITERAL ::= INTEGER [ . INTEGER ] [ EXPONENT ] - -- EXPONENT ::= E [ + ] INTEGER | E - INTEGER - -- - -- LRM93 13.4.2 - -- BASED_LITERAL ::= BASE # BASED_INTEGER [ . BASED_INTEGER ] # EXPONENT - -- BASE ::= INTEGER - procedure Scan_Literal is separate; - - -- Scan a string literal. - -- - -- LRM93 13.6 - -- A string literal is formed by a sequence of graphic characters - -- (possibly none) enclosed between two quotation marks used as string - -- brackets. - -- STRING_LITERAL ::= " { GRAPHIC_CHARACTER } " - -- - -- IN: for a string, at the call of this procedure, the current character - -- must be either '"' or '%'. - procedure Scan_String - is - -- The quotation character (can be " or %). - Mark: Character; - -- Current character. - C : Character; - -- Current length. - Length : Nat32; - begin - Mark := Source (Pos); - if Mark /= Quotation and then Mark /= '%' then - raise Internal_Error; - end if; - Pos := Pos + 1; - Length := 0; - Current_Context.Str_Id := Str_Table.Start; - loop - C := Source (Pos); - if C = Mark then - -- LRM93 13.6 - -- If a quotation mark value is to be represented in the sequence - -- of character values, then a pair of adjacent quoatation - -- characters marks must be written at the corresponding place - -- within the string literal. - -- LRM93 13.10 - -- Any pourcent sign within the sequence of characters must then - -- be doubled, and each such doubled percent sign is interpreted - -- as a single percent sign value. - -- The same replacement is allowed for a bit string literal, - -- provieded that both bit string brackets are replaced. - Pos := Pos + 1; - exit when Source (Pos) /= Mark; - end if; - - case Characters_Kind (C) is - when Format_Effector => - Error_Msg_Scan ("format effector not allowed in a string"); - exit; - when Invalid => - Error_Msg_Scan - ("invalid character not allowed, even in a string"); - when Graphic_Character => - if Vhdl_Std = Vhdl_87 and then C > Character'Val (127) then - Error_8bit; - end if; - end case; - - if C = Quotation and Mark = '%' then - -- LRM93 13.10 - -- The quotation marks (") used as string brackets at both ends of - -- a string literal can be replaced by percent signs (%), provided - -- that the enclosed sequence of characters constains no quotation - -- marks, and provided that both string brackets are replaced. - Error_Msg_Scan - ("'""' cannot be used in a string delimited with '%'"); - end if; - - Length := Length + 1; - Str_Table.Append (C); - Pos := Pos + 1; - end loop; - - Str_Table.Finish; - - Current_Token := Tok_String; - Current_Context.Str_Len := Length; - end Scan_String; - - -- Scan a bit string literal. - -- - -- LRM93 13.7 - -- A bit string literal is formed by a sequence of extended digits - -- (possibly none) enclosed between two quotations used as bit string - -- brackets, preceded by a base specifier. - -- BIT_STRING_LITERAL ::= BASE_SPECIFIER " [ BIT_VALUE ] " - -- BIT_VALUE ::= EXTENDED_DIGIT { [ UNDERLINE ] EXTENDED_DIGIT } - -- - -- The current character must be a base specifier, followed by '"' or '%'. - -- The base must be valid. - procedure Scan_Bit_String - is - -- The base specifier. - Base_Len : Nat32 range 1 .. 4; - -- The quotation character (can be " or %). - Mark: Character; - -- Current character. - C : Character; - -- Current length. - Length : Nat32; - -- Digit value. - V : Natural; - begin - case Source (Pos) is - when 'x' | 'X' => - Base_Len := 4; - when 'o' | 'O' => - Base_Len := 3; - when 'b' | 'B' => - Base_Len := 1; - when others => - raise Internal_Error; - end case; - Pos := Pos + 1; - Mark := Source (Pos); - if Mark /= Quotation and then Mark /= '%' then - raise Internal_Error; - end if; - Pos := Pos + 1; - Length := 0; - Current_Context.Str_Id := Str_Table.Start; - loop - << Again >> null; - C := Source (Pos); - Pos := Pos + 1; - exit when C = Mark; - - -- LRM93 13.7 - -- If the base specifier is 'B', the extended digits in the bit - -- value are restricted to 0 and 1. - -- If the base specifier is 'O', the extended digits int the bit - -- value are restricted to legal digits in the octal number - -- system, ie, the digits 0 through 7. - -- If the base specifier is 'X', the extended digits are all digits - -- together with the letters A through F. - case C is - when '0' .. '9' => - V := Character'Pos (C) - Character'Pos ('0'); - when 'A' .. 'F' => - V := Character'Pos (C) - Character'Pos ('A') + 10; - when 'a' .. 'f' => - V := Character'Pos (C) - Character'Pos ('a') + 10; - when '_' => - if Source (Pos) = '_' then - Error_Msg_Scan - ("double underscore not allowed in a bit string"); - end if; - if Source (Pos - 2) = Mark then - Error_Msg_Scan - ("underscore not allowed at the start of a bit string"); - elsif Source (Pos) = Mark then - Error_Msg_Scan - ("underscore not allowed at the end of a bit string"); - end if; - goto Again; - when '"' => - pragma Assert (Mark = '%'); - Error_Msg_Scan - ("'""' cannot close a bit string opened by '%'"); - exit; - when '%' => - pragma Assert (Mark = '"'); - Error_Msg_Scan - ("'%' cannot close a bit string opened by '""'"); - exit; - when others => - Error_Msg_Scan ("bit string not terminated"); - Pos := Pos - 1; - exit; - end case; - - case Base_Len is - when 1 => - if V > 1 then - Error_Msg_Scan ("invalid character in a binary bit string"); - end if; - Str_Table.Append (C); - when 2 => - raise Internal_Error; - when 3 => - if V > 7 then - Error_Msg_Scan ("invalid character in a octal bit string"); - end if; - for I in 1 .. 3 loop - if (V / 4) = 1 then - Str_Table.Append ('1'); - else - Str_Table.Append ('0'); - end if; - V := (V mod 4) * 2; - end loop; - when 4 => - for I in 1 .. 4 loop - if (V / 8) = 1 then - Str_Table.Append ('1'); - else - Str_Table.Append ('0'); - end if; - V := (V mod 8) * 2; - end loop; - end case; - Length := Length + Base_Len; - end loop; - - Str_Table.Finish; - - if Length = 0 then - Error_Msg_Scan ("empty bit string is not allowed"); - end if; - Current_Token := Tok_Bit_String; - Current_Context.Int64 := Iir_Int64 (Base_Len); - Current_Context.Str_Len := Length; - end Scan_Bit_String; - - -- LRM93 13.3.1 - -- Basic Identifiers - -- A basic identifier consists only of letters, digits, and underlines. - -- BASIC_IDENTIFIER ::= LETTER { [ UNDERLINE ] LETTER_OR_DIGIT } - -- LETTER_OR_DIGIT ::= LETTER | DIGIT - -- LETTER ::= UPPER_CASE_LETTER | LOWER_CASE_LETTER - -- - -- NB: At the call of this procedure, the current character must be a legal - -- character for a basic identifier. - procedure Scan_Identifier - is - use Name_Table; - C : Character; - Len : Natural; - begin - -- This is an identifier or a key word. - Len := 0; - loop - -- source (pos) is correct. - -- LRM93 13.3.1 - -- All characters if a basic identifier are signifiant, including - -- any underline character inserted between a letter or digit and - -- an adjacent letter or digit. - -- Basic identifiers differing only in the use of the corresponding - -- upper and lower case letters are considered as the same. - -- This is achieved by converting all upper case letters into - -- equivalent lower case letters. - -- The opposite (converting in lower case letters) is not possible, - -- because two characters have no upper-case equivalent. - C := Source (Pos); - case Characters_Kind (C) is - when Upper_Case_Letter => - if Vhdl_Std = Vhdl_87 and C > 'Z' then - Error_8bit; - end if; - Len := Len + 1; - Name_Buffer (Len) := Ada.Characters.Handling.To_Lower (C); - when Lower_Case_Letter | Digit => - if Vhdl_Std = Vhdl_87 and C > 'z' then - Error_8bit; - end if; - Len := Len + 1; - Name_Buffer (Len) := C; - when Special_Character => - -- The current character is legal in an identifier. - if C = '_' then - if Source (Pos + 1) = '_' then - Error_Msg_Scan ("two underscores can't be consecutive"); - end if; - Len := Len + 1; - Name_Buffer (Len) := C; - else - exit; - end if; - when others => - exit; - end case; - Pos := Pos + 1; - end loop; - - if Source (Pos - 1) = '_' then - if not Flag_Psl then - -- Some PSL reserved words finish with '_'. This case is handled - -- later. - Error_Msg_Scan ("identifier cannot finish with '_'"); - end if; - Pos := Pos - 1; - Len := Len - 1; - C := '_'; - end if; - - -- LRM93 13.2 - -- At least one separator is required between an identifier or an - -- abstract literal and an adjacent identifier or abstract literal. - case Characters_Kind (C) is - when Digit - | Upper_Case_Letter - | Lower_Case_Letter => - raise Internal_Error; - when Other_Special_Character => - if Vhdl_Std /= Vhdl_87 and then C = '\' then - Error_Separator; - end if; - when Invalid - | Format_Effector - | Space_Character - | Special_Character => - null; - end case; - Name_Length := Len; - - -- Hash it. - Current_Context.Identifier := Name_Table.Get_Identifier; - if Current_Identifier in Std_Names.Name_Id_Keywords then - -- LRM93 13.9 - -- The identifiers listed below are called reserved words and are - -- reserved for signifiances in the language. - -- IN: this is also achieved in packages std_names and tokens. - Current_Token := Token_Type'Val - (Token_Type'Pos (Tok_First_Keyword) - + Current_Identifier - Std_Names.Name_First_Keyword); - case Current_Identifier is - when Std_Names.Name_Id_AMS_Reserved_Words => - if not AMS_Vhdl then - if Flags.Warn_Reserved_Word then - Warning_Msg_Scan - ("using """ & Name_Buffer (1 .. Name_Length) - & """ AMS-VHDL reserved word as an identifier"); - end if; - Current_Token := Tok_Identifier; - end if; - when Std_Names.Name_Id_Vhdl00_Reserved_Words => - if Vhdl_Std < Vhdl_00 then - if Flags.Warn_Reserved_Word then - Warning_Msg_Scan - ("using """ & Name_Buffer (1 .. Name_Length) - & """ vhdl00 reserved word as an identifier"); - end if; - Current_Token := Tok_Identifier; - end if; - when Std_Names.Name_Id_Vhdl93_Reserved_Words => - if Vhdl_Std = Vhdl_87 then - if Flags.Warn_Reserved_Word then - Warning_Msg_Scan - ("using """ & Name_Buffer (1 .. Name_Length) - & """ vhdl93 reserved word as a vhdl87 identifier"); - Warning_Msg_Scan - ("(use option --std=93 to compile as vhdl93)"); - end if; - Current_Token := Tok_Identifier; - end if; - when Std_Names.Name_Id_Vhdl87_Reserved_Words => - null; - when others => - raise Program_Error; - end case; - elsif Flag_Psl then - case Current_Identifier is - when Std_Names.Name_Clock => - Current_Token := Tok_Psl_Clock; - when Std_Names.Name_Const => - Current_Token := Tok_Psl_Const; - when Std_Names.Name_Boolean => - Current_Token := Tok_Psl_Boolean; - when Std_Names.Name_Sequence => - Current_Token := Tok_Psl_Sequence; - when Std_Names.Name_Property => - Current_Token := Tok_Psl_Property; - when Std_Names.Name_Inf => - Current_Token := Tok_Inf; - when Std_Names.Name_Within => - Current_Token := Tok_Within; - when Std_Names.Name_Abort => - Current_Token := Tok_Abort; - when Std_Names.Name_Before => - Current_Token := Tok_Before; - when Std_Names.Name_Always => - Current_Token := Tok_Always; - when Std_Names.Name_Never => - Current_Token := Tok_Never; - when Std_Names.Name_Eventually => - Current_Token := Tok_Eventually; - when Std_Names.Name_Next_A => - Current_Token := Tok_Next_A; - when Std_Names.Name_Next_E => - Current_Token := Tok_Next_E; - when Std_Names.Name_Next_Event => - Current_Token := Tok_Next_Event; - when Std_Names.Name_Next_Event_A => - Current_Token := Tok_Next_Event_A; - when Std_Names.Name_Next_Event_E => - Current_Token := Tok_Next_Event_E; - when Std_Names.Name_Until => - Current_Token := Tok_Until; - when others => - Current_Token := Tok_Identifier; - if C = '_' then - Error_Msg_Scan ("identifiers cannot finish with '_'"); - end if; - end case; - else - Current_Token := Tok_Identifier; - end if; - end Scan_Identifier; - - -- LRM93 13.3.2 - -- EXTENDED_IDENTIFIER ::= \ GRAPHIC_CHARACTER { GRAPHIC_CHARACTER } \ - -- - -- Create an (extended) indentifier. - -- Extended identifiers are stored as they appear (leading and tailing - -- backslashes, doubling backslashes inside). - procedure Scan_Extended_Identifier - is - use Name_Table; - begin - -- LRM93 13.3.2 - -- Moreover, every extended identifiers is distinct from any basic - -- identifier. - -- This is satisfied by storing '\' in the name table. - Name_Length := 1; - Name_Buffer (1) := '\'; - loop - -- Next character. - Pos := Pos + 1; - - if Source (Pos) = '\' then - -- LRM93 13.3.2 - -- If a backslash is to be used as one of the graphic characters - -- of an extended literal, it must be doubled. - -- LRM93 13.3.2 - -- (a doubled backslash couting as one character) - Name_Length := Name_Length + 1; - Name_Buffer (Name_Length) := '\'; - - Pos := Pos + 1; - - exit when Source (Pos) /= '\'; - end if; - - -- source (pos) is correct. - case Characters_Kind (Source (Pos)) is - when Format_Effector => - Error_Msg_Scan ("format effector in extended identifier"); - exit; - when Graphic_Character => - null; - when Invalid => - Error_Msg_Scan ("invalid character in extended identifier"); - end case; - Name_Length := Name_Length + 1; - -- LRM93 13.3.2 - -- Extended identifiers differing only in the use of corresponding - -- upper and lower case letters are distinct. - Name_Buffer (Name_Length) := Source (Pos); - end loop; - - if Name_Length <= 2 then - Error_Msg_Scan ("empty extended identifier is not allowed"); - end if; - - -- LRM93 13.2 - -- At least one separator is required between an identifier or an - -- abstract literal and an adjacent identifier or abstract literal. - case Characters_Kind (Source (Pos)) is - when Digit - | Upper_Case_Letter - | Lower_Case_Letter => - Error_Separator; - when Invalid - | Format_Effector - | Space_Character - | Special_Character - | Other_Special_Character => - null; - end case; - - -- Hash it. - Current_Context.Identifier := Name_Table.Get_Identifier; - Current_Token := Tok_Identifier; - end Scan_Extended_Identifier; - - procedure Convert_Identifier - is - procedure Error_Bad is - begin - Error_Msg_Option ("bad character in identifier"); - end Error_Bad; - - procedure Error_8bit is - begin - Error_Msg_Option ("8 bits characters not allowed in vhdl87"); - end Error_8bit; - - use Name_Table; - C : Character; - begin - if Name_Length = 0 then - Error_Msg_Option ("identifier required"); - return; - end if; - - if Name_Buffer (1) = '\' then - -- Extended identifier. - if Vhdl_Std = Vhdl_87 then - Error_Msg_Option ("extended identifiers not allowed in vhdl87"); - return; - end if; - - if Name_Length < 3 then - Error_Msg_Option ("extended identifier is too short"); - return; - end if; - if Name_Buffer (Name_Length) /= '\' then - Error_Msg_Option ("extended identifier must finish with a '\'"); - return; - end if; - for I in 2 .. Name_Length - 1 loop - C := Name_Buffer (I); - case Characters_Kind (C) is - when Format_Effector => - Error_Msg_Option ("format effector in extended identifier"); - return; - when Graphic_Character => - if C = '\' then - if Name_Buffer (I + 1) /= '\' - or else I = Name_Length - 1 - then - Error_Msg_Option ("anti-slash must be doubled " - & "in extended identifier"); - return; - end if; - end if; - when Invalid => - Error_Bad; - end case; - end loop; - else - -- Identifier - for I in 1 .. Name_Length loop - C := Name_Buffer (I); - case Characters_Kind (C) is - when Upper_Case_Letter => - if Vhdl_Std = Vhdl_87 and C > 'Z' then - Error_8bit; - end if; - Name_Buffer (I) := Ada.Characters.Handling.To_Lower (C); - when Lower_Case_Letter | Digit => - if Vhdl_Std = Vhdl_87 and C > 'z' then - Error_8bit; - end if; - when Special_Character => - -- The current character is legal in an identifier. - if C = '_' then - if I = 1 then - Error_Msg_Option - ("identifier cannot start with an underscore"); - return; - end if; - if Name_Buffer (I - 1) = '_' then - Error_Msg_Option - ("two underscores can't be consecutive"); - return; - end if; - if I = Name_Length then - Error_Msg_Option - ("identifier cannot finish with an underscore"); - return; - end if; - else - Error_Bad; - end if; - when others => - Error_Bad; - end case; - end loop; - end if; - end Convert_Identifier; - - -- Scan an identifier within a comment. Only lower case letters are - -- allowed. - function Scan_Comment_Identifier return Boolean - is - use Name_Table; - Len : Natural; - C : Character; - begin - -- Skip spaces. - while Source (Pos) = ' ' or Source (Pos) = HT loop - Pos := Pos + 1; - end loop; - - -- The identifier shall start with a lower case letter. - if Source (Pos) not in 'a' .. 'z' then - return False; - end if; - - -- Scan the identifier (in lower cases). - Len := 0; - loop - C := Source (Pos); - exit when C not in 'a' .. 'z' and C /= '_'; - Len := Len + 1; - Name_Buffer (Len) := C; - Pos := Pos + 1; - end loop; - - -- Shall be followed by a space or a new line. - case C is - when ' ' | HT | LF | CR => - null; - when others => - return False; - end case; - - Name_Length := Len; - return True; - end Scan_Comment_Identifier; - - -- Scan tokens within a comment. Return TRUE if Current_Token was set, - -- return FALSE to discard the comment (ie treat it like a real comment). - function Scan_Comment return Boolean - is - use Std_Names; - Id : Name_Id; - begin - if not Scan_Comment_Identifier then - return False; - end if; - - -- Hash it. - Id := Name_Table.Get_Identifier; - - case Id is - when Name_Psl => - -- Scan first identifier after '-- psl'. - if not Scan_Comment_Identifier then - return False; - end if; - Id := Name_Table.Get_Identifier; - case Id is - when Name_Property => - Current_Token := Tok_Psl_Property; - when Name_Sequence => - Current_Token := Tok_Psl_Sequence; - when Name_Endpoint => - Current_Token := Tok_Psl_Endpoint; - when Name_Assert => - Current_Token := Tok_Psl_Assert; - when Name_Cover => - Current_Token := Tok_Psl_Cover; - when Name_Default => - Current_Token := Tok_Psl_Default; - when others => - return False; - end case; - Flag_Scan_In_Comment := True; - return True; - when others => - return False; - end case; - end Scan_Comment; - - function Scan_Exclam_Mark return Boolean is - begin - if Source (Pos) = '!' then - Pos := Pos + 1; - return True; - else - return False; - end if; - end Scan_Exclam_Mark; - - function Scan_Underscore return Boolean is - begin - if Source (Pos) = '_' then - Pos := Pos + 1; - return True; - else - return False; - end if; - end Scan_Underscore; - - -- The Scan_Next_Line procedure must be called after each end-of-line to - -- register to next line number. This is called by Scan_CR_Newline and - -- Scan_LF_Newline. - procedure Scan_Next_Line is - begin - Current_Context.Line_Number := Current_Context.Line_Number + 1; - Current_Context.Line_Pos := Pos; - File_Add_Line_Number - (Current_Context.Source_File, Current_Context.Line_Number, Pos); - end Scan_Next_Line; - - -- Scan a CR end-of-line. - procedure Scan_CR_Newline is - begin - -- Accept CR or CR+LF as line separator. - if Source (Pos + 1) = LF then - Pos := Pos + 2; - else - Pos := Pos + 1; - end if; - Scan_Next_Line; - end Scan_CR_Newline; - - -- Scan a LF end-of-line. - procedure Scan_LF_Newline is - begin - -- Accept LF or LF+CR as line separator. - if Source (Pos + 1) = CR then - Pos := Pos + 2; - else - Pos := Pos + 1; - end if; - Scan_Next_Line; - end Scan_LF_Newline; - - -- Get a new token. - procedure Scan is - begin - if Current_Token /= Tok_Invalid then - Current_Context.Prev_Token := Current_Token; - end if; - - << Again >> null; - - -- Skip commonly used separators. - while Source(Pos) = ' ' or Source(Pos) = HT loop - Pos := Pos + 1; - end loop; - - Current_Context.Token_Pos := Pos; - Current_Context.Identifier := Null_Identifier; - - case Source (Pos) is - when HT | ' ' => - -- Must have already been skipped just above. - raise Internal_Error; - when NBSP => - if Vhdl_Std = Vhdl_87 then - Error_Msg_Scan ("NBSP character not allowed in vhdl87"); - end if; - Pos := Pos + 1; - goto Again; - when VT | FF => - Pos := Pos + 1; - goto Again; - when LF => - Scan_LF_Newline; - if Flag_Newline then - Current_Token := Tok_Newline; - return; - end if; - goto Again; - when CR => - Scan_CR_Newline; - if Flag_Newline then - Current_Token := Tok_Newline; - return; - end if; - goto Again; - when '-' => - if Source (Pos + 1) = '-' then - -- This is a comment. - -- LRM93 13.8 - -- A comment starts with two adjacent hyphens and extends up - -- to the end of the line. - -- A comment can appear on any line line of a VHDL - -- description. - -- The presence or absence of comments has no influence on - -- wether a description is legal or illegal. - -- Futhermore, comments do not influence the execution of a - -- simulation module; their sole purpose is the enlightenment - -- of the human reader. - -- GHDL note: As a consequence, an obfruscating comment - -- is out of purpose, and a warning could be reported :-) - Pos := Pos + 2; - - -- Scan inside a comment. So we just ignore the two dashes. - if Flag_Scan_In_Comment then - goto Again; - end if; - - -- Handle keywords in comment (PSL). - if Flag_Comment_Keyword - and then Scan_Comment - then - return; - end if; - - -- LRM93 13.2 - -- In any case, a sequence of one or more format - -- effectors other than horizontal tabulation must - -- cause at least one end of line. - while Source (Pos) /= CR and Source (Pos) /= LF and - Source (Pos) /= VT and Source (Pos) /= FF and - Source (Pos) /= Files_Map.EOT - loop - if not Flags.Mb_Comment - and then Characters_Kind (Source (Pos)) = Invalid - then - Error_Msg_Scan ("invalid character, even in a comment"); - end if; - Pos := Pos + 1; - end loop; - if Flag_Comment then - Current_Token := Tok_Comment; - return; - end if; - goto Again; - elsif Flag_Psl and then Source (Pos + 1) = '>' then - Current_Token := Tok_Minus_Greater; - Pos := Pos + 2; - return; - else - Current_Token := Tok_Minus; - Pos := Pos + 1; - return; - end if; - when '+' => - Current_Token := Tok_Plus; - Pos := Pos + 1; - return; - when '*' => - if Source (Pos + 1) = '*' then - Current_Token := Tok_Double_Star; - Pos := Pos + 2; - else - Current_Token := Tok_Star; - Pos := Pos + 1; - end if; - return; - when '/' => - if Source (Pos + 1) = '=' then - Current_Token := Tok_Not_Equal; - Pos := Pos + 2; - elsif Source (Pos + 1) = '*' then - -- LRM08 15.9 Comments - -- A delimited comment start with a solidus (slash) character - -- immediately followed by an asterisk character and extends up - -- to the first subsequent occurrence of an asterisk character - -- immediately followed by a solidus character. - if Vhdl_Std < Vhdl_08 then - Error_Msg_Scan - ("block comment are not allowed before vhdl 2008"); - end if; - - -- Skip '/*'. - Pos := Pos + 2; - - loop - case Source (Pos) is - when '/' => - -- LRM08 15.9 - -- Moreover, an occurrence of a solidus character - -- immediately followed by an asterisk character - -- within a delimited comment is not interpreted as - -- the start of a nested delimited comment. - if Source (Pos + 1) = '*' then - Warning_Msg_Scan - ("'/*' found within a block comment"); - end if; - Pos := Pos + 1; - when '*' => - if Source (Pos + 1) = '/' then - Pos := Pos + 2; - exit; - else - Pos := Pos + 1; - end if; - when CR => - Scan_CR_Newline; - when LF => - Scan_LF_Newline; - when Files_Map.EOT => - if Pos >= Current_Context.File_Len then - -- Point at the start of the comment. - Error_Msg_Scan - ("block comment not terminated at end of file", - File_Pos_To_Location - (Current_Context.Source_File, - Current_Context.Token_Pos)); - exit; - end if; - Pos := Pos + 1; - when others => - Pos := Pos + 1; - end case; - end loop; - if Flag_Comment then - Current_Token := Tok_Comment; - return; - end if; - goto Again; - else - Current_Token := Tok_Slash; - Pos := Pos + 1; - end if; - return; - when '(' => - Current_Token := Tok_Left_Paren; - Pos := Pos + 1; - return; - when ')' => - Current_Token := Tok_Right_Paren; - Pos := Pos + 1; - return; - when '|' => - if Flag_Psl then - if Source (Pos + 1) = '|' then - Current_Token := Tok_Bar_Bar; - Pos := Pos + 2; - elsif Source (Pos + 1) = '-' - and then Source (Pos + 2) = '>' - then - Current_Token := Tok_Bar_Arrow; - Pos := Pos + 3; - elsif Source (Pos + 1) = '=' - and then Source (Pos + 2) = '>' - then - Current_Token := Tok_Bar_Double_Arrow; - Pos := Pos + 3; - else - Current_Token := Tok_Bar; - Pos := Pos + 1; - end if; - else - Current_Token := Tok_Bar; - Pos := Pos + 1; - end if; - return; - when '!' => - if Flag_Psl then - Current_Token := Tok_Exclam_Mark; - else - -- LRM93 13.10 - -- A vertical line (|) can be replaced by an exclamation - -- mark (!) where used as a delimiter. - Current_Token := Tok_Bar; - end if; - Pos := Pos + 1; - return; - when ':' => - if Source (Pos + 1) = '=' then - Current_Token := Tok_Assign; - Pos := Pos + 2; - else - Current_Token := Tok_Colon; - Pos := Pos + 1; - end if; - return; - when ';' => - Current_Token := Tok_Semi_Colon; - Pos := Pos + 1; - return; - when ',' => - Current_Token := Tok_Comma; - Pos := Pos + 1; - return; - when '.' => - if Source (Pos + 1) = '.' then - -- Be Ada friendly... - Error_Msg_Scan ("'..' is invalid in vhdl, replaced by 'to'"); - Current_Token := Tok_To; - Pos := Pos + 2; - return; - end if; - Current_Token := Tok_Dot; - Pos := Pos + 1; - return; - when '&' => - if Flag_Psl and then Source (Pos + 1) = '&' then - Current_Token := Tok_And_And; - Pos := Pos + 2; - else - Current_Token := Tok_Ampersand; - Pos := Pos + 1; - end if; - return; - when '<' => - if Source (Pos + 1) = '=' then - Current_Token := Tok_Less_Equal; - Pos := Pos + 2; - elsif Source (Pos + 1) = '>' then - Current_Token := Tok_Box; - Pos := Pos + 2; - else - Current_Token := Tok_Less; - Pos := Pos + 1; - end if; - return; - when '>' => - if Source (Pos + 1) = '=' then - Current_Token := Tok_Greater_Equal; - Pos := Pos + 2; - else - Current_Token := Tok_Greater; - Pos := Pos + 1; - end if; - return; - when '=' => - if Source (Pos + 1) = '=' then - if AMS_Vhdl then - Current_Token := Tok_Equal_Equal; - else - Error_Msg_Scan - ("'==' is not the vhdl equality, replaced by '='"); - Current_Token := Tok_Equal; - end if; - Pos := Pos + 2; - elsif Source (Pos + 1) = '>' then - Current_Token := Tok_Double_Arrow; - Pos := Pos + 2; - else - Current_Token := Tok_Equal; - Pos := Pos + 1; - end if; - return; - when ''' => - -- Handle cases such as character'('a') - -- FIXME: what about f ()'length ? or .all'length - if Current_Context.Prev_Token /= Tok_Identifier - and then Current_Context.Prev_Token /= Tok_Character - and then Source (Pos + 2) = ''' - then - -- LRM93 13.5 - -- A character literal is formed by enclosing one of the 191 - -- graphic character (...) between two apostrophe characters. - -- CHARACTER_LITERAL ::= ' GRAPHIC_CHARACTER ' - if Characters_Kind (Source (Pos + 1)) not in Graphic_Character - then - Error_Msg_Scan - ("a character literal can only be a graphic character"); - elsif Vhdl_Std = Vhdl_87 - and then Source (Pos + 1) > Character'Val (127) - then - Error_8bit; - end if; - Current_Token := Tok_Character; - Current_Context.Identifier := - Name_Table.Get_Identifier (Source (Pos + 1)); - Pos := Pos + 3; - return; - else - Current_Token := Tok_Tick; - Pos := Pos + 1; - end if; - return; - when '0' .. '9' => - Scan_Literal; - - -- LRM 13.2 - -- At least one separator is required between an identifier or - -- an abstract literal and an adjacent identifier or abstract - -- literal. - case Characters_Kind (Source (Pos)) is - when Digit => - raise Internal_Error; - when Upper_Case_Letter - | Lower_Case_Letter => - -- Could call Error_Separator, but use a clearer message - -- for this common case. - -- Note: the term "unit name" is not correct here, since it - -- can be any identifier or even a keyword; however it is - -- probably the most common case (eg 10ns). - Error_Msg_Scan - ("space is required between number and unit name"); - when Other_Special_Character => - if Vhdl_Std /= Vhdl_87 and then Source (Pos) = '\' then - Error_Separator; - end if; - when Invalid - | Format_Effector - | Space_Character - | Special_Character => - null; - end case; - return; - when '#' => - Error_Msg_Scan ("'#' is used for based literals and " - & "must be preceded by a base"); - -- Cannot easily continue. - raise Compilation_Error; - when Quotation | '%' => - Scan_String; - return; - when '[' => - if Flag_Psl then - if Source (Pos + 1) = '*' then - Current_Token := Tok_Brack_Star; - Pos := Pos + 2; - elsif Source (Pos + 1) = '+' - and then Source (Pos + 2) = ']' - then - Current_Token := Tok_Brack_Plus_Brack; - Pos := Pos + 3; - elsif Source (Pos + 1) = '-' - and then Source (Pos + 2) = '>' - then - Current_Token := Tok_Brack_Arrow; - Pos := Pos + 3; - elsif Source (Pos + 1) = '=' then - Current_Token := Tok_Brack_Equal; - Pos := Pos + 2; - else - Current_Token := Tok_Left_Bracket; - Pos := Pos + 1; - end if; - else - if Vhdl_Std = Vhdl_87 then - Error_Msg_Scan - ("'[' is an invalid character in vhdl87, replaced by '('"); - Current_Token := Tok_Left_Paren; - else - Current_Token := Tok_Left_Bracket; - end if; - Pos := Pos + 1; - end if; - return; - when ']' => - if Vhdl_Std = Vhdl_87 and not Flag_Psl then - Error_Msg_Scan - ("']' is an invalid character in vhdl87, replaced by ')'"); - Current_Token := Tok_Right_Paren; - else - Current_Token := Tok_Right_Bracket; - end if; - Pos := Pos + 1; - return; - when '{' => - if Flag_Psl then - Current_Token := Tok_Left_Curly; - else - Error_Msg_Scan ("'{' is an invalid character, replaced by '('"); - Current_Token := Tok_Left_Paren; - end if; - Pos := Pos + 1; - return; - when '}' => - if Flag_Psl then - Current_Token := Tok_Right_Curly; - else - Error_Msg_Scan ("'}' is an invalid character, replaced by ')'"); - Current_Token := Tok_Right_Paren; - end if; - Pos := Pos + 1; - return; - when '\' => - if Vhdl_Std = Vhdl_87 then - Error_Msg_Scan - ("extended identifiers are not allowed in vhdl87"); - end if; - Scan_Extended_Identifier; - return; - when '^' => - Error_Msg_Scan ("'^' is not a VHDL operator, use 'xor'"); - Pos := Pos + 1; - Current_Token := Tok_Xor; - return; - when '~' => - Error_Msg_Scan ("'~' is not a VHDL operator, use 'not'"); - Pos := Pos + 1; - Current_Token := Tok_Not; - return; - when '?' => - if Vhdl_Std < Vhdl_08 then - Error_Msg_Scan ("'?' can only be used in strings or comments"); - Pos := Pos + 1; - goto Again; - else - if Source (Pos + 1) = '<' then - if Source (Pos + 2) = '=' then - Current_Token := Tok_Match_Less_Equal; - Pos := Pos + 3; - else - Current_Token := Tok_Match_Less; - Pos := Pos + 2; - end if; - elsif Source (Pos + 1) = '>' then - if Source (Pos + 2) = '=' then - Current_Token := Tok_Match_Greater_Equal; - Pos := Pos + 3; - else - Current_Token := Tok_Match_Greater; - Pos := Pos + 2; - end if; - elsif Source (Pos + 1) = '?' then - Current_Token := Tok_Condition; - Pos := Pos + 2; - elsif Source (Pos + 1) = '=' then - Current_Token := Tok_Match_Equal; - Pos := Pos + 2; - elsif Source (Pos + 1) = '/' - and then Source (Pos + 2) = '=' - then - Current_Token := Tok_Match_Not_Equal; - Pos := Pos + 3; - else - Error_Msg_Scan ("unknown matching operator"); - Pos := Pos + 1; - goto Again; - end if; - end if; - return; - when '$' | '`' - | Inverted_Exclamation .. Inverted_Question - | Multiplication_Sign | Division_Sign => - Error_Msg_Scan ("character """ & Source (Pos) - & """ can only be used in strings or comments"); - Pos := Pos + 1; - goto Again; - when '@' => - if Flag_Psl then - Current_Token := Tok_Arobase; - Pos := Pos + 1; - return; - else - Error_Msg_Scan - ("character """ & Source (Pos) - & """ can only be used in strings or comments"); - Pos := Pos + 1; - goto Again; - end if; - when '_' => - Error_Msg_Scan ("an identifier can't start with '_'"); - Pos := Pos + 1; - goto Again; - when 'B' | 'b' | 'O' | 'o' | 'X' | 'x' => - if Source (Pos + 1) = Quotation or else Source (Pos + 1) = '%' then - -- LRM93 13.7 - -- BASE_SPECIFIER ::= B | O | X - -- A letter in a bit string literal (either an extended digit or - -- the base specifier) can be written either in lower case or - -- in upper case, with the same meaning. - Scan_Bit_String; - else - Scan_Identifier; - end if; - return; - when 'A' | 'C' .. 'N' | 'P' .. 'W' | 'Y'| 'Z' - | 'a' | 'c' .. 'n' | 'p' .. 'w' | 'y'| 'z' => - Scan_Identifier; - return; - when UC_A_Grave .. UC_O_Diaeresis - | UC_O_Oblique_Stroke .. UC_Icelandic_Thorn => - if Vhdl_Std = Vhdl_87 then - Error_Msg_Scan - ("upper case letters above 128 are not allowed in vhdl87"); - end if; - Scan_Identifier; - return; - when LC_German_Sharp_S .. LC_O_Diaeresis - | LC_O_Oblique_Stroke .. LC_Y_Diaeresis => - if Vhdl_Std = Vhdl_87 then - Error_Msg_Scan - ("lower case letters above 128 are not allowed in vhdl87"); - end if; - Scan_Identifier; - return; - when NUL .. ETX | ENQ .. BS | SO .. US | DEL .. APC => - Error_Msg_Scan - ("control character that is not CR, LF, FF, HT or VT " & - "is not allowed"); - Pos := Pos + 1; - goto Again; - when Files_Map.EOT => - if Pos >= Current_Context.File_Len then - -- FIXME: should conditionnaly emit a warning if the file - -- is not terminated by an end of line. - Current_Token := Tok_Eof; - else - Error_Msg_Scan ("EOT is not allowed inside the file"); - Pos := Pos + 1; - goto Again; - end if; - return; - end case; - end Scan; - - function Get_Token_Location return Location_Type is - begin - return File_Pos_To_Location - (Current_Context.Source_File, Current_Context.Token_Pos); - end Get_Token_Location; -end Scanner; |