aboutsummaryrefslogtreecommitdiffstats
path: root/src/vhdl/vhdl-scanner.adb
diff options
context:
space:
mode:
authorTristan Gingold <tgingold@free.fr>2019-05-04 16:49:19 +0200
committerTristan Gingold <tgingold@free.fr>2019-05-04 19:24:20 +0200
commitfc028b5d21727da66dc8e146b3dbcfc870c64f90 (patch)
tree59462a5173067a24482976035564e135ce4303c0 /src/vhdl/vhdl-scanner.adb
parent7cee2ba98f47f6ee7b7ffbe0d9555a972c8afc8b (diff)
downloadghdl-fc028b5d21727da66dc8e146b3dbcfc870c64f90.tar.gz
ghdl-fc028b5d21727da66dc8e146b3dbcfc870c64f90.tar.bz2
ghdl-fc028b5d21727da66dc8e146b3dbcfc870c64f90.zip
vhdl: move scanner under vhdl hierarchy.
Diffstat (limited to 'src/vhdl/vhdl-scanner.adb')
-rw-r--r--src/vhdl/vhdl-scanner.adb2332
1 files changed, 2332 insertions, 0 deletions
diff --git a/src/vhdl/vhdl-scanner.adb b/src/vhdl/vhdl-scanner.adb
new file mode 100644
index 000000000..734b0c7ce
--- /dev/null
+++ b/src/vhdl/vhdl-scanner.adb
@@ -0,0 +1,2332 @@
+-- VHDL lexical scanner.
+-- Copyright (C) 2002 - 2014 Tristan Gingold
+--
+-- GHDL is free software; you can redistribute it and/or modify it under
+-- the terms of the GNU General Public License as published by the Free
+-- Software Foundation; either version 2, or (at your option) any later
+-- version.
+--
+-- GHDL is distributed in the hope that it will be useful, but WITHOUT ANY
+-- WARRANTY; without even the implied warranty of MERCHANTABILITY or
+-- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+-- for more details.
+--
+-- You should have received a copy of the GNU General Public License
+-- along with GHDL; see the file COPYING. If not, write to the Free
+-- Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+-- 02111-1307, USA.
+with Ada.Characters.Latin_1; use Ada.Characters.Latin_1;
+with Errorout; use Errorout;
+with Name_Table;
+with Files_Map; use Files_Map;
+with Std_Names;
+with Str_Table;
+with Flags; use Flags;
+
+package body Vhdl.Scanner is
+
+ -- This classification is a simplification of the categories of LRM93 13.1
+ -- LRM93 13.1
+ -- The only characters allowed in the text of a VHDL description are the
+ -- graphic characters and format effector.
+
+ type Character_Kind_Type is
+ (
+ -- Neither a format effector nor a graphic character.
+ Invalid,
+ Format_Effector,
+ Lower_Case_Letter,
+ Upper_Case_Letter,
+ Digit,
+ Special_Character,
+ Space_Character,
+ Other_Special_Character
+ );
+
+ -- LRM93 13.1
+ -- basic_graphic_character ::=
+ -- upper_case_letter | digit | special_character | space_character
+ --
+ --subtype Basic_Graphic_Character is
+ -- Character_Kind_Type range Upper_Case_Letter .. Space_Character;
+
+ -- LRM93 13.1
+ -- graphic_character ::=
+ -- basic_graphic_character | lower_case_letter | other_special_character
+ --
+ -- Note: There are 191 graphic characters.
+ subtype Graphic_Character is
+ Character_Kind_Type range Lower_Case_Letter .. Other_Special_Character;
+
+ -- letter ::= upper_case_letter | lower_case_letter
+ subtype Letter is
+ Character_Kind_Type range Lower_Case_Letter .. Upper_Case_Letter;
+
+ -- LRM93 13.1
+ -- The characters included in each of the categories of basic graphic
+ -- characters are defined as follows:
+ type Character_Array is array (Character) of Character_Kind_Type;
+ pragma Suppress_Initialization (Character_Array);
+ Characters_Kind : constant Character_Array :=
+ (NUL .. BS => Invalid,
+
+ -- Format effectors are the ISO (and ASCII) characters called horizontal
+ -- tabulation, vertical tabulation, carriage return, line feed, and form
+ -- feed.
+ HT | LF | VT | FF | CR => Format_Effector,
+
+ SO .. US => Invalid,
+
+ -- 1. upper case letters
+ 'A' .. 'Z' | UC_A_Grave .. UC_O_Diaeresis |
+ UC_O_Oblique_Stroke .. UC_Icelandic_Thorn => Upper_Case_Letter,
+
+ -- 2. digits
+ '0' .. '9' => Digit,
+
+ -- 3. special characters
+ '"' | '#' | '&' | ''' | '(' | ')' | '+' | ',' | '-' | '.' | '/'
+ | ':' | ';' | '<' | '=' | '>' | '[' | ']'
+ | '_' | '|' | '*' => Special_Character,
+
+ -- 4. the space characters
+ ' ' | NBSP => Space_Character,
+
+ -- 5. lower case letters
+ 'a' .. 'z' | LC_German_Sharp_S .. LC_O_Diaeresis |
+ LC_O_Oblique_Stroke .. LC_Y_Diaeresis => Lower_Case_Letter,
+
+ -- 6. other special characters
+ '!' | '$' | '%' | '@' | '?' | '\' | '^' | '`' | '{' | '}' | '~'
+ | Inverted_Exclamation .. Inverted_Question | Multiplication_Sign |
+ Division_Sign => Other_Special_Character,
+
+ -- '¡' -- INVERTED EXCLAMATION MARK
+ -- '¢' -- CENT SIGN
+ -- '£' -- POUND SIGN
+ -- '¤' -- CURRENCY SIGN
+ -- '¥' -- YEN SIGN
+ -- '¦' -- BROKEN BAR
+ -- '§' -- SECTION SIGN
+ -- '¨' -- DIAERESIS
+ -- '©' -- COPYRIGHT SIGN
+ -- 'ª' -- FEMININE ORDINAL INDICATOR
+ -- '«' -- LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ -- '¬' -- NOT SIGN
+ -- '­' -- SOFT HYPHEN
+ -- '®' -- REGISTERED SIGN
+ -- '¯' -- MACRON
+ -- '°' -- DEGREE SIGN
+ -- '±' -- PLUS-MINUS SIGN
+ -- '²' -- SUPERSCRIPT TWO
+ -- '³' -- SUPERSCRIPT THREE
+ -- '´' -- ACUTE ACCENT
+ -- 'µ' -- MICRO SIGN
+ -- '¶' -- PILCROW SIGN
+ -- '·' -- MIDDLE DOT
+ -- '¸' -- CEDILLA
+ -- '¹' -- SUPERSCRIPT ONE
+ -- 'º' -- MASCULINE ORDINAL INDICATOR
+ -- '»' -- RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ -- '¼' -- VULGAR FRACTION ONE QUARTER
+ -- '½' -- VULGAR FRACTION ONE HALF
+ -- '¾' -- VULGAR FRACTION THREE QUARTERS
+ -- '¿' -- INVERTED QUESTION MARK
+ -- '×' -- MULTIPLICATION SIGN
+ -- '÷' -- DIVISION SIGN
+
+ DEL .. APC => Invalid);
+
+ -- The context contains the whole internal state of the scanner, ie
+ -- it can be used to push/pop a lexical analysis, to restart the
+ -- scanner from a context marking a previous point.
+ type Scan_Context is record
+ Source : File_Buffer_Acc;
+ Source_File : Source_File_Entry;
+ Line_Number : Natural;
+ Line_Pos : Source_Ptr;
+ Prev_Pos : Source_Ptr;
+ Token_Pos : Source_Ptr;
+ Pos : Source_Ptr;
+ File_Len : Source_Ptr;
+ Token : Token_Type;
+ Prev_Token : Token_Type;
+
+ -- Additional values for the current token.
+ Bit_Str_Base : Character;
+ Bit_Str_Sign : Character;
+ Str_Id : String8_Id;
+ Str_Len : Nat32;
+ Identifier: Name_Id;
+ Int64 : Iir_Int64;
+ Fp64 : Iir_Fp64;
+ end record;
+ pragma Suppress_Initialization (Scan_Context);
+
+ -- Disp a message during scan.
+ -- The current location is automatically displayed before the message.
+ -- Disp a message during scan.
+ procedure Error_Msg_Scan (Msg: String) is
+ begin
+ Report_Msg (Msgid_Error, Scan, No_Location, Msg);
+ end Error_Msg_Scan;
+
+ procedure Error_Msg_Scan (Loc : Location_Type; Msg: String) is
+ begin
+ Report_Msg (Msgid_Error, Scan, Loc, Msg);
+ end Error_Msg_Scan;
+
+ procedure Error_Msg_Scan (Msg: String; Arg1 : Earg_Type) is
+ begin
+ Report_Msg (Msgid_Error, Scan, No_Location, Msg, (1 => Arg1));
+ end Error_Msg_Scan;
+
+ -- Disp a message during scan.
+ procedure Warning_Msg_Scan (Id : Msgid_Warnings; Msg: String) is
+ begin
+ Report_Msg (Id, Scan, No_Location, Msg);
+ end Warning_Msg_Scan;
+
+ procedure Warning_Msg_Scan (Id : Msgid_Warnings;
+ Msg: String;
+ Arg1 : Earg_Type;
+ Cont : Boolean := False) is
+ begin
+ Report_Msg (Id, Scan, No_Location, Msg, (1 => Arg1), Cont);
+ end Warning_Msg_Scan;
+
+ -- The current context.
+ -- Default value is an invalid context.
+ Current_Context: Scan_Context := (Source => null,
+ Source_File => No_Source_File_Entry,
+ Line_Number => 0,
+ Line_Pos => 0,
+ Pos => 0,
+ Prev_Pos => 0,
+ Token_Pos => 0,
+ File_Len => 0,
+ Token => Tok_Invalid,
+ Prev_Token => Tok_Invalid,
+ Identifier => Null_Identifier,
+ Bit_Str_Base => ' ',
+ Bit_Str_Sign => ' ',
+ Str_Id => Null_String8,
+ Str_Len => 0,
+ Int64 => 0,
+ Fp64 => 0.0);
+
+ Source: File_Buffer_Acc renames Current_Context.Source;
+ Pos: Source_Ptr renames Current_Context.Pos;
+
+ -- When CURRENT_TOKEN is an identifier, its name_id is stored into
+ -- this global variable.
+ -- Function current_text can be used to convert it into an iir.
+ function Current_Identifier return Name_Id is
+ begin
+ return Current_Context.Identifier;
+ end Current_Identifier;
+
+ procedure Invalidate_Current_Identifier is
+ begin
+ Current_Context.Identifier := Null_Identifier;
+ end Invalidate_Current_Identifier;
+
+ procedure Invalidate_Current_Token is
+ begin
+ if Current_Token /= Tok_Invalid then
+ Current_Context.Prev_Token := Current_Token;
+ Current_Token := Tok_Invalid;
+ end if;
+ end Invalidate_Current_Token;
+
+ function Current_String_Id return String8_Id is
+ begin
+ return Current_Context.Str_Id;
+ end Current_String_Id;
+
+ function Current_String_Length return Nat32 is
+ begin
+ return Current_Context.Str_Len;
+ end Current_String_Length;
+
+ function Get_Bit_String_Base return Character is
+ begin
+ return Current_Context.Bit_Str_Base;
+ end Get_Bit_String_Base;
+
+ function Get_Bit_String_Sign return Character is
+ begin
+ return Current_Context.Bit_Str_Sign;
+ end Get_Bit_String_Sign;
+
+ function Current_Iir_Int64 return Iir_Int64 is
+ begin
+ return Current_Context.Int64;
+ end Current_Iir_Int64;
+
+ function Current_Iir_Fp64 return Iir_Fp64 is
+ begin
+ return Current_Context.Fp64;
+ end Current_Iir_Fp64;
+
+ function Get_Current_Source_File return Source_File_Entry is
+ begin
+ return Current_Context.Source_File;
+ end Get_Current_Source_File;
+
+ function Get_Current_Line return Natural is
+ begin
+ return Current_Context.Line_Number;
+ end Get_Current_Line;
+
+ function Get_Current_Offset return Natural is
+ begin
+ return Natural (Current_Context.Pos - Current_Context.Line_Pos);
+ end Get_Current_Offset;
+
+ function Get_Token_Offset return Natural is
+ begin
+ return Natural (Current_Context.Token_Pos - Current_Context.Line_Pos);
+ end Get_Token_Offset;
+
+ function Get_Token_Position return Source_Ptr is
+ begin
+ return Current_Context.Token_Pos;
+ end Get_Token_Position;
+
+ function Get_Position return Source_Ptr is
+ begin
+ return Current_Context.Pos;
+ end Get_Position;
+
+ function Get_Token_Location return Location_Type is
+ begin
+ return File_Pos_To_Location
+ (Current_Context.Source_File, Current_Context.Token_Pos);
+ end Get_Token_Location;
+
+ function Get_Prev_Location return Location_Type is
+ begin
+ return File_Pos_To_Location
+ (Current_Context.Source_File, Current_Context.Prev_Pos);
+ end Get_Prev_Location;
+
+ procedure Set_File (Source_File : Source_File_Entry)
+ is
+ N_Source: File_Buffer_Acc;
+ begin
+ pragma Assert (Current_Context.Source = null);
+ pragma Assert (Source_File /= No_Source_File_Entry);
+ N_Source := Get_File_Source (Source_File);
+ Current_Context := (Source => N_Source,
+ Source_File => Source_File,
+ Line_Number => 1,
+ Line_Pos => 0,
+ Prev_Pos => N_Source'First,
+ Pos => N_Source'First,
+ Token_Pos => 0, -- should be invalid,
+ File_Len => Get_File_Length (Source_File),
+ Token => Tok_Invalid,
+ Prev_Token => Tok_Invalid,
+ Identifier => Null_Identifier,
+ Bit_Str_Base => ' ',
+ Bit_Str_Sign => ' ',
+ Str_Id => Null_String8,
+ Str_Len => 0,
+ Int64 => -1,
+ Fp64 => 0.0);
+ Current_Token := Tok_Invalid;
+ end Set_File;
+
+ function Detect_Encoding_Errors return Boolean
+ is
+ C : constant Character := Source (Pos);
+ begin
+ -- No need to check further if first character is plain ASCII-7
+ if C >= ' ' and C < Character'Val (127) then
+ return False;
+ end if;
+
+ -- UTF-8 BOM is EF BB BF
+ if Source (Pos + 0) = Character'Val (16#ef#)
+ and then Source (Pos + 1) = Character'Val (16#bb#)
+ and then Source (Pos + 2) = Character'Val (16#bf#)
+ then
+ Error_Msg_Scan
+ ("source encoding must be latin-1 (UTF-8 BOM detected)");
+ return True;
+ end if;
+
+ -- UTF-16 BE BOM is FE FF
+ if Source (Pos + 0) = Character'Val (16#fe#)
+ and then Source (Pos + 1) = Character'Val (16#ff#)
+ then
+ Error_Msg_Scan
+ ("source encoding must be latin-1 (UTF-16 BE BOM detected)");
+ return True;
+ end if;
+
+ -- UTF-16 LE BOM is FF FE
+ if Source (Pos + 0) = Character'Val (16#ff#)
+ and then Source (Pos + 1) = Character'Val (16#fe#)
+ then
+ Error_Msg_Scan
+ ("source encoding must be latin-1 (UTF-16 LE BOM detected)");
+ return True;
+ end if;
+
+ -- Certainly weird, but scanner/parser will catch it.
+ return False;
+ end Detect_Encoding_Errors;
+
+ procedure Set_Current_Position (Position: Source_Ptr)
+ is
+ Loc : Location_Type;
+ Offset: Natural;
+ File_Entry : Source_File_Entry;
+ begin
+ -- Scanner must have been initialized.
+ pragma Assert (Current_Context.Source /= null);
+
+ Current_Token := Tok_Invalid;
+ Current_Context.Pos := Position;
+ Loc := File_Pos_To_Location (Current_Context.Source_File,
+ Current_Context.Pos);
+ Location_To_Coord (Loc,
+ File_Entry, Current_Context.Line_Pos,
+ Current_Context.Line_Number, Offset);
+ end Set_Current_Position;
+
+ procedure Close_File is
+ begin
+ Current_Context.Source := null;
+ end Close_File;
+
+ -- Emit an error when a character above 128 was found.
+ -- This must be called only in vhdl87.
+ procedure Error_8bit is
+ begin
+ Error_Msg_Scan ("8 bits characters not allowed in vhdl87");
+ end Error_8bit;
+
+ -- Emit an error when a separator is expected.
+ procedure Error_Separator is
+ begin
+ Error_Msg_Scan ("a separator is required here");
+ end Error_Separator;
+
+ -- scan a decimal literal or a based literal.
+ --
+ -- LRM93 13.4.1
+ -- DECIMAL_LITERAL ::= INTEGER [ . INTEGER ] [ EXPONENT ]
+ -- EXPONENT ::= E [ + ] INTEGER | E - INTEGER
+ --
+ -- LRM93 13.4.2
+ -- BASED_LITERAL ::= BASE # BASED_INTEGER [ . BASED_INTEGER ] # EXPONENT
+ -- BASE ::= INTEGER
+ procedure Scan_Literal is separate;
+
+ -- Scan a string literal.
+ --
+ -- LRM93 13.6 / LRM08 15.7
+ -- A string literal is formed by a sequence of graphic characters
+ -- (possibly none) enclosed between two quotation marks used as string
+ -- brackets.
+ -- STRING_LITERAL ::= " { GRAPHIC_CHARACTER } "
+ --
+ -- IN: for a string, at the call of this procedure, the current character
+ -- must be either '"' or '%'.
+ procedure Scan_String
+ is
+ -- The quotation character (can be " or %).
+ Mark: Character;
+ -- Current character.
+ C : Character;
+ -- Current length.
+ Length : Nat32;
+ begin
+ -- String delimiter.
+ Mark := Source (Pos);
+ pragma Assert (Mark = '"' or else Mark = '%');
+
+ Pos := Pos + 1;
+ Length := 0;
+ Current_Context.Str_Id := Str_Table.Create_String8;
+ loop
+ C := Source (Pos);
+ if C = Mark then
+ -- LRM93 13.6
+ -- If a quotation mark value is to be represented in the sequence
+ -- of character values, then a pair of adjacent quoatation
+ -- characters marks must be written at the corresponding place
+ -- within the string literal.
+ -- LRM93 13.10
+ -- Any pourcent sign within the sequence of characters must then
+ -- be doubled, and each such doubled percent sign is interpreted
+ -- as a single percent sign value.
+ -- The same replacement is allowed for a bit string literal,
+ -- provieded that both bit string brackets are replaced.
+ Pos := Pos + 1;
+ exit when Source (Pos) /= Mark;
+ end if;
+
+ case Characters_Kind (C) is
+ when Format_Effector =>
+ if Mark = '%' then
+ -- No matching '%' has been found. Consider '%' was used
+ -- as the remainder operator, instead of 'rem'. This will
+ -- improve the error message.
+ Error_Msg_Scan
+ (Get_Token_Location,
+ "'%%' is not a vhdl operator, use 'rem'");
+ Current_Token := Tok_Rem;
+ Pos := Current_Context.Token_Pos + 1;
+ return;
+ end if;
+ if C = CR or C = LF then
+ Error_Msg_Scan
+ ("string cannot be multi-line, use concatenation");
+ else
+ Error_Msg_Scan ("format effector not allowed in a string");
+ end if;
+ exit;
+ when Invalid =>
+ if C = Files_Map.EOT
+ and then Pos >= Current_Context.File_Len
+ then
+ Error_Msg_Scan ("string not terminated at end of file");
+ exit;
+ end if;
+
+ Error_Msg_Scan
+ ("invalid character not allowed, even in a string");
+ when Graphic_Character =>
+ if Vhdl_Std = Vhdl_87 and then C > Character'Val (127) then
+ Error_8bit;
+ end if;
+ end case;
+
+ if C = '"' and Mark = '%' then
+ -- LRM93 13.10
+ -- The quotation marks (") used as string brackets at both ends of
+ -- a string literal can be replaced by percent signs (%), provided
+ -- that the enclosed sequence of characters constains no quotation
+ -- marks, and provided that both string brackets are replaced.
+ Error_Msg_Scan
+ ("'""' cannot be used in a string delimited with '%%'");
+ end if;
+
+ Length := Length + 1;
+ Str_Table.Append_String8 (Character'Pos (C));
+ Pos := Pos + 1;
+ end loop;
+
+ Current_Token := Tok_String;
+ Current_Context.Str_Len := Length;
+ end Scan_String;
+
+ -- Scan a bit string literal.
+ --
+ -- LRM93 13.7
+ -- A bit string literal is formed by a sequence of extended digits
+ -- (possibly none) enclosed between two quotations used as bit string
+ -- brackets, preceded by a base specifier.
+ -- BIT_STRING_LITERAL ::= BASE_SPECIFIER " [ BIT_VALUE ] "
+ -- BIT_VALUE ::= EXTENDED_DIGIT { [ UNDERLINE ] EXTENDED_DIGIT }
+ --
+ -- The current character must be a base specifier, followed by '"' or '%'.
+ -- The base must be valid.
+ procedure Scan_Bit_String (Base_Log : Nat32)
+ is
+ -- Position of character '0'.
+ Pos_0 : constant Nat8 := Character'Pos ('0');
+
+ -- Used for the base.
+ subtype Nat4 is Natural range 1 .. 4;
+ Base : constant Nat32 := 2 ** Nat4 (Base_Log);
+
+ -- The quotation character (can be " or %).
+ Orig_Pos : constant Source_Ptr := Pos;
+ Mark : constant Character := Source (Orig_Pos);
+ -- Current character.
+ C : Character;
+ -- Current length.
+ Length : Nat32;
+ -- Digit value.
+ V, D : Nat8;
+ -- True if invalid character already found, to avoid duplicate message.
+ Has_Invalid : Boolean;
+ begin
+ pragma Assert (Mark = '"' or else Mark = '%');
+ Pos := Pos + 1;
+ Length := 0;
+ Has_Invalid := False;
+ Current_Context.Str_Id := Str_Table.Create_String8;
+ loop
+ << Again >> null;
+ C := Source (Pos);
+ Pos := Pos + 1;
+ exit when C = Mark;
+
+ -- LRM93 13.7
+ -- If the base specifier is 'B', the extended digits in the bit
+ -- value are restricted to 0 and 1.
+ -- If the base specifier is 'O', the extended digits int the bit
+ -- value are restricted to legal digits in the octal number
+ -- system, ie, the digits 0 through 7.
+ -- If the base specifier is 'X', the extended digits are all digits
+ -- together with the letters A through F.
+ case C is
+ when '0' .. '9' =>
+ V := Character'Pos (C) - Character'Pos ('0');
+ when 'A' .. 'F' =>
+ V := Character'Pos (C) - Character'Pos ('A') + 10;
+ when 'a' .. 'f' =>
+ -- LRM93 13.7
+ -- A letter in a bit string literal (...) can be written either
+ -- in lowercase or in upper case, with the same meaning.
+ V := Character'Pos (C) - Character'Pos ('a') + 10;
+ when '_' =>
+ if Source (Pos) = '_' then
+ Error_Msg_Scan
+ ("double underscore not allowed in a bit string");
+ end if;
+ if Source (Pos - 2) = Mark then
+ Error_Msg_Scan
+ ("underscore not allowed at the start of a bit string");
+ elsif Source (Pos) = Mark then
+ Error_Msg_Scan
+ ("underscore not allowed at the end of a bit string");
+ end if;
+ goto Again;
+ when '"' =>
+ pragma Assert (Mark = '%');
+ Error_Msg_Scan
+ ("'""' cannot close a bit string opened by '%%'");
+ exit;
+ when '%' =>
+ pragma Assert (Mark = '"');
+ Error_Msg_Scan
+ ("'%%' cannot close a bit string opened by '""'");
+ exit;
+ when others =>
+ if Characters_Kind (C) in Graphic_Character then
+ if Vhdl_Std >= Vhdl_08 then
+ V := Nat8'Last;
+ else
+ if not Has_Invalid then
+ Error_Msg_Scan ("invalid character in bit string");
+ Has_Invalid := True;
+ end if;
+ -- Continue the bit string
+ V := 0;
+ end if;
+ else
+ if Mark = '%' then
+ Error_Msg_Scan
+ (File_Pos_To_Location
+ (Current_Context.Source_File, Orig_Pos),
+ "'%%' is not a vhdl operator, use 'rem'");
+ Current_Token := Tok_Rem;
+ Pos := Orig_Pos + 1;
+ return;
+ else
+ Error_Msg_Scan ("bit string not terminated");
+ Pos := Pos - 1;
+ end if;
+ exit;
+ end if;
+ end case;
+
+ -- Expand bit value.
+ if Vhdl_Std >= Vhdl_08 and V > Base then
+ -- Expand as graphic character.
+ for I in 1 .. Base_Log loop
+ Str_Table.Append_String8_Char (C);
+ end loop;
+ else
+ -- Expand as extended digits.
+ case Base_Log is
+ when 1 =>
+ if V > 1 then
+ Error_Msg_Scan
+ ("invalid character in a binary bit string");
+ V := 1;
+ end if;
+ Str_Table.Append_String8 (Pos_0 + V);
+ when 3 =>
+ if V > 7 then
+ Error_Msg_Scan
+ ("invalid character in a octal bit string");
+ V := 7;
+ end if;
+ for I in 1 .. 3 loop
+ D := V / 4;
+ Str_Table.Append_String8 (Pos_0 + D);
+ V := (V - 4 * D) * 2;
+ end loop;
+ when 4 =>
+ for I in 1 .. 4 loop
+ D := V / 8;
+ Str_Table.Append_String8 (Pos_0 + D);
+ V := (V - 8 * D) * 2;
+ end loop;
+ when others =>
+ raise Internal_Error;
+ end case;
+ end if;
+
+ Length := Length + Base_Log;
+ end loop;
+
+ -- Note: the length of the bit string may be 0.
+
+ Current_Token := Tok_Bit_String;
+ Current_Context.Str_Len := Length;
+ end Scan_Bit_String;
+
+ -- Scan a decimal bit string literal. For base specifier D the algorithm
+ -- is rather different: all the graphic characters shall be digits, and we
+ -- need to use a (not very efficient) arbitrary precision multiplication.
+ procedure Scan_Dec_Bit_String
+ is
+ use Str_Table;
+
+ Id : String8_Id;
+
+ -- Position of character '0'.
+ Pos_0 : constant Nat8 := Character'Pos ('0');
+
+ -- Current character.
+ C : Character;
+ -- Current length.
+ Length : Nat32;
+ -- Digit value.
+ V, D : Nat8;
+
+ type Carries_Type is array (0 .. 3) of Nat8;
+ Carries : Carries_Type;
+ No_Carries : constant Carries_Type := (others => Pos_0);
+
+ -- Shift right carries. Note the Carries (0) is the LSB.
+ procedure Shr_Carries is
+ begin
+ Carries := (Carries (1), Carries (2), Carries (3), Pos_0);
+ end Shr_Carries;
+
+ procedure Append_Carries is
+ begin
+ -- Expand the bit string. Note that position 1 of the string8 is
+ -- the MSB.
+ while Carries /= No_Carries loop
+ Append_String8 (Pos_0);
+ Length := Length + 1;
+ for I in reverse 2 .. Length loop
+ Set_Element_String8 (Id, I, Element_String8 (Id, I - 1));
+ end loop;
+ Set_Element_String8 (Id, 1, Carries (0));
+ Shr_Carries;
+ end loop;
+ end Append_Carries;
+
+ -- Add 1 to Carries. Overflow is not allowed and should be prevented by
+ -- construction.
+ procedure Add_One_To_Carries is
+ begin
+ for I in Carries'Range loop
+ if Carries (I) = Pos_0 then
+ Carries (I) := Pos_0 + 1;
+ -- End of propagation.
+ exit;
+ else
+ Carries (I) := Pos_0;
+ -- Continue propagation.
+ pragma Assert (I < Carries'Last);
+ end if;
+ end loop;
+ end Add_One_To_Carries;
+ begin
+ pragma Assert (Source (Pos) = '"');
+ Pos := Pos + 1;
+ Length := 0;
+ Id := Create_String8;
+ Current_Context.Str_Id := Id;
+ loop
+ << Again >> null;
+ C := Source (Pos);
+ Pos := Pos + 1;
+ exit when C = '"';
+
+ if C in '0' .. '9' then
+ V := Character'Pos (C) - Character'Pos ('0');
+ elsif C = '_' then
+ if Source (Pos) = '_' then
+ Error_Msg_Scan
+ ("double underscore not allowed in a bit string");
+ end if;
+ if Source (Pos - 2) = '"' then
+ Error_Msg_Scan
+ ("underscore not allowed at the start of a bit string");
+ elsif Source (Pos) = '"' then
+ Error_Msg_Scan
+ ("underscore not allowed at the end of a bit string");
+ end if;
+ goto Again;
+ else
+ if Characters_Kind (C) in Graphic_Character then
+ Error_Msg_Scan
+ ("graphic character not allowed in decimal bit string");
+ -- Continue the bit string
+ V := 0;
+ else
+ Error_Msg_Scan ("bit string not terminated");
+ Pos := Pos - 1;
+ exit;
+ end if;
+ end if;
+
+ -- Multiply by 10.
+ Carries := (others => Pos_0);
+ for I in reverse 1 .. Length loop
+ -- Shift by 1 (*2).
+ D := Element_String8 (Id, I);
+ Set_Element_String8 (Id, I, Carries (0));
+ Shr_Carries;
+ -- Add D and D * 4.
+ if D /= Pos_0 then
+ Add_One_To_Carries;
+ -- Add_Four_To_Carries:
+ for I in 2 .. 3 loop
+ if Carries (I) = Pos_0 then
+ Carries (I) := Pos_0 + 1;
+ -- End of propagation.
+ exit;
+ else
+ Carries (I) := Pos_0;
+ -- Continue propagation.
+ end if;
+ end loop;
+ end if;
+ end loop;
+ Append_Carries;
+
+ -- Add V.
+ for I in Carries'Range loop
+ D := V / 2;
+ Carries (I) := Pos_0 + (V - 2 * D);
+ V := D;
+ end loop;
+ for I in reverse 1 .. Length loop
+ D := Element_String8 (Id, I);
+ if D /= Pos_0 then
+ Add_One_To_Carries;
+ end if;
+ Set_Element_String8 (Id, I, Carries (0));
+ Shr_Carries;
+ exit when Carries = No_Carries;
+ end loop;
+ Append_Carries;
+ end loop;
+
+ Current_Token := Tok_Bit_String;
+ Current_Context.Str_Len := Length;
+ end Scan_Dec_Bit_String;
+
+ -- LRM08 15.2 Character set
+ -- For each uppercase letter, there is a corresponding lowercase letter;
+ -- and for each lowercase letter except [y diaeresis] and [german sharp s],
+ -- there is a corresponding uppercase letter.
+ type Character_Map is array (Character) of Character;
+ To_Lower_Map : constant Character_Map :=
+ (
+ -- Uppercase ASCII letters.
+ 'A' => 'a',
+ 'B' => 'b',
+ 'C' => 'c',
+ 'D' => 'd',
+ 'E' => 'e',
+ 'F' => 'f',
+ 'G' => 'g',
+ 'H' => 'h',
+ 'I' => 'i',
+ 'J' => 'j',
+ 'K' => 'k',
+ 'L' => 'l',
+ 'M' => 'm',
+ 'N' => 'n',
+ 'O' => 'o',
+ 'P' => 'p',
+ 'Q' => 'q',
+ 'R' => 'r',
+ 'S' => 's',
+ 'T' => 't',
+ 'U' => 'u',
+ 'V' => 'v',
+ 'W' => 'w',
+ 'X' => 'x',
+ 'Y' => 'y',
+ 'Z' => 'z',
+
+ -- Lowercase ASCII letters.
+ 'a' => 'a',
+ 'b' => 'b',
+ 'c' => 'c',
+ 'd' => 'd',
+ 'e' => 'e',
+ 'f' => 'f',
+ 'g' => 'g',
+ 'h' => 'h',
+ 'i' => 'i',
+ 'j' => 'j',
+ 'k' => 'k',
+ 'l' => 'l',
+ 'm' => 'm',
+ 'n' => 'n',
+ 'o' => 'o',
+ 'p' => 'p',
+ 'q' => 'q',
+ 'r' => 'r',
+ 's' => 's',
+ 't' => 't',
+ 'u' => 'u',
+ 'v' => 'v',
+ 'w' => 'w',
+ 'x' => 'x',
+ 'y' => 'y',
+ 'z' => 'z',
+
+ -- Uppercase Latin-1 letters.
+ UC_A_Grave => LC_A_Grave,
+ UC_A_Acute => LC_A_Acute,
+ UC_A_Circumflex => LC_A_Circumflex,
+ UC_A_Tilde => LC_A_Tilde,
+ UC_A_Diaeresis => LC_A_Diaeresis,
+ UC_A_Ring => LC_A_Ring,
+ UC_AE_Diphthong => LC_AE_Diphthong,
+ UC_C_Cedilla => LC_C_Cedilla,
+ UC_E_Grave => LC_E_Grave,
+ UC_E_Acute => LC_E_Acute,
+ UC_E_Circumflex => LC_E_Circumflex,
+ UC_E_Diaeresis => LC_E_Diaeresis,
+ UC_I_Grave => LC_I_Grave,
+ UC_I_Acute => LC_I_Acute,
+ UC_I_Circumflex => LC_I_Circumflex,
+ UC_I_Diaeresis => LC_I_Diaeresis,
+ UC_Icelandic_Eth => LC_Icelandic_Eth,
+ UC_N_Tilde => LC_N_Tilde,
+ UC_O_Grave => LC_O_Grave,
+ UC_O_Acute => LC_O_Acute,
+ UC_O_Circumflex => LC_O_Circumflex,
+ UC_O_Tilde => LC_O_Tilde,
+ UC_O_Diaeresis => LC_O_Diaeresis,
+ UC_O_Oblique_Stroke => LC_O_Oblique_Stroke,
+ UC_U_Grave => LC_U_Grave,
+ UC_U_Acute => LC_U_Acute,
+ UC_U_Circumflex => LC_U_Circumflex,
+ UC_U_Diaeresis => LC_U_Diaeresis,
+ UC_Y_Acute => LC_Y_Acute,
+ UC_Icelandic_Thorn => LC_Icelandic_Thorn,
+
+ -- Lowercase Latin-1 letters.
+ LC_A_Grave => LC_A_Grave,
+ LC_A_Acute => LC_A_Acute,
+ LC_A_Circumflex => LC_A_Circumflex,
+ LC_A_Tilde => LC_A_Tilde,
+ LC_A_Diaeresis => LC_A_Diaeresis,
+ LC_A_Ring => LC_A_Ring,
+ LC_AE_Diphthong => LC_AE_Diphthong,
+ LC_C_Cedilla => LC_C_Cedilla,
+ LC_E_Grave => LC_E_Grave,
+ LC_E_Acute => LC_E_Acute,
+ LC_E_Circumflex => LC_E_Circumflex,
+ LC_E_Diaeresis => LC_E_Diaeresis,
+ LC_I_Grave => LC_I_Grave,
+ LC_I_Acute => LC_I_Acute,
+ LC_I_Circumflex => LC_I_Circumflex,
+ LC_I_Diaeresis => LC_I_Diaeresis,
+ LC_Icelandic_Eth => LC_Icelandic_Eth,
+ LC_N_Tilde => LC_N_Tilde,
+ LC_O_Grave => LC_O_Grave,
+ LC_O_Acute => LC_O_Acute,
+ LC_O_Circumflex => LC_O_Circumflex,
+ LC_O_Tilde => LC_O_Tilde,
+ LC_O_Diaeresis => LC_O_Diaeresis,
+ LC_O_Oblique_Stroke => LC_O_Oblique_Stroke,
+ LC_U_Grave => LC_U_Grave,
+ LC_U_Acute => LC_U_Acute,
+ LC_U_Circumflex => LC_U_Circumflex,
+ LC_U_Diaeresis => LC_U_Diaeresis,
+ LC_Y_Acute => LC_Y_Acute,
+ LC_Icelandic_Thorn => LC_Icelandic_Thorn,
+
+ -- Lowercase latin-1 characters without corresponding uppercase one.
+ LC_Y_Diaeresis => LC_Y_Diaeresis,
+ LC_German_Sharp_S => LC_German_Sharp_S,
+
+ -- Not a letter.
+ others => NUL);
+
+ procedure Error_Too_Long is
+ begin
+ Error_Msg_Scan ("identifier is too long (>"
+ & Natural'Image (Max_Name_Length - 1) & ")");
+ end Error_Too_Long;
+
+ -- LRM93 13.3.1
+ -- Basic Identifiers
+ -- A basic identifier consists only of letters, digits, and underlines.
+ -- BASIC_IDENTIFIER ::= LETTER { [ UNDERLINE ] LETTER_OR_DIGIT }
+ -- LETTER_OR_DIGIT ::= LETTER | DIGIT
+ -- LETTER ::= UPPER_CASE_LETTER | LOWER_CASE_LETTER
+ --
+ -- NB: At the call of this procedure, the current character must be a legal
+ -- character for a basic identifier.
+ procedure Scan_Identifier (Allow_PSL : Boolean)
+ is
+ use Name_Table;
+ Buffer : String (1 .. Max_Name_Length);
+ C : Character;
+ Len : Natural;
+ begin
+ -- This is an identifier or a key word.
+ Len := 0;
+ loop
+ -- Source (pos) is correct.
+ -- LRM93 13.3.1
+ -- All characters if a basic identifier are signifiant, including
+ -- any underline character inserted between a letter or digit and
+ -- an adjacent letter or digit.
+ -- Basic identifiers differing only in the use of the corresponding
+ -- upper and lower case letters are considered as the same.
+ --
+ -- GHDL: This is achieved by converting all upper case letters into
+ -- equivalent lower case letters.
+ -- The opposite (converting to upper lower case letters) is not
+ -- possible because two characters have no upper-case equivalent.
+ C := Source (Pos);
+ case C is
+ when 'A' .. 'Z' =>
+ C := Character'Val
+ (Character'Pos (C)
+ + Character'Pos ('a') - Character'Pos ('A'));
+ when 'a' .. 'z' | '0' .. '9' =>
+ null;
+ when '_' =>
+ if Source (Pos + 1) = '_' then
+ Error_Msg_Scan ("two underscores can't be consecutive");
+ end if;
+ when ' ' | ')' | '.' | ';' | ':' =>
+ exit;
+ when others =>
+ -- Non common case.
+ case Characters_Kind (C) is
+ when Upper_Case_Letter | Lower_Case_Letter =>
+ if Vhdl_Std = Vhdl_87 then
+ Error_8bit;
+ end if;
+ C := To_Lower_Map (C);
+ pragma Assert (C /= NUL);
+ when Digit =>
+ raise Internal_Error;
+ when others =>
+ exit;
+ end case;
+ end case;
+
+ -- Put character in name buffer. FIXME: compute the hash at the same
+ -- time ?
+ if Len >= Max_Name_Length - 1 then
+ if Len = Max_Name_Length -1 then
+ Error_Msg_Scan ("identifier is too long (>"
+ & Natural'Image (Max_Name_Length - 1) & ")");
+ -- Accept this last one character, so that no error for the
+ -- following characters.
+ Len := Len + 1;
+ Buffer (Len) := C;
+ end if;
+ else
+ Len := Len + 1;
+ Buffer (Len) := C;
+ end if;
+
+ -- Next character.
+ Pos := Pos + 1;
+ end loop;
+
+ if Source (Pos - 1) = '_' then
+ if Allow_PSL then
+ -- Some PSL reserved words finish with '_'. This case is handled
+ -- later by Scan_Underscore and Scan_Exclam_Mark.
+ Pos := Pos - 1;
+ Len := Len - 1;
+ C := '_';
+ else
+ -- Eat the trailing underscore.
+ Error_Msg_Scan ("an identifier cannot finish with '_'");
+ end if;
+ end if;
+
+ -- LRM93 13.2
+ -- At least one separator is required between an identifier or an
+ -- abstract literal and an adjacent identifier or abstract literal.
+ case Characters_Kind (C) is
+ when Digit
+ | Upper_Case_Letter
+ | Lower_Case_Letter =>
+ raise Internal_Error;
+ when Other_Special_Character | Special_Character =>
+ if (C = '"' or C = '%') and then Len <= 2 then
+ if C = '%' and Vhdl_Std >= Vhdl_08 then
+ Error_Msg_Scan ("'%%' not allowed in vhdl 2008 "
+ & "(was replacement character)");
+ -- Continue as a bit string.
+ end if;
+
+ -- Good candidate for bit string.
+
+ -- LRM93 13.7
+ -- BASE_SPECIFIER ::= B | O | X
+ --
+ -- A letter in a bit string literal (either an extended digit
+ -- or the base specifier) can be written either in lower case
+ -- or in upper case, with the same meaning.
+ --
+ -- LRM08 15.8 Bit string literals
+ -- BASE_SPECICIER ::=
+ -- B | O | X | UB | UO | UX | SB | SO | SX | D
+ --
+ -- An extended digit and the base specifier in a bit string
+ -- literal can be written either in lowercase or in uppercase,
+ -- with the same meaning.
+ declare
+ Base : Nat32;
+ Cl : constant Character := Buffer (Len);
+ Cf : constant Character := Buffer (1);
+ begin
+ Current_Context.Bit_Str_Base := Cl;
+ if Cl = 'b' then
+ Base := 1;
+ elsif Cl = 'o' then
+ Base := 3;
+ elsif Cl = 'x' then
+ Base := 4;
+ elsif Vhdl_Std >= Vhdl_08 and Len = 1 and Cf = 'd' then
+ Current_Context.Bit_Str_Sign := ' ';
+ Scan_Dec_Bit_String;
+ return;
+ else
+ Base := 0;
+ end if;
+ if Base > 0 then
+ if Len = 1 then
+ Current_Context.Bit_Str_Sign := ' ';
+ Scan_Bit_String (Base);
+ return;
+ elsif Vhdl_Std >= Vhdl_08
+ and then (Cf = 's' or Cf = 'u')
+ then
+ Current_Context.Bit_Str_Sign := Cf;
+ Scan_Bit_String (Base);
+ return;
+ end if;
+ end if;
+ end;
+ elsif Vhdl_Std > Vhdl_87 and then C = '\' then
+ -- Start of extended identifier. Cannot follow an identifier.
+ Error_Separator;
+ end if;
+
+ when Invalid =>
+ -- Improve error message for use of UTF-8 quote marks.
+ -- It's possible because in the sequence of UTF-8 bytes for the
+ -- quote marks, there are invalid character (in the 128-160
+ -- range).
+ if C = Character'Val (16#80#)
+ and then Buffer (Len) = Character'Val (16#e2#)
+ and then (Source (Pos + 1) = Character'Val (16#98#)
+ or else Source (Pos + 1) = Character'Val (16#99#))
+ then
+ -- UTF-8 left or right single quote mark.
+ if Len > 1 then
+ -- The first byte (0xe2) is part of the identifier. An
+ -- error will be detected as the next byte (0x80) is
+ -- invalid. Remove the first byte from the identifier, and
+ -- let's catch the error later.
+ Len := Len - 1;
+ Pos := Pos - 1;
+ else
+ Error_Msg_Scan ("invalid use of UTF8 character for '");
+ Pos := Pos + 2;
+
+ -- Distinguish between character literal and tick. Don't
+ -- care about possible invalid character literal, as in any
+ -- case we have already emitted an error message.
+ if Current_Context.Prev_Token /= Tok_Identifier
+ and then Current_Context.Prev_Token /= Tok_Character
+ and then
+ (Source (Pos + 1) = '''
+ or else
+ (Source (Pos + 1) = Character'Val (16#e2#)
+ and then Source (Pos + 2) = Character'Val (16#80#)
+ and then Source (Pos + 3) = Character'Val (16#99#)))
+ then
+ Current_Token := Tok_Character;
+ Current_Context.Identifier :=
+ Name_Table.Get_Identifier (Source (Pos));
+ if Source (Pos + 1) = ''' then
+ Pos := Pos + 2;
+ else
+ Pos := Pos + 4;
+ end if;
+ else
+ Current_Token := Tok_Tick;
+ end if;
+ return;
+ end if;
+ end if;
+ when Format_Effector
+ | Space_Character =>
+ null;
+ end case;
+
+ -- Hash it.
+ Current_Context.Identifier := Get_Identifier (Buffer (1 .. Len));
+ Current_Token := Tok_Identifier;
+ end Scan_Identifier;
+
+ procedure Identifier_To_Token is
+ begin
+ if Current_Identifier in Std_Names.Name_Id_Keywords then
+ -- LRM93 13.9
+ -- The identifiers listed below are called reserved words and are
+ -- reserved for signifiances in the language.
+ -- IN: this is also achieved in packages std_names and tokens.
+ Current_Token := Token_Type'Val
+ (Token_Type'Pos (Tok_First_Keyword)
+ + Current_Identifier - Std_Names.Name_First_Keyword);
+ case Current_Identifier is
+ when Std_Names.Name_Id_AMS_Reserved_Words =>
+ if not AMS_Vhdl then
+ if Is_Warning_Enabled (Warnid_Reserved_Word) then
+ Warning_Msg_Scan
+ (Warnid_Reserved_Word,
+ "using %i AMS-VHDL reserved word as an identifier",
+ +Current_Identifier);
+ end if;
+ Current_Token := Tok_Identifier;
+ end if;
+ when Std_Names.Name_Id_Vhdl08_Reserved_Words =>
+ if Vhdl_Std < Vhdl_08 then
+ if Is_Warning_Enabled (Warnid_Reserved_Word) then
+ Warning_Msg_Scan
+ (Warnid_Reserved_Word,
+ "using %i vhdl-2008 reserved word as an identifier",
+ +Current_Identifier);
+ end if;
+ Current_Token := Tok_Identifier;
+ end if;
+ when Std_Names.Name_Id_Vhdl00_Reserved_Words =>
+ if Vhdl_Std < Vhdl_00 then
+ if Is_Warning_Enabled (Warnid_Reserved_Word) then
+ Warning_Msg_Scan
+ (Warnid_Reserved_Word,
+ "using %i vhdl-2000 reserved word as an identifier",
+ +Current_Identifier);
+ end if;
+ Current_Token := Tok_Identifier;
+ end if;
+ when Std_Names.Name_Id_Vhdl93_Reserved_Words =>
+ if Vhdl_Std = Vhdl_87 then
+ if Is_Warning_Enabled (Warnid_Reserved_Word) then
+ Warning_Msg_Scan
+ (Warnid_Reserved_Word,
+ "using %i vhdl93 reserved word as a vhdl87 identifier",
+ +Current_Identifier, True);
+ Warning_Msg_Scan
+ (Warnid_Reserved_Word,
+ "(use option --std=93 to compile as vhdl93)");
+ end if;
+ Current_Token := Tok_Identifier;
+ end if;
+ when Std_Names.Name_Id_Vhdl87_Reserved_Words =>
+ null;
+ when others =>
+ raise Program_Error;
+ end case;
+ elsif Flag_Psl then
+ case Current_Identifier is
+ when Std_Names.Name_Clock =>
+ Current_Token := Tok_Psl_Clock;
+ when Std_Names.Name_Const =>
+ Current_Token := Tok_Psl_Const;
+ when Std_Names.Name_Boolean =>
+ Current_Token := Tok_Psl_Boolean;
+ when Std_Names.Name_Sequence =>
+ Current_Token := Tok_Psl_Sequence;
+ when Std_Names.Name_Property =>
+ Current_Token := Tok_Psl_Property;
+ when Std_Names.Name_Endpoint =>
+ Current_Token := Tok_Psl_Endpoint;
+ when Std_Names.Name_Cover =>
+ Current_Token := Tok_Psl_Cover;
+ when Std_Names.Name_Default =>
+ Current_Token := Tok_Psl_Default;
+ when Std_Names.Name_Inf =>
+ Current_Token := Tok_Inf;
+ when Std_Names.Name_Within =>
+ Current_Token := Tok_Within;
+ when Std_Names.Name_Abort =>
+ Current_Token := Tok_Abort;
+ when Std_Names.Name_Before =>
+ Current_Token := Tok_Before;
+ when Std_Names.Name_Always =>
+ Current_Token := Tok_Always;
+ when Std_Names.Name_Never =>
+ Current_Token := Tok_Never;
+ when Std_Names.Name_Eventually =>
+ Current_Token := Tok_Eventually;
+ when Std_Names.Name_Next_A =>
+ Current_Token := Tok_Next_A;
+ when Std_Names.Name_Next_E =>
+ Current_Token := Tok_Next_E;
+ when Std_Names.Name_Next_Event =>
+ Current_Token := Tok_Next_Event;
+ when Std_Names.Name_Next_Event_A =>
+ Current_Token := Tok_Next_Event_A;
+ when Std_Names.Name_Next_Event_E =>
+ Current_Token := Tok_Next_Event_E;
+ when Std_Names.Name_Until =>
+ Current_Token := Tok_Until;
+ when others =>
+ Current_Token := Tok_Identifier;
+ if Source (Pos - 1) = '_' then
+ Error_Msg_Scan ("identifiers cannot finish with '_'");
+ end if;
+ end case;
+ end if;
+ end Identifier_To_Token;
+
+ -- LRM93 13.3.2
+ -- EXTENDED_IDENTIFIER ::= \ GRAPHIC_CHARACTER { GRAPHIC_CHARACTER } \
+ --
+ -- Create an (extended) indentifier.
+ -- Extended identifiers are stored as they appear (leading and tailing
+ -- backslashes, doubling backslashes inside).
+ procedure Scan_Extended_Identifier
+ is
+ use Name_Table;
+ Buffer : String (1 .. Max_Name_Length);
+ Len : Natural;
+ C : Character;
+ begin
+ -- LRM93 13.3.2
+ -- Moreover, every extended identifiers is distinct from any basic
+ -- identifier.
+ -- GHDL: This is satisfied by storing '\' in the name table.
+ Len := 1;
+ Buffer (1) := '\';
+ loop
+ -- Next character.
+ Pos := Pos + 1;
+ C := Source (Pos);
+
+ if C = '\' then
+ -- LRM93 13.3.2
+ -- If a backslash is to be used as one of the graphic characters
+ -- of an extended literal, it must be doubled.
+ -- LRM93 13.3.2
+ -- (a doubled backslash couting as one character)
+ if Len >= Max_Name_Length - 1 then
+ if Len = Max_Name_Length - 1 then
+ Error_Too_Long;
+ -- Accept this last one.
+ Len := Len + 1;
+ Buffer (Len) := C;
+ end if;
+ else
+ Len := Len + 1;
+ Buffer (Len) := C;
+ end if;
+
+ Pos := Pos + 1;
+ C := Source (Pos);
+
+ exit when C /= '\';
+ end if;
+
+ case Characters_Kind (C) is
+ when Format_Effector =>
+ Error_Msg_Scan ("format effector in extended identifier");
+ exit;
+ when Graphic_Character =>
+ null;
+ when Invalid =>
+ if C = Files_Map.EOT
+ and then Pos >= Current_Context.File_Len
+ then
+ Error_Msg_Scan
+ ("extended identifier not terminated at end of file");
+ elsif C = LF or C = CR then
+ Error_Msg_Scan
+ ("extended identifier not terminated at end of line");
+ else
+ Error_Msg_Scan ("invalid character in extended identifier");
+ end if;
+ exit;
+ end case;
+
+ -- LRM93 13.3.2
+ -- Extended identifiers differing only in the use of corresponding
+ -- upper and lower case letters are distinct.
+ if Len >= Max_Name_Length - 1 then
+ if Len = Max_Name_Length - 1 then
+ Error_Too_Long;
+ -- Accept this last one.
+ Len := Len + 1;
+ Buffer (Len) := C;
+ end if;
+ else
+ Len := Len + 1;
+ Buffer (Len) := C;
+ end if;
+ end loop;
+
+ if Len <= 2 then
+ Error_Msg_Scan ("empty extended identifier is not allowed");
+ end if;
+
+ -- LRM93 13.2
+ -- At least one separator is required between an identifier or an
+ -- abstract literal and an adjacent identifier or abstract literal.
+ case Characters_Kind (C) is
+ when Digit
+ | Upper_Case_Letter
+ | Lower_Case_Letter =>
+ Error_Separator;
+ when Invalid
+ | Format_Effector
+ | Space_Character
+ | Special_Character
+ | Other_Special_Character =>
+ null;
+ end case;
+
+ -- Hash it.
+ Current_Context.Identifier := Get_Identifier (Buffer (1 .. Len));
+ Current_Token := Tok_Identifier;
+ end Scan_Extended_Identifier;
+
+ procedure Convert_Identifier (Str : in out String)
+ is
+ procedure Error_Bad is
+ begin
+ Error_Msg_Option ("bad character in identifier");
+ end Error_Bad;
+
+ procedure Error_8bit is
+ begin
+ Error_Msg_Option ("8 bits characters not allowed in vhdl87");
+ end Error_8bit;
+
+ C : Character;
+ subtype Id_Subtype is String (1 .. Str'Length);
+ Id : Id_Subtype renames Str;
+ begin
+ if Id'Length = 0 then
+ Error_Msg_Option ("identifier required");
+ return;
+ end if;
+
+ if Id (1) = '\' then
+ -- Extended identifier.
+ if Vhdl_Std = Vhdl_87 then
+ Error_Msg_Option ("extended identifiers not allowed in vhdl87");
+ return;
+ end if;
+
+ if Id'Length < 3 then
+ Error_Msg_Option ("extended identifier is too short");
+ return;
+ end if;
+ if Id (Id'Last) /= '\' then
+ Error_Msg_Option ("extended identifier must finish with a '\'");
+ return;
+ end if;
+ for I in 2 .. Id'Last - 1 loop
+ C := Id (I);
+ case Characters_Kind (C) is
+ when Format_Effector =>
+ Error_Msg_Option ("format effector in extended identifier");
+ return;
+ when Graphic_Character =>
+ if C = '\' then
+ if Id (I + 1) /= '\'
+ or else I = Id'Last - 1
+ then
+ Error_Msg_Option ("anti-slash must be doubled "
+ & "in extended identifier");
+ return;
+ end if;
+ end if;
+ when Invalid =>
+ Error_Bad;
+ end case;
+ end loop;
+ else
+ -- Identifier
+ for I in 1 .. Id'Length loop
+ C := Id (I);
+ case Characters_Kind (C) is
+ when Upper_Case_Letter =>
+ if Vhdl_Std = Vhdl_87 and C > 'Z' then
+ Error_8bit;
+ end if;
+ Id (I) := To_Lower_Map (C);
+ when Lower_Case_Letter | Digit =>
+ if Vhdl_Std = Vhdl_87 and C > 'z' then
+ Error_8bit;
+ end if;
+ when Special_Character =>
+ -- The current character is legal in an identifier.
+ if C = '_' then
+ if I = 1 then
+ Error_Msg_Option
+ ("an identifier cannot start with an underscore");
+ return;
+ end if;
+ if Id (I - 1) = '_' then
+ Error_Msg_Option
+ ("two underscores can't be consecutive");
+ return;
+ end if;
+ if I = Id'Last then
+ Error_Msg_Option
+ ("an identifier cannot finish with an underscore");
+ return;
+ end if;
+ else
+ Error_Bad;
+ end if;
+ when others =>
+ Error_Bad;
+ end case;
+ end loop;
+ end if;
+ end Convert_Identifier;
+
+ -- Internal scanner function: return True if C must be considered as a line
+ -- terminator. This also includes EOT (which terminates the file or is
+ -- invalid).
+ function Is_EOL (C : Character) return Boolean is
+ begin
+ case C is
+ when CR | LF | VT | FF | Files_Map.EOT =>
+ return True;
+ when others =>
+ return False;
+ end case;
+ end Is_EOL;
+
+ -- Advance scanner till the first non-space character.
+ procedure Skip_Spaces is
+ begin
+ while Source (Pos) = ' ' or Source (Pos) = HT loop
+ Pos := Pos + 1;
+ end loop;
+ end Skip_Spaces;
+
+ -- Eat all characters until end-of-line (not included).
+ procedure Skip_Until_EOL is
+ begin
+ while not Is_EOL (Source (Pos)) loop
+ -- Don't warn about invalid character, it's somewhat out of the
+ -- scope.
+ Pos := Pos + 1;
+ end loop;
+ end Skip_Until_EOL;
+
+ -- Scan an identifier within a comment. Only lower case letters are
+ -- allowed.
+ procedure Scan_Comment_Identifier (Id : out Name_Id)
+ is
+ use Name_Table;
+ Buffer : String (1 .. Max_Name_Length);
+ Len : Natural;
+ C : Character;
+ begin
+ Id := Null_Identifier;
+ Skip_Spaces;
+
+ -- The identifier shall start with a lower case letter.
+ if Source (Pos) not in 'a' .. 'z' then
+ return;
+ end if;
+
+ -- Scan the identifier (in lower cases).
+ Len := 0;
+ loop
+ C := Source (Pos);
+ exit when C not in 'a' .. 'z' and C /= '_';
+ Len := Len + 1;
+ Buffer (Len) := C;
+ Pos := Pos + 1;
+ end loop;
+
+ -- Shall be followed by a space or a new line.
+ if not (C = ' ' or else C = HT or else Is_EOL (C)) then
+ return;
+ end if;
+
+ Id := Get_Identifier (Buffer (1 .. Len));
+ end Scan_Comment_Identifier;
+
+ package Directive_Protect is
+ -- Called to scan a protect tool directive.
+ procedure Scan_Protect_Directive;
+ end Directive_Protect;
+
+ -- Body is put in a separate file to avoid pollution.
+ package body Directive_Protect is separate;
+
+ -- Called to scan a tool directive.
+ procedure Scan_Tool_Directive
+ is
+ procedure Error_Missing_Directive is
+ begin
+ Error_Msg_Scan ("tool directive required after '`'");
+ Skip_Until_EOL;
+ end Error_Missing_Directive;
+
+ C : Character;
+ begin
+ -- The current character is '`'.
+ Pos := Pos + 1;
+ Skip_Spaces;
+
+ -- Check and scan identifier.
+ C := Source (Pos);
+ if Characters_Kind (C) not in Letter then
+ Error_Missing_Directive;
+ return;
+ end if;
+
+ Scan_Identifier (False);
+
+ if Current_Token /= Tok_Identifier then
+ Error_Missing_Directive;
+ return;
+ end if;
+
+ Skip_Spaces;
+
+ -- Dispatch according to the identifier.
+ if Current_Identifier = Std_Names.Name_Protect then
+ Directive_Protect.Scan_Protect_Directive;
+ else
+ Error_Msg_Scan
+ ("unknown tool directive %i ignored", +Current_Identifier);
+ Skip_Until_EOL;
+ end if;
+ end Scan_Tool_Directive;
+
+ -- Scan tokens within a comment. Return TRUE if Current_Token was set,
+ -- return FALSE to discard the comment (ie treat it like a real comment).
+ function Scan_Comment return Boolean
+ is
+ use Std_Names;
+ Id : Name_Id;
+ begin
+ Scan_Comment_Identifier (Id);
+
+ if Id = Null_Identifier then
+ return False;
+ end if;
+
+ case Id is
+ when Name_Psl =>
+ -- Accept tokens after '-- psl'.
+ if Flag_Psl_Comment then
+ Flag_Psl := True;
+ Flag_Scan_In_Comment := True;
+ return True;
+ end if;
+ when others =>
+ null;
+ end case;
+ return False;
+ end Scan_Comment;
+
+ function Scan_Exclam_Mark return Boolean is
+ begin
+ if Source (Pos) = '!' then
+ Pos := Pos + 1;
+ return True;
+ else
+ return False;
+ end if;
+ end Scan_Exclam_Mark;
+
+ function Scan_Underscore return Boolean is
+ begin
+ if Source (Pos) = '_' then
+ Pos := Pos + 1;
+ return True;
+ else
+ return False;
+ end if;
+ end Scan_Underscore;
+
+ -- The Scan_Next_Line procedure must be called after each end-of-line to
+ -- register to next line number. This is called by Scan_CR_Newline and
+ -- Scan_LF_Newline.
+ procedure Scan_Next_Line is
+ begin
+ Files_Map.Skip_Gap (Current_Context.Source_File, Pos);
+ Current_Context.Line_Number := Current_Context.Line_Number + 1;
+ Current_Context.Line_Pos := Pos;
+ File_Add_Line_Number
+ (Current_Context.Source_File, Current_Context.Line_Number, Pos);
+ end Scan_Next_Line;
+
+ -- Scan a CR end-of-line.
+ procedure Scan_CR_Newline is
+ begin
+ -- Accept CR or CR+LF as line separator.
+ if Source (Pos + 1) = LF then
+ Pos := Pos + 2;
+ else
+ Pos := Pos + 1;
+ end if;
+ Scan_Next_Line;
+ end Scan_CR_Newline;
+
+ -- Scan a LF end-of-line.
+ procedure Scan_LF_Newline is
+ begin
+ -- Accept LF or LF+CR as line separator.
+ if Source (Pos + 1) = CR then
+ Pos := Pos + 2;
+ else
+ Pos := Pos + 1;
+ end if;
+ Scan_Next_Line;
+ end Scan_LF_Newline;
+
+ -- Emit an error message for an invalid character.
+ procedure Error_Bad_Character is
+ begin
+ -- Technically character literals, string literals, extended
+ -- identifiers and comments.
+ Error_Msg_Scan ("character %c can only be used in strings or comments",
+ +Source (Pos));
+ end Error_Bad_Character;
+
+ -- Get a new token.
+ procedure Scan is
+ begin
+ if Current_Token /= Tok_Invalid then
+ Current_Context.Prev_Token := Current_Token;
+ end if;
+
+ Current_Context.Prev_Pos := Pos;
+
+ << Again >> null;
+
+ -- Skip commonly used separators.
+ -- (Like Skip_Spaces but manually inlined for speed).
+ while Source (Pos) = ' ' or Source (Pos) = HT loop
+ Pos := Pos + 1;
+ end loop;
+
+ Current_Context.Token_Pos := Pos;
+ Current_Context.Identifier := Null_Identifier;
+
+ case Source (Pos) is
+ when HT | ' ' =>
+ -- Must have already been skipped just above.
+ raise Internal_Error;
+ when NBSP =>
+ if Vhdl_Std = Vhdl_87 then
+ Error_Msg_Scan ("NBSP character not allowed in vhdl87");
+ end if;
+ Pos := Pos + 1;
+ goto Again;
+ when VT | FF =>
+ Pos := Pos + 1;
+ goto Again;
+ when LF =>
+ Scan_LF_Newline;
+ if Flag_Newline then
+ Current_Token := Tok_Newline;
+ return;
+ end if;
+ goto Again;
+ when CR =>
+ Scan_CR_Newline;
+ if Flag_Newline then
+ Current_Token := Tok_Newline;
+ return;
+ end if;
+ goto Again;
+ when '-' =>
+ if Source (Pos + 1) = '-' then
+ -- This is a comment.
+ -- LRM93 13.8
+ -- A comment starts with two adjacent hyphens and extends up
+ -- to the end of the line.
+ -- A comment can appear on any line line of a VHDL
+ -- description.
+ -- The presence or absence of comments has no influence on
+ -- whether a description is legal or illegal.
+ -- Futhermore, comments do not influence the execution of a
+ -- simulation module; their sole purpose is the enlightenment
+ -- of the human reader.
+ -- GHDL note: As a consequence, an obfruscating comment
+ -- is out of purpose, and a warning could be reported :-)
+ Pos := Pos + 2;
+
+ -- Scan inside a comment. So we just ignore the two dashes.
+ if Flag_Scan_In_Comment then
+ goto Again;
+ end if;
+
+ -- Handle keywords in comment (PSL).
+ if Flag_Comment_Keyword and then Scan_Comment then
+ goto Again;
+ end if;
+
+ -- LRM93 13.2
+ -- In any case, a sequence of one or more format
+ -- effectors other than horizontal tabulation must
+ -- cause at least one end of line.
+ while not Is_EOL (Source (Pos)) loop
+ -- LRM93 13.1
+ -- The only characters allowed in the text of a VHDL
+ -- description are the graphic characters and the format
+ -- effectors.
+
+ -- LRM02 13.1 Character set
+ -- The only characters allowed in the text of a VHDL
+ -- description (except within comments -- see 13.8) [...]
+ --
+ -- LRM02 13.8 Comments
+ -- A comment [...] may contain any character except the
+ -- format effectors vertical tab, carriage return, line
+ -- feed and form feed.
+ if not (Flags.Mb_Comment or Vhdl_Std >= Vhdl_02)
+ and then Characters_Kind (Source (Pos)) = Invalid
+ then
+ Error_Msg_Scan ("invalid character, even in a comment");
+ end if;
+ Pos := Pos + 1;
+ end loop;
+ if Flag_Comment then
+ Current_Token := Tok_Comment;
+ return;
+ end if;
+ goto Again;
+ elsif Flag_Psl and then Source (Pos + 1) = '>' then
+ Current_Token := Tok_Minus_Greater;
+ Pos := Pos + 2;
+ return;
+ else
+ Current_Token := Tok_Minus;
+ Pos := Pos + 1;
+ return;
+ end if;
+ when '+' =>
+ Current_Token := Tok_Plus;
+ Pos := Pos + 1;
+ return;
+ when '*' =>
+ if Source (Pos + 1) = '*' then
+ Current_Token := Tok_Double_Star;
+ Pos := Pos + 2;
+ else
+ Current_Token := Tok_Star;
+ Pos := Pos + 1;
+ end if;
+ return;
+ when '/' =>
+ if Source (Pos + 1) = '=' then
+ Current_Token := Tok_Not_Equal;
+ Pos := Pos + 2;
+ elsif Source (Pos + 1) = '*' then
+ -- LRM08 15.9 Comments
+ -- A delimited comment start with a solidus (slash) character
+ -- immediately followed by an asterisk character and extends up
+ -- to the first subsequent occurrence of an asterisk character
+ -- immediately followed by a solidus character.
+ if Vhdl_Std < Vhdl_08 then
+ Error_Msg_Scan
+ ("block comment are not allowed before vhdl 2008");
+ end if;
+
+ -- Skip '/*'.
+ Pos := Pos + 2;
+
+ loop
+ case Source (Pos) is
+ when '/' =>
+ -- LRM08 15.9
+ -- Moreover, an occurrence of a solidus character
+ -- immediately followed by an asterisk character
+ -- within a delimited comment is not interpreted as
+ -- the start of a nested delimited comment.
+ if Source (Pos + 1) = '*' then
+ Warning_Msg_Scan
+ (Warnid_Nested_Comment,
+ "'/*' found within a block comment");
+ end if;
+ Pos := Pos + 1;
+ when '*' =>
+ if Source (Pos + 1) = '/' then
+ Pos := Pos + 2;
+ exit;
+ else
+ Pos := Pos + 1;
+ end if;
+ when CR =>
+ Scan_CR_Newline;
+ when LF =>
+ Scan_LF_Newline;
+ when Files_Map.EOT =>
+ if Pos >= Current_Context.File_Len then
+ -- Point at the start of the comment.
+ Error_Msg_Scan
+ (Get_Token_Location,
+ "block comment not terminated at end of file");
+ exit;
+ end if;
+ Pos := Pos + 1;
+ when others =>
+ Pos := Pos + 1;
+ end case;
+ end loop;
+ if Flag_Comment then
+ Current_Token := Tok_Comment;
+ return;
+ end if;
+ goto Again;
+ else
+ Current_Token := Tok_Slash;
+ Pos := Pos + 1;
+ end if;
+ return;
+ when '(' =>
+ Current_Token := Tok_Left_Paren;
+ Pos := Pos + 1;
+ return;
+ when ')' =>
+ Current_Token := Tok_Right_Paren;
+ Pos := Pos + 1;
+ return;
+ when '|' =>
+ if Flag_Psl then
+ if Source (Pos + 1) = '|' then
+ Current_Token := Tok_Bar_Bar;
+ Pos := Pos + 2;
+ elsif Source (Pos + 1) = '-'
+ and then Source (Pos + 2) = '>'
+ then
+ Current_Token := Tok_Bar_Arrow;
+ Pos := Pos + 3;
+ elsif Source (Pos + 1) = '='
+ and then Source (Pos + 2) = '>'
+ then
+ Current_Token := Tok_Bar_Double_Arrow;
+ Pos := Pos + 3;
+ else
+ Current_Token := Tok_Bar;
+ Pos := Pos + 1;
+ end if;
+ else
+ Current_Token := Tok_Bar;
+ Pos := Pos + 1;
+ end if;
+ return;
+ when '!' =>
+ if Flag_Psl then
+ Current_Token := Tok_Exclam_Mark;
+ else
+ if Source (Pos + 1) = '=' then
+ -- != is not allowed in VHDL, but be friendly with C users.
+ Error_Msg_Scan
+ (Get_Token_Location, "Use '/=' for inequality in vhdl");
+ Current_Token := Tok_Not_Equal;
+ Pos := Pos + 1;
+ else
+ -- LRM93 13.10
+ -- A vertical line (|) can be replaced by an exclamation
+ -- mark (!) where used as a delimiter.
+ Current_Token := Tok_Bar;
+ end if;
+ end if;
+ Pos := Pos + 1;
+ return;
+ when ':' =>
+ if Source (Pos + 1) = '=' then
+ Current_Token := Tok_Assign;
+ Pos := Pos + 2;
+ else
+ Current_Token := Tok_Colon;
+ Pos := Pos + 1;
+ end if;
+ return;
+ when ';' =>
+ Current_Token := Tok_Semi_Colon;
+ Pos := Pos + 1;
+ return;
+ when ',' =>
+ Current_Token := Tok_Comma;
+ Pos := Pos + 1;
+ return;
+ when '.' =>
+ if Source (Pos + 1) = '.' then
+ -- Be Ada friendly...
+ Error_Msg_Scan ("'..' is invalid in vhdl, replaced by 'to'");
+ Current_Token := Tok_To;
+ Pos := Pos + 2;
+ return;
+ end if;
+ Current_Token := Tok_Dot;
+ Pos := Pos + 1;
+ return;
+ when '&' =>
+ if Flag_Psl and then Source (Pos + 1) = '&' then
+ Current_Token := Tok_And_And;
+ Pos := Pos + 2;
+ else
+ Current_Token := Tok_Ampersand;
+ Pos := Pos + 1;
+ end if;
+ return;
+ when '<' =>
+ case Source (Pos + 1) is
+ when '=' =>
+ Current_Token := Tok_Less_Equal;
+ Pos := Pos + 2;
+ when '>' =>
+ Current_Token := Tok_Box;
+ Pos := Pos + 2;
+ when '<' =>
+ Current_Token := Tok_Double_Less;
+ Pos := Pos + 2;
+ when others =>
+ Current_Token := Tok_Less;
+ Pos := Pos + 1;
+ end case;
+ return;
+ when '>' =>
+ case Source (Pos + 1) is
+ when '=' =>
+ Current_Token := Tok_Greater_Equal;
+ Pos := Pos + 2;
+ when '>' =>
+ Current_Token := Tok_Double_Greater;
+ Pos := Pos + 2;
+ when others =>
+ Current_Token := Tok_Greater;
+ Pos := Pos + 1;
+ end case;
+ return;
+ when '=' =>
+ if Source (Pos + 1) = '=' then
+ if AMS_Vhdl then
+ Current_Token := Tok_Equal_Equal;
+ else
+ Error_Msg_Scan
+ ("'==' is not the vhdl equality, replaced by '='");
+ Current_Token := Tok_Equal;
+ end if;
+ Pos := Pos + 2;
+ elsif Source (Pos + 1) = '>' then
+ Current_Token := Tok_Double_Arrow;
+ Pos := Pos + 2;
+ else
+ Current_Token := Tok_Equal;
+ Pos := Pos + 1;
+ end if;
+ return;
+ when ''' =>
+ -- Handle cases such as character'('a')
+ -- FIXME: what about f ()'length ? or .all'length
+ if Current_Context.Prev_Token /= Tok_Identifier
+ and then Current_Context.Prev_Token /= Tok_Character
+ and then Source (Pos + 2) = '''
+ then
+ -- LRM93 13.5
+ -- A character literal is formed by enclosing one of the 191
+ -- graphic character (...) between two apostrophe characters.
+ -- CHARACTER_LITERAL ::= ' GRAPHIC_CHARACTER '
+ if Characters_Kind (Source (Pos + 1)) not in Graphic_Character
+ then
+ Error_Msg_Scan
+ ("a character literal can only be a graphic character");
+ elsif Vhdl_Std = Vhdl_87
+ and then Source (Pos + 1) > Character'Val (127)
+ then
+ Error_8bit;
+ end if;
+ Current_Token := Tok_Character;
+ Current_Context.Identifier :=
+ Name_Table.Get_Identifier (Source (Pos + 1));
+ Pos := Pos + 3;
+ return;
+ else
+ Current_Token := Tok_Tick;
+ Pos := Pos + 1;
+ end if;
+ return;
+ when '0' .. '9' =>
+ Scan_Literal;
+
+ -- LRM93 13.2
+ -- At least one separator is required between an identifier or
+ -- an abstract literal and an adjacent identifier or abstract
+ -- literal.
+ case Characters_Kind (Source (Pos)) is
+ when Digit =>
+ -- Happen if d#ddd# is followed by a number.
+ Error_Msg_Scan ("space is required between numbers");
+ when Upper_Case_Letter
+ | Lower_Case_Letter =>
+ -- Could call Error_Separator, but use a clearer message
+ -- for this common case.
+ -- Note: the term "unit name" is not correct here, since
+ -- it can be any identifier or even a keyword; however it
+ -- is probably the most common case (eg 10ns).
+ if Vhdl_Std >= Vhdl_08 and then Current_Token = Tok_Integer
+ then
+ Current_Token := Tok_Integer_Letter;
+ else
+ Error_Msg_Scan
+ ("space is required between number and unit name");
+ end if;
+ when Other_Special_Character =>
+ if Vhdl_Std > Vhdl_87 and then Source (Pos) = '\' then
+ -- Start of extended identifier.
+ Error_Separator;
+ end if;
+ when Invalid
+ | Format_Effector
+ | Space_Character
+ | Special_Character =>
+ null;
+ end case;
+ return;
+ when '#' =>
+ Error_Msg_Scan ("'#' is used for based literals and "
+ & "must be preceded by a base");
+ -- Skip.
+ Pos := Pos + 1;
+ goto Again;
+ when '"' =>
+ Scan_String;
+ return;
+ when '%' =>
+ if Vhdl_Std >= Vhdl_08 then
+ Error_Msg_Scan
+ ("'%%' not allowed in vhdl 2008 (was replacement character)");
+ -- Continue as a string.
+ end if;
+ Scan_String;
+ return;
+ when '[' =>
+ if Flag_Psl then
+ if Source (Pos + 1) = '*' then
+ Current_Token := Tok_Brack_Star;
+ Pos := Pos + 2;
+ elsif Source (Pos + 1) = '+'
+ and then Source (Pos + 2) = ']'
+ then
+ Current_Token := Tok_Brack_Plus_Brack;
+ Pos := Pos + 3;
+ elsif Source (Pos + 1) = '-'
+ and then Source (Pos + 2) = '>'
+ then
+ Current_Token := Tok_Brack_Arrow;
+ Pos := Pos + 3;
+ elsif Source (Pos + 1) = '=' then
+ Current_Token := Tok_Brack_Equal;
+ Pos := Pos + 2;
+ else
+ Current_Token := Tok_Left_Bracket;
+ Pos := Pos + 1;
+ end if;
+ else
+ if Vhdl_Std = Vhdl_87 then
+ Error_Msg_Scan
+ ("'[' is an invalid character in vhdl87, replaced by '('");
+ Current_Token := Tok_Left_Paren;
+ else
+ Current_Token := Tok_Left_Bracket;
+ end if;
+ Pos := Pos + 1;
+ end if;
+ return;
+ when ']' =>
+ if Vhdl_Std = Vhdl_87 and not Flag_Psl then
+ Error_Msg_Scan
+ ("']' is an invalid character in vhdl87, replaced by ')'");
+ Current_Token := Tok_Right_Paren;
+ else
+ Current_Token := Tok_Right_Bracket;
+ end if;
+ Pos := Pos + 1;
+ return;
+ when '{' =>
+ if Flag_Psl then
+ Current_Token := Tok_Left_Curly;
+ else
+ Error_Msg_Scan ("'{' is an invalid character, replaced by '('");
+ Current_Token := Tok_Left_Paren;
+ end if;
+ Pos := Pos + 1;
+ return;
+ when '}' =>
+ if Flag_Psl then
+ Current_Token := Tok_Right_Curly;
+ else
+ Error_Msg_Scan ("'}' is an invalid character, replaced by ')'");
+ Current_Token := Tok_Right_Paren;
+ end if;
+ Pos := Pos + 1;
+ return;
+ when '\' =>
+ if Vhdl_Std = Vhdl_87 then
+ Error_Msg_Scan
+ ("extended identifiers are not allowed in vhdl87");
+ end if;
+ Scan_Extended_Identifier;
+ return;
+ when '^' =>
+ if Vhdl_Std >= Vhdl_08 then
+ Current_Token := Tok_Caret;
+ else
+ Current_Token := Tok_Xor;
+ Error_Msg_Scan ("'^' is not a VHDL operator, use 'xor'");
+ end if;
+ Pos := Pos + 1;
+ return;
+ when '~' =>
+ Error_Msg_Scan ("'~' is not a VHDL operator, use 'not'");
+ Pos := Pos + 1;
+ Current_Token := Tok_Not;
+ return;
+ when '?' =>
+ if Vhdl_Std < Vhdl_08 then
+ Error_Bad_Character;
+ Pos := Pos + 1;
+ goto Again;
+ else
+ if Source (Pos + 1) = '<' then
+ if Source (Pos + 2) = '=' then
+ Current_Token := Tok_Match_Less_Equal;
+ Pos := Pos + 3;
+ else
+ Current_Token := Tok_Match_Less;
+ Pos := Pos + 2;
+ end if;
+ elsif Source (Pos + 1) = '>' then
+ if Source (Pos + 2) = '=' then
+ Current_Token := Tok_Match_Greater_Equal;
+ Pos := Pos + 3;
+ else
+ Current_Token := Tok_Match_Greater;
+ Pos := Pos + 2;
+ end if;
+ elsif Source (Pos + 1) = '?' then
+ Current_Token := Tok_Condition;
+ Pos := Pos + 2;
+ elsif Source (Pos + 1) = '=' then
+ Current_Token := Tok_Match_Equal;
+ Pos := Pos + 2;
+ elsif Source (Pos + 1) = '/'
+ and then Source (Pos + 2) = '='
+ then
+ Current_Token := Tok_Match_Not_Equal;
+ Pos := Pos + 3;
+ else
+ Error_Msg_Scan ("unknown matching operator");
+ Pos := Pos + 1;
+ goto Again;
+ end if;
+ end if;
+ return;
+ when '`' =>
+ if Vhdl_Std >= Vhdl_08 then
+ Scan_Tool_Directive;
+ else
+ Error_Bad_Character;
+ Skip_Until_EOL;
+ end if;
+ goto Again;
+ when '$'
+ | Inverted_Exclamation .. Inverted_Question
+ | Multiplication_Sign | Division_Sign =>
+ Error_Bad_Character;
+ Pos := Pos + 1;
+ goto Again;
+ when '@' =>
+ if Vhdl_Std >= Vhdl_08 or Flag_Psl then
+ Current_Token := Tok_Arobase;
+ Pos := Pos + 1;
+ return;
+ else
+ Error_Bad_Character;
+ Pos := Pos + 1;
+ goto Again;
+ end if;
+ when '_' =>
+ Error_Msg_Scan ("an identifier can't start with '_'");
+ Scan_Identifier (Flag_Psl);
+ -- Cannot be a reserved word.
+ return;
+ when 'A' .. 'Z' | 'a' .. 'z' =>
+ Scan_Identifier (Flag_Psl);
+ Identifier_To_Token;
+ return;
+ when UC_A_Grave .. UC_O_Diaeresis
+ | UC_O_Oblique_Stroke .. UC_Icelandic_Thorn
+ | LC_German_Sharp_S .. LC_O_Diaeresis
+ | LC_O_Oblique_Stroke .. LC_Y_Diaeresis =>
+ if Vhdl_Std = Vhdl_87 then
+ Error_Msg_Scan
+ ("non 7-bit latin-1 letters are not allowed in vhdl87");
+ end if;
+ Scan_Identifier (False);
+ -- Not a reserved word.
+ return;
+ when NUL .. ETX | ENQ .. BS | SO .. US | DEL .. APC =>
+ Error_Msg_Scan
+ ("control character that is not CR, LF, FF, HT or VT " &
+ "is not allowed");
+ Pos := Pos + 1;
+ goto Again;
+ when Files_Map.EOT =>
+ if Pos >= Current_Context.File_Len then
+ -- FIXME: should conditionnaly emit a warning if the file
+ -- is not terminated by an end of line.
+ Current_Token := Tok_Eof;
+ else
+ Error_Msg_Scan ("EOT is not allowed inside the file");
+ Pos := Pos + 1;
+ goto Again;
+ end if;
+ return;
+ end case;
+ -- Not reachable: all case should use goto Again or return.
+ end Scan;
+
+ function Is_Whitespace (C : Character) return Boolean is
+ begin
+ if C = ' ' then
+ return True;
+ elsif Vhdl_Std > Vhdl_87 and C = NBSP then
+ return True;
+ else
+ return False;
+ end if;
+ end Is_Whitespace;
+end Vhdl.Scanner;