diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/name_table.adb | 47 | ||||
| -rw-r--r-- | src/name_table.ads | 62 | 
2 files changed, 59 insertions, 50 deletions
| diff --git a/src/name_table.adb b/src/name_table.adb index 4a8b98477..a3c335347 100644 --- a/src/name_table.adb +++ b/src/name_table.adb @@ -17,37 +17,46 @@  --  02111-1307, USA.  with Ada.Text_IO; use Ada.Text_IO;  with Ada.Unchecked_Deallocation; +with Interfaces;  with GNAT.Table;  package body Name_Table is -   -- A flag that creates verbosity. -   Debug_Name_Table: constant Boolean := False; - +   --  Id of the first character (NUL).     First_Character_Name_Id : constant Name_Id := 1; +   --  Type for the hash value.     type Hash_Value_Type is mod 2**32;     --  An entry in the name table.     type Identifier is record +      --  Hash value of the identifier.        Hash : Hash_Value_Type; + +      --  Simply linked collision chain.        Next : Name_Id; -      --  Index in strings_table. +      --  Index in Strings_Table of the first character of the identifier. +      --  The name is always NUL terminated, but the length can be computed +      --  from the name of the next identifier.  Indeed, names are put in +      --  Strings_Table in the same order as identifiers.        Name : Natural;        --  User infos.        Info : Int32;     end record; -   --  Hash table. -   --  Number of entry points. +   --  Size of the hash table.  Must be a power of 2, so that bit masked can be +   --  used to get the entry number from the hash value.     Hash_Table_Size : Hash_Value_Type := 1024;     type Hash_Array is array (Hash_Value_Type range <>) of Name_Id;     type Hash_Array_Acc is access Hash_Array; +   --  Hash table.  Lower bound is always 0, upper bound is always +   --  Hash_Table_Size - 1.     Hash_Table: Hash_Array_Acc; +   --  Table of identifiers.     package Names_Table is new GNAT.Table       (Table_Index_Type => Name_Id,        Table_Component_Type => Identifier, @@ -80,6 +89,9 @@ package body Name_Table is        return Res;     end Store; +   --  Append the terminator in Names_Table.  This is required so that the +   --  length of the last identifier can be computed (like any other +   --  identifiers).     procedure Append_Terminator is     begin        Names_Table.Append ((Hash => 0, @@ -105,7 +117,7 @@ package body Name_Table is                             Info => 0));        pragma Assert (Names_Table.Last = Null_Identifier); -      --  Store characters. +      --  Store characters.  They aren't put in the hash table.        for C in Character loop           Strings_Table.Append (C);           Names_Table.Append ((Hash => 0, @@ -122,17 +134,18 @@ package body Name_Table is          new Hash_Array'(0 .. Hash_Table_Size - 1 => Null_Identifier);     end Initialize; -   --  Compute the hash value of a string. +   --  Compute the hash value of a string.  In case of algorithm change, check +   --  the performance using Disp_Stats.     function Hash return Hash_Value_Type     is -      Res : Hash_Value_Type; +      use Interfaces; +      Res : Unsigned_32;     begin -      Res := 0; +      Res := Unsigned_32 (Name_Length);        for I in 1 .. Name_Length loop -         Res := Res * 7 + Character'Pos (Name_Buffer (I)); -         Res := Res + Res / 2**28; +         Res := Rotate_Left (Res, 4) + Res + Character'Pos (Name_Buffer (I));        end loop; -      return Res; +      return Hash_Value_Type (Res);     end Hash;     --  Get the string associed to an identifier. @@ -270,19 +283,13 @@ package body Name_Table is        Hash_Value := Hash;        Hash_Index := Hash_Value and (Hash_Table_Size - 1); -      if Debug_Name_Table then -         Put_Line ("get_identifier " & Name_Buffer (1 .. Name_Length)); -      end if; -        --  Find the name.        Res := Hash_Table (Hash_Index);        while Res /= Null_Identifier loop -         --Put_Line ("compare with " & Get_String (Res));           if Names_Table.Table (Res).Hash = Hash_Value             and then Get_Name_Length (Res) = Name_Length             and then Compare_Name_Buffer_With_Name (Res)           then -            --Put_Line ("found");              return Res;           end if;           Res := Names_Table.Table (Res).Next; @@ -411,6 +418,8 @@ package body Name_Table is        Put_Line ("Name table statistics:");        Put_Line (" number of identifiers: " & Name_Id'Image (Last_Name_Id));        Put_Line (" size of strings: " & Natural'Image (Strings_Table.Last)); +      Put_Line (" hash array length: " +                  & Hash_Value_Type'Image (Hash_Table_Size));        Put_Line (" hash distribution (number of entries per length):");        Min := Natural'Last;        Max := Natural'First; diff --git a/src/name_table.ads b/src/name_table.ads index c3d3e72f1..c5b6b9cb4 100644 --- a/src/name_table.ads +++ b/src/name_table.ads @@ -18,42 +18,42 @@  with System;  with Types; use Types; --- A very simple name table. --- This is an hash table, such as id1=id2 <=> get_string(id1)=get_string(id2). +--  A very simple name table. This is an hash table, so that +--  id1 = id2 <=> get_string (id1) = get_string (id2).  package Name_Table is -   -- Initialize the package, ie create tables. +   --  Initialize the package, ie create tables.     procedure Initialize; -   -- Get an entry in the name table. -   -- (entries for characters are already built). +   --  Get an entry in the name table for a character. +   --  (entries for characters are already built).  Characters are put in the +   --  name table, but are always different from identifiers.  They simply +   --  share the same Name_Id type.     function Get_Identifier (Char: Character) return Name_Id;     pragma Inline (Get_Identifier); -   -- Get or create an entry in the name table. -   -- If an entry is created, its token value is tok_identifier. -   -- Note: -   -- an identifier is represented in all lower case letter, -   -- an extended identifier is represented in backslashes, double internal -   --   backslashes are simplified, -   -- a string is represented by its contents (without the quotation -   --  characters, and simplified), -   -- a bit string is represented by its raw contents (no simplification). +   --  Get or create an entry in the name table.  Note: +   --  * an identifier is represented in all lower case letter, +   --  * an extended identifier is represented in backslashes, double internal +   --    backslashes are simplified.     function Get_Identifier (Str: String) return Name_Id; -   -- Get the string associed to a name. -   -- If the name is a character, then single quote are added. +   --  Get the string associed to a name. +   --  If the name is a character, then single quote are added.     function Image (Id: Name_Id) return String; -   -- Get the address of the first character of ID. -   -- The string is NUL-terminated (this is done by get_identifier). +   --  Get the address of the first character of ID.  The address is valid +   --  until the next call to Get_Identifier (which may reallocate the string +   --  table). +   --  The string is NUL-terminated (this is done by get_identifier).     function Get_Address (Id: Name_Id) return System.Address; -   -- Get the length of ID. +   --  Get the length of ID.     function Get_Name_Length (Id: Name_Id) return Natural;     pragma Inline (Get_Name_Length); -   -- Get the character associed to a name. +   --  Get the character associed to a name.  This is valid only for character +   --  ids.     function Get_Character (Id: Name_Id) return Character;     pragma Inline (Get_Character); @@ -61,35 +61,35 @@ package Name_Table is     function Is_Character (Id: Name_Id) return Boolean;     pragma Inline (Is_Character); -   -- Get or create an entry in the name table, use NAME_BUFFER/NAME_LENGTH. +   --  Get or create an entry in the name table, use NAME_BUFFER/NAME_LENGTH.     function Get_Identifier return Name_Id;     --  Like GET_IDENTIFIER, but return NULL_IDENTIFIER if the identifier     --  is not found (and do not create an entry for it).     function Get_Identifier_No_Create return Name_Id; -   --  Set NAME_BUFFER/NAME_LENGTH with the image of ID. +   --  Set NAME_BUFFER/NAME_LENGTH with the image of ID.  Characters aren't +   --  quoted.     procedure Image (Id : Name_Id); -   -- Get and set the info field associated with each identifier. -   -- Used to store interpretations of the name. +   --  Get and set the info field associated with each identifier. +   --  Used to store interpretations of the name.     function Get_Info (Id: Name_Id) return Int32;     pragma Inline (Get_Info);     procedure Set_Info (Id: Name_Id; Info: Int32);     pragma Inline (Set_Info); -   -- Return the latest name_id used. -   -- kludge, use only for debugging. +   --  Return the latest name_id used.  This is only for debugging or stats.     function Last_Name_Id return Name_Id; -   -- Be sure all info fields have their default value. +   --  Be sure all info fields have their default value.     procedure Assert_No_Infos; -   -- This buffer is used by get_token to set the name. -   -- This can be seen as a copy buffer but this is necessary for two reasons: -   --  names case must be 'normalized', because VHDL is case insensitive. +   --  This buffer is used by get_token to set the name. +   --  This can be seen as a copy buffer but this is necessary for two reasons: +   --   names case must be 'normalized', because VHDL is case insensitive.     Name_Buffer : String (1 .. 1024); -   -- The length of the name string. +   --  The length of the name string.     Name_Length: Natural;     --  Disp statistics. | 
