diff options
Diffstat (limited to 'src/vhdl/scanner.adb')
-rw-r--r-- | src/vhdl/scanner.adb | 2332 |
1 files changed, 0 insertions, 2332 deletions
diff --git a/src/vhdl/scanner.adb b/src/vhdl/scanner.adb deleted file mode 100644 index 913883ab1..000000000 --- a/src/vhdl/scanner.adb +++ /dev/null @@ -1,2332 +0,0 @@ --- VHDL lexical scanner. --- Copyright (C) 2002 - 2014 Tristan Gingold --- --- GHDL is free software; you can redistribute it and/or modify it under --- the terms of the GNU General Public License as published by the Free --- Software Foundation; either version 2, or (at your option) any later --- version. --- --- GHDL is distributed in the hope that it will be useful, but WITHOUT ANY --- WARRANTY; without even the implied warranty of MERCHANTABILITY or --- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --- for more details. --- --- You should have received a copy of the GNU General Public License --- along with GHDL; see the file COPYING. If not, write to the Free --- Software Foundation, 59 Temple Place - Suite 330, Boston, MA --- 02111-1307, USA. -with Ada.Characters.Latin_1; use Ada.Characters.Latin_1; -with Errorout; use Errorout; -with Name_Table; -with Files_Map; use Files_Map; -with Std_Names; -with Str_Table; -with Flags; use Flags; - -package body Scanner is - - -- This classification is a simplification of the categories of LRM93 13.1 - -- LRM93 13.1 - -- The only characters allowed in the text of a VHDL description are the - -- graphic characters and format effector. - - type Character_Kind_Type is - ( - -- Neither a format effector nor a graphic character. - Invalid, - Format_Effector, - Lower_Case_Letter, - Upper_Case_Letter, - Digit, - Special_Character, - Space_Character, - Other_Special_Character - ); - - -- LRM93 13.1 - -- basic_graphic_character ::= - -- upper_case_letter | digit | special_character | space_character - -- - --subtype Basic_Graphic_Character is - -- Character_Kind_Type range Upper_Case_Letter .. Space_Character; - - -- LRM93 13.1 - -- graphic_character ::= - -- basic_graphic_character | lower_case_letter | other_special_character - -- - -- Note: There are 191 graphic characters. - subtype Graphic_Character is - Character_Kind_Type range Lower_Case_Letter .. Other_Special_Character; - - -- letter ::= upper_case_letter | lower_case_letter - subtype Letter is - Character_Kind_Type range Lower_Case_Letter .. Upper_Case_Letter; - - -- LRM93 13.1 - -- The characters included in each of the categories of basic graphic - -- characters are defined as follows: - type Character_Array is array (Character) of Character_Kind_Type; - pragma Suppress_Initialization (Character_Array); - Characters_Kind : constant Character_Array := - (NUL .. BS => Invalid, - - -- Format effectors are the ISO (and ASCII) characters called horizontal - -- tabulation, vertical tabulation, carriage return, line feed, and form - -- feed. - HT | LF | VT | FF | CR => Format_Effector, - - SO .. US => Invalid, - - -- 1. upper case letters - 'A' .. 'Z' | UC_A_Grave .. UC_O_Diaeresis | - UC_O_Oblique_Stroke .. UC_Icelandic_Thorn => Upper_Case_Letter, - - -- 2. digits - '0' .. '9' => Digit, - - -- 3. special characters - '"' | '#' | '&' | ''' | '(' | ')' | '+' | ',' | '-' | '.' | '/' - | ':' | ';' | '<' | '=' | '>' | '[' | ']' - | '_' | '|' | '*' => Special_Character, - - -- 4. the space characters - ' ' | NBSP => Space_Character, - - -- 5. lower case letters - 'a' .. 'z' | LC_German_Sharp_S .. LC_O_Diaeresis | - LC_O_Oblique_Stroke .. LC_Y_Diaeresis => Lower_Case_Letter, - - -- 6. other special characters - '!' | '$' | '%' | '@' | '?' | '\' | '^' | '`' | '{' | '}' | '~' - | Inverted_Exclamation .. Inverted_Question | Multiplication_Sign | - Division_Sign => Other_Special_Character, - - -- '¡' -- INVERTED EXCLAMATION MARK - -- '¢' -- CENT SIGN - -- '£' -- POUND SIGN - -- '¤' -- CURRENCY SIGN - -- '¥' -- YEN SIGN - -- '¦' -- BROKEN BAR - -- '§' -- SECTION SIGN - -- '¨' -- DIAERESIS - -- '©' -- COPYRIGHT SIGN - -- 'ª' -- FEMININE ORDINAL INDICATOR - -- '«' -- LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - -- '¬' -- NOT SIGN - -- '' -- SOFT HYPHEN - -- '®' -- REGISTERED SIGN - -- '¯' -- MACRON - -- '°' -- DEGREE SIGN - -- '±' -- PLUS-MINUS SIGN - -- '²' -- SUPERSCRIPT TWO - -- '³' -- SUPERSCRIPT THREE - -- '´' -- ACUTE ACCENT - -- 'µ' -- MICRO SIGN - -- '¶' -- PILCROW SIGN - -- '·' -- MIDDLE DOT - -- '¸' -- CEDILLA - -- '¹' -- SUPERSCRIPT ONE - -- 'º' -- MASCULINE ORDINAL INDICATOR - -- '»' -- RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - -- '¼' -- VULGAR FRACTION ONE QUARTER - -- '½' -- VULGAR FRACTION ONE HALF - -- '¾' -- VULGAR FRACTION THREE QUARTERS - -- '¿' -- INVERTED QUESTION MARK - -- '×' -- MULTIPLICATION SIGN - -- '÷' -- DIVISION SIGN - - DEL .. APC => Invalid); - - -- The context contains the whole internal state of the scanner, ie - -- it can be used to push/pop a lexical analysis, to restart the - -- scanner from a context marking a previous point. - type Scan_Context is record - Source : File_Buffer_Acc; - Source_File : Source_File_Entry; - Line_Number : Natural; - Line_Pos : Source_Ptr; - Prev_Pos : Source_Ptr; - Token_Pos : Source_Ptr; - Pos : Source_Ptr; - File_Len : Source_Ptr; - Token : Token_Type; - Prev_Token : Token_Type; - - -- Additional values for the current token. - Bit_Str_Base : Character; - Bit_Str_Sign : Character; - Str_Id : String8_Id; - Str_Len : Nat32; - Identifier: Name_Id; - Int64 : Iir_Int64; - Fp64 : Iir_Fp64; - end record; - pragma Suppress_Initialization (Scan_Context); - - -- Disp a message during scan. - -- The current location is automatically displayed before the message. - -- Disp a message during scan. - procedure Error_Msg_Scan (Msg: String) is - begin - Report_Msg (Msgid_Error, Scan, No_Location, Msg); - end Error_Msg_Scan; - - procedure Error_Msg_Scan (Loc : Location_Type; Msg: String) is - begin - Report_Msg (Msgid_Error, Scan, Loc, Msg); - end Error_Msg_Scan; - - procedure Error_Msg_Scan (Msg: String; Arg1 : Earg_Type) is - begin - Report_Msg (Msgid_Error, Scan, No_Location, Msg, (1 => Arg1)); - end Error_Msg_Scan; - - -- Disp a message during scan. - procedure Warning_Msg_Scan (Id : Msgid_Warnings; Msg: String) is - begin - Report_Msg (Id, Scan, No_Location, Msg); - end Warning_Msg_Scan; - - procedure Warning_Msg_Scan (Id : Msgid_Warnings; - Msg: String; - Arg1 : Earg_Type; - Cont : Boolean := False) is - begin - Report_Msg (Id, Scan, No_Location, Msg, (1 => Arg1), Cont); - end Warning_Msg_Scan; - - -- The current context. - -- Default value is an invalid context. - Current_Context: Scan_Context := (Source => null, - Source_File => No_Source_File_Entry, - Line_Number => 0, - Line_Pos => 0, - Pos => 0, - Prev_Pos => 0, - Token_Pos => 0, - File_Len => 0, - Token => Tok_Invalid, - Prev_Token => Tok_Invalid, - Identifier => Null_Identifier, - Bit_Str_Base => ' ', - Bit_Str_Sign => ' ', - Str_Id => Null_String8, - Str_Len => 0, - Int64 => 0, - Fp64 => 0.0); - - Source: File_Buffer_Acc renames Current_Context.Source; - Pos: Source_Ptr renames Current_Context.Pos; - - -- When CURRENT_TOKEN is an identifier, its name_id is stored into - -- this global variable. - -- Function current_text can be used to convert it into an iir. - function Current_Identifier return Name_Id is - begin - return Current_Context.Identifier; - end Current_Identifier; - - procedure Invalidate_Current_Identifier is - begin - Current_Context.Identifier := Null_Identifier; - end Invalidate_Current_Identifier; - - procedure Invalidate_Current_Token is - begin - if Current_Token /= Tok_Invalid then - Current_Context.Prev_Token := Current_Token; - Current_Token := Tok_Invalid; - end if; - end Invalidate_Current_Token; - - function Current_String_Id return String8_Id is - begin - return Current_Context.Str_Id; - end Current_String_Id; - - function Current_String_Length return Nat32 is - begin - return Current_Context.Str_Len; - end Current_String_Length; - - function Get_Bit_String_Base return Character is - begin - return Current_Context.Bit_Str_Base; - end Get_Bit_String_Base; - - function Get_Bit_String_Sign return Character is - begin - return Current_Context.Bit_Str_Sign; - end Get_Bit_String_Sign; - - function Current_Iir_Int64 return Iir_Int64 is - begin - return Current_Context.Int64; - end Current_Iir_Int64; - - function Current_Iir_Fp64 return Iir_Fp64 is - begin - return Current_Context.Fp64; - end Current_Iir_Fp64; - - function Get_Current_Source_File return Source_File_Entry is - begin - return Current_Context.Source_File; - end Get_Current_Source_File; - - function Get_Current_Line return Natural is - begin - return Current_Context.Line_Number; - end Get_Current_Line; - - function Get_Current_Offset return Natural is - begin - return Natural (Current_Context.Pos - Current_Context.Line_Pos); - end Get_Current_Offset; - - function Get_Token_Offset return Natural is - begin - return Natural (Current_Context.Token_Pos - Current_Context.Line_Pos); - end Get_Token_Offset; - - function Get_Token_Position return Source_Ptr is - begin - return Current_Context.Token_Pos; - end Get_Token_Position; - - function Get_Position return Source_Ptr is - begin - return Current_Context.Pos; - end Get_Position; - - function Get_Token_Location return Location_Type is - begin - return File_Pos_To_Location - (Current_Context.Source_File, Current_Context.Token_Pos); - end Get_Token_Location; - - function Get_Prev_Location return Location_Type is - begin - return File_Pos_To_Location - (Current_Context.Source_File, Current_Context.Prev_Pos); - end Get_Prev_Location; - - procedure Set_File (Source_File : Source_File_Entry) - is - N_Source: File_Buffer_Acc; - begin - pragma Assert (Current_Context.Source = null); - pragma Assert (Source_File /= No_Source_File_Entry); - N_Source := Get_File_Source (Source_File); - Current_Context := (Source => N_Source, - Source_File => Source_File, - Line_Number => 1, - Line_Pos => 0, - Prev_Pos => N_Source'First, - Pos => N_Source'First, - Token_Pos => 0, -- should be invalid, - File_Len => Get_File_Length (Source_File), - Token => Tok_Invalid, - Prev_Token => Tok_Invalid, - Identifier => Null_Identifier, - Bit_Str_Base => ' ', - Bit_Str_Sign => ' ', - Str_Id => Null_String8, - Str_Len => 0, - Int64 => -1, - Fp64 => 0.0); - Current_Token := Tok_Invalid; - end Set_File; - - function Detect_Encoding_Errors return Boolean - is - C : constant Character := Source (Pos); - begin - -- No need to check further if first character is plain ASCII-7 - if C >= ' ' and C < Character'Val (127) then - return False; - end if; - - -- UTF-8 BOM is EF BB BF - if Source (Pos + 0) = Character'Val (16#ef#) - and then Source (Pos + 1) = Character'Val (16#bb#) - and then Source (Pos + 2) = Character'Val (16#bf#) - then - Error_Msg_Scan - ("source encoding must be latin-1 (UTF-8 BOM detected)"); - return True; - end if; - - -- UTF-16 BE BOM is FE FF - if Source (Pos + 0) = Character'Val (16#fe#) - and then Source (Pos + 1) = Character'Val (16#ff#) - then - Error_Msg_Scan - ("source encoding must be latin-1 (UTF-16 BE BOM detected)"); - return True; - end if; - - -- UTF-16 LE BOM is FF FE - if Source (Pos + 0) = Character'Val (16#ff#) - and then Source (Pos + 1) = Character'Val (16#fe#) - then - Error_Msg_Scan - ("source encoding must be latin-1 (UTF-16 LE BOM detected)"); - return True; - end if; - - -- Certainly weird, but scanner/parser will catch it. - return False; - end Detect_Encoding_Errors; - - procedure Set_Current_Position (Position: Source_Ptr) - is - Loc : Location_Type; - Offset: Natural; - File_Entry : Source_File_Entry; - begin - -- Scanner must have been initialized. - pragma Assert (Current_Context.Source /= null); - - Current_Token := Tok_Invalid; - Current_Context.Pos := Position; - Loc := File_Pos_To_Location (Current_Context.Source_File, - Current_Context.Pos); - Location_To_Coord (Loc, - File_Entry, Current_Context.Line_Pos, - Current_Context.Line_Number, Offset); - end Set_Current_Position; - - procedure Close_File is - begin - Current_Context.Source := null; - end Close_File; - - -- Emit an error when a character above 128 was found. - -- This must be called only in vhdl87. - procedure Error_8bit is - begin - Error_Msg_Scan ("8 bits characters not allowed in vhdl87"); - end Error_8bit; - - -- Emit an error when a separator is expected. - procedure Error_Separator is - begin - Error_Msg_Scan ("a separator is required here"); - end Error_Separator; - - -- scan a decimal literal or a based literal. - -- - -- LRM93 13.4.1 - -- DECIMAL_LITERAL ::= INTEGER [ . INTEGER ] [ EXPONENT ] - -- EXPONENT ::= E [ + ] INTEGER | E - INTEGER - -- - -- LRM93 13.4.2 - -- BASED_LITERAL ::= BASE # BASED_INTEGER [ . BASED_INTEGER ] # EXPONENT - -- BASE ::= INTEGER - procedure Scan_Literal is separate; - - -- Scan a string literal. - -- - -- LRM93 13.6 / LRM08 15.7 - -- A string literal is formed by a sequence of graphic characters - -- (possibly none) enclosed between two quotation marks used as string - -- brackets. - -- STRING_LITERAL ::= " { GRAPHIC_CHARACTER } " - -- - -- IN: for a string, at the call of this procedure, the current character - -- must be either '"' or '%'. - procedure Scan_String - is - -- The quotation character (can be " or %). - Mark: Character; - -- Current character. - C : Character; - -- Current length. - Length : Nat32; - begin - -- String delimiter. - Mark := Source (Pos); - pragma Assert (Mark = '"' or else Mark = '%'); - - Pos := Pos + 1; - Length := 0; - Current_Context.Str_Id := Str_Table.Create_String8; - loop - C := Source (Pos); - if C = Mark then - -- LRM93 13.6 - -- If a quotation mark value is to be represented in the sequence - -- of character values, then a pair of adjacent quoatation - -- characters marks must be written at the corresponding place - -- within the string literal. - -- LRM93 13.10 - -- Any pourcent sign within the sequence of characters must then - -- be doubled, and each such doubled percent sign is interpreted - -- as a single percent sign value. - -- The same replacement is allowed for a bit string literal, - -- provieded that both bit string brackets are replaced. - Pos := Pos + 1; - exit when Source (Pos) /= Mark; - end if; - - case Characters_Kind (C) is - when Format_Effector => - if Mark = '%' then - -- No matching '%' has been found. Consider '%' was used - -- as the remainder operator, instead of 'rem'. This will - -- improve the error message. - Error_Msg_Scan - (Get_Token_Location, - "'%%' is not a vhdl operator, use 'rem'"); - Current_Token := Tok_Rem; - Pos := Current_Context.Token_Pos + 1; - return; - end if; - if C = CR or C = LF then - Error_Msg_Scan - ("string cannot be multi-line, use concatenation"); - else - Error_Msg_Scan ("format effector not allowed in a string"); - end if; - exit; - when Invalid => - if C = Files_Map.EOT - and then Pos >= Current_Context.File_Len - then - Error_Msg_Scan ("string not terminated at end of file"); - exit; - end if; - - Error_Msg_Scan - ("invalid character not allowed, even in a string"); - when Graphic_Character => - if Vhdl_Std = Vhdl_87 and then C > Character'Val (127) then - Error_8bit; - end if; - end case; - - if C = '"' and Mark = '%' then - -- LRM93 13.10 - -- The quotation marks (") used as string brackets at both ends of - -- a string literal can be replaced by percent signs (%), provided - -- that the enclosed sequence of characters constains no quotation - -- marks, and provided that both string brackets are replaced. - Error_Msg_Scan - ("'""' cannot be used in a string delimited with '%%'"); - end if; - - Length := Length + 1; - Str_Table.Append_String8 (Character'Pos (C)); - Pos := Pos + 1; - end loop; - - Current_Token := Tok_String; - Current_Context.Str_Len := Length; - end Scan_String; - - -- Scan a bit string literal. - -- - -- LRM93 13.7 - -- A bit string literal is formed by a sequence of extended digits - -- (possibly none) enclosed between two quotations used as bit string - -- brackets, preceded by a base specifier. - -- BIT_STRING_LITERAL ::= BASE_SPECIFIER " [ BIT_VALUE ] " - -- BIT_VALUE ::= EXTENDED_DIGIT { [ UNDERLINE ] EXTENDED_DIGIT } - -- - -- The current character must be a base specifier, followed by '"' or '%'. - -- The base must be valid. - procedure Scan_Bit_String (Base_Log : Nat32) - is - -- Position of character '0'. - Pos_0 : constant Nat8 := Character'Pos ('0'); - - -- Used for the base. - subtype Nat4 is Natural range 1 .. 4; - Base : constant Nat32 := 2 ** Nat4 (Base_Log); - - -- The quotation character (can be " or %). - Orig_Pos : constant Source_Ptr := Pos; - Mark : constant Character := Source (Orig_Pos); - -- Current character. - C : Character; - -- Current length. - Length : Nat32; - -- Digit value. - V, D : Nat8; - -- True if invalid character already found, to avoid duplicate message. - Has_Invalid : Boolean; - begin - pragma Assert (Mark = '"' or else Mark = '%'); - Pos := Pos + 1; - Length := 0; - Has_Invalid := False; - Current_Context.Str_Id := Str_Table.Create_String8; - loop - << Again >> null; - C := Source (Pos); - Pos := Pos + 1; - exit when C = Mark; - - -- LRM93 13.7 - -- If the base specifier is 'B', the extended digits in the bit - -- value are restricted to 0 and 1. - -- If the base specifier is 'O', the extended digits int the bit - -- value are restricted to legal digits in the octal number - -- system, ie, the digits 0 through 7. - -- If the base specifier is 'X', the extended digits are all digits - -- together with the letters A through F. - case C is - when '0' .. '9' => - V := Character'Pos (C) - Character'Pos ('0'); - when 'A' .. 'F' => - V := Character'Pos (C) - Character'Pos ('A') + 10; - when 'a' .. 'f' => - -- LRM93 13.7 - -- A letter in a bit string literal (...) can be written either - -- in lowercase or in upper case, with the same meaning. - V := Character'Pos (C) - Character'Pos ('a') + 10; - when '_' => - if Source (Pos) = '_' then - Error_Msg_Scan - ("double underscore not allowed in a bit string"); - end if; - if Source (Pos - 2) = Mark then - Error_Msg_Scan - ("underscore not allowed at the start of a bit string"); - elsif Source (Pos) = Mark then - Error_Msg_Scan - ("underscore not allowed at the end of a bit string"); - end if; - goto Again; - when '"' => - pragma Assert (Mark = '%'); - Error_Msg_Scan - ("'""' cannot close a bit string opened by '%%'"); - exit; - when '%' => - pragma Assert (Mark = '"'); - Error_Msg_Scan - ("'%%' cannot close a bit string opened by '""'"); - exit; - when others => - if Characters_Kind (C) in Graphic_Character then - if Vhdl_Std >= Vhdl_08 then - V := Nat8'Last; - else - if not Has_Invalid then - Error_Msg_Scan ("invalid character in bit string"); - Has_Invalid := True; - end if; - -- Continue the bit string - V := 0; - end if; - else - if Mark = '%' then - Error_Msg_Scan - (File_Pos_To_Location - (Current_Context.Source_File, Orig_Pos), - "'%%' is not a vhdl operator, use 'rem'"); - Current_Token := Tok_Rem; - Pos := Orig_Pos + 1; - return; - else - Error_Msg_Scan ("bit string not terminated"); - Pos := Pos - 1; - end if; - exit; - end if; - end case; - - -- Expand bit value. - if Vhdl_Std >= Vhdl_08 and V > Base then - -- Expand as graphic character. - for I in 1 .. Base_Log loop - Str_Table.Append_String8_Char (C); - end loop; - else - -- Expand as extended digits. - case Base_Log is - when 1 => - if V > 1 then - Error_Msg_Scan - ("invalid character in a binary bit string"); - V := 1; - end if; - Str_Table.Append_String8 (Pos_0 + V); - when 3 => - if V > 7 then - Error_Msg_Scan - ("invalid character in a octal bit string"); - V := 7; - end if; - for I in 1 .. 3 loop - D := V / 4; - Str_Table.Append_String8 (Pos_0 + D); - V := (V - 4 * D) * 2; - end loop; - when 4 => - for I in 1 .. 4 loop - D := V / 8; - Str_Table.Append_String8 (Pos_0 + D); - V := (V - 8 * D) * 2; - end loop; - when others => - raise Internal_Error; - end case; - end if; - - Length := Length + Base_Log; - end loop; - - -- Note: the length of the bit string may be 0. - - Current_Token := Tok_Bit_String; - Current_Context.Str_Len := Length; - end Scan_Bit_String; - - -- Scan a decimal bit string literal. For base specifier D the algorithm - -- is rather different: all the graphic characters shall be digits, and we - -- need to use a (not very efficient) arbitrary precision multiplication. - procedure Scan_Dec_Bit_String - is - use Str_Table; - - Id : String8_Id; - - -- Position of character '0'. - Pos_0 : constant Nat8 := Character'Pos ('0'); - - -- Current character. - C : Character; - -- Current length. - Length : Nat32; - -- Digit value. - V, D : Nat8; - - type Carries_Type is array (0 .. 3) of Nat8; - Carries : Carries_Type; - No_Carries : constant Carries_Type := (others => Pos_0); - - -- Shift right carries. Note the Carries (0) is the LSB. - procedure Shr_Carries is - begin - Carries := (Carries (1), Carries (2), Carries (3), Pos_0); - end Shr_Carries; - - procedure Append_Carries is - begin - -- Expand the bit string. Note that position 1 of the string8 is - -- the MSB. - while Carries /= No_Carries loop - Append_String8 (Pos_0); - Length := Length + 1; - for I in reverse 2 .. Length loop - Set_Element_String8 (Id, I, Element_String8 (Id, I - 1)); - end loop; - Set_Element_String8 (Id, 1, Carries (0)); - Shr_Carries; - end loop; - end Append_Carries; - - -- Add 1 to Carries. Overflow is not allowed and should be prevented by - -- construction. - procedure Add_One_To_Carries is - begin - for I in Carries'Range loop - if Carries (I) = Pos_0 then - Carries (I) := Pos_0 + 1; - -- End of propagation. - exit; - else - Carries (I) := Pos_0; - -- Continue propagation. - pragma Assert (I < Carries'Last); - end if; - end loop; - end Add_One_To_Carries; - begin - pragma Assert (Source (Pos) = '"'); - Pos := Pos + 1; - Length := 0; - Id := Create_String8; - Current_Context.Str_Id := Id; - loop - << Again >> null; - C := Source (Pos); - Pos := Pos + 1; - exit when C = '"'; - - if C in '0' .. '9' then - V := Character'Pos (C) - Character'Pos ('0'); - elsif C = '_' then - if Source (Pos) = '_' then - Error_Msg_Scan - ("double underscore not allowed in a bit string"); - end if; - if Source (Pos - 2) = '"' then - Error_Msg_Scan - ("underscore not allowed at the start of a bit string"); - elsif Source (Pos) = '"' then - Error_Msg_Scan - ("underscore not allowed at the end of a bit string"); - end if; - goto Again; - else - if Characters_Kind (C) in Graphic_Character then - Error_Msg_Scan - ("graphic character not allowed in decimal bit string"); - -- Continue the bit string - V := 0; - else - Error_Msg_Scan ("bit string not terminated"); - Pos := Pos - 1; - exit; - end if; - end if; - - -- Multiply by 10. - Carries := (others => Pos_0); - for I in reverse 1 .. Length loop - -- Shift by 1 (*2). - D := Element_String8 (Id, I); - Set_Element_String8 (Id, I, Carries (0)); - Shr_Carries; - -- Add D and D * 4. - if D /= Pos_0 then - Add_One_To_Carries; - -- Add_Four_To_Carries: - for I in 2 .. 3 loop - if Carries (I) = Pos_0 then - Carries (I) := Pos_0 + 1; - -- End of propagation. - exit; - else - Carries (I) := Pos_0; - -- Continue propagation. - end if; - end loop; - end if; - end loop; - Append_Carries; - - -- Add V. - for I in Carries'Range loop - D := V / 2; - Carries (I) := Pos_0 + (V - 2 * D); - V := D; - end loop; - for I in reverse 1 .. Length loop - D := Element_String8 (Id, I); - if D /= Pos_0 then - Add_One_To_Carries; - end if; - Set_Element_String8 (Id, I, Carries (0)); - Shr_Carries; - exit when Carries = No_Carries; - end loop; - Append_Carries; - end loop; - - Current_Token := Tok_Bit_String; - Current_Context.Str_Len := Length; - end Scan_Dec_Bit_String; - - -- LRM08 15.2 Character set - -- For each uppercase letter, there is a corresponding lowercase letter; - -- and for each lowercase letter except [y diaeresis] and [german sharp s], - -- there is a corresponding uppercase letter. - type Character_Map is array (Character) of Character; - To_Lower_Map : constant Character_Map := - ( - -- Uppercase ASCII letters. - 'A' => 'a', - 'B' => 'b', - 'C' => 'c', - 'D' => 'd', - 'E' => 'e', - 'F' => 'f', - 'G' => 'g', - 'H' => 'h', - 'I' => 'i', - 'J' => 'j', - 'K' => 'k', - 'L' => 'l', - 'M' => 'm', - 'N' => 'n', - 'O' => 'o', - 'P' => 'p', - 'Q' => 'q', - 'R' => 'r', - 'S' => 's', - 'T' => 't', - 'U' => 'u', - 'V' => 'v', - 'W' => 'w', - 'X' => 'x', - 'Y' => 'y', - 'Z' => 'z', - - -- Lowercase ASCII letters. - 'a' => 'a', - 'b' => 'b', - 'c' => 'c', - 'd' => 'd', - 'e' => 'e', - 'f' => 'f', - 'g' => 'g', - 'h' => 'h', - 'i' => 'i', - 'j' => 'j', - 'k' => 'k', - 'l' => 'l', - 'm' => 'm', - 'n' => 'n', - 'o' => 'o', - 'p' => 'p', - 'q' => 'q', - 'r' => 'r', - 's' => 's', - 't' => 't', - 'u' => 'u', - 'v' => 'v', - 'w' => 'w', - 'x' => 'x', - 'y' => 'y', - 'z' => 'z', - - -- Uppercase Latin-1 letters. - UC_A_Grave => LC_A_Grave, - UC_A_Acute => LC_A_Acute, - UC_A_Circumflex => LC_A_Circumflex, - UC_A_Tilde => LC_A_Tilde, - UC_A_Diaeresis => LC_A_Diaeresis, - UC_A_Ring => LC_A_Ring, - UC_AE_Diphthong => LC_AE_Diphthong, - UC_C_Cedilla => LC_C_Cedilla, - UC_E_Grave => LC_E_Grave, - UC_E_Acute => LC_E_Acute, - UC_E_Circumflex => LC_E_Circumflex, - UC_E_Diaeresis => LC_E_Diaeresis, - UC_I_Grave => LC_I_Grave, - UC_I_Acute => LC_I_Acute, - UC_I_Circumflex => LC_I_Circumflex, - UC_I_Diaeresis => LC_I_Diaeresis, - UC_Icelandic_Eth => LC_Icelandic_Eth, - UC_N_Tilde => LC_N_Tilde, - UC_O_Grave => LC_O_Grave, - UC_O_Acute => LC_O_Acute, - UC_O_Circumflex => LC_O_Circumflex, - UC_O_Tilde => LC_O_Tilde, - UC_O_Diaeresis => LC_O_Diaeresis, - UC_O_Oblique_Stroke => LC_O_Oblique_Stroke, - UC_U_Grave => LC_U_Grave, - UC_U_Acute => LC_U_Acute, - UC_U_Circumflex => LC_U_Circumflex, - UC_U_Diaeresis => LC_U_Diaeresis, - UC_Y_Acute => LC_Y_Acute, - UC_Icelandic_Thorn => LC_Icelandic_Thorn, - - -- Lowercase Latin-1 letters. - LC_A_Grave => LC_A_Grave, - LC_A_Acute => LC_A_Acute, - LC_A_Circumflex => LC_A_Circumflex, - LC_A_Tilde => LC_A_Tilde, - LC_A_Diaeresis => LC_A_Diaeresis, - LC_A_Ring => LC_A_Ring, - LC_AE_Diphthong => LC_AE_Diphthong, - LC_C_Cedilla => LC_C_Cedilla, - LC_E_Grave => LC_E_Grave, - LC_E_Acute => LC_E_Acute, - LC_E_Circumflex => LC_E_Circumflex, - LC_E_Diaeresis => LC_E_Diaeresis, - LC_I_Grave => LC_I_Grave, - LC_I_Acute => LC_I_Acute, - LC_I_Circumflex => LC_I_Circumflex, - LC_I_Diaeresis => LC_I_Diaeresis, - LC_Icelandic_Eth => LC_Icelandic_Eth, - LC_N_Tilde => LC_N_Tilde, - LC_O_Grave => LC_O_Grave, - LC_O_Acute => LC_O_Acute, - LC_O_Circumflex => LC_O_Circumflex, - LC_O_Tilde => LC_O_Tilde, - LC_O_Diaeresis => LC_O_Diaeresis, - LC_O_Oblique_Stroke => LC_O_Oblique_Stroke, - LC_U_Grave => LC_U_Grave, - LC_U_Acute => LC_U_Acute, - LC_U_Circumflex => LC_U_Circumflex, - LC_U_Diaeresis => LC_U_Diaeresis, - LC_Y_Acute => LC_Y_Acute, - LC_Icelandic_Thorn => LC_Icelandic_Thorn, - - -- Lowercase latin-1 characters without corresponding uppercase one. - LC_Y_Diaeresis => LC_Y_Diaeresis, - LC_German_Sharp_S => LC_German_Sharp_S, - - -- Not a letter. - others => NUL); - - procedure Error_Too_Long is - begin - Error_Msg_Scan ("identifier is too long (>" - & Natural'Image (Max_Name_Length - 1) & ")"); - end Error_Too_Long; - - -- LRM93 13.3.1 - -- Basic Identifiers - -- A basic identifier consists only of letters, digits, and underlines. - -- BASIC_IDENTIFIER ::= LETTER { [ UNDERLINE ] LETTER_OR_DIGIT } - -- LETTER_OR_DIGIT ::= LETTER | DIGIT - -- LETTER ::= UPPER_CASE_LETTER | LOWER_CASE_LETTER - -- - -- NB: At the call of this procedure, the current character must be a legal - -- character for a basic identifier. - procedure Scan_Identifier (Allow_PSL : Boolean) - is - use Name_Table; - Buffer : String (1 .. Max_Name_Length); - C : Character; - Len : Natural; - begin - -- This is an identifier or a key word. - Len := 0; - loop - -- Source (pos) is correct. - -- LRM93 13.3.1 - -- All characters if a basic identifier are signifiant, including - -- any underline character inserted between a letter or digit and - -- an adjacent letter or digit. - -- Basic identifiers differing only in the use of the corresponding - -- upper and lower case letters are considered as the same. - -- - -- GHDL: This is achieved by converting all upper case letters into - -- equivalent lower case letters. - -- The opposite (converting to upper lower case letters) is not - -- possible because two characters have no upper-case equivalent. - C := Source (Pos); - case C is - when 'A' .. 'Z' => - C := Character'Val - (Character'Pos (C) - + Character'Pos ('a') - Character'Pos ('A')); - when 'a' .. 'z' | '0' .. '9' => - null; - when '_' => - if Source (Pos + 1) = '_' then - Error_Msg_Scan ("two underscores can't be consecutive"); - end if; - when ' ' | ')' | '.' | ';' | ':' => - exit; - when others => - -- Non common case. - case Characters_Kind (C) is - when Upper_Case_Letter | Lower_Case_Letter => - if Vhdl_Std = Vhdl_87 then - Error_8bit; - end if; - C := To_Lower_Map (C); - pragma Assert (C /= NUL); - when Digit => - raise Internal_Error; - when others => - exit; - end case; - end case; - - -- Put character in name buffer. FIXME: compute the hash at the same - -- time ? - if Len >= Max_Name_Length - 1 then - if Len = Max_Name_Length -1 then - Error_Msg_Scan ("identifier is too long (>" - & Natural'Image (Max_Name_Length - 1) & ")"); - -- Accept this last one character, so that no error for the - -- following characters. - Len := Len + 1; - Buffer (Len) := C; - end if; - else - Len := Len + 1; - Buffer (Len) := C; - end if; - - -- Next character. - Pos := Pos + 1; - end loop; - - if Source (Pos - 1) = '_' then - if Allow_PSL then - -- Some PSL reserved words finish with '_'. This case is handled - -- later by Scan_Underscore and Scan_Exclam_Mark. - Pos := Pos - 1; - Len := Len - 1; - C := '_'; - else - -- Eat the trailing underscore. - Error_Msg_Scan ("an identifier cannot finish with '_'"); - end if; - end if; - - -- LRM93 13.2 - -- At least one separator is required between an identifier or an - -- abstract literal and an adjacent identifier or abstract literal. - case Characters_Kind (C) is - when Digit - | Upper_Case_Letter - | Lower_Case_Letter => - raise Internal_Error; - when Other_Special_Character | Special_Character => - if (C = '"' or C = '%') and then Len <= 2 then - if C = '%' and Vhdl_Std >= Vhdl_08 then - Error_Msg_Scan ("'%%' not allowed in vhdl 2008 " - & "(was replacement character)"); - -- Continue as a bit string. - end if; - - -- Good candidate for bit string. - - -- LRM93 13.7 - -- BASE_SPECIFIER ::= B | O | X - -- - -- A letter in a bit string literal (either an extended digit - -- or the base specifier) can be written either in lower case - -- or in upper case, with the same meaning. - -- - -- LRM08 15.8 Bit string literals - -- BASE_SPECICIER ::= - -- B | O | X | UB | UO | UX | SB | SO | SX | D - -- - -- An extended digit and the base specifier in a bit string - -- literal can be written either in lowercase or in uppercase, - -- with the same meaning. - declare - Base : Nat32; - Cl : constant Character := Buffer (Len); - Cf : constant Character := Buffer (1); - begin - Current_Context.Bit_Str_Base := Cl; - if Cl = 'b' then - Base := 1; - elsif Cl = 'o' then - Base := 3; - elsif Cl = 'x' then - Base := 4; - elsif Vhdl_Std >= Vhdl_08 and Len = 1 and Cf = 'd' then - Current_Context.Bit_Str_Sign := ' '; - Scan_Dec_Bit_String; - return; - else - Base := 0; - end if; - if Base > 0 then - if Len = 1 then - Current_Context.Bit_Str_Sign := ' '; - Scan_Bit_String (Base); - return; - elsif Vhdl_Std >= Vhdl_08 - and then (Cf = 's' or Cf = 'u') - then - Current_Context.Bit_Str_Sign := Cf; - Scan_Bit_String (Base); - return; - end if; - end if; - end; - elsif Vhdl_Std > Vhdl_87 and then C = '\' then - -- Start of extended identifier. Cannot follow an identifier. - Error_Separator; - end if; - - when Invalid => - -- Improve error message for use of UTF-8 quote marks. - -- It's possible because in the sequence of UTF-8 bytes for the - -- quote marks, there are invalid character (in the 128-160 - -- range). - if C = Character'Val (16#80#) - and then Buffer (Len) = Character'Val (16#e2#) - and then (Source (Pos + 1) = Character'Val (16#98#) - or else Source (Pos + 1) = Character'Val (16#99#)) - then - -- UTF-8 left or right single quote mark. - if Len > 1 then - -- The first byte (0xe2) is part of the identifier. An - -- error will be detected as the next byte (0x80) is - -- invalid. Remove the first byte from the identifier, and - -- let's catch the error later. - Len := Len - 1; - Pos := Pos - 1; - else - Error_Msg_Scan ("invalid use of UTF8 character for '"); - Pos := Pos + 2; - - -- Distinguish between character literal and tick. Don't - -- care about possible invalid character literal, as in any - -- case we have already emitted an error message. - if Current_Context.Prev_Token /= Tok_Identifier - and then Current_Context.Prev_Token /= Tok_Character - and then - (Source (Pos + 1) = ''' - or else - (Source (Pos + 1) = Character'Val (16#e2#) - and then Source (Pos + 2) = Character'Val (16#80#) - and then Source (Pos + 3) = Character'Val (16#99#))) - then - Current_Token := Tok_Character; - Current_Context.Identifier := - Name_Table.Get_Identifier (Source (Pos)); - if Source (Pos + 1) = ''' then - Pos := Pos + 2; - else - Pos := Pos + 4; - end if; - else - Current_Token := Tok_Tick; - end if; - return; - end if; - end if; - when Format_Effector - | Space_Character => - null; - end case; - - -- Hash it. - Current_Context.Identifier := Get_Identifier (Buffer (1 .. Len)); - Current_Token := Tok_Identifier; - end Scan_Identifier; - - procedure Identifier_To_Token is - begin - if Current_Identifier in Std_Names.Name_Id_Keywords then - -- LRM93 13.9 - -- The identifiers listed below are called reserved words and are - -- reserved for signifiances in the language. - -- IN: this is also achieved in packages std_names and tokens. - Current_Token := Token_Type'Val - (Token_Type'Pos (Tok_First_Keyword) - + Current_Identifier - Std_Names.Name_First_Keyword); - case Current_Identifier is - when Std_Names.Name_Id_AMS_Reserved_Words => - if not AMS_Vhdl then - if Is_Warning_Enabled (Warnid_Reserved_Word) then - Warning_Msg_Scan - (Warnid_Reserved_Word, - "using %i AMS-VHDL reserved word as an identifier", - +Current_Identifier); - end if; - Current_Token := Tok_Identifier; - end if; - when Std_Names.Name_Id_Vhdl08_Reserved_Words => - if Vhdl_Std < Vhdl_08 then - if Is_Warning_Enabled (Warnid_Reserved_Word) then - Warning_Msg_Scan - (Warnid_Reserved_Word, - "using %i vhdl-2008 reserved word as an identifier", - +Current_Identifier); - end if; - Current_Token := Tok_Identifier; - end if; - when Std_Names.Name_Id_Vhdl00_Reserved_Words => - if Vhdl_Std < Vhdl_00 then - if Is_Warning_Enabled (Warnid_Reserved_Word) then - Warning_Msg_Scan - (Warnid_Reserved_Word, - "using %i vhdl-2000 reserved word as an identifier", - +Current_Identifier); - end if; - Current_Token := Tok_Identifier; - end if; - when Std_Names.Name_Id_Vhdl93_Reserved_Words => - if Vhdl_Std = Vhdl_87 then - if Is_Warning_Enabled (Warnid_Reserved_Word) then - Warning_Msg_Scan - (Warnid_Reserved_Word, - "using %i vhdl93 reserved word as a vhdl87 identifier", - +Current_Identifier, True); - Warning_Msg_Scan - (Warnid_Reserved_Word, - "(use option --std=93 to compile as vhdl93)"); - end if; - Current_Token := Tok_Identifier; - end if; - when Std_Names.Name_Id_Vhdl87_Reserved_Words => - null; - when others => - raise Program_Error; - end case; - elsif Flag_Psl then - case Current_Identifier is - when Std_Names.Name_Clock => - Current_Token := Tok_Psl_Clock; - when Std_Names.Name_Const => - Current_Token := Tok_Psl_Const; - when Std_Names.Name_Boolean => - Current_Token := Tok_Psl_Boolean; - when Std_Names.Name_Sequence => - Current_Token := Tok_Psl_Sequence; - when Std_Names.Name_Property => - Current_Token := Tok_Psl_Property; - when Std_Names.Name_Endpoint => - Current_Token := Tok_Psl_Endpoint; - when Std_Names.Name_Cover => - Current_Token := Tok_Psl_Cover; - when Std_Names.Name_Default => - Current_Token := Tok_Psl_Default; - when Std_Names.Name_Inf => - Current_Token := Tok_Inf; - when Std_Names.Name_Within => - Current_Token := Tok_Within; - when Std_Names.Name_Abort => - Current_Token := Tok_Abort; - when Std_Names.Name_Before => - Current_Token := Tok_Before; - when Std_Names.Name_Always => - Current_Token := Tok_Always; - when Std_Names.Name_Never => - Current_Token := Tok_Never; - when Std_Names.Name_Eventually => - Current_Token := Tok_Eventually; - when Std_Names.Name_Next_A => - Current_Token := Tok_Next_A; - when Std_Names.Name_Next_E => - Current_Token := Tok_Next_E; - when Std_Names.Name_Next_Event => - Current_Token := Tok_Next_Event; - when Std_Names.Name_Next_Event_A => - Current_Token := Tok_Next_Event_A; - when Std_Names.Name_Next_Event_E => - Current_Token := Tok_Next_Event_E; - when Std_Names.Name_Until => - Current_Token := Tok_Until; - when others => - Current_Token := Tok_Identifier; - if Source (Pos - 1) = '_' then - Error_Msg_Scan ("identifiers cannot finish with '_'"); - end if; - end case; - end if; - end Identifier_To_Token; - - -- LRM93 13.3.2 - -- EXTENDED_IDENTIFIER ::= \ GRAPHIC_CHARACTER { GRAPHIC_CHARACTER } \ - -- - -- Create an (extended) indentifier. - -- Extended identifiers are stored as they appear (leading and tailing - -- backslashes, doubling backslashes inside). - procedure Scan_Extended_Identifier - is - use Name_Table; - Buffer : String (1 .. Max_Name_Length); - Len : Natural; - C : Character; - begin - -- LRM93 13.3.2 - -- Moreover, every extended identifiers is distinct from any basic - -- identifier. - -- GHDL: This is satisfied by storing '\' in the name table. - Len := 1; - Buffer (1) := '\'; - loop - -- Next character. - Pos := Pos + 1; - C := Source (Pos); - - if C = '\' then - -- LRM93 13.3.2 - -- If a backslash is to be used as one of the graphic characters - -- of an extended literal, it must be doubled. - -- LRM93 13.3.2 - -- (a doubled backslash couting as one character) - if Len >= Max_Name_Length - 1 then - if Len = Max_Name_Length - 1 then - Error_Too_Long; - -- Accept this last one. - Len := Len + 1; - Buffer (Len) := C; - end if; - else - Len := Len + 1; - Buffer (Len) := C; - end if; - - Pos := Pos + 1; - C := Source (Pos); - - exit when C /= '\'; - end if; - - case Characters_Kind (C) is - when Format_Effector => - Error_Msg_Scan ("format effector in extended identifier"); - exit; - when Graphic_Character => - null; - when Invalid => - if C = Files_Map.EOT - and then Pos >= Current_Context.File_Len - then - Error_Msg_Scan - ("extended identifier not terminated at end of file"); - elsif C = LF or C = CR then - Error_Msg_Scan - ("extended identifier not terminated at end of line"); - else - Error_Msg_Scan ("invalid character in extended identifier"); - end if; - exit; - end case; - - -- LRM93 13.3.2 - -- Extended identifiers differing only in the use of corresponding - -- upper and lower case letters are distinct. - if Len >= Max_Name_Length - 1 then - if Len = Max_Name_Length - 1 then - Error_Too_Long; - -- Accept this last one. - Len := Len + 1; - Buffer (Len) := C; - end if; - else - Len := Len + 1; - Buffer (Len) := C; - end if; - end loop; - - if Len <= 2 then - Error_Msg_Scan ("empty extended identifier is not allowed"); - end if; - - -- LRM93 13.2 - -- At least one separator is required between an identifier or an - -- abstract literal and an adjacent identifier or abstract literal. - case Characters_Kind (C) is - when Digit - | Upper_Case_Letter - | Lower_Case_Letter => - Error_Separator; - when Invalid - | Format_Effector - | Space_Character - | Special_Character - | Other_Special_Character => - null; - end case; - - -- Hash it. - Current_Context.Identifier := Get_Identifier (Buffer (1 .. Len)); - Current_Token := Tok_Identifier; - end Scan_Extended_Identifier; - - procedure Convert_Identifier (Str : in out String) - is - procedure Error_Bad is - begin - Error_Msg_Option ("bad character in identifier"); - end Error_Bad; - - procedure Error_8bit is - begin - Error_Msg_Option ("8 bits characters not allowed in vhdl87"); - end Error_8bit; - - C : Character; - subtype Id_Subtype is String (1 .. Str'Length); - Id : Id_Subtype renames Str; - begin - if Id'Length = 0 then - Error_Msg_Option ("identifier required"); - return; - end if; - - if Id (1) = '\' then - -- Extended identifier. - if Vhdl_Std = Vhdl_87 then - Error_Msg_Option ("extended identifiers not allowed in vhdl87"); - return; - end if; - - if Id'Length < 3 then - Error_Msg_Option ("extended identifier is too short"); - return; - end if; - if Id (Id'Last) /= '\' then - Error_Msg_Option ("extended identifier must finish with a '\'"); - return; - end if; - for I in 2 .. Id'Last - 1 loop - C := Id (I); - case Characters_Kind (C) is - when Format_Effector => - Error_Msg_Option ("format effector in extended identifier"); - return; - when Graphic_Character => - if C = '\' then - if Id (I + 1) /= '\' - or else I = Id'Last - 1 - then - Error_Msg_Option ("anti-slash must be doubled " - & "in extended identifier"); - return; - end if; - end if; - when Invalid => - Error_Bad; - end case; - end loop; - else - -- Identifier - for I in 1 .. Id'Length loop - C := Id (I); - case Characters_Kind (C) is - when Upper_Case_Letter => - if Vhdl_Std = Vhdl_87 and C > 'Z' then - Error_8bit; - end if; - Id (I) := To_Lower_Map (C); - when Lower_Case_Letter | Digit => - if Vhdl_Std = Vhdl_87 and C > 'z' then - Error_8bit; - end if; - when Special_Character => - -- The current character is legal in an identifier. - if C = '_' then - if I = 1 then - Error_Msg_Option - ("an identifier cannot start with an underscore"); - return; - end if; - if Id (I - 1) = '_' then - Error_Msg_Option - ("two underscores can't be consecutive"); - return; - end if; - if I = Id'Last then - Error_Msg_Option - ("an identifier cannot finish with an underscore"); - return; - end if; - else - Error_Bad; - end if; - when others => - Error_Bad; - end case; - end loop; - end if; - end Convert_Identifier; - - -- Internal scanner function: return True if C must be considered as a line - -- terminator. This also includes EOT (which terminates the file or is - -- invalid). - function Is_EOL (C : Character) return Boolean is - begin - case C is - when CR | LF | VT | FF | Files_Map.EOT => - return True; - when others => - return False; - end case; - end Is_EOL; - - -- Advance scanner till the first non-space character. - procedure Skip_Spaces is - begin - while Source (Pos) = ' ' or Source (Pos) = HT loop - Pos := Pos + 1; - end loop; - end Skip_Spaces; - - -- Eat all characters until end-of-line (not included). - procedure Skip_Until_EOL is - begin - while not Is_EOL (Source (Pos)) loop - -- Don't warn about invalid character, it's somewhat out of the - -- scope. - Pos := Pos + 1; - end loop; - end Skip_Until_EOL; - - -- Scan an identifier within a comment. Only lower case letters are - -- allowed. - procedure Scan_Comment_Identifier (Id : out Name_Id) - is - use Name_Table; - Buffer : String (1 .. Max_Name_Length); - Len : Natural; - C : Character; - begin - Id := Null_Identifier; - Skip_Spaces; - - -- The identifier shall start with a lower case letter. - if Source (Pos) not in 'a' .. 'z' then - return; - end if; - - -- Scan the identifier (in lower cases). - Len := 0; - loop - C := Source (Pos); - exit when C not in 'a' .. 'z' and C /= '_'; - Len := Len + 1; - Buffer (Len) := C; - Pos := Pos + 1; - end loop; - - -- Shall be followed by a space or a new line. - if not (C = ' ' or else C = HT or else Is_EOL (C)) then - return; - end if; - - Id := Get_Identifier (Buffer (1 .. Len)); - end Scan_Comment_Identifier; - - package Directive_Protect is - -- Called to scan a protect tool directive. - procedure Scan_Protect_Directive; - end Directive_Protect; - - -- Body is put in a separate file to avoid pollution. - package body Directive_Protect is separate; - - -- Called to scan a tool directive. - procedure Scan_Tool_Directive - is - procedure Error_Missing_Directive is - begin - Error_Msg_Scan ("tool directive required after '`'"); - Skip_Until_EOL; - end Error_Missing_Directive; - - C : Character; - begin - -- The current character is '`'. - Pos := Pos + 1; - Skip_Spaces; - - -- Check and scan identifier. - C := Source (Pos); - if Characters_Kind (C) not in Letter then - Error_Missing_Directive; - return; - end if; - - Scan_Identifier (False); - - if Current_Token /= Tok_Identifier then - Error_Missing_Directive; - return; - end if; - - Skip_Spaces; - - -- Dispatch according to the identifier. - if Current_Identifier = Std_Names.Name_Protect then - Directive_Protect.Scan_Protect_Directive; - else - Error_Msg_Scan - ("unknown tool directive %i ignored", +Current_Identifier); - Skip_Until_EOL; - end if; - end Scan_Tool_Directive; - - -- Scan tokens within a comment. Return TRUE if Current_Token was set, - -- return FALSE to discard the comment (ie treat it like a real comment). - function Scan_Comment return Boolean - is - use Std_Names; - Id : Name_Id; - begin - Scan_Comment_Identifier (Id); - - if Id = Null_Identifier then - return False; - end if; - - case Id is - when Name_Psl => - -- Accept tokens after '-- psl'. - if Flag_Psl_Comment then - Flag_Psl := True; - Flag_Scan_In_Comment := True; - return True; - end if; - when others => - null; - end case; - return False; - end Scan_Comment; - - function Scan_Exclam_Mark return Boolean is - begin - if Source (Pos) = '!' then - Pos := Pos + 1; - return True; - else - return False; - end if; - end Scan_Exclam_Mark; - - function Scan_Underscore return Boolean is - begin - if Source (Pos) = '_' then - Pos := Pos + 1; - return True; - else - return False; - end if; - end Scan_Underscore; - - -- The Scan_Next_Line procedure must be called after each end-of-line to - -- register to next line number. This is called by Scan_CR_Newline and - -- Scan_LF_Newline. - procedure Scan_Next_Line is - begin - Files_Map.Skip_Gap (Current_Context.Source_File, Pos); - Current_Context.Line_Number := Current_Context.Line_Number + 1; - Current_Context.Line_Pos := Pos; - File_Add_Line_Number - (Current_Context.Source_File, Current_Context.Line_Number, Pos); - end Scan_Next_Line; - - -- Scan a CR end-of-line. - procedure Scan_CR_Newline is - begin - -- Accept CR or CR+LF as line separator. - if Source (Pos + 1) = LF then - Pos := Pos + 2; - else - Pos := Pos + 1; - end if; - Scan_Next_Line; - end Scan_CR_Newline; - - -- Scan a LF end-of-line. - procedure Scan_LF_Newline is - begin - -- Accept LF or LF+CR as line separator. - if Source (Pos + 1) = CR then - Pos := Pos + 2; - else - Pos := Pos + 1; - end if; - Scan_Next_Line; - end Scan_LF_Newline; - - -- Emit an error message for an invalid character. - procedure Error_Bad_Character is - begin - -- Technically character literals, string literals, extended - -- identifiers and comments. - Error_Msg_Scan ("character %c can only be used in strings or comments", - +Source (Pos)); - end Error_Bad_Character; - - -- Get a new token. - procedure Scan is - begin - if Current_Token /= Tok_Invalid then - Current_Context.Prev_Token := Current_Token; - end if; - - Current_Context.Prev_Pos := Pos; - - << Again >> null; - - -- Skip commonly used separators. - -- (Like Skip_Spaces but manually inlined for speed). - while Source (Pos) = ' ' or Source (Pos) = HT loop - Pos := Pos + 1; - end loop; - - Current_Context.Token_Pos := Pos; - Current_Context.Identifier := Null_Identifier; - - case Source (Pos) is - when HT | ' ' => - -- Must have already been skipped just above. - raise Internal_Error; - when NBSP => - if Vhdl_Std = Vhdl_87 then - Error_Msg_Scan ("NBSP character not allowed in vhdl87"); - end if; - Pos := Pos + 1; - goto Again; - when VT | FF => - Pos := Pos + 1; - goto Again; - when LF => - Scan_LF_Newline; - if Flag_Newline then - Current_Token := Tok_Newline; - return; - end if; - goto Again; - when CR => - Scan_CR_Newline; - if Flag_Newline then - Current_Token := Tok_Newline; - return; - end if; - goto Again; - when '-' => - if Source (Pos + 1) = '-' then - -- This is a comment. - -- LRM93 13.8 - -- A comment starts with two adjacent hyphens and extends up - -- to the end of the line. - -- A comment can appear on any line line of a VHDL - -- description. - -- The presence or absence of comments has no influence on - -- whether a description is legal or illegal. - -- Futhermore, comments do not influence the execution of a - -- simulation module; their sole purpose is the enlightenment - -- of the human reader. - -- GHDL note: As a consequence, an obfruscating comment - -- is out of purpose, and a warning could be reported :-) - Pos := Pos + 2; - - -- Scan inside a comment. So we just ignore the two dashes. - if Flag_Scan_In_Comment then - goto Again; - end if; - - -- Handle keywords in comment (PSL). - if Flag_Comment_Keyword and then Scan_Comment then - goto Again; - end if; - - -- LRM93 13.2 - -- In any case, a sequence of one or more format - -- effectors other than horizontal tabulation must - -- cause at least one end of line. - while not Is_EOL (Source (Pos)) loop - -- LRM93 13.1 - -- The only characters allowed in the text of a VHDL - -- description are the graphic characters and the format - -- effectors. - - -- LRM02 13.1 Character set - -- The only characters allowed in the text of a VHDL - -- description (except within comments -- see 13.8) [...] - -- - -- LRM02 13.8 Comments - -- A comment [...] may contain any character except the - -- format effectors vertical tab, carriage return, line - -- feed and form feed. - if not (Flags.Mb_Comment or Vhdl_Std >= Vhdl_02) - and then Characters_Kind (Source (Pos)) = Invalid - then - Error_Msg_Scan ("invalid character, even in a comment"); - end if; - Pos := Pos + 1; - end loop; - if Flag_Comment then - Current_Token := Tok_Comment; - return; - end if; - goto Again; - elsif Flag_Psl and then Source (Pos + 1) = '>' then - Current_Token := Tok_Minus_Greater; - Pos := Pos + 2; - return; - else - Current_Token := Tok_Minus; - Pos := Pos + 1; - return; - end if; - when '+' => - Current_Token := Tok_Plus; - Pos := Pos + 1; - return; - when '*' => - if Source (Pos + 1) = '*' then - Current_Token := Tok_Double_Star; - Pos := Pos + 2; - else - Current_Token := Tok_Star; - Pos := Pos + 1; - end if; - return; - when '/' => - if Source (Pos + 1) = '=' then - Current_Token := Tok_Not_Equal; - Pos := Pos + 2; - elsif Source (Pos + 1) = '*' then - -- LRM08 15.9 Comments - -- A delimited comment start with a solidus (slash) character - -- immediately followed by an asterisk character and extends up - -- to the first subsequent occurrence of an asterisk character - -- immediately followed by a solidus character. - if Vhdl_Std < Vhdl_08 then - Error_Msg_Scan - ("block comment are not allowed before vhdl 2008"); - end if; - - -- Skip '/*'. - Pos := Pos + 2; - - loop - case Source (Pos) is - when '/' => - -- LRM08 15.9 - -- Moreover, an occurrence of a solidus character - -- immediately followed by an asterisk character - -- within a delimited comment is not interpreted as - -- the start of a nested delimited comment. - if Source (Pos + 1) = '*' then - Warning_Msg_Scan - (Warnid_Nested_Comment, - "'/*' found within a block comment"); - end if; - Pos := Pos + 1; - when '*' => - if Source (Pos + 1) = '/' then - Pos := Pos + 2; - exit; - else - Pos := Pos + 1; - end if; - when CR => - Scan_CR_Newline; - when LF => - Scan_LF_Newline; - when Files_Map.EOT => - if Pos >= Current_Context.File_Len then - -- Point at the start of the comment. - Error_Msg_Scan - (Get_Token_Location, - "block comment not terminated at end of file"); - exit; - end if; - Pos := Pos + 1; - when others => - Pos := Pos + 1; - end case; - end loop; - if Flag_Comment then - Current_Token := Tok_Comment; - return; - end if; - goto Again; - else - Current_Token := Tok_Slash; - Pos := Pos + 1; - end if; - return; - when '(' => - Current_Token := Tok_Left_Paren; - Pos := Pos + 1; - return; - when ')' => - Current_Token := Tok_Right_Paren; - Pos := Pos + 1; - return; - when '|' => - if Flag_Psl then - if Source (Pos + 1) = '|' then - Current_Token := Tok_Bar_Bar; - Pos := Pos + 2; - elsif Source (Pos + 1) = '-' - and then Source (Pos + 2) = '>' - then - Current_Token := Tok_Bar_Arrow; - Pos := Pos + 3; - elsif Source (Pos + 1) = '=' - and then Source (Pos + 2) = '>' - then - Current_Token := Tok_Bar_Double_Arrow; - Pos := Pos + 3; - else - Current_Token := Tok_Bar; - Pos := Pos + 1; - end if; - else - Current_Token := Tok_Bar; - Pos := Pos + 1; - end if; - return; - when '!' => - if Flag_Psl then - Current_Token := Tok_Exclam_Mark; - else - if Source (Pos + 1) = '=' then - -- != is not allowed in VHDL, but be friendly with C users. - Error_Msg_Scan - (Get_Token_Location, "Use '/=' for inequality in vhdl"); - Current_Token := Tok_Not_Equal; - Pos := Pos + 1; - else - -- LRM93 13.10 - -- A vertical line (|) can be replaced by an exclamation - -- mark (!) where used as a delimiter. - Current_Token := Tok_Bar; - end if; - end if; - Pos := Pos + 1; - return; - when ':' => - if Source (Pos + 1) = '=' then - Current_Token := Tok_Assign; - Pos := Pos + 2; - else - Current_Token := Tok_Colon; - Pos := Pos + 1; - end if; - return; - when ';' => - Current_Token := Tok_Semi_Colon; - Pos := Pos + 1; - return; - when ',' => - Current_Token := Tok_Comma; - Pos := Pos + 1; - return; - when '.' => - if Source (Pos + 1) = '.' then - -- Be Ada friendly... - Error_Msg_Scan ("'..' is invalid in vhdl, replaced by 'to'"); - Current_Token := Tok_To; - Pos := Pos + 2; - return; - end if; - Current_Token := Tok_Dot; - Pos := Pos + 1; - return; - when '&' => - if Flag_Psl and then Source (Pos + 1) = '&' then - Current_Token := Tok_And_And; - Pos := Pos + 2; - else - Current_Token := Tok_Ampersand; - Pos := Pos + 1; - end if; - return; - when '<' => - case Source (Pos + 1) is - when '=' => - Current_Token := Tok_Less_Equal; - Pos := Pos + 2; - when '>' => - Current_Token := Tok_Box; - Pos := Pos + 2; - when '<' => - Current_Token := Tok_Double_Less; - Pos := Pos + 2; - when others => - Current_Token := Tok_Less; - Pos := Pos + 1; - end case; - return; - when '>' => - case Source (Pos + 1) is - when '=' => - Current_Token := Tok_Greater_Equal; - Pos := Pos + 2; - when '>' => - Current_Token := Tok_Double_Greater; - Pos := Pos + 2; - when others => - Current_Token := Tok_Greater; - Pos := Pos + 1; - end case; - return; - when '=' => - if Source (Pos + 1) = '=' then - if AMS_Vhdl then - Current_Token := Tok_Equal_Equal; - else - Error_Msg_Scan - ("'==' is not the vhdl equality, replaced by '='"); - Current_Token := Tok_Equal; - end if; - Pos := Pos + 2; - elsif Source (Pos + 1) = '>' then - Current_Token := Tok_Double_Arrow; - Pos := Pos + 2; - else - Current_Token := Tok_Equal; - Pos := Pos + 1; - end if; - return; - when ''' => - -- Handle cases such as character'('a') - -- FIXME: what about f ()'length ? or .all'length - if Current_Context.Prev_Token /= Tok_Identifier - and then Current_Context.Prev_Token /= Tok_Character - and then Source (Pos + 2) = ''' - then - -- LRM93 13.5 - -- A character literal is formed by enclosing one of the 191 - -- graphic character (...) between two apostrophe characters. - -- CHARACTER_LITERAL ::= ' GRAPHIC_CHARACTER ' - if Characters_Kind (Source (Pos + 1)) not in Graphic_Character - then - Error_Msg_Scan - ("a character literal can only be a graphic character"); - elsif Vhdl_Std = Vhdl_87 - and then Source (Pos + 1) > Character'Val (127) - then - Error_8bit; - end if; - Current_Token := Tok_Character; - Current_Context.Identifier := - Name_Table.Get_Identifier (Source (Pos + 1)); - Pos := Pos + 3; - return; - else - Current_Token := Tok_Tick; - Pos := Pos + 1; - end if; - return; - when '0' .. '9' => - Scan_Literal; - - -- LRM93 13.2 - -- At least one separator is required between an identifier or - -- an abstract literal and an adjacent identifier or abstract - -- literal. - case Characters_Kind (Source (Pos)) is - when Digit => - -- Happen if d#ddd# is followed by a number. - Error_Msg_Scan ("space is required between numbers"); - when Upper_Case_Letter - | Lower_Case_Letter => - -- Could call Error_Separator, but use a clearer message - -- for this common case. - -- Note: the term "unit name" is not correct here, since - -- it can be any identifier or even a keyword; however it - -- is probably the most common case (eg 10ns). - if Vhdl_Std >= Vhdl_08 and then Current_Token = Tok_Integer - then - Current_Token := Tok_Integer_Letter; - else - Error_Msg_Scan - ("space is required between number and unit name"); - end if; - when Other_Special_Character => - if Vhdl_Std > Vhdl_87 and then Source (Pos) = '\' then - -- Start of extended identifier. - Error_Separator; - end if; - when Invalid - | Format_Effector - | Space_Character - | Special_Character => - null; - end case; - return; - when '#' => - Error_Msg_Scan ("'#' is used for based literals and " - & "must be preceded by a base"); - -- Skip. - Pos := Pos + 1; - goto Again; - when '"' => - Scan_String; - return; - when '%' => - if Vhdl_Std >= Vhdl_08 then - Error_Msg_Scan - ("'%%' not allowed in vhdl 2008 (was replacement character)"); - -- Continue as a string. - end if; - Scan_String; - return; - when '[' => - if Flag_Psl then - if Source (Pos + 1) = '*' then - Current_Token := Tok_Brack_Star; - Pos := Pos + 2; - elsif Source (Pos + 1) = '+' - and then Source (Pos + 2) = ']' - then - Current_Token := Tok_Brack_Plus_Brack; - Pos := Pos + 3; - elsif Source (Pos + 1) = '-' - and then Source (Pos + 2) = '>' - then - Current_Token := Tok_Brack_Arrow; - Pos := Pos + 3; - elsif Source (Pos + 1) = '=' then - Current_Token := Tok_Brack_Equal; - Pos := Pos + 2; - else - Current_Token := Tok_Left_Bracket; - Pos := Pos + 1; - end if; - else - if Vhdl_Std = Vhdl_87 then - Error_Msg_Scan - ("'[' is an invalid character in vhdl87, replaced by '('"); - Current_Token := Tok_Left_Paren; - else - Current_Token := Tok_Left_Bracket; - end if; - Pos := Pos + 1; - end if; - return; - when ']' => - if Vhdl_Std = Vhdl_87 and not Flag_Psl then - Error_Msg_Scan - ("']' is an invalid character in vhdl87, replaced by ')'"); - Current_Token := Tok_Right_Paren; - else - Current_Token := Tok_Right_Bracket; - end if; - Pos := Pos + 1; - return; - when '{' => - if Flag_Psl then - Current_Token := Tok_Left_Curly; - else - Error_Msg_Scan ("'{' is an invalid character, replaced by '('"); - Current_Token := Tok_Left_Paren; - end if; - Pos := Pos + 1; - return; - when '}' => - if Flag_Psl then - Current_Token := Tok_Right_Curly; - else - Error_Msg_Scan ("'}' is an invalid character, replaced by ')'"); - Current_Token := Tok_Right_Paren; - end if; - Pos := Pos + 1; - return; - when '\' => - if Vhdl_Std = Vhdl_87 then - Error_Msg_Scan - ("extended identifiers are not allowed in vhdl87"); - end if; - Scan_Extended_Identifier; - return; - when '^' => - if Vhdl_Std >= Vhdl_08 then - Current_Token := Tok_Caret; - else - Current_Token := Tok_Xor; - Error_Msg_Scan ("'^' is not a VHDL operator, use 'xor'"); - end if; - Pos := Pos + 1; - return; - when '~' => - Error_Msg_Scan ("'~' is not a VHDL operator, use 'not'"); - Pos := Pos + 1; - Current_Token := Tok_Not; - return; - when '?' => - if Vhdl_Std < Vhdl_08 then - Error_Bad_Character; - Pos := Pos + 1; - goto Again; - else - if Source (Pos + 1) = '<' then - if Source (Pos + 2) = '=' then - Current_Token := Tok_Match_Less_Equal; - Pos := Pos + 3; - else - Current_Token := Tok_Match_Less; - Pos := Pos + 2; - end if; - elsif Source (Pos + 1) = '>' then - if Source (Pos + 2) = '=' then - Current_Token := Tok_Match_Greater_Equal; - Pos := Pos + 3; - else - Current_Token := Tok_Match_Greater; - Pos := Pos + 2; - end if; - elsif Source (Pos + 1) = '?' then - Current_Token := Tok_Condition; - Pos := Pos + 2; - elsif Source (Pos + 1) = '=' then - Current_Token := Tok_Match_Equal; - Pos := Pos + 2; - elsif Source (Pos + 1) = '/' - and then Source (Pos + 2) = '=' - then - Current_Token := Tok_Match_Not_Equal; - Pos := Pos + 3; - else - Error_Msg_Scan ("unknown matching operator"); - Pos := Pos + 1; - goto Again; - end if; - end if; - return; - when '`' => - if Vhdl_Std >= Vhdl_08 then - Scan_Tool_Directive; - else - Error_Bad_Character; - Skip_Until_EOL; - end if; - goto Again; - when '$' - | Inverted_Exclamation .. Inverted_Question - | Multiplication_Sign | Division_Sign => - Error_Bad_Character; - Pos := Pos + 1; - goto Again; - when '@' => - if Vhdl_Std >= Vhdl_08 or Flag_Psl then - Current_Token := Tok_Arobase; - Pos := Pos + 1; - return; - else - Error_Bad_Character; - Pos := Pos + 1; - goto Again; - end if; - when '_' => - Error_Msg_Scan ("an identifier can't start with '_'"); - Scan_Identifier (Flag_Psl); - -- Cannot be a reserved word. - return; - when 'A' .. 'Z' | 'a' .. 'z' => - Scan_Identifier (Flag_Psl); - Identifier_To_Token; - return; - when UC_A_Grave .. UC_O_Diaeresis - | UC_O_Oblique_Stroke .. UC_Icelandic_Thorn - | LC_German_Sharp_S .. LC_O_Diaeresis - | LC_O_Oblique_Stroke .. LC_Y_Diaeresis => - if Vhdl_Std = Vhdl_87 then - Error_Msg_Scan - ("non 7-bit latin-1 letters are not allowed in vhdl87"); - end if; - Scan_Identifier (False); - -- Not a reserved word. - return; - when NUL .. ETX | ENQ .. BS | SO .. US | DEL .. APC => - Error_Msg_Scan - ("control character that is not CR, LF, FF, HT or VT " & - "is not allowed"); - Pos := Pos + 1; - goto Again; - when Files_Map.EOT => - if Pos >= Current_Context.File_Len then - -- FIXME: should conditionnaly emit a warning if the file - -- is not terminated by an end of line. - Current_Token := Tok_Eof; - else - Error_Msg_Scan ("EOT is not allowed inside the file"); - Pos := Pos + 1; - goto Again; - end if; - return; - end case; - -- Not reachable: all case should use goto Again or return. - end Scan; - - function Is_Whitespace (C : Character) return Boolean is - begin - if C = ' ' then - return True; - elsif Vhdl_Std > Vhdl_87 and C = NBSP then - return True; - else - return False; - end if; - end Is_Whitespace; -end Scanner; |