From fc028b5d21727da66dc8e146b3dbcfc870c64f90 Mon Sep 17 00:00:00 2001 From: Tristan Gingold Date: Sat, 4 May 2019 16:49:19 +0200 Subject: vhdl: move scanner under vhdl hierarchy. --- src/ghdldrv/ghdllocal.adb | 4 +- src/ghdldrv/ghdlprint.adb | 12 +- src/libraries.adb | 8 +- src/options.adb | 8 +- src/vhdl/errorout.adb | 16 +- src/vhdl/evaluation.adb | 8 +- src/vhdl/iirs_utils.adb | 2 +- src/vhdl/parse.adb | 22 +- src/vhdl/parse_psl.adb | 2 +- src/vhdl/scanner-directive_protect.adb | 98 -- src/vhdl/scanner-scan_literal.adb | 317 ---- src/vhdl/scanner.adb | 2332 --------------------------- src/vhdl/scanner.ads | 144 -- src/vhdl/sem_lib.adb | 10 +- src/vhdl/simulate/simul-debugger.adb | 10 +- src/vhdl/vhdl-scanner-directive_protect.adb | 116 ++ src/vhdl/vhdl-scanner-scan_literal.adb | 317 ++++ src/vhdl/vhdl-scanner.adb | 2332 +++++++++++++++++++++++++++ src/vhdl/vhdl-scanner.ads | 144 ++ src/vhdl/vhdl.ads | 21 + 20 files changed, 2981 insertions(+), 2942 deletions(-) delete mode 100644 src/vhdl/scanner-directive_protect.adb delete mode 100644 src/vhdl/scanner-scan_literal.adb delete mode 100644 src/vhdl/scanner.adb delete mode 100644 src/vhdl/scanner.ads create mode 100644 src/vhdl/vhdl-scanner-directive_protect.adb create mode 100644 src/vhdl/vhdl-scanner-scan_literal.adb create mode 100644 src/vhdl/vhdl-scanner.adb create mode 100644 src/vhdl/vhdl-scanner.ads create mode 100644 src/vhdl/vhdl.ads (limited to 'src') diff --git a/src/ghdldrv/ghdllocal.adb b/src/ghdldrv/ghdllocal.adb index aab8885c3..297d39798 100644 --- a/src/ghdldrv/ghdllocal.adb +++ b/src/ghdldrv/ghdllocal.adb @@ -27,7 +27,7 @@ with Name_Table; with Std_Names; with Disp_Vhdl; with Default_Paths; -with Scanner; +with Vhdl.Scanner; with Errorout; with Configuration; with Files_Map; @@ -1621,7 +1621,7 @@ package body Ghdllocal is raise Option_Error; end if; Res := new String'(Name.all); - Scanner.Convert_Identifier (Res.all); + Vhdl.Scanner.Convert_Identifier (Res.all); return Res; end Convert_Name; diff --git a/src/ghdldrv/ghdlprint.adb b/src/ghdldrv/ghdlprint.adb index 63acd06f3..d72b1747d 100644 --- a/src/ghdldrv/ghdlprint.adb +++ b/src/ghdldrv/ghdlprint.adb @@ -28,7 +28,7 @@ with Libraries; with Errorout; use Errorout; with Iirs_Utils; use Iirs_Utils; with Tokens; -with Scanner; +with Vhdl.Scanner; with Parse; with Canon; with Version; @@ -85,7 +85,7 @@ package body Ghdlprint is procedure PP_Html_File (File : Source_File_Entry) is use Flags; - use Scanner; + use Vhdl.Scanner; use Tokens; use Files_Map; use Ada.Characters.Latin_1; @@ -345,8 +345,8 @@ package body Ghdlprint is end if; end Disp_Attribute; begin - Scanner.Flag_Comment := True; - Scanner.Flag_Newline := True; + Vhdl.Scanner.Flag_Comment := True; + Vhdl.Scanner.Flag_Newline := True; Set_File (File); Buf := Get_File_Source (File); @@ -855,7 +855,7 @@ package body Ghdlprint is procedure Perform_Action (Cmd : Command_Lines; Args : Argument_List) is pragma Unreferenced (Cmd); - use Scanner; + use Vhdl.Scanner; use Tokens; use Files_Map; use Ada.Characters.Latin_1; @@ -1048,7 +1048,7 @@ package body Ghdlprint is is pragma Unreferenced (Cmd); use Tokens; - use Scanner; + use Vhdl.Scanner; package Ref_Tokens is new Tables (Table_Component_Type => Token_Type, diff --git a/src/libraries.adb b/src/libraries.adb index dd70b615a..ff499717c 100644 --- a/src/libraries.adb +++ b/src/libraries.adb @@ -21,7 +21,7 @@ with GNAT.OS_Lib; with Logging; use Logging; with Tables; with Errorout; use Errorout; -with Scanner; +with Vhdl.Scanner; with Iirs_Utils; use Iirs_Utils; with Name_Table; use Name_Table; with Str_Table; @@ -327,7 +327,7 @@ package body Libraries is -- Return TRUE if the library was found. function Load_Library (Library: Iir_Library_Declaration) return Boolean is - use Scanner; + use Vhdl.Scanner; use Tokens; File : Source_File_Entry; @@ -418,7 +418,7 @@ package body Libraries is return False; end if; - Scanner.Set_File (File); + Vhdl.Scanner.Set_File (File); -- Parse header. Scan; @@ -601,7 +601,7 @@ package body Libraries is end loop; Set_Date (Library, Max_Date); - Scanner.Close_File; + Vhdl.Scanner.Close_File; -- Don't need the library file anymore. Files_Map.Unload_Last_Source_File (File); diff --git a/src/options.adb b/src/options.adb index a627bde04..e2da44919 100644 --- a/src/options.adb +++ b/src/options.adb @@ -23,7 +23,7 @@ with Std_Names; with PSL.Nodes; with PSL.Dump_Tree; with Disp_Tree; -with Scanner; +with Vhdl.Scanner; with Back_End; use Back_End; with Flags; use Flags; with Files_Map; @@ -121,7 +121,7 @@ package body Options is Name : String (1 .. Opt'Last - 8 + 1); begin Name := Opt (8 .. Opt'Last); - Scanner.Convert_Identifier (Name); + Vhdl.Scanner.Convert_Identifier (Name); Libraries.Work_Library_Name := Get_Identifier (Name); end; elsif Opt = "-C" or else Opt = "--mb-comments" then @@ -167,8 +167,8 @@ package body Options is elsif Opt = "--vital-checks" then Flag_Vital_Checks := True; elsif Opt = "-fpsl" then - Scanner.Flag_Psl_Comment := True; - Scanner.Flag_Comment_Keyword := True; + Vhdl.Scanner.Flag_Psl_Comment := True; + Vhdl.Scanner.Flag_Comment_Keyword := True; elsif Opt = "-dp" then Dump_Parse := True; elsif Opt = "-ds" then diff --git a/src/vhdl/errorout.adb b/src/vhdl/errorout.adb index 90d30e138..fb6211ce4 100644 --- a/src/vhdl/errorout.adb +++ b/src/vhdl/errorout.adb @@ -17,7 +17,7 @@ -- 02111-1307, USA. with Logging; use Logging; -with Scanner; +with Vhdl.Scanner; with Name_Table; with Iirs_Utils; use Iirs_Utils; with Files_Map; use Files_Map; @@ -257,15 +257,15 @@ package body Errorout is when Elaboration => null; when Scan => - File := Scanner.Get_Current_Source_File; - Line := Scanner.Get_Current_Line; - Offset := Scanner.Get_Current_Offset; + File := Vhdl.Scanner.Get_Current_Source_File; + Line := Vhdl.Scanner.Get_Current_Line; + Offset := Vhdl.Scanner.Get_Current_Offset; Loc_Length := 1; when Parse => - File := Scanner.Get_Current_Source_File; - Line := Scanner.Get_Current_Line; - Offset := Scanner.Get_Token_Offset; - Loc_Length := Scanner.Get_Current_Offset - Offset; + File := Vhdl.Scanner.Get_Current_Source_File; + Line := Vhdl.Scanner.Get_Current_Line; + Offset := Vhdl.Scanner.Get_Token_Offset; + Loc_Length := Vhdl.Scanner.Get_Current_Offset - Offset; when Semantic => null; end case; diff --git a/src/vhdl/evaluation.adb b/src/vhdl/evaluation.adb index 72c5a9152..ec366aeef 100644 --- a/src/vhdl/evaluation.adb +++ b/src/vhdl/evaluation.adb @@ -17,7 +17,7 @@ -- 02111-1307, USA. with Ada.Unchecked_Deallocation; with Interfaces; -with Scanner; +with Vhdl.Scanner; with Errorout; use Errorout; with Name_Table; use Name_Table; with Str_Table; @@ -1906,7 +1906,7 @@ package body Evaluation is -- Separate string into numeric value and make lowercase unit. for I in reverse Val'range loop UnitName (I) := Ada.Characters.Handling.To_Lower (Val (I)); - if Scanner.Is_Whitespace (Val (I)) and Found_Unit then + if Vhdl.Scanner.Is_Whitespace (Val (I)) and Found_Unit then Sep := I; exit; else @@ -2193,11 +2193,11 @@ package body Evaluation is First := Value'First; Last := Value'Last; while First <= Last loop - exit when not Scanner.Is_Whitespace (Value (First)); + exit when not Vhdl.Scanner.Is_Whitespace (Value (First)); First := First + 1; end loop; while Last >= First loop - exit when not Scanner.Is_Whitespace (Value (Last)); + exit when not Vhdl.Scanner.Is_Whitespace (Value (Last)); Last := Last - 1; end loop; diff --git a/src/vhdl/iirs_utils.adb b/src/vhdl/iirs_utils.adb index 6a888911d..3ea7aed3c 100644 --- a/src/vhdl/iirs_utils.adb +++ b/src/vhdl/iirs_utils.adb @@ -15,7 +15,7 @@ -- along with GHDL; see the file COPYING. If not, write to the Free -- Software Foundation, 59 Temple Place - Suite 330, Boston, MA -- 02111-1307, USA. -with Scanner; use Scanner; +with Vhdl.Scanner; use Vhdl.Scanner; with Tokens; use Tokens; with Errorout; use Errorout; with Name_Table; diff --git a/src/vhdl/parse.adb b/src/vhdl/parse.adb index 874a5ae2d..d4e8b6121 100644 --- a/src/vhdl/parse.adb +++ b/src/vhdl/parse.adb @@ -17,7 +17,7 @@ -- 02111-1307, USA. with Iir_Chains; use Iir_Chains; with Tokens; use Tokens; -with Scanner; use Scanner; +with Vhdl.Scanner; use Vhdl.Scanner; with Iirs_Utils; use Iirs_Utils; with Errorout; use Errorout; with Std_Names; use Std_Names; @@ -4371,7 +4371,7 @@ package body Parse is Xrefs.Xref_Keyword (Get_Token_Location); -- Recognize PSL keywords. - Scanner.Flag_Psl := True; + Vhdl.Scanner.Flag_Psl := True; -- Skip 'clock'. Expect_Scan (Tok_Psl_Clock); @@ -4381,8 +4381,8 @@ package body Parse is Set_Psl_Boolean (Res, Parse_Psl.Parse_Psl_Boolean); - Scanner.Flag_Scan_In_Comment := False; - Scanner.Flag_Psl := False; + Vhdl.Scanner.Flag_Scan_In_Comment := False; + Vhdl.Scanner.Flag_Psl := False; Expect_Scan (Tok_Semi_Colon); @@ -4398,7 +4398,7 @@ package body Parse is Loc := Get_Token_Location; -- Recognize PSL keywords. - Scanner.Flag_Psl := True; + Vhdl.Scanner.Flag_Psl := True; -- Skip 'default'. Scan; @@ -4425,10 +4425,10 @@ package body Parse is end if; -- Parse PSL declaration. - Scanner.Flag_Psl := True; + Vhdl.Scanner.Flag_Psl := True; Decl := Parse_Psl.Parse_Psl_Declaration (Tok); - Scanner.Flag_Scan_In_Comment := False; - Scanner.Flag_Psl := False; + Vhdl.Scanner.Flag_Scan_In_Comment := False; + Vhdl.Scanner.Flag_Psl := False; Expect_Scan (Tok_Semi_Colon); @@ -8469,7 +8469,7 @@ package body Parse is procedure Parse_Psl_Assert_Report_Severity (Stmt : Iir) is begin -- No more PSL tokens after the property. - Scanner.Flag_Psl := False; + Vhdl.Scanner.Flag_Psl := False; if Current_Token = Tok_Report then -- Skip 'report' @@ -8485,7 +8485,7 @@ package body Parse is Set_Severity_Expression (Stmt, Parse_Expression); end if; - Scanner.Flag_Scan_In_Comment := False; + Vhdl.Scanner.Flag_Scan_In_Comment := False; Expect_Scan (Tok_Semi_Colon); end Parse_Psl_Assert_Report_Severity; @@ -8498,7 +8498,7 @@ package body Parse is -- Accept PSL tokens if Flags.Vhdl_Std >= Vhdl_08 then - Scanner.Flag_Psl := True; + Vhdl.Scanner.Flag_Psl := True; end if; -- Skip 'assert' diff --git a/src/vhdl/parse_psl.adb b/src/vhdl/parse_psl.adb index 32d24d478..5d78efba6 100644 --- a/src/vhdl/parse_psl.adb +++ b/src/vhdl/parse_psl.adb @@ -19,7 +19,7 @@ with Errorout; use Errorout; with PSL.Nodes; use PSL.Nodes; with Iirs; -with Scanner; use Scanner; +with Vhdl.Scanner; use Vhdl.Scanner; with PSL.Errors; use PSL.Errors; with PSL.Priorities; use PSL.Priorities; with Parse; diff --git a/src/vhdl/scanner-directive_protect.adb b/src/vhdl/scanner-directive_protect.adb deleted file mode 100644 index 1a70144d8..000000000 --- a/src/vhdl/scanner-directive_protect.adb +++ /dev/null @@ -1,98 +0,0 @@ -separate (Scanner) -package body Directive_Protect is - function Scan_Expression_List return Boolean; - - -- Scan/parse a keyword expression. - -- Initial spaces must have been skipped. - -- Return False in case of error. - function Scan_Keyword_Expression return Boolean is - begin - if Characters_Kind (Source (Pos)) not in Letter then - Error_Msg_Scan ("identifier expected in protect directive"); - return False; - end if; - - Scan_Identifier (False); - if Current_Token /= Tok_Identifier then - Error_Msg_Scan (Get_Token_Location, "keyword must be an identifier"); - return False; - end if; - - Skip_Spaces; - if Source (Pos) /= '=' then - return True; - end if; - - -- Eat '='. - Pos := Pos + 1; - Skip_Spaces; - - case Source (Pos) is - when 'A' .. 'Z' | 'a' .. 'z' => - Scan_Identifier (False); - when '0' .. '9' => - Scan_Literal; - when '"' => - Scan_String; - when '(' => - -- Eat '('. - Pos := Pos + 1; - Skip_Spaces; - - if not Scan_Expression_List then - return False; - end if; - - Skip_Spaces; - if Source (Pos) /= ')' then - Error_Msg_Scan ("')' expected at end of protect keyword list"); - return False; - end if; - - -- Eat ')'. - Pos := Pos + 1; - - when others => - -- Ok, we don't handle all the letters, nor extended identifiers. - Error_Msg_Scan ("literal expected in protect tool directive"); - return False; - end case; - - return True; - end Scan_Keyword_Expression; - - -- Scan: keyword_expression { , keyword_expression } - function Scan_Expression_List return Boolean is - begin - loop - if not Scan_Keyword_Expression then - return False; - end if; - - Skip_Spaces; - - if Source (Pos) /= ',' then - return True; - end if; - - -- Eat ','. - Pos := Pos + 1; - - Skip_Spaces; - end loop; - end Scan_Expression_List; - - -- LRM08 24.1 Protect tool directives - -- protect_directive ::= - -- `PROTECT keyword_expression {, keyword_expression } - procedure Scan_Protect_Directive is - begin - if Scan_Expression_List then - if not Is_EOL (Source (Pos)) then - Error_Msg_Scan ("end of line expected in protect directive"); - end if; - end if; - - Skip_Until_EOL; - end Scan_Protect_Directive; -end Directive_Protect; diff --git a/src/vhdl/scanner-scan_literal.adb b/src/vhdl/scanner-scan_literal.adb deleted file mode 100644 index ef6718925..000000000 --- a/src/vhdl/scanner-scan_literal.adb +++ /dev/null @@ -1,317 +0,0 @@ --- Lexical analysis for numbers. --- Copyright (C) 2002 - 2014 Tristan Gingold --- --- GHDL is free software; you can redistribute it and/or modify it under --- the terms of the GNU General Public License as published by the Free --- Software Foundation; either version 2, or (at your option) any later --- version. --- --- GHDL is distributed in the hope that it will be useful, but WITHOUT ANY --- WARRANTY; without even the implied warranty of MERCHANTABILITY or --- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --- for more details. --- --- You should have received a copy of the GNU General Public License --- along with GHDL; see the file COPYING. If not, write to the Free --- Software Foundation, 59 Temple Place - Suite 330, Boston, MA --- 02111-1307, USA. - -with Interfaces; use Interfaces; -with Grt.Fcvt; use Grt.Fcvt; - -separate (Scanner) - --- scan a decimal literal or a based literal. --- --- LRM93 13.4.1 --- DECIMAL_LITERAL ::= INTEGER [ . INTEGER ] [ EXPONENT ] --- EXPONENT ::= E [ + ] INTEGER | E - INTEGER --- --- LRM93 13.4.2 --- BASED_LITERAL ::= BASE # BASED_INTEGER [ . BASED_INTEGER ] # EXPONENT --- BASE ::= INTEGER -procedure Scan_Literal is - -- Numbers of digits. - Scale : Integer; - Res : Bignum; - - -- LRM 13.4.1 - -- INTEGER ::= DIGIT { [ UNDERLINE ] DIGIT } - -- - -- Update SCALE, RES. - -- The first character must be a digit. - procedure Scan_Integer - is - C : Character; - begin - C := Source (Pos); - loop - -- C is a digit. - Bignum_Mul_Int (Res, 10, Character'Pos (C) - Character'Pos ('0')); - Scale := Scale + 1; - - Pos := Pos + 1; - C := Source (Pos); - if C = '_' then - loop - Pos := Pos + 1; - C := Source (Pos); - exit when C /= '_'; - Error_Msg_Scan ("double underscore in number"); - end loop; - if C not in '0' .. '9' then - Error_Msg_Scan ("underscore must be followed by a digit"); - end if; - end if; - exit when C not in '0' .. '9'; - end loop; - end Scan_Integer; - - C : Character; - D : Natural; - Ok : Boolean; - Has_Dot : Boolean; - Exp : Integer; - Exp_Neg : Boolean; - Base : Positive; -begin - -- Start with a simple and fast conversion. - C := Source (Pos); - D := 0; - loop - D := D * 10 + Character'Pos (C) - Character'Pos ('0'); - - Pos := Pos + 1; - C := Source (Pos); - if C = '_' then - loop - Pos := Pos + 1; - C := Source (Pos); - exit when C /= '_'; - Error_Msg_Scan ("double underscore in number"); - end loop; - if C not in '0' .. '9' then - Error_Msg_Scan ("underscore must be followed by a digit"); - end if; - end if; - if C not in '0' .. '9' then - if C = '.' or else C = '#' or else (C = 'e' or C = 'E' or C = ':') - then - -- Continue scanning. - Bignum_Int (Res, D); - exit; - end if; - - -- Finished. - -- a universal integer. - Current_Token := Tok_Integer; - -- No possible overflow. - Current_Context.Int64 := Iir_Int64 (D); - return; - elsif D >= (Natural'Last / 10) - 1 then - -- Number may be greather than the natural limit. - Scale := 0; - Bignum_Int (Res, D); - Scan_Integer; - exit; - end if; - end loop; - - Has_Dot := False; - Base := 10; - Scale := 0; - - C := Source (Pos); - if C = '.' then - -- Decimal integer. - Has_Dot := True; - Pos := Pos + 1; - C := Source (Pos); - if C not in '0' .. '9' then - Error_Msg_Scan ("a dot must be followed by a digit"); - Current_Token := Tok_Real; - Current_Context.Fp64 := Fp64 (To_Float_64 (False, Res, Base, 0)); - return; - end if; - Scan_Integer; - elsif C = '#' - or else (C = ':' and then (Source (Pos + 1) in '0' .. '9' - or else Source (Pos + 1) in 'a' .. 'f' - or else Source (Pos + 1) in 'A' .. 'F')) - then - -- LRM 13.10 - -- The number sign (#) of a based literal can be replaced by colon (:), - -- provided that the replacement is done for both occurrences. - -- GHDL: correctly handle 'variable v : integer range 0 to 7:= 3'. - -- Is there any other places where a digit can be followed - -- by a colon ? (See IR 1093). - - -- Based integer. - declare - Number_Sign : constant Character := C; - Res_Int : Interfaces.Unsigned_64; - begin - Bignum_To_Int (Res, Res_Int, Ok); - if not Ok or else Res_Int > 16 then - -- LRM 13.4.2 - -- The base must be [...] at most sixteen. - Error_Msg_Scan ("base must be at most 16"); - -- Fallback. - Base := 16; - elsif Res_Int < 2 then - -- LRM 13.4.2 - -- The base must be at least two [...]. - Error_Msg_Scan ("base must be at least 2"); - -- Fallback. - Base := 2; - else - Base := Natural (Res_Int); - end if; - - Pos := Pos + 1; - Bignum_Int (Res, 0); - C := Source (Pos); - loop - if C >= '0' and C <= '9' then - D := Character'Pos (C) - Character'Pos ('0'); - elsif C >= 'A' and C <= 'F' then - D := Character'Pos (C) - Character'Pos ('A') + 10; - elsif C >= 'a' and C <= 'f' then - D := Character'Pos (C) - Character'Pos ('a') + 10; - else - Error_Msg_Scan ("bad extended digit"); - exit; - end if; - - if D >= Base then - -- LRM 13.4.2 - -- The conventional meaning of base notation is - -- assumed; in particular the value of each extended - -- digit of a based literal must be less then the base. - Error_Msg_Scan ("digit beyond base"); - D := 1; - end if; - Pos := Pos + 1; - Bignum_Mul_Int (Res, Base, D); - Scale := Scale + 1; - - C := Source (Pos); - if C = '_' then - loop - Pos := Pos + 1; - C := Source (Pos); - exit when C /= '_'; - Error_Msg_Scan ("double underscore in based integer"); - end loop; - elsif C = '.' then - if Has_Dot then - Error_Msg_Scan ("double dot ignored"); - else - Has_Dot := True; - Scale := 0; - end if; - Pos := Pos + 1; - C := Source (Pos); - elsif C = Number_Sign then - Pos := Pos + 1; - exit; - elsif C = '#' or C = ':' then - Error_Msg_Scan ("bad number sign replacement character"); - exit; - end if; - end loop; - end; - end if; - - -- Exponent. - C := Source (Pos); - Exp := 0; - if C = 'E' or else C = 'e' then - Pos := Pos + 1; - C := Source (Pos); - Exp_Neg := False; - if C = '+' then - Pos := Pos + 1; - C := Source (Pos); - elsif C = '-' then - if Has_Dot then - Exp_Neg := True; - else - -- LRM 13.4.1 - -- An exponent for an integer literal must not have a minus sign. - -- - -- LRM 13.4.2 - -- An exponent for a based integer literal must not have a minus - -- sign. - Error_Msg_Scan - ("negative exponent not allowed for integer literal"); - end if; - Pos := Pos + 1; - C := Source (Pos); - end if; - if C not in '0' .. '9' then - Error_Msg_Scan ("digit expected after exponent"); - else - loop - -- C is a digit. - Exp := Exp * 10 + (Character'Pos (C) - Character'Pos ('0')); - - Pos := Pos + 1; - C := Source (Pos); - if C = '_' then - loop - Pos := Pos + 1; - C := Source (Pos); - exit when C /= '_'; - Error_Msg_Scan ("double underscore not allowed in integer"); - end loop; - if C not in '0' .. '9' then - Error_Msg_Scan ("digit expected after underscore"); - exit; - end if; - elsif C not in '0' .. '9' then - exit; - end if; - end loop; - end if; - if Exp_Neg then - Exp := -Exp; - end if; - end if; - - if Has_Dot then - -- a universal real. - Current_Token := Tok_Real; - - Current_Context.Fp64 := - Fp64 (To_Float_64 (False, Res, Base, Exp - Scale)); - else - -- a universal integer. - Current_Token := Tok_Integer; - - -- Set to a valid literal, in case of constraint error. - if Exp /= 0 then - Res := Bignum_Mul (Res, Bignum_Pow (Base, Exp)); - end if; - - declare - U : Unsigned_64; - begin - Bignum_To_Int (Res, U, Ok); - if U > Unsigned_64 (Iir_Int64'Last) then - Ok := False; - else - Current_Context.Int64 := Iir_Int64 (U); - end if; - end; - if not Ok then - Error_Msg_Scan ("literal beyond integer bounds"); - end if; - end if; -exception - when Constraint_Error => - Error_Msg_Scan ("literal overflow"); - - Current_Token := Tok_Integer; - Current_Context.Int64 := 0; -end Scan_Literal; diff --git a/src/vhdl/scanner.adb b/src/vhdl/scanner.adb deleted file mode 100644 index 913883ab1..000000000 --- a/src/vhdl/scanner.adb +++ /dev/null @@ -1,2332 +0,0 @@ --- VHDL lexical scanner. --- Copyright (C) 2002 - 2014 Tristan Gingold --- --- GHDL is free software; you can redistribute it and/or modify it under --- the terms of the GNU General Public License as published by the Free --- Software Foundation; either version 2, or (at your option) any later --- version. --- --- GHDL is distributed in the hope that it will be useful, but WITHOUT ANY --- WARRANTY; without even the implied warranty of MERCHANTABILITY or --- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --- for more details. --- --- You should have received a copy of the GNU General Public License --- along with GHDL; see the file COPYING. If not, write to the Free --- Software Foundation, 59 Temple Place - Suite 330, Boston, MA --- 02111-1307, USA. -with Ada.Characters.Latin_1; use Ada.Characters.Latin_1; -with Errorout; use Errorout; -with Name_Table; -with Files_Map; use Files_Map; -with Std_Names; -with Str_Table; -with Flags; use Flags; - -package body Scanner is - - -- This classification is a simplification of the categories of LRM93 13.1 - -- LRM93 13.1 - -- The only characters allowed in the text of a VHDL description are the - -- graphic characters and format effector. - - type Character_Kind_Type is - ( - -- Neither a format effector nor a graphic character. - Invalid, - Format_Effector, - Lower_Case_Letter, - Upper_Case_Letter, - Digit, - Special_Character, - Space_Character, - Other_Special_Character - ); - - -- LRM93 13.1 - -- basic_graphic_character ::= - -- upper_case_letter | digit | special_character | space_character - -- - --subtype Basic_Graphic_Character is - -- Character_Kind_Type range Upper_Case_Letter .. Space_Character; - - -- LRM93 13.1 - -- graphic_character ::= - -- basic_graphic_character | lower_case_letter | other_special_character - -- - -- Note: There are 191 graphic characters. - subtype Graphic_Character is - Character_Kind_Type range Lower_Case_Letter .. Other_Special_Character; - - -- letter ::= upper_case_letter | lower_case_letter - subtype Letter is - Character_Kind_Type range Lower_Case_Letter .. Upper_Case_Letter; - - -- LRM93 13.1 - -- The characters included in each of the categories of basic graphic - -- characters are defined as follows: - type Character_Array is array (Character) of Character_Kind_Type; - pragma Suppress_Initialization (Character_Array); - Characters_Kind : constant Character_Array := - (NUL .. BS => Invalid, - - -- Format effectors are the ISO (and ASCII) characters called horizontal - -- tabulation, vertical tabulation, carriage return, line feed, and form - -- feed. - HT | LF | VT | FF | CR => Format_Effector, - - SO .. US => Invalid, - - -- 1. upper case letters - 'A' .. 'Z' | UC_A_Grave .. UC_O_Diaeresis | - UC_O_Oblique_Stroke .. UC_Icelandic_Thorn => Upper_Case_Letter, - - -- 2. digits - '0' .. '9' => Digit, - - -- 3. special characters - '"' | '#' | '&' | ''' | '(' | ')' | '+' | ',' | '-' | '.' | '/' - | ':' | ';' | '<' | '=' | '>' | '[' | ']' - | '_' | '|' | '*' => Special_Character, - - -- 4. the space characters - ' ' | NBSP => Space_Character, - - -- 5. lower case letters - 'a' .. 'z' | LC_German_Sharp_S .. LC_O_Diaeresis | - LC_O_Oblique_Stroke .. LC_Y_Diaeresis => Lower_Case_Letter, - - -- 6. other special characters - '!' | '$' | '%' | '@' | '?' | '\' | '^' | '`' | '{' | '}' | '~' - | Inverted_Exclamation .. Inverted_Question | Multiplication_Sign | - Division_Sign => Other_Special_Character, - - -- '¡' -- INVERTED EXCLAMATION MARK - -- '¢' -- CENT SIGN - -- '£' -- POUND SIGN - -- '¤' -- CURRENCY SIGN - -- '¥' -- YEN SIGN - -- '¦' -- BROKEN BAR - -- '§' -- SECTION SIGN - -- '¨' -- DIAERESIS - -- '©' -- COPYRIGHT SIGN - -- 'ª' -- FEMININE ORDINAL INDICATOR - -- '«' -- LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - -- '¬' -- NOT SIGN - -- '­' -- SOFT HYPHEN - -- '®' -- REGISTERED SIGN - -- '¯' -- MACRON - -- '°' -- DEGREE SIGN - -- '±' -- PLUS-MINUS SIGN - -- '²' -- SUPERSCRIPT TWO - -- '³' -- SUPERSCRIPT THREE - -- '´' -- ACUTE ACCENT - -- 'µ' -- MICRO SIGN - -- '¶' -- PILCROW SIGN - -- '·' -- MIDDLE DOT - -- '¸' -- CEDILLA - -- '¹' -- SUPERSCRIPT ONE - -- 'º' -- MASCULINE ORDINAL INDICATOR - -- '»' -- RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - -- '¼' -- VULGAR FRACTION ONE QUARTER - -- '½' -- VULGAR FRACTION ONE HALF - -- '¾' -- VULGAR FRACTION THREE QUARTERS - -- '¿' -- INVERTED QUESTION MARK - -- '×' -- MULTIPLICATION SIGN - -- '÷' -- DIVISION SIGN - - DEL .. APC => Invalid); - - -- The context contains the whole internal state of the scanner, ie - -- it can be used to push/pop a lexical analysis, to restart the - -- scanner from a context marking a previous point. - type Scan_Context is record - Source : File_Buffer_Acc; - Source_File : Source_File_Entry; - Line_Number : Natural; - Line_Pos : Source_Ptr; - Prev_Pos : Source_Ptr; - Token_Pos : Source_Ptr; - Pos : Source_Ptr; - File_Len : Source_Ptr; - Token : Token_Type; - Prev_Token : Token_Type; - - -- Additional values for the current token. - Bit_Str_Base : Character; - Bit_Str_Sign : Character; - Str_Id : String8_Id; - Str_Len : Nat32; - Identifier: Name_Id; - Int64 : Iir_Int64; - Fp64 : Iir_Fp64; - end record; - pragma Suppress_Initialization (Scan_Context); - - -- Disp a message during scan. - -- The current location is automatically displayed before the message. - -- Disp a message during scan. - procedure Error_Msg_Scan (Msg: String) is - begin - Report_Msg (Msgid_Error, Scan, No_Location, Msg); - end Error_Msg_Scan; - - procedure Error_Msg_Scan (Loc : Location_Type; Msg: String) is - begin - Report_Msg (Msgid_Error, Scan, Loc, Msg); - end Error_Msg_Scan; - - procedure Error_Msg_Scan (Msg: String; Arg1 : Earg_Type) is - begin - Report_Msg (Msgid_Error, Scan, No_Location, Msg, (1 => Arg1)); - end Error_Msg_Scan; - - -- Disp a message during scan. - procedure Warning_Msg_Scan (Id : Msgid_Warnings; Msg: String) is - begin - Report_Msg (Id, Scan, No_Location, Msg); - end Warning_Msg_Scan; - - procedure Warning_Msg_Scan (Id : Msgid_Warnings; - Msg: String; - Arg1 : Earg_Type; - Cont : Boolean := False) is - begin - Report_Msg (Id, Scan, No_Location, Msg, (1 => Arg1), Cont); - end Warning_Msg_Scan; - - -- The current context. - -- Default value is an invalid context. - Current_Context: Scan_Context := (Source => null, - Source_File => No_Source_File_Entry, - Line_Number => 0, - Line_Pos => 0, - Pos => 0, - Prev_Pos => 0, - Token_Pos => 0, - File_Len => 0, - Token => Tok_Invalid, - Prev_Token => Tok_Invalid, - Identifier => Null_Identifier, - Bit_Str_Base => ' ', - Bit_Str_Sign => ' ', - Str_Id => Null_String8, - Str_Len => 0, - Int64 => 0, - Fp64 => 0.0); - - Source: File_Buffer_Acc renames Current_Context.Source; - Pos: Source_Ptr renames Current_Context.Pos; - - -- When CURRENT_TOKEN is an identifier, its name_id is stored into - -- this global variable. - -- Function current_text can be used to convert it into an iir. - function Current_Identifier return Name_Id is - begin - return Current_Context.Identifier; - end Current_Identifier; - - procedure Invalidate_Current_Identifier is - begin - Current_Context.Identifier := Null_Identifier; - end Invalidate_Current_Identifier; - - procedure Invalidate_Current_Token is - begin - if Current_Token /= Tok_Invalid then - Current_Context.Prev_Token := Current_Token; - Current_Token := Tok_Invalid; - end if; - end Invalidate_Current_Token; - - function Current_String_Id return String8_Id is - begin - return Current_Context.Str_Id; - end Current_String_Id; - - function Current_String_Length return Nat32 is - begin - return Current_Context.Str_Len; - end Current_String_Length; - - function Get_Bit_String_Base return Character is - begin - return Current_Context.Bit_Str_Base; - end Get_Bit_String_Base; - - function Get_Bit_String_Sign return Character is - begin - return Current_Context.Bit_Str_Sign; - end Get_Bit_String_Sign; - - function Current_Iir_Int64 return Iir_Int64 is - begin - return Current_Context.Int64; - end Current_Iir_Int64; - - function Current_Iir_Fp64 return Iir_Fp64 is - begin - return Current_Context.Fp64; - end Current_Iir_Fp64; - - function Get_Current_Source_File return Source_File_Entry is - begin - return Current_Context.Source_File; - end Get_Current_Source_File; - - function Get_Current_Line return Natural is - begin - return Current_Context.Line_Number; - end Get_Current_Line; - - function Get_Current_Offset return Natural is - begin - return Natural (Current_Context.Pos - Current_Context.Line_Pos); - end Get_Current_Offset; - - function Get_Token_Offset return Natural is - begin - return Natural (Current_Context.Token_Pos - Current_Context.Line_Pos); - end Get_Token_Offset; - - function Get_Token_Position return Source_Ptr is - begin - return Current_Context.Token_Pos; - end Get_Token_Position; - - function Get_Position return Source_Ptr is - begin - return Current_Context.Pos; - end Get_Position; - - function Get_Token_Location return Location_Type is - begin - return File_Pos_To_Location - (Current_Context.Source_File, Current_Context.Token_Pos); - end Get_Token_Location; - - function Get_Prev_Location return Location_Type is - begin - return File_Pos_To_Location - (Current_Context.Source_File, Current_Context.Prev_Pos); - end Get_Prev_Location; - - procedure Set_File (Source_File : Source_File_Entry) - is - N_Source: File_Buffer_Acc; - begin - pragma Assert (Current_Context.Source = null); - pragma Assert (Source_File /= No_Source_File_Entry); - N_Source := Get_File_Source (Source_File); - Current_Context := (Source => N_Source, - Source_File => Source_File, - Line_Number => 1, - Line_Pos => 0, - Prev_Pos => N_Source'First, - Pos => N_Source'First, - Token_Pos => 0, -- should be invalid, - File_Len => Get_File_Length (Source_File), - Token => Tok_Invalid, - Prev_Token => Tok_Invalid, - Identifier => Null_Identifier, - Bit_Str_Base => ' ', - Bit_Str_Sign => ' ', - Str_Id => Null_String8, - Str_Len => 0, - Int64 => -1, - Fp64 => 0.0); - Current_Token := Tok_Invalid; - end Set_File; - - function Detect_Encoding_Errors return Boolean - is - C : constant Character := Source (Pos); - begin - -- No need to check further if first character is plain ASCII-7 - if C >= ' ' and C < Character'Val (127) then - return False; - end if; - - -- UTF-8 BOM is EF BB BF - if Source (Pos + 0) = Character'Val (16#ef#) - and then Source (Pos + 1) = Character'Val (16#bb#) - and then Source (Pos + 2) = Character'Val (16#bf#) - then - Error_Msg_Scan - ("source encoding must be latin-1 (UTF-8 BOM detected)"); - return True; - end if; - - -- UTF-16 BE BOM is FE FF - if Source (Pos + 0) = Character'Val (16#fe#) - and then Source (Pos + 1) = Character'Val (16#ff#) - then - Error_Msg_Scan - ("source encoding must be latin-1 (UTF-16 BE BOM detected)"); - return True; - end if; - - -- UTF-16 LE BOM is FF FE - if Source (Pos + 0) = Character'Val (16#ff#) - and then Source (Pos + 1) = Character'Val (16#fe#) - then - Error_Msg_Scan - ("source encoding must be latin-1 (UTF-16 LE BOM detected)"); - return True; - end if; - - -- Certainly weird, but scanner/parser will catch it. - return False; - end Detect_Encoding_Errors; - - procedure Set_Current_Position (Position: Source_Ptr) - is - Loc : Location_Type; - Offset: Natural; - File_Entry : Source_File_Entry; - begin - -- Scanner must have been initialized. - pragma Assert (Current_Context.Source /= null); - - Current_Token := Tok_Invalid; - Current_Context.Pos := Position; - Loc := File_Pos_To_Location (Current_Context.Source_File, - Current_Context.Pos); - Location_To_Coord (Loc, - File_Entry, Current_Context.Line_Pos, - Current_Context.Line_Number, Offset); - end Set_Current_Position; - - procedure Close_File is - begin - Current_Context.Source := null; - end Close_File; - - -- Emit an error when a character above 128 was found. - -- This must be called only in vhdl87. - procedure Error_8bit is - begin - Error_Msg_Scan ("8 bits characters not allowed in vhdl87"); - end Error_8bit; - - -- Emit an error when a separator is expected. - procedure Error_Separator is - begin - Error_Msg_Scan ("a separator is required here"); - end Error_Separator; - - -- scan a decimal literal or a based literal. - -- - -- LRM93 13.4.1 - -- DECIMAL_LITERAL ::= INTEGER [ . INTEGER ] [ EXPONENT ] - -- EXPONENT ::= E [ + ] INTEGER | E - INTEGER - -- - -- LRM93 13.4.2 - -- BASED_LITERAL ::= BASE # BASED_INTEGER [ . BASED_INTEGER ] # EXPONENT - -- BASE ::= INTEGER - procedure Scan_Literal is separate; - - -- Scan a string literal. - -- - -- LRM93 13.6 / LRM08 15.7 - -- A string literal is formed by a sequence of graphic characters - -- (possibly none) enclosed between two quotation marks used as string - -- brackets. - -- STRING_LITERAL ::= " { GRAPHIC_CHARACTER } " - -- - -- IN: for a string, at the call of this procedure, the current character - -- must be either '"' or '%'. - procedure Scan_String - is - -- The quotation character (can be " or %). - Mark: Character; - -- Current character. - C : Character; - -- Current length. - Length : Nat32; - begin - -- String delimiter. - Mark := Source (Pos); - pragma Assert (Mark = '"' or else Mark = '%'); - - Pos := Pos + 1; - Length := 0; - Current_Context.Str_Id := Str_Table.Create_String8; - loop - C := Source (Pos); - if C = Mark then - -- LRM93 13.6 - -- If a quotation mark value is to be represented in the sequence - -- of character values, then a pair of adjacent quoatation - -- characters marks must be written at the corresponding place - -- within the string literal. - -- LRM93 13.10 - -- Any pourcent sign within the sequence of characters must then - -- be doubled, and each such doubled percent sign is interpreted - -- as a single percent sign value. - -- The same replacement is allowed for a bit string literal, - -- provieded that both bit string brackets are replaced. - Pos := Pos + 1; - exit when Source (Pos) /= Mark; - end if; - - case Characters_Kind (C) is - when Format_Effector => - if Mark = '%' then - -- No matching '%' has been found. Consider '%' was used - -- as the remainder operator, instead of 'rem'. This will - -- improve the error message. - Error_Msg_Scan - (Get_Token_Location, - "'%%' is not a vhdl operator, use 'rem'"); - Current_Token := Tok_Rem; - Pos := Current_Context.Token_Pos + 1; - return; - end if; - if C = CR or C = LF then - Error_Msg_Scan - ("string cannot be multi-line, use concatenation"); - else - Error_Msg_Scan ("format effector not allowed in a string"); - end if; - exit; - when Invalid => - if C = Files_Map.EOT - and then Pos >= Current_Context.File_Len - then - Error_Msg_Scan ("string not terminated at end of file"); - exit; - end if; - - Error_Msg_Scan - ("invalid character not allowed, even in a string"); - when Graphic_Character => - if Vhdl_Std = Vhdl_87 and then C > Character'Val (127) then - Error_8bit; - end if; - end case; - - if C = '"' and Mark = '%' then - -- LRM93 13.10 - -- The quotation marks (") used as string brackets at both ends of - -- a string literal can be replaced by percent signs (%), provided - -- that the enclosed sequence of characters constains no quotation - -- marks, and provided that both string brackets are replaced. - Error_Msg_Scan - ("'""' cannot be used in a string delimited with '%%'"); - end if; - - Length := Length + 1; - Str_Table.Append_String8 (Character'Pos (C)); - Pos := Pos + 1; - end loop; - - Current_Token := Tok_String; - Current_Context.Str_Len := Length; - end Scan_String; - - -- Scan a bit string literal. - -- - -- LRM93 13.7 - -- A bit string literal is formed by a sequence of extended digits - -- (possibly none) enclosed between two quotations used as bit string - -- brackets, preceded by a base specifier. - -- BIT_STRING_LITERAL ::= BASE_SPECIFIER " [ BIT_VALUE ] " - -- BIT_VALUE ::= EXTENDED_DIGIT { [ UNDERLINE ] EXTENDED_DIGIT } - -- - -- The current character must be a base specifier, followed by '"' or '%'. - -- The base must be valid. - procedure Scan_Bit_String (Base_Log : Nat32) - is - -- Position of character '0'. - Pos_0 : constant Nat8 := Character'Pos ('0'); - - -- Used for the base. - subtype Nat4 is Natural range 1 .. 4; - Base : constant Nat32 := 2 ** Nat4 (Base_Log); - - -- The quotation character (can be " or %). - Orig_Pos : constant Source_Ptr := Pos; - Mark : constant Character := Source (Orig_Pos); - -- Current character. - C : Character; - -- Current length. - Length : Nat32; - -- Digit value. - V, D : Nat8; - -- True if invalid character already found, to avoid duplicate message. - Has_Invalid : Boolean; - begin - pragma Assert (Mark = '"' or else Mark = '%'); - Pos := Pos + 1; - Length := 0; - Has_Invalid := False; - Current_Context.Str_Id := Str_Table.Create_String8; - loop - << Again >> null; - C := Source (Pos); - Pos := Pos + 1; - exit when C = Mark; - - -- LRM93 13.7 - -- If the base specifier is 'B', the extended digits in the bit - -- value are restricted to 0 and 1. - -- If the base specifier is 'O', the extended digits int the bit - -- value are restricted to legal digits in the octal number - -- system, ie, the digits 0 through 7. - -- If the base specifier is 'X', the extended digits are all digits - -- together with the letters A through F. - case C is - when '0' .. '9' => - V := Character'Pos (C) - Character'Pos ('0'); - when 'A' .. 'F' => - V := Character'Pos (C) - Character'Pos ('A') + 10; - when 'a' .. 'f' => - -- LRM93 13.7 - -- A letter in a bit string literal (...) can be written either - -- in lowercase or in upper case, with the same meaning. - V := Character'Pos (C) - Character'Pos ('a') + 10; - when '_' => - if Source (Pos) = '_' then - Error_Msg_Scan - ("double underscore not allowed in a bit string"); - end if; - if Source (Pos - 2) = Mark then - Error_Msg_Scan - ("underscore not allowed at the start of a bit string"); - elsif Source (Pos) = Mark then - Error_Msg_Scan - ("underscore not allowed at the end of a bit string"); - end if; - goto Again; - when '"' => - pragma Assert (Mark = '%'); - Error_Msg_Scan - ("'""' cannot close a bit string opened by '%%'"); - exit; - when '%' => - pragma Assert (Mark = '"'); - Error_Msg_Scan - ("'%%' cannot close a bit string opened by '""'"); - exit; - when others => - if Characters_Kind (C) in Graphic_Character then - if Vhdl_Std >= Vhdl_08 then - V := Nat8'Last; - else - if not Has_Invalid then - Error_Msg_Scan ("invalid character in bit string"); - Has_Invalid := True; - end if; - -- Continue the bit string - V := 0; - end if; - else - if Mark = '%' then - Error_Msg_Scan - (File_Pos_To_Location - (Current_Context.Source_File, Orig_Pos), - "'%%' is not a vhdl operator, use 'rem'"); - Current_Token := Tok_Rem; - Pos := Orig_Pos + 1; - return; - else - Error_Msg_Scan ("bit string not terminated"); - Pos := Pos - 1; - end if; - exit; - end if; - end case; - - -- Expand bit value. - if Vhdl_Std >= Vhdl_08 and V > Base then - -- Expand as graphic character. - for I in 1 .. Base_Log loop - Str_Table.Append_String8_Char (C); - end loop; - else - -- Expand as extended digits. - case Base_Log is - when 1 => - if V > 1 then - Error_Msg_Scan - ("invalid character in a binary bit string"); - V := 1; - end if; - Str_Table.Append_String8 (Pos_0 + V); - when 3 => - if V > 7 then - Error_Msg_Scan - ("invalid character in a octal bit string"); - V := 7; - end if; - for I in 1 .. 3 loop - D := V / 4; - Str_Table.Append_String8 (Pos_0 + D); - V := (V - 4 * D) * 2; - end loop; - when 4 => - for I in 1 .. 4 loop - D := V / 8; - Str_Table.Append_String8 (Pos_0 + D); - V := (V - 8 * D) * 2; - end loop; - when others => - raise Internal_Error; - end case; - end if; - - Length := Length + Base_Log; - end loop; - - -- Note: the length of the bit string may be 0. - - Current_Token := Tok_Bit_String; - Current_Context.Str_Len := Length; - end Scan_Bit_String; - - -- Scan a decimal bit string literal. For base specifier D the algorithm - -- is rather different: all the graphic characters shall be digits, and we - -- need to use a (not very efficient) arbitrary precision multiplication. - procedure Scan_Dec_Bit_String - is - use Str_Table; - - Id : String8_Id; - - -- Position of character '0'. - Pos_0 : constant Nat8 := Character'Pos ('0'); - - -- Current character. - C : Character; - -- Current length. - Length : Nat32; - -- Digit value. - V, D : Nat8; - - type Carries_Type is array (0 .. 3) of Nat8; - Carries : Carries_Type; - No_Carries : constant Carries_Type := (others => Pos_0); - - -- Shift right carries. Note the Carries (0) is the LSB. - procedure Shr_Carries is - begin - Carries := (Carries (1), Carries (2), Carries (3), Pos_0); - end Shr_Carries; - - procedure Append_Carries is - begin - -- Expand the bit string. Note that position 1 of the string8 is - -- the MSB. - while Carries /= No_Carries loop - Append_String8 (Pos_0); - Length := Length + 1; - for I in reverse 2 .. Length loop - Set_Element_String8 (Id, I, Element_String8 (Id, I - 1)); - end loop; - Set_Element_String8 (Id, 1, Carries (0)); - Shr_Carries; - end loop; - end Append_Carries; - - -- Add 1 to Carries. Overflow is not allowed and should be prevented by - -- construction. - procedure Add_One_To_Carries is - begin - for I in Carries'Range loop - if Carries (I) = Pos_0 then - Carries (I) := Pos_0 + 1; - -- End of propagation. - exit; - else - Carries (I) := Pos_0; - -- Continue propagation. - pragma Assert (I < Carries'Last); - end if; - end loop; - end Add_One_To_Carries; - begin - pragma Assert (Source (Pos) = '"'); - Pos := Pos + 1; - Length := 0; - Id := Create_String8; - Current_Context.Str_Id := Id; - loop - << Again >> null; - C := Source (Pos); - Pos := Pos + 1; - exit when C = '"'; - - if C in '0' .. '9' then - V := Character'Pos (C) - Character'Pos ('0'); - elsif C = '_' then - if Source (Pos) = '_' then - Error_Msg_Scan - ("double underscore not allowed in a bit string"); - end if; - if Source (Pos - 2) = '"' then - Error_Msg_Scan - ("underscore not allowed at the start of a bit string"); - elsif Source (Pos) = '"' then - Error_Msg_Scan - ("underscore not allowed at the end of a bit string"); - end if; - goto Again; - else - if Characters_Kind (C) in Graphic_Character then - Error_Msg_Scan - ("graphic character not allowed in decimal bit string"); - -- Continue the bit string - V := 0; - else - Error_Msg_Scan ("bit string not terminated"); - Pos := Pos - 1; - exit; - end if; - end if; - - -- Multiply by 10. - Carries := (others => Pos_0); - for I in reverse 1 .. Length loop - -- Shift by 1 (*2). - D := Element_String8 (Id, I); - Set_Element_String8 (Id, I, Carries (0)); - Shr_Carries; - -- Add D and D * 4. - if D /= Pos_0 then - Add_One_To_Carries; - -- Add_Four_To_Carries: - for I in 2 .. 3 loop - if Carries (I) = Pos_0 then - Carries (I) := Pos_0 + 1; - -- End of propagation. - exit; - else - Carries (I) := Pos_0; - -- Continue propagation. - end if; - end loop; - end if; - end loop; - Append_Carries; - - -- Add V. - for I in Carries'Range loop - D := V / 2; - Carries (I) := Pos_0 + (V - 2 * D); - V := D; - end loop; - for I in reverse 1 .. Length loop - D := Element_String8 (Id, I); - if D /= Pos_0 then - Add_One_To_Carries; - end if; - Set_Element_String8 (Id, I, Carries (0)); - Shr_Carries; - exit when Carries = No_Carries; - end loop; - Append_Carries; - end loop; - - Current_Token := Tok_Bit_String; - Current_Context.Str_Len := Length; - end Scan_Dec_Bit_String; - - -- LRM08 15.2 Character set - -- For each uppercase letter, there is a corresponding lowercase letter; - -- and for each lowercase letter except [y diaeresis] and [german sharp s], - -- there is a corresponding uppercase letter. - type Character_Map is array (Character) of Character; - To_Lower_Map : constant Character_Map := - ( - -- Uppercase ASCII letters. - 'A' => 'a', - 'B' => 'b', - 'C' => 'c', - 'D' => 'd', - 'E' => 'e', - 'F' => 'f', - 'G' => 'g', - 'H' => 'h', - 'I' => 'i', - 'J' => 'j', - 'K' => 'k', - 'L' => 'l', - 'M' => 'm', - 'N' => 'n', - 'O' => 'o', - 'P' => 'p', - 'Q' => 'q', - 'R' => 'r', - 'S' => 's', - 'T' => 't', - 'U' => 'u', - 'V' => 'v', - 'W' => 'w', - 'X' => 'x', - 'Y' => 'y', - 'Z' => 'z', - - -- Lowercase ASCII letters. - 'a' => 'a', - 'b' => 'b', - 'c' => 'c', - 'd' => 'd', - 'e' => 'e', - 'f' => 'f', - 'g' => 'g', - 'h' => 'h', - 'i' => 'i', - 'j' => 'j', - 'k' => 'k', - 'l' => 'l', - 'm' => 'm', - 'n' => 'n', - 'o' => 'o', - 'p' => 'p', - 'q' => 'q', - 'r' => 'r', - 's' => 's', - 't' => 't', - 'u' => 'u', - 'v' => 'v', - 'w' => 'w', - 'x' => 'x', - 'y' => 'y', - 'z' => 'z', - - -- Uppercase Latin-1 letters. - UC_A_Grave => LC_A_Grave, - UC_A_Acute => LC_A_Acute, - UC_A_Circumflex => LC_A_Circumflex, - UC_A_Tilde => LC_A_Tilde, - UC_A_Diaeresis => LC_A_Diaeresis, - UC_A_Ring => LC_A_Ring, - UC_AE_Diphthong => LC_AE_Diphthong, - UC_C_Cedilla => LC_C_Cedilla, - UC_E_Grave => LC_E_Grave, - UC_E_Acute => LC_E_Acute, - UC_E_Circumflex => LC_E_Circumflex, - UC_E_Diaeresis => LC_E_Diaeresis, - UC_I_Grave => LC_I_Grave, - UC_I_Acute => LC_I_Acute, - UC_I_Circumflex => LC_I_Circumflex, - UC_I_Diaeresis => LC_I_Diaeresis, - UC_Icelandic_Eth => LC_Icelandic_Eth, - UC_N_Tilde => LC_N_Tilde, - UC_O_Grave => LC_O_Grave, - UC_O_Acute => LC_O_Acute, - UC_O_Circumflex => LC_O_Circumflex, - UC_O_Tilde => LC_O_Tilde, - UC_O_Diaeresis => LC_O_Diaeresis, - UC_O_Oblique_Stroke => LC_O_Oblique_Stroke, - UC_U_Grave => LC_U_Grave, - UC_U_Acute => LC_U_Acute, - UC_U_Circumflex => LC_U_Circumflex, - UC_U_Diaeresis => LC_U_Diaeresis, - UC_Y_Acute => LC_Y_Acute, - UC_Icelandic_Thorn => LC_Icelandic_Thorn, - - -- Lowercase Latin-1 letters. - LC_A_Grave => LC_A_Grave, - LC_A_Acute => LC_A_Acute, - LC_A_Circumflex => LC_A_Circumflex, - LC_A_Tilde => LC_A_Tilde, - LC_A_Diaeresis => LC_A_Diaeresis, - LC_A_Ring => LC_A_Ring, - LC_AE_Diphthong => LC_AE_Diphthong, - LC_C_Cedilla => LC_C_Cedilla, - LC_E_Grave => LC_E_Grave, - LC_E_Acute => LC_E_Acute, - LC_E_Circumflex => LC_E_Circumflex, - LC_E_Diaeresis => LC_E_Diaeresis, - LC_I_Grave => LC_I_Grave, - LC_I_Acute => LC_I_Acute, - LC_I_Circumflex => LC_I_Circumflex, - LC_I_Diaeresis => LC_I_Diaeresis, - LC_Icelandic_Eth => LC_Icelandic_Eth, - LC_N_Tilde => LC_N_Tilde, - LC_O_Grave => LC_O_Grave, - LC_O_Acute => LC_O_Acute, - LC_O_Circumflex => LC_O_Circumflex, - LC_O_Tilde => LC_O_Tilde, - LC_O_Diaeresis => LC_O_Diaeresis, - LC_O_Oblique_Stroke => LC_O_Oblique_Stroke, - LC_U_Grave => LC_U_Grave, - LC_U_Acute => LC_U_Acute, - LC_U_Circumflex => LC_U_Circumflex, - LC_U_Diaeresis => LC_U_Diaeresis, - LC_Y_Acute => LC_Y_Acute, - LC_Icelandic_Thorn => LC_Icelandic_Thorn, - - -- Lowercase latin-1 characters without corresponding uppercase one. - LC_Y_Diaeresis => LC_Y_Diaeresis, - LC_German_Sharp_S => LC_German_Sharp_S, - - -- Not a letter. - others => NUL); - - procedure Error_Too_Long is - begin - Error_Msg_Scan ("identifier is too long (>" - & Natural'Image (Max_Name_Length - 1) & ")"); - end Error_Too_Long; - - -- LRM93 13.3.1 - -- Basic Identifiers - -- A basic identifier consists only of letters, digits, and underlines. - -- BASIC_IDENTIFIER ::= LETTER { [ UNDERLINE ] LETTER_OR_DIGIT } - -- LETTER_OR_DIGIT ::= LETTER | DIGIT - -- LETTER ::= UPPER_CASE_LETTER | LOWER_CASE_LETTER - -- - -- NB: At the call of this procedure, the current character must be a legal - -- character for a basic identifier. - procedure Scan_Identifier (Allow_PSL : Boolean) - is - use Name_Table; - Buffer : String (1 .. Max_Name_Length); - C : Character; - Len : Natural; - begin - -- This is an identifier or a key word. - Len := 0; - loop - -- Source (pos) is correct. - -- LRM93 13.3.1 - -- All characters if a basic identifier are signifiant, including - -- any underline character inserted between a letter or digit and - -- an adjacent letter or digit. - -- Basic identifiers differing only in the use of the corresponding - -- upper and lower case letters are considered as the same. - -- - -- GHDL: This is achieved by converting all upper case letters into - -- equivalent lower case letters. - -- The opposite (converting to upper lower case letters) is not - -- possible because two characters have no upper-case equivalent. - C := Source (Pos); - case C is - when 'A' .. 'Z' => - C := Character'Val - (Character'Pos (C) - + Character'Pos ('a') - Character'Pos ('A')); - when 'a' .. 'z' | '0' .. '9' => - null; - when '_' => - if Source (Pos + 1) = '_' then - Error_Msg_Scan ("two underscores can't be consecutive"); - end if; - when ' ' | ')' | '.' | ';' | ':' => - exit; - when others => - -- Non common case. - case Characters_Kind (C) is - when Upper_Case_Letter | Lower_Case_Letter => - if Vhdl_Std = Vhdl_87 then - Error_8bit; - end if; - C := To_Lower_Map (C); - pragma Assert (C /= NUL); - when Digit => - raise Internal_Error; - when others => - exit; - end case; - end case; - - -- Put character in name buffer. FIXME: compute the hash at the same - -- time ? - if Len >= Max_Name_Length - 1 then - if Len = Max_Name_Length -1 then - Error_Msg_Scan ("identifier is too long (>" - & Natural'Image (Max_Name_Length - 1) & ")"); - -- Accept this last one character, so that no error for the - -- following characters. - Len := Len + 1; - Buffer (Len) := C; - end if; - else - Len := Len + 1; - Buffer (Len) := C; - end if; - - -- Next character. - Pos := Pos + 1; - end loop; - - if Source (Pos - 1) = '_' then - if Allow_PSL then - -- Some PSL reserved words finish with '_'. This case is handled - -- later by Scan_Underscore and Scan_Exclam_Mark. - Pos := Pos - 1; - Len := Len - 1; - C := '_'; - else - -- Eat the trailing underscore. - Error_Msg_Scan ("an identifier cannot finish with '_'"); - end if; - end if; - - -- LRM93 13.2 - -- At least one separator is required between an identifier or an - -- abstract literal and an adjacent identifier or abstract literal. - case Characters_Kind (C) is - when Digit - | Upper_Case_Letter - | Lower_Case_Letter => - raise Internal_Error; - when Other_Special_Character | Special_Character => - if (C = '"' or C = '%') and then Len <= 2 then - if C = '%' and Vhdl_Std >= Vhdl_08 then - Error_Msg_Scan ("'%%' not allowed in vhdl 2008 " - & "(was replacement character)"); - -- Continue as a bit string. - end if; - - -- Good candidate for bit string. - - -- LRM93 13.7 - -- BASE_SPECIFIER ::= B | O | X - -- - -- A letter in a bit string literal (either an extended digit - -- or the base specifier) can be written either in lower case - -- or in upper case, with the same meaning. - -- - -- LRM08 15.8 Bit string literals - -- BASE_SPECICIER ::= - -- B | O | X | UB | UO | UX | SB | SO | SX | D - -- - -- An extended digit and the base specifier in a bit string - -- literal can be written either in lowercase or in uppercase, - -- with the same meaning. - declare - Base : Nat32; - Cl : constant Character := Buffer (Len); - Cf : constant Character := Buffer (1); - begin - Current_Context.Bit_Str_Base := Cl; - if Cl = 'b' then - Base := 1; - elsif Cl = 'o' then - Base := 3; - elsif Cl = 'x' then - Base := 4; - elsif Vhdl_Std >= Vhdl_08 and Len = 1 and Cf = 'd' then - Current_Context.Bit_Str_Sign := ' '; - Scan_Dec_Bit_String; - return; - else - Base := 0; - end if; - if Base > 0 then - if Len = 1 then - Current_Context.Bit_Str_Sign := ' '; - Scan_Bit_String (Base); - return; - elsif Vhdl_Std >= Vhdl_08 - and then (Cf = 's' or Cf = 'u') - then - Current_Context.Bit_Str_Sign := Cf; - Scan_Bit_String (Base); - return; - end if; - end if; - end; - elsif Vhdl_Std > Vhdl_87 and then C = '\' then - -- Start of extended identifier. Cannot follow an identifier. - Error_Separator; - end if; - - when Invalid => - -- Improve error message for use of UTF-8 quote marks. - -- It's possible because in the sequence of UTF-8 bytes for the - -- quote marks, there are invalid character (in the 128-160 - -- range). - if C = Character'Val (16#80#) - and then Buffer (Len) = Character'Val (16#e2#) - and then (Source (Pos + 1) = Character'Val (16#98#) - or else Source (Pos + 1) = Character'Val (16#99#)) - then - -- UTF-8 left or right single quote mark. - if Len > 1 then - -- The first byte (0xe2) is part of the identifier. An - -- error will be detected as the next byte (0x80) is - -- invalid. Remove the first byte from the identifier, and - -- let's catch the error later. - Len := Len - 1; - Pos := Pos - 1; - else - Error_Msg_Scan ("invalid use of UTF8 character for '"); - Pos := Pos + 2; - - -- Distinguish between character literal and tick. Don't - -- care about possible invalid character literal, as in any - -- case we have already emitted an error message. - if Current_Context.Prev_Token /= Tok_Identifier - and then Current_Context.Prev_Token /= Tok_Character - and then - (Source (Pos + 1) = ''' - or else - (Source (Pos + 1) = Character'Val (16#e2#) - and then Source (Pos + 2) = Character'Val (16#80#) - and then Source (Pos + 3) = Character'Val (16#99#))) - then - Current_Token := Tok_Character; - Current_Context.Identifier := - Name_Table.Get_Identifier (Source (Pos)); - if Source (Pos + 1) = ''' then - Pos := Pos + 2; - else - Pos := Pos + 4; - end if; - else - Current_Token := Tok_Tick; - end if; - return; - end if; - end if; - when Format_Effector - | Space_Character => - null; - end case; - - -- Hash it. - Current_Context.Identifier := Get_Identifier (Buffer (1 .. Len)); - Current_Token := Tok_Identifier; - end Scan_Identifier; - - procedure Identifier_To_Token is - begin - if Current_Identifier in Std_Names.Name_Id_Keywords then - -- LRM93 13.9 - -- The identifiers listed below are called reserved words and are - -- reserved for signifiances in the language. - -- IN: this is also achieved in packages std_names and tokens. - Current_Token := Token_Type'Val - (Token_Type'Pos (Tok_First_Keyword) - + Current_Identifier - Std_Names.Name_First_Keyword); - case Current_Identifier is - when Std_Names.Name_Id_AMS_Reserved_Words => - if not AMS_Vhdl then - if Is_Warning_Enabled (Warnid_Reserved_Word) then - Warning_Msg_Scan - (Warnid_Reserved_Word, - "using %i AMS-VHDL reserved word as an identifier", - +Current_Identifier); - end if; - Current_Token := Tok_Identifier; - end if; - when Std_Names.Name_Id_Vhdl08_Reserved_Words => - if Vhdl_Std < Vhdl_08 then - if Is_Warning_Enabled (Warnid_Reserved_Word) then - Warning_Msg_Scan - (Warnid_Reserved_Word, - "using %i vhdl-2008 reserved word as an identifier", - +Current_Identifier); - end if; - Current_Token := Tok_Identifier; - end if; - when Std_Names.Name_Id_Vhdl00_Reserved_Words => - if Vhdl_Std < Vhdl_00 then - if Is_Warning_Enabled (Warnid_Reserved_Word) then - Warning_Msg_Scan - (Warnid_Reserved_Word, - "using %i vhdl-2000 reserved word as an identifier", - +Current_Identifier); - end if; - Current_Token := Tok_Identifier; - end if; - when Std_Names.Name_Id_Vhdl93_Reserved_Words => - if Vhdl_Std = Vhdl_87 then - if Is_Warning_Enabled (Warnid_Reserved_Word) then - Warning_Msg_Scan - (Warnid_Reserved_Word, - "using %i vhdl93 reserved word as a vhdl87 identifier", - +Current_Identifier, True); - Warning_Msg_Scan - (Warnid_Reserved_Word, - "(use option --std=93 to compile as vhdl93)"); - end if; - Current_Token := Tok_Identifier; - end if; - when Std_Names.Name_Id_Vhdl87_Reserved_Words => - null; - when others => - raise Program_Error; - end case; - elsif Flag_Psl then - case Current_Identifier is - when Std_Names.Name_Clock => - Current_Token := Tok_Psl_Clock; - when Std_Names.Name_Const => - Current_Token := Tok_Psl_Const; - when Std_Names.Name_Boolean => - Current_Token := Tok_Psl_Boolean; - when Std_Names.Name_Sequence => - Current_Token := Tok_Psl_Sequence; - when Std_Names.Name_Property => - Current_Token := Tok_Psl_Property; - when Std_Names.Name_Endpoint => - Current_Token := Tok_Psl_Endpoint; - when Std_Names.Name_Cover => - Current_Token := Tok_Psl_Cover; - when Std_Names.Name_Default => - Current_Token := Tok_Psl_Default; - when Std_Names.Name_Inf => - Current_Token := Tok_Inf; - when Std_Names.Name_Within => - Current_Token := Tok_Within; - when Std_Names.Name_Abort => - Current_Token := Tok_Abort; - when Std_Names.Name_Before => - Current_Token := Tok_Before; - when Std_Names.Name_Always => - Current_Token := Tok_Always; - when Std_Names.Name_Never => - Current_Token := Tok_Never; - when Std_Names.Name_Eventually => - Current_Token := Tok_Eventually; - when Std_Names.Name_Next_A => - Current_Token := Tok_Next_A; - when Std_Names.Name_Next_E => - Current_Token := Tok_Next_E; - when Std_Names.Name_Next_Event => - Current_Token := Tok_Next_Event; - when Std_Names.Name_Next_Event_A => - Current_Token := Tok_Next_Event_A; - when Std_Names.Name_Next_Event_E => - Current_Token := Tok_Next_Event_E; - when Std_Names.Name_Until => - Current_Token := Tok_Until; - when others => - Current_Token := Tok_Identifier; - if Source (Pos - 1) = '_' then - Error_Msg_Scan ("identifiers cannot finish with '_'"); - end if; - end case; - end if; - end Identifier_To_Token; - - -- LRM93 13.3.2 - -- EXTENDED_IDENTIFIER ::= \ GRAPHIC_CHARACTER { GRAPHIC_CHARACTER } \ - -- - -- Create an (extended) indentifier. - -- Extended identifiers are stored as they appear (leading and tailing - -- backslashes, doubling backslashes inside). - procedure Scan_Extended_Identifier - is - use Name_Table; - Buffer : String (1 .. Max_Name_Length); - Len : Natural; - C : Character; - begin - -- LRM93 13.3.2 - -- Moreover, every extended identifiers is distinct from any basic - -- identifier. - -- GHDL: This is satisfied by storing '\' in the name table. - Len := 1; - Buffer (1) := '\'; - loop - -- Next character. - Pos := Pos + 1; - C := Source (Pos); - - if C = '\' then - -- LRM93 13.3.2 - -- If a backslash is to be used as one of the graphic characters - -- of an extended literal, it must be doubled. - -- LRM93 13.3.2 - -- (a doubled backslash couting as one character) - if Len >= Max_Name_Length - 1 then - if Len = Max_Name_Length - 1 then - Error_Too_Long; - -- Accept this last one. - Len := Len + 1; - Buffer (Len) := C; - end if; - else - Len := Len + 1; - Buffer (Len) := C; - end if; - - Pos := Pos + 1; - C := Source (Pos); - - exit when C /= '\'; - end if; - - case Characters_Kind (C) is - when Format_Effector => - Error_Msg_Scan ("format effector in extended identifier"); - exit; - when Graphic_Character => - null; - when Invalid => - if C = Files_Map.EOT - and then Pos >= Current_Context.File_Len - then - Error_Msg_Scan - ("extended identifier not terminated at end of file"); - elsif C = LF or C = CR then - Error_Msg_Scan - ("extended identifier not terminated at end of line"); - else - Error_Msg_Scan ("invalid character in extended identifier"); - end if; - exit; - end case; - - -- LRM93 13.3.2 - -- Extended identifiers differing only in the use of corresponding - -- upper and lower case letters are distinct. - if Len >= Max_Name_Length - 1 then - if Len = Max_Name_Length - 1 then - Error_Too_Long; - -- Accept this last one. - Len := Len + 1; - Buffer (Len) := C; - end if; - else - Len := Len + 1; - Buffer (Len) := C; - end if; - end loop; - - if Len <= 2 then - Error_Msg_Scan ("empty extended identifier is not allowed"); - end if; - - -- LRM93 13.2 - -- At least one separator is required between an identifier or an - -- abstract literal and an adjacent identifier or abstract literal. - case Characters_Kind (C) is - when Digit - | Upper_Case_Letter - | Lower_Case_Letter => - Error_Separator; - when Invalid - | Format_Effector - | Space_Character - | Special_Character - | Other_Special_Character => - null; - end case; - - -- Hash it. - Current_Context.Identifier := Get_Identifier (Buffer (1 .. Len)); - Current_Token := Tok_Identifier; - end Scan_Extended_Identifier; - - procedure Convert_Identifier (Str : in out String) - is - procedure Error_Bad is - begin - Error_Msg_Option ("bad character in identifier"); - end Error_Bad; - - procedure Error_8bit is - begin - Error_Msg_Option ("8 bits characters not allowed in vhdl87"); - end Error_8bit; - - C : Character; - subtype Id_Subtype is String (1 .. Str'Length); - Id : Id_Subtype renames Str; - begin - if Id'Length = 0 then - Error_Msg_Option ("identifier required"); - return; - end if; - - if Id (1) = '\' then - -- Extended identifier. - if Vhdl_Std = Vhdl_87 then - Error_Msg_Option ("extended identifiers not allowed in vhdl87"); - return; - end if; - - if Id'Length < 3 then - Error_Msg_Option ("extended identifier is too short"); - return; - end if; - if Id (Id'Last) /= '\' then - Error_Msg_Option ("extended identifier must finish with a '\'"); - return; - end if; - for I in 2 .. Id'Last - 1 loop - C := Id (I); - case Characters_Kind (C) is - when Format_Effector => - Error_Msg_Option ("format effector in extended identifier"); - return; - when Graphic_Character => - if C = '\' then - if Id (I + 1) /= '\' - or else I = Id'Last - 1 - then - Error_Msg_Option ("anti-slash must be doubled " - & "in extended identifier"); - return; - end if; - end if; - when Invalid => - Error_Bad; - end case; - end loop; - else - -- Identifier - for I in 1 .. Id'Length loop - C := Id (I); - case Characters_Kind (C) is - when Upper_Case_Letter => - if Vhdl_Std = Vhdl_87 and C > 'Z' then - Error_8bit; - end if; - Id (I) := To_Lower_Map (C); - when Lower_Case_Letter | Digit => - if Vhdl_Std = Vhdl_87 and C > 'z' then - Error_8bit; - end if; - when Special_Character => - -- The current character is legal in an identifier. - if C = '_' then - if I = 1 then - Error_Msg_Option - ("an identifier cannot start with an underscore"); - return; - end if; - if Id (I - 1) = '_' then - Error_Msg_Option - ("two underscores can't be consecutive"); - return; - end if; - if I = Id'Last then - Error_Msg_Option - ("an identifier cannot finish with an underscore"); - return; - end if; - else - Error_Bad; - end if; - when others => - Error_Bad; - end case; - end loop; - end if; - end Convert_Identifier; - - -- Internal scanner function: return True if C must be considered as a line - -- terminator. This also includes EOT (which terminates the file or is - -- invalid). - function Is_EOL (C : Character) return Boolean is - begin - case C is - when CR | LF | VT | FF | Files_Map.EOT => - return True; - when others => - return False; - end case; - end Is_EOL; - - -- Advance scanner till the first non-space character. - procedure Skip_Spaces is - begin - while Source (Pos) = ' ' or Source (Pos) = HT loop - Pos := Pos + 1; - end loop; - end Skip_Spaces; - - -- Eat all characters until end-of-line (not included). - procedure Skip_Until_EOL is - begin - while not Is_EOL (Source (Pos)) loop - -- Don't warn about invalid character, it's somewhat out of the - -- scope. - Pos := Pos + 1; - end loop; - end Skip_Until_EOL; - - -- Scan an identifier within a comment. Only lower case letters are - -- allowed. - procedure Scan_Comment_Identifier (Id : out Name_Id) - is - use Name_Table; - Buffer : String (1 .. Max_Name_Length); - Len : Natural; - C : Character; - begin - Id := Null_Identifier; - Skip_Spaces; - - -- The identifier shall start with a lower case letter. - if Source (Pos) not in 'a' .. 'z' then - return; - end if; - - -- Scan the identifier (in lower cases). - Len := 0; - loop - C := Source (Pos); - exit when C not in 'a' .. 'z' and C /= '_'; - Len := Len + 1; - Buffer (Len) := C; - Pos := Pos + 1; - end loop; - - -- Shall be followed by a space or a new line. - if not (C = ' ' or else C = HT or else Is_EOL (C)) then - return; - end if; - - Id := Get_Identifier (Buffer (1 .. Len)); - end Scan_Comment_Identifier; - - package Directive_Protect is - -- Called to scan a protect tool directive. - procedure Scan_Protect_Directive; - end Directive_Protect; - - -- Body is put in a separate file to avoid pollution. - package body Directive_Protect is separate; - - -- Called to scan a tool directive. - procedure Scan_Tool_Directive - is - procedure Error_Missing_Directive is - begin - Error_Msg_Scan ("tool directive required after '`'"); - Skip_Until_EOL; - end Error_Missing_Directive; - - C : Character; - begin - -- The current character is '`'. - Pos := Pos + 1; - Skip_Spaces; - - -- Check and scan identifier. - C := Source (Pos); - if Characters_Kind (C) not in Letter then - Error_Missing_Directive; - return; - end if; - - Scan_Identifier (False); - - if Current_Token /= Tok_Identifier then - Error_Missing_Directive; - return; - end if; - - Skip_Spaces; - - -- Dispatch according to the identifier. - if Current_Identifier = Std_Names.Name_Protect then - Directive_Protect.Scan_Protect_Directive; - else - Error_Msg_Scan - ("unknown tool directive %i ignored", +Current_Identifier); - Skip_Until_EOL; - end if; - end Scan_Tool_Directive; - - -- Scan tokens within a comment. Return TRUE if Current_Token was set, - -- return FALSE to discard the comment (ie treat it like a real comment). - function Scan_Comment return Boolean - is - use Std_Names; - Id : Name_Id; - begin - Scan_Comment_Identifier (Id); - - if Id = Null_Identifier then - return False; - end if; - - case Id is - when Name_Psl => - -- Accept tokens after '-- psl'. - if Flag_Psl_Comment then - Flag_Psl := True; - Flag_Scan_In_Comment := True; - return True; - end if; - when others => - null; - end case; - return False; - end Scan_Comment; - - function Scan_Exclam_Mark return Boolean is - begin - if Source (Pos) = '!' then - Pos := Pos + 1; - return True; - else - return False; - end if; - end Scan_Exclam_Mark; - - function Scan_Underscore return Boolean is - begin - if Source (Pos) = '_' then - Pos := Pos + 1; - return True; - else - return False; - end if; - end Scan_Underscore; - - -- The Scan_Next_Line procedure must be called after each end-of-line to - -- register to next line number. This is called by Scan_CR_Newline and - -- Scan_LF_Newline. - procedure Scan_Next_Line is - begin - Files_Map.Skip_Gap (Current_Context.Source_File, Pos); - Current_Context.Line_Number := Current_Context.Line_Number + 1; - Current_Context.Line_Pos := Pos; - File_Add_Line_Number - (Current_Context.Source_File, Current_Context.Line_Number, Pos); - end Scan_Next_Line; - - -- Scan a CR end-of-line. - procedure Scan_CR_Newline is - begin - -- Accept CR or CR+LF as line separator. - if Source (Pos + 1) = LF then - Pos := Pos + 2; - else - Pos := Pos + 1; - end if; - Scan_Next_Line; - end Scan_CR_Newline; - - -- Scan a LF end-of-line. - procedure Scan_LF_Newline is - begin - -- Accept LF or LF+CR as line separator. - if Source (Pos + 1) = CR then - Pos := Pos + 2; - else - Pos := Pos + 1; - end if; - Scan_Next_Line; - end Scan_LF_Newline; - - -- Emit an error message for an invalid character. - procedure Error_Bad_Character is - begin - -- Technically character literals, string literals, extended - -- identifiers and comments. - Error_Msg_Scan ("character %c can only be used in strings or comments", - +Source (Pos)); - end Error_Bad_Character; - - -- Get a new token. - procedure Scan is - begin - if Current_Token /= Tok_Invalid then - Current_Context.Prev_Token := Current_Token; - end if; - - Current_Context.Prev_Pos := Pos; - - << Again >> null; - - -- Skip commonly used separators. - -- (Like Skip_Spaces but manually inlined for speed). - while Source (Pos) = ' ' or Source (Pos) = HT loop - Pos := Pos + 1; - end loop; - - Current_Context.Token_Pos := Pos; - Current_Context.Identifier := Null_Identifier; - - case Source (Pos) is - when HT | ' ' => - -- Must have already been skipped just above. - raise Internal_Error; - when NBSP => - if Vhdl_Std = Vhdl_87 then - Error_Msg_Scan ("NBSP character not allowed in vhdl87"); - end if; - Pos := Pos + 1; - goto Again; - when VT | FF => - Pos := Pos + 1; - goto Again; - when LF => - Scan_LF_Newline; - if Flag_Newline then - Current_Token := Tok_Newline; - return; - end if; - goto Again; - when CR => - Scan_CR_Newline; - if Flag_Newline then - Current_Token := Tok_Newline; - return; - end if; - goto Again; - when '-' => - if Source (Pos + 1) = '-' then - -- This is a comment. - -- LRM93 13.8 - -- A comment starts with two adjacent hyphens and extends up - -- to the end of the line. - -- A comment can appear on any line line of a VHDL - -- description. - -- The presence or absence of comments has no influence on - -- whether a description is legal or illegal. - -- Futhermore, comments do not influence the execution of a - -- simulation module; their sole purpose is the enlightenment - -- of the human reader. - -- GHDL note: As a consequence, an obfruscating comment - -- is out of purpose, and a warning could be reported :-) - Pos := Pos + 2; - - -- Scan inside a comment. So we just ignore the two dashes. - if Flag_Scan_In_Comment then - goto Again; - end if; - - -- Handle keywords in comment (PSL). - if Flag_Comment_Keyword and then Scan_Comment then - goto Again; - end if; - - -- LRM93 13.2 - -- In any case, a sequence of one or more format - -- effectors other than horizontal tabulation must - -- cause at least one end of line. - while not Is_EOL (Source (Pos)) loop - -- LRM93 13.1 - -- The only characters allowed in the text of a VHDL - -- description are the graphic characters and the format - -- effectors. - - -- LRM02 13.1 Character set - -- The only characters allowed in the text of a VHDL - -- description (except within comments -- see 13.8) [...] - -- - -- LRM02 13.8 Comments - -- A comment [...] may contain any character except the - -- format effectors vertical tab, carriage return, line - -- feed and form feed. - if not (Flags.Mb_Comment or Vhdl_Std >= Vhdl_02) - and then Characters_Kind (Source (Pos)) = Invalid - then - Error_Msg_Scan ("invalid character, even in a comment"); - end if; - Pos := Pos + 1; - end loop; - if Flag_Comment then - Current_Token := Tok_Comment; - return; - end if; - goto Again; - elsif Flag_Psl and then Source (Pos + 1) = '>' then - Current_Token := Tok_Minus_Greater; - Pos := Pos + 2; - return; - else - Current_Token := Tok_Minus; - Pos := Pos + 1; - return; - end if; - when '+' => - Current_Token := Tok_Plus; - Pos := Pos + 1; - return; - when '*' => - if Source (Pos + 1) = '*' then - Current_Token := Tok_Double_Star; - Pos := Pos + 2; - else - Current_Token := Tok_Star; - Pos := Pos + 1; - end if; - return; - when '/' => - if Source (Pos + 1) = '=' then - Current_Token := Tok_Not_Equal; - Pos := Pos + 2; - elsif Source (Pos + 1) = '*' then - -- LRM08 15.9 Comments - -- A delimited comment start with a solidus (slash) character - -- immediately followed by an asterisk character and extends up - -- to the first subsequent occurrence of an asterisk character - -- immediately followed by a solidus character. - if Vhdl_Std < Vhdl_08 then - Error_Msg_Scan - ("block comment are not allowed before vhdl 2008"); - end if; - - -- Skip '/*'. - Pos := Pos + 2; - - loop - case Source (Pos) is - when '/' => - -- LRM08 15.9 - -- Moreover, an occurrence of a solidus character - -- immediately followed by an asterisk character - -- within a delimited comment is not interpreted as - -- the start of a nested delimited comment. - if Source (Pos + 1) = '*' then - Warning_Msg_Scan - (Warnid_Nested_Comment, - "'/*' found within a block comment"); - end if; - Pos := Pos + 1; - when '*' => - if Source (Pos + 1) = '/' then - Pos := Pos + 2; - exit; - else - Pos := Pos + 1; - end if; - when CR => - Scan_CR_Newline; - when LF => - Scan_LF_Newline; - when Files_Map.EOT => - if Pos >= Current_Context.File_Len then - -- Point at the start of the comment. - Error_Msg_Scan - (Get_Token_Location, - "block comment not terminated at end of file"); - exit; - end if; - Pos := Pos + 1; - when others => - Pos := Pos + 1; - end case; - end loop; - if Flag_Comment then - Current_Token := Tok_Comment; - return; - end if; - goto Again; - else - Current_Token := Tok_Slash; - Pos := Pos + 1; - end if; - return; - when '(' => - Current_Token := Tok_Left_Paren; - Pos := Pos + 1; - return; - when ')' => - Current_Token := Tok_Right_Paren; - Pos := Pos + 1; - return; - when '|' => - if Flag_Psl then - if Source (Pos + 1) = '|' then - Current_Token := Tok_Bar_Bar; - Pos := Pos + 2; - elsif Source (Pos + 1) = '-' - and then Source (Pos + 2) = '>' - then - Current_Token := Tok_Bar_Arrow; - Pos := Pos + 3; - elsif Source (Pos + 1) = '=' - and then Source (Pos + 2) = '>' - then - Current_Token := Tok_Bar_Double_Arrow; - Pos := Pos + 3; - else - Current_Token := Tok_Bar; - Pos := Pos + 1; - end if; - else - Current_Token := Tok_Bar; - Pos := Pos + 1; - end if; - return; - when '!' => - if Flag_Psl then - Current_Token := Tok_Exclam_Mark; - else - if Source (Pos + 1) = '=' then - -- != is not allowed in VHDL, but be friendly with C users. - Error_Msg_Scan - (Get_Token_Location, "Use '/=' for inequality in vhdl"); - Current_Token := Tok_Not_Equal; - Pos := Pos + 1; - else - -- LRM93 13.10 - -- A vertical line (|) can be replaced by an exclamation - -- mark (!) where used as a delimiter. - Current_Token := Tok_Bar; - end if; - end if; - Pos := Pos + 1; - return; - when ':' => - if Source (Pos + 1) = '=' then - Current_Token := Tok_Assign; - Pos := Pos + 2; - else - Current_Token := Tok_Colon; - Pos := Pos + 1; - end if; - return; - when ';' => - Current_Token := Tok_Semi_Colon; - Pos := Pos + 1; - return; - when ',' => - Current_Token := Tok_Comma; - Pos := Pos + 1; - return; - when '.' => - if Source (Pos + 1) = '.' then - -- Be Ada friendly... - Error_Msg_Scan ("'..' is invalid in vhdl, replaced by 'to'"); - Current_Token := Tok_To; - Pos := Pos + 2; - return; - end if; - Current_Token := Tok_Dot; - Pos := Pos + 1; - return; - when '&' => - if Flag_Psl and then Source (Pos + 1) = '&' then - Current_Token := Tok_And_And; - Pos := Pos + 2; - else - Current_Token := Tok_Ampersand; - Pos := Pos + 1; - end if; - return; - when '<' => - case Source (Pos + 1) is - when '=' => - Current_Token := Tok_Less_Equal; - Pos := Pos + 2; - when '>' => - Current_Token := Tok_Box; - Pos := Pos + 2; - when '<' => - Current_Token := Tok_Double_Less; - Pos := Pos + 2; - when others => - Current_Token := Tok_Less; - Pos := Pos + 1; - end case; - return; - when '>' => - case Source (Pos + 1) is - when '=' => - Current_Token := Tok_Greater_Equal; - Pos := Pos + 2; - when '>' => - Current_Token := Tok_Double_Greater; - Pos := Pos + 2; - when others => - Current_Token := Tok_Greater; - Pos := Pos + 1; - end case; - return; - when '=' => - if Source (Pos + 1) = '=' then - if AMS_Vhdl then - Current_Token := Tok_Equal_Equal; - else - Error_Msg_Scan - ("'==' is not the vhdl equality, replaced by '='"); - Current_Token := Tok_Equal; - end if; - Pos := Pos + 2; - elsif Source (Pos + 1) = '>' then - Current_Token := Tok_Double_Arrow; - Pos := Pos + 2; - else - Current_Token := Tok_Equal; - Pos := Pos + 1; - end if; - return; - when ''' => - -- Handle cases such as character'('a') - -- FIXME: what about f ()'length ? or .all'length - if Current_Context.Prev_Token /= Tok_Identifier - and then Current_Context.Prev_Token /= Tok_Character - and then Source (Pos + 2) = ''' - then - -- LRM93 13.5 - -- A character literal is formed by enclosing one of the 191 - -- graphic character (...) between two apostrophe characters. - -- CHARACTER_LITERAL ::= ' GRAPHIC_CHARACTER ' - if Characters_Kind (Source (Pos + 1)) not in Graphic_Character - then - Error_Msg_Scan - ("a character literal can only be a graphic character"); - elsif Vhdl_Std = Vhdl_87 - and then Source (Pos + 1) > Character'Val (127) - then - Error_8bit; - end if; - Current_Token := Tok_Character; - Current_Context.Identifier := - Name_Table.Get_Identifier (Source (Pos + 1)); - Pos := Pos + 3; - return; - else - Current_Token := Tok_Tick; - Pos := Pos + 1; - end if; - return; - when '0' .. '9' => - Scan_Literal; - - -- LRM93 13.2 - -- At least one separator is required between an identifier or - -- an abstract literal and an adjacent identifier or abstract - -- literal. - case Characters_Kind (Source (Pos)) is - when Digit => - -- Happen if d#ddd# is followed by a number. - Error_Msg_Scan ("space is required between numbers"); - when Upper_Case_Letter - | Lower_Case_Letter => - -- Could call Error_Separator, but use a clearer message - -- for this common case. - -- Note: the term "unit name" is not correct here, since - -- it can be any identifier or even a keyword; however it - -- is probably the most common case (eg 10ns). - if Vhdl_Std >= Vhdl_08 and then Current_Token = Tok_Integer - then - Current_Token := Tok_Integer_Letter; - else - Error_Msg_Scan - ("space is required between number and unit name"); - end if; - when Other_Special_Character => - if Vhdl_Std > Vhdl_87 and then Source (Pos) = '\' then - -- Start of extended identifier. - Error_Separator; - end if; - when Invalid - | Format_Effector - | Space_Character - | Special_Character => - null; - end case; - return; - when '#' => - Error_Msg_Scan ("'#' is used for based literals and " - & "must be preceded by a base"); - -- Skip. - Pos := Pos + 1; - goto Again; - when '"' => - Scan_String; - return; - when '%' => - if Vhdl_Std >= Vhdl_08 then - Error_Msg_Scan - ("'%%' not allowed in vhdl 2008 (was replacement character)"); - -- Continue as a string. - end if; - Scan_String; - return; - when '[' => - if Flag_Psl then - if Source (Pos + 1) = '*' then - Current_Token := Tok_Brack_Star; - Pos := Pos + 2; - elsif Source (Pos + 1) = '+' - and then Source (Pos + 2) = ']' - then - Current_Token := Tok_Brack_Plus_Brack; - Pos := Pos + 3; - elsif Source (Pos + 1) = '-' - and then Source (Pos + 2) = '>' - then - Current_Token := Tok_Brack_Arrow; - Pos := Pos + 3; - elsif Source (Pos + 1) = '=' then - Current_Token := Tok_Brack_Equal; - Pos := Pos + 2; - else - Current_Token := Tok_Left_Bracket; - Pos := Pos + 1; - end if; - else - if Vhdl_Std = Vhdl_87 then - Error_Msg_Scan - ("'[' is an invalid character in vhdl87, replaced by '('"); - Current_Token := Tok_Left_Paren; - else - Current_Token := Tok_Left_Bracket; - end if; - Pos := Pos + 1; - end if; - return; - when ']' => - if Vhdl_Std = Vhdl_87 and not Flag_Psl then - Error_Msg_Scan - ("']' is an invalid character in vhdl87, replaced by ')'"); - Current_Token := Tok_Right_Paren; - else - Current_Token := Tok_Right_Bracket; - end if; - Pos := Pos + 1; - return; - when '{' => - if Flag_Psl then - Current_Token := Tok_Left_Curly; - else - Error_Msg_Scan ("'{' is an invalid character, replaced by '('"); - Current_Token := Tok_Left_Paren; - end if; - Pos := Pos + 1; - return; - when '}' => - if Flag_Psl then - Current_Token := Tok_Right_Curly; - else - Error_Msg_Scan ("'}' is an invalid character, replaced by ')'"); - Current_Token := Tok_Right_Paren; - end if; - Pos := Pos + 1; - return; - when '\' => - if Vhdl_Std = Vhdl_87 then - Error_Msg_Scan - ("extended identifiers are not allowed in vhdl87"); - end if; - Scan_Extended_Identifier; - return; - when '^' => - if Vhdl_Std >= Vhdl_08 then - Current_Token := Tok_Caret; - else - Current_Token := Tok_Xor; - Error_Msg_Scan ("'^' is not a VHDL operator, use 'xor'"); - end if; - Pos := Pos + 1; - return; - when '~' => - Error_Msg_Scan ("'~' is not a VHDL operator, use 'not'"); - Pos := Pos + 1; - Current_Token := Tok_Not; - return; - when '?' => - if Vhdl_Std < Vhdl_08 then - Error_Bad_Character; - Pos := Pos + 1; - goto Again; - else - if Source (Pos + 1) = '<' then - if Source (Pos + 2) = '=' then - Current_Token := Tok_Match_Less_Equal; - Pos := Pos + 3; - else - Current_Token := Tok_Match_Less; - Pos := Pos + 2; - end if; - elsif Source (Pos + 1) = '>' then - if Source (Pos + 2) = '=' then - Current_Token := Tok_Match_Greater_Equal; - Pos := Pos + 3; - else - Current_Token := Tok_Match_Greater; - Pos := Pos + 2; - end if; - elsif Source (Pos + 1) = '?' then - Current_Token := Tok_Condition; - Pos := Pos + 2; - elsif Source (Pos + 1) = '=' then - Current_Token := Tok_Match_Equal; - Pos := Pos + 2; - elsif Source (Pos + 1) = '/' - and then Source (Pos + 2) = '=' - then - Current_Token := Tok_Match_Not_Equal; - Pos := Pos + 3; - else - Error_Msg_Scan ("unknown matching operator"); - Pos := Pos + 1; - goto Again; - end if; - end if; - return; - when '`' => - if Vhdl_Std >= Vhdl_08 then - Scan_Tool_Directive; - else - Error_Bad_Character; - Skip_Until_EOL; - end if; - goto Again; - when '$' - | Inverted_Exclamation .. Inverted_Question - | Multiplication_Sign | Division_Sign => - Error_Bad_Character; - Pos := Pos + 1; - goto Again; - when '@' => - if Vhdl_Std >= Vhdl_08 or Flag_Psl then - Current_Token := Tok_Arobase; - Pos := Pos + 1; - return; - else - Error_Bad_Character; - Pos := Pos + 1; - goto Again; - end if; - when '_' => - Error_Msg_Scan ("an identifier can't start with '_'"); - Scan_Identifier (Flag_Psl); - -- Cannot be a reserved word. - return; - when 'A' .. 'Z' | 'a' .. 'z' => - Scan_Identifier (Flag_Psl); - Identifier_To_Token; - return; - when UC_A_Grave .. UC_O_Diaeresis - | UC_O_Oblique_Stroke .. UC_Icelandic_Thorn - | LC_German_Sharp_S .. LC_O_Diaeresis - | LC_O_Oblique_Stroke .. LC_Y_Diaeresis => - if Vhdl_Std = Vhdl_87 then - Error_Msg_Scan - ("non 7-bit latin-1 letters are not allowed in vhdl87"); - end if; - Scan_Identifier (False); - -- Not a reserved word. - return; - when NUL .. ETX | ENQ .. BS | SO .. US | DEL .. APC => - Error_Msg_Scan - ("control character that is not CR, LF, FF, HT or VT " & - "is not allowed"); - Pos := Pos + 1; - goto Again; - when Files_Map.EOT => - if Pos >= Current_Context.File_Len then - -- FIXME: should conditionnaly emit a warning if the file - -- is not terminated by an end of line. - Current_Token := Tok_Eof; - else - Error_Msg_Scan ("EOT is not allowed inside the file"); - Pos := Pos + 1; - goto Again; - end if; - return; - end case; - -- Not reachable: all case should use goto Again or return. - end Scan; - - function Is_Whitespace (C : Character) return Boolean is - begin - if C = ' ' then - return True; - elsif Vhdl_Std > Vhdl_87 and C = NBSP then - return True; - else - return False; - end if; - end Is_Whitespace; -end Scanner; diff --git a/src/vhdl/scanner.ads b/src/vhdl/scanner.ads deleted file mode 100644 index 2fc3db7be..000000000 --- a/src/vhdl/scanner.ads +++ /dev/null @@ -1,144 +0,0 @@ --- VHDL lexical scanner. --- Copyright (C) 2002 - 2014 Tristan Gingold --- --- GHDL is free software; you can redistribute it and/or modify it under --- the terms of the GNU General Public License as published by the Free --- Software Foundation; either version 2, or (at your option) any later --- version. --- --- GHDL is distributed in the hope that it will be useful, but WITHOUT ANY --- WARRANTY; without even the implied warranty of MERCHANTABILITY or --- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --- for more details. --- --- You should have received a copy of the GNU General Public License --- along with GHDL; see the file COPYING. If not, write to the Free --- Software Foundation, 59 Temple Place - Suite 330, Boston, MA --- 02111-1307, USA. -with Types; use Types; -with Tokens; use Tokens; - -package Scanner is - -- Global variables - -- The token that was just scanned. - -- When the token was eaten, you can call invalidate_current_token to - -- set it to tok_invalid. - -- Current_token should not be written outside of scan package. - -- It can be replaced by a function call. - Current_Token: Token_Type := Tok_Invalid; - - -- Maximal length for identifiers. - Max_Name_Length : constant Natural := 1024; - - -- Simply set current_token to tok_invalid. - procedure Invalidate_Current_Token; - pragma Inline (Invalidate_Current_Token); - - -- When CURRENT_TOKEN is an tok_identifier, tok_char or tok_string, - -- its name_id can be got via this function. - function Current_Identifier return Name_Id; - pragma Inline (Current_Identifier); - - -- Get current string identifier and length. - function Current_String_Id return String8_Id; - function Current_String_Length return Nat32; - pragma Inline (Current_String_Id); - pragma Inline (Current_String_Length); - - -- When the current token is Tok_Bit_String, return the base ('b', 'o', - -- 'x' or 'd') and the sign ('s', 'u', or ' ' for none). - function Get_Bit_String_Base return Character; - function Get_Bit_String_Sign return Character; - pragma Inline (Get_Bit_String_Base); - pragma Inline (Get_Bit_String_Sign); - - -- Set Current_identifier to null_identifier. - -- Can be used to catch bugs. - procedure Invalidate_Current_Identifier; - pragma Inline (Invalidate_Current_Identifier); - - -- When CURRENT_TOKEN is tok_integer, returns the value. - -- When CURRENT_TOKEN is tok_bit_string, returns the log of the base. - function Current_Iir_Int64 return Iir_Int64; - pragma Inline (Current_Iir_Int64); - - -- When CURRENT_TOKEN is tok_real, it returns the value. - function Current_Iir_Fp64 return Iir_Fp64; - pragma Inline (Current_Iir_Fp64); - - -- Advances the lexical analyser. Put a new token into current_token. - procedure Scan; - - -- Initialize the scanner with file SOURCE_FILE. - procedure Set_File (Source_File : Source_File_Entry); - - -- This function can be called just after Set_File to detect UTF BOM - -- patterns. It reports an error if a BOM is present and return True. - -- Silently return False if no error detected. - function Detect_Encoding_Errors return Boolean; - - procedure Set_Current_Position (Position: Source_Ptr); - - -- Finalize the scanner. - procedure Close_File; - - -- If true comments are reported as a token. - Flag_Comment : Boolean := False; - - -- If true newlines are reported as a token. - Flag_Newline : Boolean := False; - - -- If true also scan PSL tokens. - Flag_Psl : Boolean := False; - - -- If true handle PSL embedded in comments: '-- psl' is ignored. - Flag_Psl_Comment : Boolean := False; - - -- If true, ignore '--'. This is automatically set when Flag_Psl_Comment - -- is true and a starting PSL keyword has been identified. - -- Must be reset to false by the parser. - Flag_Scan_In_Comment : Boolean := False; - - -- If true scan for keywords in comments. Must be enabled if - -- Flag_Psl_Comment is true. - Flag_Comment_Keyword : Boolean := False; - - -- If the next character is '!', eat it and return True, otherwise return - -- False (used by PSL). - function Scan_Exclam_Mark return Boolean; - - -- If the next character is '_', eat it and return True, otherwise return - -- False (used by PSL). - function Scan_Underscore return Boolean; - - -- Get the current location, or the location of the current token. - -- Since a token cannot spread over lines, file and line of the current - -- token are the same as those of the current position. - -- The offset is the offset in the current line. - function Get_Current_Source_File return Source_File_Entry; - function Get_Current_Line return Natural; - function Get_Current_Offset return Natural; - function Get_Position return Source_Ptr; - function Get_Token_Location return Location_Type; - function Get_Token_Offset return Natural; - function Get_Token_Position return Source_Ptr; - - -- Return the initial location before the current token (ie before all - -- the blanks, comments and newlines have been skipped). Useful for the - -- location of a missing token. - function Get_Prev_Location return Location_Type; - - -- Convert (canonicalize) an identifier stored in name_buffer/name_length. - -- Upper case letters are converted into lower case. - -- Lexical checks are performed. - -- This procedure is not used by Scan, but should be used for identifiers - -- given in the command line. - -- Errors are directly reported through error_msg_option. - -- Also, Vhdl_Std should be set. - procedure Convert_Identifier (Str : in out String); - - -- Return TRUE iff C is a whitespace. - -- LRM93 13.2 Lexical elements, separators, and delimiters - -- A space character (SPACE or NBSP) ... - function Is_Whitespace (C : Character) return Boolean; -end Scanner; diff --git a/src/vhdl/sem_lib.adb b/src/vhdl/sem_lib.adb index 19ca27230..6cf642f2f 100644 --- a/src/vhdl/sem_lib.adb +++ b/src/vhdl/sem_lib.adb @@ -21,7 +21,7 @@ with Files_Map; with Iirs_Utils; use Iirs_Utils; with Errorout; use Errorout; with Libraries; use Libraries; -with Scanner; +with Vhdl.Scanner; with Parse; with Disp_Tree; with Disp_Vhdl; @@ -40,15 +40,15 @@ package body Sem_Lib is is Res : Iir_Design_File; begin - Scanner.Set_File (File); - if Scanner.Detect_Encoding_Errors then + Vhdl.Scanner.Set_File (File); + if Vhdl.Scanner.Detect_Encoding_Errors then -- Don't even try to parse such a file. The BOM will be interpreted -- as an identifier, which is not valid at the beginning of a file. Res := Null_Iir; else Res := Parse.Parse_Design_File; end if; - Scanner.Close_File; + Vhdl.Scanner.Close_File; if Res /= Null_Iir then Set_Parent (Res, Work_Library); @@ -159,7 +159,7 @@ package body Sem_Lib is procedure Load_Parse_Design_Unit (Design_Unit: Iir_Design_Unit; Loc : Iir) is - use Scanner; + use Vhdl.Scanner; Design_File : constant Iir_Design_File := Get_Design_File (Design_Unit); Fe : Source_File_Entry; Line, Off: Natural; diff --git a/src/vhdl/simulate/simul-debugger.adb b/src/vhdl/simulate/simul-debugger.adb index ba15bb329..3e868a44a 100644 --- a/src/vhdl/simulate/simul-debugger.adb +++ b/src/vhdl/simulate/simul-debugger.adb @@ -24,7 +24,7 @@ with Name_Table; with Str_Table; with Files_Map; with Parse; -with Scanner; +with Vhdl.Scanner; with Tokens; with Sem_Expr; with Sem_Scopes; @@ -1899,13 +1899,13 @@ package body Simul.Debugger is File := Files_Map.Create_Source_File_From_String (Name_Table.Get_Identifier ("*debug" & Index_Str & '*'), Line (P .. Line'Last)); - Scanner.Set_File (File); - Scanner.Scan; + Vhdl.Scanner.Set_File (File); + Vhdl.Scanner.Scan; Expr := Parse.Parse_Expression; - if Scanner.Current_Token /= Tok_Eof then + if Vhdl.Scanner.Current_Token /= Tok_Eof then Put_Line ("garbage at end of expression ignored"); end if; - Scanner.Close_File; + Vhdl.Scanner.Close_File; if Nbr_Errors /= 0 then Put_Line ("error while parsing expression, evaluation aborted"); Nbr_Errors := 0; diff --git a/src/vhdl/vhdl-scanner-directive_protect.adb b/src/vhdl/vhdl-scanner-directive_protect.adb new file mode 100644 index 000000000..bad3bd6ce --- /dev/null +++ b/src/vhdl/vhdl-scanner-directive_protect.adb @@ -0,0 +1,116 @@ +-- Lexical analysis for protect directive. +-- Copyright (C) 2019 Tristan Gingold +-- +-- GHDL is free software; you can redistribute it and/or modify it under +-- the terms of the GNU General Public License as published by the Free +-- Software Foundation; either version 2, or (at your option) any later +-- version. +-- +-- GHDL is distributed in the hope that it will be useful, but WITHOUT ANY +-- WARRANTY; without even the implied warranty of MERCHANTABILITY or +-- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +-- for more details. +-- +-- You should have received a copy of the GNU General Public License +-- along with GHDL; see the file COPYING. If not, write to the Free +-- Software Foundation, 59 Temple Place - Suite 330, Boston, MA +-- 02111-1307, USA. + +separate (Vhdl.Scanner) +package body Directive_Protect is + function Scan_Expression_List return Boolean; + + -- Scan/parse a keyword expression. + -- Initial spaces must have been skipped. + -- Return False in case of error. + function Scan_Keyword_Expression return Boolean is + begin + if Characters_Kind (Source (Pos)) not in Letter then + Error_Msg_Scan ("identifier expected in protect directive"); + return False; + end if; + + Scan_Identifier (False); + if Current_Token /= Tok_Identifier then + Error_Msg_Scan (Get_Token_Location, "keyword must be an identifier"); + return False; + end if; + + Skip_Spaces; + if Source (Pos) /= '=' then + return True; + end if; + + -- Eat '='. + Pos := Pos + 1; + Skip_Spaces; + + case Source (Pos) is + when 'A' .. 'Z' | 'a' .. 'z' => + Scan_Identifier (False); + when '0' .. '9' => + Scan_Literal; + when '"' => + Scan_String; + when '(' => + -- Eat '('. + Pos := Pos + 1; + Skip_Spaces; + + if not Scan_Expression_List then + return False; + end if; + + Skip_Spaces; + if Source (Pos) /= ')' then + Error_Msg_Scan ("')' expected at end of protect keyword list"); + return False; + end if; + + -- Eat ')'. + Pos := Pos + 1; + + when others => + -- Ok, we don't handle all the letters, nor extended identifiers. + Error_Msg_Scan ("literal expected in protect tool directive"); + return False; + end case; + + return True; + end Scan_Keyword_Expression; + + -- Scan: keyword_expression { , keyword_expression } + function Scan_Expression_List return Boolean is + begin + loop + if not Scan_Keyword_Expression then + return False; + end if; + + Skip_Spaces; + + if Source (Pos) /= ',' then + return True; + end if; + + -- Eat ','. + Pos := Pos + 1; + + Skip_Spaces; + end loop; + end Scan_Expression_List; + + -- LRM08 24.1 Protect tool directives + -- protect_directive ::= + -- `PROTECT keyword_expression {, keyword_expression } + procedure Scan_Protect_Directive is + begin + if Scan_Expression_List then + if not Is_EOL (Source (Pos)) then + Error_Msg_Scan ("end of line expected in protect directive"); + end if; + end if; + + Skip_Until_EOL; + end Scan_Protect_Directive; +end Directive_Protect; diff --git a/src/vhdl/vhdl-scanner-scan_literal.adb b/src/vhdl/vhdl-scanner-scan_literal.adb new file mode 100644 index 000000000..9006587fc --- /dev/null +++ b/src/vhdl/vhdl-scanner-scan_literal.adb @@ -0,0 +1,317 @@ +-- Lexical analysis for numbers. +-- Copyright (C) 2002 - 2014 Tristan Gingold +-- +-- GHDL is free software; you can redistribute it and/or modify it under +-- the terms of the GNU General Public License as published by the Free +-- Software Foundation; either version 2, or (at your option) any later +-- version. +-- +-- GHDL is distributed in the hope that it will be useful, but WITHOUT ANY +-- WARRANTY; without even the implied warranty of MERCHANTABILITY or +-- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +-- for more details. +-- +-- You should have received a copy of the GNU General Public License +-- along with GHDL; see the file COPYING. If not, write to the Free +-- Software Foundation, 59 Temple Place - Suite 330, Boston, MA +-- 02111-1307, USA. + +with Interfaces; use Interfaces; +with Grt.Fcvt; use Grt.Fcvt; + +separate (Vhdl.Scanner) + +-- scan a decimal literal or a based literal. +-- +-- LRM93 13.4.1 +-- DECIMAL_LITERAL ::= INTEGER [ . INTEGER ] [ EXPONENT ] +-- EXPONENT ::= E [ + ] INTEGER | E - INTEGER +-- +-- LRM93 13.4.2 +-- BASED_LITERAL ::= BASE # BASED_INTEGER [ . BASED_INTEGER ] # EXPONENT +-- BASE ::= INTEGER +procedure Scan_Literal is + -- Numbers of digits. + Scale : Integer; + Res : Bignum; + + -- LRM 13.4.1 + -- INTEGER ::= DIGIT { [ UNDERLINE ] DIGIT } + -- + -- Update SCALE, RES. + -- The first character must be a digit. + procedure Scan_Integer + is + C : Character; + begin + C := Source (Pos); + loop + -- C is a digit. + Bignum_Mul_Int (Res, 10, Character'Pos (C) - Character'Pos ('0')); + Scale := Scale + 1; + + Pos := Pos + 1; + C := Source (Pos); + if C = '_' then + loop + Pos := Pos + 1; + C := Source (Pos); + exit when C /= '_'; + Error_Msg_Scan ("double underscore in number"); + end loop; + if C not in '0' .. '9' then + Error_Msg_Scan ("underscore must be followed by a digit"); + end if; + end if; + exit when C not in '0' .. '9'; + end loop; + end Scan_Integer; + + C : Character; + D : Natural; + Ok : Boolean; + Has_Dot : Boolean; + Exp : Integer; + Exp_Neg : Boolean; + Base : Positive; +begin + -- Start with a simple and fast conversion. + C := Source (Pos); + D := 0; + loop + D := D * 10 + Character'Pos (C) - Character'Pos ('0'); + + Pos := Pos + 1; + C := Source (Pos); + if C = '_' then + loop + Pos := Pos + 1; + C := Source (Pos); + exit when C /= '_'; + Error_Msg_Scan ("double underscore in number"); + end loop; + if C not in '0' .. '9' then + Error_Msg_Scan ("underscore must be followed by a digit"); + end if; + end if; + if C not in '0' .. '9' then + if C = '.' or else C = '#' or else (C = 'e' or C = 'E' or C = ':') + then + -- Continue scanning. + Bignum_Int (Res, D); + exit; + end if; + + -- Finished. + -- a universal integer. + Current_Token := Tok_Integer; + -- No possible overflow. + Current_Context.Int64 := Iir_Int64 (D); + return; + elsif D >= (Natural'Last / 10) - 1 then + -- Number may be greather than the natural limit. + Scale := 0; + Bignum_Int (Res, D); + Scan_Integer; + exit; + end if; + end loop; + + Has_Dot := False; + Base := 10; + Scale := 0; + + C := Source (Pos); + if C = '.' then + -- Decimal integer. + Has_Dot := True; + Pos := Pos + 1; + C := Source (Pos); + if C not in '0' .. '9' then + Error_Msg_Scan ("a dot must be followed by a digit"); + Current_Token := Tok_Real; + Current_Context.Fp64 := Fp64 (To_Float_64 (False, Res, Base, 0)); + return; + end if; + Scan_Integer; + elsif C = '#' + or else (C = ':' and then (Source (Pos + 1) in '0' .. '9' + or else Source (Pos + 1) in 'a' .. 'f' + or else Source (Pos + 1) in 'A' .. 'F')) + then + -- LRM 13.10 + -- The number sign (#) of a based literal can be replaced by colon (:), + -- provided that the replacement is done for both occurrences. + -- GHDL: correctly handle 'variable v : integer range 0 to 7:= 3'. + -- Is there any other places where a digit can be followed + -- by a colon ? (See IR 1093). + + -- Based integer. + declare + Number_Sign : constant Character := C; + Res_Int : Interfaces.Unsigned_64; + begin + Bignum_To_Int (Res, Res_Int, Ok); + if not Ok or else Res_Int > 16 then + -- LRM 13.4.2 + -- The base must be [...] at most sixteen. + Error_Msg_Scan ("base must be at most 16"); + -- Fallback. + Base := 16; + elsif Res_Int < 2 then + -- LRM 13.4.2 + -- The base must be at least two [...]. + Error_Msg_Scan ("base must be at least 2"); + -- Fallback. + Base := 2; + else + Base := Natural (Res_Int); + end if; + + Pos := Pos + 1; + Bignum_Int (Res, 0); + C := Source (Pos); + loop + if C >= '0' and C <= '9' then + D := Character'Pos (C) - Character'Pos ('0'); + elsif C >= 'A' and C <= 'F' then + D := Character'Pos (C) - Character'Pos ('A') + 10; + elsif C >= 'a' and C <= 'f' then + D := Character'Pos (C) - Character'Pos ('a') + 10; + else + Error_Msg_Scan ("bad extended digit"); + exit; + end if; + + if D >= Base then + -- LRM 13.4.2 + -- The conventional meaning of base notation is + -- assumed; in particular the value of each extended + -- digit of a based literal must be less then the base. + Error_Msg_Scan ("digit beyond base"); + D := 1; + end if; + Pos := Pos + 1; + Bignum_Mul_Int (Res, Base, D); + Scale := Scale + 1; + + C := Source (Pos); + if C = '_' then + loop + Pos := Pos + 1; + C := Source (Pos); + exit when C /= '_'; + Error_Msg_Scan ("double underscore in based integer"); + end loop; + elsif C = '.' then + if Has_Dot then + Error_Msg_Scan ("double dot ignored"); + else + Has_Dot := True; + Scale := 0; + end if; + Pos := Pos + 1; + C := Source (Pos); + elsif C = Number_Sign then + Pos := Pos + 1; + exit; + elsif C = '#' or C = ':' then + Error_Msg_Scan ("bad number sign replacement character"); + exit; + end if; + end loop; + end; + end if; + + -- Exponent. + C := Source (Pos); + Exp := 0; + if C = 'E' or else C = 'e' then + Pos := Pos + 1; + C := Source (Pos); + Exp_Neg := False; + if C = '+' then + Pos := Pos + 1; + C := Source (Pos); + elsif C = '-' then + if Has_Dot then + Exp_Neg := True; + else + -- LRM 13.4.1 + -- An exponent for an integer literal must not have a minus sign. + -- + -- LRM 13.4.2 + -- An exponent for a based integer literal must not have a minus + -- sign. + Error_Msg_Scan + ("negative exponent not allowed for integer literal"); + end if; + Pos := Pos + 1; + C := Source (Pos); + end if; + if C not in '0' .. '9' then + Error_Msg_Scan ("digit expected after exponent"); + else + loop + -- C is a digit. + Exp := Exp * 10 + (Character'Pos (C) - Character'Pos ('0')); + + Pos := Pos + 1; + C := Source (Pos); + if C = '_' then + loop + Pos := Pos + 1; + C := Source (Pos); + exit when C /= '_'; + Error_Msg_Scan ("double underscore not allowed in integer"); + end loop; + if C not in '0' .. '9' then + Error_Msg_Scan ("digit expected after underscore"); + exit; + end if; + elsif C not in '0' .. '9' then + exit; + end if; + end loop; + end if; + if Exp_Neg then + Exp := -Exp; + end if; + end if; + + if Has_Dot then + -- a universal real. + Current_Token := Tok_Real; + + Current_Context.Fp64 := + Fp64 (To_Float_64 (False, Res, Base, Exp - Scale)); + else + -- a universal integer. + Current_Token := Tok_Integer; + + -- Set to a valid literal, in case of constraint error. + if Exp /= 0 then + Res := Bignum_Mul (Res, Bignum_Pow (Base, Exp)); + end if; + + declare + U : Unsigned_64; + begin + Bignum_To_Int (Res, U, Ok); + if U > Unsigned_64 (Iir_Int64'Last) then + Ok := False; + else + Current_Context.Int64 := Iir_Int64 (U); + end if; + end; + if not Ok then + Error_Msg_Scan ("literal beyond integer bounds"); + end if; + end if; +exception + when Constraint_Error => + Error_Msg_Scan ("literal overflow"); + + Current_Token := Tok_Integer; + Current_Context.Int64 := 0; +end Scan_Literal; diff --git a/src/vhdl/vhdl-scanner.adb b/src/vhdl/vhdl-scanner.adb new file mode 100644 index 000000000..734b0c7ce --- /dev/null +++ b/src/vhdl/vhdl-scanner.adb @@ -0,0 +1,2332 @@ +-- VHDL lexical scanner. +-- Copyright (C) 2002 - 2014 Tristan Gingold +-- +-- GHDL is free software; you can redistribute it and/or modify it under +-- the terms of the GNU General Public License as published by the Free +-- Software Foundation; either version 2, or (at your option) any later +-- version. +-- +-- GHDL is distributed in the hope that it will be useful, but WITHOUT ANY +-- WARRANTY; without even the implied warranty of MERCHANTABILITY or +-- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +-- for more details. +-- +-- You should have received a copy of the GNU General Public License +-- along with GHDL; see the file COPYING. If not, write to the Free +-- Software Foundation, 59 Temple Place - Suite 330, Boston, MA +-- 02111-1307, USA. +with Ada.Characters.Latin_1; use Ada.Characters.Latin_1; +with Errorout; use Errorout; +with Name_Table; +with Files_Map; use Files_Map; +with Std_Names; +with Str_Table; +with Flags; use Flags; + +package body Vhdl.Scanner is + + -- This classification is a simplification of the categories of LRM93 13.1 + -- LRM93 13.1 + -- The only characters allowed in the text of a VHDL description are the + -- graphic characters and format effector. + + type Character_Kind_Type is + ( + -- Neither a format effector nor a graphic character. + Invalid, + Format_Effector, + Lower_Case_Letter, + Upper_Case_Letter, + Digit, + Special_Character, + Space_Character, + Other_Special_Character + ); + + -- LRM93 13.1 + -- basic_graphic_character ::= + -- upper_case_letter | digit | special_character | space_character + -- + --subtype Basic_Graphic_Character is + -- Character_Kind_Type range Upper_Case_Letter .. Space_Character; + + -- LRM93 13.1 + -- graphic_character ::= + -- basic_graphic_character | lower_case_letter | other_special_character + -- + -- Note: There are 191 graphic characters. + subtype Graphic_Character is + Character_Kind_Type range Lower_Case_Letter .. Other_Special_Character; + + -- letter ::= upper_case_letter | lower_case_letter + subtype Letter is + Character_Kind_Type range Lower_Case_Letter .. Upper_Case_Letter; + + -- LRM93 13.1 + -- The characters included in each of the categories of basic graphic + -- characters are defined as follows: + type Character_Array is array (Character) of Character_Kind_Type; + pragma Suppress_Initialization (Character_Array); + Characters_Kind : constant Character_Array := + (NUL .. BS => Invalid, + + -- Format effectors are the ISO (and ASCII) characters called horizontal + -- tabulation, vertical tabulation, carriage return, line feed, and form + -- feed. + HT | LF | VT | FF | CR => Format_Effector, + + SO .. US => Invalid, + + -- 1. upper case letters + 'A' .. 'Z' | UC_A_Grave .. UC_O_Diaeresis | + UC_O_Oblique_Stroke .. UC_Icelandic_Thorn => Upper_Case_Letter, + + -- 2. digits + '0' .. '9' => Digit, + + -- 3. special characters + '"' | '#' | '&' | ''' | '(' | ')' | '+' | ',' | '-' | '.' | '/' + | ':' | ';' | '<' | '=' | '>' | '[' | ']' + | '_' | '|' | '*' => Special_Character, + + -- 4. the space characters + ' ' | NBSP => Space_Character, + + -- 5. lower case letters + 'a' .. 'z' | LC_German_Sharp_S .. LC_O_Diaeresis | + LC_O_Oblique_Stroke .. LC_Y_Diaeresis => Lower_Case_Letter, + + -- 6. other special characters + '!' | '$' | '%' | '@' | '?' | '\' | '^' | '`' | '{' | '}' | '~' + | Inverted_Exclamation .. Inverted_Question | Multiplication_Sign | + Division_Sign => Other_Special_Character, + + -- '¡' -- INVERTED EXCLAMATION MARK + -- '¢' -- CENT SIGN + -- '£' -- POUND SIGN + -- '¤' -- CURRENCY SIGN + -- '¥' -- YEN SIGN + -- '¦' -- BROKEN BAR + -- '§' -- SECTION SIGN + -- '¨' -- DIAERESIS + -- '©' -- COPYRIGHT SIGN + -- 'ª' -- FEMININE ORDINAL INDICATOR + -- '«' -- LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + -- '¬' -- NOT SIGN + -- '­' -- SOFT HYPHEN + -- '®' -- REGISTERED SIGN + -- '¯' -- MACRON + -- '°' -- DEGREE SIGN + -- '±' -- PLUS-MINUS SIGN + -- '²' -- SUPERSCRIPT TWO + -- '³' -- SUPERSCRIPT THREE + -- '´' -- ACUTE ACCENT + -- 'µ' -- MICRO SIGN + -- '¶' -- PILCROW SIGN + -- '·' -- MIDDLE DOT + -- '¸' -- CEDILLA + -- '¹' -- SUPERSCRIPT ONE + -- 'º' -- MASCULINE ORDINAL INDICATOR + -- '»' -- RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + -- '¼' -- VULGAR FRACTION ONE QUARTER + -- '½' -- VULGAR FRACTION ONE HALF + -- '¾' -- VULGAR FRACTION THREE QUARTERS + -- '¿' -- INVERTED QUESTION MARK + -- '×' -- MULTIPLICATION SIGN + -- '÷' -- DIVISION SIGN + + DEL .. APC => Invalid); + + -- The context contains the whole internal state of the scanner, ie + -- it can be used to push/pop a lexical analysis, to restart the + -- scanner from a context marking a previous point. + type Scan_Context is record + Source : File_Buffer_Acc; + Source_File : Source_File_Entry; + Line_Number : Natural; + Line_Pos : Source_Ptr; + Prev_Pos : Source_Ptr; + Token_Pos : Source_Ptr; + Pos : Source_Ptr; + File_Len : Source_Ptr; + Token : Token_Type; + Prev_Token : Token_Type; + + -- Additional values for the current token. + Bit_Str_Base : Character; + Bit_Str_Sign : Character; + Str_Id : String8_Id; + Str_Len : Nat32; + Identifier: Name_Id; + Int64 : Iir_Int64; + Fp64 : Iir_Fp64; + end record; + pragma Suppress_Initialization (Scan_Context); + + -- Disp a message during scan. + -- The current location is automatically displayed before the message. + -- Disp a message during scan. + procedure Error_Msg_Scan (Msg: String) is + begin + Report_Msg (Msgid_Error, Scan, No_Location, Msg); + end Error_Msg_Scan; + + procedure Error_Msg_Scan (Loc : Location_Type; Msg: String) is + begin + Report_Msg (Msgid_Error, Scan, Loc, Msg); + end Error_Msg_Scan; + + procedure Error_Msg_Scan (Msg: String; Arg1 : Earg_Type) is + begin + Report_Msg (Msgid_Error, Scan, No_Location, Msg, (1 => Arg1)); + end Error_Msg_Scan; + + -- Disp a message during scan. + procedure Warning_Msg_Scan (Id : Msgid_Warnings; Msg: String) is + begin + Report_Msg (Id, Scan, No_Location, Msg); + end Warning_Msg_Scan; + + procedure Warning_Msg_Scan (Id : Msgid_Warnings; + Msg: String; + Arg1 : Earg_Type; + Cont : Boolean := False) is + begin + Report_Msg (Id, Scan, No_Location, Msg, (1 => Arg1), Cont); + end Warning_Msg_Scan; + + -- The current context. + -- Default value is an invalid context. + Current_Context: Scan_Context := (Source => null, + Source_File => No_Source_File_Entry, + Line_Number => 0, + Line_Pos => 0, + Pos => 0, + Prev_Pos => 0, + Token_Pos => 0, + File_Len => 0, + Token => Tok_Invalid, + Prev_Token => Tok_Invalid, + Identifier => Null_Identifier, + Bit_Str_Base => ' ', + Bit_Str_Sign => ' ', + Str_Id => Null_String8, + Str_Len => 0, + Int64 => 0, + Fp64 => 0.0); + + Source: File_Buffer_Acc renames Current_Context.Source; + Pos: Source_Ptr renames Current_Context.Pos; + + -- When CURRENT_TOKEN is an identifier, its name_id is stored into + -- this global variable. + -- Function current_text can be used to convert it into an iir. + function Current_Identifier return Name_Id is + begin + return Current_Context.Identifier; + end Current_Identifier; + + procedure Invalidate_Current_Identifier is + begin + Current_Context.Identifier := Null_Identifier; + end Invalidate_Current_Identifier; + + procedure Invalidate_Current_Token is + begin + if Current_Token /= Tok_Invalid then + Current_Context.Prev_Token := Current_Token; + Current_Token := Tok_Invalid; + end if; + end Invalidate_Current_Token; + + function Current_String_Id return String8_Id is + begin + return Current_Context.Str_Id; + end Current_String_Id; + + function Current_String_Length return Nat32 is + begin + return Current_Context.Str_Len; + end Current_String_Length; + + function Get_Bit_String_Base return Character is + begin + return Current_Context.Bit_Str_Base; + end Get_Bit_String_Base; + + function Get_Bit_String_Sign return Character is + begin + return Current_Context.Bit_Str_Sign; + end Get_Bit_String_Sign; + + function Current_Iir_Int64 return Iir_Int64 is + begin + return Current_Context.Int64; + end Current_Iir_Int64; + + function Current_Iir_Fp64 return Iir_Fp64 is + begin + return Current_Context.Fp64; + end Current_Iir_Fp64; + + function Get_Current_Source_File return Source_File_Entry is + begin + return Current_Context.Source_File; + end Get_Current_Source_File; + + function Get_Current_Line return Natural is + begin + return Current_Context.Line_Number; + end Get_Current_Line; + + function Get_Current_Offset return Natural is + begin + return Natural (Current_Context.Pos - Current_Context.Line_Pos); + end Get_Current_Offset; + + function Get_Token_Offset return Natural is + begin + return Natural (Current_Context.Token_Pos - Current_Context.Line_Pos); + end Get_Token_Offset; + + function Get_Token_Position return Source_Ptr is + begin + return Current_Context.Token_Pos; + end Get_Token_Position; + + function Get_Position return Source_Ptr is + begin + return Current_Context.Pos; + end Get_Position; + + function Get_Token_Location return Location_Type is + begin + return File_Pos_To_Location + (Current_Context.Source_File, Current_Context.Token_Pos); + end Get_Token_Location; + + function Get_Prev_Location return Location_Type is + begin + return File_Pos_To_Location + (Current_Context.Source_File, Current_Context.Prev_Pos); + end Get_Prev_Location; + + procedure Set_File (Source_File : Source_File_Entry) + is + N_Source: File_Buffer_Acc; + begin + pragma Assert (Current_Context.Source = null); + pragma Assert (Source_File /= No_Source_File_Entry); + N_Source := Get_File_Source (Source_File); + Current_Context := (Source => N_Source, + Source_File => Source_File, + Line_Number => 1, + Line_Pos => 0, + Prev_Pos => N_Source'First, + Pos => N_Source'First, + Token_Pos => 0, -- should be invalid, + File_Len => Get_File_Length (Source_File), + Token => Tok_Invalid, + Prev_Token => Tok_Invalid, + Identifier => Null_Identifier, + Bit_Str_Base => ' ', + Bit_Str_Sign => ' ', + Str_Id => Null_String8, + Str_Len => 0, + Int64 => -1, + Fp64 => 0.0); + Current_Token := Tok_Invalid; + end Set_File; + + function Detect_Encoding_Errors return Boolean + is + C : constant Character := Source (Pos); + begin + -- No need to check further if first character is plain ASCII-7 + if C >= ' ' and C < Character'Val (127) then + return False; + end if; + + -- UTF-8 BOM is EF BB BF + if Source (Pos + 0) = Character'Val (16#ef#) + and then Source (Pos + 1) = Character'Val (16#bb#) + and then Source (Pos + 2) = Character'Val (16#bf#) + then + Error_Msg_Scan + ("source encoding must be latin-1 (UTF-8 BOM detected)"); + return True; + end if; + + -- UTF-16 BE BOM is FE FF + if Source (Pos + 0) = Character'Val (16#fe#) + and then Source (Pos + 1) = Character'Val (16#ff#) + then + Error_Msg_Scan + ("source encoding must be latin-1 (UTF-16 BE BOM detected)"); + return True; + end if; + + -- UTF-16 LE BOM is FF FE + if Source (Pos + 0) = Character'Val (16#ff#) + and then Source (Pos + 1) = Character'Val (16#fe#) + then + Error_Msg_Scan + ("source encoding must be latin-1 (UTF-16 LE BOM detected)"); + return True; + end if; + + -- Certainly weird, but scanner/parser will catch it. + return False; + end Detect_Encoding_Errors; + + procedure Set_Current_Position (Position: Source_Ptr) + is + Loc : Location_Type; + Offset: Natural; + File_Entry : Source_File_Entry; + begin + -- Scanner must have been initialized. + pragma Assert (Current_Context.Source /= null); + + Current_Token := Tok_Invalid; + Current_Context.Pos := Position; + Loc := File_Pos_To_Location (Current_Context.Source_File, + Current_Context.Pos); + Location_To_Coord (Loc, + File_Entry, Current_Context.Line_Pos, + Current_Context.Line_Number, Offset); + end Set_Current_Position; + + procedure Close_File is + begin + Current_Context.Source := null; + end Close_File; + + -- Emit an error when a character above 128 was found. + -- This must be called only in vhdl87. + procedure Error_8bit is + begin + Error_Msg_Scan ("8 bits characters not allowed in vhdl87"); + end Error_8bit; + + -- Emit an error when a separator is expected. + procedure Error_Separator is + begin + Error_Msg_Scan ("a separator is required here"); + end Error_Separator; + + -- scan a decimal literal or a based literal. + -- + -- LRM93 13.4.1 + -- DECIMAL_LITERAL ::= INTEGER [ . INTEGER ] [ EXPONENT ] + -- EXPONENT ::= E [ + ] INTEGER | E - INTEGER + -- + -- LRM93 13.4.2 + -- BASED_LITERAL ::= BASE # BASED_INTEGER [ . BASED_INTEGER ] # EXPONENT + -- BASE ::= INTEGER + procedure Scan_Literal is separate; + + -- Scan a string literal. + -- + -- LRM93 13.6 / LRM08 15.7 + -- A string literal is formed by a sequence of graphic characters + -- (possibly none) enclosed between two quotation marks used as string + -- brackets. + -- STRING_LITERAL ::= " { GRAPHIC_CHARACTER } " + -- + -- IN: for a string, at the call of this procedure, the current character + -- must be either '"' or '%'. + procedure Scan_String + is + -- The quotation character (can be " or %). + Mark: Character; + -- Current character. + C : Character; + -- Current length. + Length : Nat32; + begin + -- String delimiter. + Mark := Source (Pos); + pragma Assert (Mark = '"' or else Mark = '%'); + + Pos := Pos + 1; + Length := 0; + Current_Context.Str_Id := Str_Table.Create_String8; + loop + C := Source (Pos); + if C = Mark then + -- LRM93 13.6 + -- If a quotation mark value is to be represented in the sequence + -- of character values, then a pair of adjacent quoatation + -- characters marks must be written at the corresponding place + -- within the string literal. + -- LRM93 13.10 + -- Any pourcent sign within the sequence of characters must then + -- be doubled, and each such doubled percent sign is interpreted + -- as a single percent sign value. + -- The same replacement is allowed for a bit string literal, + -- provieded that both bit string brackets are replaced. + Pos := Pos + 1; + exit when Source (Pos) /= Mark; + end if; + + case Characters_Kind (C) is + when Format_Effector => + if Mark = '%' then + -- No matching '%' has been found. Consider '%' was used + -- as the remainder operator, instead of 'rem'. This will + -- improve the error message. + Error_Msg_Scan + (Get_Token_Location, + "'%%' is not a vhdl operator, use 'rem'"); + Current_Token := Tok_Rem; + Pos := Current_Context.Token_Pos + 1; + return; + end if; + if C = CR or C = LF then + Error_Msg_Scan + ("string cannot be multi-line, use concatenation"); + else + Error_Msg_Scan ("format effector not allowed in a string"); + end if; + exit; + when Invalid => + if C = Files_Map.EOT + and then Pos >= Current_Context.File_Len + then + Error_Msg_Scan ("string not terminated at end of file"); + exit; + end if; + + Error_Msg_Scan + ("invalid character not allowed, even in a string"); + when Graphic_Character => + if Vhdl_Std = Vhdl_87 and then C > Character'Val (127) then + Error_8bit; + end if; + end case; + + if C = '"' and Mark = '%' then + -- LRM93 13.10 + -- The quotation marks (") used as string brackets at both ends of + -- a string literal can be replaced by percent signs (%), provided + -- that the enclosed sequence of characters constains no quotation + -- marks, and provided that both string brackets are replaced. + Error_Msg_Scan + ("'""' cannot be used in a string delimited with '%%'"); + end if; + + Length := Length + 1; + Str_Table.Append_String8 (Character'Pos (C)); + Pos := Pos + 1; + end loop; + + Current_Token := Tok_String; + Current_Context.Str_Len := Length; + end Scan_String; + + -- Scan a bit string literal. + -- + -- LRM93 13.7 + -- A bit string literal is formed by a sequence of extended digits + -- (possibly none) enclosed between two quotations used as bit string + -- brackets, preceded by a base specifier. + -- BIT_STRING_LITERAL ::= BASE_SPECIFIER " [ BIT_VALUE ] " + -- BIT_VALUE ::= EXTENDED_DIGIT { [ UNDERLINE ] EXTENDED_DIGIT } + -- + -- The current character must be a base specifier, followed by '"' or '%'. + -- The base must be valid. + procedure Scan_Bit_String (Base_Log : Nat32) + is + -- Position of character '0'. + Pos_0 : constant Nat8 := Character'Pos ('0'); + + -- Used for the base. + subtype Nat4 is Natural range 1 .. 4; + Base : constant Nat32 := 2 ** Nat4 (Base_Log); + + -- The quotation character (can be " or %). + Orig_Pos : constant Source_Ptr := Pos; + Mark : constant Character := Source (Orig_Pos); + -- Current character. + C : Character; + -- Current length. + Length : Nat32; + -- Digit value. + V, D : Nat8; + -- True if invalid character already found, to avoid duplicate message. + Has_Invalid : Boolean; + begin + pragma Assert (Mark = '"' or else Mark = '%'); + Pos := Pos + 1; + Length := 0; + Has_Invalid := False; + Current_Context.Str_Id := Str_Table.Create_String8; + loop + << Again >> null; + C := Source (Pos); + Pos := Pos + 1; + exit when C = Mark; + + -- LRM93 13.7 + -- If the base specifier is 'B', the extended digits in the bit + -- value are restricted to 0 and 1. + -- If the base specifier is 'O', the extended digits int the bit + -- value are restricted to legal digits in the octal number + -- system, ie, the digits 0 through 7. + -- If the base specifier is 'X', the extended digits are all digits + -- together with the letters A through F. + case C is + when '0' .. '9' => + V := Character'Pos (C) - Character'Pos ('0'); + when 'A' .. 'F' => + V := Character'Pos (C) - Character'Pos ('A') + 10; + when 'a' .. 'f' => + -- LRM93 13.7 + -- A letter in a bit string literal (...) can be written either + -- in lowercase or in upper case, with the same meaning. + V := Character'Pos (C) - Character'Pos ('a') + 10; + when '_' => + if Source (Pos) = '_' then + Error_Msg_Scan + ("double underscore not allowed in a bit string"); + end if; + if Source (Pos - 2) = Mark then + Error_Msg_Scan + ("underscore not allowed at the start of a bit string"); + elsif Source (Pos) = Mark then + Error_Msg_Scan + ("underscore not allowed at the end of a bit string"); + end if; + goto Again; + when '"' => + pragma Assert (Mark = '%'); + Error_Msg_Scan + ("'""' cannot close a bit string opened by '%%'"); + exit; + when '%' => + pragma Assert (Mark = '"'); + Error_Msg_Scan + ("'%%' cannot close a bit string opened by '""'"); + exit; + when others => + if Characters_Kind (C) in Graphic_Character then + if Vhdl_Std >= Vhdl_08 then + V := Nat8'Last; + else + if not Has_Invalid then + Error_Msg_Scan ("invalid character in bit string"); + Has_Invalid := True; + end if; + -- Continue the bit string + V := 0; + end if; + else + if Mark = '%' then + Error_Msg_Scan + (File_Pos_To_Location + (Current_Context.Source_File, Orig_Pos), + "'%%' is not a vhdl operator, use 'rem'"); + Current_Token := Tok_Rem; + Pos := Orig_Pos + 1; + return; + else + Error_Msg_Scan ("bit string not terminated"); + Pos := Pos - 1; + end if; + exit; + end if; + end case; + + -- Expand bit value. + if Vhdl_Std >= Vhdl_08 and V > Base then + -- Expand as graphic character. + for I in 1 .. Base_Log loop + Str_Table.Append_String8_Char (C); + end loop; + else + -- Expand as extended digits. + case Base_Log is + when 1 => + if V > 1 then + Error_Msg_Scan + ("invalid character in a binary bit string"); + V := 1; + end if; + Str_Table.Append_String8 (Pos_0 + V); + when 3 => + if V > 7 then + Error_Msg_Scan + ("invalid character in a octal bit string"); + V := 7; + end if; + for I in 1 .. 3 loop + D := V / 4; + Str_Table.Append_String8 (Pos_0 + D); + V := (V - 4 * D) * 2; + end loop; + when 4 => + for I in 1 .. 4 loop + D := V / 8; + Str_Table.Append_String8 (Pos_0 + D); + V := (V - 8 * D) * 2; + end loop; + when others => + raise Internal_Error; + end case; + end if; + + Length := Length + Base_Log; + end loop; + + -- Note: the length of the bit string may be 0. + + Current_Token := Tok_Bit_String; + Current_Context.Str_Len := Length; + end Scan_Bit_String; + + -- Scan a decimal bit string literal. For base specifier D the algorithm + -- is rather different: all the graphic characters shall be digits, and we + -- need to use a (not very efficient) arbitrary precision multiplication. + procedure Scan_Dec_Bit_String + is + use Str_Table; + + Id : String8_Id; + + -- Position of character '0'. + Pos_0 : constant Nat8 := Character'Pos ('0'); + + -- Current character. + C : Character; + -- Current length. + Length : Nat32; + -- Digit value. + V, D : Nat8; + + type Carries_Type is array (0 .. 3) of Nat8; + Carries : Carries_Type; + No_Carries : constant Carries_Type := (others => Pos_0); + + -- Shift right carries. Note the Carries (0) is the LSB. + procedure Shr_Carries is + begin + Carries := (Carries (1), Carries (2), Carries (3), Pos_0); + end Shr_Carries; + + procedure Append_Carries is + begin + -- Expand the bit string. Note that position 1 of the string8 is + -- the MSB. + while Carries /= No_Carries loop + Append_String8 (Pos_0); + Length := Length + 1; + for I in reverse 2 .. Length loop + Set_Element_String8 (Id, I, Element_String8 (Id, I - 1)); + end loop; + Set_Element_String8 (Id, 1, Carries (0)); + Shr_Carries; + end loop; + end Append_Carries; + + -- Add 1 to Carries. Overflow is not allowed and should be prevented by + -- construction. + procedure Add_One_To_Carries is + begin + for I in Carries'Range loop + if Carries (I) = Pos_0 then + Carries (I) := Pos_0 + 1; + -- End of propagation. + exit; + else + Carries (I) := Pos_0; + -- Continue propagation. + pragma Assert (I < Carries'Last); + end if; + end loop; + end Add_One_To_Carries; + begin + pragma Assert (Source (Pos) = '"'); + Pos := Pos + 1; + Length := 0; + Id := Create_String8; + Current_Context.Str_Id := Id; + loop + << Again >> null; + C := Source (Pos); + Pos := Pos + 1; + exit when C = '"'; + + if C in '0' .. '9' then + V := Character'Pos (C) - Character'Pos ('0'); + elsif C = '_' then + if Source (Pos) = '_' then + Error_Msg_Scan + ("double underscore not allowed in a bit string"); + end if; + if Source (Pos - 2) = '"' then + Error_Msg_Scan + ("underscore not allowed at the start of a bit string"); + elsif Source (Pos) = '"' then + Error_Msg_Scan + ("underscore not allowed at the end of a bit string"); + end if; + goto Again; + else + if Characters_Kind (C) in Graphic_Character then + Error_Msg_Scan + ("graphic character not allowed in decimal bit string"); + -- Continue the bit string + V := 0; + else + Error_Msg_Scan ("bit string not terminated"); + Pos := Pos - 1; + exit; + end if; + end if; + + -- Multiply by 10. + Carries := (others => Pos_0); + for I in reverse 1 .. Length loop + -- Shift by 1 (*2). + D := Element_String8 (Id, I); + Set_Element_String8 (Id, I, Carries (0)); + Shr_Carries; + -- Add D and D * 4. + if D /= Pos_0 then + Add_One_To_Carries; + -- Add_Four_To_Carries: + for I in 2 .. 3 loop + if Carries (I) = Pos_0 then + Carries (I) := Pos_0 + 1; + -- End of propagation. + exit; + else + Carries (I) := Pos_0; + -- Continue propagation. + end if; + end loop; + end if; + end loop; + Append_Carries; + + -- Add V. + for I in Carries'Range loop + D := V / 2; + Carries (I) := Pos_0 + (V - 2 * D); + V := D; + end loop; + for I in reverse 1 .. Length loop + D := Element_String8 (Id, I); + if D /= Pos_0 then + Add_One_To_Carries; + end if; + Set_Element_String8 (Id, I, Carries (0)); + Shr_Carries; + exit when Carries = No_Carries; + end loop; + Append_Carries; + end loop; + + Current_Token := Tok_Bit_String; + Current_Context.Str_Len := Length; + end Scan_Dec_Bit_String; + + -- LRM08 15.2 Character set + -- For each uppercase letter, there is a corresponding lowercase letter; + -- and for each lowercase letter except [y diaeresis] and [german sharp s], + -- there is a corresponding uppercase letter. + type Character_Map is array (Character) of Character; + To_Lower_Map : constant Character_Map := + ( + -- Uppercase ASCII letters. + 'A' => 'a', + 'B' => 'b', + 'C' => 'c', + 'D' => 'd', + 'E' => 'e', + 'F' => 'f', + 'G' => 'g', + 'H' => 'h', + 'I' => 'i', + 'J' => 'j', + 'K' => 'k', + 'L' => 'l', + 'M' => 'm', + 'N' => 'n', + 'O' => 'o', + 'P' => 'p', + 'Q' => 'q', + 'R' => 'r', + 'S' => 's', + 'T' => 't', + 'U' => 'u', + 'V' => 'v', + 'W' => 'w', + 'X' => 'x', + 'Y' => 'y', + 'Z' => 'z', + + -- Lowercase ASCII letters. + 'a' => 'a', + 'b' => 'b', + 'c' => 'c', + 'd' => 'd', + 'e' => 'e', + 'f' => 'f', + 'g' => 'g', + 'h' => 'h', + 'i' => 'i', + 'j' => 'j', + 'k' => 'k', + 'l' => 'l', + 'm' => 'm', + 'n' => 'n', + 'o' => 'o', + 'p' => 'p', + 'q' => 'q', + 'r' => 'r', + 's' => 's', + 't' => 't', + 'u' => 'u', + 'v' => 'v', + 'w' => 'w', + 'x' => 'x', + 'y' => 'y', + 'z' => 'z', + + -- Uppercase Latin-1 letters. + UC_A_Grave => LC_A_Grave, + UC_A_Acute => LC_A_Acute, + UC_A_Circumflex => LC_A_Circumflex, + UC_A_Tilde => LC_A_Tilde, + UC_A_Diaeresis => LC_A_Diaeresis, + UC_A_Ring => LC_A_Ring, + UC_AE_Diphthong => LC_AE_Diphthong, + UC_C_Cedilla => LC_C_Cedilla, + UC_E_Grave => LC_E_Grave, + UC_E_Acute => LC_E_Acute, + UC_E_Circumflex => LC_E_Circumflex, + UC_E_Diaeresis => LC_E_Diaeresis, + UC_I_Grave => LC_I_Grave, + UC_I_Acute => LC_I_Acute, + UC_I_Circumflex => LC_I_Circumflex, + UC_I_Diaeresis => LC_I_Diaeresis, + UC_Icelandic_Eth => LC_Icelandic_Eth, + UC_N_Tilde => LC_N_Tilde, + UC_O_Grave => LC_O_Grave, + UC_O_Acute => LC_O_Acute, + UC_O_Circumflex => LC_O_Circumflex, + UC_O_Tilde => LC_O_Tilde, + UC_O_Diaeresis => LC_O_Diaeresis, + UC_O_Oblique_Stroke => LC_O_Oblique_Stroke, + UC_U_Grave => LC_U_Grave, + UC_U_Acute => LC_U_Acute, + UC_U_Circumflex => LC_U_Circumflex, + UC_U_Diaeresis => LC_U_Diaeresis, + UC_Y_Acute => LC_Y_Acute, + UC_Icelandic_Thorn => LC_Icelandic_Thorn, + + -- Lowercase Latin-1 letters. + LC_A_Grave => LC_A_Grave, + LC_A_Acute => LC_A_Acute, + LC_A_Circumflex => LC_A_Circumflex, + LC_A_Tilde => LC_A_Tilde, + LC_A_Diaeresis => LC_A_Diaeresis, + LC_A_Ring => LC_A_Ring, + LC_AE_Diphthong => LC_AE_Diphthong, + LC_C_Cedilla => LC_C_Cedilla, + LC_E_Grave => LC_E_Grave, + LC_E_Acute => LC_E_Acute, + LC_E_Circumflex => LC_E_Circumflex, + LC_E_Diaeresis => LC_E_Diaeresis, + LC_I_Grave => LC_I_Grave, + LC_I_Acute => LC_I_Acute, + LC_I_Circumflex => LC_I_Circumflex, + LC_I_Diaeresis => LC_I_Diaeresis, + LC_Icelandic_Eth => LC_Icelandic_Eth, + LC_N_Tilde => LC_N_Tilde, + LC_O_Grave => LC_O_Grave, + LC_O_Acute => LC_O_Acute, + LC_O_Circumflex => LC_O_Circumflex, + LC_O_Tilde => LC_O_Tilde, + LC_O_Diaeresis => LC_O_Diaeresis, + LC_O_Oblique_Stroke => LC_O_Oblique_Stroke, + LC_U_Grave => LC_U_Grave, + LC_U_Acute => LC_U_Acute, + LC_U_Circumflex => LC_U_Circumflex, + LC_U_Diaeresis => LC_U_Diaeresis, + LC_Y_Acute => LC_Y_Acute, + LC_Icelandic_Thorn => LC_Icelandic_Thorn, + + -- Lowercase latin-1 characters without corresponding uppercase one. + LC_Y_Diaeresis => LC_Y_Diaeresis, + LC_German_Sharp_S => LC_German_Sharp_S, + + -- Not a letter. + others => NUL); + + procedure Error_Too_Long is + begin + Error_Msg_Scan ("identifier is too long (>" + & Natural'Image (Max_Name_Length - 1) & ")"); + end Error_Too_Long; + + -- LRM93 13.3.1 + -- Basic Identifiers + -- A basic identifier consists only of letters, digits, and underlines. + -- BASIC_IDENTIFIER ::= LETTER { [ UNDERLINE ] LETTER_OR_DIGIT } + -- LETTER_OR_DIGIT ::= LETTER | DIGIT + -- LETTER ::= UPPER_CASE_LETTER | LOWER_CASE_LETTER + -- + -- NB: At the call of this procedure, the current character must be a legal + -- character for a basic identifier. + procedure Scan_Identifier (Allow_PSL : Boolean) + is + use Name_Table; + Buffer : String (1 .. Max_Name_Length); + C : Character; + Len : Natural; + begin + -- This is an identifier or a key word. + Len := 0; + loop + -- Source (pos) is correct. + -- LRM93 13.3.1 + -- All characters if a basic identifier are signifiant, including + -- any underline character inserted between a letter or digit and + -- an adjacent letter or digit. + -- Basic identifiers differing only in the use of the corresponding + -- upper and lower case letters are considered as the same. + -- + -- GHDL: This is achieved by converting all upper case letters into + -- equivalent lower case letters. + -- The opposite (converting to upper lower case letters) is not + -- possible because two characters have no upper-case equivalent. + C := Source (Pos); + case C is + when 'A' .. 'Z' => + C := Character'Val + (Character'Pos (C) + + Character'Pos ('a') - Character'Pos ('A')); + when 'a' .. 'z' | '0' .. '9' => + null; + when '_' => + if Source (Pos + 1) = '_' then + Error_Msg_Scan ("two underscores can't be consecutive"); + end if; + when ' ' | ')' | '.' | ';' | ':' => + exit; + when others => + -- Non common case. + case Characters_Kind (C) is + when Upper_Case_Letter | Lower_Case_Letter => + if Vhdl_Std = Vhdl_87 then + Error_8bit; + end if; + C := To_Lower_Map (C); + pragma Assert (C /= NUL); + when Digit => + raise Internal_Error; + when others => + exit; + end case; + end case; + + -- Put character in name buffer. FIXME: compute the hash at the same + -- time ? + if Len >= Max_Name_Length - 1 then + if Len = Max_Name_Length -1 then + Error_Msg_Scan ("identifier is too long (>" + & Natural'Image (Max_Name_Length - 1) & ")"); + -- Accept this last one character, so that no error for the + -- following characters. + Len := Len + 1; + Buffer (Len) := C; + end if; + else + Len := Len + 1; + Buffer (Len) := C; + end if; + + -- Next character. + Pos := Pos + 1; + end loop; + + if Source (Pos - 1) = '_' then + if Allow_PSL then + -- Some PSL reserved words finish with '_'. This case is handled + -- later by Scan_Underscore and Scan_Exclam_Mark. + Pos := Pos - 1; + Len := Len - 1; + C := '_'; + else + -- Eat the trailing underscore. + Error_Msg_Scan ("an identifier cannot finish with '_'"); + end if; + end if; + + -- LRM93 13.2 + -- At least one separator is required between an identifier or an + -- abstract literal and an adjacent identifier or abstract literal. + case Characters_Kind (C) is + when Digit + | Upper_Case_Letter + | Lower_Case_Letter => + raise Internal_Error; + when Other_Special_Character | Special_Character => + if (C = '"' or C = '%') and then Len <= 2 then + if C = '%' and Vhdl_Std >= Vhdl_08 then + Error_Msg_Scan ("'%%' not allowed in vhdl 2008 " + & "(was replacement character)"); + -- Continue as a bit string. + end if; + + -- Good candidate for bit string. + + -- LRM93 13.7 + -- BASE_SPECIFIER ::= B | O | X + -- + -- A letter in a bit string literal (either an extended digit + -- or the base specifier) can be written either in lower case + -- or in upper case, with the same meaning. + -- + -- LRM08 15.8 Bit string literals + -- BASE_SPECICIER ::= + -- B | O | X | UB | UO | UX | SB | SO | SX | D + -- + -- An extended digit and the base specifier in a bit string + -- literal can be written either in lowercase or in uppercase, + -- with the same meaning. + declare + Base : Nat32; + Cl : constant Character := Buffer (Len); + Cf : constant Character := Buffer (1); + begin + Current_Context.Bit_Str_Base := Cl; + if Cl = 'b' then + Base := 1; + elsif Cl = 'o' then + Base := 3; + elsif Cl = 'x' then + Base := 4; + elsif Vhdl_Std >= Vhdl_08 and Len = 1 and Cf = 'd' then + Current_Context.Bit_Str_Sign := ' '; + Scan_Dec_Bit_String; + return; + else + Base := 0; + end if; + if Base > 0 then + if Len = 1 then + Current_Context.Bit_Str_Sign := ' '; + Scan_Bit_String (Base); + return; + elsif Vhdl_Std >= Vhdl_08 + and then (Cf = 's' or Cf = 'u') + then + Current_Context.Bit_Str_Sign := Cf; + Scan_Bit_String (Base); + return; + end if; + end if; + end; + elsif Vhdl_Std > Vhdl_87 and then C = '\' then + -- Start of extended identifier. Cannot follow an identifier. + Error_Separator; + end if; + + when Invalid => + -- Improve error message for use of UTF-8 quote marks. + -- It's possible because in the sequence of UTF-8 bytes for the + -- quote marks, there are invalid character (in the 128-160 + -- range). + if C = Character'Val (16#80#) + and then Buffer (Len) = Character'Val (16#e2#) + and then (Source (Pos + 1) = Character'Val (16#98#) + or else Source (Pos + 1) = Character'Val (16#99#)) + then + -- UTF-8 left or right single quote mark. + if Len > 1 then + -- The first byte (0xe2) is part of the identifier. An + -- error will be detected as the next byte (0x80) is + -- invalid. Remove the first byte from the identifier, and + -- let's catch the error later. + Len := Len - 1; + Pos := Pos - 1; + else + Error_Msg_Scan ("invalid use of UTF8 character for '"); + Pos := Pos + 2; + + -- Distinguish between character literal and tick. Don't + -- care about possible invalid character literal, as in any + -- case we have already emitted an error message. + if Current_Context.Prev_Token /= Tok_Identifier + and then Current_Context.Prev_Token /= Tok_Character + and then + (Source (Pos + 1) = ''' + or else + (Source (Pos + 1) = Character'Val (16#e2#) + and then Source (Pos + 2) = Character'Val (16#80#) + and then Source (Pos + 3) = Character'Val (16#99#))) + then + Current_Token := Tok_Character; + Current_Context.Identifier := + Name_Table.Get_Identifier (Source (Pos)); + if Source (Pos + 1) = ''' then + Pos := Pos + 2; + else + Pos := Pos + 4; + end if; + else + Current_Token := Tok_Tick; + end if; + return; + end if; + end if; + when Format_Effector + | Space_Character => + null; + end case; + + -- Hash it. + Current_Context.Identifier := Get_Identifier (Buffer (1 .. Len)); + Current_Token := Tok_Identifier; + end Scan_Identifier; + + procedure Identifier_To_Token is + begin + if Current_Identifier in Std_Names.Name_Id_Keywords then + -- LRM93 13.9 + -- The identifiers listed below are called reserved words and are + -- reserved for signifiances in the language. + -- IN: this is also achieved in packages std_names and tokens. + Current_Token := Token_Type'Val + (Token_Type'Pos (Tok_First_Keyword) + + Current_Identifier - Std_Names.Name_First_Keyword); + case Current_Identifier is + when Std_Names.Name_Id_AMS_Reserved_Words => + if not AMS_Vhdl then + if Is_Warning_Enabled (Warnid_Reserved_Word) then + Warning_Msg_Scan + (Warnid_Reserved_Word, + "using %i AMS-VHDL reserved word as an identifier", + +Current_Identifier); + end if; + Current_Token := Tok_Identifier; + end if; + when Std_Names.Name_Id_Vhdl08_Reserved_Words => + if Vhdl_Std < Vhdl_08 then + if Is_Warning_Enabled (Warnid_Reserved_Word) then + Warning_Msg_Scan + (Warnid_Reserved_Word, + "using %i vhdl-2008 reserved word as an identifier", + +Current_Identifier); + end if; + Current_Token := Tok_Identifier; + end if; + when Std_Names.Name_Id_Vhdl00_Reserved_Words => + if Vhdl_Std < Vhdl_00 then + if Is_Warning_Enabled (Warnid_Reserved_Word) then + Warning_Msg_Scan + (Warnid_Reserved_Word, + "using %i vhdl-2000 reserved word as an identifier", + +Current_Identifier); + end if; + Current_Token := Tok_Identifier; + end if; + when Std_Names.Name_Id_Vhdl93_Reserved_Words => + if Vhdl_Std = Vhdl_87 then + if Is_Warning_Enabled (Warnid_Reserved_Word) then + Warning_Msg_Scan + (Warnid_Reserved_Word, + "using %i vhdl93 reserved word as a vhdl87 identifier", + +Current_Identifier, True); + Warning_Msg_Scan + (Warnid_Reserved_Word, + "(use option --std=93 to compile as vhdl93)"); + end if; + Current_Token := Tok_Identifier; + end if; + when Std_Names.Name_Id_Vhdl87_Reserved_Words => + null; + when others => + raise Program_Error; + end case; + elsif Flag_Psl then + case Current_Identifier is + when Std_Names.Name_Clock => + Current_Token := Tok_Psl_Clock; + when Std_Names.Name_Const => + Current_Token := Tok_Psl_Const; + when Std_Names.Name_Boolean => + Current_Token := Tok_Psl_Boolean; + when Std_Names.Name_Sequence => + Current_Token := Tok_Psl_Sequence; + when Std_Names.Name_Property => + Current_Token := Tok_Psl_Property; + when Std_Names.Name_Endpoint => + Current_Token := Tok_Psl_Endpoint; + when Std_Names.Name_Cover => + Current_Token := Tok_Psl_Cover; + when Std_Names.Name_Default => + Current_Token := Tok_Psl_Default; + when Std_Names.Name_Inf => + Current_Token := Tok_Inf; + when Std_Names.Name_Within => + Current_Token := Tok_Within; + when Std_Names.Name_Abort => + Current_Token := Tok_Abort; + when Std_Names.Name_Before => + Current_Token := Tok_Before; + when Std_Names.Name_Always => + Current_Token := Tok_Always; + when Std_Names.Name_Never => + Current_Token := Tok_Never; + when Std_Names.Name_Eventually => + Current_Token := Tok_Eventually; + when Std_Names.Name_Next_A => + Current_Token := Tok_Next_A; + when Std_Names.Name_Next_E => + Current_Token := Tok_Next_E; + when Std_Names.Name_Next_Event => + Current_Token := Tok_Next_Event; + when Std_Names.Name_Next_Event_A => + Current_Token := Tok_Next_Event_A; + when Std_Names.Name_Next_Event_E => + Current_Token := Tok_Next_Event_E; + when Std_Names.Name_Until => + Current_Token := Tok_Until; + when others => + Current_Token := Tok_Identifier; + if Source (Pos - 1) = '_' then + Error_Msg_Scan ("identifiers cannot finish with '_'"); + end if; + end case; + end if; + end Identifier_To_Token; + + -- LRM93 13.3.2 + -- EXTENDED_IDENTIFIER ::= \ GRAPHIC_CHARACTER { GRAPHIC_CHARACTER } \ + -- + -- Create an (extended) indentifier. + -- Extended identifiers are stored as they appear (leading and tailing + -- backslashes, doubling backslashes inside). + procedure Scan_Extended_Identifier + is + use Name_Table; + Buffer : String (1 .. Max_Name_Length); + Len : Natural; + C : Character; + begin + -- LRM93 13.3.2 + -- Moreover, every extended identifiers is distinct from any basic + -- identifier. + -- GHDL: This is satisfied by storing '\' in the name table. + Len := 1; + Buffer (1) := '\'; + loop + -- Next character. + Pos := Pos + 1; + C := Source (Pos); + + if C = '\' then + -- LRM93 13.3.2 + -- If a backslash is to be used as one of the graphic characters + -- of an extended literal, it must be doubled. + -- LRM93 13.3.2 + -- (a doubled backslash couting as one character) + if Len >= Max_Name_Length - 1 then + if Len = Max_Name_Length - 1 then + Error_Too_Long; + -- Accept this last one. + Len := Len + 1; + Buffer (Len) := C; + end if; + else + Len := Len + 1; + Buffer (Len) := C; + end if; + + Pos := Pos + 1; + C := Source (Pos); + + exit when C /= '\'; + end if; + + case Characters_Kind (C) is + when Format_Effector => + Error_Msg_Scan ("format effector in extended identifier"); + exit; + when Graphic_Character => + null; + when Invalid => + if C = Files_Map.EOT + and then Pos >= Current_Context.File_Len + then + Error_Msg_Scan + ("extended identifier not terminated at end of file"); + elsif C = LF or C = CR then + Error_Msg_Scan + ("extended identifier not terminated at end of line"); + else + Error_Msg_Scan ("invalid character in extended identifier"); + end if; + exit; + end case; + + -- LRM93 13.3.2 + -- Extended identifiers differing only in the use of corresponding + -- upper and lower case letters are distinct. + if Len >= Max_Name_Length - 1 then + if Len = Max_Name_Length - 1 then + Error_Too_Long; + -- Accept this last one. + Len := Len + 1; + Buffer (Len) := C; + end if; + else + Len := Len + 1; + Buffer (Len) := C; + end if; + end loop; + + if Len <= 2 then + Error_Msg_Scan ("empty extended identifier is not allowed"); + end if; + + -- LRM93 13.2 + -- At least one separator is required between an identifier or an + -- abstract literal and an adjacent identifier or abstract literal. + case Characters_Kind (C) is + when Digit + | Upper_Case_Letter + | Lower_Case_Letter => + Error_Separator; + when Invalid + | Format_Effector + | Space_Character + | Special_Character + | Other_Special_Character => + null; + end case; + + -- Hash it. + Current_Context.Identifier := Get_Identifier (Buffer (1 .. Len)); + Current_Token := Tok_Identifier; + end Scan_Extended_Identifier; + + procedure Convert_Identifier (Str : in out String) + is + procedure Error_Bad is + begin + Error_Msg_Option ("bad character in identifier"); + end Error_Bad; + + procedure Error_8bit is + begin + Error_Msg_Option ("8 bits characters not allowed in vhdl87"); + end Error_8bit; + + C : Character; + subtype Id_Subtype is String (1 .. Str'Length); + Id : Id_Subtype renames Str; + begin + if Id'Length = 0 then + Error_Msg_Option ("identifier required"); + return; + end if; + + if Id (1) = '\' then + -- Extended identifier. + if Vhdl_Std = Vhdl_87 then + Error_Msg_Option ("extended identifiers not allowed in vhdl87"); + return; + end if; + + if Id'Length < 3 then + Error_Msg_Option ("extended identifier is too short"); + return; + end if; + if Id (Id'Last) /= '\' then + Error_Msg_Option ("extended identifier must finish with a '\'"); + return; + end if; + for I in 2 .. Id'Last - 1 loop + C := Id (I); + case Characters_Kind (C) is + when Format_Effector => + Error_Msg_Option ("format effector in extended identifier"); + return; + when Graphic_Character => + if C = '\' then + if Id (I + 1) /= '\' + or else I = Id'Last - 1 + then + Error_Msg_Option ("anti-slash must be doubled " + & "in extended identifier"); + return; + end if; + end if; + when Invalid => + Error_Bad; + end case; + end loop; + else + -- Identifier + for I in 1 .. Id'Length loop + C := Id (I); + case Characters_Kind (C) is + when Upper_Case_Letter => + if Vhdl_Std = Vhdl_87 and C > 'Z' then + Error_8bit; + end if; + Id (I) := To_Lower_Map (C); + when Lower_Case_Letter | Digit => + if Vhdl_Std = Vhdl_87 and C > 'z' then + Error_8bit; + end if; + when Special_Character => + -- The current character is legal in an identifier. + if C = '_' then + if I = 1 then + Error_Msg_Option + ("an identifier cannot start with an underscore"); + return; + end if; + if Id (I - 1) = '_' then + Error_Msg_Option + ("two underscores can't be consecutive"); + return; + end if; + if I = Id'Last then + Error_Msg_Option + ("an identifier cannot finish with an underscore"); + return; + end if; + else + Error_Bad; + end if; + when others => + Error_Bad; + end case; + end loop; + end if; + end Convert_Identifier; + + -- Internal scanner function: return True if C must be considered as a line + -- terminator. This also includes EOT (which terminates the file or is + -- invalid). + function Is_EOL (C : Character) return Boolean is + begin + case C is + when CR | LF | VT | FF | Files_Map.EOT => + return True; + when others => + return False; + end case; + end Is_EOL; + + -- Advance scanner till the first non-space character. + procedure Skip_Spaces is + begin + while Source (Pos) = ' ' or Source (Pos) = HT loop + Pos := Pos + 1; + end loop; + end Skip_Spaces; + + -- Eat all characters until end-of-line (not included). + procedure Skip_Until_EOL is + begin + while not Is_EOL (Source (Pos)) loop + -- Don't warn about invalid character, it's somewhat out of the + -- scope. + Pos := Pos + 1; + end loop; + end Skip_Until_EOL; + + -- Scan an identifier within a comment. Only lower case letters are + -- allowed. + procedure Scan_Comment_Identifier (Id : out Name_Id) + is + use Name_Table; + Buffer : String (1 .. Max_Name_Length); + Len : Natural; + C : Character; + begin + Id := Null_Identifier; + Skip_Spaces; + + -- The identifier shall start with a lower case letter. + if Source (Pos) not in 'a' .. 'z' then + return; + end if; + + -- Scan the identifier (in lower cases). + Len := 0; + loop + C := Source (Pos); + exit when C not in 'a' .. 'z' and C /= '_'; + Len := Len + 1; + Buffer (Len) := C; + Pos := Pos + 1; + end loop; + + -- Shall be followed by a space or a new line. + if not (C = ' ' or else C = HT or else Is_EOL (C)) then + return; + end if; + + Id := Get_Identifier (Buffer (1 .. Len)); + end Scan_Comment_Identifier; + + package Directive_Protect is + -- Called to scan a protect tool directive. + procedure Scan_Protect_Directive; + end Directive_Protect; + + -- Body is put in a separate file to avoid pollution. + package body Directive_Protect is separate; + + -- Called to scan a tool directive. + procedure Scan_Tool_Directive + is + procedure Error_Missing_Directive is + begin + Error_Msg_Scan ("tool directive required after '`'"); + Skip_Until_EOL; + end Error_Missing_Directive; + + C : Character; + begin + -- The current character is '`'. + Pos := Pos + 1; + Skip_Spaces; + + -- Check and scan identifier. + C := Source (Pos); + if Characters_Kind (C) not in Letter then + Error_Missing_Directive; + return; + end if; + + Scan_Identifier (False); + + if Current_Token /= Tok_Identifier then + Error_Missing_Directive; + return; + end if; + + Skip_Spaces; + + -- Dispatch according to the identifier. + if Current_Identifier = Std_Names.Name_Protect then + Directive_Protect.Scan_Protect_Directive; + else + Error_Msg_Scan + ("unknown tool directive %i ignored", +Current_Identifier); + Skip_Until_EOL; + end if; + end Scan_Tool_Directive; + + -- Scan tokens within a comment. Return TRUE if Current_Token was set, + -- return FALSE to discard the comment (ie treat it like a real comment). + function Scan_Comment return Boolean + is + use Std_Names; + Id : Name_Id; + begin + Scan_Comment_Identifier (Id); + + if Id = Null_Identifier then + return False; + end if; + + case Id is + when Name_Psl => + -- Accept tokens after '-- psl'. + if Flag_Psl_Comment then + Flag_Psl := True; + Flag_Scan_In_Comment := True; + return True; + end if; + when others => + null; + end case; + return False; + end Scan_Comment; + + function Scan_Exclam_Mark return Boolean is + begin + if Source (Pos) = '!' then + Pos := Pos + 1; + return True; + else + return False; + end if; + end Scan_Exclam_Mark; + + function Scan_Underscore return Boolean is + begin + if Source (Pos) = '_' then + Pos := Pos + 1; + return True; + else + return False; + end if; + end Scan_Underscore; + + -- The Scan_Next_Line procedure must be called after each end-of-line to + -- register to next line number. This is called by Scan_CR_Newline and + -- Scan_LF_Newline. + procedure Scan_Next_Line is + begin + Files_Map.Skip_Gap (Current_Context.Source_File, Pos); + Current_Context.Line_Number := Current_Context.Line_Number + 1; + Current_Context.Line_Pos := Pos; + File_Add_Line_Number + (Current_Context.Source_File, Current_Context.Line_Number, Pos); + end Scan_Next_Line; + + -- Scan a CR end-of-line. + procedure Scan_CR_Newline is + begin + -- Accept CR or CR+LF as line separator. + if Source (Pos + 1) = LF then + Pos := Pos + 2; + else + Pos := Pos + 1; + end if; + Scan_Next_Line; + end Scan_CR_Newline; + + -- Scan a LF end-of-line. + procedure Scan_LF_Newline is + begin + -- Accept LF or LF+CR as line separator. + if Source (Pos + 1) = CR then + Pos := Pos + 2; + else + Pos := Pos + 1; + end if; + Scan_Next_Line; + end Scan_LF_Newline; + + -- Emit an error message for an invalid character. + procedure Error_Bad_Character is + begin + -- Technically character literals, string literals, extended + -- identifiers and comments. + Error_Msg_Scan ("character %c can only be used in strings or comments", + +Source (Pos)); + end Error_Bad_Character; + + -- Get a new token. + procedure Scan is + begin + if Current_Token /= Tok_Invalid then + Current_Context.Prev_Token := Current_Token; + end if; + + Current_Context.Prev_Pos := Pos; + + << Again >> null; + + -- Skip commonly used separators. + -- (Like Skip_Spaces but manually inlined for speed). + while Source (Pos) = ' ' or Source (Pos) = HT loop + Pos := Pos + 1; + end loop; + + Current_Context.Token_Pos := Pos; + Current_Context.Identifier := Null_Identifier; + + case Source (Pos) is + when HT | ' ' => + -- Must have already been skipped just above. + raise Internal_Error; + when NBSP => + if Vhdl_Std = Vhdl_87 then + Error_Msg_Scan ("NBSP character not allowed in vhdl87"); + end if; + Pos := Pos + 1; + goto Again; + when VT | FF => + Pos := Pos + 1; + goto Again; + when LF => + Scan_LF_Newline; + if Flag_Newline then + Current_Token := Tok_Newline; + return; + end if; + goto Again; + when CR => + Scan_CR_Newline; + if Flag_Newline then + Current_Token := Tok_Newline; + return; + end if; + goto Again; + when '-' => + if Source (Pos + 1) = '-' then + -- This is a comment. + -- LRM93 13.8 + -- A comment starts with two adjacent hyphens and extends up + -- to the end of the line. + -- A comment can appear on any line line of a VHDL + -- description. + -- The presence or absence of comments has no influence on + -- whether a description is legal or illegal. + -- Futhermore, comments do not influence the execution of a + -- simulation module; their sole purpose is the enlightenment + -- of the human reader. + -- GHDL note: As a consequence, an obfruscating comment + -- is out of purpose, and a warning could be reported :-) + Pos := Pos + 2; + + -- Scan inside a comment. So we just ignore the two dashes. + if Flag_Scan_In_Comment then + goto Again; + end if; + + -- Handle keywords in comment (PSL). + if Flag_Comment_Keyword and then Scan_Comment then + goto Again; + end if; + + -- LRM93 13.2 + -- In any case, a sequence of one or more format + -- effectors other than horizontal tabulation must + -- cause at least one end of line. + while not Is_EOL (Source (Pos)) loop + -- LRM93 13.1 + -- The only characters allowed in the text of a VHDL + -- description are the graphic characters and the format + -- effectors. + + -- LRM02 13.1 Character set + -- The only characters allowed in the text of a VHDL + -- description (except within comments -- see 13.8) [...] + -- + -- LRM02 13.8 Comments + -- A comment [...] may contain any character except the + -- format effectors vertical tab, carriage return, line + -- feed and form feed. + if not (Flags.Mb_Comment or Vhdl_Std >= Vhdl_02) + and then Characters_Kind (Source (Pos)) = Invalid + then + Error_Msg_Scan ("invalid character, even in a comment"); + end if; + Pos := Pos + 1; + end loop; + if Flag_Comment then + Current_Token := Tok_Comment; + return; + end if; + goto Again; + elsif Flag_Psl and then Source (Pos + 1) = '>' then + Current_Token := Tok_Minus_Greater; + Pos := Pos + 2; + return; + else + Current_Token := Tok_Minus; + Pos := Pos + 1; + return; + end if; + when '+' => + Current_Token := Tok_Plus; + Pos := Pos + 1; + return; + when '*' => + if Source (Pos + 1) = '*' then + Current_Token := Tok_Double_Star; + Pos := Pos + 2; + else + Current_Token := Tok_Star; + Pos := Pos + 1; + end if; + return; + when '/' => + if Source (Pos + 1) = '=' then + Current_Token := Tok_Not_Equal; + Pos := Pos + 2; + elsif Source (Pos + 1) = '*' then + -- LRM08 15.9 Comments + -- A delimited comment start with a solidus (slash) character + -- immediately followed by an asterisk character and extends up + -- to the first subsequent occurrence of an asterisk character + -- immediately followed by a solidus character. + if Vhdl_Std < Vhdl_08 then + Error_Msg_Scan + ("block comment are not allowed before vhdl 2008"); + end if; + + -- Skip '/*'. + Pos := Pos + 2; + + loop + case Source (Pos) is + when '/' => + -- LRM08 15.9 + -- Moreover, an occurrence of a solidus character + -- immediately followed by an asterisk character + -- within a delimited comment is not interpreted as + -- the start of a nested delimited comment. + if Source (Pos + 1) = '*' then + Warning_Msg_Scan + (Warnid_Nested_Comment, + "'/*' found within a block comment"); + end if; + Pos := Pos + 1; + when '*' => + if Source (Pos + 1) = '/' then + Pos := Pos + 2; + exit; + else + Pos := Pos + 1; + end if; + when CR => + Scan_CR_Newline; + when LF => + Scan_LF_Newline; + when Files_Map.EOT => + if Pos >= Current_Context.File_Len then + -- Point at the start of the comment. + Error_Msg_Scan + (Get_Token_Location, + "block comment not terminated at end of file"); + exit; + end if; + Pos := Pos + 1; + when others => + Pos := Pos + 1; + end case; + end loop; + if Flag_Comment then + Current_Token := Tok_Comment; + return; + end if; + goto Again; + else + Current_Token := Tok_Slash; + Pos := Pos + 1; + end if; + return; + when '(' => + Current_Token := Tok_Left_Paren; + Pos := Pos + 1; + return; + when ')' => + Current_Token := Tok_Right_Paren; + Pos := Pos + 1; + return; + when '|' => + if Flag_Psl then + if Source (Pos + 1) = '|' then + Current_Token := Tok_Bar_Bar; + Pos := Pos + 2; + elsif Source (Pos + 1) = '-' + and then Source (Pos + 2) = '>' + then + Current_Token := Tok_Bar_Arrow; + Pos := Pos + 3; + elsif Source (Pos + 1) = '=' + and then Source (Pos + 2) = '>' + then + Current_Token := Tok_Bar_Double_Arrow; + Pos := Pos + 3; + else + Current_Token := Tok_Bar; + Pos := Pos + 1; + end if; + else + Current_Token := Tok_Bar; + Pos := Pos + 1; + end if; + return; + when '!' => + if Flag_Psl then + Current_Token := Tok_Exclam_Mark; + else + if Source (Pos + 1) = '=' then + -- != is not allowed in VHDL, but be friendly with C users. + Error_Msg_Scan + (Get_Token_Location, "Use '/=' for inequality in vhdl"); + Current_Token := Tok_Not_Equal; + Pos := Pos + 1; + else + -- LRM93 13.10 + -- A vertical line (|) can be replaced by an exclamation + -- mark (!) where used as a delimiter. + Current_Token := Tok_Bar; + end if; + end if; + Pos := Pos + 1; + return; + when ':' => + if Source (Pos + 1) = '=' then + Current_Token := Tok_Assign; + Pos := Pos + 2; + else + Current_Token := Tok_Colon; + Pos := Pos + 1; + end if; + return; + when ';' => + Current_Token := Tok_Semi_Colon; + Pos := Pos + 1; + return; + when ',' => + Current_Token := Tok_Comma; + Pos := Pos + 1; + return; + when '.' => + if Source (Pos + 1) = '.' then + -- Be Ada friendly... + Error_Msg_Scan ("'..' is invalid in vhdl, replaced by 'to'"); + Current_Token := Tok_To; + Pos := Pos + 2; + return; + end if; + Current_Token := Tok_Dot; + Pos := Pos + 1; + return; + when '&' => + if Flag_Psl and then Source (Pos + 1) = '&' then + Current_Token := Tok_And_And; + Pos := Pos + 2; + else + Current_Token := Tok_Ampersand; + Pos := Pos + 1; + end if; + return; + when '<' => + case Source (Pos + 1) is + when '=' => + Current_Token := Tok_Less_Equal; + Pos := Pos + 2; + when '>' => + Current_Token := Tok_Box; + Pos := Pos + 2; + when '<' => + Current_Token := Tok_Double_Less; + Pos := Pos + 2; + when others => + Current_Token := Tok_Less; + Pos := Pos + 1; + end case; + return; + when '>' => + case Source (Pos + 1) is + when '=' => + Current_Token := Tok_Greater_Equal; + Pos := Pos + 2; + when '>' => + Current_Token := Tok_Double_Greater; + Pos := Pos + 2; + when others => + Current_Token := Tok_Greater; + Pos := Pos + 1; + end case; + return; + when '=' => + if Source (Pos + 1) = '=' then + if AMS_Vhdl then + Current_Token := Tok_Equal_Equal; + else + Error_Msg_Scan + ("'==' is not the vhdl equality, replaced by '='"); + Current_Token := Tok_Equal; + end if; + Pos := Pos + 2; + elsif Source (Pos + 1) = '>' then + Current_Token := Tok_Double_Arrow; + Pos := Pos + 2; + else + Current_Token := Tok_Equal; + Pos := Pos + 1; + end if; + return; + when ''' => + -- Handle cases such as character'('a') + -- FIXME: what about f ()'length ? or .all'length + if Current_Context.Prev_Token /= Tok_Identifier + and then Current_Context.Prev_Token /= Tok_Character + and then Source (Pos + 2) = ''' + then + -- LRM93 13.5 + -- A character literal is formed by enclosing one of the 191 + -- graphic character (...) between two apostrophe characters. + -- CHARACTER_LITERAL ::= ' GRAPHIC_CHARACTER ' + if Characters_Kind (Source (Pos + 1)) not in Graphic_Character + then + Error_Msg_Scan + ("a character literal can only be a graphic character"); + elsif Vhdl_Std = Vhdl_87 + and then Source (Pos + 1) > Character'Val (127) + then + Error_8bit; + end if; + Current_Token := Tok_Character; + Current_Context.Identifier := + Name_Table.Get_Identifier (Source (Pos + 1)); + Pos := Pos + 3; + return; + else + Current_Token := Tok_Tick; + Pos := Pos + 1; + end if; + return; + when '0' .. '9' => + Scan_Literal; + + -- LRM93 13.2 + -- At least one separator is required between an identifier or + -- an abstract literal and an adjacent identifier or abstract + -- literal. + case Characters_Kind (Source (Pos)) is + when Digit => + -- Happen if d#ddd# is followed by a number. + Error_Msg_Scan ("space is required between numbers"); + when Upper_Case_Letter + | Lower_Case_Letter => + -- Could call Error_Separator, but use a clearer message + -- for this common case. + -- Note: the term "unit name" is not correct here, since + -- it can be any identifier or even a keyword; however it + -- is probably the most common case (eg 10ns). + if Vhdl_Std >= Vhdl_08 and then Current_Token = Tok_Integer + then + Current_Token := Tok_Integer_Letter; + else + Error_Msg_Scan + ("space is required between number and unit name"); + end if; + when Other_Special_Character => + if Vhdl_Std > Vhdl_87 and then Source (Pos) = '\' then + -- Start of extended identifier. + Error_Separator; + end if; + when Invalid + | Format_Effector + | Space_Character + | Special_Character => + null; + end case; + return; + when '#' => + Error_Msg_Scan ("'#' is used for based literals and " + & "must be preceded by a base"); + -- Skip. + Pos := Pos + 1; + goto Again; + when '"' => + Scan_String; + return; + when '%' => + if Vhdl_Std >= Vhdl_08 then + Error_Msg_Scan + ("'%%' not allowed in vhdl 2008 (was replacement character)"); + -- Continue as a string. + end if; + Scan_String; + return; + when '[' => + if Flag_Psl then + if Source (Pos + 1) = '*' then + Current_Token := Tok_Brack_Star; + Pos := Pos + 2; + elsif Source (Pos + 1) = '+' + and then Source (Pos + 2) = ']' + then + Current_Token := Tok_Brack_Plus_Brack; + Pos := Pos + 3; + elsif Source (Pos + 1) = '-' + and then Source (Pos + 2) = '>' + then + Current_Token := Tok_Brack_Arrow; + Pos := Pos + 3; + elsif Source (Pos + 1) = '=' then + Current_Token := Tok_Brack_Equal; + Pos := Pos + 2; + else + Current_Token := Tok_Left_Bracket; + Pos := Pos + 1; + end if; + else + if Vhdl_Std = Vhdl_87 then + Error_Msg_Scan + ("'[' is an invalid character in vhdl87, replaced by '('"); + Current_Token := Tok_Left_Paren; + else + Current_Token := Tok_Left_Bracket; + end if; + Pos := Pos + 1; + end if; + return; + when ']' => + if Vhdl_Std = Vhdl_87 and not Flag_Psl then + Error_Msg_Scan + ("']' is an invalid character in vhdl87, replaced by ')'"); + Current_Token := Tok_Right_Paren; + else + Current_Token := Tok_Right_Bracket; + end if; + Pos := Pos + 1; + return; + when '{' => + if Flag_Psl then + Current_Token := Tok_Left_Curly; + else + Error_Msg_Scan ("'{' is an invalid character, replaced by '('"); + Current_Token := Tok_Left_Paren; + end if; + Pos := Pos + 1; + return; + when '}' => + if Flag_Psl then + Current_Token := Tok_Right_Curly; + else + Error_Msg_Scan ("'}' is an invalid character, replaced by ')'"); + Current_Token := Tok_Right_Paren; + end if; + Pos := Pos + 1; + return; + when '\' => + if Vhdl_Std = Vhdl_87 then + Error_Msg_Scan + ("extended identifiers are not allowed in vhdl87"); + end if; + Scan_Extended_Identifier; + return; + when '^' => + if Vhdl_Std >= Vhdl_08 then + Current_Token := Tok_Caret; + else + Current_Token := Tok_Xor; + Error_Msg_Scan ("'^' is not a VHDL operator, use 'xor'"); + end if; + Pos := Pos + 1; + return; + when '~' => + Error_Msg_Scan ("'~' is not a VHDL operator, use 'not'"); + Pos := Pos + 1; + Current_Token := Tok_Not; + return; + when '?' => + if Vhdl_Std < Vhdl_08 then + Error_Bad_Character; + Pos := Pos + 1; + goto Again; + else + if Source (Pos + 1) = '<' then + if Source (Pos + 2) = '=' then + Current_Token := Tok_Match_Less_Equal; + Pos := Pos + 3; + else + Current_Token := Tok_Match_Less; + Pos := Pos + 2; + end if; + elsif Source (Pos + 1) = '>' then + if Source (Pos + 2) = '=' then + Current_Token := Tok_Match_Greater_Equal; + Pos := Pos + 3; + else + Current_Token := Tok_Match_Greater; + Pos := Pos + 2; + end if; + elsif Source (Pos + 1) = '?' then + Current_Token := Tok_Condition; + Pos := Pos + 2; + elsif Source (Pos + 1) = '=' then + Current_Token := Tok_Match_Equal; + Pos := Pos + 2; + elsif Source (Pos + 1) = '/' + and then Source (Pos + 2) = '=' + then + Current_Token := Tok_Match_Not_Equal; + Pos := Pos + 3; + else + Error_Msg_Scan ("unknown matching operator"); + Pos := Pos + 1; + goto Again; + end if; + end if; + return; + when '`' => + if Vhdl_Std >= Vhdl_08 then + Scan_Tool_Directive; + else + Error_Bad_Character; + Skip_Until_EOL; + end if; + goto Again; + when '$' + | Inverted_Exclamation .. Inverted_Question + | Multiplication_Sign | Division_Sign => + Error_Bad_Character; + Pos := Pos + 1; + goto Again; + when '@' => + if Vhdl_Std >= Vhdl_08 or Flag_Psl then + Current_Token := Tok_Arobase; + Pos := Pos + 1; + return; + else + Error_Bad_Character; + Pos := Pos + 1; + goto Again; + end if; + when '_' => + Error_Msg_Scan ("an identifier can't start with '_'"); + Scan_Identifier (Flag_Psl); + -- Cannot be a reserved word. + return; + when 'A' .. 'Z' | 'a' .. 'z' => + Scan_Identifier (Flag_Psl); + Identifier_To_Token; + return; + when UC_A_Grave .. UC_O_Diaeresis + | UC_O_Oblique_Stroke .. UC_Icelandic_Thorn + | LC_German_Sharp_S .. LC_O_Diaeresis + | LC_O_Oblique_Stroke .. LC_Y_Diaeresis => + if Vhdl_Std = Vhdl_87 then + Error_Msg_Scan + ("non 7-bit latin-1 letters are not allowed in vhdl87"); + end if; + Scan_Identifier (False); + -- Not a reserved word. + return; + when NUL .. ETX | ENQ .. BS | SO .. US | DEL .. APC => + Error_Msg_Scan + ("control character that is not CR, LF, FF, HT or VT " & + "is not allowed"); + Pos := Pos + 1; + goto Again; + when Files_Map.EOT => + if Pos >= Current_Context.File_Len then + -- FIXME: should conditionnaly emit a warning if the file + -- is not terminated by an end of line. + Current_Token := Tok_Eof; + else + Error_Msg_Scan ("EOT is not allowed inside the file"); + Pos := Pos + 1; + goto Again; + end if; + return; + end case; + -- Not reachable: all case should use goto Again or return. + end Scan; + + function Is_Whitespace (C : Character) return Boolean is + begin + if C = ' ' then + return True; + elsif Vhdl_Std > Vhdl_87 and C = NBSP then + return True; + else + return False; + end if; + end Is_Whitespace; +end Vhdl.Scanner; diff --git a/src/vhdl/vhdl-scanner.ads b/src/vhdl/vhdl-scanner.ads new file mode 100644 index 000000000..221defd91 --- /dev/null +++ b/src/vhdl/vhdl-scanner.ads @@ -0,0 +1,144 @@ +-- VHDL lexical scanner. +-- Copyright (C) 2002 - 2014 Tristan Gingold +-- +-- GHDL is free software; you can redistribute it and/or modify it under +-- the terms of the GNU General Public License as published by the Free +-- Software Foundation; either version 2, or (at your option) any later +-- version. +-- +-- GHDL is distributed in the hope that it will be useful, but WITHOUT ANY +-- WARRANTY; without even the implied warranty of MERCHANTABILITY or +-- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +-- for more details. +-- +-- You should have received a copy of the GNU General Public License +-- along with GHDL; see the file COPYING. If not, write to the Free +-- Software Foundation, 59 Temple Place - Suite 330, Boston, MA +-- 02111-1307, USA. +with Types; use Types; +with Tokens; use Tokens; + +package Vhdl.Scanner is + -- Global variables + -- The token that was just scanned. + -- When the token was eaten, you can call invalidate_current_token to + -- set it to tok_invalid. + -- Current_token should not be written outside of scan package. + -- It can be replaced by a function call. + Current_Token: Token_Type := Tok_Invalid; + + -- Maximal length for identifiers. + Max_Name_Length : constant Natural := 1024; + + -- Simply set current_token to tok_invalid. + procedure Invalidate_Current_Token; + pragma Inline (Invalidate_Current_Token); + + -- When CURRENT_TOKEN is an tok_identifier, tok_char or tok_string, + -- its name_id can be got via this function. + function Current_Identifier return Name_Id; + pragma Inline (Current_Identifier); + + -- Get current string identifier and length. + function Current_String_Id return String8_Id; + function Current_String_Length return Nat32; + pragma Inline (Current_String_Id); + pragma Inline (Current_String_Length); + + -- When the current token is Tok_Bit_String, return the base ('b', 'o', + -- 'x' or 'd') and the sign ('s', 'u', or ' ' for none). + function Get_Bit_String_Base return Character; + function Get_Bit_String_Sign return Character; + pragma Inline (Get_Bit_String_Base); + pragma Inline (Get_Bit_String_Sign); + + -- Set Current_identifier to null_identifier. + -- Can be used to catch bugs. + procedure Invalidate_Current_Identifier; + pragma Inline (Invalidate_Current_Identifier); + + -- When CURRENT_TOKEN is tok_integer, returns the value. + -- When CURRENT_TOKEN is tok_bit_string, returns the log of the base. + function Current_Iir_Int64 return Iir_Int64; + pragma Inline (Current_Iir_Int64); + + -- When CURRENT_TOKEN is tok_real, it returns the value. + function Current_Iir_Fp64 return Iir_Fp64; + pragma Inline (Current_Iir_Fp64); + + -- Advances the lexical analyser. Put a new token into current_token. + procedure Scan; + + -- Initialize the scanner with file SOURCE_FILE. + procedure Set_File (Source_File : Source_File_Entry); + + -- This function can be called just after Set_File to detect UTF BOM + -- patterns. It reports an error if a BOM is present and return True. + -- Silently return False if no error detected. + function Detect_Encoding_Errors return Boolean; + + procedure Set_Current_Position (Position: Source_Ptr); + + -- Finalize the scanner. + procedure Close_File; + + -- If true comments are reported as a token. + Flag_Comment : Boolean := False; + + -- If true newlines are reported as a token. + Flag_Newline : Boolean := False; + + -- If true also scan PSL tokens. + Flag_Psl : Boolean := False; + + -- If true handle PSL embedded in comments: '-- psl' is ignored. + Flag_Psl_Comment : Boolean := False; + + -- If true, ignore '--'. This is automatically set when Flag_Psl_Comment + -- is true and a starting PSL keyword has been identified. + -- Must be reset to false by the parser. + Flag_Scan_In_Comment : Boolean := False; + + -- If true scan for keywords in comments. Must be enabled if + -- Flag_Psl_Comment is true. + Flag_Comment_Keyword : Boolean := False; + + -- If the next character is '!', eat it and return True, otherwise return + -- False (used by PSL). + function Scan_Exclam_Mark return Boolean; + + -- If the next character is '_', eat it and return True, otherwise return + -- False (used by PSL). + function Scan_Underscore return Boolean; + + -- Get the current location, or the location of the current token. + -- Since a token cannot spread over lines, file and line of the current + -- token are the same as those of the current position. + -- The offset is the offset in the current line. + function Get_Current_Source_File return Source_File_Entry; + function Get_Current_Line return Natural; + function Get_Current_Offset return Natural; + function Get_Position return Source_Ptr; + function Get_Token_Location return Location_Type; + function Get_Token_Offset return Natural; + function Get_Token_Position return Source_Ptr; + + -- Return the initial location before the current token (ie before all + -- the blanks, comments and newlines have been skipped). Useful for the + -- location of a missing token. + function Get_Prev_Location return Location_Type; + + -- Convert (canonicalize) an identifier stored in name_buffer/name_length. + -- Upper case letters are converted into lower case. + -- Lexical checks are performed. + -- This procedure is not used by Scan, but should be used for identifiers + -- given in the command line. + -- Errors are directly reported through error_msg_option. + -- Also, Vhdl_Std should be set. + procedure Convert_Identifier (Str : in out String); + + -- Return TRUE iff C is a whitespace. + -- LRM93 13.2 Lexical elements, separators, and delimiters + -- A space character (SPACE or NBSP) ... + function Is_Whitespace (C : Character) return Boolean; +end Vhdl.Scanner; diff --git a/src/vhdl/vhdl.ads b/src/vhdl/vhdl.ads new file mode 100644 index 000000000..db536d277 --- /dev/null +++ b/src/vhdl/vhdl.ads @@ -0,0 +1,21 @@ +-- VHDL package hierarchy. +-- Copyright (C) 2019 Tristan Gingold +-- +-- GHDL is free software; you can redistribute it and/or modify it under +-- the terms of the GNU General Public License as published by the Free +-- Software Foundation; either version 2, or (at your option) any later +-- version. +-- +-- GHDL is distributed in the hope that it will be useful, but WITHOUT ANY +-- WARRANTY; without even the implied warranty of MERCHANTABILITY or +-- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +-- for more details. +-- +-- You should have received a copy of the GNU General Public License +-- along with GHDL; see the file COPYING. If not, write to the Free +-- Software Foundation, 59 Temple Place - Suite 330, Boston, MA +-- 02111-1307, USA. + +package Vhdl is + pragma Pure (Vhdl); +end Vhdl; -- cgit v1.2.3