Commit 5bdd76e8 by Emmanuel Briot Committed by Arnaud Charlet

g-regpat.adb (Parse_Posix_Character_Class): Fix handling of [[:xdigit:]] character class.

2007-04-06  Emmanuel Briot  <briot@adacore.com>

	* g-regpat.adb (Parse_Posix_Character_Class): Fix handling of
	[[:xdigit:]] character class.
	Also raise an exception when an invalid character class is used.

From-SVN: r123577
parent 7616900d
...@@ -1828,15 +1828,13 @@ package body GNAT.Regpat is ...@@ -1828,15 +1828,13 @@ package body GNAT.Regpat is
-- Check for class names based on first letter -- Check for class names based on first letter
case Expression (Parse_Pos) is case Expression (Parse_Pos) is
when 'a' => when 'a' =>
-- All 'a' classes have the same length (Alnum'Length) -- All 'a' classes have the same length (Alnum'Length)
if Parse_Pos + Alnum'Length - 1 <= Parse_End then if Parse_Pos + Alnum'Length - 1 <= Parse_End then
if
if E (Parse_Pos .. Parse_Pos + Alnum'Length - 1) = E (Parse_Pos .. Parse_Pos + Alnum'Length - 1) = Alnum
Alnum
then then
if Invert then if Invert then
Class := ANYOF_NALNUMC; Class := ANYOF_NALNUMC;
...@@ -1846,8 +1844,8 @@ package body GNAT.Regpat is ...@@ -1846,8 +1844,8 @@ package body GNAT.Regpat is
Parse_Pos := Parse_Pos + Alnum'Length; Parse_Pos := Parse_Pos + Alnum'Length;
elsif E (Parse_Pos .. Parse_Pos + Alpha'Length - 1) = elsif
Alpha E (Parse_Pos .. Parse_Pos + Alpha'Length - 1) = Alpha
then then
if Invert then if Invert then
Class := ANYOF_NALPHA; Class := ANYOF_NALPHA;
...@@ -1867,13 +1865,19 @@ package body GNAT.Regpat is ...@@ -1867,13 +1865,19 @@ package body GNAT.Regpat is
end if; end if;
Parse_Pos := Parse_Pos + Ascii_C'Length; Parse_Pos := Parse_Pos + Ascii_C'Length;
else
Fail ("Invalid character class: " & E);
end if; end if;
else
Fail ("Invalid character class: " & E);
end if; end if;
when 'c' => when 'c' =>
if Parse_Pos + Cntrl'Length - 1 <= Parse_End if Parse_Pos + Cntrl'Length - 1 <= Parse_End
and then E (Parse_Pos .. Parse_Pos + Cntrl'Length - 1) = and then
Cntrl E (Parse_Pos .. Parse_Pos + Cntrl'Length - 1) = Cntrl
then then
if Invert then if Invert then
Class := ANYOF_NCNTRL; Class := ANYOF_NCNTRL;
...@@ -1882,12 +1886,15 @@ package body GNAT.Regpat is ...@@ -1882,12 +1886,15 @@ package body GNAT.Regpat is
end if; end if;
Parse_Pos := Parse_Pos + Cntrl'Length; Parse_Pos := Parse_Pos + Cntrl'Length;
else
Fail ("Invalid character class: " & E);
end if; end if;
when 'd' => when 'd' =>
if Parse_Pos + Digit'Length - 1 <= Parse_End if Parse_Pos + Digit'Length - 1 <= Parse_End
and then E (Parse_Pos .. Parse_Pos + Digit'Length - 1) = and then
Digit E (Parse_Pos .. Parse_Pos + Digit'Length - 1) = Digit
then then
if Invert then if Invert then
Class := ANYOF_NDIGIT; Class := ANYOF_NDIGIT;
...@@ -1900,28 +1907,36 @@ package body GNAT.Regpat is ...@@ -1900,28 +1907,36 @@ package body GNAT.Regpat is
when 'g' => when 'g' =>
if Parse_Pos + Graph'Length - 1 <= Parse_End if Parse_Pos + Graph'Length - 1 <= Parse_End
and then E (Parse_Pos .. Parse_Pos + Graph'Length - 1) = and then
Graph E (Parse_Pos .. Parse_Pos + Graph'Length - 1) = Graph
then then
if Invert then if Invert then
Class := ANYOF_NGRAPH; Class := ANYOF_NGRAPH;
else else
Class := ANYOF_GRAPH; Class := ANYOF_GRAPH;
end if; end if;
Parse_Pos := Parse_Pos + Graph'Length; Parse_Pos := Parse_Pos + Graph'Length;
else
Fail ("Invalid character class: " & E);
end if; end if;
when 'l' => when 'l' =>
if Parse_Pos + Lower'Length - 1 <= Parse_End if Parse_Pos + Lower'Length - 1 <= Parse_End
and then E (Parse_Pos .. Parse_Pos + Lower'Length - 1) = and then
Lower E (Parse_Pos .. Parse_Pos + Lower'Length - 1) = Lower
then then
if Invert then if Invert then
Class := ANYOF_NLOWER; Class := ANYOF_NLOWER;
else else
Class := ANYOF_LOWER; Class := ANYOF_LOWER;
end if; end if;
Parse_Pos := Parse_Pos + Lower'Length; Parse_Pos := Parse_Pos + Lower'Length;
else
Fail ("Invalid character class: " & E);
end if; end if;
when 'p' => when 'p' =>
...@@ -1929,8 +1944,8 @@ package body GNAT.Regpat is ...@@ -1929,8 +1944,8 @@ package body GNAT.Regpat is
-- All 'p' classes have the same length -- All 'p' classes have the same length
if Parse_Pos + Print'Length - 1 <= Parse_End then if Parse_Pos + Print'Length - 1 <= Parse_End then
if E (Parse_Pos .. Parse_Pos + Print'Length - 1) = if
Print E (Parse_Pos .. Parse_Pos + Print'Length - 1) = Print
then then
if Invert then if Invert then
Class := ANYOF_NPRINT; Class := ANYOF_NPRINT;
...@@ -1940,8 +1955,8 @@ package body GNAT.Regpat is ...@@ -1940,8 +1955,8 @@ package body GNAT.Regpat is
Parse_Pos := Parse_Pos + Print'Length; Parse_Pos := Parse_Pos + Print'Length;
elsif E (Parse_Pos .. Parse_Pos + Punct'Length - 1) = elsif
Punct E (Parse_Pos .. Parse_Pos + Punct'Length - 1) = Punct
then then
if Invert then if Invert then
Class := ANYOF_NPUNCT; Class := ANYOF_NPUNCT;
...@@ -1950,13 +1965,19 @@ package body GNAT.Regpat is ...@@ -1950,13 +1965,19 @@ package body GNAT.Regpat is
end if; end if;
Parse_Pos := Parse_Pos + Punct'Length; Parse_Pos := Parse_Pos + Punct'Length;
else
Fail ("Invalid character class: " & E);
end if; end if;
else
Fail ("Invalid character class: " & E);
end if; end if;
when 's' => when 's' =>
if Parse_Pos + Space'Length - 1 <= Parse_End if Parse_Pos + Space'Length - 1 <= Parse_End
and then E (Parse_Pos .. Parse_Pos + Space'Length - 1) = and then
Space E (Parse_Pos .. Parse_Pos + Space'Length - 1) = Space
then then
if Invert then if Invert then
Class := ANYOF_NSPACE; Class := ANYOF_NSPACE;
...@@ -1965,41 +1986,49 @@ package body GNAT.Regpat is ...@@ -1965,41 +1986,49 @@ package body GNAT.Regpat is
end if; end if;
Parse_Pos := Parse_Pos + Space'Length; Parse_Pos := Parse_Pos + Space'Length;
else
Fail ("Invalid character class: " & E);
end if; end if;
when 'u' => when 'u' =>
if Parse_Pos + Upper'Length - 1 <= Parse_End if Parse_Pos + Upper'Length - 1 <= Parse_End
and then E (Parse_Pos .. Parse_Pos + Upper'Length - 1) = and then
Upper E (Parse_Pos .. Parse_Pos + Upper'Length - 1) = Upper
then then
if Invert then if Invert then
Class := ANYOF_NUPPER; Class := ANYOF_NUPPER;
else else
Class := ANYOF_UPPER; Class := ANYOF_UPPER;
end if; end if;
Parse_Pos := Parse_Pos + Upper'Length; Parse_Pos := Parse_Pos + Upper'Length;
else
Fail ("Invalid character class: " & E);
end if; end if;
when 'w' => when 'w' =>
if Parse_Pos + Word'Length - 1 <= Parse_End if Parse_Pos + Word'Length - 1 <= Parse_End
and then E (Parse_Pos .. Parse_Pos + Word'Length - 1) = and then
Word E (Parse_Pos .. Parse_Pos + Word'Length - 1) = Word
then then
if Invert then if Invert then
Class := ANYOF_NALNUM; Class := ANYOF_NALNUM;
else else
Class := ANYOF_ALNUM; Class := ANYOF_ALNUM;
end if; end if;
Parse_Pos := Parse_Pos + Word'Length; Parse_Pos := Parse_Pos + Word'Length;
else
Fail ("Invalid character class: " & E);
end if; end if;
when 'x' => when 'x' =>
if Parse_Pos + Xdigit'Length - 1 <= Parse_End if Parse_Pos + Xdigit'Length - 1 <= Parse_End
and then E (Parse_Pos .. Parse_Pos + Xdigit'Length - 1) and then
= Digit E (Parse_Pos .. Parse_Pos + Xdigit'Length - 1) = Xdigit
then then
if Invert then if Invert then
Class := ANYOF_NXDIGIT; Class := ANYOF_NXDIGIT;
...@@ -2008,10 +2037,13 @@ package body GNAT.Regpat is ...@@ -2008,10 +2037,13 @@ package body GNAT.Regpat is
end if; end if;
Parse_Pos := Parse_Pos + Xdigit'Length; Parse_Pos := Parse_Pos + Xdigit'Length;
else
Fail ("Invalid character class: " & E);
end if; end if;
when others => when others =>
Fail ("Invalid character class"); Fail ("Invalid character class: " & E);
end case; end case;
-- Character class not specified -- Character class not specified
...@@ -2072,7 +2104,6 @@ package body GNAT.Regpat is ...@@ -2072,7 +2104,6 @@ package body GNAT.Regpat is
Flags : Regexp_Flags := No_Flags) Flags : Regexp_Flags := No_Flags)
is is
Size : Program_Size; Size : Program_Size;
begin begin
Compile (Matcher, Expression, Size, Flags); Compile (Matcher, Expression, Size, Flags);
end Compile; end Compile;
...@@ -2082,10 +2113,6 @@ package body GNAT.Regpat is ...@@ -2082,10 +2113,6 @@ package body GNAT.Regpat is
---------- ----------
procedure Dump (Self : Pattern_Matcher) is procedure Dump (Self : Pattern_Matcher) is
-- Index : Pointer := Program_First + 1;
-- What is the above line for ???
Op : Opcode; Op : Opcode;
Program : Program_Data renames Self.Program; Program : Program_Data renames Self.Program;
...@@ -2106,14 +2133,14 @@ package body GNAT.Regpat is ...@@ -2106,14 +2133,14 @@ package body GNAT.Regpat is
Till : Pointer; Till : Pointer;
Indent : Natural := 0) Indent : Natural := 0)
is is
Next : Pointer; Next : Pointer;
Index : Pointer := Start; Index : Pointer;
Local_Indent : Natural := Indent; Local_Indent : Natural := Indent;
Length : Pointer; Length : Pointer;
begin begin
Index := Start;
while Index < Till loop while Index < Till loop
Op := Opcode'Val (Character'Pos ((Self.Program (Index)))); Op := Opcode'Val (Character'Pos ((Self.Program (Index))));
if Op = CLOSE then if Op = CLOSE then
...@@ -2296,7 +2323,6 @@ package body GNAT.Regpat is ...@@ -2296,7 +2323,6 @@ package body GNAT.Regpat is
C : Character) return Boolean C : Character) return Boolean
is is
Value : constant Class_Byte := Character'Pos (C); Value : constant Class_Byte := Character'Pos (C);
begin begin
return return
(Bitmap (Value / 8) and Bit_Conversion (Value mod 8)) /= 0; (Bitmap (Value / 8) and Bit_Conversion (Value mod 8)) /= 0;
...@@ -2308,7 +2334,6 @@ package body GNAT.Regpat is ...@@ -2308,7 +2334,6 @@ package body GNAT.Regpat is
function Get_Next (Program : Program_Data; IP : Pointer) return Pointer is function Get_Next (Program : Program_Data; IP : Pointer) return Pointer is
Offset : constant Pointer := Get_Next_Offset (Program, IP); Offset : constant Pointer := Get_Next_Offset (Program, IP);
begin begin
if Offset = 0 then if Offset = 0 then
return 0; return 0;
...@@ -2680,8 +2705,8 @@ package body GNAT.Regpat is ...@@ -2680,8 +2705,8 @@ package body GNAT.Regpat is
Opnd : Pointer := String_Operand (Scan); Opnd : Pointer := String_Operand (Scan);
Current : Positive := Input_Pos; Current : Positive := Input_Pos;
Last : constant Pointer := Last : constant Pointer :=
Opnd + String_Length (Program, Scan); Opnd + String_Length (Program, Scan);
begin begin
while Opnd <= Last loop while Opnd <= Last loop
...@@ -2697,7 +2722,6 @@ package body GNAT.Regpat is ...@@ -2697,7 +2722,6 @@ package body GNAT.Regpat is
when ANYOF => when ANYOF =>
declare declare
Bitmap : Character_Class; Bitmap : Character_Class;
begin begin
Bitmap_Operand (Program, Scan, Bitmap); Bitmap_Operand (Program, Scan, Bitmap);
exit State_Machine when Input_Pos > Last_In_Data exit State_Machine when Input_Pos > Last_In_Data
...@@ -2709,7 +2733,6 @@ package body GNAT.Regpat is ...@@ -2709,7 +2733,6 @@ package body GNAT.Regpat is
declare declare
No : constant Natural := No : constant Natural :=
Character'Pos (Program (Operand (Scan))); Character'Pos (Program (Operand (Scan)));
begin begin
Matches_Tmp (No) := Input_Pos; Matches_Tmp (No) := Input_Pos;
end; end;
...@@ -2761,7 +2784,6 @@ package body GNAT.Regpat is ...@@ -2761,7 +2784,6 @@ package body GNAT.Regpat is
when STAR | PLUS | CURLY => when STAR | PLUS | CURLY =>
declare declare
Greed : constant Boolean := Greedy; Greed : constant Boolean := Greedy;
begin begin
Greedy := True; Greedy := True;
return Match_Simple_Operator (Op, Scan, Next, Greed); return Match_Simple_Operator (Op, Scan, Next, Greed);
...@@ -3492,11 +3514,8 @@ package body GNAT.Regpat is ...@@ -3492,11 +3514,8 @@ package body GNAT.Regpat is
-------------- --------------
procedure Optimize (Self : in out Pattern_Matcher) is procedure Optimize (Self : in out Pattern_Matcher) is
Max_Length : Program_Size; Scan : Pointer;
This_Length : Program_Size; Program : Program_Data renames Self.Program;
Longest : Pointer;
Scan : Pointer;
Program : Program_Data renames Self.Program;
begin begin
-- Start with safe defaults (no optimization): -- Start with safe defaults (no optimization):
...@@ -3520,33 +3539,6 @@ package body GNAT.Regpat is ...@@ -3520,33 +3539,6 @@ package body GNAT.Regpat is
then then
Self.Anchored := True; Self.Anchored := True;
end if; end if;
-- If there's something expensive in the regexp, find the
-- longest literal string that must appear and make it the
-- regmust. Resolve ties in favor of later strings, since
-- the regstart check works with the beginning of the regexp.
-- and avoiding duplication strengthens checking. Not a
-- strong reason, but sufficient in the absence of others.
if False then -- if Flags.SP_Start then ???
Longest := 0;
Max_Length := 0;
while Scan /= 0 loop
if Program (Scan) = EXACT or else Program (Scan) = EXACTF then
This_Length := String_Length (Program, Scan);
if This_Length >= Max_Length then
Longest := String_Operand (Scan);
Max_Length := This_Length;
end if;
end if;
Scan := Get_Next (Program, Scan);
end loop;
Self.Must_Have := Longest;
Self.Must_Have_Length := Natural (Max_Length) + 1;
end if;
end Optimize; end Optimize;
----------------- -----------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment