Commit 19f21e11 by Arnaud Charlet

[multiple changes]

2009-07-23  Yannick Moy  <moy@adacore.com>

	* s-regexp.adb (Check_Well_Formed_Pattern): Called before compiling the
	pattern.
	(Raise_Exception_If_No_More_Chars): Remove extra blank in exception
	string.
	(Raise_Exception): Ditto.

2009-07-23  Olivier Hainque  <hainque@adacore.com>

	* g-sse.ads: Simplify comment.

From-SVN: r150000
parent 84d60eea
2009-07-23 Yannick Moy <moy@adacore.com>
* s-regexp.adb (Check_Well_Formed_Pattern): Called before compiling the
pattern.
(Raise_Exception_If_No_More_Chars): Remove extra blank in exception
string.
(Raise_Exception): Ditto.
2009-07-23 Olivier Hainque <hainque@adacore.com>
* g-sse.ads: Simplify comment.
2009-07-23 Olivier Hainque <hainque@adacore.com> 2009-07-23 Olivier Hainque <hainque@adacore.com>
* g-ssinty.ads: New unit. GNAT.SSE.Internal_Types. Factorize * g-ssinty.ads: New unit. GNAT.SSE.Internal_Types. Factorize
......
...@@ -82,11 +82,7 @@ ...@@ -82,11 +82,7 @@
-- end if; -- end if;
-- end; -- end;
-- Use of Unchecked_Union is very tempting, however hits difficulties with -- Use of Unchecked_Union to perform the overlays is not supported.
-- e.g. implicit front-end expanded equality operators, which typically
-- feature a subcase comparing the m128 components, not supported by the
-- middle-end. This needs more explanation, should it be fixed ??? It
-- reads like a bug in this paragraph.
package GNAT.SSE is package GNAT.SSE is
type Float32 is new Float; type Float32 is new Float;
......
...@@ -129,6 +129,14 @@ package body System.Regexp is ...@@ -129,6 +129,14 @@ package body System.Regexp is
-- Number of significant characters in the regular expression. -- Number of significant characters in the regular expression.
-- This total does not include special operators, such as *, (, ... -- This total does not include special operators, such as *, (, ...
procedure Check_Well_Formed_Pattern;
-- Check that the pattern to compile is well-formed, so that
-- subsequent code can rely on this without performing each time
-- the checks to avoid accessing the pattern outside its bounds.
-- Except that, not all well-formedness rules are checked.
-- In particular, the rules about special characters not being
-- treated as regular characters are not checked.
procedure Create_Mapping; procedure Create_Mapping;
-- Creates a mapping between characters in the regexp and columns -- Creates a mapping between characters in the regexp and columns
-- in the tables representing the regexp. Test that the regexp is -- in the tables representing the regexp. Test that the regexp is
...@@ -180,6 +188,270 @@ package body System.Regexp is ...@@ -180,6 +188,270 @@ package body System.Regexp is
pragma No_Return (Raise_Exception); pragma No_Return (Raise_Exception);
-- Raise an exception, indicating an error at character Index in S -- Raise an exception, indicating an error at character Index in S
-------------------------------
-- Check_Well_Formed_Pattern --
-------------------------------
procedure Check_Well_Formed_Pattern is
J : Integer := S'First;
Past_Elmt : Boolean := False;
-- Set to True everywhere an elmt has been parsed, if Glob=False,
-- meaning there can be now an occurence of '*', '+' and '?'.
Past_Term : Boolean := False;
-- Set to True everywhere a term has been parsed, if Glob=False,
-- meaning there can be now an occurence of '|'.
Parenthesis_Level : Integer := 0;
Curly_Level : Integer := 0;
Last_Open : Integer := S'First - 1;
-- The last occurence of an opening parenthesis, if Glob=False,
-- or the last occurence of an opening curly brace, if Glob=True.
procedure Raise_Exception_If_No_More_Chars (K : Integer := 0);
--------------------------------------
-- Raise_Exception_If_No_More_Chars --
--------------------------------------
procedure Raise_Exception_If_No_More_Chars (K : Integer := 0) is
begin
if J + K > S'Last then
Raise_Exception
("Ill-formed pattern while parsing", J);
end if;
end Raise_Exception_If_No_More_Chars;
-- Start of processing for Check_Well_Formed_Pattern
begin
while J <= S'Last loop
case S (J) is
when Open_Bracket =>
J := J + 1;
Raise_Exception_If_No_More_Chars;
if not Glob then
if S (J) = '^' then
J := J + 1;
Raise_Exception_If_No_More_Chars;
end if;
end if;
-- The first character never has a special meaning
if S (J) = ']' or else S (J) = '-' then
J := J + 1;
Raise_Exception_If_No_More_Chars;
end if;
-- The set of characters cannot be empty
if S (J) = ']' then
Raise_Exception
("Set of characters cannot be empty in regular "
& "expression", J);
end if;
declare
Possible_Range_Start : Boolean := True;
-- Set to True everywhere a range character '-'
-- can occur.
begin
loop
exit when S (J) = Close_Bracket;
-- The current character should be followed by
-- a closing bracket.
Raise_Exception_If_No_More_Chars (1);
if S (J) = '-'
and then S (J + 1) /= Close_Bracket
then
if not Possible_Range_Start then
Raise_Exception
("No mix of ranges is allowed in "
& "regular expression", J);
end if;
J := J + 1;
Raise_Exception_If_No_More_Chars;
-- Range cannot be followed by '-' character,
-- except as last character in the set.
Possible_Range_Start := False;
else
Possible_Range_Start := True;
end if;
if S (J) = '\' then
J := J + 1;
Raise_Exception_If_No_More_Chars;
end if;
J := J + 1;
end loop;
end;
-- A closing bracket can end an elmt or term
Past_Elmt := True;
Past_Term := True;
when Close_Bracket =>
-- A close bracket must follow a open_bracket,
-- and cannot be found alone on the line.
Raise_Exception
("Incorrect character ']' in regular expression", J);
when '\' =>
if J < S'Last then
J := J + 1;
-- Any character can be an elmt or a term
Past_Elmt := True;
Past_Term := True;
else
-- \ not allowed at the end of the regexp
Raise_Exception
("Incorrect character '\' in regular expression", J);
end if;
when Open_Paren =>
if not Glob then
Parenthesis_Level := Parenthesis_Level + 1;
Last_Open := J;
-- An open parenthesis does not end an elmt or term
Past_Elmt := False;
Past_Term := False;
end if;
when Close_Paren =>
if not Glob then
Parenthesis_Level := Parenthesis_Level - 1;
if Parenthesis_Level < 0 then
Raise_Exception
("')' is not associated with '(' in regular "
& "expression", J);
end if;
if J = Last_Open + 1 then
Raise_Exception
("Empty parentheses not allowed in regular "
& "expression", J);
end if;
if not Past_Term then
Raise_Exception
("Closing parenthesis not allowed here in regular "
& "expression", J);
end if;
-- A closing parenthesis can end an elmt or term
Past_Elmt := True;
Past_Term := True;
end if;
when '{' =>
if Glob then
Curly_Level := Curly_Level + 1;
Last_Open := J;
else
-- Any character can be an elmt or a term
Past_Elmt := True;
Past_Term := True;
end if;
-- No need to check for ',' as the code always accepts them
when '}' =>
if Glob then
Curly_Level := Curly_Level - 1;
if Curly_Level < 0 then
Raise_Exception
("'}' is not associated with '{' in regular "
& "expression", J);
end if;
if J = Last_Open + 1 then
Raise_Exception
("Empty curly braces not allowed in regular "
& "expression", J);
end if;
else
-- Any character can be an elmt or a term
Past_Elmt := True;
Past_Term := True;
end if;
when '*' | '?' | '+' =>
if not Glob then
-- These operators must apply to an elmt sub-expression,
-- and cannot be found if one has not just been parsed.
if not Past_Elmt then
Raise_Exception
("'*', '+' and '?' operators must be "
& "applied to an element in regular expression", J);
end if;
Past_Elmt := False;
Past_Term := True;
end if;
when '|' =>
if not Glob then
-- This operator must apply to a term sub-expression,
-- and cannot be found if one has not just been parsed.
if not Past_Term then
Raise_Exception
("'|' operator must be "
& "applied to a term in regular expression", J);
end if;
Past_Elmt := False;
Past_Term := False;
end if;
when others =>
if not Glob then
-- Any character can be an elmt or a term
Past_Elmt := True;
Past_Term := True;
end if;
end case;
J := J + 1;
end loop;
-- A closing parenthesis must follow an open parenthesis
if Parenthesis_Level /= 0 then
Raise_Exception
("'(' must always be associated with a ')'", J);
end if;
-- A closing curly brace must follow an open curly brace
if Curly_Level /= 0 then
Raise_Exception
("'{' must always be associated with a '}'", J);
end if;
end Check_Well_Formed_Pattern;
-------------------- --------------------
-- Create_Mapping -- -- Create_Mapping --
-------------------- --------------------
...@@ -1224,7 +1496,7 @@ package body System.Regexp is ...@@ -1224,7 +1496,7 @@ package body System.Regexp is
procedure Raise_Exception (M : String; Index : Integer) is procedure Raise_Exception (M : String; Index : Integer) is
begin begin
raise Error_In_Regexp with M & " at offset " & Index'Img; raise Error_In_Regexp with M & " at offset" & Index'Img;
end Raise_Exception; end Raise_Exception;
-- Start of processing for Compile -- Start of processing for Compile
...@@ -1247,12 +1519,16 @@ package body System.Regexp is ...@@ -1247,12 +1519,16 @@ package body System.Regexp is
System.Case_Util.To_Lower (S); System.Case_Util.To_Lower (S);
end if; end if;
-- Check the pattern is well-formed before any treatment
Check_Well_Formed_Pattern;
Create_Mapping; Create_Mapping;
-- Creates the primary table -- Creates the primary table
declare declare
Table : Regexp_Array_Access; Table : Regexp_Array_Access;
Num_States : State_Index; Num_States : State_Index;
Start_State : State_Index; Start_State : State_Index;
End_State : State_Index; End_State : State_Index;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment