Commit 7e1603c0 by Ziga Mahkovec; committed by Ziga Mahkovec

re PR libgcj/20435 (regex pattern compiling bug)

2005-06-01  Ziga Mahkovec  <ziga.mahkovec@klika.si>

	PR libgcj/20435:
	* gnu/regexp/RESyntax.java (RE_POSSESSIVE_OPS): New field.
	(static): Add possessive matching to JAVA_1_4 syntax.
	* gnu/regexp/RETokenRepeated.java (possessive): New field.
	(makePossessive, isPossessive): New methods.
	(match): Don't back off during possessive matching.
	* gnu/regexp/RE.java (initialize): Accept possessive quantifier.
	* java/util/regex/Pattern.java (constructor): Switch syntax from PERL5
	to JAVA_1_4.

From-SVN: r100466
parent cce70747
2005-06-01 Ziga Mahkovec <ziga.mahkovec@klika.si>
PR libgcj/20435:
* gnu/regexp/RESyntax.java (RE_POSSESSIVE_OPS): New field.
(static): Add possessive matching to JAVA_1_4 syntax.
* gnu/regexp/RETokenRepeated.java (possessive): New field.
(makePossessive, isPossessive): New methods.
(match): Don't back off during possessive matching.
* gnu/regexp/RE.java (initialize): Accept possessive quantifier.
* java/util/regex/Pattern.java (constructor): Switch syntax from PERL5
to JAVA_1_4.
2005-06-01 Keith Seitz <keiths@redhat.com> 2005-06-01 Keith Seitz <keiths@redhat.com>
* gnu/classpath/jdwp/transport/JdwpPacket.java: New file. * gnu/classpath/jdwp/transport/JdwpPacket.java: New file.
......
...@@ -629,19 +629,28 @@ public class RE extends REToken { ...@@ -629,19 +629,28 @@ public class RE extends REToken {
currentToken = setRepeated(currentToken,0,Integer.MAX_VALUE,index); currentToken = setRepeated(currentToken,0,Integer.MAX_VALUE,index);
} }
// ONE-OR-MORE REPEAT OPERATOR // ONE-OR-MORE REPEAT OPERATOR / POSSESSIVE MATCHING OPERATOR
// + | \+ depending on RE_BK_PLUS_QM // + | \+ depending on RE_BK_PLUS_QM
// not available if RE_LIMITED_OPS is set // not available if RE_LIMITED_OPS is set
else if ((unit.ch == '+') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) { else if ((unit.ch == '+') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) {
if (currentToken == null) if (currentToken == null)
throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index); throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
if (currentToken instanceof RETokenRepeated)
// Check for possessive matching on RETokenRepeated
if (currentToken instanceof RETokenRepeated) {
RETokenRepeated tokenRep = (RETokenRepeated)currentToken;
if (syntax.get(RESyntax.RE_POSSESSIVE_OPS) && !tokenRep.isPossessive() && !tokenRep.isStingy())
tokenRep.makePossessive();
else
throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index); throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
}
else if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index); throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
if (currentToken.getMinimumLength() == 0) else if (currentToken.getMinimumLength() == 0)
throw new REException(getLocalizedMessage("repeat.empty.token"),REException.REG_BADRPT,index); throw new REException(getLocalizedMessage("repeat.empty.token"),REException.REG_BADRPT,index);
else
currentToken = setRepeated(currentToken,1,Integer.MAX_VALUE,index); currentToken = setRepeated(currentToken,1,Integer.MAX_VALUE,index);
} }
...@@ -655,8 +664,9 @@ public class RE extends REToken { ...@@ -655,8 +664,9 @@ public class RE extends REToken {
// Check for stingy matching on RETokenRepeated // Check for stingy matching on RETokenRepeated
if (currentToken instanceof RETokenRepeated) { if (currentToken instanceof RETokenRepeated) {
if (syntax.get(RESyntax.RE_STINGY_OPS) && !((RETokenRepeated)currentToken).isStingy()) RETokenRepeated tokenRep = (RETokenRepeated)currentToken;
((RETokenRepeated)currentToken).makeStingy(); if (syntax.get(RESyntax.RE_STINGY_OPS) && !tokenRep.isStingy() && !tokenRep.isPossessive())
tokenRep.makeStingy();
else else
throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index); throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
} }
......
...@@ -197,7 +197,12 @@ public final class RESyntax implements Serializable { ...@@ -197,7 +197,12 @@ public final class RESyntax implements Serializable {
*/ */
public static final int RE_CHAR_CLASS_ESC_IN_LISTS = 24; public static final int RE_CHAR_CLASS_ESC_IN_LISTS = 24;
private static final int BIT_TOTAL = 25; /**
* Syntax bit. Possessive matching is allowed (++, *+, ?+, {x,y}+).
*/
public static final int RE_POSSESSIVE_OPS = 25;
private static final int BIT_TOTAL = 26;
/** /**
* Predefined syntax. * Predefined syntax.
...@@ -425,6 +430,7 @@ public final class RESyntax implements Serializable { ...@@ -425,6 +430,7 @@ public final class RESyntax implements Serializable {
RE_SYNTAX_JAVA_1_4 = new RESyntax(RE_SYNTAX_PERL5) RE_SYNTAX_JAVA_1_4 = new RESyntax(RE_SYNTAX_PERL5)
// XXX // XXX
.set(RE_POSSESSIVE_OPS) // *+,?+,++,{}+
.makeFinal(); .makeFinal();
} }
......
...@@ -44,6 +44,7 @@ final class RETokenRepeated extends REToken { ...@@ -44,6 +44,7 @@ final class RETokenRepeated extends REToken {
private REToken token; private REToken token;
private int min,max; private int min,max;
private boolean stingy; private boolean stingy;
private boolean possessive;
RETokenRepeated(int subIndex, REToken token, int min, int max) { RETokenRepeated(int subIndex, REToken token, int min, int max) {
super(subIndex); super(subIndex);
...@@ -62,6 +63,16 @@ final class RETokenRepeated extends REToken { ...@@ -62,6 +63,16 @@ final class RETokenRepeated extends REToken {
return stingy; return stingy;
} }
/** Sets possessive matching mode to true. */
void makePossessive() {
possessive = true;
}
/** Queries if this token has possessive matching enabled. */
boolean isPossessive() {
return possessive;
}
/** /**
* The minimum length of a repeated token is the minimum length * The minimum length of a repeated token is the minimum length
* of the token multiplied by the minimum number of times it must * of the token multiplied by the minimum number of times it must
...@@ -172,6 +183,8 @@ final class RETokenRepeated extends REToken { ...@@ -172,6 +183,8 @@ final class RETokenRepeated extends REToken {
} }
} }
// else did not match rest of the tokens, try again on smaller sample // else did not match rest of the tokens, try again on smaller sample
// or break out when performing possessive matching
if (possessive) break;
} }
if (allResults != null) { if (allResults != null) {
mymatch.assignFrom(allResults); // does this get all? mymatch.assignFrom(allResults); // does this get all?
......
...@@ -84,8 +84,7 @@ public final class Pattern implements Serializable ...@@ -84,8 +84,7 @@ public final class Pattern implements Serializable
// if ((flags & UNICODE_CASE) != 0) gnuFlags = // if ((flags & UNICODE_CASE) != 0) gnuFlags =
// if ((flags & CANON_EQ) != 0) gnuFlags = // if ((flags & CANON_EQ) != 0) gnuFlags =
// Eventually there will be such a thing as JDK 1_4 syntax RESyntax syntax = RESyntax.RE_SYNTAX_JAVA_1_4;
RESyntax syntax = RESyntax.RE_SYNTAX_PERL5;
if ((flags & UNIX_LINES) != 0) if ((flags & UNIX_LINES) != 0)
{ {
// Use a syntax set with \n for linefeeds? // Use a syntax set with \n for linefeeds?
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment