Commit 57807c31 by Michael Koch Committed by Michael Koch

CollationElementIterator.java, [...]: New versions from GNU classpath.

2004-06-01  Michael Koch  <konqueror@gmx.de>

	* java/text/CollationElementIterator.java,
	java/text/CollationKey.java,
	java/text/RuleBasedCollator.java: New versions from GNU classpath.
	* testsuite/libjava.mauve/xfails: Removed all
	java.text.CollationElementIterator tests.

From-SVN: r82510
parent f7dbd56c
2004-06-01 Michael Koch <konqueror@gmx.de>
* java/text/CollationElementIterator.java,
java/text/CollationKey.java,
java/text/RuleBasedCollator.java: New versions from GNU classpath.
* testsuite/libjava.mauve/xfails: Removed all
java.text.CollationElementIterator tests.
2004-06-01 Michael Koch <konqueror@gmx.de>
* java/util/zip/InflaterInputStream.java: Merged more with Classpath
version.
* java/util/zip/ZipOutputStream.java (): Renamed enum to e to removed
......
......@@ -38,6 +38,8 @@ exception statement from your version. */
package java.text;
import java.util.Vector;
/* Written using "Java Class Libraries", 2nd edition, plus online
* API docs for JDK 1.2 from http://www.javasoft.com.
* Status: Believed complete and correct to JDK 1.1.
......@@ -74,13 +76,25 @@ public final class CollationElementIterator
String text;
/**
* This is the index into the collation decomposition where we are currently scanning.
*/
int index;
/**
* This is the index into the String where we are currently scanning.
*/
int textIndex;
// A piece of lookahead.
boolean lookahead_set;
int lookahead;
/**
* Array containing the collation decomposition of the
* text given to the constructor.
*/
private Object[] text_decomposition;
/**
* Array containing the index of the specified block.
*/
private int[] text_indexes;
/**
* This method initializes a new instance of <code>CollationElementIterator</code>
......@@ -97,6 +111,35 @@ public final class CollationElementIterator
setText (text);
}
RuleBasedCollator.CollationElement nextBlock()
{
if (index >= text_decomposition.length)
return null;
RuleBasedCollator.CollationElement e =
(RuleBasedCollator.CollationElement) text_decomposition[index];
textIndex = text_indexes[index+1];
index++;
return e;
}
RuleBasedCollator.CollationElement previousBlock()
{
if (index == 0)
return null;
index--;
RuleBasedCollator.CollationElement e =
(RuleBasedCollator.CollationElement) text_decomposition[index];
textIndex = text_indexes[index+1];
return e;
}
/**
* This method returns the collation ordering value of the next character sequence
* in the string (it may be an extended character following collation rules).
......@@ -107,10 +150,29 @@ public final class CollationElementIterator
*/
public int next()
{
if (textIndex == text.length())
RuleBasedCollator.CollationElement e = nextBlock();
if (e == null)
return NULLORDER;
return e.getValue();
}
/**
* This method returns the collation ordering value of the previous character
* in the string. This method will return <code>NULLORDER</code> if the
* beginning of the string was reached.
*
* @return The collation ordering value.
*/
public int previous()
{
RuleBasedCollator.CollationElement e = previousBlock();
return collator.ceiNext (this);
if (e == null)
return NULLORDER;
return e.getValue();
}
/**
......@@ -133,9 +195,8 @@ public final class CollationElementIterator
*/
public void reset()
{
index = 0;
textIndex = 0;
lookahead_set = false;
lookahead = 0;
}
/**
......@@ -176,10 +237,152 @@ public final class CollationElementIterator
*/
public void setText(String text)
{
int idx = 0;
int idx_idx = 0;
int alreadyExpanded = 0;
int idxToMove = 0;
this.text = text;
this.textIndex = 0;
this.lookahead_set = false;
this.lookahead = 0;
this.index = 0;
String work_text = text.intern();
Vector v = new Vector();
Vector vi = new Vector();
// Build element collection ordered as they come in "text".
while (idx < work_text.length())
{
String key, key_old;
Object object = null;
int p = 1;
// IMPROVE: use a TreeMap with a prefix-ordering rule.
key_old = key = null;
do
{
if (object != null)
key_old = key;
key = work_text.substring (idx, idx+p);
object = collator.prefix_tree.get (key);
if (object != null && idx < alreadyExpanded)
{
RuleBasedCollator.CollationElement prefix = (RuleBasedCollator.CollationElement)object;
if (prefix.expansion != null &&
prefix.expansion.startsWith(work_text.substring(0, idx)))
{
object = null;
key = key_old;
}
}
p++;
}
while (idx+p <= work_text.length());
if (object == null)
key = key_old;
RuleBasedCollator.CollationElement prefix =
(RuleBasedCollator.CollationElement) collator.prefix_tree.get (key);
/*
* First case: There is no such sequence in the database.
* We will have to build one from the context.
*/
if (prefix == null)
{
/*
* We are dealing with sequences in an expansion. They
* are treated as accented characters (tertiary order).
*/
if (alreadyExpanded > 0)
{
RuleBasedCollator.CollationElement e =
collator.getDefaultAccentedElement (work_text.charAt (idx));
v.add (e);
vi.add (new Integer(idx_idx));
idx++;
alreadyExpanded--;
if (alreadyExpanded == 0)
{
/* There is not any characters left in the expansion set.
* We can increase the pointer in the source string.
*/
idx_idx += idxToMove;
idxToMove = 0;
}
else
idx_idx++;
}
else
{
/* This is a normal character. */
RuleBasedCollator.CollationElement e =
collator.getDefaultElement (work_text.charAt (idx));
Integer i_ref = new Integer(idx_idx);
/* Don't forget to mark it as a special sequence so the
* string can be ordered.
*/
v.add (RuleBasedCollator.SPECIAL_UNKNOWN_SEQ);
vi.add (i_ref);
v.add (e);
vi.add (i_ref);
idx_idx++;
idx++;
}
continue;
}
/*
* Second case: Here we have found a matching sequence.
* Here we have an expansion string prepend it to the "work text" and
* add the corresponding sorting element. We must also mark
*/
if (prefix.expansion != null)
{
work_text = prefix.expansion
+ work_text.substring (idx+prefix.key.length());
idx = 0;
v.add (prefix);
vi.add (new Integer(idx_idx));
if (alreadyExpanded == 0)
idxToMove = prefix.key.length();
alreadyExpanded += prefix.expansion.length()-prefix.key.length();
}
else
{
/* Third case: the simplest. We have got the prefix and it
* has not to be expanded.
*/
v.add (prefix);
vi.add (new Integer(idx_idx));
idx += prefix.key.length();
/* If the sequence is in an expansion, we must decrease the
* counter.
*/
if (alreadyExpanded > 0)
{
alreadyExpanded -= prefix.key.length();
if (alreadyExpanded == 0)
{
idx_idx += idxToMove;
idxToMove = 0;
}
} else
idx_idx += prefix.key.length();
}
}
text_decomposition = v.toArray();
text_indexes = new int[vi.size()+1];
for (int i = 0; i < vi.size(); i++)
{
text_indexes[i] = ((Integer)vi.elementAt(i)).intValue();
}
text_indexes[vi.size()] = text.length();
}
/**
......@@ -215,4 +418,50 @@ public final class CollationElementIterator
{
return textIndex;
}
/**
* This method sets the iteration index position into the current
* <code>String</code> to the specified value. This value must not
* be negative and must not be greater than the last index position
* in the <code>String</code>.
*
* @param offset The new iteration index position.
*
* @exception IllegalArgumentException If the new offset is not valid.
*/
public void setOffset(int offset)
{
if (offset < 0)
throw new IllegalArgumentException("Negative offset: " + offset);
if (offset > (text.length() - 1))
throw new IllegalArgumentException("Offset too large: " + offset);
for (index = 0; index < text_decomposition.length; index++)
{
if (offset <= text_indexes[index])
break;
}
/*
* As text_indexes[0] == 0, we should not have to take care whether index is
* greater than 0. It is always.
*/
if (text_indexes[index] == offset)
textIndex = offset;
else
textIndex = text_indexes[index-1];
}
/**
* This method returns the maximum length of any expansion sequence that
* ends with the specified collation order value. (Whatever that means).
*
* @param value The collation order value
*
* @param The maximum length of an expansion sequence.
*/
public int getMaxExpansion(int value)
{
return 1;
}
}
......@@ -78,24 +78,13 @@ public final class CollationKey implements Comparable
/**
* This is the bit value for this key.
*/
private int[] key;
private byte[] key;
CollationKey(Collator collator, CollationElementIterator iter,
String originalText, int strength)
CollationKey (Collator collator, String originalText, byte[] key)
{
this.collator = collator;
this.originalText = originalText;
// Compute size of required array.
int size = 0;
while (RuleBasedCollator.next(iter, strength)
!= CollationElementIterator.NULLORDER)
++size;
iter.reset();
key = new int[size];
for (int i = 0; i < size; i++)
key[i] = RuleBasedCollator.next(iter, strength);
this.key = key;
}
/**
......@@ -205,15 +194,6 @@ public final class CollationKey implements Comparable
*/
public byte[] toByteArray()
{
byte[] r = new byte[4 * key.length];
int off = 0;
for (int i = 0; i < key.length; ++i)
{
r[off++] = (byte) ((key[i] >>> 24) & 255);
r[off++] = (byte) ((key[i] >>> 16) & 255);
r[off++] = (byte) ((key[i] >>> 8) & 255);
r[off++] = (byte) ((key[i] ) & 255);
}
return r;
return key;
}
}
......@@ -38,7 +38,7 @@ exception statement from your version. */
package java.text;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.HashMap;
import java.util.Vector;
/* Written using "Java Class Libraries", 2nd edition, plus online
......@@ -147,32 +147,104 @@ public class RuleBasedCollator extends Collator
* This class describes what rank has a character (or a sequence of characters)
* in the lexicographic order. Each element in a rule has a collation element.
*/
final class CollationElement
final static class CollationElement
{
String key;
char relation;
CollationElement(String key, char relation)
int primary;
short secondary;
short tertiary;
short equality;
boolean ignore;
String expansion;
CollationElement(String key, int primary, short secondary, short tertiary,
short equality, String expansion, boolean ignore)
{
this.key = key;
this.relation = relation;
this.primary = primary;
this.secondary = secondary;
this.tertiary = tertiary;
this.equality = equality;
this.ignore = ignore;
this.expansion = expansion;
}
final int getValue()
{
return (primary << 16) + (secondary << 8) + tertiary;
}
}
// True if we are using French-style accent ordering.
private boolean frenchAccents;
/**
* Basic collation instruction (internal format) to build the series of
* collation elements. It contains an instruction which specifies the new
* state of the generator. The sequence of instruction should not contain
* RESET (it is used by
* {@link #mergeRules(int,java.lang.String,java.util.Vector,java.util.Vector)})
* as a temporary state while merging two sets of instructions.
*/
final static class CollationSorter
{
static final int GREATERP = 0;
static final int GREATERS = 1;
static final int GREATERT = 2;
static final int EQUAL = 3;
static final int RESET = 4;
static final int INVERSE_SECONDARY = 5;
int comparisonType;
String textElement;
int hashText;
int offset;
boolean ignore;
String expansionOrdering;
}
/**
* This the the original rule string.
*/
private String rules;
// This maps strings onto collation values.
private Hashtable map;
// An entry in this hash means that more lookahead is required for
// the prefix string.
private Hashtable prefixes;
/**
* This is the table of collation element values
*/
private Object[] ce_table;
/**
* Quick-prefix finder.
*/
HashMap prefix_tree;
/**
* This is the value of the last sequence entered into
* <code>ce_table</code>. It is used to compute the
* ordering value of unspecified character.
*/
private int last_primary_value;
/**
* This is the value of the last secondary sequence of the
* primary 0, entered into
* <code>ce_table</code>. It is used to compute the
* ordering value of an unspecified accented character.
*/
private int last_tertiary_value;
/**
* This variable is true if accents need to be sorted
* in the other direction.
*/
private boolean inverseAccentComparison;
/**
* This collation element is special to unknown sequence.
* The JDK uses it to mark and sort the characters which has
* no collation rules.
*/
static final CollationElement SPECIAL_UNKNOWN_SEQ =
new CollationElement("", (short) 32767, (short) 0, (short) 0,
(short) 0, null, false);
/**
* This method initializes a new instance of <code>RuleBasedCollator</code>
......@@ -192,128 +264,313 @@ public class RuleBasedCollator extends Collator
this.rules = rules;
// We keep each rule in order in a vector. At the end we traverse
// the vector and compute collation values from it.
int insertion_index = 0;
Vector vec = new Vector ();
buildCollationVector(parseString(rules));
buildPrefixAccess();
}
/**
* This method returns the number of common characters at the beginning
* of the string of the two parameters.
*
* @param prefix A string considered as a prefix to test against
* the other string.
* @param s A string to test the prefix against.
* @return The number of common characters.
*/
static int findPrefixLength(String prefix, String s)
{
int index;
StringBuffer argument = new StringBuffer();
int len = rules.length();
int len = prefix.length();
for (index = 0; index < len; ++index)
for (index = 0; index < len && index < s.length(); ++index)
{
char c = rules.charAt(index);
if (prefix.charAt(index) != s.charAt(index))
return index;
}
// Just skip whitespace.
if (Character.isWhitespace(c))
continue;
// Modifier.
if (c == '@')
return index;
}
/**
* Here we are merging two sets of sorting instructions: 'patch' into 'main'. This methods
* checks whether it is possible to find an anchor point for the rules to be merged and
* then insert them at that precise point.
*
* @param offset Offset in the string containing rules of the beginning of the rules
* being merged in.
* @param starter Text of the rules being merged.
* @param main Repository of all already parsed rules.
* @param patch Rules to be merged into the repository.
* @throws ParseException if it is impossible to find an anchor point for the new rules.
*/
private void mergeRules(int offset, String starter, Vector main, Vector patch)
throws ParseException
{
Enumeration elements = main.elements();
int insertion_point = -1;
int max_length = 0;
/* We must check that no rules conflict with another already present. If it
* is the case delete the old rule.
*/
/* For the moment good old O(N^2) algorithm.
*/
for (int i = 0; i < patch.size(); i++)
{
int j = 0;
while (j < main.size())
{
frenchAccents = true;
continue;
CollationSorter rule1 = (CollationSorter) patch.elementAt(i);
CollationSorter rule2 = (CollationSorter) main.elementAt(j);
if (rule1.textElement.equals(rule2.textElement))
main.removeElementAt(j);
else
j++;
}
}
// Check for relation or reset operator.
if (! (c == '<' || c == ';' || c == ',' || c == '=' || c == '&'))
throw new ParseException ("invalid character", index);
++index;
while (index < len)
// Find the insertion point... O(N)
for (int i = 0; i < main.size(); i++)
{
CollationSorter sorter = (CollationSorter) main.elementAt(i);
int length = findPrefixLength(starter, sorter.textElement);
if (length > max_length)
{
if (! Character.isWhitespace(rules.charAt(index)))
break;
++index;
max_length = length;
insertion_point = i+1;
}
if (index == len)
throw new ParseException ("missing argument", index);
int save = index;
index = text_argument (rules, index, argument);
if (argument.length() == 0)
throw new ParseException ("invalid character", save);
String arg = argument.toString();
int item_index = -1;
for (int j = 0; j < vec.size(); ++j)
{
CollationElement e = (CollationElement) vec.elementAt (j);
}
if (arg.equals (e.key))
{
item_index = j;
break;
}
}
if (insertion_point < 0)
throw new ParseException("no insertion point found for " + starter, offset);
if (max_length < starter.length())
{
/*
* We need to expand the first entry. It must be sorted
* like if it was the reference key itself (like the spec
* said. So the first entry is special: the element is
* replaced by the specified text element for the sorting.
* This text replace the old one for comparisons. However
* to preserve the behaviour we replace the first key (corresponding
* to the found prefix) by a new code rightly ordered in the
* sequence. The rest of the subsequence must be appended
* to the end of the sequence.
*/
CollationSorter sorter = (CollationSorter) patch.elementAt(0);
CollationSorter expansionPrefix =
(CollationSorter) main.elementAt(insertion_point-1);
if (c != '&')
sorter.expansionOrdering = starter.substring(max_length); // Skip the first good prefix element
main.insertElementAt(sorter, insertion_point);
/*
* This is a new set of rules. Append to the list.
*/
patch.removeElementAt(0);
insertion_point++;
}
// Now insert all elements of patch at the insertion point.
for (int i = 0; i < patch.size(); i++)
main.insertElementAt(patch.elementAt(i), i+insertion_point);
}
/**
* This method parses a string and build a set of sorting instructions. The parsing
* may only be partial on the case the rules are to be merged sometime later.
*
* @param stop_on_reset If this parameter is true then the parser stops when it
* encounters a reset instruction. In the other case, it tries to parse the subrules
* and merged it in the same repository.
* @param v Output vector for the set of instructions.
* @param base_offset Offset in the string to begin parsing.
* @param rules Rules to be parsed.
* @return -1 if the parser reached the end of the string, an integer representing the
* offset in the string at which it stopped parsing.
* @throws ParseException if something turned wrong during the parsing. To get details
* decode the message.
*/
private int subParseString(boolean stop_on_reset, Vector v,
int base_offset, String rules)
throws ParseException
{
boolean ignoreChars = (base_offset == 0);
int operator = -1;
StringBuffer sb = new StringBuffer();
boolean doubleQuote = false;
boolean eatingChars = false;
boolean nextIsModifier = false;
boolean isModifier = false;
int i;
main_parse_loop:
for (i = 0; i < rules.length(); i++)
{
char c = rules.charAt(i);
int type = -1;
if (!eatingChars &&
((c >= 0x09 && c <= 0x0D) || (c == 0x20)))
continue;
isModifier = nextIsModifier;
nextIsModifier = false;
if (eatingChars && c != '\'')
{
// If the argument already appears in the vector, then we
// must remove it in order to re-order.
if (item_index != -1)
{
vec.removeElementAt(item_index);
if (insertion_index >= item_index)
--insertion_index;
}
CollationElement r = new CollationElement (arg, c);
vec.insertElementAt(r, insertion_index);
++insertion_index;
doubleQuote = false;
sb.append(c);
continue;
}
else
if (doubleQuote && eatingChars)
{
// Reset.
if (item_index == -1)
throw
new ParseException ("argument to reset not previously seen",
save);
insertion_index = item_index + 1;
sb.append(c);
doubleQuote = false;
continue;
}
// Ugly: in this case the resulting INDEX comes from
// text_argument, which returns the index of the next
// character we should examine.
--index;
}
switch (c) {
case '!':
throw new ParseException
("Modifier '!' is not yet supported by Classpath", i+base_offset);
case '<':
type = CollationSorter.GREATERP;
break;
case ';':
type = CollationSorter.GREATERS;
break;
case ',':
type = CollationSorter.GREATERT;
break;
case '=':
type = CollationSorter.EQUAL;
break;
case '\'':
eatingChars = !eatingChars;
doubleQuote = true;
break;
case '@':
if (ignoreChars)
throw new ParseException
("comparison list has not yet been started. You may only use"
+ "(<,;=&)", i+base_offset);
// Inverse the order of secondaries from now on.
nextIsModifier = true;
type = CollationSorter.INVERSE_SECONDARY;
break;
case '&':
type = CollationSorter.RESET;
if (stop_on_reset)
break main_parse_loop;
break;
default:
if (operator < 0)
throw new ParseException
("operator missing at " + (i+base_offset), i+base_offset);
if (!eatingChars &&
((c >= 0x21 && c <= 0x2F)
|| (c >= 0x3A && c <= 0x40)
|| (c >= 0x5B && c <= 0x60)
|| (c >= 0x7B && c <= 0x7E)))
throw new ParseException
("unquoted punctuation character '"+c+"'", i+base_offset);
//type = ignoreChars ? CollationSorter.IGNORE : -1;
sb.append(c);
break;
}
// Now construct a hash table that maps strings onto their
// collation values.
int primary = 0;
int secondary = 0;
int tertiary = 0;
this.map = new Hashtable ();
this.prefixes = new Hashtable ();
Enumeration e = vec.elements();
while (e.hasMoreElements())
{
CollationElement r = (CollationElement) e.nextElement();
switch (r.relation)
if (type < 0)
continue;
if (operator < 0)
{
case '<':
++primary;
secondary = 0;
tertiary = 0;
break;
case ';':
++secondary;
tertiary = 0;
break;
case ',':
++tertiary;
break;
case '=':
break;
operator = type;
continue;
}
// This must match CollationElementIterator.
map.put(r.key, new Integer (primary << 16
| secondary << 8 | tertiary));
// Make a map of all lookaheads we might need.
for (int i = r.key.length() - 1; i >= 1; --i)
prefixes.put(r.key.substring(0, i), Boolean.TRUE);
if (sb.length() == 0 && !isModifier)
throw new ParseException
("text element empty at " + (i+base_offset), i+base_offset);
if (operator == CollationSorter.RESET)
{
/* Reposition in the sorting list at the position
* indicated by the text element.
*/
String subrules = rules.substring(i);
Vector sorted_rules = new Vector();
int idx;
// Parse the subrules but do not iterate through all
// sublist. This is the priviledge of the first call.
idx = subParseString(true, sorted_rules, base_offset+i, subrules);
// Merge new parsed rules into the list.
mergeRules(base_offset+i, sb.toString(), v, sorted_rules);
sb.setLength(0);
// Reset state to none.
operator = -1;
type = -1;
// We have found a new subrule at 'idx' but it has not been parsed.
if (idx >= 0)
{
i += idx-1;
continue main_parse_loop;
}
else
// No more rules.
break main_parse_loop;
}
CollationSorter sorter = new CollationSorter();
if (operator == CollationSorter.GREATERP)
ignoreChars = false;
sorter.comparisonType = operator;
sorter.textElement = sb.toString();
sorter.hashText = sorter.textElement.hashCode();
sorter.offset = base_offset+rules.length();
sorter.ignore = ignoreChars;
sb.setLength(0);
v.add(sorter);
operator = type;
}
if (operator >= 0)
{
CollationSorter sorter = new CollationSorter();
int pos = rules.length() + base_offset;
if ((sb.length() != 0 && nextIsModifier)
|| (sb.length() == 0 && !nextIsModifier && !eatingChars))
throw new ParseException("text element empty at " + pos, pos);
if (operator == CollationSorter.GREATERP)
ignoreChars = false;
sorter.comparisonType = operator;
sorter.textElement = sb.toString();
sorter.hashText = sorter.textElement.hashCode();
sorter.offset = base_offset+pos;
sorter.ignore = ignoreChars;
v.add(sorter);
}
if (i == rules.length())
return -1;
else
return i;
}
/**
......@@ -323,90 +580,130 @@ public class RuleBasedCollator extends Collator
*/
public Object clone()
{
RuleBasedCollator c = (RuleBasedCollator) super.clone ();
c.map = (Hashtable) map.clone ();
c.prefixes = (Hashtable) map.clone ();
return c;
return super.clone();
}
// A helper for CollationElementIterator.next().
int ceiNext (CollationElementIterator cei)
/**
* This method completely parses a string 'rules' containing sorting rules.
*
* @param rules String containing the rules to be parsed.
* @return A set of sorting instructions stored in a Vector.
* @throws ParseException if something turned wrong during the parsing. To get details
* decode the message.
*/
private Vector parseString(String rules)
throws ParseException
{
if (cei.lookahead_set)
{
cei.lookahead_set = false;
return cei.lookahead;
}
Vector v = new Vector();
int save = cei.textIndex;
int max = cei.text.length();
String s = null;
// It is possible to have a case where `abc' has a mapping, but
// neither `ab' nor `abd' do. In this case we must treat `abd' as
// nothing special.
boolean found = false;
// result of the first subParseString is not absolute (may be -1 or a
// positive integer). But we do not care.
subParseString(false, v, 0, rules);
return v;
}
int i;
for (i = save + 1; i <= max; ++i)
/**
* This method uses the sorting instructions built by {@link #parseString}
* to build collation elements which can be directly used to sort strings.
*
* @param parsedElements Parsed instructions stored in a Vector.
* @throws ParseException if the order of the instructions are not valid.
*/
private void buildCollationVector(Vector parsedElements)
throws ParseException
{
int primary_seq = 0;
int last_tertiary_seq = 0;
short secondary_seq = 0;
short tertiary_seq = 0;
short equality_seq = 0;
boolean inverseComparisons = false;
final boolean DECREASING = false;
final boolean INCREASING = true;
boolean secondaryType = INCREASING;
Vector v = new Vector();
// elts is completely sorted.
element_loop:
for (int i = 0; i < parsedElements.size(); i++)
{
s = cei.text.substring(save, i);
if (prefixes.get(s) == null)
break;
found = true;
}
// Assume s != null.
CollationSorter elt = (CollationSorter) parsedElements.elementAt(i);
boolean ignoreChar = false;
Object obj = map.get(s);
// The special case.
while (found && obj == null && s.length() > 1)
{
--i;
s = cei.text.substring(save, i);
obj = map.get(s);
switch (elt.comparisonType)
{
case CollationSorter.GREATERP:
primary_seq++;
if (inverseComparisons)
{
secondary_seq = Short.MAX_VALUE;
secondaryType = DECREASING;
}
else
{
secondary_seq = 0;
secondaryType = INCREASING;
}
tertiary_seq = 0;
equality_seq = 0;
inverseComparisons = false;
break;
case CollationSorter.GREATERS:
if (secondaryType == DECREASING)
secondary_seq--;
else
secondary_seq++;
tertiary_seq = 0;
equality_seq = 0;
break;
case CollationSorter.INVERSE_SECONDARY:
inverseComparisons = true;
continue element_loop;
case CollationSorter.GREATERT:
tertiary_seq++;
if (primary_seq == 0)
last_tertiary_seq = tertiary_seq;
equality_seq = 0;
break;
case CollationSorter.EQUAL:
equality_seq++;
break;
case CollationSorter.RESET:
throw new ParseException
("Invalid reached state 'RESET'. Internal error", elt.offset);
default:
throw new ParseException
("Invalid unknown state '" + elt.comparisonType + "'", elt.offset);
}
v.add(new CollationElement(elt.textElement, primary_seq,
secondary_seq, tertiary_seq,
equality_seq, elt.expansionOrdering, elt.ignore));
}
// Update state.
cei.textIndex = i;
this.inverseAccentComparison = inverseComparisons;
if (obj == null)
{
// This idea, and the values, come from JDK.
// assert (s.length() == 1)
cei.lookahead_set = true;
cei.lookahead = s.charAt(0) << 8;
return 0x7fff << 16;
}
ce_table = v.toArray();
return ((Integer) obj).intValue();
last_primary_value = primary_seq+1;
last_tertiary_value = last_tertiary_seq+1;
}
// A helper for compareTo() that returns the next character that has
// a nonzero ordering at the indicated strength. This is also used
// in CollationKey.
static final int next (CollationElementIterator iter, int strength)
/**
* Build a tree where all keys are the texts of collation elements and data is
* the collation element itself. The tree is used when extracting all prefix
* for a given text.
*/
private void buildPrefixAccess()
{
while (true)
prefix_tree = new HashMap();
for (int i = 0; i < ce_table.length; i++)
{
int os = iter.next();
if (os == CollationElementIterator.NULLORDER)
return os;
int c = 0;
switch (strength)
{
case PRIMARY:
c = os & ~0xffff;
break;
case SECONDARY:
c = os & ~0x00ff;
break;
case TERTIARY:
case IDENTICAL:
c = os;
break;
}
if (c != 0)
return c;
CollationElement e = (CollationElement) ce_table[i];
prefix_tree.put(e.key, e);
}
}
......@@ -425,34 +722,115 @@ public class RuleBasedCollator extends Collator
public int compare(String source, String target)
{
CollationElementIterator cs, ct;
CollationElement ord1block = null;
CollationElement ord2block = null;
boolean advance_block_1 = true;
boolean advance_block_2 = true;
cs = new CollationElementIterator(this, source);
ct = new CollationElementIterator(this, target);
cs = getCollationElementIterator(source);
ct = getCollationElementIterator(target);
for(;;)
{
int os = next (cs, strength);
int ot = next (ct, strength);
int ord1;
int ord2;
/*
* We have to check whether the characters are ignorable.
* If it is the case then forget them.
*/
if (advance_block_1)
{
ord1block = cs.nextBlock();
if (ord1block != null && ord1block.ignore)
continue;
}
if (advance_block_2)
{
ord2block = ct.nextBlock();
if (ord2block != null && ord2block.ignore)
{
advance_block_1 = false;
continue;
}
}
else
advance_block_2 = true;
if (os == CollationElementIterator.NULLORDER
&& ot == CollationElementIterator.NULLORDER)
break;
else if (os == CollationElementIterator.NULLORDER)
if (!advance_block_1)
advance_block_1 = true;
if (ord1block != null)
ord1 = ord1block.getValue();
else
{
// Source string is shorter, so return "less than".
if (ord2block == null)
return 0;
return -1;
}
else if (ot == CollationElementIterator.NULLORDER)
if (ord2block == null)
return 1;
ord2 = ord2block.getValue();
// We know chars are totally equal, so skip
if (ord1 == ord2)
{
// Target string is shorter, so return "greater than".
return 1;
if (getStrength() == IDENTICAL)
if (!ord1block.key.equals(ord2block.key))
return ord1block.key.compareTo(ord2block.key);
continue;
}
if (os != ot)
return os - ot;
}
// Check for primary strength differences
int prim1 = CollationElementIterator.primaryOrder(ord1);
int prim2 = CollationElementIterator.primaryOrder(ord2);
if (prim1 == 0 && getStrength() < TERTIARY)
{
advance_block_2 = false;
continue;
}
else if (prim2 == 0 && getStrength() < TERTIARY)
{
advance_block_1 = false;
continue;
}
if (prim1 < prim2)
return -1;
else if (prim1 > prim2)
return 1;
else if (getStrength() == PRIMARY)
continue;
// Check for secondary strength differences
int sec1 = CollationElementIterator.secondaryOrder(ord1);
int sec2 = CollationElementIterator.secondaryOrder(ord2);
if (sec1 < sec2)
return -1;
else if (sec1 > sec2)
return 1;
else if (getStrength() == SECONDARY)
continue;
// Check for tertiary differences
int tert1 = CollationElementIterator.tertiaryOrder(ord1);
int tert2 = CollationElementIterator.tertiaryOrder(ord2);
if (tert1 < tert2)
return -1;
else if (tert1 > tert2)
return 1;
else if (getStrength() == TERTIARY)
continue;
return 0;
// Apparently JDK does this (at least for my test case).
return ord1block.key.compareTo(ord2block.key);
}
}
/**
......@@ -467,13 +845,54 @@ public class RuleBasedCollator extends Collator
*/
public boolean equals(Object obj)
{
if (! (obj instanceof RuleBasedCollator) || ! super.equals(obj))
if (obj == this)
return true;
else
return false;
RuleBasedCollator rbc = (RuleBasedCollator) obj;
// FIXME: this is probably wrong. Instead we should compare maps
// directly.
return (frenchAccents == rbc.frenchAccents
&& rules.equals(rbc.rules));
}
/**
* This method builds a default collation element without invoking
* the database created from the rules passed to the constructor.
*
* @param c Character which needs a collation element.
* @return A valid brand new CollationElement instance.
*/
CollationElement getDefaultElement(char c)
{
int v;
// Preliminary support for generic accent sorting inversion (I don't know if all
// characters in the range should be sorted backward). This is the place
// to fix this if needed.
if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
v = 0x0361 - ((int) c - 0x02B9);
else
v = (short) c;
return new CollationElement("" + c, last_primary_value + v,
(short) 0, (short) 0, (short) 0, null, false);
}
/**
* This method builds a default collation element for an accented character
* without invoking the database created from the rules passed to the constructor.
*
* @param c Character which needs a collation element.
* @return A valid brand new CollationElement instance.
*/
CollationElement getDefaultAccentedElement(char c)
{
int v;
// Preliminary support for generic accent sorting inversion (I don't know if all
// characters in the range should be sorted backward). This is the place
// to fix this if needed.
if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
v = 0x0361 - ((int) c - 0x02B9);
else
v = (short) c;
return new CollationElement("" + c, (short) 0,
(short) 0, (short) (last_tertiary_value + v), (short) 0, null, false);
}
/**
......@@ -489,13 +908,7 @@ public class RuleBasedCollator extends Collator
*/
public CollationElementIterator getCollationElementIterator(String source)
{
int len = source.length();
StringBuffer expand = new StringBuffer(len);
for (int index = 0; index < len; ++index)
decomposeCharacter(source.charAt(index), expand);
return new CollationElementIterator(this, expand.toString());
return new CollationElementIterator(this, source);
}
/**
......@@ -517,7 +930,7 @@ public class RuleBasedCollator extends Collator
c = source.next())
decomposeCharacter(c, expand);
return new CollationElementIterator(this, expand.toString());
return getCollationElementIterator(expand.toString());
}
/**
......@@ -533,8 +946,52 @@ public class RuleBasedCollator extends Collator
*/
public CollationKey getCollationKey(String source)
{
return new CollationKey(this, getCollationElementIterator(source), source,
strength);
CollationElementIterator cei = getCollationElementIterator(source);
Vector vect = new Vector(25);
int ord = cei.next();
cei.reset(); //set to start of string
while (ord != CollationElementIterator.NULLORDER)
{
// If the primary order is null, it means this is an ignorable
// character.
if (CollationElementIterator.primaryOrder(ord) == 0)
{
ord = cei.next();
continue;
}
switch (getStrength())
{
case PRIMARY:
ord = CollationElementIterator.primaryOrder(ord);
break;
case SECONDARY:
ord = CollationElementIterator.primaryOrder(ord) << 8;
ord |= CollationElementIterator.secondaryOrder(ord);
default:
break;
}
vect.add(new Integer(ord));
ord = cei.next(); //increment to next key
}
Object[] objarr = vect.toArray();
byte[] key = new byte[objarr.length * 4];
for (int i = 0; i < objarr.length; i++)
{
int j = ((Integer) objarr[i]).intValue();
key [i * 4] = (byte) ((j & 0xFF000000) >> 24);
key [i * 4 + 1] = (byte) ((j & 0x00FF0000) >> 16);
key [i * 4 + 2] = (byte) ((j & 0x0000FF00) >> 8);
key [i * 4 + 3] = (byte) (j & 0x000000FF);
}
return new CollationKey(this, source, key);
}
/**
......@@ -555,44 +1012,6 @@ public class RuleBasedCollator extends Collator
*/
public int hashCode()
{
return (frenchAccents ? 1231 : 1237
^ rules.hashCode()
^ map.hashCode()
^ prefixes.hashCode());
return System.identityHashCode(this);
}
private final boolean is_special (char c)
{
// Rules from JCL book.
return ((c >= 0x0021 && c <= 0x002f)
|| (c >= 0x003a && c <= 0x0040)
|| (c >= 0x005b && c <= 0x0060)
|| (c >= 0x007b && c <= 0x007e));
}
private final int text_argument (String rules, int index,
StringBuffer result)
{
result.setLength(0);
int len = rules.length();
while (index < len)
{
char c = rules.charAt (index);
if (c == '\''
&& index + 2 < len
&& rules.charAt (index + 2) == '\'')
{
result.append (rules.charAt (index + 1));
index += 2;
}
else if (is_special (c))
return index;
else if (!Character.isWhitespace (c))
result.append (c);
++index;
}
return index;
}
}
......@@ -35,106 +35,6 @@ FAIL: gnu.testlet.java.lang.String.getBytes14: String.getBytes("UTF-16LE") (numb
FAIL: gnu.testlet.java.text.DateFormatSymbols.Test: patterns (number 2)
FAIL: gnu.testlet.java.text.SimpleDateFormat.getAndSet2DigitYearStart: get2DigitYearStart() initial (number 1)
FAIL: gnu.testlet.java.text.DateFormatSymbols.Test: invalid locale (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: CollationElementIterator (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 0 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 1 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 10 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 11 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 12 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 13 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 2 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 3 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 5 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 6 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 7 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 8 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 9 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: primaryOrder() 0 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: primaryOrder() 1 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: primaryOrder() 10 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: primaryOrder() 11 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: primaryOrder() 12 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: primaryOrder() 13 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: primaryOrder() 2 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: primaryOrder() 3 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: primaryOrder() 4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: primaryOrder() 5 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: primaryOrder() 6 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: primaryOrder() 7 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: primaryOrder() 8 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: secondaryOrder() 12 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: secondaryOrder() 4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: secondaryOrder() 5 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: secondaryOrder() 7 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: secondaryOrder() 9 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: tertiaryOrder() 10 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 2 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no primary difference 2 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 4 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no primary difference 4 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 6 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no primary difference 6 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 8 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no primary difference 8 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 10 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no primary difference 10 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 12 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no primary difference 12 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 14 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no primary difference 14 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 16 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no primary difference 16 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 18 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no primary difference 18 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 20 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no primary difference 20 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 22 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no primary difference 22 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 24 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no primary difference 24 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 26 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no primary difference 26 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 28 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no primary difference 28 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 30 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no primary difference 30 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 32 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no primary difference 32 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 34 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no primary difference 34 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 36 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no primary difference 36 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 38 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no primary difference 38 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 40 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no primary difference 40 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 42 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no primary difference 42 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 44 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no primary difference 44 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 46 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no primary difference 46 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 48 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no primary difference 48 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 50 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no primary difference 50 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 52 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no primary difference 52 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 54 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no primary difference 54 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 56 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no primary difference 56 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 58 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no primary difference 58 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 60 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no primary difference 60 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 62 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no primary difference 62 test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: wrong number of keys test string #4 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: next() 2 test string #5 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: no tertiary difference2 test string #5 (number 1)
FAIL: gnu.testlet.java.text.CollationElementIterator.jdk11: not tertiary ordered0, 2 test set #0 (number 1)
FAIL: gnu.testlet.java.net.URLConnection.URLConnectionTest: Error in test_Basics - 2 should not have raised Throwable here (number 1)
FAIL: gnu.testlet.java.net.URLConnection.URLConnectionTest: Error in test_getHeaderField - 2 4 header field wrong (number 1)
FAIL: gnu.testlet.java.net.URL.URLTest: openStream (number 1)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment