unicode-decomp.pl: Move from chartables.pl...

2002-03-04 Eric Blake <ebb9@email.byu.edu> * scripts/unicode-decomp.pl: Move from chartables.pl, and remove the code for generating include/java-chartables.h. * scripts/unicode-blocks.pl: Move from scripts/blocks.pl, and merge with Classpath. * scripts/unicode-muncher.pl: Copy from Classpath. * scripts/MakeCharTables.java: New file. * gnu/gcj/convert/Blocks-3.txt: New file. * gnu/gcj/convert/UnicodeData-3.0.0.txt: New file. * gnu/gcj/convert/UnicodeCharacterDatabase-3.0.0.html: New file. * gnu/java/lang/CharData.java: Copy from Classpath. * Makefile.am (ordinary_java_source_files): Add gnu/java/lang/CharData.java. * configure.in: Remove --enable-fast-character option. * java/lang/Character.java: Merge algorithms and Javadoc with Classpath. * java/lang/natCharacter.cc: Implement Unicode lookup table more efficiently. * include/java-chardecomp.h: Regenerate. * include/java-chartables.h: Regenerate. From-SVN: r50368

unicode-decomp.pl: Move from chartables.pl...
2002-03-04 Eric Blake <ebb9@email.byu.edu> * scripts/unicode-decomp.pl: Move from chartables.pl, and remove the code for generating include/java-chartables.h. * scripts/unicode-blocks.pl: Move from scripts/blocks.pl, and merge with Classpath. * scripts/unicode-muncher.pl: Copy from Classpath. * scripts/MakeCharTables.java: New file. * gnu/gcj/convert/Blocks-3.txt: New file. * gnu/gcj/convert/UnicodeData-3.0.0.txt: New file. * gnu/gcj/convert/UnicodeCharacterDatabase-3.0.0.html: New file. * gnu/java/lang/CharData.java: Copy from Classpath. * Makefile.am (ordinary_java_source_files): Add gnu/java/lang/CharData.java. * configure.in: Remove --enable-fast-character option. * java/lang/Character.java: Merge algorithms and Javadoc with Classpath. * java/lang/natCharacter.cc: Implement Unicode lookup table more efficiently. * include/java-chardecomp.h: Regenerate. * include/java-chartables.h: Regenerate. From-SVN: r50368
1fa78272 · Eric Blake · Eric Blake · b87e4a4c · 1fa78272 · 1fa78272
Commit 1fa78272 authored Mar 06, 2002 by Eric Blake Committed by Eric Blake Mar 06, 2002
17 changed files
--- a/libjava/ChangeLog
+++ b/libjava/ChangeLog
+2002-03-04  Eric Blake  <ebb9@email.byu.edu>
+	* scripts/unicode-decomp.pl: Move from chartables.pl, and remove
+	the code for generating include/java-chartables.h.
+	* scripts/unicode-blocks.pl: Move from scripts/blocks.pl, and
+	merge with Classpath.
+	* scripts/unicode-muncher.pl: Copy from Classpath.
+	* scripts/MakeCharTables.java: New file.
+	* gnu/gcj/convert/Blocks-3.txt: New file.
+	* gnu/gcj/convert/UnicodeData-3.0.0.txt: New file.
+	* gnu/gcj/convert/UnicodeCharacterDatabase-3.0.0.html: New file.
+	* gnu/java/lang/CharData.java: Copy from Classpath.
+	* Makefile.am (ordinary_java_source_files): Add
+	gnu/java/lang/CharData.java.
+	* configure.in: Remove --enable-fast-character option.
+	* java/lang/Character.java: Merge algorithms and Javadoc with
+	Classpath.
+	* java/lang/natCharacter.cc: Implement Unicode lookup table more
+	efficiently.
+	* include/java-chardecomp.h: Regenerate.
+	* include/java-chartables.h: Regenerate.
 2002-03-06  Bryce McKinlay  <bryce@waitaki.otago.ac.nz>
 	* java/awt/MediaTracker.java: Implemented.

--- a/libjava/Makefile.am
+++ b/libjava/Makefile.am
@@ -1288,6 +1288,7 @@ gnu/java/io/NullOutputStream.java \
 gnu/java/io/ObjectIdentityWrapper.java \
 gnu/java/lang/ArrayHelper.java \
 gnu/java/lang/ClassHelper.java \
+gnu/java/lang/CharData.java \
 gnu/java/lang/reflect/TypeSignature.java \
 gnu/java/locale/Calendar.java \
 gnu/java/locale/Calendar_de.java \

--- a/libjava/chartables.pl
+++ b/libjava/chartables.pl
-# chartables.pl - A perl program to generate tables for use by the
-# Character class.
-# Copyright (C) 1998, 1999  Red Hat, Inc.
-#
-# This file is part of libjava.
-# 
-# This software is copyrighted work licensed under the terms of the
-# Libjava License.  Please consult the file "LIBJAVA_LICENSE" for
-# details.
-# This program requires a `unidata.txt' file of the form distributed
-# on the Unicode 2.0 CD ROM.  Or, get it more conveniently here:
-# ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData-Latest.txt
-# Version `2.1.8' of this file was last used to update the Character class.
-# Written using "Java Class Libraries", 2nd edition, ISBN 0-201-31002-3
-# "The Java Language Specification", ISBN 0-201-63451-1
-# plus online API docs for JDK 1.2 beta from http://www.javasoft.com.
-# Usage: perl chartables.pl [-n] UnicodeData-VERSION.txt
-# If this exits with nonzero status, then you must investigate the
-# cause of the problem.
-# Diagnostics and other information to stderr.
-# This creates the new include/java-chartables.h and
-# include/java-chardecomp.h files directly.
-# With -n, the files are not created, but all processing
-# still occurs.
-# Fields in the table.
-$CODE = 0;
-$NAME = 1;
-$CATEGORY = 2;
-$DECOMPOSITION = 5;
-$DECIMAL = 6;
-$DIGIT = 7;
-$NUMERIC = 8;
-$UPPERCASE = 12;
-$LOWERCASE = 13;
-$TITLECASE = 14;
-# A special case.
-$TAMIL_DIGIT_ONE  = 0x0be7;
-$TAMIL_DIGIT_NINE = 0x0bef;
-# These are endpoints of legitimate gaps in the tables.
-$CJK_IDEOGRAPH_END = 0x9fa5;
-$HANGUL_END = 0xd7a3;
-$HIGH_SURROGATE_END = 0xdb7f;
-$PRIVATE_HIGH_SURROGATE_END = 0xdbff;
-$LOW_SURROGATE_END = 0xdfff;
-$PRIVATE_END = 0xf8ff;
-%title_to_upper = ();
-%title_to_lower = ();
-%numerics  = ();
-%name = ();
-@digit_start = ();
-@digit_end   = ();
-@space_start = ();
-@space_end   = ();
-# @letter_start = ();
-# @letter_end   = ();
-@all_start = ();
-@all_end   = ();
-@all_cats  = ();
-@upper_start = ();
-@upper_end   = ();
-@upper_map   = ();
-%upper_anom  = ();
-@lower_start = ();
-@lower_end   = ();
-@lower_map   = ();
-%lower_anom  = ();
-@attributes = ();
-# There are a few characters which actually need two attributes.
-# These are special-cased.
-$ROMAN_START = 0x2160;
-$ROMAN_END   = 0x217f;
-%second_attributes = ();
-$prevcode = -1;
-$status = 0;
-%category_map =
-(
- 'Mn' => 'NON_SPACING_MARK',
- 'Mc' => 'COMBINING_SPACING_MARK',
- 'Me' => 'ENCLOSING_MARK',
- 'Nd' => 'DECIMAL_DIGIT_NUMBER',
- 'Nl' => 'LETTER_NUMBER',
- 'No' => 'OTHER_NUMBER',
- 'Zs' => 'SPACE_SEPARATOR',
- 'Zl' => 'LINE_SEPARATOR',
- 'Zp' => 'PARAGRAPH_SEPARATOR',
- 'Cc' => 'CONTROL',
- 'Cf' => 'FORMAT',
- 'Cs' => 'SURROGATE',
- 'Co' => 'PRIVATE_USE',
- 'Cn' => 'UNASSIGNED',
- 'Lu' => 'UPPERCASE_LETTER',
- 'Ll' => 'LOWERCASE_LETTER',
- 'Lt' => 'TITLECASE_LETTER',
- 'Lm' => 'MODIFIER_LETTER',
- 'Lo' => 'OTHER_LETTER',
- 'Pc' => 'CONNECTOR_PUNCTUATION',
- 'Pd' => 'DASH_PUNCTUATION',
- 'Ps' => 'START_PUNCTUATION',
- 'Pe' => 'END_PUNCTUATION',
- 'Pi' => 'START_PUNCTUATION',
- 'Pf' => 'END_PUNCTUATION',
- 'Po' => 'OTHER_PUNCTUATION',
- 'Sm' => 'MATH_SYMBOL',
- 'Sc' => 'CURRENCY_SYMBOL',
- 'Sk' => 'MODIFIER_SYMBOL',
- 'So' => 'OTHER_SYMBOL'
- );
-# These map characters to their decompositions.
-%canonical_decomposition = ();
-%full_decomposition = ();
-# Handle `-n' and open output files.
-local ($f1, $f2) = ('include/java-chartables.h',
-		    'include/java-chardecomp.h');
-if ($ARGV[0] eq '-n')
-{
-    shift @ARGV;
-    $f1 = '/dev/null';
-    $f2 = '/dev/null';
-}
-open (CHARTABLE, "> $f1");
-open (DECOMP, "> $f2");
-# Process the Unicode file.
-while (<>)
-{
-    chop;
-    # Specify a limit for split so that we pick up trailing fields.
-    # We make the limit larger than we need, to catch the case where
-    # there are extra fields.
-    @fields = split (';', $_, 30);
-    # Convert code to number.
-    $ncode = hex ($fields[$CODE]);
-    if ($#fields != 14)
-    {
-	print STDERR ("Entry for \\u", $fields[$CODE],
-		      " has wrong number of fields: ", $#fields, "\n");
-    }
-    $name{$fields[$CODE]} = $fields[$NAME];
-    # If we've found a gap in the table, fill it in.
-    if ($ncode != $prevcode + 1)
-    {
-	&process_gap (*fields, $prevcode, $ncode);
-    }
-    &process_char (*fields, $ncode);
-    $prevcode = $ncode;
-}
-if ($prevcode != 0xffff)
-{
-    # Setting of `fields' parameter doesn't matter here.
-    &process_gap (*fields, $prevcode, 0x10000);
-}
-print CHARTABLE "// java-chartables.h - Character tables for java.lang.Character -*- c++ -*-\n\n";
-print CHARTABLE "#ifndef __JAVA_CHARTABLES_H__\n";
-print CHARTABLE "#define __JAVA_CHARTABLES_H__\n\n";
-print CHARTABLE "// These tables are automatically generated by the chartables.pl\n";
-print CHARTABLE "// script.  DO NOT EDIT the tables.  Instead, fix the script\n";
-print CHARTABLE "// and run it again.\n\n";
-print CHARTABLE "// This file should only be included by natCharacter.cc\n\n";
-$bytes = 0;
-# Titlecase mapping tables.
-if ($#title_to_lower != $#title_to_upper)
-{
-    # If this fails we need to reimplement toTitleCase.
-    print STDERR "titlecase mappings have different sizes\n";
-    $status = 1;
-}
-# Also ensure that the tables are entirely parallel.
-foreach $key (sort keys %title_to_lower)
-{
-    if (! defined $title_to_upper{$key})
-    {
-	print STDERR "titlecase mappings have different entries\n";
-	$status = 1;
-    }
-}
-&print_single_map ("title_to_lower_table", %title_to_lower);
-&print_single_map ("title_to_upper_table", %title_to_upper);
-print CHARTABLE "#ifdef COMPACT_CHARACTER\n\n";
-printf CHARTABLE "#define TAMIL_DIGIT_ONE 0x%04x\n\n", $TAMIL_DIGIT_ONE;
-# All numeric values.
-&print_numerics;
-# Digits only.
-&print_block ("digit_table", *digit_start, *digit_end);
-# Space characters.
-&print_block ("space_table", *space_start, *space_end);
-# Letters.  We used to generate a separate letter table.  But this
-# doesn't really seem worthwhile.  Simply using `all_table' saves us
-# about 800 bytes, and only adds 3 table probes to isLetter.
-# &print_block ("letter_table", *letter_start, *letter_end);
-# Case tables.
-&print_case_table ("upper", *upper_start, *upper_end, *upper_map, *upper_anom);
-&print_case_table ("lower", *lower_start, *lower_end, *lower_map, *lower_anom);
-# Everything else.
-&print_all_block (*all_start, *all_end, *all_cats);
-print CHARTABLE "#else /* COMPACT_CHARACTER */\n\n";
-printf CHARTABLE "#define ROMAN_START 0x%04x\n", $ROMAN_START;
-printf CHARTABLE "#define ROMAN_END   0x%04x\n\n", $ROMAN_END;
-&print_fast_tables (*all_start, *all_end, *all_cats,
-		    *attributes, *second_attributes);
-print CHARTABLE "#endif /* COMPACT_CHARACTER */\n\n";
-print CHARTABLE "#endif /* __JAVA_CHARTABLES_H__ */\n";
-printf STDERR "Approximately %d bytes of data generated (compact case)\n",
-    $bytes;
-# Now generate decomposition tables.
-printf DECOMP "// java-chardecomp.h - Decomposition character tables -*- c++ -*-\n\n";
-printf DECOMP "#ifndef __JAVA_CHARDECOMP_H__\n";
-printf DECOMP "#define __JAVA_CHARDECOMP_H__\n\n";
-print DECOMP "// These tables are automatically generated by the chartables.pl\n";
-print DECOMP "// script.  DO NOT EDIT the tables.  Instead, fix the script\n";
-print DECOMP "// and run it again.\n\n";
-print DECOMP "// This file should only be included by natCollator.cc\n\n";
-print DECOMP "struct decomp_entry\n{\n";
-print DECOMP "  jchar key;\n";
-print DECOMP "  const char *value;\n";
-print DECOMP "};\n\n";
-&write_decompositions;
-printf DECOMP "#endif /* __JAVA_CHARDECOMP_H__ */\n";
-close (CHARTABLE);
-close (DECOMP);
-exit $status;
-# Process a gap in the space.
-sub process_gap
-{
-    local (*fields, $prevcode, $ncode) = @_;
-    local (@gap_fields, $i);
-    if ($ncode == $CJK_IDEOGRAPH_END
-	|| $ncode == $HANGUL_END
-	|| $ncode == $HIGH_SURROGATE_END
-	|| $ncode == $PRIVATE_HIGH_SURROGATE_END
-	|| $ncode == $LOW_SURROGATE_END
-	|| $ncode == $PRIVATE_END)
-    {
-	# The characters in the gap we just found are known to
-	# have the same properties as the character at the end of
-	# the gap.
-	@gap_fields = @fields;
-    }
-    else
-    {
-	# This prints too much to be enabled.
-	# print STDERR "Gap found at \\u", $fields[$CODE], "\n";
-	@gap_fields = ('', '', 'Cn', '', '', '', '', '', '', '', '',
-		       '', '', '', '');
-    }
-    for ($i = $prevcode + 1; $i < $ncode; ++$i)
-    {
-	$gap_fields[$CODE] = sprintf ("%04x", $i);
-	$gap_fields[$NAME] = "CHARACTER " . $gap_fields[$CODE];
-	&process_char (*gap_fields, $i);
-    }
-}
-# Process a single character.
-sub process_char
-{
-    local (*fields, $ncode) = @_;
-    if ($fields[$DECOMPOSITION] ne '')
-    {
-	&add_decomposition ($ncode, $fields[$DECOMPOSITION]);
-    }
-    # If this is a titlecase character, mark it.
-    if ($fields[$CATEGORY] eq 'Lt')
-    {
-	$title_to_upper{$fields[$CODE]} = $fields[$UPPERCASE];
-	$title_to_lower{$fields[$CODE]} = $fields[$LOWERCASE];
-    }
-    else
-    {
-	# For upper and lower case mappings, we try to build compact
-	# tables that map range onto range.  We specifically want to
-	# avoid titlecase characters.  Java specifies a range check to
-	# make sure the character is not between 0x2000 and 0x2fff.
-	# We avoid that here because we need to generate table entries
-	# -- toLower and toUpper still work in that range.
-	if ($fields[$UPPERCASE] eq ''
-	    && ($fields[$LOWERCASE] ne ''
-		|| $fields[$NAME] =~ /CAPITAL (LETTER|LIGATURE)/))
-	{
-	    if ($fields[$LOWERCASE] ne '')
-	    {
-		&update_case_block (*upper_start, *upper_end, *upper_map,
-				    $fields[$CODE], $fields[$LOWERCASE]);
-		&set_attribute ($ncode, hex ($fields[$LOWERCASE]));
-	    }
-	    else
-	    {
-		$upper_anom{$fields[$CODE]} = 1;
-	    }
-	}
-	elsif ($fields[$LOWERCASE] ne '')
-	{
-	    print STDERR ("Java missed upper case char \\u",
-			  $fields[$CODE], "\n");
-	}
-	elsif ($fields[$CATEGORY] eq 'Lu')
-	{
-	    # This case is for letters which are marked as upper case
-	    # but for which there is no lower case equivalent.  For
-	    # instance, LATIN LETTER YR.
-	}
-	if ($fields[$LOWERCASE] eq ''
-	    && ($fields[$UPPERCASE] ne ''
-		|| $fields[$NAME] =~ /SMALL (LETTER|LIGATURE)/))
-	{
-	    if ($fields[$UPPERCASE] ne '')
-	    {
-		&update_case_block (*lower_start, *lower_end, *lower_map,
-				    $fields[$CODE], $fields[$UPPERCASE]);
-		&set_attribute ($ncode, hex ($fields[$UPPERCASE]));
-	    }
-	    else
-	    {
-		$lower_anom{$fields[$CODE]} = 1;
-	    }
-	}
-	elsif ($fields[$UPPERCASE] ne '')
-	{
-	    print STDERR ("Java missed lower case char \\u",
-			  $fields[$CODE], "\n");
-	}
-	elsif ($fields[$CATEGORY] eq 'Ll')
-	{
-	    # This case is for letters which are marked as lower case
-	    # but for which there is no upper case equivalent.  For
-	    # instance, FEMININE ORDINAL INDICATOR.
-	}
-    }
-    # If we have a non-decimal numeric value, add it to the list.
-    if ($fields[$CATEGORY] eq 'Nd'
-	&& ($ncode < 0x2000 || $ncode > 0x2fff)
-	&& $fields[$NAME] =~ /DIGIT/)
-    {
-	# This is a digit character that is handled elsewhere.
-    }
-    elsif ($fields[$DIGIT] ne '' || $fields[$NUMERIC] ne '')
-    {
-	# Do a simple check.
-	if ($fields[$DECIMAL] ne '')
-	{
-	    # This catches bugs in an earlier implementation of
-	    # chartables.pl.  Now it is here for historical interest
-	    # only.
-	    # print STDERR ("Character \u", $fields[$CODE],
-	    # " would have been missed as digit\n");
-	}
-	local ($val) = $fields[$DIGIT];
-	$val = $fields[$NUMERIC] if $val eq '';
-	local ($ok) = 1;
-	# If we have a value which is not a positive integer, then we
-	# set the value to -2 to make life easier for
-	# Character.getNumericValue.
-	if ($val !~ m/^[0-9]+$/)
-	{
-	    if ($fields[$CATEGORY] ne 'Nl'
-		&& $fields[$CATEGORY] ne 'No')
-	    {
-		# This shows a few errors in the Unicode table.  These
-		# characters have a missing Numeric field, and the `N'
-		# for the mirrored field shows up there instead.  I
-		# reported these characters to errata@unicode.org on
-		# Thu Sep 10 1998.  They said it will be fixed in the
-		# 2.1.6 release of the tables.
-		print STDERR ("Character \u", $fields[$CODE],
-			      " has value but is not numeric; val = '",
-			      $val, "'\n");
-		# We skip these.
-		$ok = 0;
-	    }
-	    $val = "-2";
-	}
-	if ($ok)
-	{
-	    $numerics{$fields[$CODE]} = $val;
-	    &set_attribute ($ncode, $val);
-	}
-    }
-    # We build a table that lists ranges of ordinary decimal values.
-    # At each step we make sure that the digits are in the correct
-    # order, with no holes, as this is assumed by Character.  If this
-    # fails, reimplementation is required.  This implementation
-    # dovetails nicely with the Java Spec, which has strange rules for
-    # what constitutes a decimal value.  In particular the Unicode
-    # name must contain the word `DIGIT'.  The spec doesn't directly
-    # say that digits must have type `Nd' (or that their value must be an
-    # integer), but that can be inferred from the list of digits in
-    # the book(s).  Currently the only Unicode characters whose name
-    # includes `DIGIT' which would not fit are the Tibetan "half"
-    # digits.
-    if ($fields[$CATEGORY] eq 'Nd')
-    {
-	if (($ncode < 0x2000 || $ncode > 0x2fff)
-	    && $fields[$NAME] =~ /DIGIT/)
-	{
-	    &update_digit_block (*digit_start, *digit_end, $fields[$CODE],
-				 $fields[$DECIMAL]);
-	    &set_attribute ($ncode, $fields[$DECIMAL]);
-	}
-	else
-	{
-	    # If this fails then Character.getType will fail.  We
-	    # assume that things in `digit_table' are the only
-	    # category `Nd' characters.
-	    print STDERR ("Character \u", $fields[$CODE],
-			  " is class Nd but not in digit table\n");
-	    $status = 1;
-	}
-    }
-    # Keep track of space characters.
-    if ($fields[$CATEGORY] =~ /Z[slp]/)
-    {
-	&update_block (*space_start, *space_end, $fields[$CODE]);
-    }
-    # Keep track of letters.
-    # if ($fields[$CATEGORY] =~ /L[ultmo]/)
-    # {
-    # 	&update_letter_block (*letter_start, *letter_end, $fields[$CODE],
-    # 			      $fields[$CATEGORY]);
-    # }
-    # Keep track of all characters.  You might think we wouldn't have
-    # to do this for uppercase letters, or other characters we already
-    # "classify".  The problem is that this classification is
-    # different.  E.g., \u216f is uppercase by Java rules, but is a
-    # LETTER_NUMBER here.
-    &update_all_block (*all_start, *all_end, *all_cats,
-		       $fields[$CODE], $fields[$CATEGORY]);
-}
-# Called to add a new decomposition.
-sub add_decomposition
-{
-    local ($ncode, $value) = @_;
-    local ($is_full) = 0;
-    local ($first) = 1;
-    local (@decomp) = ();
-    foreach (split (' ', $value))
-    {
-	if ($first && /^\<.*\>$/)
-	{
-	    $is_full = 1;
-	}
-	else
-	{
-	    push (@decomp, hex ($_));
-	}
-	$first = 0;
-    }
-    # We pack the value into a string because this means we can stick
-    # with Perl 4 features.
-    local ($s) = pack "I*", @decomp;
-    if ($is_full)
-    {
-	$full_decomposition{$ncode} = $s;
-    }
-    else
-    {
-	$canonical_decomposition{$ncode} = $s;
-    }
-}
-# Write a single decomposition table.
-sub write_single_decomposition
-{
-    local ($name, $is_canon, %table) = @_;
-    printf DECOMP "static const decomp_entry ${name}_decomposition[] =\n{\n";
-    local ($key, @expansion, $char);
-    local ($first_line) = 1;
-    for ($key = 0; $key <= 65535; ++$key)
-    {
-	next if ! defined $table{$key};
-	printf DECOMP ",\n"
-	    unless $first_line;
-	$first_line = 0;
-	printf DECOMP "  { 0x%04x, \"", $key;
-	# We represent the expansion as a series of bytes, terminated
-	# with a double nul.  This is ugly, but relatively
-	# space-efficient.  Most expansions are short, but there are a
-	# few that are very long (e.g. \uFDFA).  This means that if we
-	# chose a fixed-space representation we would waste a lot of
-	# space.
-	@expansion = unpack "I*", $table{$key};
-	foreach $char (@expansion)
-	{
-	    printf DECOMP "\\x%02x\\x%02x", ($char / 256), ($char % 256);
-	}
-	printf DECOMP "\" }";
-    }
-    printf DECOMP "\n};\n\n";
-}
-sub write_decompositions
-{
-    &write_single_decomposition ('canonical', 1, %canonical_decomposition);
-    &write_single_decomposition ('full', 0, %full_decomposition);
-}
-# We represent a block of characters with a pair of lists.  This
-# function updates the pair to account for the new character.  Returns
-# 1 if we added to the old block, 0 otherwise.
-sub update_block
-{
-    local (*start, *end, $char) = @_;
-    local ($nchar) = hex ($char);
-    local ($count) = $#end;
-    if ($count >= 0 && $end[$count] == $nchar - 1)
-    {
-	++$end[$count];
-	return 1;
-    }
-    else
-    {
-	++$count;
-	$start[$count] = $nchar;
-	$end[$count] = $nchar;
-    }
-    return 0;
-}
-# Return true if we will be appending this character to the end of the
-# existing block.
-sub block_append_p
-{
-    local (*end, $char) = @_;
-    return $#end >= 0 && $end[$#end] == $char - 1;
-}
-# This updates the digit block.  This table is much like an ordinary
-# block, but it has an extra constraint.
-sub update_digit_block
-{
-    local (*start, *end, $char, $value) = @_;
-    &update_block ($start, $end, $char);
-    local ($nchar) = hex ($char);
-    # We want to make sure that the new digit's value is correct for
-    # its place in the block.  However, we special-case Tamil digits,
-    # since Tamil does not have a digit `0'.
-    local ($count) = $#start;
-    if (($nchar < $TAMIL_DIGIT_ONE || $nchar > $TAMIL_DIGIT_NINE)
-	&& $nchar - $start[$count] != $value)
-    {
-	# If this fails then Character.digit_value will be wrong.
-	print STDERR "Character \\u", $char, " violates digit constraint\n";
-	$status = 1;
-    }
-}
-# Update letter table.  We could be smart about avoiding upper or
-# lower case letters, but it is much simpler to just track them all.
-sub update_letter_block
-{
-    local (*start, *end, $char, $category) = @_;
-    &update_block (*start, *end, $char);
-}
-# Update `all' table.  This table holds all the characters we don't
-# already categorize for other reasons.  FIXME: if a given type has
-# very few characters, we should just inline the code.  E.g., there is
-# only one paragraph separator.
-sub update_all_block
-{
-    local (*start, *end, *cats, $char, $category) = @_;
-    local ($nchar) = hex ($char);
-    local ($count) = $#end;
-    if ($count >= 0
-	&& $end[$count] == $nchar - 1
-	&& $cats[$count] eq $category)
-    {
-	++$end[$count];
-    }
-    else
-    {
-	++$count;
-	$start[$count] = $nchar;
-	$end[$count] = $nchar;
-	$cats[$count] = $category;
-    }
-}
-# Update a case table.  We handle case tables specially because we
-# want to map (e.g.) a block of uppercase characters directly onto the
-# corresponding block of lowercase characters.  Therefore we generate
-# a new entry when the block would no longer map directly.
-sub update_case_block
-{
-    local (*start, *end, *map, $char, $mapchar) = @_;
-    local ($nchar) = hex ($char);
-    local ($nmap) = hex ($mapchar);
-    local ($count) = $#end;
-    if ($count >= 0
-	&& $end[$count] == $nchar - 1
-	&& $nchar - $start[$count] == $nmap - $map[$count])
-    {
-	++$end[$count];
-    }
-    else
-    {
-	++$count;
-	$start[$count] = $nchar;
-	$end[$count] = $nchar;
-	$map[$count] = $nmap;
-    }
-}
-# Set the attribute value for the character.  Each character can have
-# only one attribute.
-sub set_attribute
-{
-    local ($ncode, $attr) = @_;
-    if ($attributes{$ncode} ne '' && $attributes{$ncode} ne $attr)
-    {
-	if ($ncode >= $ROMAN_START && $ncode <= $ROMAN_END)
-	{
-	    $second_attributes{$ncode} = $attr;
-	}
-	else
-	{
-	    printf STDERR "character \\u%04x already has attribute\n", $ncode;
-	}
-    }
-    # Attributes can be interpreted as unsigned in some situations,
-    # so we check against 65535.  This could cause errors -- we need
-    # to check the interpretation here.
-    elsif ($attr < -32768 || $attr > 65535)
-    {
-	printf STDERR "attribute out of range for character \\u%04x\n", $ncode;
-    }
-    else
-    {
-	$attributes{$ncode} = $attr;
-    }
-}
-# Print a block table.
-sub print_block
-{
-    local ($title, *start, *end) = @_;
-    print CHARTABLE "static const jchar ", $title, "[][2] =\n";
-    print CHARTABLE "  {\n";
-    local ($i) = 0;
-    while ($i <= $#start)
-    {
-	print CHARTABLE "    { ";
-	&print_char ($start[$i]);
-	print CHARTABLE ", ";
-	&print_char ($end[$i]);
-	print CHARTABLE " }";
-	print CHARTABLE "," if ($i != $#start);
-	print CHARTABLE "\n";
-	++$i;
-	$bytes += 4;		# Two bytes per char.
-    }
-    print CHARTABLE "  };\n\n";
-}
-# Print the numerics table.
-sub print_numerics
-{
-    local ($i, $key, $count, @keys);
-    $i = 0;
-    @keys = sort keys %numerics;
-    $count = @keys;
-    print CHARTABLE "static const jchar numeric_table[] =\n";
-    print CHARTABLE "  { ";
-    foreach $key (@keys)
-    {
-	&print_char (hex ($key));
-	++$i;
-	print CHARTABLE ", " if $i < $count;
-	# Print 5 per line.
-	print CHARTABLE "\n    " if ($i % 5 == 0);
-	$bytes += 2;		# One character.
-    }
-    print CHARTABLE " };\n\n";
-    print CHARTABLE "static const jshort numeric_value[] =\n";
-    print CHARTABLE "  { ";
-    $i = 0;
-    foreach $key (@keys)
-    {
-	print CHARTABLE $numerics{$key};
-	if ($numerics{$key} > 32767 || $numerics{$key} < -32768)
-	{
-	    # This means our generated type info is incorrect.  We
-	    # could just detect and work around this here, but I'm
-	    # lazy.
-	    print STDERR "numeric value won't fit in a short\n";
-	    $status = 1;
-	}
-	++$i;
-	print CHARTABLE ", " if $i < $count;
-	# Print 10 per line.
-	print CHARTABLE "\n    " if ($i % 10 == 0);
-	$bytes += 2;		# One short.
-    }
-    print CHARTABLE " };\n\n";
-}
-# Print a table that maps one single letter onto another.  It assumes
-# the map is indexed by char code.
-sub print_single_map
-{
-    local ($title, %map) = @_;
-    local (@keys) = sort keys %map;
-    $num = @keys;
-    print CHARTABLE "static const jchar ", $title, "[][2] =\n";
-    print CHARTABLE "  {\n";
-    $i = 0;
-    for $key (@keys)
-    {
-	print CHARTABLE "    { ";
-	&print_char (hex ($key));
-	print CHARTABLE ", ";
-	&print_char (hex ($map{$key}));
-	print CHARTABLE " }";
-	++$i;
-	if ($i < $num)
-	{
-	    print CHARTABLE ",";
-	}
-	else
-	{
-	    print CHARTABLE " ";
-	}
-	print CHARTABLE "   // ", $name{$key}, "\n";
-	$bytes += 4;		# Two bytes per char.
-    }
-    print CHARTABLE "  };\n\n";
-}
-# Print the `all' block.
-sub print_all_block
-{
-    local (*start, *end, *cats) = @_;
-    &print_block ("all_table", *start, *end);
-    local ($i) = 0;
-    local ($sum) = 0;
-    while ($i <= $#start)
-    {
-	$sum += $end[$i] - $start[$i] + 1;
-	++$i;
-    }
-    # We do this computation just to make sure it isn't cheaper to
-    # simply list all the characters individually.
-    printf STDERR ("all_table encodes %d characters in %d entries\n",
-		   $sum, $#start + 1);
-    print CHARTABLE "static const jbyte category_table[] =\n";
-    print CHARTABLE "  { ";
-    $i = 0;
-    while ($i <= $#cats)
-    {
-	if ($i > 0 && $cats[$i] eq $cats[$i - 1])
-	{
-	    # This isn't an error.  We can have a duplicate because
-	    # two ranges are not adjacent while the intervening
-	    # characters are left out of the table for other reasons.
-	    # We could exploit this to make the table a little smaller.
-	    # printf STDERR "Duplicate all entry at \\u%04x\n", $start[$i];
-	}
-	print CHARTABLE 'java::lang::Character::', $category_map{$cats[$i]};
-	print CHARTABLE ", " if ($i < $#cats);
-	++$i;
-	print CHARTABLE "\n    ";
-	++$bytes;
-    }
-    print CHARTABLE "  };\n\n";
-}
-# Print case table.
-sub print_case_table
-{
-    local ($title, *start, *end, *map, *anomalous) = @_;
-    &print_block ($title . '_case_table', *start, *end);
-    print CHARTABLE "static const jchar ", $title, "_case_map_table[] =\n";
-    print CHARTABLE "  { ";
-    local ($i) = 0;
-    while ($i <= $#map)
-    {
-	&print_char ($map[$i]);
-	print CHARTABLE ", " if $i < $#map;
-	++$i;
-	print CHARTABLE "\n    " if $i % 5 == 0;
-	$bytes += 2;
-    }
-    print CHARTABLE "  };\n";
-    local ($key, @keys);
-    @keys = sort keys %anomalous;
-    if ($title eq 'upper')
-    {
-	if ($#keys >= 0)
-	{
-	    # If these are found we need to change Character.isUpperCase.
-	    print STDERR "Found anomalous upper case characters\n";
-	    $status = 1;
-	}
-    }
-    else
-    {
-	print CHARTABLE "\n";
-	print CHARTABLE "static const jchar ", $title, "_anomalous_table[] =\n";
-	print CHARTABLE "  { ";
-	$i = 0;
-	foreach $key (@keys)
-	{
-	    &print_char (hex ($key));
-	    print CHARTABLE ", " if $i < $#keys;
-	    ++$i;
-	    print CHARTABLE "\n    " if $i % 5 == 0;
-	    $bytes += 2;
-	}
-	print CHARTABLE "  };\n";
-    }
-    print CHARTABLE "\n";
-}
-# Print the type table and attributes table for the fast version.
-sub print_fast_tables
-{
-    local (*start, *end, *cats, *atts, *second_atts) = @_;
-    print CHARTABLE "static const jbyte type_table[] =\n{ ";
-    local ($i, $j);
-    for ($i = 0; $i <= $#cats; ++$i)
-    {
-	for ($j = $start[$i]; $j <= $end[$i]; ++$j)
-	{
-	    print CHARTABLE 'java::lang::Character::', $category_map{$cats[$i]};
-	    print CHARTABLE "," if ($i < $#cats || $j < $end[$i]);
-	    print CHARTABLE "\n    ";
-	}
-    }
-    print CHARTABLE "\n };\n\n";
-    print CHARTABLE "static const jshort attribute_table[] =\n{ ";
-    for ($i = 0; $i <= 0xffff; ++$i)
-    {
-	$atts{$i} = 0 if ! defined $atts{$i};
-	print CHARTABLE $atts{$i};
-	print CHARTABLE ", " if $i < 0xffff;
-	print CHARTABLE "\n    " if $i % 5 == 1;
-    }
-    print CHARTABLE "\n };\n\n";
-    print CHARTABLE "static const jshort secondary_attribute_table[] =\n{ ";
-    for ($i = $ROMAN_START; $i <= $ROMAN_END; ++$i)
-    {
-	print CHARTABLE $second_atts{$i};
-	print CHARTABLE ", " if $i < $ROMAN_END;
-	print CHARTABLE "\n    " if $i % 5 == 1;
-    }
-    print CHARTABLE "\n };\n\n";
-}
-# Print a character constant.
-sub print_char
-{
-    local ($ncode) = @_;
-    printf CHARTABLE "0x%04x", $ncode;
-}
--- a/libjava/configure.in
+++ b/libjava/configure.in
@@ -42,13 +42,6 @@ AC_SUBST(COMPPATH)
 dnl The -no-testsuite modules omit the test subdir.
 AM_CONDITIONAL(TESTSUBDIR, test -d $srcdir/testsuite)
-dnl See whether the user prefers size or speed for Character.
-dnl The default is size.
-AC_ARG_ENABLE(fast-character,
-[  --enable-fast-character prefer speed over size for Character],
-# Nothing
-, AC_DEFINE(COMPACT_CHARACTER))
 dnl Should the runtime set system properties by examining the 
 dnl environment variable GCJ_PROPERTIES?
 AC_ARG_ENABLE(getenv-properties,

--- a/libjava/gnu/gcj/convert/Blocks-3.txt
+++ b/libjava/gnu/gcj/convert/Blocks-3.txt
+# Start Code; End Code; Block Name
+0000; 007F; Basic Latin
+0080; 00FF; Latin-1 Supplement
+0100; 017F; Latin Extended-A
+0180; 024F; Latin Extended-B
+0250; 02AF; IPA Extensions
+02B0; 02FF; Spacing Modifier Letters
+0300; 036F; Combining Diacritical Marks
+0370; 03FF; Greek
+0400; 04FF; Cyrillic
+0530; 058F; Armenian
+0590; 05FF; Hebrew
+0600; 06FF; Arabic
+0700; 074F; Syriac  
+0780; 07BF; Thaana
+0900; 097F; Devanagari
+0980; 09FF; Bengali
+0A00; 0A7F; Gurmukhi
+0A80; 0AFF; Gujarati
+0B00; 0B7F; Oriya
+0B80; 0BFF; Tamil
+0C00; 0C7F; Telugu
+0C80; 0CFF; Kannada
+0D00; 0D7F; Malayalam
+0D80; 0DFF; Sinhala
+0E00; 0E7F; Thai
+0E80; 0EFF; Lao
+0F00; 0FFF; Tibetan
+1000; 109F; Myanmar 
+10A0; 10FF; Georgian
+1100; 11FF; Hangul Jamo
+1200; 137F; Ethiopic
+13A0; 13FF; Cherokee
+1400; 167F; Unified Canadian Aboriginal Syllabics
+1680; 169F; Ogham
+16A0; 16FF; Runic
+1780; 17FF; Khmer
+1800; 18AF; Mongolian
+1E00; 1EFF; Latin Extended Additional
+1F00; 1FFF; Greek Extended
+2000; 206F; General Punctuation
+2070; 209F; Superscripts and Subscripts
+20A0; 20CF; Currency Symbols
+20D0; 20FF; Combining Marks for Symbols
+2100; 214F; Letterlike Symbols
+2150; 218F; Number Forms
+2190; 21FF; Arrows
+2200; 22FF; Mathematical Operators
+2300; 23FF; Miscellaneous Technical
+2400; 243F; Control Pictures
+2440; 245F; Optical Character Recognition
+2460; 24FF; Enclosed Alphanumerics
+2500; 257F; Box Drawing
+2580; 259F; Block Elements
+25A0; 25FF; Geometric Shapes
+2600; 26FF; Miscellaneous Symbols
+2700; 27BF; Dingbats
+2800; 28FF; Braille Patterns
+2E80; 2EFF; CJK Radicals Supplement
+2F00; 2FDF; Kangxi Radicals
+2FF0; 2FFF; Ideographic Description Characters
+3000; 303F; CJK Symbols and Punctuation
+3040; 309F; Hiragana
+30A0; 30FF; Katakana
+3100; 312F; Bopomofo
+3130; 318F; Hangul Compatibility Jamo
+3190; 319F; Kanbun
+31A0; 31BF; Bopomofo Extended
+3200; 32FF; Enclosed CJK Letters and Months
+3300; 33FF; CJK Compatibility
+3400; 4DB5; CJK Unified Ideographs Extension A
+4E00; 9FFF; CJK Unified Ideographs
+A000; A48F; Yi Syllables
+A490; A4CF; Yi Radicals
+AC00; D7A3; Hangul Syllables
+D800; DB7F; High Surrogates
+DB80; DBFF; High Private Use Surrogates
+DC00; DFFF; Low Surrogates
+E000; F8FF; Private Use
+F900; FAFF; CJK Compatibility Ideographs
+FB00; FB4F; Alphabetic Presentation Forms
+FB50; FDFF; Arabic Presentation Forms-A
+FE20; FE2F; Combining Half Marks
+FE30; FE4F; CJK Compatibility Forms
+FE50; FE6F; Small Form Variants
+FE70; FEFE; Arabic Presentation Forms-B
+FEFF; FEFF; Specials
+FF00; FFEF; Halfwidth and Fullwidth Forms
+FFF0; FFFD; Specials
--- a/libjava/gnu/gcj/convert/UnicodeCharacterDatabase-3.0.0.html
+++ b/libjava/gnu/gcj/convert/UnicodeCharacterDatabase-3.0.0.html
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
+       "http://www.w3.org/TR/REC-html40/loose.dtd"> 
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<meta http-equiv="Content-Language" content="en-us">
+<meta name="GENERATOR" content="Microsoft FrontPage 4.0">
+<meta name="ProgId" content="FrontPage.Editor.Document">
+<link rel="stylesheet" href="http://www.unicode.org/unicode.css" type="text/css">
+<title>Unicode Character Database</title>
+</head>
+<body>
+<h1>UNICODE CHARACTER DATABASE<br>  
+Version 3.0.0</h1>
+<table border="1" cellspacing="2" cellpadding="0" height="87" width="100%">
+  <tr>
+    <td valign="TOP" width="144">Revision</td>
+    <td valign="TOP">3.0.0</td>
+  </tr>
+  <tr>
+    <td valign="TOP" width="144">Authors</td>
+    <td valign="TOP">Mark Davis and Ken Whistler</td>
+  </tr>
+  <tr>
+    <td valign="TOP" width="144">Date</td>
+    <td valign="TOP">1999-09-11</td>
+  </tr>
+  <tr>
+    <td valign="TOP" width="144">This Version</td>
+    <td valign="TOP"><a href="ftp://ftp.unicode.org/Public/3.0-Update/UnicodeCharacterDatabase-3.0.0.html">ftp://ftp.unicode.org/Public/3.0-Update/UnicodeCharacterDatabase-3.0.0.html</a></td>
+  </tr>
+  <tr>
+    <td valign="TOP" width="144">Previous Version</td>
+    <td valign="TOP">n/a</td>
+  </tr>
+  <tr>
+    <td valign="TOP" width="144">Latest Version</td>
+    <td valign="TOP"><a href="ftp://ftp.unicode.org/Public/3.0-Update/UnicodeCharacterDatabase-3.0.0.html">ftp://ftp.unicode.org/Public/3.0-Update/UnicodeCharacterDatabase-3.0.0.html</a></td>
+  </tr>
+</table>
+<p align="center">Copyright © 1995-1999 Unicode, Inc. All Rights reserved.</p>  
+<h2>Disclaimer</h2>  
+<p>The Unicode Character Database is provided as is by Unicode, Inc. No claims   
+are made as to fitness for any particular purpose. No warranties of any kind are   
+expressed or implied. The recipient agrees to determine applicability of   
+information provided. If this file has been purchased on magnetic or optical   
+media from Unicode, Inc., the sole remedy for any claim will be exchange of   
+defective media within 90 days of receipt.</p>  
+<p>This disclaimer is applicable for all other data files accompanying the   
+Unicode Character Database, some of which have been compiled by the Unicode   
+Consortium, and some of which have been supplied by other sources.</p>  
+<h2>Limitations on Rights to Redistribute This Data</h2>  
+<p>Recipient is granted the right to make copies in any form for internal   
+distribution and to freely use the information supplied in the creation of   
+products supporting the Unicode<sup>TM</sup> Standard. The files in the Unicode   
+Character Database can be redistributed to third parties or other organizations   
+(whether for profit or not) as long as this notice and the disclaimer notice are   
+retained. Information can be extracted from these files and used in   
+documentation or programs, as long as there is an accompanying notice indicating   
+the source.</p>  
+<h2>Introduction</h2>  
+<p>The Unicode Character Database is a set of files that define the Unicode   
+character properties and internal mappings. For more information about character   
+properties and mappings, see <i><a href="http://www.unicode.org/unicode/uni2book/u2.html">The   
+Unicode Standard</a></i>.</p>  
+<p>The Unicode Character Database has been updated to reflect Version 3.0 of the   
+Unicode Standard, with many characters added to those published in Version 2.0.   
+A number of corrections have also been made to case mappings or other errors in   
+the database noted since the publication of Version 2.0. Normative bidirectional   
+properties have also been modified to reflect decisions of the Unicode Technical   
+Committee.</p>  
+<p>For more information on versions of the Unicode Standard and how to reference   
+them, see <a href="http://www.unicode.org/unicode/standard/versions/">http://www.unicode.org/unicode/standard/versions/</a>.</p>  
+<h2>Conformance</h2>  
+<p>Character properties may be either normative or informative. <i>Normative</i>   
+means that implementations that claim conformance to the Unicode Standard (at a   
+particular version) and which make use of a particular property or field must   
+follow the specifications of the standard for that property or field in order to   
+be conformant. The term <i>normative</i> when applied to a property or field of   
+the Unicode Character Database, does <i>not</i> mean that the value of that   
+field will never change. Corrections and extensions to the standard in the   
+future may require minor changes to normative values, even though the Unicode   
+Technical Committee strives to minimize such changes. An <i>informative</i> property
+or field is strongly recommended, but a conformant implementation is free to use   
+or change such values as it may require while still being conformant to the   
+standard. Particular implementations may choose to override the properties and   
+mappings that are not normative. In that case, it is up to the implementer to   
+establish a protocol to convey that information.</p>  
+<h2>Files</h2>  
+<p>The following summarizes the files in the Unicode Character Database. &nbsp;For   
+more information about these files, see the referenced technical report or   
+section of Unicode Standard, Version 3.0.</p>  
+<p><b>UnicodeData.txt (Chapter 4)</b>  
+<ul>  
+  <li>The main file in the Unicode Character Database.</li>  
+  <li>For detailed information on the format, see <a href="UnicodeData.html">UnicodeData.html</a>.   
+    This file also characterizes which properties are normative and which are   
+    informative.</li>  
+</ul>  
+<p><b>PropList.txt (Chapter 4)</b>  
+<ul>  
+  <li>Additional informative properties list: <i>Alphabetic, Ideographic,</i>   
+    and <i>Mathematical</i>, among others.</li>  
+</ul>  
+<p><b>SpecialCasing.txt (Chapter 4)</b>  
+<ul>  
+  <li>List of informative special casing properties, including one-to-many   
+    mappings such as SHARP S =&gt; &quot;SS&quot;, and locale-specific mappings,   
+    such as for Turkish <i>dotless i</i>.</li>  
+</ul>  
+<p><b>Blocks.txt (Chapter 14)</b>  
+<ul>  
+  <li>List of normative block names.</li>  
+</ul>  
+<p><b>Jamo.txt (Chapter 4)</b>  
+<ul>  
+  <li>List of normative Jamo short names, used in deriving HANGUL SYLLABLE names   
+    algorithmically.</li>  
+</ul>  
+<p><b>ArabicShaping.txt (Section 8.2)</b>  
+<ul>  
+  <li>Basic Arabic and Syriac character shaping properties, such as initial,   
+    medial and final shapes. These properties are normative for minimal shaping   
+    of Arabic and Syriac. </li>  
+</ul>  
+<p><b>NamesList.txt (Chapter 14)</b>  
+<ul>  
+  <li>This file duplicates some of the material in the UnicodeData file, and   
+    adds informative annotations used in the character charts, as printed in the
+    Unicode Standard. </li>  
+  <li><b>Note: </b>The information in NamesList.txt and Index.txt files matches   
+    the appropriate version of the book. Changes in the Unicode Character   
+    Database since then may not be reflected in these files, since they are   
+    primarily of archival interest.</li>  
+</ul>  
+<p><b>Index.txt (Chapter 14)</b>  
+<ul>  
+  <li>Informative index to Unicode characters, as printed in the Unicode   
+    Standard</li>  
+  <li><b>Note: </b>The information in NamesList.txt and Index.txt files matches   
+    the appropriate version of the book. Changes in the Unicode Character   
+    Database since then may not be reflected in these files, since they are   
+    primarily of archival interest.</li>  
+</ul>  
+<p><b>CompositionExclusions.txt (<a href="http://www.unicode.org/unicode/reports/tr15/">UTR#15   
+Unicode Normalization Forms</a>)</b>  
+<ul>  
+  <li>Normative properties for normalization.</li>  
+</ul>  
+<p><b>LineBreak.txt (<a href="http://www.unicode.org/unicode/reports/tr14/">UTR   
+#14: Line Breaking Properties</a>)</b>  
+<ul>  
+  <li>Normative and informative properties for line breaking. To see which   
+    properties are informative and which are normative, consult UTR#14.</li>  
+</ul>  
+<p><b>EastAsianWidth.txt (<a href="http://www.unicode.org/unicode/reports/tr11/">UTR   
+#11: East Asian Character Width</a>)</b>  
+<ul>  
+  <li>Informative properties for determining the choice of wide vs. narrow   
+    glyphs in East Asian contexts.</li>  
+</ul>  
+<p><b>diffXvY.txt</b>  
+<ul>  
+  <li>Mechanically-generated informative files containing accumulated   
+    differences between successive versions of UnicodeData.txt</li>  
+</ul>  
+</body>  
+</html>  
--- a/libjava/gnu/gcj/convert/UnicodeData-3.0.0.txt
+++ b/libjava/gnu/gcj/convert/UnicodeData-3.0.0.txt
--- a/libjava/gnu/java/lang/CharData.java
+++ b/libjava/gnu/java/lang/CharData.java
+/* gnu/java/lang/CharData -- Database for java.lang.Character Unicode info
+   Copyright (C) 2002 Free Software Foundation, Inc.
+   *** This file is generated by scripts/unicode-muncher.pl ***
+This file is part of GNU Classpath.
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+package gnu.java.lang;
+/**
+ * This contains the info about the unicode characters, that
+ * java.lang.Character needs.  It is generated automatically from
+ * <code>gnu/gcj/convert/UnicodeData-3.0.0.txt</code>, by some
+ * perl scripts. This Unicode definition file can be found on the
+ * <a href="http://www.unicode.org">http://www.unicode.org</a> website.
+ * JDK 1.4 uses Unicode version 3.0.0.
+ *
+ * The data is stored as string constants, but Character will convert these
+ * Strings to their respective <code>char[]</code> components.  The field
+ * <code>BLOCKS</code> stores the offset of a block of 2<sup>SHIFT</sup>
+ * characters within <code>DATA</code>.  The DATA field, in turn, stores
+ * information about each character in the low order bits, and an offset
+ * into the attribute tables <code>UPPER</code>, <code>LOWER</code>,
+ * <code>NUM_VALUE</code>, and <code>DIRECTION</code>.  Notice that the
+ * attribute tables are much smaller than 0xffff entries; as many characters
+ * in Unicode share common attributes.  Finally, there is a listing for
+ * <code>TITLE</code> exceptions (most characters just have the same
+ * title case as upper case).
+ *
+ * @author scripts/unicode-muncher.pl (written by Jochen Hoenicke,
+ *         Eric Blake)
+ * @see Character
+ */
+public interface CharData
+{
+  /**
+   * The Unicode definition file that was parsed to build this database.
+   */
+  String SOURCE = "gnu/gcj/convert/UnicodeData-3.0.0.txt";
+  /**
+   * The character shift amount to look up the block offset. In other words,
+   * <code>(char) (BLOCKS.value[ch >> SHIFT] + ch)</code> is the index where
+   * <code>ch</code> is described in <code>DATA</code>.
+   */
+  int SHIFT = 5;
+  /**
+   * The mapping of character blocks to their location in <code>DATA</code>.
+   * Each entry has been adjusted so that the 16-bit sum with the desired
+   * character gives the actual index into <code>DATA</code>.
+   */
+  String BLOCKS
+    = "\u01c2\u01c2\u01c1\u012c\u012b\u01a0\u01f8\u027e\u0201\u0290\u01b7"
+    + "\u02c9\u0157\u02a9\u023f\u01b8\u0101\u0257\u02f9\u02f9\u0370\u0363"
+    + "\ufe8f\u033e\uff85\uff65\ufdb5\ufda1\uffbd\u02be\uffbe\uffe9\ufea8"
+    + "\uff2e\u022f\ufea1\uff9d\ufe61\uffa9\ufb20\u00c3\u010d\u00fe\ufb61"
+    + "\ufb5a\u0105\u0103\u00f8\ufe8b\u0223\u0258\u01c1\u01f6\u01d6\ufee1"
+    + "\u0223\ufdaa\ufb55\u0163\ufe63\u00f6\u00fd\ufe03\ufde3\ufdc3\ufda3"
+    + "\ufd83\ufd63\ufd43\ufd23\ufd03\ufce3_J\u00a5\u0133\ufe08"
+    + "]\u014f\u01a2\uf669\u0123\u0122\uf8c1\ufe50\u0102\u0101\ufa43"
+    + "\ufc88\ufea7\u00c0\ufca1\ufb8f\uf47d\u0099\ufb11\ufe1a\ufd22\ufb29"
+    + "5\uf3b9\ufe51\ufcc8\uffb5\uf339\ufa92\uff85\uff35\ufa4e\uf766"
+    + "\uff25\ufad1\ufb63\ufc34\ufedf\uf763\ufcaa\ufa82\ufdb9\uf6e3\ufe3f"
+    + "\ufcc1\ufe49\uf7eb\uf70f\uf2e8\ufd99\uf5e3\uf964\ufbfc\ufd91\uf563"
+    + "\uf543\uf43c\uf436\uef6b\uf864\uf844\ufc52\uf804\uf7e4\ufc09\uf7a4"
+    + "\uee9c\ufb8a\uf744\uf7f4\uf704\uf7c4\uf78c\uf46b\uf4cc\uf72c\uf644"
+    + "\uf3fb\uf647\uf243\uf5c4\uf5a4\ueca1\uf563\uf544\uf524\uf504\uf4e4"
+    + "\uf4c4\uf4a4\uf484\uf464\uf444\uf424\uf404\uf3e4\uf3c4\uf3a4\uf384"
+    + "\uf364\uf344\uf324\uf283\uf771\uf2c4\uf2a4\uf731\ueec3\ueea3\uee83"
+    + "\uee63\uf1e4\uf49c\uf53f\ued49\uf2d1\uf144\ue8ed\ue81e\uf0e4\uf591"
+    + "\ued03\uece3\uecc3\ueca3\uec83\uec63\uec43\uec23\uec03\uebe3\uebc3"
+    + "\ueba3\ueb83\ueb63\ueb43\ueb23\ueb03\ueae3\ueac3\ueaa3\uea83\uea63"
+    + "\uea43\uea23\uea03\ue9e3\ue9c3\ue9a3\ue983\ue963\ue943\ue923\ue903"
+    + "\ue8e3\ue8c3\ue8a3\ue883\ue863\ue843\ue823\ue803\ue7e3\ue501\ue4e1"
+    + "\ue4c1\ue4a1\uefd1\ue461\ue441\ue427\uef81\uef51\uef51\uef51\uef51"
+    + "\uef41\ue1ad\uef40\uef40\uef40\ue1e0\ue37f\uef00\ue9d3\uebe5\ue4df"
+    + "\ueece\uef5f\ue490\uef3f\ueee3\uef76\uee6e\uef6a\uef77\uef72\uee8b"
+    + "\uefcd\uee77\uee1f\uee21\uef5f\uefd7\uee90\uefcd\uefdb\uef63\ue223"
+    + "\ue203\ue1e3\ued25\ued37\ued13\uef16\uef02\ued41\ued37\ued2d\uec25"
+    + "\uec05\uebe5\uebc5\uebc8\uea76\ueb63\ueb66\ue9b0\ueb05\ueae5\ue9e3"
+    + "\udf43\udf23\udf03\udee3\ue90a\uea1d\ueb52\ueb49\ueb3f\ueadf\ude03"
+    + "\udde3\ue925\ue905\ue8e5\ue8c5\ue8a5\ue885\ue865\ue845\udcc3\udca3"
+    + "\udc83\udc63\udc43\udc23\udc03\udbe3\udbc3\udba3\udb83\udb63\udb43"
+    + "\udb23\udb03\udae3\udac3\udaa3\uda83\uda63\uda43\uda23\uda03\ud9e3"
+    + "\ud9c3\ud9a3\ud983\ud963\ud943\ud923\ud903\ud8e3\ud8c3\ud8a3\ud883"
+    + "\ud863\ud843\ud823\ud803\ud7e3\ud7c3\ud7a3\ud783\ud763\ue2ab\ue285"
+    + "\ue265\ue26a\ue225\ue205\ue1e5\ue1c5\ue1a5\ue185\ue188\udd89\ue3a2"
+    + "\udfcd\ud923\ud904\ue342\ud8c3\ud8a4\udcd1\udcac\ud67b\ud824\ud804"
+    + "\ue262\ucede\ud403\ud3e3\ue110\ue0e3\ue1c5\ue0b1\ue0b1\ue158\udf11"
+    + "\ue02e\ue00d\udfed\udfcd\ude66\udf8d\udf6d\udf4f\udf2e\ud564\ud544"
+    + "\ud524\ud504\ud4e4\ud4c4\ud4a4\ud484\ud464\ud444\ud424\ud404\ud3e4"
+    + "\ud3c4\ud3a4\ud384\ud364\ud344\ud324\ud304\ud2e4\ud2c4\ud2a4\ud284"
+    + "\ud264\ud244\ud224\ud204\ud1e4\ud1c4\ud1a4\ud184\ud164\ud144\ud124"
+    + "\ud104\ud0e4\ud0c4\ud0a4\ud084\ud064\ud044\ud024\ud004\ucfe4\ucfc4"
+    + "\ucfa4\ucf84\ucf64\ucf44\ucf24\ucf04\ucee4\ucec4\ucea4\uce84\uce64"
+    + "\uce44\uce24\uce04\ucde4\ucdc4\ucda4\ucd84\ucd64\ucd44\ucd24\ucd04"
+    + "\ucce4\uccc4\ucca4\ucc84\ucc64\ucc44\ucc24\ucc04\ucbe4\ucbc4\ucba4"
+    + "\ucb84\ucb64\ucb44\ucb24\ucb04\ucae4\ucac4\ucaa4\uca84\uca64\uca44"
+    + "\uca24\uca04\uc9e4\uc9c4\uc9a4\uc984\uc964\uc944\uc924\uc904\uc8e4"
+    + "\uc8c4\uc8a4\uc884\uc864\uc844\uc824\uc804\uc7e4\uc7c4\uc7a4\uc784"
+    + "\uc764\uc744\uc724\uc704\uc6e4\uc6c4\uc6a4\uc684\uc664\uc644\uc624"
+    + "\uc604\uc5e4\uc5c4\uc5a4\uc584\uc564\uc544\uc524\uc504\uc4e4\uc4c4"
+    + "\uc4a4\uc484\uc464\uc444\uc424\uc404\uc3e4\uc3c4\uc3a4\uc384\uc364"
+    + "\uc344\uc324\uc304\uc2e4\uc2c4\uc2a4\uc284\uc264\uc244\uc224\uc204"
+    + "\uc1e4\uc1c4\uc1a4\uc184\uc164\uc144\uc124\uc104\uc0e4\uc0c4\uc0a4"
+    + "\uc084\uc064\uc044\uc024\uc004\ubfe4\ubfc4\ubfa4\ubf84\ubf64\ubf44"
+    + "\ubf24\ubf04\ubee4\ubec4\ubea4\ube84\ube64\ube44\ube24\ube04\ubde4"
+    + "\ubdc4\ubda4\ubd84\ubd64\ubd44\ubd24\ubd04\ubce4\ubcc4\ubca4\ubc84"
+    + "\ubc64\ubc44\ubc24\ubc04\ubbe4\ub2e0\ub803\ub7e3\ubb64\ubb44\ubb24"
+    + "\ubb04\ubae4\ubac4\ubaa4\uba84\uba64\uba44\uba24\uba04\ub9e4\ub9c4"
+    + "\ub9a4\ub984\ub964\ub944\ub924\ub904\ub8e4\ub8c4\ub8a4\ub884\ub864"
+    + "\ub844\ub824\ub804\ub7e4\ub7c4\ub7a4\ub784\ub764\ub744\ub724\ub704"
+    + "\ub6e4\ub6c4\ub6a4\ub684\ub664\ub644\ub624\ub604\ub5e4\ub5c4\ub5a4"
+    + "\ub584\ub564\ub544\ub524\ub504\ub4e4\ub4c4\ub4a4\ub484\ub464\ub444"
+    + "\ub424\ub404\ub3e4\ub3c4\ub3a4\ub384\ub364\ub344\ub324\ub304\ub2e4"
+    + "\ub2c4\ub2a4\ub284\ub264\ub244\ub224\ub204\ub1e4\ub1c4\ub1a4\ub184"
+    + "\ub164\ub144\ub124\ub104\ub0e4\ub0c4\ub0a4\ub084\ub064\ub044\ub024"
+    + "\ub004\uafe4\uafc4\uafa4\uaf84\uaf64\uaf44\uaf24\uaf04\uaee4\uaec4"
+    + "\uaea4\uae84\uae64\uae44\uae24\uae04\uade4\uadc4\uada4\uad84\uad64"
+    + "\uad44\uad24\uad04\uace4\uacc4\uaca4\uac84\uac64\uac44\uac24\uac04"
+    + "\uabe4\uabc4\uaba4\uab84\uab64\uab44\uab24\uab04\uaae4\uaac4\uaaa4"
+    + "\uaa84\uaa64\uaa44\uaa24\uaa04\ua9e4\ua9c4\ua9a4\ua984\ua964\ua944"
+    + "\ua924\ua904\ua8e4\ua8c4\ua8a4\ua884\ua864\ua844\ua824\ua804\ua7e4"
+    + "\ua7c4\ua7a4\ua784\ua764\ua744\ua724\ua704\ua6e4\ua6c4\ua6a4\ua684"
+    + "\ua664\ua644\ua624\ua604\ua5e4\ua5c4\ua5a4\ua584\ua564\ua544\ua524"
+    + "\ua504\ua4e4\ua4c4\ua4a4\ua484\ua464\ua444\ua424\ua404\ua3e4\ua3c4"
+    + "\ua3a4\ua384\ua364\ua344\ua324\ua304\ua2e4\ua2c4\ua2a4\ua284\ua264"
+    + "\ua244\ua224\ua204\ua1e4\ua1c4\ua1a4\ua184\ua164\ua144\ua124\ua104"
+    + "\ua0e4\ua0c4\ua0a4\ua084\ua064\ua044\ua024\ua004\u9fe4\u9fc4\u9fa4"
+    + "\u9f84\u9f64\u9f44\u9f24\u9f04\u9ee4\u9ec4\u9ea4\u9e84\u9e64\u9e44"
+    + "\u9e24\u9e04\u9de4\u9dc4\u9da4\u9d84\u9d64\u9d44\u9d24\u9d04\u9ce4"
+    + "\u9cc4\u9ca4\u9c84\u9c64\u9c44\u9c24\u9c04\u9be4\u9bc4\u9ba4\u9b84"
+    + "\u9b64\u9b44\u9b24\u9b04\u9ae4\u9ac4\u9aa4\u9a84\u9a64\u9a44\u9a24"
+    + "\u9a04\u99e4\u99c4\u99a4\u9984\u9964\u9944\u9924\u9904\u98e4\u98c4"
+    + "\u98a4\u9884\u9864\u9844\u9824\u9804\u97e4\u97c4\u97a4\u9784\u9764"
+    + "\u9744\u9724\u9704\u96e4\u96c4\u96a4\u9684\u9664\u9644\u9624\u9604"
+    + "\u95e4\u95c4\u95a4\u9584\u9564\u9544\u9524\u9504\u94e4\u94c4\u94a4"
+    + "\u9484\u9464\u9444\u9424\u9404\u93e4\u93c4\u93a4\u9384\u9364\u9344"
+    + "\u9324\u9304\u92e4\u92c4\u92a4\u9284\u9264\u9244\u9224\u9204\u91e4"
+    + "\u91c4\u91a4\u9184\u9164\u9144\u9124\u9104\u90e4\u90c4\u90a4\u9084"
+    + "\u9064\u9044\u9024\u9004\u8fe4\u8fc4\u8fa4\u8f84\u8f64\u8f44\u8f24"
+    + "\u8f04\u8ee4\u8ec4\u8ea4\u8e84\u8e64\u8e44\u8e24\u8e04\u8de4\u8dc4"
+    + "\u8da4\u8d84\u8d64\u8d44\u8d24\u8d04\u8ce4\u8cc4\u8ca4\u8c84\u8c64"
+    + "\u8c44\u8c24\u8c04\u8be4\u8bc4\u8ba4\u8b84\u8b64\u8b44\u8b24\u8b04"
+    + "\u8ae4\u8ac4\u8aa4\u8a84\u8a64\u8a44\u8a24\u8a04\u89e4\u89c4\u89a4"
+    + "\u8984\u8964\u8944\u8924\u8904\u88e4\u88c4\u88a4\u8884\u8864\u8844"
+    + "\u8824\u8804\u87e4\u87c4\u87a4\u8784\u8764\u8744\u8724\u8704\u86e4"
+    + "\u86c4\u86a4\u8684\u8664\u8644\u8624\u8604\u85e4\u85c4\u85a4\u8584"
+    + "\u8564\u8544\u8524\u8504\u84e4\u84c4\u84a4\u8484\u8464\u8444\u8424"
+    + "\u8404\u83e4\u83c4\u83a4\u8384\u8364\u8344\u8324\u8304\u82e4\u82c4"
+    + "\u82a4\u8284\u8264\u8244\u8224\u8204\u81e4\u81c4\u81a4\u8184\u8164"
+    + "\u8144\u8124\u8104\u80e4\u80c4\u80a4\u8084\u8064\u8044\u8024\u8004"
+    + "\u7fe4\u7fc4\u7fa4\u7f84\u7f64\u7f44\u7f24\u7f04\u7ee4\u7ec4\u7ea4"
+    + "\u7e84\u7e64\u7e44\u7e24\u7e04\u7de4\u7dc4\u7da4\u7d84\u7d64\u7d44"
+    + "\u7d24\u7d04\u7ce4\u7cc4\u7ca4\u7c84\u7c64\u7c44\u7c24\u7c04\u7be4"
+    + "\u7bc4\u7ba4\u7b84\u7b64\u7b44\u7b24\u7b04\u7ae4\u7ac4\u7aa4\u7a84"
+    + "\u7a64\u7a44\u7a24\u7a04\u79e4\u79c4\u79a4\u7984\u7964\u7944\u7924"
+    + "\u7904\u78e4\u78c4\u78a4\u7884\u7864\u7844\u7824\u7804\u77e4\u77c4"
+    + "\u77a4\u7784\u7764\u7744\u7724\u7704\u76e4\u76c4\u76a4\u7684\u7664"
+    + "\u7644\u7624\u7604\u75e4\u75c4\u75a4\u7584\u7564\u7544\u7524\u7504"
+    + "\u74e4\u74c4\u74a4\u7484\u7464\u7444\u7424\u7404\u73e4\u73c4\u73a4"
+    + "\u7384\u7364\u7344\u7324\u7304\u72e4\u72c4\u72a4\u7284\u7264\u7244"
+    + "\u7224\u7204\u71e4\u71c4\u71a4\u7184\u7164\u7144\u7124\u7104\u70e4"
+    + "\u70c4\u70a4\u7084\u7064\u7044\u7024\u7004\u6fe4\u6fc4\u6fa4\u6f84"
+    + "\u6f64\u6f44\u6f24\u6f04\u6ee4\u6ec4\u6ea4\u6e84\u6e64\u6e44\u6e24"
+    + "\u6e04\u6de4\u6dc4\u6da4\u6d84\u6d64\u6d44\u6d24\u6d04\u6ce4\u6cc4"
+    + "\u6ca4\u6c84\u6c64\u6c44\u6c24\u6c04\u6be4\u6bc4\u6ba4\u6b84\u6b64"
+    + "\u6b44\u6b24\u6b04\u6ae4\u6ac4\u6aa4\u6a84\u6a64\u6a44\u6a24\u6a04"
+    + "\u69e4\u60f0\u6603\u65e3\u6964\u6944\u6924\u6904\u68e4\u68c4\u68a4"
+    + "\u6884\u6864\u6844\u6824\u6804\u67e4\u67c4\u67a4\u6784\u6764\u6744"
+    + "\u6724\u6704\u66e4\u66c4\u66a4\u6684\u6664\u6644\u6624\u6604\u65e4"
+    + "\u65c4\u65a4\u6584\u6564\u6544\u6524\u6504\u6b20\u6ddb\u6e96\u60e3"
+    + "\u60c3\u60a3\u6083\u6063\u6043\u6023\u6003\u5fe3\u5fc3\u5fa3\u5f83"
+    + "\u5f63\u5f43\u5f23\u5f03\u5ee3\u5ec3\u5ea3\u5e83\u5e63\u5e43\u5e23"
+    + "\u5e03\u5de3\u5dc3\u5da3\u5d83\u5d63\u5d43\u5d23\u5d03\u5ce3\u5cc3"
+    + "\u5ca3\u5c83\u5c63\u5c43\u5c23\u5c03\u5be3\u5bc3\u5ba3\u5b83\u5b63"
+    + "\u5b43\u5b23\u5b03\u5ae3\u5ac3\u5aa3\u5a83\u5a63\u5a43\u5a23\u5a03"
+    + "\u59e3\u5d64\u5d44\u5d24\u5d04\u5ce4\u5cc4\u5ca4\u5c84\u5c64\u5c44"
+    + "\u5c24\u5c04\u5be4\u5bc4\u5ba4\u5b84\u5b64\u5b44\u5b24\u5b04\u5ae4"
+    + "\u5ac4\u5aa4\u5a84\u5a64\u5a44\u5a24\u5a04\u59e4\u59c4\u59a4\u5984"
+    + "\u5964\u5944\u5924\u5904\u58e4\u58c4\u58a4\u5884\u5864\u5844\u5824"
+    + "\u5804\u57e4\u57c4\u57a4\u5784\u5764\u5744\u5724\u5704\u56e4\u56c4"
+    + "\u56a4\u5684\u5664\u5644\u5624\u5604\u55e4\u55c4\u55a4\u5584\u5564"
+    + "\u5544\u5524\u5504\u54e4\u54c4\u54a4\u5484\u5464\u5444\u5424\u5404"
+    + "\u53e4\u53c4\u53a4\u5384\u5364\u5344\u5324\u5304\u52e4\u52c4\u52a4"
+    + "\u5284\u5264\u5244\u5224\u5204\u51e4\u51c4\u51a4\u5184\u5164\u5144"
+    + "\u5124\u5104\u50e4\u50c4\u50a4\u5084\u5064\u5044\u5024\u5004\u4fe4"
+    + "\u4fc4\u4fa4\u4f84\u4f64\u4f44\u4f24\u4f04\u4ee4\u4ec4\u4ea4\u4e84"
+    + "\u4e64\u4e44\u4e24\u4e04\u4de4\u4dc4\u4da4\u4d84\u4d64\u4d44\u4d24"
+    + "\u4d04\u4ce4\u4cc4\u4ca4\u4c84\u4c64\u4c44\u4c24\u4c04\u4be4\u4bc4"
+    + "\u4ba4\u4b84\u4b64\u4b44\u4b24\u4b04\u4ae4\u4ac4\u4aa4\u4a84\u4a64"
+    + "\u4a44\u4a24\u4a04\u49e4\u49c4\u49a4\u4984\u4964\u4944\u4924\u4904"
+    + "\u48e4\u48c4\u48a4\u4884\u4864\u4844\u4824\u4804\u47e4\u47c4\u47a4"
+    + "\u4784\u4764\u4744\u4724\u4704\u46e4\u46c4\u46a4\u4684\u4664\u4644"
+    + "\u4624\u4604\u45e4\u45c4\u45a4\u4584\u4564\u4544\u4524\u4504\u44e4"
+    + "\u44c4\u44a4\u4484\u4464\u4444\u4424\u4404\u43e4\u43c4\u43a4\u4384"
+    + "\u4364\u4344\u4324\u4304\u42e4\u42c4\u42a4\u4284\u4264\u4244\u4224"
+    + "\u4204\u41e4\u41c4\u41a4\u4184\u4164\u4144\u4124\u4104\u40e4\u40c4"
+    + "\u40a4\u4084\u4064\u4044\u4024\u4004\u3fe4\u3fc4\u3fa4\u3f84\u3f64"
+    + "\u3f44\u3f24\u3f04\u3ee4\u3ec4\u3ea4\u3e84\u3e64\u3e44\u3e24\u3e04"
+    + "\u3de4\u3dc4\u3da4\u3d84\u3d64\u3d44\u3d24\u3d04\u3ce4\u3cc4\u3ca4"
+    + "\u3c84\u3c64\u3c44\u3c24\u3c04\u3be4\u3bc4\u3ba4\u3b84\u3b64\u3b44"
+    + "\u3b24\u3b04\u3ae4\u3ac4\u3aa4\u3a84\u3a64\u3a44\u3a24\u3a04\u39e4"
+    + "\u39c4\u39a4\u3984\u3964\u3944\u3924\u3904\u38e4\u38c4\u38a4\u3884"
+    + "\u3864\u3844\u3824\u3804\u37e4\u37c4\u37a4\u3784\u3764\u3744\u3724"
+    + "\u3704\u36e4\u36c4\u36a4\u3684\u3664\u3644\u3624\u3604\u35e4\u35c4"
+    + "\u35a4\u3584\u3564\u3544\u3524\u3504\u34e4\u34c4\u34a4\u3484\u3464"
+    + "\u3444\u3424\u3404\u33e4\u33c4\u33a4\u3384\u3364\u3344\u3324\u3304"
+    + "\u32e4\u32c4\u32a4\u3284\u3264\u3244\u3224\u3204\u31e4\u28f2\u2e03"
+    + "\u2de3\u3c25\u3c05\u3be5\u3bc5\u3ba5\u3b85\u3b65\u3b45\u3b25\u3b05"
+    + "\u3ae5\u3ac5\u3aa5\u3a85\u3a65\u3a45\u3a25\u3a05\u39e5\u39c5\u39a5"
+    + "\u3985\u3965\u3945\u3925\u3905\u38e5\u38c5\u38a5\u3885\u3865\u3845"
+    + "\u3825\u3805\u37e5\u37c5\u37a5\u3785\u3765\u3745\u3725\u3705\u36e5"
+    + "\u36c5\u36a5\u3685\u3665\u3645\u3625\u3605\u35e5\u35c5\u35a5\u3585"
+    + "\u3565\u3545\u3525\u3505\u34e5\u34c5\u34a5\u3485\u3465\u3445\u3445"
+    + "\u3425\u3405\u33e5\u33c5\u33a5\u3385\u3365\u3345\u3325\u3305\u32e5"
+    + "\u32c5\u32a5\u3285\u3265\u3245\u3225\u3205\u31e5\u31c5\u31a5\u3185"
+    + "\u3165\u3145\u3125\u3105\u30e5\u30c5\u30a5\u3085\u3065\u3045\u3025"
+    + "\u3005\u2fe5\u2fc5\u2fa5\u2f85\u2f65\u2f45\u2f25\u2f05\u2ee5\u2ec5"
+    + "\u2ea5\u2e85\u2e65\u2e45\u2e25\u2e05\u2de5\u2dc5\u2da5\u2d85\u2d65"
+    + "\u2d45\u2d25\u2d05\u2ce5\u2cc5\u2ca5\u2c85\u2c65\u2c45\u2c25\u2c05"
+    + "\u2be5\u2bc5\u2ba5\u2b85\u2b65\u2b45\u2b25\u2b05\u2ae5\u2ac5\u2aa5"
+    + "\u2a85\u2a65\u2a45\u2a25\u2a05\u29e5\u29c5\u29a5\u2985\u2965\u2945"
+    + "\u2925\u2905\u28e5\u28c5\u28a5\u2885\u2865\u2845\u2825\u2805\u27e5"
+    + "\u27c5\u27a5\u2785\u2765\u2745\u2725\u2705\u26e5\u26c5\u26a5\u2685"
+    + "\u2665\u2645\u2625\u2605\u25e5\u25c5\u25a5\u2585\u2565\u2545\u2525"
+    + "\u2505\u24e5\u24c5\u24a5\u2485\u2465\u2445\u2425\u2405\u23e5\u23c5"
+    + "\u23a5\u2385\u2365\u2345\u2325\u2305\u22e5\u22c5\u22a5\u2285\u2265"
+    + "\u2245\u2225\u2205\u21e5\u21c5\u21a5\u2185\u2165\u2145\u2125\u2105"
+    + "\u20e5\u20c5\u20a5\u2085\u2065\u2045\u2025\u2005\u1fe5\u1fc5\u1fa5"
+    + "\u1f85\u1f65\u1f45\u1f25\u1f05\u1ee5\u1ec5\u1ea5\u1e85\u1e65\u1e45"
+    + "\u1e25\u1e05\u1de5\u1dc5\u1da5\u1d85\u1d65\u1d45\u1d25\u1d05\u1ce5"
+    + "\u1cc5\u1ca5\u1c85\u1c65\u1c45\u1c25\u1c05\u1be5\u1bc5\u1ba5\u1b85"
+    + "\u1b65\u1064\u1044\u1024\u1004\u0fe4\u0fc4\u0fa4\u0f84\u0f64\u0668"
+    + "\u0b83\u0b63\u0b43\u0b23\u0b03\u0ae3\u0a72\u1945\u0a51\u0d16\u0cf6"
+    + "\u0cac\u0ca3\u0c96\u0c76\u0c56\u0c36\u0c16\u0bf6\u0bd6\u0bb6\u0b96"
+    + "\u0b76\u0b58\u0b26\u0b16\u0ab2\u0ad6\u0a96\u02c2\u07c3\u1665\u1661"
+    + "\u03ff\u09f6\u09d6\u09b6\u05dc\u0bb2\u15a1\u0fc0\u01c0\u01b1\u09c5"
+    + "\u0826\u127f";
+  /**
+   * Information about each character.  The low order 5 bits form the
+   * character type, the next bit is a flag for non-breaking spaces, and the
+   * next bit is a flag for mirrored directionality.  The high order 9 bits
+   * form the offset into the attribute tables.  Note that this limits the
+   * number of unique character attributes to 512, which is not a problem
+   * as of Unicode version 3.2.0, but may soon become one.
+   */
+  String DATA
+    = "\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81"
+    + "\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002"
+    + "\u3e00\u3e00\u2f81\u3002\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3a05"
+    + "\u3a05\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05\u3e00\u3e00\u3e00\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05\u3a05"
+    + "\u3e00\u3e00\u3e00\u3e00\u5a88\u5a88\u3e00\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00"
+    + "\u3e00\u3e00\u3e00\u5098\u3e00\u3e00\u3e00\u3e00\u4586\u3e00\u3e00"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00\u3e00\u3e00\u3a05"
+    + "\u3a05\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305"
+    + "\u5305\u5305\u5305\u3e00\u3e00\u3e00\u3e00\u5102\u5102\u5102\u5102"
+    + "\u5102\u5102\u5102\u5102\u5102\u5102\u5102\u5102\u5102\u5102\u5102"
+    + "\u5102\u5102\u5102\u5102\u5102\u5102\u5102\u5102\u5102\u5102\u5102"
+    + "\u5102\u5102\u5102\u5102\u5102\u2902\u3e00\u5098\u2a14\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u4586\u4586\u4586\u4586\u4586\u4586\u4586"
+    + "\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4606\u4586\u4586"
+    + "\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u1a1b\u1a1b\u3e00\u3e00\u3e00\u3e00\u4504\u3e00\u3e00"
+    + "\u3e00\u0298\u3e00\u0298\u6515\u6596\u0298\u1a97\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u4504\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u4504\u4504\u1a1b\u1a1b\u1a1b\u1a1b"
+    + "\u1a1b\u1a1b\u1a1b\u1a1b\u1a1b\u1a1b\u1a1b\u1a1b\u1a1b\u1a1b\u4504"
+    + "\u4504\u1a1b\u1a1b\u1a1b\u1a1b\u1a1b\u1a1b\u1a1b\u1a1b\u1a1b\u1a1b"
+    + "\u1a1b\u1a1b\u1a1b\u1a1b\u2902\u7102\u2902\u3e00\u2902\u2902\u7301"
+    + "\u7301\u7301\u7301\u7203\u1a1b\u1a1b\u1a1b\u6c82\u6c82\u2902\u2902"
+    + "\u3e00\u3e00\u2902\u2902\u6d01\u6d01\u7381\u7381\u3e00\u1a1b\u1a1b"
+    + "\u1a1b\u1b02\u1b82\u1c02\u1c82\u1d02\u1d82\u1e02\u1e82\u1f02\u1f82"
+    + "\u2002\u2082\u2102\u2182\u2202\u2282\u2302\u2382\u2402\u2482\u2502"
+    + "\u2582\u2602\u2682\u2702\u2782\u0455\u0c99\u04d6\u0c99\017\017"
+    + "\017\017\017\u010f\017\017\017\017\017\017\017"
+    + "\017\017\017\017\017\017\017\017\017\017\017"
+    + "\017\017\017\017\017\017\017\017\u008f\u010f\u008f"
+    + "\u018f\u010f\017\017\017\017\017\017\017\017\017"
+    + "\017\017\017\017\017\u010f\u010f\u010f\u008f\u020c\u0298"
+    + "\u0298\u0318\u039a\u0318\u0298\u0298\u0455\u04d6\u0298\u0519\u0598"
+    + "\u0614\u0598\u0698\u0709\u0789\u0809\u0889\u0909\u0989\u0a09\u0a89"
+    + "\u0b09\u0b89\u0598\u0298\u0c59\u0c99\u0c59\u0298\u0d01\u0d81\u0e01"
+    + "\u0e81\u0f01\u0f81\u1001\u1081\u1101\u1181\u1201\u1281\u1301\u1381"
+    + "\u1401\u1481\u1501\u1581\u1601\u1681\u1701\u1781\u1801\u1881\u1901"
+    + "\u1981\u0455\u0298\u04d6\u1a1b\u1a97\u0298\u0298\u0298\u0c99\u0455"
+    + "\u04d6\u3e00\u0298\u0298\u0298\u0298\u0298\u0298\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u282c\u0298\u039a\u039a\u039a\u039a\u289c"
+    + "\u289c\u1a1b\u289c\u2902\u29dd\u0c99\u2a14\u289c\u1a1b\u2a9c\u0519"
+    + "\u2b0b\u2b8b\u1a1b\u2c02\u289c\u0298\u1a1b\u2c8b\u2902\u2d5e\u2d8b"
+    + "\u2d8b\u2d8b\u0298\u0298\u0519\u0614\u0c99\u0c99\u0c99\u3e00\u0298"
+    + "\u039a\u0318\u0298\u3e00\u3e00\u3e00\u3e00\u5305\u5305\u5305\u3e00"
+    + "\u5305\u3e00\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305"
+    + "\u5305\u5305\u5305\u5305\u3e00\u3e00\u3e00\u4586\u4586\u4586\u4586"
+    + "\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586"
+    + "\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586"
+    + "\u4586\u4586\u4586\u4586\u4586\u4586\u3e00\u4f1c\u4f1c\u4e81\u4e81"
+    + "\u4e81\u4e81\u4e81\u4e81\u4e81\u4e81\u4e81\u4e81\u4e81\u4e81\u4e81"
+    + "\u4e81\u4e81\u4e81\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01"
+    + "\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01"
+    + "\u2e01\u2e01\u2e01\u2e01\u0c99\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01"
+    + "\u2e01\u2902\u3281\u2f81\u3002\u2f81\u3002\u3301\u2f81\u3002\u3381"
+    + "\u3381\u2f81\u3002\u2902\u3401\u3481\u3501\u2f81\u3002\u3381\u3581"
+    + "\u3602\u3681\u3701\u2f81\u3002\u2902\u2902\u3681\u3781\u2902\u3801"
+    + "\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2902\u2f81"
+    + "\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002"
+    + "\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81"
+    + "\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u5b09\u5b89\u5c09\u5c89\u5d09\u5d89\u5e09"
+    + "\u5e89\u5f09\u5f89\u4586\u4586\u3a05\u3a05\u3a05\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u1a1b\u1a1b\u4681"
+    + "\u0298\u4701\u4701\u4701\u3e00\u4781\u3e00\u4801\u4801\u2902\u2e01"
+    + "\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01"
+    + "\u2e01\u2e01\u2e01\u2e01\u2e82\u2e82\u2e82\u2e82\u2e82\u2e82\u2e82"
+    + "\u2e82\u2e82\u2e82\u2e82\u2e82\u2e82\u2e82\u2e82\u2e82\u2e82\u2e82"
+    + "\u2e82\u2e82\u2e82\u2e82\u2e82\u0c99\u2e82\u2e82\u2e82\u2e82\u2e82"
+    + "\u2e82\u2e82\u2f02\u2e82\u2e82\u4982\u2e82\u2e82\u2e82\u2e82\u2e82"
+    + "\u2e82\u2e82\u2e82\u2e82\u4a02\u4a82\u4a82\u3e00\u4b02\u4b82\u4c01"
+    + "\u4c01\u4c01\u4c82\u4d02\u2902\u3e00\u3e00\u2f81\u3002\u2f81\u3002"
+    + "\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81"
+    + "\u3002\u2902\u3b01\u3b83\u3c02\u2f81\u3002\u3d01\u3d81\u2f81\u3002"
+    + "\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81"
+    + "\u3002\u2f81\u3002\u3081\u3102\u2f81\u3002\u2f81\u3002\u2f81\u3002"
+    + "\u2902\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002"
+    + "\u2f81\u3002\u2f81\u3002\u2f81\u3002\u4d82\u4e02\u3c82\u2902\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u5a10\u5a10\u5a10\u5a10\u5a10\u5a10\u7d8b\u3e00\u3e00\u3e00\u7e0b"
+    + "\u7e8b\u7f0b\u7f8b\u800b\u808b\u0519\u0519\u0c99\u0455\u04d6\u2902"
+    + "\u3a05\u3a05\u3a05\u3a05\u3b01\u3b83\u3c02\u3b01\u3b83\u3c02\u3b01"
+    + "\u3b83\u3c02\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81"
+    + "\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u3c82\u2f81\u3002\u4f1c"
+    + "\u4586\u4586\u4586\u4586\u3e00\u4f87\u4f87\u3e00\u3e00\u2f81\u3002"
+    + "\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81"
+    + "\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002"
+    + "\u3181\u2f81\u3002\u2f81\u3002\u2f81\u3002\u3202\u2f81\u3002\u2f81"
+    + "\u3002\u2f81\u3002\u3881\u2f81\u3002\u3881\u2902\u2902\u2f81\u3002"
+    + "\u3881\u2f81\u3002\u3901\u3901\u2f81\u3002\u2f81\u3002\u3981\u2f81"
+    + "\u3002\u2902\u3a05\u2f81\u3002\u2902\u3a82\u4c01\u2f81\u3002\u2f81"
+    + "\u3002\u3e00\u3e00\u2f81\u3002\u3e00\u3e00\u2f81\u3002\u3e00\u3e00"
+    + "\u3e00\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002"
+    + "\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u0598\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u5298\u3e00\u3e00\u3e00\u5298\u5298\u5298\u5298\u5298"
+    + "\u5298\u5298\u5298\u5298\u5298\u5298\u5298\u5298\u5298\u3e00\u5a10"
+    + "\u5305\u4586\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305"
+    + "\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305"
+    + "\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u3e00\u3e00"
+    + "\u5a10\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01"
+    + "\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01"
+    + "\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3a05\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u5b09\u5b89\u5c09\u5c89\u5d09\u5d89\u5e09\u5e89\u5f09\u5f89"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u2902\u2902\u2902\u3e82\u3f02\u2902\u3f82\u3f82\u2902\u4002\u2902"
+    + "\u4082\u2902\u2902\u2902\u2902\u3f82\u2902\u2902\u4102\u2902\u2902"
+    + "\u2902\u2902\u4182\u4202\u2902\u2902\u2902\u2902\u2902\u4202\u2902"
+    + "\u2902\u4282\u2902\u2902\u4302\u2902\u2902\u2902\u2902\u2902\u2902"
+    + "\u2902\u2902\u2902\u2902\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u2902\u2902\u2902\u2902\u2902\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u5205\u4586\u5205\u5205\u3e00\u5205\u5205"
+    + "\u3e00\u5205\u5205\u5205\u5205\u5205\u5205\u5205\u5205\u5205\u5205"
+    + "\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305"
+    + "\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5298\u5305"
+    + "\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4f87\u4f87\u4586\u4f87"
+    + "\u4f87\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u2d8b\u2d8b\u2d8b\u2d8b\u2d8b\u2d8b\u2d8b\u2d8b\u2d8b\u2d8b\u2d8b"
+    + "\u2d8b\u828b\u4382\u2902\u2902\u4382\u2902\u2902\u2902\u2902\u4382"
+    + "\u2902\u4402\u4402\u2902\u2902\u2902\u2902\u2902\u2902\u4482\u2902"
+    + "\u2902\u2902\u2902\u2902\u2902\u2902\u2902\u2902\u2902\u2902\u2902"
+    + "\u2902\u2902\u3e00\u3e00\u4504\u4504\u4504\u4504\u4504\u4504\u4504"
+    + "\u4504\u4504\u1a1b\u1a1b\u4504\u4504\u4504\u4504\u4504\u1a1b\u1a1b"
+    + "\u1a1b\u1a1b\u1a1b\u1a1b\u1a1b\u1a1b\u1a1b\u4504\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u5001\u5001\u5001\u5001\u5001\u5001\u5001\u5001"
+    + "\u5001\u5001\u5001\u5001\u5001\u5001\u5001\u5001\u5001\u5001\u5001"
+    + "\u5001\u5001\u5001\u5001\u3e00\u3e00\u4504\u5098\u5098\u5098\u5098"
+    + "\u5098\u5098\u2e01\u2e01\u3e00\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01"
+    + "\u2e01\u2e01\u2e01\u4882\u4902\u4902\u4902\u2902\u2e82\u2e82\u2e82"
+    + "\u2e82\u2e82\u2e82\u2e82\u2e82\u2e82\u2e82\u2e82\u2e82\u2e82\u2e82"
+    + "\u2e82\u2e82\u4e02\u4e02\u4e02\u4e02\u4e02\u4e02\u4e02\u4e02\u4e02"
+    + "\u4e02\u4e02\u4e02\u4e02\u4e02\u4e02\u4e02\u4586\u4586\u4586\u4586"
+    + "\u4586\u5098\u4586\u4586\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00\u3e00"
+    + "\u3e00\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u3e00\u4586"
+    + "\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586"
+    + "\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586"
+    + "\u3e00\u4586\u4586\u4586\u5198\u4586\u4586\u5198\u4586\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u5205\u5205"
+    + "\u5205\u5205\u5205\u5205\u5205\u5205\u5205\u5205\u5205\u5205\u5205"
+    + "\u5205\u5205\u5205\u3e00\u3e00\u3e00\u3e00\u3e00\u5205\u5205\u5205"
+    + "\u5198\u5198\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u5b89\u5c09\u5c89\u5d09\u5d89\u5e09\u5e89\u5f09\u5f89"
+    + "\u630b\u638b\u640b\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u4586\u5a88\u3e00\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05\u3e00\u3a05"
+    + "\u3a05\u3a05\u3a05\u3e00\u3e00\u3e00\u3a05\u3a05\u3e00\u3a05\u3e00"
+    + "\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586"
+    + "\u4586\u4586\u4586\u4586\u4586\u4586\u5a88\u5a88\u5a88\u5a88\u3e00"
+    + "\u4586\u4586\u4586\u3e00\u4586\u4586\u4586\u4586\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u4586\u4586\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u4586\u5a88\u5a88\u3e00\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00\u3a05\u3a05\u3e00\u3e00"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3e00\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00\u3e00\u3e00\u5a88\u5a88"
+    + "\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00\u3e00"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3e00\u3e00\u3a05\u3a05\u3a05\u3e00\u3e00\u3e00\u3e00\u5b09\u5b89"
+    + "\u5c09\u5c89\u5d09\u5d89\u5e09\u5e89\u5f09\u5f89\u4f1c\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u5409\u5489\u5509\u5589\u5609\u5689\u5709\u5789\u5809"
+    + "\u5889\u0318\u5918\u5918\u5298\u3e00\u3e00\u4586\u5305\u5305\u5305"
+    + "\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305"
+    + "\u5305\u5305\u3e00\u3e00\u5305\u5305\u5305\u5305\u5305\u5305\u5305"
+    + "\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305"
+    + "\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u5305"
+    + "\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305"
+    + "\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305"
+    + "\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u6515\u6596"
+    + "\u5384\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305"
+    + "\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u5a88"
+    + "\u5a88\u5098\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u4586\u4586\u5a88\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3e00\u3a05\u3e00\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u5098\u5098\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u4586\u4586\u4586"
+    + "\u4586\u4586\u5384\u5384\u4586\u4586\u289c\u4586\u4586\u4586\u4586"
+    + "\u3e00\u3e00\u0709\u0789\u0809\u0889\u0909\u0989\u0a09\u0a89\u0b09"
+    + "\u0b89\u5305\u5305\u5305\u599c\u599c\u3e00\u3a05\u3a05\u3a05\u3e00"
+    + "\u3a05\u3e00\u3a05\u3e00\u3e00\u3a05\u3a05\u3e00\u3a05\u3a05\u3a05"
+    + "\u3a05\u4586\u3a05\u3a05\u4586\u4586\u4586\u4586\u4586\u4586\u3e00"
+    + "\u4586\u4586\u3a05\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00"
+    + "\u3e00\u3e00\u4586\u4586\u5a88\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00\u4586\u3a05\u5a88\u5a88"
+    + "\u5a88\u5a88\u5a88\u3e00\u4586\u5a88\u5a88\u3e00\u5a88\u5a88\u4586"
+    + "\u4586\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u5a88\u5a88\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3a05\u3e00\u5098\u5098\u5098"
+    + "\u5098\u5098\u5098\u5098\u5098\u5b89\u5c09\u5c89\u5d09\u5d89\u5e09"
+    + "\u5e89\u5f09\u5f89\u630b\u660b\u668b\u670b\u678b\u680b\u688b\u690b"
+    + "\u698b\u638b\u6a0b\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3e00\u3a05\u3a05\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05\u3e00"
+    + "\u3e00\u4586\u3a05\u5a88\u4586\u4586\u4586\u4586\u4586\u4586\u4586"
+    + "\u4586\u5a88\u5a88\u5a88\u5a88\u4586\u3e00\u3e00\u3a05\u4586\u4586"
+    + "\u4586\u4586\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3e00\u3a05\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00"
+    + "\u4586\u3e00\u5a88\u5a88\u5a88\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3e00\u3a05\u3e00\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3a05\u3e00\u3a05\u3a05"
+    + "\u3a05\u3a05\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u4586"
+    + "\u3a05\u3a05\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u3e00\u3e00"
+    + "\u3e00\u3e00\u039a\u039a\u039a\u039a\u039a\u039a\u039a\u039a\u039a"
+    + "\u039a\u039a\u039a\u039a\u039a\u039a\u039a\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3a05\u3a05\u4586\u4586\u5098\u5098\u5b09\u5b89\u5c09\u5c89"
+    + "\u5d09\u5d89\u5e09\u5e89\u5f09\u5f89\u5098\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u0298\u0298\u0318\u039a\u0318\u0298\u0298\u6515\u6596\u0298\u0519"
+    + "\u0598\u0614\u0598\u0698\u0709\u0789\u0809\u0889\u0909\u0989\u0a09"
+    + "\u0a89\u0b09\u0b89\u0598\u0298\u0c99\u0c99\u0c99\u0298\u0298\u0298"
+    + "\u0298\u0298\u0298\u2a14\u0298\u0298\u0298\u0298\u5a10\u5a10\u5a10"
+    + "\u5a10\u3e00\u5b09\u5b89\u5c09\u5c89\u5d09\u5d89\u5e09\u5e89\u5f09"
+    + "\u5f89\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3e00\u3e00\u3e00\u3e00\u5a88\u4586\u4586\u4586\u4586\u3e00\u3e00"
+    + "\u5a88\u5a88\u3e00\u3e00\u5a88\u5a88\u4586\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u5a88\u3e00\u3e00\u3e00\u3e00\u3a05"
+    + "\u3a05\u3e00\u3a05\u3e00\u3e00\u3a05\u3a05\u3e00\u3a05\u3e00\u3e00"
+    + "\u3a05\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05"
+    + "\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05\u3e00"
+    + "\u3a05\u3a05\u3e00\u3a05\u3a05\u3e00\u3e00\u4586\u3e00\u5a88\u5a88"
+    + "\u4586\u4586\u3e00\u3e00\u3e00\u3e00\u4586\u4586\u3e00\u3e00\u4586"
+    + "\u4586\u4586\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05\u3e00\u3a05\u3e00\u3a05\u3a05"
+    + "\u4586\u4586\u3e00\u3e00\u5b09\u5b89\u5c09\u5c89\u5d09\u5d89\u5e09"
+    + "\u5e89\u5f09\u5f89\u3a05\u3a05\u039a\u039a\u600b\u608b\u610b\u618b"
+    + "\u620b\u628b\u4f1c\u3e00\u3e00\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05\u3e00\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3e00\u3e00\u4586\u3a05\u5a88\u5a88\u4586\u4586\u4586\u4586"
+    + "\u4586\u3e00\u4586\u4586\u5a88\u3e00\u5a88\u5a88\u4586\u3e00\u3e00"
+    + "\u3a05\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u5b09\u5b89\u5c09\u5c89\u5d09\u5d89"
+    + "\u5e09\u5e89\u5f09\u5f89\u620b\u620b\u620b\u620b\u620b\u620b\u620b"
+    + "\u620b\u620b\u620b\u4f1c\u4586\u4f1c\u4586\u4f1c\u4586\u6515\u6596"
+    + "\u6515\u6596\u5a88\u5a88\u4586\u4586\u4586\u3e00\u3e00\u3e00\u5a88"
+    + "\u5a88\u3e00\u3e00\u5a88\u5a88\u4586\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u4586\u5a88\u3e00\u3e00\u3e00\u3e00\u3a05\u3a05"
+    + "\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05\u3e00\u5a88"
+    + "\u4586\u4586\u4586\u4586\u5a88\u4586\u3e00\u3e00\u3e00\u4586\u4586"
+    + "\u5a88\u4586\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u5a88\u5a88\u5a88\u4586\u4586"
+    + "\u4586\u4586\u4586\u4586\u4586\u5a88\u5a88\u3e00\u3e00\u3e00\u5a88"
+    + "\u5a88\u5a88\u3e00\u5a88\u5a88\u5a88\u4586\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u5a88\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u4504\u3e00"
+    + "\u4586\u4586\u4586\u4586\u4586\u4586\u3e00\u3e00\u5b09\u5b89\u5c09"
+    + "\u5c89\u5d09\u5d89\u5e09\u5e89\u5f09\u5f89\u3e00\u3e00\u3a05\u3a05"
+    + "\u3e00\u3e00\u3e00\u3e00\u5b09\u5b89\u5c09\u5c89\u5d09\u5d89\u5e09"
+    + "\u5e89\u5f09\u5f89\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u4586\u4586\u4586"
+    + "\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4f87"
+    + "\u4f87\u4f87\u5a88\u4586\u4586\u4586\u3e00\u3e00\u5a88\u5a88\u5a88"
+    + "\u3e00\u5a88\u5a88\u5a88\u4586\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u5a88\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00\u3e00"
+    + "\u4586\u3e00\u3e00\u3e00\u3e00\u5a88\u5a88\u5a88\u4586\u4586\u4586"
+    + "\u3e00\u4586\u3e00\u5a88\u5a88\u5a88\u5a88\u5a88\u5a88\u5a88\u5a88"
+    + "\u4586\u5a88\u5a88\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586"
+    + "\u4586\u4586\u4586\u5098\u5098\u5098\u5098\u5098\u5098\u5098\u039a"
+    + "\u5098\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u4504"
+    + "\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u5098\u5b09\u5b89"
+    + "\u5c09\u5c89\u5d09\u5d89\u5e09\u5e89\u5f09\u5f89\u5098\u5098\u3e00"
+    + "\u3e00\u3e00\u3e00\u3a05\u4f1c\u4f1c\u4f1c\u5098\u5098\u5098\u5098"
+    + "\u5098\u5098\u5098\u5098\u64b8\u5098\u5098\u5098\u5098\u5098\u5098"
+    + "\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4586\u4586\u4f1c\u4f1c\u4f1c\u4f1c"
+    + "\u4f1c\u4f1c\u4586\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u3e00\u3e00"
+    + "\u4f1c\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u289c\u289c\u289c\u289c\u289c"
+    + "\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u3e00\u3e00\u3e00\u3e00"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3e00\u3e00\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u1a97\u4504\u4504\u4504\u3e00\u5b09\u5b89\u5c09\u5c89\u5d09"
+    + "\u5d89\u5e09\u5e89\u5f09\u5f89\u5098\u5098\u5098\u5098\u5098\u5098"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u5a88\u5a88\u4586\u4586\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u020c\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u6515"
+    + "\u6596\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u5098\u5098\u5098\u6a8b\u6b0b\u6b8b\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u4586\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81"
+    + "\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002"
+    + "\u2f81\u3002\u2902\u2902\u2902\u2902\u2902\u6c02\u3e00\u3e00\u3e00"
+    + "\u3e00\u6c82\u6c82\u6c82\u6c82\u6c82\u6c82\u6c82\u6c82\u6d01\u6d01"
+    + "\u6d01\u6d01\u6d01\u6d01\u6d01\u6d01\u6c82\u6c82\u6c82\u6c82\u6c82"
+    + "\u6c82\u6c82\u6c82\u6d01\u6d01\u6d01\u6d01\u6d01\u6d01\u6d01\u6d01"
+    + "\u6c82\u6c82\u6c82\u6c82\u6c82\u6c82\u3e00\u3e00\u6d01\u6d01\u6d01"
+    + "\u6d01\u6d01\u6d01\u3e00\u3e00\u2902\u6c82\u2902\u6c82\u2902\u6c82"
+    + "\u2902\u6c82\u3e00\u6d01\u3e00\u6d01\u3e00\u6d01\u3e00\u6d01\u6c82"
+    + "\u6c82\u6c82\u6c82\u6c82\u6c82\u6c82\u6c82\u6d01\u6d01\u6d01\u6d01"
+    + "\u6d01\u6d01\u6d01\u6d01\u6d82\u6d82\u6e02\u6e02\u6e02\u6e02\u6e82"
+    + "\u6e82\u6f02\u6f02\u6f82\u6f82\u7002\u7002\u3e00\u3e00\u6c82\u6c82"
+    + "\u6c82\u6c82\u6c82\u6c82\u6c82\u6c82\u7083\u7083\u7083\u7083\u7083"
+    + "\u7083\u7083\u7083\u6c82\u6c82\u6c82\u6c82\u6c82\u6c82\u6c82\u6c82"
+    + "\u7083\u7083\u7083\u7083\u7083\u7083\u7083\u7083\u6c82\u6c82\u2902"
+    + "\u7102\u2902\u3e00\u2902\u2902\u6d01\u6d01\u7181\u7181\u7203\u1a1b"
+    + "\u7282\u1a1b\u1b02\u1b82\u1c02\u1c82\u1d02\u1d82\u1e02\u1e82\u1f02"
+    + "\u1f82\u2002\u2082\u2102\u2182\u2202\u2282\u2302\u2382\u2402\u2482"
+    + "\u2502\u2582\u2602\u2682\u2702\u2782\u6515\u0c99\u6596\u0c99\u3e00"
+    + "\u6c82\u6c82\u2902\u2902\u2902\u7402\u2902\u2902\u6d01\u6d01\u7481"
+    + "\u7481\u7501\u1a1b\u1a1b\u1a1b\u3e00\u3e00\u2902\u7102\u2902\u3e00"
+    + "\u2902\u2902\u7581\u7581\u7601\u7601\u7203\u1a1b\u1a1b\u3e00\u020c"
+    + "\u020c\u020c\u020c\u020c\u020c\u020c\u76ac\u020c\u020c\u020c\u770c"
+    + "\u5a10\u5a10\u7790\u7810\u2a14\u78b4\u2a14\u2a14\u2a14\u2a14\u0298"
+    + "\u0298\u791d\u799e\u6515\u791d\u791d\u799e\u6515\u791d\u0298\u0298"
+    + "\u0298\u0298\u0298\u0298\u0298\u0298\u7a0d\u7a8e\u7b10\u7b90\u7c10"
+    + "\u7c90\u7d10\u76ac\u0318\u0318\u0318\u0318\u0318\u0298\u0298\u0298"
+    + "\u0298\u29dd\u2d5e\u0298\u0298\u0298\u0298\u1a97\u7d8b\u2c8b\u2b0b"
+    + "\u2b8b\u7e0b\u7e8b\u7f0b\u7f8b\u800b\u808b\u0519\u0519\u0c99\u0455"
+    + "\u04d6\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00"
+    + "\u3e00\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c"
+    + "\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u289c\u289c\u289c\u289c\u289c\u289c\u289c"
+    + "\u4c01\u289c\u289c\u289c\u289c\u4c01\u289c\u289c\u2902\u4c01\u4c01"
+    + "\u4c01\u2902\u2902\u4c01\u4c01\u4c01\u2902\u289c\u4c01\u289c\u289c"
+    + "\u289c\u4c01\u4c01\u4c01\u4c01\u4c01\u289c\u289c\ua08a\ua10a\ua18a"
+    + "\ua20a\ua28a\ua30a\ua38a\ua40a\ua48a\u4586\u4586\u4586\u4586\u4586"
+    + "\u4586\u2a14\u4504\u4504\u4504\u4504\u4504\u289c\u289c\ua50a\ua58a"
+    + "\ua60a\u3e00\u3e00\u3e00\u289c\u289c\u289c\u289c\u3e00\u289c\u289c"
+    + "\u289c\u289c\u3e00\u3e00\u289c\u289c\u289c\u289c\u289c\u289c\u289c"
+    + "\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c"
+    + "\u289c\u289c\u289c\u289c\u289c\u0c99\u289c\u289c\u289c\u289c\u289c"
+    + "\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u0c99\u0c99"
+    + "\u289c\u289c\u0c99\u289c\u0c99\u289c\u289c\u289c\u289c\u289c\u289c"
+    + "\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u0c99\u289c"
+    + "\u289c\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u930a\u938a\u940a\u948a\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u0c99\u0c99"
+    + "\u0c99\u0c99\u0c99\u289c\u289c\u289c\u289c\u289c\u0c99\u0c99\u289c"
+    + "\u289c\u289c\u289c\u4c01\u289c\u8101\u289c\u4c01\u289c\u8181\u8201"
+    + "\u4c01\u4c01\u2a9c\u2902\u4c01\u4c01\u289c\u4c01\u2902\u3a05\u3a05"
+    + "\u3a05\u3a05\u2902\u289c\u3e00\u3e00\u3e00\u3e00\u3e00\u830a\u838a"
+    + "\u840a\u848a\u850a\u858a\u860a\u868a\u870a\u878a\u880a\u888a\u890a"
+    + "\u898a\u8a0a\u8a8a\u8b0a\u8b8a\u8c0a\u8c8a\u8d0a\u8d8a\u8e0a\u8e8a"
+    + "\u8f0a\u8f8a\u900a\u908a\u910a\u918a\u920a\u928a\u0c99\u0c99\u0c59"
+    + "\u0c59\u0c99\u0c99\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59"
+    + "\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59"
+    + "\u0c99\u0c99\u0c99\u0c99\u0c99\u0c59\u0c59\u0c59\u0c59\u0c99\u0c99"
+    + "\u0c99\u0c99\u0c99\u0c99\u0c99\u0c99\u0c99\u0c59\u0c59\u0c59\u0c59"
+    + "\u0c59\u0c99\u0c99\u0c59\u0c59\u0c99\u0c99\u0c99\u0c99\u0c59\u0c59"
+    + "\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59"
+    + "\u0c99\u0c99\u0c59\u0c59\u0c59\u0c59\u0c99\u0c99\u0c99\u0c99\u0c99"
+    + "\u0c59\u0c99\u0c99\u0c99\u0c99\u0c99\u0c99\u0c99\u289c\u289c\u0c99"
+    + "\u289c\u289c\u0c99\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u0c99"
+    + "\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c"
+    + "\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c"
+    + "\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u3e00"
+    + "\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c"
+    + "\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c"
+    + "\u289c\u289c\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u0c99\u0c59\u0c59\u0c59\u0c59\u0c99"
+    + "\u0c99\u0c99\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c99\u0c99\u0c99"
+    + "\u0c59\u0519\u0519\u0c99\u0c59\u0c59\u0c99\u0c99\u0c99\u0c59\u0c59"
+    + "\u0c59\u0c59\u0c99\u0c59\u0c99\u0c59\u0c99\u0c99\u0c99\u0c99\u0c59"
+    + "\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c99\u0c99\u0c99"
+    + "\u0c99\u0c99\u0c59\u0c99\u0c59\u0c59\u0c59\u0c59\u0c59\u289c\u289c"
+    + "\u289c\u289c\u289c\u289c\u289c\u0455\u04d6\u289c\u289c\u289c\u289c"
+    + "\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u4f1c\u4f1c\u4f1c\u4f1c"
+    + "\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c"
+    + "\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u3e00\u3e00\u3e00"
+    + "\u3e00\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c"
+    + "\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c"
+    + "\u4f1c\u9a9c\u9a9c\u9a9c\u9a9c\u9a9c\u9a9c\u9a9c\u9a9c\u9a9c\u9a9c"
+    + "\u9a9c\u9a9c\u9a9c\u9a9c\u9a9c\u9a9c\u9b1c\u9b1c\u9b1c\u9b1c\u9b1c"
+    + "\u9b1c\u9b1c\u9b1c\u9b1c\u9b1c\u9b1c\u9b1c\u9b1c\u9b1c\u9b1c\u9b1c"
+    + "\u7d8b\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u0c59\u0c99\u0c59\u0c99\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59"
+    + "\u0c59\u0c99\u0c99\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59"
+    + "\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c99"
+    + "\u0c99\u0c59\u0c59\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u039a\u039a\u0c99\u1a1b\u289c"
+    + "\u039a\u039a\u3e00\u289c\u0c99\u0c99\u0c99\u0c99\u289c\u289c\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u5a10\u5a10"
+    + "\u5a10\u289c\u289c\u3e00\u3e00\u289c\u289c\u289c\u289c\u289c\u289c"
+    + "\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u3e00"
+    + "\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c"
+    + "\u289c\u289c\u289c\u3e00\u289c\u3e00\u289c\u289c\u289c\u289c\u3e00"
+    + "\u3e00\u3e00\u289c\u3e00\u289c\u289c\u289c\u289c\u289c\u289c\u289c"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u828b\u9b8b\u9c0b\u9c8b\u9d0b\u9d8b\u9e0b\u9e8b"
+    + "\u9f0b\u9f8b\u828b\u9b8b\u9c0b\u9c8b\u9d0b\u9d8b\u9e0b\u9e8b\u9f0b"
+    + "\u9f8b\u289c\u3e00\u3e00\u3e00\u289c\u289c\u289c\u289c\u289c\u289c"
+    + "\u289c\u289c\u0c59\u0c59\u0c59\u0c59\u289c\u289c\u289c\u289c\u289c"
+    + "\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c"
+    + "\u289c\u289c\u289c\u289c\u289c\u4f1c\u289c\u289c\u289c\u289c\u289c"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u620b\u620b\u620b\u620b\u620b\u620b"
+    + "\u620b\u620b\u620b\u620b\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c"
+    + "\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c"
+    + "\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c"
+    + "\u4f1c\u4f1c\u4f1c\u3e00\u3e00\u3e00\u4f1c\u600b\u608b\u610b\u618b"
+    + "\ua68b\ua70b\ua78b\ua80b\ua88b\u630b\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c"
+    + "\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c"
+    + "\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c"
+    + "\u289c\u3e00\u289c\u289c\u289c\u3e00\u289c\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u2c8b"
+    + "\u2b0b\u2b8b\u7e0b\u7e8b\u7f0b\u7f8b\u800b\u808b\u950b\u958b\u960b"
+    + "\u968b\u970b\u978b\u980b\u988b\u990b\u998b\u9a0b\u2c8b\u2b0b\u2b8b"
+    + "\u7e0b\u7e8b\u7f0b\u7f8b\u800b\u808b\u950b\u958b\u960b\u968b\u970b"
+    + "\u978b\u980b\u988b\u990b\u998b\u9a0b\u4f1c\u4f1c\u4f1c\u4f1c\u020c"
+    + "\u0298\u0298\u0298\u289c\u4504\u3a05\ua00a\u0455\u04d6\u0455\u04d6"
+    + "\u0455\u04d6\u0455\u04d6\u0455\u04d6\u289c\u289c\u0455\u04d6\u0455"
+    + "\u04d6\u0455\u04d6\u0455\u04d6\u2a14\u6515\u6596\u6596\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00\u3e00"
+    + "\u3e00\u4586\u4586\u1a1b\u1a1b\u4504\u4504\u3e00\u3a05\u3a05\u3a05"
+    + "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+    + "\u3a05\u3e00\u4f1c\u4f1c\u620b\u620b\u620b\u620b\u4f1c\u4f1c\u4f1c"
+    + "\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c"
+    + "\u4f1c\u4f1c\u4f1c\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\ua913\ua913"
+    + "\ua913\ua913\ua913\ua913\ua913\ua913\ua913\ua913\ua913\ua913\ua913"
+    + "\ua913\ua913\ua913\ua913\ua913\ua913\ua913\ua913\ua913\ua913\ua913"
+    + "\ua913\ua913\ua913\ua913\ua913\ua913\ua913\ua913\ua992\ua992\ua992"
+    + "\ua992\ua992\ua992\ua992\ua992\ua992\ua992\ua992\ua992\ua992\ua992"
+    + "\ua992\ua992\ua992\ua992\ua992\ua992\ua992\ua992\ua992\ua992\ua992"
+    + "\ua992\ua992\ua992\ua992\ua992\ua992\ua992\u5205\u5205\u5205\u5205"
+    + "\u5205\u5205\u5205\u5205\u5205\u0519\u5205\u5205\u5205\u5205\u5205"
+    + "\u5205\u5205\u5205\u5205\u5205\u5205\u5205\u5205\u3e00\u5205\u5205"
+    + "\u5205\u5205\u5205\u3e00\u5205\u3e00\u4586\u4586\u4586\u4586\u3e00"
+    + "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+    + "\u0298\u2a14\u2a14\u1a97\u1a97\u6515\u6596\u6515\u6596\u6515\u6596"
+    + "\u6515\u6596\u6515\u6596\u6515\u6596\u3e00\u3e00\u3e00\u3e00\u0298"
+    + "\u0298\u0298\u0298\u1a97\u1a97\u1a97\u0598\u0298\u0598\u3e00\u0298"
+    + "\u0598\u0298\u0298\u2a14\u6515\u6596\u6515\u6596\u6515\u6596\u0318"
+    + "\u0298\u0d01\u0d81\u0e01\u0e81\u0f01\u0f81\u1001\u1081\u1101\u1181"
+    + "\u1201\u1281\u1301\u1381\u1401\u1481\u1501\u1581\u1601\u1681\u1701"
+    + "\u1781\u1801\u1881\u1901\u1981\u6515\u0298\u6596\u1a1b\u1a97";
+  /**
+   * This is the attribute table for computing the numeric value of a
+   * character.  The value is -1 if Unicode does not define a value, -2
+   * if the value is not a non-negative integer, otherwise it is the value.
+   * Note that this is a signed value, but stored as an unsigned char
+   * since this is a String literal.
+   */
+  String NUM_VALUE
+    = "\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff"
+    + "\uffff\uffff\uffff\000\001\002\003\004\005\006\007"
+    + "\010\011\uffff\uffff\012\013\014\015\016\017\020"
+    + "\021\022\023\024\025\026\027\030\031\032\033"
+    + "\034\035\036\037 !\"#\uffff\uffff\012"
+    + "\013\014\015\016\017\020\021\022\023\024\025"
+    + "\026\027\030\031\032\033\034\035\036\037 "
+    + "!\"#\uffff\uffff\uffff\uffff\uffff\uffff\002\003"
+    + "\uffff\001\uffff\ufffe\uffff\uffff\uffff\uffff\uffff\uffff\uffff"
+    + "\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff"
+    + "\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff"
+    + "\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff"
+    + "\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff"
+    + "\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff"
+    + "\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff"
+    + "\uffff\uffff\uffff\000\001\002\003\004\005\006\007"
+    + "\010\011\uffff\uffff\uffff\uffff\000\001\002\003\004"
+    + "\005\006\007\010\011\001\002\003\004\uffff\020"
+    + "\012d\u03e8\uffff\uffff\uffff\024\036(2<"
+    + "FPZ\u2710\021\022\023\uffff\uffff\uffff\uffff"
+    + "\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff"
+    + "\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff"
+    + "\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\000\004"
+    + "\005\006\007\010\011\uffff\uffff\uffff\001\001\002"
+    + "\003\004\005\006\007\010\011\012\013\0142"
+    + "d\u01f4\u03e8\001\002\003\004\005\006\007\010"
+    + "\011\012\013\0142d\u01f4\u03e8\u03e8\u1388\u2710"
+    + "\uffff\012\013\014\015\016\017\020\021\022\023"
+    + "\024\uffff\uffff\002\003\004\005\006\007\010\011"
+    + "\012\000\001\002\003\004\005\006\007\010\011"
+    + "\012\024\036\005\006\007\010\011\uffff\uffff";
+  /**
+   * This is the attribute table for computing the uppercase representation
+   * of a character.  The value is the signed difference between the
+   * character and its uppercase version.  Note that this is stored as an
+   * unsigned char since this is a String literal.
+   */
+  String UPPER
+    = "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\uffe0"
+    + "\uffe0\uffe0\uffe0\uffe0\uffe0\uffe0\uffe0\uffe0\uffe0\uffe0\uffe0"
+    + "\uffe0\uffe0\uffe0\uffe0\uffe0\uffe0\uffe0\uffe0\uffe0\uffe0\uffe0"
+    + "\uffe0\uffe0\uffe0\000\000\000\000\000\000\000\000"
+    + "\u02e7\000\000\000\000\uffe0y\000\uffff\000\uff18"
+    + "\000\ufed4\000\000\000\000\000\000\000a\000"
+    + "\000\000\000\000\000\000\0008\000\uffff\ufffe"
+    + "\uffb1\000\000\000\uff2e\uff32\uff33\uff36\uff35\uff31\uff2f"
+    + "\uff2d\uff2b\uff2a\uff26\uff27\uff25\000\000T\000\000"
+    + "\000\000\uffda\uffdb\uffe1\uffc0\uffc1\uffc2\uffc7\000\uffd1"
+    + "\uffca\uffaa\uffb0\000\000\000\000\000\uffd0\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\uffc5\010\000J"
+    + "Vd\u0080p~\000\011\000\000\ue3db\000"
+    + "\000\007\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\ufff0\ufff0\ufff0\ufff0\ufff0\ufff0\ufff0\ufff0"
+    + "\ufff0\ufff0\ufff0\ufff0\ufff0\ufff0\ufff0\ufff0\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\uffe6\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000";
+  /**
+   * This is the attribute table for computing the lowercase representation
+   * of a character.  The value is the signed difference between the
+   * character and its lowercase version.  Note that this is stored as an
+   * unsigned char since this is a String literal.
+   */
+  String LOWER
+    = "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000       "
+    + "           "
+    + "        \000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000 \000\000\001\000\uff39\000"
+    + "\uff87\000\u00d2\u00ce\u00cdO\u00ca\u00cb\u00cf\000\u00d3"
+    + "\u00d1\u00d5\u00d6\u00da\u00d9\u00db\000\000\002\001\000"
+    + "\000\uff9f\uffc8\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000&%"
+    + "@?\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000P\000\0000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\ufff8\000"
+    + "\000\000\000\000\000\ufff8\000\uffb6\ufff7\000\uffaa"
+    + "\uff9c\000\uff90\ufff9\uff80\uff82\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\ue2a3\udf41\udfba\000\020\020"
+    + "\020\020\020\020\020\020\020\020\020\020\020"
+    + "\020\020\020\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\032\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000";
+  /**
+   * This is the attribute table for computing the directionality class
+   * of a character.  At present, the value is in the range 0 - 18 if the
+   * character has a direction, otherwise it is -1.  Note that this is
+   * stored as an unsigned char since this is a String literal.
+   */
+  String DIRECTION
+    = "\011\013\012\014\014\015\005\005\015\015\005"
+    + "\007\005\004\003\003\003\003\003\003\003\003"
+    + "\003\003\015\015\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\015\015\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\007\015\000\015\015\005\003\003"
+    + "\000\003\015\015\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\uffff\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\010\010\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\010\000\000\000\001\001"
+    + "\002\002\002\006\006\006\006\006\006\006\006"
+    + "\006\006\006\002\011\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\015\015\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\014\011\000\001\015"
+    + "\015\015\014\012\016\020\022\017\021\003\003"
+    + "\003\003\003\003\003\000\000\000\015\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000\000"
+    + "\000\003\003\003\003\003\003\003\003\003\003"
+    + "\003\000\000\015\015\015\015\015\015\015\015"
+    + "\015\000\000\000\000\000\000\000\000\000\000"
+    + "\000\000\000\000\000\000\000\000\000\000";
+  /**
+   * This is the listing of titlecase special cases (all other characters
+   * can use <code>UPPER</code> to determine their titlecase).  The listing
+   * is a sequence of character pairs; converting the first character of the
+   * pair to titlecase produces the second character.
+   */
+  String TITLE
+    = "\u01c4\u01c5\u01c5\u01c5\u01c6\u01c5\u01c7\u01c8\u01c8\u01c8\u01c9"
+    + "\u01c8\u01ca\u01cb\u01cb\u01cb\u01cc\u01cb\u01f1\u01f2\u01f2\u01f2"
+    + "\u01f3\u01f2";
+}
--- a/libjava/include/java-chardecomp.h
+++ b/libjava/include/java-chardecomp.h
@@ -3,7 +3,8 @@
 #ifndef __JAVA_CHARDECOMP_H__
 #define __JAVA_CHARDECOMP_H__
-// These tables are automatically generated by the chartables.pl
+// These tables are automatically generated by the scripts/unicode-decomp.pl
 // script.  DO NOT EDIT the tables.  Instead, fix the script
 // and run it again.
@@ -200,8 +201,8 @@ static const decomp_entry canonical_decomposition[] =
  { 0x01dc, "\x00\xfc\x03\x00" },
  { 0x01de, "\x00\xc4\x03\x04" },
  { 0x01df, "\x00\xe4\x03\x04" },
-  { 0x01e0, "\x00\x41\x03\x07\x03\x04" },
+  { 0x01e0, "\x02\x26\x03\x04" },
-  { 0x01e1, "\x00\x61\x03\x07\x03\x04" },
+  { 0x01e1, "\x02\x27\x03\x04" },
  { 0x01e2, "\x00\xc6\x03\x04" },
  { 0x01e3, "\x00\xe6\x03\x04" },
  { 0x01e6, "\x00\x47\x03\x0c" },
@@ -217,6 +218,8 @@ static const decomp_entry canonical_decomposition[] =
  { 0x01f0, "\x00\x6a\x03\x0c" },
  { 0x01f4, "\x00\x47\x03\x01" },
  { 0x01f5, "\x00\x67\x03\x01" },
+  { 0x01f8, "\x00\x4e\x03\x00" },
+  { 0x01f9, "\x00\x6e\x03\x00" },
  { 0x01fa, "\x00\xc5\x03\x01" },
  { 0x01fb, "\x00\xe5\x03\x01" },
  { 0x01fc, "\x00\xc6\x03\x01" },
@@ -247,6 +250,26 @@ static const decomp_entry canonical_decomposition[] =
  { 0x0215, "\x00\x75\x03\x0f" },
  { 0x0216, "\x00\x55\x03\x11" },
  { 0x0217, "\x00\x75\x03\x11" },
+  { 0x0218, "\x00\x53\x03\x26" },
+  { 0x0219, "\x00\x73\x03\x26" },
+  { 0x021a, "\x00\x54\x03\x26" },
+  { 0x021b, "\x00\x74\x03\x26" },
+  { 0x021e, "\x00\x48\x03\x0c" },
+  { 0x021f, "\x00\x68\x03\x0c" },
+  { 0x0226, "\x00\x41\x03\x07" },
+  { 0x0227, "\x00\x61\x03\x07" },
+  { 0x0228, "\x00\x45\x03\x27" },
+  { 0x0229, "\x00\x65\x03\x27" },
+  { 0x022a, "\x00\xd6\x03\x04" },
+  { 0x022b, "\x00\xf6\x03\x04" },
+  { 0x022c, "\x00\xd5\x03\x04" },
+  { 0x022d, "\x00\xf5\x03\x04" },
+  { 0x022e, "\x00\x4f\x03\x07" },
+  { 0x022f, "\x00\x6f\x03\x07" },
+  { 0x0230, "\x02\x2e\x03\x04" },
+  { 0x0231, "\x02\x2f\x03\x04" },
+  { 0x0232, "\x00\x59\x03\x04" },
+  { 0x0233, "\x00\x79\x03\x04" },
  { 0x0340, "\x03\x00" },
  { 0x0341, "\x03\x01" },
  { 0x0343, "\x03\x13" },
@@ -277,17 +300,21 @@ static const decomp_entry canonical_decomposition[] =
  { 0x03ce, "\x03\xc9\x03\x01" },
  { 0x03d3, "\x03\xd2\x03\x01" },
  { 0x03d4, "\x03\xd2\x03\x08" },
+  { 0x0400, "\x04\x15\x03\x00" },
  { 0x0401, "\x04\x15\x03\x08" },
  { 0x0403, "\x04\x13\x03\x01" },
  { 0x0407, "\x04\x06\x03\x08" },
  { 0x040c, "\x04\x1a\x03\x01" },
+  { 0x040d, "\x04\x18\x03\x00" },
  { 0x040e, "\x04\x23\x03\x06" },
  { 0x0419, "\x04\x18\x03\x06" },
  { 0x0439, "\x04\x38\x03\x06" },
+  { 0x0450, "\x04\x35\x03\x00" },
  { 0x0451, "\x04\x35\x03\x08" },
  { 0x0453, "\x04\x33\x03\x01" },
  { 0x0457, "\x04\x56\x03\x08" },
  { 0x045c, "\x04\x3a\x03\x01" },
+  { 0x045d, "\x04\x38\x03\x00" },
  { 0x045e, "\x04\x43\x03\x06" },
  { 0x0476, "\x04\x74\x03\x0f" },
  { 0x0477, "\x04\x75\x03\x0f" },
@@ -313,6 +340,8 @@ static const decomp_entry canonical_decomposition[] =
  { 0x04e7, "\x04\x3e\x03\x08" },
  { 0x04ea, "\x04\xe8\x03\x08" },
  { 0x04eb, "\x04\xe9\x03\x08" },
+  { 0x04ec, "\x04\x2d\x03\x08" },
+  { 0x04ed, "\x04\x4d\x03\x08" },
  { 0x04ee, "\x04\x23\x03\x04" },
  { 0x04ef, "\x04\x43\x03\x04" },
  { 0x04f0, "\x04\x23\x03\x08" },
@@ -323,6 +352,14 @@ static const decomp_entry canonical_decomposition[] =
  { 0x04f5, "\x04\x47\x03\x08" },
  { 0x04f8, "\x04\x2b\x03\x08" },
  { 0x04f9, "\x04\x4b\x03\x08" },
+  { 0x0622, "\x06\x27\x06\x53" },
+  { 0x0623, "\x06\x27\x06\x54" },
+  { 0x0624, "\x06\x48\x06\x54" },
+  { 0x0625, "\x06\x27\x06\x55" },
+  { 0x0626, "\x06\x4a\x06\x54" },
+  { 0x06c0, "\x06\xd5\x06\x54" },
+  { 0x06c2, "\x06\xc1\x06\x54" },
+  { 0x06d3, "\x06\xd2\x06\x54" },
  { 0x0929, "\x09\x28\x09\x3c" },
  { 0x0931, "\x09\x30\x09\x3c" },
  { 0x0934, "\x09\x33\x09\x3c" },
@@ -334,23 +371,22 @@ static const decomp_entry canonical_decomposition[] =
  { 0x095d, "\x09\x22\x09\x3c" },
  { 0x095e, "\x09\x2b\x09\x3c" },
  { 0x095f, "\x09\x2f\x09\x3c" },
-  { 0x09b0, "\x09\xac\x09\xbc" },
  { 0x09cb, "\x09\xc7\x09\xbe" },
  { 0x09cc, "\x09\xc7\x09\xd7" },
  { 0x09dc, "\x09\xa1\x09\xbc" },
  { 0x09dd, "\x09\xa2\x09\xbc" },
  { 0x09df, "\x09\xaf\x09\xbc" },
+  { 0x0a33, "\x0a\x32\x0a\x3c" },
+  { 0x0a36, "\x0a\x38\x0a\x3c" },
  { 0x0a59, "\x0a\x16\x0a\x3c" },
  { 0x0a5a, "\x0a\x17\x0a\x3c" },
  { 0x0a5b, "\x0a\x1c\x0a\x3c" },
-  { 0x0a5c, "\x0a\x21\x0a\x3c" },
  { 0x0a5e, "\x0a\x2b\x0a\x3c" },
  { 0x0b48, "\x0b\x47\x0b\x56" },
  { 0x0b4b, "\x0b\x47\x0b\x3e" },
  { 0x0b4c, "\x0b\x47\x0b\x57" },
  { 0x0b5c, "\x0b\x21\x0b\x3c" },
  { 0x0b5d, "\x0b\x22\x0b\x3c" },
-  { 0x0b5f, "\x0b\x2f\x0b\x3c" },
  { 0x0b94, "\x0b\x92\x0b\xd7" },
  { 0x0bca, "\x0b\xc6\x0b\xbe" },
  { 0x0bcb, "\x0b\xc7\x0b\xbe" },
@@ -364,27 +400,28 @@ static const decomp_entry canonical_decomposition[] =
  { 0x0d4a, "\x0d\x46\x0d\x3e" },
  { 0x0d4b, "\x0d\x47\x0d\x3e" },
  { 0x0d4c, "\x0d\x46\x0d\x57" },
-  { 0x0e33, "\x0e\x4d\x0e\x32" },
+  { 0x0dda, "\x0d\xd9\x0d\xca" },
-  { 0x0eb3, "\x0e\xcd\x0e\xb2" },
+  { 0x0ddc, "\x0d\xd9\x0d\xcf" },
+  { 0x0ddd, "\x0d\xdc\x0d\xca" },
+  { 0x0dde, "\x0d\xd9\x0d\xdf" },
  { 0x0f43, "\x0f\x42\x0f\xb7" },
  { 0x0f4d, "\x0f\x4c\x0f\xb7" },
  { 0x0f52, "\x0f\x51\x0f\xb7" },
  { 0x0f57, "\x0f\x56\x0f\xb7" },
  { 0x0f5c, "\x0f\x5b\x0f\xb7" },
  { 0x0f69, "\x0f\x40\x0f\xb5" },
-  { 0x0f73, "\x0f\x72\x0f\x71" },
+  { 0x0f73, "\x0f\x71\x0f\x72" },
  { 0x0f75, "\x0f\x71\x0f\x74" },
  { 0x0f76, "\x0f\xb2\x0f\x80" },
-  { 0x0f77, "\x0f\x76\x0f\x71" },
  { 0x0f78, "\x0f\xb3\x0f\x80" },
-  { 0x0f79, "\x0f\x78\x0f\x71" },
+  { 0x0f81, "\x0f\x71\x0f\x80" },
-  { 0x0f81, "\x0f\x80\x0f\x71" },
  { 0x0f93, "\x0f\x92\x0f\xb7" },
  { 0x0f9d, "\x0f\x9c\x0f\xb7" },
  { 0x0fa2, "\x0f\xa1\x0f\xb7" },
  { 0x0fa7, "\x0f\xa6\x0f\xb7" },
  { 0x0fac, "\x0f\xab\x0f\xb7" },
  { 0x0fb9, "\x0f\x90\x0f\xb5" },
+  { 0x1026, "\x10\x25\x10\x2e" },
  { 0x1e00, "\x00\x41\x03\x25" },
  { 0x1e01, "\x00\x61\x03\x25" },
  { 0x1e02, "\x00\x42\x03\x07" },
@@ -413,8 +450,8 @@ static const decomp_entry canonical_decomposition[] =
  { 0x1e19, "\x00\x65\x03\x2d" },
  { 0x1e1a, "\x00\x45\x03\x30" },
  { 0x1e1b, "\x00\x65\x03\x30" },
-  { 0x1e1c, "\x01\x14\x03\x27" },
+  { 0x1e1c, "\x02\x28\x03\x06" },
-  { 0x1e1d, "\x01\x15\x03\x27" },
+  { 0x1e1d, "\x02\x29\x03\x06" },
  { 0x1e1e, "\x00\x46\x03\x07" },
  { 0x1e1f, "\x00\x66\x03\x07" },
  { 0x1e20, "\x00\x47\x03\x04" },
@@ -552,8 +589,8 @@ static const decomp_entry canonical_decomposition[] =
  { 0x1ea9, "\x00\xe2\x03\x09" },
  { 0x1eaa, "\x00\xc2\x03\x03" },
  { 0x1eab, "\x00\xe2\x03\x03" },
-  { 0x1eac, "\x00\xc2\x03\x23" },
+  { 0x1eac, "\x1e\xa0\x03\x02" },
-  { 0x1ead, "\x00\xe2\x03\x23" },
+  { 0x1ead, "\x1e\xa1\x03\x02" },
  { 0x1eae, "\x01\x02\x03\x01" },
  { 0x1eaf, "\x01\x03\x03\x01" },
  { 0x1eb0, "\x01\x02\x03\x00" },
@@ -562,8 +599,8 @@ static const decomp_entry canonical_decomposition[] =
  { 0x1eb3, "\x01\x03\x03\x09" },
  { 0x1eb4, "\x01\x02\x03\x03" },
  { 0x1eb5, "\x01\x03\x03\x03" },
-  { 0x1eb6, "\x01\x02\x03\x23" },
+  { 0x1eb6, "\x1e\xa0\x03\x06" },
-  { 0x1eb7, "\x01\x03\x03\x23" },
+  { 0x1eb7, "\x1e\xa1\x03\x06" },
  { 0x1eb8, "\x00\x45\x03\x23" },
  { 0x1eb9, "\x00\x65\x03\x23" },
  { 0x1eba, "\x00\x45\x03\x09" },
@@ -578,8 +615,8 @@ static const decomp_entry canonical_decomposition[] =
  { 0x1ec3, "\x00\xea\x03\x09" },
  { 0x1ec4, "\x00\xca\x03\x03" },
  { 0x1ec5, "\x00\xea\x03\x03" },
-  { 0x1ec6, "\x00\xca\x03\x23" },
+  { 0x1ec6, "\x1e\xb8\x03\x02" },
-  { 0x1ec7, "\x00\xea\x03\x23" },
+  { 0x1ec7, "\x1e\xb9\x03\x02" },
  { 0x1ec8, "\x00\x49\x03\x09" },
  { 0x1ec9, "\x00\x69\x03\x09" },
  { 0x1eca, "\x00\x49\x03\x23" },
@@ -596,8 +633,8 @@ static const decomp_entry canonical_decomposition[] =
  { 0x1ed5, "\x00\xf4\x03\x09" },
  { 0x1ed6, "\x00\xd4\x03\x03" },
  { 0x1ed7, "\x00\xf4\x03\x03" },
-  { 0x1ed8, "\x00\xd4\x03\x23" },
+  { 0x1ed8, "\x1e\xcc\x03\x02" },
-  { 0x1ed9, "\x00\xf4\x03\x23" },
+  { 0x1ed9, "\x1e\xcd\x03\x02" },
  { 0x1eda, "\x01\xa0\x03\x01" },
  { 0x1edb, "\x01\xa1\x03\x01" },
  { 0x1edc, "\x01\xa0\x03\x00" },
@@ -864,12 +901,18 @@ static const decomp_entry canonical_decomposition[] =
  { 0x2126, "\x03\xa9" },
  { 0x212a, "\x00\x4b" },
  { 0x212b, "\x00\xc5" },
+  { 0x219a, "\x21\x90\x03\x38" },
+  { 0x219b, "\x21\x92\x03\x38" },
+  { 0x21ae, "\x21\x94\x03\x38" },
+  { 0x21cd, "\x21\xd0\x03\x38" },
+  { 0x21ce, "\x21\xd4\x03\x38" },
+  { 0x21cf, "\x21\xd2\x03\x38" },
  { 0x2204, "\x22\x03\x03\x38" },
  { 0x2209, "\x22\x08\x03\x38" },
  { 0x220c, "\x22\x0b\x03\x38" },
  { 0x2224, "\x22\x23\x03\x38" },
  { 0x2226, "\x22\x25\x03\x38" },
-  { 0x2241, "\x00\x7e\x03\x38" },
+  { 0x2241, "\x22\x3c\x03\x38" },
  { 0x2244, "\x22\x43\x03\x38" },
  { 0x2247, "\x22\x45\x03\x38" },
  { 0x2249, "\x22\x48\x03\x38" },
@@ -1252,6 +1295,7 @@ static const decomp_entry canonical_decomposition[] =
  { 0xfa2b, "\x98\xfc" },
  { 0xfa2c, "\x99\x28" },
  { 0xfa2d, "\x9d\xb4" },
+  { 0xfb1d, "\x05\xd9\x05\xb4" },
  { 0xfb1f, "\x05\xf2\x05\xb7" },
  { 0xfb2a, "\x05\xe9\x05\xc1" },
  { 0xfb2b, "\x05\xe9\x05\xc2" },
@@ -1289,9 +1333,6 @@ static const decomp_entry canonical_decomposition[] =
 static const decomp_entry full_decomposition[] =
 {
-  { 0x005e, "\x00\x20\x03\x02" },
-  { 0x005f, "\x00\x20\x03\x32" },
-  { 0x0060, "\x00\x20\x03\x00" },
  { 0x00a0, "\x00\x20" },
  { 0x00a8, "\x00\x20\x03\x08" },
  { 0x00aa, "\x00\x61" },
@@ -1346,196 +1387,26 @@ static const decomp_entry full_decomposition[] =
  { 0x02e4, "\x02\x95" },
  { 0x037a, "\x00\x20\x03\x45" },
  { 0x0384, "\x00\x20\x03\x01" },
+  { 0x03d0, "\x03\xb2" },
+  { 0x03d1, "\x03\xb8" },
+  { 0x03d2, "\x03\xa5" },
+  { 0x03d5, "\x03\xc6" },
+  { 0x03d6, "\x03\xc0" },
+  { 0x03f0, "\x03\xba" },
+  { 0x03f1, "\x03\xc1" },
+  { 0x03f2, "\x03\xc2" },
  { 0x0587, "\x05\x65\x05\x82" },
+  { 0x0675, "\x06\x27\x06\x74" },
+  { 0x0676, "\x06\x48\x06\x74" },
+  { 0x0677, "\x06\xc7\x06\x74" },
+  { 0x0678, "\x06\x4a\x06\x74" },
+  { 0x0e33, "\x0e\x4d\x0e\x32" },
+  { 0x0eb3, "\x0e\xcd\x0e\xb2" },
  { 0x0edc, "\x0e\xab\x0e\x99" },
  { 0x0edd, "\x0e\xab\x0e\xa1" },
-  { 0x1101, "\x11\x00\x11\x00" },
+  { 0x0f0c, "\x0f\x0b" },
-  { 0x1104, "\x11\x03\x11\x03" },
+  { 0x0f77, "\x0f\xb2\x0f\x81" },
-  { 0x1108, "\x11\x07\x11\x07" },
+  { 0x0f79, "\x0f\xb3\x0f\x81" },
-  { 0x110a, "\x11\x09\x11\x09" },
-  { 0x110d, "\x11\x0c\x11\x0c" },
-  { 0x1113, "\x11\x02\x11\x00" },
-  { 0x1114, "\x11\x02\x11\x02" },
-  { 0x1115, "\x11\x02\x11\x03" },
-  { 0x1116, "\x11\x02\x11\x07" },
-  { 0x1117, "\x11\x03\x11\x00" },
-  { 0x1118, "\x11\x05\x11\x02" },
-  { 0x1119, "\x11\x05\x11\x05" },
-  { 0x111a, "\x11\x05\x11\x12" },
-  { 0x111b, "\x11\x05\x11\x0b" },
-  { 0x111c, "\x11\x06\x11\x07" },
-  { 0x111d, "\x11\x06\x11\x0b" },
-  { 0x111e, "\x11\x07\x11\x00" },
-  { 0x111f, "\x11\x07\x11\x02" },
-  { 0x1120, "\x11\x07\x11\x03" },
-  { 0x1121, "\x11\x07\x11\x09" },
-  { 0x1122, "\x11\x07\x11\x09\x11\x00" },
-  { 0x1123, "\x11\x07\x11\x09\x11\x03" },
-  { 0x1124, "\x11\x07\x11\x09\x11\x07" },
-  { 0x1125, "\x11\x07\x11\x09\x11\x09" },
-  { 0x1126, "\x11\x07\x11\x09\x11\x0c" },
-  { 0x1127, "\x11\x07\x11\x0c" },
-  { 0x1128, "\x11\x07\x11\x0e" },
-  { 0x1129, "\x11\x07\x11\x10" },
-  { 0x112a, "\x11\x07\x11\x11" },
-  { 0x112b, "\x11\x07\x11\x0b" },
-  { 0x112c, "\x11\x07\x11\x07\x11\x0b" },
-  { 0x112d, "\x11\x09\x11\x00" },
-  { 0x112e, "\x11\x09\x11\x02" },
-  { 0x112f, "\x11\x09\x11\x03" },
-  { 0x1130, "\x11\x09\x11\x05" },
-  { 0x1131, "\x11\x09\x11\x06" },
-  { 0x1132, "\x11\x09\x11\x07" },
-  { 0x1133, "\x11\x09\x11\x07\x11\x00" },
-  { 0x1134, "\x11\x09\x11\x09\x11\x09" },
-  { 0x1135, "\x11\x09\x11\x0b" },
-  { 0x1136, "\x11\x09\x11\x0c" },
-  { 0x1137, "\x11\x09\x11\x0e" },
-  { 0x1138, "\x11\x09\x11\x0f" },
-  { 0x1139, "\x11\x09\x11\x10" },
-  { 0x113a, "\x11\x09\x11\x11" },
-  { 0x113b, "\x11\x09\x11\x12" },
-  { 0x113d, "\x11\x3c\x11\x3c" },
-  { 0x113f, "\x11\x3e\x11\x3e" },
-  { 0x1141, "\x11\x0b\x11\x00" },
-  { 0x1142, "\x11\x0b\x11\x03" },
-  { 0x1143, "\x11\x0b\x11\x06" },
-  { 0x1144, "\x11\x0b\x11\x07" },
-  { 0x1145, "\x11\x0b\x11\x09" },
-  { 0x1146, "\x11\x0b\x11\x40" },
-  { 0x1147, "\x11\x0b\x11\x0b" },
-  { 0x1148, "\x11\x0b\x11\x0c" },
-  { 0x1149, "\x11\x0b\x11\x0e" },
-  { 0x114a, "\x11\x0b\x11\x10" },
-  { 0x114b, "\x11\x0b\x11\x11" },
-  { 0x114d, "\x11\x0c\x11\x0b" },
-  { 0x114f, "\x11\x4e\x11\x4e" },
-  { 0x1151, "\x11\x50\x11\x50" },
-  { 0x1152, "\x11\x0e\x11\x0f" },
-  { 0x1153, "\x11\x0e\x11\x12" },
-  { 0x1156, "\x11\x11\x11\x07" },
-  { 0x1157, "\x11\x11\x11\x0b" },
-  { 0x1158, "\x11\x12\x11\x12" },
-  { 0x1162, "\x11\x61\x11\x75" },
-  { 0x1164, "\x11\x63\x11\x75" },
-  { 0x1166, "\x11\x65\x11\x75" },
-  { 0x1168, "\x11\x67\x11\x75" },
-  { 0x116a, "\x11\x69\x11\x61" },
-  { 0x116b, "\x11\x69\x11\x61\x11\x75" },
-  { 0x116c, "\x11\x69\x11\x75" },
-  { 0x116f, "\x11\x6e\x11\x65" },
-  { 0x1170, "\x11\x6e\x11\x65\x11\x75" },
-  { 0x1171, "\x11\x6e\x11\x75" },
-  { 0x1174, "\x11\x73\x11\x75" },
-  { 0x1176, "\x11\x61\x11\x69" },
-  { 0x1177, "\x11\x61\x11\x6e" },
-  { 0x1178, "\x11\x63\x11\x69" },
-  { 0x1179, "\x11\x63\x11\x6d" },
-  { 0x117a, "\x11\x65\x11\x69" },
-  { 0x117b, "\x11\x65\x11\x6e" },
-  { 0x117c, "\x11\x65\x11\x73" },
-  { 0x117d, "\x11\x67\x11\x69" },
-  { 0x117e, "\x11\x67\x11\x6e" },
-  { 0x117f, "\x11\x69\x11\x65" },
-  { 0x1180, "\x11\x69\x11\x66" },
-  { 0x1181, "\x11\x69\x11\x68" },
-  { 0x1182, "\x11\x69\x11\x69" },
-  { 0x1183, "\x11\x69\x11\x6e" },
-  { 0x1184, "\x11\x6d\x11\x63" },
-  { 0x1185, "\x11\x6d\x11\x64" },
-  { 0x1186, "\x11\x6d\x11\x67" },
-  { 0x1187, "\x11\x6d\x11\x69" },
-  { 0x1188, "\x11\x6d\x11\x75" },
-  { 0x1189, "\x11\x6e\x11\x61" },
-  { 0x118a, "\x11\x6e\x11\x62" },
-  { 0x118b, "\x11\x6e\x11\x65\x11\x73" },
-  { 0x118c, "\x11\x6e\x11\x68" },
-  { 0x118d, "\x11\x6e\x11\x6e" },
-  { 0x118e, "\x11\x72\x11\x61" },
-  { 0x118f, "\x11\x72\x11\x65" },
-  { 0x1190, "\x11\x72\x11\x66" },
-  { 0x1191, "\x11\x72\x11\x67" },
-  { 0x1192, "\x11\x72\x11\x68" },
-  { 0x1193, "\x11\x72\x11\x6e" },
-  { 0x1194, "\x11\x72\x11\x75" },
-  { 0x1195, "\x11\x73\x11\x6e" },
-  { 0x1196, "\x11\x73\x11\x73" },
-  { 0x1197, "\x11\x74\x11\x6e" },
-  { 0x1198, "\x11\x75\x11\x61" },
-  { 0x1199, "\x11\x75\x11\x63" },
-  { 0x119a, "\x11\x75\x11\x69" },
-  { 0x119b, "\x11\x75\x11\x6e" },
-  { 0x119c, "\x11\x75\x11\x73" },
-  { 0x119d, "\x11\x75\x11\x9e" },
-  { 0x119f, "\x11\x9e\x11\x65" },
-  { 0x11a0, "\x11\x9e\x11\x6e" },
-  { 0x11a1, "\x11\x9e\x11\x75" },
-  { 0x11a2, "\x11\x9e\x11\x9e" },
-  { 0x11a9, "\x11\xa8\x11\xa8" },
-  { 0x11aa, "\x11\xa8\x11\xba" },
-  { 0x11ac, "\x11\xab\x11\xbd" },
-  { 0x11ad, "\x11\xab\x11\xc2" },
-  { 0x11b0, "\x11\xaf\x11\xa8" },
-  { 0x11b1, "\x11\xaf\x11\xb7" },
-  { 0x11b2, "\x11\xaf\x11\xb8" },
-  { 0x11b3, "\x11\xaf\x11\xba" },
-  { 0x11b4, "\x11\xaf\x11\xc0" },
-  { 0x11b5, "\x11\xaf\x11\xc1" },
-  { 0x11b6, "\x11\xaf\x11\xc2" },
-  { 0x11b9, "\x11\xb8\x11\xba" },
-  { 0x11bb, "\x11\xba\x11\xba" },
-  { 0x11c3, "\x11\xa8\x11\xaf" },
-  { 0x11c4, "\x11\xa8\x11\xba\x11\xa8" },
-  { 0x11c5, "\x11\xab\x11\xa8" },
-  { 0x11c6, "\x11\xab\x11\xae" },
-  { 0x11c7, "\x11\xab\x11\xba" },
-  { 0x11c8, "\x11\xab\x11\xeb" },
-  { 0x11c9, "\x11\xab\x11\xc0" },
-  { 0x11ca, "\x11\xae\x11\xa8" },
-  { 0x11cb, "\x11\xae\x11\xaf" },
-  { 0x11cc, "\x11\xaf\x11\xa8\x11\xba" },
-  { 0x11cd, "\x11\xaf\x11\xab" },
-  { 0x11ce, "\x11\xaf\x11\xae" },
-  { 0x11cf, "\x11\xaf\x11\xae\x11\xc2" },
-  { 0x11d0, "\x11\xaf\x11\xaf" },
-  { 0x11d1, "\x11\xaf\x11\xb7\x11\xa8" },
-  { 0x11d2, "\x11\xaf\x11\xb7\x11\xba" },
-  { 0x11d3, "\x11\xaf\x11\xb8\x11\xba" },
-  { 0x11d4, "\x11\xaf\x11\xb8\x11\xc2" },
-  { 0x11d5, "\x11\xaf\x11\xb8\x11\xbc" },
-  { 0x11d6, "\x11\xaf\x11\xba\x11\xba" },
-  { 0x11d7, "\x11\xaf\x11\xeb" },
-  { 0x11d8, "\x11\xaf\x11\xbf" },
-  { 0x11d9, "\x11\xaf\x11\xf9" },
-  { 0x11da, "\x11\xb7\x11\xa8" },
-  { 0x11db, "\x11\xb7\x11\xaf" },
-  { 0x11dc, "\x11\xb7\x11\xb8" },
-  { 0x11dd, "\x11\xb7\x11\xba" },
-  { 0x11de, "\x11\xb7\x11\xba\x11\xba" },
-  { 0x11df, "\x11\xb7\x11\xeb" },
-  { 0x11e0, "\x11\xb7\x11\xbe" },
-  { 0x11e1, "\x11\xb7\x11\xc2" },
-  { 0x11e2, "\x11\xb7\x11\xbc" },
-  { 0x11e3, "\x11\xb8\x11\xaf" },
-  { 0x11e4, "\x11\xb8\x11\xc1" },
-  { 0x11e5, "\x11\xb8\x11\xc2" },
-  { 0x11e6, "\x11\xb8\x11\xbc" },
-  { 0x11e7, "\x11\xba\x11\xa8" },
-  { 0x11e8, "\x11\xba\x11\xae" },
-  { 0x11e9, "\x11\xba\x11\xaf" },
-  { 0x11ea, "\x11\xba\x11\xb8" },
-  { 0x11ec, "\x11\xbc\x11\xa8" },
-  { 0x11ed, "\x11\xbc\x11\xa8\x11\xa8" },
-  { 0x11ee, "\x11\xbc\x11\xbc" },
-  { 0x11ef, "\x11\xbc\x11\xbf" },
-  { 0x11f1, "\x11\xf0\x11\xba" },
-  { 0x11f2, "\x11\xf0\x11\xeb" },
-  { 0x11f3, "\x11\xc1\x11\xb8" },
-  { 0x11f4, "\x11\xc1\x11\xbc" },
-  { 0x11f5, "\x11\xc2\x11\xab" },
-  { 0x11f6, "\x11\xc2\x11\xaf" },
-  { 0x11f7, "\x11\xc2\x11\xb7" },
-  { 0x11f8, "\x11\xc2\x11\xb8" },
  { 0x1e9a, "\x00\x61\x02\xbe" },
  { 0x1fbd, "\x00\x20\x03\x13" },
  { 0x1fbf, "\x00\x20\x03\x13" },
@@ -1555,12 +1426,15 @@ static const decomp_entry full_decomposition[] =
  { 0x2024, "\x00\x2e" },
  { 0x2025, "\x00\x2e\x00\x2e" },
  { 0x2026, "\x00\x2e\x00\x2e\x00\x2e" },
+  { 0x202f, "\x00\x20" },
  { 0x2033, "\x20\x32\x20\x32" },
  { 0x2034, "\x20\x32\x20\x32\x20\x32" },
  { 0x2036, "\x20\x35\x20\x35" },
  { 0x2037, "\x20\x35\x20\x35\x20\x35" },
  { 0x203c, "\x00\x21\x00\x21" },
  { 0x203e, "\x00\x20\x03\x05" },
+  { 0x2048, "\x00\x3f\x00\x21" },
+  { 0x2049, "\x00\x21\x00\x3f" },
  { 0x2070, "\x00\x30" },
  { 0x2074, "\x00\x34" },
  { 0x2075, "\x00\x35" },
@@ -1631,6 +1505,7 @@ static const decomp_entry full_decomposition[] =
  { 0x2136, "\x05\xd1" },
  { 0x2137, "\x05\xd2" },
  { 0x2138, "\x05\xd3" },
+  { 0x2139, "\x00\x69" },
  { 0x2153, "\x00\x31\x20\x44\x00\x33" },
  { 0x2154, "\x00\x32\x20\x44\x00\x33" },
  { 0x2155, "\x00\x31\x20\x44\x00\x35" },
@@ -1819,8 +1694,227 @@ static const decomp_entry full_decomposition[] =
  { 0x24e8, "\x00\x79" },
  { 0x24e9, "\x00\x7a" },
  { 0x24ea, "\x00\x30" },
+  { 0x2e9f, "\x6b\xcd" },
+  { 0x2ef3, "\x9f\x9f" },
+  { 0x2f00, "\x4e\x00" },
+  { 0x2f01, "\x4e\x28" },
+  { 0x2f02, "\x4e\x36" },
+  { 0x2f03, "\x4e\x3f" },
+  { 0x2f04, "\x4e\x59" },
+  { 0x2f05, "\x4e\x85" },
+  { 0x2f06, "\x4e\x8c" },
+  { 0x2f07, "\x4e\xa0" },
+  { 0x2f08, "\x4e\xba" },
+  { 0x2f09, "\x51\x3f" },
+  { 0x2f0a, "\x51\x65" },
+  { 0x2f0b, "\x51\x6b" },
+  { 0x2f0c, "\x51\x82" },
+  { 0x2f0d, "\x51\x96" },
+  { 0x2f0e, "\x51\xab" },
+  { 0x2f0f, "\x51\xe0" },
+  { 0x2f10, "\x51\xf5" },
+  { 0x2f11, "\x52\x00" },
+  { 0x2f12, "\x52\x9b" },
+  { 0x2f13, "\x52\xf9" },
+  { 0x2f14, "\x53\x15" },
+  { 0x2f15, "\x53\x1a" },
+  { 0x2f16, "\x53\x38" },
+  { 0x2f17, "\x53\x41" },
+  { 0x2f18, "\x53\x5c" },
+  { 0x2f19, "\x53\x69" },
+  { 0x2f1a, "\x53\x82" },
+  { 0x2f1b, "\x53\xb6" },
+  { 0x2f1c, "\x53\xc8" },
+  { 0x2f1d, "\x53\xe3" },
+  { 0x2f1e, "\x56\xd7" },
+  { 0x2f1f, "\x57\x1f" },
+  { 0x2f20, "\x58\xeb" },
+  { 0x2f21, "\x59\x02" },
+  { 0x2f22, "\x59\x0a" },
+  { 0x2f23, "\x59\x15" },
+  { 0x2f24, "\x59\x27" },
+  { 0x2f25, "\x59\x73" },
+  { 0x2f26, "\x5b\x50" },
+  { 0x2f27, "\x5b\x80" },
+  { 0x2f28, "\x5b\xf8" },
+  { 0x2f29, "\x5c\x0f" },
+  { 0x2f2a, "\x5c\x22" },
+  { 0x2f2b, "\x5c\x38" },
+  { 0x2f2c, "\x5c\x6e" },
+  { 0x2f2d, "\x5c\x71" },
+  { 0x2f2e, "\x5d\xdb" },
+  { 0x2f2f, "\x5d\xe5" },
+  { 0x2f30, "\x5d\xf1" },
+  { 0x2f31, "\x5d\xfe" },
+  { 0x2f32, "\x5e\x72" },
+  { 0x2f33, "\x5e\x7a" },
+  { 0x2f34, "\x5e\x7f" },
+  { 0x2f35, "\x5e\xf4" },
+  { 0x2f36, "\x5e\xfe" },
+  { 0x2f37, "\x5f\x0b" },
+  { 0x2f38, "\x5f\x13" },
+  { 0x2f39, "\x5f\x50" },
+  { 0x2f3a, "\x5f\x61" },
+  { 0x2f3b, "\x5f\x73" },
+  { 0x2f3c, "\x5f\xc3" },
+  { 0x2f3d, "\x62\x08" },
+  { 0x2f3e, "\x62\x36" },
+  { 0x2f3f, "\x62\x4b" },
+  { 0x2f40, "\x65\x2f" },
+  { 0x2f41, "\x65\x34" },
+  { 0x2f42, "\x65\x87" },
+  { 0x2f43, "\x65\x97" },
+  { 0x2f44, "\x65\xa4" },
+  { 0x2f45, "\x65\xb9" },
+  { 0x2f46, "\x65\xe0" },
+  { 0x2f47, "\x65\xe5" },
+  { 0x2f48, "\x66\xf0" },
+  { 0x2f49, "\x67\x08" },
+  { 0x2f4a, "\x67\x28" },
+  { 0x2f4b, "\x6b\x20" },
+  { 0x2f4c, "\x6b\x62" },
+  { 0x2f4d, "\x6b\x79" },
+  { 0x2f4e, "\x6b\xb3" },
+  { 0x2f4f, "\x6b\xcb" },
+  { 0x2f50, "\x6b\xd4" },
+  { 0x2f51, "\x6b\xdb" },
+  { 0x2f52, "\x6c\x0f" },
+  { 0x2f53, "\x6c\x14" },
+  { 0x2f54, "\x6c\x34" },
+  { 0x2f55, "\x70\x6b" },
+  { 0x2f56, "\x72\x2a" },
+  { 0x2f57, "\x72\x36" },
+  { 0x2f58, "\x72\x3b" },
+  { 0x2f59, "\x72\x3f" },
+  { 0x2f5a, "\x72\x47" },
+  { 0x2f5b, "\x72\x59" },
+  { 0x2f5c, "\x72\x5b" },
+  { 0x2f5d, "\x72\xac" },
+  { 0x2f5e, "\x73\x84" },
+  { 0x2f5f, "\x73\x89" },
+  { 0x2f60, "\x74\xdc" },
+  { 0x2f61, "\x74\xe6" },
+  { 0x2f62, "\x75\x18" },
+  { 0x2f63, "\x75\x1f" },
+  { 0x2f64, "\x75\x28" },
+  { 0x2f65, "\x75\x30" },
+  { 0x2f66, "\x75\x8b" },
+  { 0x2f67, "\x75\x92" },
+  { 0x2f68, "\x76\x76" },
+  { 0x2f69, "\x76\x7d" },
+  { 0x2f6a, "\x76\xae" },
+  { 0x2f6b, "\x76\xbf" },
+  { 0x2f6c, "\x76\xee" },
+  { 0x2f6d, "\x77\xdb" },
+  { 0x2f6e, "\x77\xe2" },
+  { 0x2f6f, "\x77\xf3" },
+  { 0x2f70, "\x79\x3a" },
+  { 0x2f71, "\x79\xb8" },
+  { 0x2f72, "\x79\xbe" },
+  { 0x2f73, "\x7a\x74" },
+  { 0x2f74, "\x7a\xcb" },
+  { 0x2f75, "\x7a\xf9" },
+  { 0x2f76, "\x7c\x73" },
+  { 0x2f77, "\x7c\xf8" },
+  { 0x2f78, "\x7f\x36" },
+  { 0x2f79, "\x7f\x51" },
+  { 0x2f7a, "\x7f\x8a" },
+  { 0x2f7b, "\x7f\xbd" },
+  { 0x2f7c, "\x80\x01" },
+  { 0x2f7d, "\x80\x0c" },
+  { 0x2f7e, "\x80\x12" },
+  { 0x2f7f, "\x80\x33" },
+  { 0x2f80, "\x80\x7f" },
+  { 0x2f81, "\x80\x89" },
+  { 0x2f82, "\x81\xe3" },
+  { 0x2f83, "\x81\xea" },
+  { 0x2f84, "\x81\xf3" },
+  { 0x2f85, "\x81\xfc" },
+  { 0x2f86, "\x82\x0c" },
+  { 0x2f87, "\x82\x1b" },
+  { 0x2f88, "\x82\x1f" },
+  { 0x2f89, "\x82\x6e" },
+  { 0x2f8a, "\x82\x72" },
+  { 0x2f8b, "\x82\x78" },
+  { 0x2f8c, "\x86\x4d" },
+  { 0x2f8d, "\x86\x6b" },
+  { 0x2f8e, "\x88\x40" },
+  { 0x2f8f, "\x88\x4c" },
+  { 0x2f90, "\x88\x63" },
+  { 0x2f91, "\x89\x7e" },
+  { 0x2f92, "\x89\x8b" },
+  { 0x2f93, "\x89\xd2" },
+  { 0x2f94, "\x8a\x00" },
+  { 0x2f95, "\x8c\x37" },
+  { 0x2f96, "\x8c\x46" },
+  { 0x2f97, "\x8c\x55" },
+  { 0x2f98, "\x8c\x78" },
+  { 0x2f99, "\x8c\x9d" },
+  { 0x2f9a, "\x8d\x64" },
+  { 0x2f9b, "\x8d\x70" },
+  { 0x2f9c, "\x8d\xb3" },
+  { 0x2f9d, "\x8e\xab" },
+  { 0x2f9e, "\x8e\xca" },
+  { 0x2f9f, "\x8f\x9b" },
+  { 0x2fa0, "\x8f\xb0" },
+  { 0x2fa1, "\x8f\xb5" },
+  { 0x2fa2, "\x90\x91" },
+  { 0x2fa3, "\x91\x49" },
+  { 0x2fa4, "\x91\xc6" },
+  { 0x2fa5, "\x91\xcc" },
+  { 0x2fa6, "\x91\xd1" },
+  { 0x2fa7, "\x95\x77" },
+  { 0x2fa8, "\x95\x80" },
+  { 0x2fa9, "\x96\x1c" },
+  { 0x2faa, "\x96\xb6" },
+  { 0x2fab, "\x96\xb9" },
+  { 0x2fac, "\x96\xe8" },
+  { 0x2fad, "\x97\x51" },
+  { 0x2fae, "\x97\x5e" },
+  { 0x2faf, "\x97\x62" },
+  { 0x2fb0, "\x97\x69" },
+  { 0x2fb1, "\x97\xcb" },
+  { 0x2fb2, "\x97\xed" },
+  { 0x2fb3, "\x97\xf3" },
+  { 0x2fb4, "\x98\x01" },
+  { 0x2fb5, "\x98\xa8" },
+  { 0x2fb6, "\x98\xdb" },
+  { 0x2fb7, "\x98\xdf" },
+  { 0x2fb8, "\x99\x96" },
+  { 0x2fb9, "\x99\x99" },
+  { 0x2fba, "\x99\xac" },
+  { 0x2fbb, "\x9a\xa8" },
+  { 0x2fbc, "\x9a\xd8" },
+  { 0x2fbd, "\x9a\xdf" },
+  { 0x2fbe, "\x9b\x25" },
+  { 0x2fbf, "\x9b\x2f" },
+  { 0x2fc0, "\x9b\x32" },
+  { 0x2fc1, "\x9b\x3c" },
+  { 0x2fc2, "\x9b\x5a" },
+  { 0x2fc3, "\x9c\xe5" },
+  { 0x2fc4, "\x9e\x75" },
+  { 0x2fc5, "\x9e\x7f" },
+  { 0x2fc6, "\x9e\xa5" },
+  { 0x2fc7, "\x9e\xbb" },
+  { 0x2fc8, "\x9e\xc3" },
+  { 0x2fc9, "\x9e\xcd" },
+  { 0x2fca, "\x9e\xd1" },
+  { 0x2fcb, "\x9e\xf9" },
+  { 0x2fcc, "\x9e\xfd" },
+  { 0x2fcd, "\x9f\x0e" },
+  { 0x2fce, "\x9f\x13" },
+  { 0x2fcf, "\x9f\x20" },
+  { 0x2fd0, "\x9f\x3b" },
+  { 0x2fd1, "\x9f\x4a" },
+  { 0x2fd2, "\x9f\x52" },
+  { 0x2fd3, "\x9f\x8d" },
+  { 0x2fd4, "\x9f\x9c" },
+  { 0x2fd5, "\x9f\xa0" },
  { 0x3000, "\x00\x20" },
  { 0x3036, "\x30\x12" },
+  { 0x3038, "\x53\x41" },
+  { 0x3039, "\x53\x44" },
+  { 0x303a, "\x53\x45" },
  { 0x309b, "\x00\x20\x30\x99" },
  { 0x309c, "\x00\x20\x30\x9a" },
  { 0x3131, "\x11\x00" },
@@ -2641,11 +2735,11 @@ static const decomp_entry full_decomposition[] =
  { 0xfc5b, "\x06\x30\x06\x70" },
  { 0xfc5c, "\x06\x31\x06\x70" },
  { 0xfc5d, "\x06\x49\x06\x70" },
-  { 0xfc5e, "\x00\x20\x06\x51\x06\x4c" },
+  { 0xfc5e, "\x00\x20\x06\x4c\x06\x51" },
-  { 0xfc5f, "\x00\x20\x06\x51\x06\x4d" },
+  { 0xfc5f, "\x00\x20\x06\x4d\x06\x51" },
-  { 0xfc60, "\x00\x20\x06\x51\x06\x4e" },
+  { 0xfc60, "\x00\x20\x06\x4e\x06\x51" },
-  { 0xfc61, "\x00\x20\x06\x51\x06\x4f" },
+  { 0xfc61, "\x00\x20\x06\x4f\x06\x51" },
-  { 0xfc62, "\x00\x20\x06\x51\x06\x50" },
+  { 0xfc62, "\x00\x20\x06\x50\x06\x51" },
  { 0xfc63, "\x00\x20\x06\x51\x06\x70" },
  { 0xfc64, "\x06\x26\x06\x31" },
  { 0xfc65, "\x06\x26\x06\x32" },
@@ -2789,9 +2883,9 @@ static const decomp_entry full_decomposition[] =
  { 0xfcef, "\x06\x46\x06\x47" },
  { 0xfcf0, "\x06\x4a\x06\x45" },
  { 0xfcf1, "\x06\x4a\x06\x47" },
-  { 0xfcf2, "\x06\x40\x06\x51\x06\x4e" },
+  { 0xfcf2, "\x06\x40\x06\x4e\x06\x51" },
-  { 0xfcf3, "\x06\x40\x06\x51\x06\x4f" },
+  { 0xfcf3, "\x06\x40\x06\x4f\x06\x51" },
-  { 0xfcf4, "\x06\x40\x06\x51\x06\x50" },
+  { 0xfcf4, "\x06\x40\x06\x50\x06\x51" },
  { 0xfcf5, "\x06\x37\x06\x49" },
  { 0xfcf6, "\x06\x37\x06\x4a" },
  { 0xfcf7, "\x06\x39\x06\x49" },

--- a/libjava/include/java-chartables.h
+++ b/libjava/include/java-chartables.h
--- a/libjava/java/lang/Character.java
+++ b/libjava/java/lang/Character.java
-// Character.java - Character class.
+/* java.lang.Character -- Wrapper class for char, and Unicode subsets
+   Copyright (C) 1998, 1999, 2001, 2002 Free Software Foundation, Inc.
-/* Copyright (C) 1998, 1999, 2000  Free Software Foundation
+This file is part of GNU Classpath.
-   This file is part of libgcj.
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
-This software is copyrighted work licensed under the terms of the
+GNU Classpath is distributed in the hope that it will be useful, but
-Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
+WITHOUT ANY WARRANTY; without even the implied warranty of
-details.  */
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+/*
+ * Note: This class must not be merged with Classpath.  Gcj uses C-style
+ * arrays (see include/java-chartables.h) to store the Unicode character
+ * database, whereas Classpath uses Java objects (char[] extracted from
+ * String constants) in gnu.java.lang.CharData.  Gcj's approach is more
+ * efficient, because there is no vtable or data relocation to worry about.
+ * However, despite the difference in the database interface, the two
+ * versions share identical algorithms.
+ */
 package java.lang;
 import java.io.Serializable;
 /**
+ * Wrapper class for the primitive char data type.  In addition, this class
+ * allows one to retrieve property information and perform transformations
+ * on the 57,707 defined characters in the Unicode Standard, Version 3.0.0.
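+ * java.lang.Character is designed to be very dynamic, and as such, it
+ * retrieves information on the Unicode character set from a separate
+ * database, which can be easily upgraded.  In Classpath that database is
+ * gnu.java.lang.CharData; in gcj it is the set of C-style arrays in
+ * include/java-chartables.h.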
+ *
+ * <p>For predicates, boundaries are used to describe
+ * the set of characters for which the method will return true.
+ * This syntax uses fairly normal regular expression notation.
+ * See 5.13 of the Unicode Standard, Version 3.0, for the
+ * boundary specification.
+ *
+ * <p>See <a href="http://www.unicode.org">http://www.unicode.org</a>
+ * for more information on the Unicode Standard.
+ *
 * @author Tom Tromey <tromey@cygnus.com>
- * @date September 10, 1998 
+ * @author Paul N. Fisher
+ * @author Jochen Hoenicke
+ * @author Eric Blake <ebb9@email.byu.edu>
+ * @since 1.0
+ * @status updated to 1.4
 */
-/* Written using "Java Class Libraries", 2nd edition, ISBN 0-201-31002-3
- * "The Java Language Specification", ISBN 0-201-63451-1,
- * online API docs for JDK 1.2 beta from http://www.javasoft.com,
- * and The Unicode Standard Version 2.0.
- * Status: Believed complete and correct for JDK 1.1; 1.2 methods
- * unimplemented.
- */
 public final class Character implements Serializable, Comparable
 {
-  public static final char MIN_VALUE = '\u0000';
+  /**
-  public static final char MAX_VALUE = '\uffff';
+   * A subset of Unicode blocks.
+   *
+   * @author Paul N. Fisher
+   * @author Eric Blake <ebb9@email.byu.edu>
+   * @since 1.2
+   */
+  public static class Subset
+  {
+    /** The name of the subset. */
+    private final String name;
-  public static final int MIN_RADIX = 2;
+    /**
-  public static final int MAX_RADIX = 36;
+     * Construct a new subset of characters.
+     *
+     * @param name the name of the subset
+     * @throws NullPointerException if name is null
+     */
+    protected Subset(String name)
+    {
+      // name.toString() returns name itself for a non-null String; calling it
+      // on null throws the NullPointerException documented above.
+      this.name = name.toString();
+    }
-  public static final Class TYPE = VMClassLoader.getPrimitiveClass('C');
+    /**
+     * Compares two Subsets for equality. This is <code>final</code>, and
+     * restricts the comparison to the <code>==</code> operator, so it returns
+     * true only for the same object.
+     *
+     * @param o the object to compare
+     * @return true if o is this
+     */
+    public final boolean equals(Object o)
+    {
+      return o == this;
+    }
-  // Space.
+    /**
-  public static final byte SPACE_SEPARATOR     = 12;
+     * Makes the original hashCode of Object final, to be consistent with
-  public static final byte LINE_SEPARATOR      = 13;
+     * equals.
-  public static final byte PARAGRAPH_SEPARATOR = 14;
+     *
+     * @return the hash code for this object
+     */
+    public final int hashCode()
+    {
+      return super.hashCode();
+    }
-  // Letters.
+    /**
-  public static final byte UPPERCASE_LETTER = 1;
+     * Returns the name of the subset.
-  public static final byte LOWERCASE_LETTER = 2;
+     *
-  public static final byte TITLECASE_LETTER = 3;
+     * @return the name
-  public static final byte MODIFIER_LETTER  = 4;
+     */
-  public static final byte OTHER_LETTER     = 5;
+    public final String toString()
+    {
+      return name;
+    }
+  } // class Subset
-  // Numbers.
+  /**
-  public static final byte DECIMAL_DIGIT_NUMBER =  9;
+   * A family of character subsets in the Unicode specification. A character
-  public static final byte LETTER_NUMBER        = 10;
+   * is in at most one of these blocks.
-  public static final byte OTHER_NUMBER         = 11;
+   *
+   * <p>This nested class was generated automatically from
+   * <code>libjava/gnu/gcj/convert/Blocks-3.txt</code>, by the Perl script
+   * <code>scripts/unicode-blocks.pl</code>.
+   * This Unicode definition file can be found on the
+   * <a href="http://www.unicode.org">http://www.unicode.org</a> website.
+   * JDK 1.4 uses Unicode version 3.0.0.
+   *
+   * @author scripts/unicode-blocks.pl (written by Eric Blake)
+   * @since 1.2
+   */
+  public static final class UnicodeBlock extends Subset
+  {
+    /** The start of the subset. */
+    private final char start;
-  // Marks.
+    /** The end of the subset. */
-  public static final byte NON_SPACING_MARK     = 6;
+    private final char end;
-  public static final byte ENCLOSING_MARK       = 7;
-  public static final byte COMBINING_SPACING_MARK = 8;
-  // Punctuation.
+    /**
-  public static final byte DASH_PUNCTUATION      = 20;
+     * Constructor for strictly defined blocks.
-  public static final byte START_PUNCTUATION     = 21;
+     *
-  public static final byte END_PUNCTUATION       = 22;
+     * @param start the start character of the range
-  public static final byte CONNECTOR_PUNCTUATION = 23;
+     * @param end the end character of the range
-  public static final byte OTHER_PUNCTUATION     = 24;
+     * @param name the block name
+     */
+    private UnicodeBlock(char start, char end, String name)
+    {
+      super(name);
+      this.start = start;
+      this.end = end;
+    }
-  // Symbols.
+    /**
-  public static final byte MATH_SYMBOL     = 25;
+     * Returns the Unicode character block which a character belongs to.
-  public static final byte CURRENCY_SYMBOL = 26;
+     *
-  public static final byte MODIFIER_SYMBOL = 27;
+     * @param ch the character to look up
-  public static final byte OTHER_SYMBOL    = 28;
+     * @return the set it belongs to, or null if it is not in one
+     */
+    public static UnicodeBlock of(char ch)
+    {
+      // Special case, since SPECIALS contains two ranges.
+      if (ch == '\uFEFF')
+        return SPECIALS;
+      // Simple binary search for the correct block.
+      int low = 0;
+      int hi = sets.length - 1;
+      while (low <= hi)
+        {
+          int mid = (low + hi) >> 1;
+          UnicodeBlock b = sets[mid];
+          if (ch < b.start)
+            hi = mid - 1;
+          else if (ch > b.end)
+            low = mid + 1;
+          else
+            return b;
+        }
+      return null;
+    }
-  // Format controls.
+    /**
-  public static final byte CONTROL = 15;
+     * Basic Latin.
-  // Note: The JCL book says that both FORMAT and PRIVATE_USE are 18.
+     * '\u0000' - '\u007F'.
-  // However, FORMAT is actually 16.
+     */
-  public static final byte FORMAT  = 16;
+    public final static UnicodeBlock BASIC_LATIN
+      = new UnicodeBlock('\u0000', '\u007F',
+                         "BASIC_LATIN");
-  // Others.
+    /**
-  public static final byte UNASSIGNED  = 0;
+     * Latin-1 Supplement.
-  public static final byte PRIVATE_USE = 18;
+     * '\u0080' - '\u00FF'.
-  public static final byte SURROGATE   = 19;
+     */
+    public final static UnicodeBlock LATIN_1_SUPPLEMENT
+      = new UnicodeBlock('\u0080', '\u00FF',
+                         "LATIN_1_SUPPLEMENT");
-  private static final long serialVersionUID = 3786198910865385080L;
+    /**
+     * Latin Extended-A.
+     * '\u0100' - '\u017F'.
+     */
+    public final static UnicodeBlock LATIN_EXTENDED_A
+      = new UnicodeBlock('\u0100', '\u017F',
+                         "LATIN_EXTENDED_A");
-  public Character (char ch)
+    /**
-  {
+     * Latin Extended-B.
-    value = ch;
+     * '\u0180' - '\u024F'.
-  }
+     */
+    public final static UnicodeBlock LATIN_EXTENDED_B
+      = new UnicodeBlock('\u0180', '\u024F',
+                         "LATIN_EXTENDED_B");
-  public char charValue ()
+    /**
-  {
+     * IPA Extensions.
-    return value;
+     * '\u0250' - '\u02AF'.
-  }
+     */
+    public final static UnicodeBlock IPA_EXTENSIONS
+      = new UnicodeBlock('\u0250', '\u02AF',
+                         "IPA_EXTENSIONS");
-  // See if a character is a digit.  If so, return the corresponding
+    /**
-  // value.  Otherwise return -1.
+     * Spacing Modifier Letters.
-  private static native int digit_value (char ch);
+     * '\u02B0' - '\u02FF'.
+     */
+    public final static UnicodeBlock SPACING_MODIFIER_LETTERS
+      = new UnicodeBlock('\u02B0', '\u02FF',
+                         "SPACING_MODIFIER_LETTERS");
-  public static int digit (char ch, int radix)
+    /**
-  {
+     * Combining Diacritical Marks.
-    if (radix < MIN_RADIX || radix > MAX_RADIX)
+     * '\u0300' - '\u036F'.
-      return -1;
+     */
+    public final static UnicodeBlock COMBINING_DIACRITICAL_MARKS
-    int d = digit_value (ch);
+      = new UnicodeBlock('\u0300', '\u036F',
-    if (d == -1)
+                         "COMBINING_DIACRITICAL_MARKS");
-      {
-	if (ch >= 'A' && ch <= 'Z')
-	  d = ch - 'A' + 10;
-	else if (ch >= 'a' && ch <= 'z')
-	  d = ch - 'a' + 10;
-	else
-	  return -1;
-      }
-    return d >= radix ? -1 : d;
-  }
-  public boolean equals (Object obj)
+    /**
-  {
+     * Greek.
-    // Don't need to compare OBJ to null as instanceof will do this.
+     * '\u0370' - '\u03FF'.
-    if (obj instanceof Character)
+     */
-      return value == ((Character) obj).value;
+    public final static UnicodeBlock GREEK
-    return false;
+      = new UnicodeBlock('\u0370', '\u03FF',
-  }
+                         "GREEK");
-  public static char forDigit (int d, int rdx)
+    /**
-  {
+     * Cyrillic.
-    if (d < 0 || d >= rdx || rdx < MIN_RADIX || rdx > MAX_RADIX)
+     * '\u0400' - '\u04FF'.
-      return '\u0000';
+     */
-    if (d < 10)
+    public final static UnicodeBlock CYRILLIC
-      return (char) ('0' + d);
+      = new UnicodeBlock('\u0400', '\u04FF',
-    // The Java Language Spec says to use lowercase, while the JCL
+                         "CYRILLIC");
-    // says to use uppercase.  We go with the former.
-    return (char) ('a' + d - 10);
-  }
-  public static native int getNumericValue (char ch);
+    /**
-  public static native int getType (char ch);
+     * Armenian.
+     * '\u0530' - '\u058F'.
+     */
+    public final static UnicodeBlock ARMENIAN
+      = new UnicodeBlock('\u0530', '\u058F',
+                         "ARMENIAN");
-  public int hashCode ()
+    /**
-  {
+     * Hebrew.
-    return value;
+     * '\u0590' - '\u05FF'.
-  }
+     */
+    public final static UnicodeBlock HEBREW
+      = new UnicodeBlock('\u0590', '\u05FF',
+                         "HEBREW");
-  public static boolean isDefined (char ch)
+    /**
-  {
+     * Arabic.
-    return getType (ch) != UNASSIGNED;
+     * '\u0600' - '\u06FF'.
-  }
+     */
+    public final static UnicodeBlock ARABIC
+      = new UnicodeBlock('\u0600', '\u06FF',
+                         "ARABIC");
-  public static boolean isDigit (char ch)
+    /**
-  {
+     * Syriac.
-    return digit_value (ch) != -1;
+     * '\u0700' - '\u074F'.
-  }
+     * @since 1.4
+     */
+    public final static UnicodeBlock SYRIAC
+      = new UnicodeBlock('\u0700', '\u074F',
+                         "SYRIAC");
-  // The JCL book says that the argument here is a Character.  That is
+    /**
-  // wrong.
+     * Thaana.
-  public static boolean isIdentifierIgnorable (char ch)
+     * '\u0780' - '\u07BF'.
-  {
+     * @since 1.4
-    // This information comes from the Unicode Standard.  It isn't
+     */
-    // auto-generated as it doesn't appear in the unidata table.
+    public final static UnicodeBlock THAANA
-    return ((ch >= '\u0000' && ch <= '\u0008')
+      = new UnicodeBlock('\u0780', '\u07BF',
-	    || (ch >= '\u000e' && ch <= '\u001b')
+                         "THAANA");
-	    // JDK 1.2 docs say that these are ignorable.  The Unicode
-	    // Standard is somewhat ambiguous on this issue.
-	    || (ch >= '\u007f' && ch <= '\u009f')
-	    || (ch >= '\u200c' && ch <= '\u200f')
-	    // JCl says 200a through 200e, but that is a typo.  The
-	    // Unicode standard says the bidi controls are 202a
-	    // through 202e.
-	    || (ch >= '\u202a' && ch <= '\u202e')
-	    || (ch >= '\u206a' && ch <= '\u206f')
-	    || ch == '\ufeff');
-  }
-  public static boolean isISOControl (char c)
+    /**
-  {
+     * Devanagari.
-    return ((c >= '\u0000' && c <= '\u001f')
+     * '\u0900' - '\u097F'.
-	    || (c >= '\u007f' && c <= '\u009f'));
+     */
-  }
+    public final static UnicodeBlock DEVANAGARI
+      = new UnicodeBlock('\u0900', '\u097F',
+                         "DEVANAGARI");
-  public static boolean isJavaIdentifierPart (char ch)
+    /**
-  {
+     * Bengali.
-    if (isIdentifierIgnorable (ch) || isDigit (ch))
+     * '\u0980' - '\u09FF'.
-      return true;
+     */
-    int type = getType (ch);
+    public final static UnicodeBlock BENGALI
-    return (type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK
+      = new UnicodeBlock('\u0980', '\u09FF',
-	    || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION
+                         "BENGALI");
-	    || type == UPPERCASE_LETTER || type == LOWERCASE_LETTER
-	    || type == TITLECASE_LETTER || type == MODIFIER_LETTER
-	    || type == OTHER_LETTER || type == LETTER_NUMBER);
-  }
-  public static boolean isJavaIdentifierStart (char ch)
+    /**
-  {
+     * Gurmukhi.
-    int type = getType (ch);
+     * '\u0A00' - '\u0A7F'.
-    return (type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION
+     */
-	    || type == UPPERCASE_LETTER || type == LOWERCASE_LETTER
+    public final static UnicodeBlock GURMUKHI
-	    || type == TITLECASE_LETTER || type == MODIFIER_LETTER
+      = new UnicodeBlock('\u0A00', '\u0A7F',
-	    || type == OTHER_LETTER);
+                         "GURMUKHI");
-  }
-  // Deprecated in 1.2.
+    /**
-  public static boolean isJavaLetter (char ch)
+     * Gujarati.
-  {
+     * '\u0A80' - '\u0AFF'.
-    return ch == '$' || ch == '_' || isLetter (ch);
+     */
-  }
+    public final static UnicodeBlock GUJARATI
+      = new UnicodeBlock('\u0A80', '\u0AFF',
+                         "GUJARATI");
-  // Deprecated in 1.2.
+    /**
-  public static boolean isJavaLetterOrDigit (char ch)
+     * Oriya.
-  {
+     * '\u0B00' - '\u0B7F'.
-    return ch == '$' || ch == '_' || isLetterOrDigit (ch);
+     */
-  }
+    public final static UnicodeBlock ORIYA
+      = new UnicodeBlock('\u0B00', '\u0B7F',
+                         "ORIYA");
-  public static boolean isLetter (char ch)
+    /**
-  {
+     * Tamil.
-    int type = getType (ch);
+     * '\u0B80' - '\u0BFF'.
-    return (type == UPPERCASE_LETTER || type == LOWERCASE_LETTER
+     */
-	    || type == TITLECASE_LETTER || type == MODIFIER_LETTER
+    public final static UnicodeBlock TAMIL
-	    || type == OTHER_LETTER);
+      = new UnicodeBlock('\u0B80', '\u0BFF',
-  }
+                         "TAMIL");
-  public static boolean isLetterOrDigit (char ch)
+    /**
-  {
+     * Telugu.
-    return isDigit (ch) || isLetter (ch);
+     * '\u0C00' - '\u0C7F'.
-  }
+     */
+    public final static UnicodeBlock TELUGU
+      = new UnicodeBlock('\u0C00', '\u0C7F',
+                         "TELUGU");
-  public static native boolean isLowerCase (char ch);
+    /**
+     * Kannada.
+     * '\u0C80' - '\u0CFF'.
+     */
+    public final static UnicodeBlock KANNADA
+      = new UnicodeBlock('\u0C80', '\u0CFF',
+                         "KANNADA");
-  // Deprecated in JCL.
+    /**
-  public static boolean isSpace (char ch)
+     * Malayalam.
-  {
+     * '\u0D00' - '\u0D7F'.
-    return ch == '\n' || ch == '\t' || ch == '\f' || ch == '\r' || ch == ' ';
+     */
-  }
+    public final static UnicodeBlock MALAYALAM
+      = new UnicodeBlock('\u0D00', '\u0D7F',
+                         "MALAYALAM");
-  public static native boolean isSpaceChar (char ch);
+    /**
-  public static native boolean isTitleCase (char ch);
+     * Sinhala.
+     * '\u0D80' - '\u0DFF'.
+     * @since 1.4
+     */
+    public final static UnicodeBlock SINHALA
+      = new UnicodeBlock('\u0D80', '\u0DFF',
+                         "SINHALA");
-  public static boolean isUnicodeIdentifierPart (char ch)
+    /**
-  {
+     * Thai.
-    if (isIdentifierIgnorable (ch) || isDigit (ch))
+     * '\u0E00' - '\u0E7F'.
-      return true;
+     */
-    int type = getType (ch);
+    public final static UnicodeBlock THAI
-    return (type == CONNECTOR_PUNCTUATION || type == LETTER_NUMBER
+      = new UnicodeBlock('\u0E00', '\u0E7F',
-	    || type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK
+                         "THAI");
-	    || type == UPPERCASE_LETTER || type == LOWERCASE_LETTER
-	    || type == TITLECASE_LETTER || type == MODIFIER_LETTER
-	    || type == OTHER_LETTER);
-  }
-  public static boolean isUnicodeIdentifierStart (char ch)
+    /**
-  {
+     * Lao.
-    return isLetter (ch);
+     * '\u0E80' - '\u0EFF'.
-  }
+     */
+    public final static UnicodeBlock LAO
+      = new UnicodeBlock('\u0E80', '\u0EFF',
+                         "LAO");
-  public static native boolean isUpperCase (char ch);
+    /**
+     * Tibetan.
+     * '\u0F00' - '\u0FFF'.
+     */
+    public final static UnicodeBlock TIBETAN
+      = new UnicodeBlock('\u0F00', '\u0FFF',
+                         "TIBETAN");
-  public static boolean isWhitespace (char ch)
+    /**
-  {
+     * Myanmar.
-    return ((ch >= '\u0009' && ch <= '\r')
+     * '\u1000' - '\u109F'.
-	    || (ch >= '\u001c' && ch <= '\u001f')
+     * @since 1.4
-	    || (ch != '\u00a0' && ch != '\ufeff' && isSpaceChar (ch)));
+     */
-  }
+    public final static UnicodeBlock MYANMAR
+      = new UnicodeBlock('\u1000', '\u109F',
+                         "MYANMAR");
-  public static native char toLowerCase (char ch);
+    /**
-  public static native char toTitleCase (char ch);
+     * Georgian.
-  public static native char toUpperCase (char ch);
+     * '\u10A0' - '\u10FF'.
+     */
+    public final static UnicodeBlock GEORGIAN
+      = new UnicodeBlock('\u10A0', '\u10FF',
+                         "GEORGIAN");
-  public String toString ()
+    /**
-  {
+     * Hangul Jamo.
-    return String.valueOf(value);
+     * '\u1100' - '\u11FF'.
-  }
+     */
+    public final static UnicodeBlock HANGUL_JAMO
+      = new UnicodeBlock('\u1100', '\u11FF',
+                         "HANGUL_JAMO");
-  public int compareTo (Character anotherCharacter)
+    /**
-  {
+     * Ethiopic.
-    return value - anotherCharacter.value;
+     * '\u1200' - '\u137F'.
-  }
+     * @since 1.4
+     */
+    public final static UnicodeBlock ETHIOPIC
+      = new UnicodeBlock('\u1200', '\u137F',
+                         "ETHIOPIC");
-  public int compareTo (Object o)
+    /**
-  {
+     * Cherokee.
-    return compareTo ((Character) o);
+     * '\u13A0' - '\u13FF'.
-  }
+     * @since 1.4
+     */
+    public final static UnicodeBlock CHEROKEE
+      = new UnicodeBlock('\u13A0', '\u13FF',
+                         "CHEROKEE");
-  // Private data.
+    /**
-  private char value;
+     * Unified Canadian Aboriginal Syllabics.
+     * '\u1400' - '\u167F'.
+     * @since 1.4
+     */
+    public final static UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
+      = new UnicodeBlock('\u1400', '\u167F',
+                         "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS");
-  public static class Subset
+    /**
-  {
+     * Ogham.
-    protected Subset (String name)
+     * '\u1680' - '\u169F'.
-    {
+     * @since 1.4
-      this.name = name;
+     */
-    }
+    public final static UnicodeBlock OGHAM
+      = new UnicodeBlock('\u1680', '\u169F',
+                         "OGHAM");
-    public final boolean equals (Object obj)
+    /**
-    {
+     * Runic.
-      return obj == this;
+     * '\u16A0' - '\u16FF'.
-    }
+     * @since 1.4
+     */
+    public final static UnicodeBlock RUNIC
+      = new UnicodeBlock('\u16A0', '\u16FF',
+                         "RUNIC");
-    public final int hashCode ()
+    /**
-    {
+     * Khmer.
-      return super.hashCode ();
+     * '\u1780' - '\u17FF'.
-    }
+     * @since 1.4
+     */
+    public final static UnicodeBlock KHMER
+      = new UnicodeBlock('\u1780', '\u17FF',
+                         "KHMER");
-    public final String toString ()
+    /**
-    {
+     * Mongolian.
-      return name;
+     * '\u1800' - '\u18AF'.
-    }
+     * @since 1.4
+     */
+    public final static UnicodeBlock MONGOLIAN
+      = new UnicodeBlock('\u1800', '\u18AF',
+                         "MONGOLIAN");
-    // Name of this subset.
+    /**
-    private String name;
+     * Latin Extended Additional.
-  }
+     * '\u1E00' - '\u1EFF'.
+     */
+    public final static UnicodeBlock LATIN_EXTENDED_ADDITIONAL
+      = new UnicodeBlock('\u1E00', '\u1EFF',
+                         "LATIN_EXTENDED_ADDITIONAL");
-  public static final class UnicodeBlock extends Subset
+    /**
-  {
+     * Greek Extended.
-    private UnicodeBlock (String name, char start, char end)
+     * '\u1F00' - '\u1FFF'.
-    {
+     */
-      super (name);
+    public final static UnicodeBlock GREEK_EXTENDED
-      this.start = start;
+      = new UnicodeBlock('\u1F00', '\u1FFF',
-      this.end = end;
+                         "GREEK_EXTENDED");
-    }
-    public static UnicodeBlock of (char c)
+    /**
-    {
+     * General Punctuation.
-      // A special case we need.
+     * '\u2000' - '\u206F'.
-      if (c == '\uFEFF')
+     */
-	return SPECIALS;
+    public final static UnicodeBlock GENERAL_PUNCTUATION
+      = new UnicodeBlock('\u2000', '\u206F',
-      // Do a binary search to find the correct subset.
+                         "GENERAL_PUNCTUATION");
-      int hi = blocks.length;
-      int lo = 0;
-      while (hi > lo)
-	{
-	  int mid = (hi + lo) / 2;
-	  UnicodeBlock ub = blocks[mid];
-	  if (c < ub.start)
-	    hi = mid;
-	  else if (c > ub.end)
-	    lo = mid;
-	  else
-	    return ub;
-	}
-      return null;
+    /**
-    }
+     * Superscripts and Subscripts.
+     * '\u2070' - '\u209F'.
+     */
+    public final static UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
+      = new UnicodeBlock('\u2070', '\u209F',
+                         "SUPERSCRIPTS_AND_SUBSCRIPTS");
+    /**
+     * Currency Symbols.
+     * '\u20A0' - '\u20CF'.
+     */
+    public final static UnicodeBlock CURRENCY_SYMBOLS
+      = new UnicodeBlock('\u20A0', '\u20CF',
+                         "CURRENCY_SYMBOLS");
+    /**
+     * Combining Marks for Symbols.
+     * '\u20D0' - '\u20FF'.
+     */
+    public final static UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
+      = new UnicodeBlock('\u20D0', '\u20FF',
+                         "COMBINING_MARKS_FOR_SYMBOLS");
+    /**
+     * Letterlike Symbols.
+     * '\u2100' - '\u214F'.
+     */
+    public final static UnicodeBlock LETTERLIKE_SYMBOLS
+      = new UnicodeBlock('\u2100', '\u214F',
+                         "LETTERLIKE_SYMBOLS");
+    /**
+     * Number Forms.
+     * '\u2150' - '\u218F'.
+     */
+    public final static UnicodeBlock NUMBER_FORMS
+      = new UnicodeBlock('\u2150', '\u218F',
+                         "NUMBER_FORMS");
+    /**
+     * Arrows.
+     * '\u2190' - '\u21FF'.
+     */
+    public final static UnicodeBlock ARROWS
+      = new UnicodeBlock('\u2190', '\u21FF',
+                         "ARROWS");
+    /**
+     * Mathematical Operators.
+     * '\u2200' - '\u22FF'.
+     */
+    public final static UnicodeBlock MATHEMATICAL_OPERATORS
+      = new UnicodeBlock('\u2200', '\u22FF',
+                         "MATHEMATICAL_OPERATORS");
+    /**
+     * Miscellaneous Technical.
+     * '\u2300' - '\u23FF'.
+     */
+    public final static UnicodeBlock MISCELLANEOUS_TECHNICAL
+      = new UnicodeBlock('\u2300', '\u23FF',
+                         "MISCELLANEOUS_TECHNICAL");
+    /**
+     * Control Pictures.
+     * '\u2400' - '\u243F'.
+     */
+    public final static UnicodeBlock CONTROL_PICTURES
+      = new UnicodeBlock('\u2400', '\u243F',
+                         "CONTROL_PICTURES");
+    /**
+     * Optical Character Recognition.
+     * '\u2440' - '\u245F'.
+     */
+    public final static UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
+      = new UnicodeBlock('\u2440', '\u245F',
+                         "OPTICAL_CHARACTER_RECOGNITION");
-    // Start and end characters.
+    /**
-    private char start, end;
+     * Enclosed Alphanumerics.
+     * '\u2460' - '\u24FF'.
-    // Everything from here to the end of UnicodeBlock is
+     */
-    // automatically generated by the blocks.pl script.
+    public final static UnicodeBlock ENCLOSED_ALPHANUMERICS
-    public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock ("Basic Latin", '\u0000', '\u007F');
+      = new UnicodeBlock('\u2460', '\u24FF',
-    public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock ("Latin-1 Supplement", '\u0080', '\u00FF');
+                         "ENCLOSED_ALPHANUMERICS");
-    public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock ("Latin Extended-A", '\u0100', '\u017F');
-    public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock ("Latin Extended-B", '\u0180', '\u024F');
+    /**
-    public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock ("IPA Extensions", '\u0250', '\u02AF');
+     * Box Drawing.
-    public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock ("Spacing Modifier Letters", '\u02B0', '\u02FF');
+     * '\u2500' - '\u257F'.
-    public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock ("Combining Diacritical Marks", '\u0300', '\u036F');
+     */
-    public static final UnicodeBlock GREEK = new UnicodeBlock ("Greek", '\u0370', '\u03FF');
+    public final static UnicodeBlock BOX_DRAWING
-    public static final UnicodeBlock CYRILLIC = new UnicodeBlock ("Cyrillic", '\u0400', '\u04FF');
+      = new UnicodeBlock('\u2500', '\u257F',
-    public static final UnicodeBlock ARMENIAN = new UnicodeBlock ("Armenian", '\u0530', '\u058F');
+                         "BOX_DRAWING");
-    public static final UnicodeBlock HEBREW = new UnicodeBlock ("Hebrew", '\u0590', '\u05FF');
-    public static final UnicodeBlock ARABIC = new UnicodeBlock ("Arabic", '\u0600', '\u06FF');
+    /**
-    public static final UnicodeBlock SYRIAC__ = new UnicodeBlock ("Syriac  ", '\u0700', '\u074F');
+     * Block Elements.
-    public static final UnicodeBlock THAANA = new UnicodeBlock ("Thaana", '\u0780', '\u07BF');
+     * '\u2580' - '\u259F'.
-    public static final UnicodeBlock DEVANAGARI = new UnicodeBlock ("Devanagari", '\u0900', '\u097F');
+     */
-    public static final UnicodeBlock BENGALI = new UnicodeBlock ("Bengali", '\u0980', '\u09FF');
+    public final static UnicodeBlock BLOCK_ELEMENTS
-    public static final UnicodeBlock GURMUKHI = new UnicodeBlock ("Gurmukhi", '\u0A00', '\u0A7F');
+      = new UnicodeBlock('\u2580', '\u259F',
-    public static final UnicodeBlock GUJARATI = new UnicodeBlock ("Gujarati", '\u0A80', '\u0AFF');
+                         "BLOCK_ELEMENTS");
-    public static final UnicodeBlock ORIYA = new UnicodeBlock ("Oriya", '\u0B00', '\u0B7F');
-    public static final UnicodeBlock TAMIL = new UnicodeBlock ("Tamil", '\u0B80', '\u0BFF');
+    /**
-    public static final UnicodeBlock TELUGU = new UnicodeBlock ("Telugu", '\u0C00', '\u0C7F');
+     * Geometric Shapes.
-    public static final UnicodeBlock KANNADA = new UnicodeBlock ("Kannada", '\u0C80', '\u0CFF');
+     * '\u25A0' - '\u25FF'.
-    public static final UnicodeBlock MALAYALAM = new UnicodeBlock ("Malayalam", '\u0D00', '\u0D7F');
+     */
-    public static final UnicodeBlock SINHALA = new UnicodeBlock ("Sinhala", '\u0D80', '\u0DFF');
+    public final static UnicodeBlock GEOMETRIC_SHAPES
-    public static final UnicodeBlock THAI = new UnicodeBlock ("Thai", '\u0E00', '\u0E7F');
+      = new UnicodeBlock('\u25A0', '\u25FF',
-    public static final UnicodeBlock LAO = new UnicodeBlock ("Lao", '\u0E80', '\u0EFF');
+                         "GEOMETRIC_SHAPES");
-    public static final UnicodeBlock TIBETAN = new UnicodeBlock ("Tibetan", '\u0F00', '\u0FFF');
-    public static final UnicodeBlock MYANMAR_ = new UnicodeBlock ("Myanmar ", '\u1000', '\u109F');
+    /**
-    public static final UnicodeBlock GEORGIAN = new UnicodeBlock ("Georgian", '\u10A0', '\u10FF');
+     * Miscellaneous Symbols.
-    public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock ("Hangul Jamo", '\u1100', '\u11FF');
+     * '\u2600' - '\u26FF'.
-    public static final UnicodeBlock ETHIOPIC = new UnicodeBlock ("Ethiopic", '\u1200', '\u137F');
+     */
-    public static final UnicodeBlock CHEROKEE = new UnicodeBlock ("Cherokee", '\u13A0', '\u13FF');
+    public final static UnicodeBlock MISCELLANEOUS_SYMBOLS
-    public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock ("Unified Canadian Aboriginal Syllabics", '\u1400', '\u167F');
+      = new UnicodeBlock('\u2600', '\u26FF',
-    public static final UnicodeBlock OGHAM = new UnicodeBlock ("Ogham", '\u1680', '\u169F');
+                         "MISCELLANEOUS_SYMBOLS");
-    public static final UnicodeBlock RUNIC = new UnicodeBlock ("Runic", '\u16A0', '\u16FF');
-    public static final UnicodeBlock KHMER = new UnicodeBlock ("Khmer", '\u1780', '\u17FF');
+    /**
-    public static final UnicodeBlock MONGOLIAN = new UnicodeBlock ("Mongolian", '\u1800', '\u18AF');
+     * Dingbats.
-    public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock ("Latin Extended Additional", '\u1E00', '\u1EFF');
+     * '\u2700' - '\u27BF'.
-    public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock ("Greek Extended", '\u1F00', '\u1FFF');
+     */
-    public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock ("General Punctuation", '\u2000', '\u206F');
+    public final static UnicodeBlock DINGBATS
-    public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock ("Superscripts and Subscripts", '\u2070', '\u209F');
+      = new UnicodeBlock('\u2700', '\u27BF',
-    public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock ("Currency Symbols", '\u20A0', '\u20CF');
+                         "DINGBATS");
-    public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock ("Combining Marks for Symbols", '\u20D0', '\u20FF');
-    public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock ("Letterlike Symbols", '\u2100', '\u214F');
+    /**
-    public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock ("Number Forms", '\u2150', '\u218F');
+     * Braille Patterns.
-    public static final UnicodeBlock ARROWS = new UnicodeBlock ("Arrows", '\u2190', '\u21FF');
+     * '\u2800' - '\u28FF'.
-    public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock ("Mathematical Operators", '\u2200', '\u22FF');
+     * @since 1.4
-    public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock ("Miscellaneous Technical", '\u2300', '\u23FF');
+     */
-    public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock ("Control Pictures", '\u2400', '\u243F');
+    public final static UnicodeBlock BRAILLE_PATTERNS
-    public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock ("Optical Character Recognition", '\u2440', '\u245F');
+      = new UnicodeBlock('\u2800', '\u28FF',
-    public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock ("Enclosed Alphanumerics", '\u2460', '\u24FF');
+                         "BRAILLE_PATTERNS");
-    public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock ("Box Drawing", '\u2500', '\u257F');
-    public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock ("Block Elements", '\u2580', '\u259F');
+    /**
-    public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock ("Geometric Shapes", '\u25A0', '\u25FF');
+     * CJK Radicals Supplement.
-    public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock ("Miscellaneous Symbols", '\u2600', '\u26FF');
+     * '\u2E80' - '\u2EFF'.
-    public static final UnicodeBlock DINGBATS = new UnicodeBlock ("Dingbats", '\u2700', '\u27BF');
+     * @since 1.4
-    public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock ("Braille Patterns", '\u2800', '\u28FF');
+     */
-    public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock ("CJK Radicals Supplement", '\u2E80', '\u2EFF');
+    public final static UnicodeBlock CJK_RADICALS_SUPPLEMENT
-    public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock ("Kangxi Radicals", '\u2F00', '\u2FDF');
+      = new UnicodeBlock('\u2E80', '\u2EFF',
-    public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock ("Ideographic Description Characters", '\u2FF0', '\u2FFF');
+                         "CJK_RADICALS_SUPPLEMENT");
-    public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock ("CJK Symbols and Punctuation", '\u3000', '\u303F');
-    public static final UnicodeBlock HIRAGANA = new UnicodeBlock ("Hiragana", '\u3040', '\u309F');
+    /**
-    public static final UnicodeBlock KATAKANA = new UnicodeBlock ("Katakana", '\u30A0', '\u30FF');
+     * Kangxi Radicals.
-    public static final UnicodeBlock BOPOMOFO = new UnicodeBlock ("Bopomofo", '\u3100', '\u312F');
+     * '\u2F00' - '\u2FDF'.
-    public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock ("Hangul Compatibility Jamo", '\u3130', '\u318F');
+     * @since 1.4
-    public static final UnicodeBlock KANBUN = new UnicodeBlock ("Kanbun", '\u3190', '\u319F');
+     */
-    public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock ("Bopomofo Extended", '\u31A0', '\u31BF');
+    public final static UnicodeBlock KANGXI_RADICALS
-    public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock ("Enclosed CJK Letters and Months", '\u3200', '\u32FF');
+      = new UnicodeBlock('\u2F00', '\u2FDF',
-    public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock ("CJK Compatibility", '\u3300', '\u33FF');
+                         "KANGXI_RADICALS");
-    public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock ("CJK Unified Ideographs Extension A", '\u3400', '\u4DB5');
-    public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock ("CJK Unified Ideographs", '\u4E00', '\u9FFF');
+    /**
-    public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock ("Yi Syllables", '\uA000', '\uA48F');
+     * Ideographic Description Characters.
-    public static final UnicodeBlock YI_RADICALS = new UnicodeBlock ("Yi Radicals", '\uA490', '\uA4CF');
+     * '\u2FF0' - '\u2FFF'.
-    public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock ("Hangul Syllables", '\uAC00', '\uD7A3');
+     * @since 1.4
-    public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock ("Surrogates Area", '\uD800', '\uDFFF');
+     */
-    public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock ("Private Use Area", '\uE000', '\uF8FF');
+    public final static UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
-    public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock ("CJK Compatibility Ideographs", '\uF900', '\uFAFF');
+      = new UnicodeBlock('\u2FF0', '\u2FFF',
-    public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock ("Alphabetic Presentation Forms", '\uFB00', '\uFB4F');
+                         "IDEOGRAPHIC_DESCRIPTION_CHARACTERS");
-    public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock ("Arabic Presentation Forms-A", '\uFB50', '\uFDFF');
-    public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock ("Combining Half Marks", '\uFE20', '\uFE2F');
+    /**
-    public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock ("CJK Compatibility Forms", '\uFE30', '\uFE4F');
+     * CJK Symbols and Punctuation.
-    public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock ("Small Form Variants", '\uFE50', '\uFE6F');
+     * '\u3000' - '\u303F'.
-    public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock ("Arabic Presentation Forms-B", '\uFE70', '\uFEFE');
+     */
-    public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock ("Halfwidth and Fullwidth Forms", '\uFF00', '\uFFEF');
+    public final static UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
-    public static final UnicodeBlock SPECIALS = new UnicodeBlock ("Specials", '\uFFF0', '\uFFFD');
+      = new UnicodeBlock('\u3000', '\u303F',
-    private static final UnicodeBlock[] blocks = {
+                         "CJK_SYMBOLS_AND_PUNCTUATION");
+    /**
+     * Hiragana.
+     * '\u3040' - '\u309F'.
+     */
+    public final static UnicodeBlock HIRAGANA
+      = new UnicodeBlock('\u3040', '\u309F',
+                         "HIRAGANA");
+    /**
+     * Katakana.
+     * '\u30A0' - '\u30FF'.
+     */
+    public final static UnicodeBlock KATAKANA
+      = new UnicodeBlock('\u30A0', '\u30FF',
+                         "KATAKANA");
+    /**
+     * Bopomofo.
+     * '\u3100' - '\u312F'.
+     */
+    public final static UnicodeBlock BOPOMOFO
+      = new UnicodeBlock('\u3100', '\u312F',
+                         "BOPOMOFO");
+    /**
+     * Hangul Compatibility Jamo.
+     * '\u3130' - '\u318F'.
+     */
+    public final static UnicodeBlock HANGUL_COMPATIBILITY_JAMO
+      = new UnicodeBlock('\u3130', '\u318F',
+                         "HANGUL_COMPATIBILITY_JAMO");
+    /**
+     * Kanbun.
+     * '\u3190' - '\u319F'.
+     */
+    public final static UnicodeBlock KANBUN
+      = new UnicodeBlock('\u3190', '\u319F',
+                         "KANBUN");
+    /**
+     * Bopomofo Extended.
+     * '\u31A0' - '\u31BF'.
+     * @since 1.4
+     */
+    public final static UnicodeBlock BOPOMOFO_EXTENDED
+      = new UnicodeBlock('\u31A0', '\u31BF',
+                         "BOPOMOFO_EXTENDED");
+    /**
+     * Enclosed CJK Letters and Months.
+     * '\u3200' - '\u32FF'.
+     */
+    public final static UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
+      = new UnicodeBlock('\u3200', '\u32FF',
+                         "ENCLOSED_CJK_LETTERS_AND_MONTHS");
+    /**
+     * CJK Compatibility.
+     * '\u3300' - '\u33FF'.
+     */
+    public final static UnicodeBlock CJK_COMPATIBILITY
+      = new UnicodeBlock('\u3300', '\u33FF',
+                         "CJK_COMPATIBILITY");
+    /**
+     * CJK Unified Ideographs Extension A.
+     * '\u3400' - '\u4DB5'.
+     * @since 1.4
+     */
+    public final static UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
+      = new UnicodeBlock('\u3400', '\u4DB5',
+                         "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A");
+    /**
+     * CJK Unified Ideographs.
+     * '\u4E00' - '\u9FFF'.
+     */
+    public final static UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
+      = new UnicodeBlock('\u4E00', '\u9FFF',
+                         "CJK_UNIFIED_IDEOGRAPHS");
+    /**
+     * Yi Syllables.
+     * '\uA000' - '\uA48F'.
+     * @since 1.4
+     */
+    public final static UnicodeBlock YI_SYLLABLES
+      = new UnicodeBlock('\uA000', '\uA48F',
+                         "YI_SYLLABLES");
+    /**
+     * Yi Radicals.
+     * '\uA490' - '\uA4CF'.
+     * @since 1.4
+     */
+    public final static UnicodeBlock YI_RADICALS
+      = new UnicodeBlock('\uA490', '\uA4CF',
+                         "YI_RADICALS");
+    /**
+     * Hangul Syllables.
+     * '\uAC00' - '\uD7A3'.
+     */
+    public final static UnicodeBlock HANGUL_SYLLABLES
+      = new UnicodeBlock('\uAC00', '\uD7A3',
+                         "HANGUL_SYLLABLES");
+    /**
+     * Surrogates Area.
+     * '\uD800' - '\uDFFF'.
+     */
+    public final static UnicodeBlock SURROGATES_AREA
+      = new UnicodeBlock('\uD800', '\uDFFF',
+                         "SURROGATES_AREA");
+    /**
+     * Private Use Area.
+     * '\uE000' - '\uF8FF'.
+     */
+    public final static UnicodeBlock PRIVATE_USE_AREA
+      = new UnicodeBlock('\uE000', '\uF8FF',
+                         "PRIVATE_USE_AREA");
+    /**
+     * CJK Compatibility Ideographs.
+     * '\uF900' - '\uFAFF'.
+     */
+    public final static UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
+      = new UnicodeBlock('\uF900', '\uFAFF',
+                         "CJK_COMPATIBILITY_IDEOGRAPHS");
+    /**
+     * Alphabetic Presentation Forms.
+     * '\uFB00' - '\uFB4F'.
+     */
+    public final static UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
+      = new UnicodeBlock('\uFB00', '\uFB4F',
+                         "ALPHABETIC_PRESENTATION_FORMS");
+    /**
+     * Arabic Presentation Forms-A.
+     * '\uFB50' - '\uFDFF'.
+     */
+    public final static UnicodeBlock ARABIC_PRESENTATION_FORMS_A
+      = new UnicodeBlock('\uFB50', '\uFDFF',
+                         "ARABIC_PRESENTATION_FORMS_A");
+    /**
+     * Combining Half Marks.
+     * '\uFE20' - '\uFE2F'.
+     */
+    public final static UnicodeBlock COMBINING_HALF_MARKS
+      = new UnicodeBlock('\uFE20', '\uFE2F',
+                         "COMBINING_HALF_MARKS");
+    /**
+     * CJK Compatibility Forms.
+     * '\uFE30' - '\uFE4F'.
+     */
+    public final static UnicodeBlock CJK_COMPATIBILITY_FORMS
+      = new UnicodeBlock('\uFE30', '\uFE4F',
+                         "CJK_COMPATIBILITY_FORMS");
+    /**
+     * Small Form Variants.
+     * '\uFE50' - '\uFE6F'.
+     */
+    public final static UnicodeBlock SMALL_FORM_VARIANTS
+      = new UnicodeBlock('\uFE50', '\uFE6F',
+                         "SMALL_FORM_VARIANTS");
+    /**
+     * Arabic Presentation Forms-B.
+     * '\uFE70' - '\uFEFE'.
+     */
+    public final static UnicodeBlock ARABIC_PRESENTATION_FORMS_B
+      = new UnicodeBlock('\uFE70', '\uFEFE',
+                         "ARABIC_PRESENTATION_FORMS_B");
+    /**
+     * Halfwidth and Fullwidth Forms.
+     * '\uFF00' - '\uFFEF'.
+     */
+    public final static UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
+      = new UnicodeBlock('\uFF00', '\uFFEF',
+                         "HALFWIDTH_AND_FULLWIDTH_FORMS");
+    /**
+     * Specials.
+     * '\uFEFF', '\uFFF0' - '\uFFFD'.
+     */
+    public final static UnicodeBlock SPECIALS
+      = new UnicodeBlock('\uFFF0', '\uFFFD',
+                         "SPECIALS");
+    /**
+     * The defined subsets.
+     */
+    private static final UnicodeBlock sets[] = {
      BASIC_LATIN,
      LATIN_1_SUPPLEMENT,
      LATIN_EXTENDED_A,
@@ -445,7 +912,7 @@ public final class Character implements Serializable, Comparable
      ARMENIAN,
      HEBREW,
      ARABIC,
-      SYRIAC__,
+      SYRIAC,
      THAANA,
      DEVANAGARI,
      BENGALI,
@@ -460,7 +927,7 @@ public final class Character implements Serializable, Comparable
      THAI,
      LAO,
      TIBETAN,
-      MYANMAR_,
+      MYANMAR,
      GEORGIAN,
      HANGUL_JAMO,
      ETHIOPIC,
@@ -517,7 +984,1180 @@ public final class Character implements Serializable, Comparable
      SMALL_FORM_VARIANTS,
      ARABIC_PRESENTATION_FORMS_B,
      HALFWIDTH_AND_FULLWIDTH_FORMS,
-      SPECIALS
+      SPECIALS,
    };
+  } // class UnicodeBlock
+  /**
+   * The immutable value of this Character.
+   *
+   * @serial the value of this Character
+   */
+  private final char value;
+  /**
+   * Compatible with JDK 1.0+.
+   */
+  private static final long serialVersionUID = 3786198910865385080L;
+  /**
+   * Smallest value allowed for radix arguments in Java. This value is 2.
+   *
+   * @see #digit(char, int)
+   * @see #forDigit(int, int)
+   * @see Integer#toString(int, int)
+   * @see Integer#valueOf(String)
+   */
+  public static final int MIN_RADIX = 2;
+  /**
+   * Largest value allowed for radix arguments in Java. This value is 36.
+   *
+   * @see #digit(char, int)
+   * @see #forDigit(int, int)
+   * @see Integer#toString(int, int)
+   * @see Integer#valueOf(String)
+   */
+  public static final int MAX_RADIX = 36;
+  /**
+   * The minimum value the char data type can hold.
+   * This value is <code>'\\u0000'</code>.
+   */
+  public static final char MIN_VALUE = '\u0000';
+  /**
+   * The maximum value the char data type can hold.
+   * This value is <code>'\\uFFFF'</code>.
+   */
+  public static final char MAX_VALUE = '\uFFFF';
+  /**
+   * Class object representing the primitive char data type.
+   *
+   * @since 1.1
+   */
+  public static final Class TYPE = VMClassLoader.getPrimitiveClass('C');
+  /**
+   * Lu = Letter, Uppercase (Informative).
+   *
+   * @since 1.1
+   */
+  public static final byte UPPERCASE_LETTER = 1;
+  /**
+   * Ll = Letter, Lowercase (Informative).
+   *
+   * @since 1.1
+   */
+  public static final byte LOWERCASE_LETTER = 2;
+  /**
+   * Lt = Letter, Titlecase (Informative).
+   *
+   * @since 1.1
+   */
+  public static final byte TITLECASE_LETTER = 3;
+  /**
+   * Mn = Mark, Non-Spacing (Normative).
+   *
+   * @since 1.1
+   */
+  public static final byte NON_SPACING_MARK = 6;
+  /**
+   * Mc = Mark, Spacing Combining (Normative).
+   *
+   * @since 1.1
+   */
+  public static final byte COMBINING_SPACING_MARK = 8;
+  /**
+   * Me = Mark, Enclosing (Normative).
+   *
+   * @since 1.1
+   */
+  public static final byte ENCLOSING_MARK = 7;
+  /**
+   * Nd = Number, Decimal Digit (Normative).
+   *
+   * @since 1.1
+   */
+  public static final byte DECIMAL_DIGIT_NUMBER = 9;
+  /**
+   * Nl = Number, Letter (Normative).
+   *
+   * @since 1.1
+   */
+  public static final byte LETTER_NUMBER = 10;
+  /**
+   * No = Number, Other (Normative).
+   *
+   * @since 1.1
+   */
+  public static final byte OTHER_NUMBER = 11;
+  /**
+   * Zs = Separator, Space (Normative).
+   *
+   * @since 1.1
+   */
+  public static final byte SPACE_SEPARATOR = 12;
+  /**
+   * Zl = Separator, Line (Normative).
+   *
+   * @since 1.1
+   */
+  public static final byte LINE_SEPARATOR = 13;
+  /**
+   * Zp = Separator, Paragraph (Normative).
+   *
+   * @since 1.1
+   */
+  public static final byte PARAGRAPH_SEPARATOR = 14;
+  /**
+   * Cc = Other, Control (Normative).
+   *
+   * @since 1.1
+   */
+  public static final byte CONTROL = 15;
+  /**
+   * Cf = Other, Format (Normative).
+   *
+   * @since 1.1
+   */
+  public static final byte FORMAT = 16;
+  /**
+   * Cs = Other, Surrogate (Normative).
+   *
+   * @since 1.1
+   */
+  public static final byte SURROGATE = 19;
+  /**
+   * Co = Other, Private Use (Normative).
+   *
+   * @since 1.1
+   */
+  public static final byte PRIVATE_USE = 18;
+  /**
+   * Cn = Other, Not Assigned (Normative).
+   *
+   * @since 1.1
+   */
+  public static final byte UNASSIGNED = 0;
+  /**
+   * Lm = Letter, Modifier (Informative).
+   *
+   * @since 1.1
+   */
+  public static final byte MODIFIER_LETTER = 4;
+  /**
+   * Lo = Letter, Other (Informative).
+   *
+   * @since 1.1
+   */
+  public static final byte OTHER_LETTER = 5;
+  /**
+   * Pc = Punctuation, Connector (Informative).
+   *
+   * @since 1.1
+   */
+  public static final byte CONNECTOR_PUNCTUATION = 23;
+  /**
+   * Pd = Punctuation, Dash (Informative).
+   *
+   * @since 1.1
+   */
+  public static final byte DASH_PUNCTUATION = 20;
+  /**
+   * Ps = Punctuation, Open (Informative).
+   *
+   * @since 1.1
+   */
+  public static final byte START_PUNCTUATION = 21;
+  /**
+   * Pe = Punctuation, Close (Informative).
+   *
+   * @since 1.1
+   */
+  public static final byte END_PUNCTUATION = 22;
+  /**
+   * Pi = Punctuation, Initial Quote (Informative).
+   *
+   * @since 1.4
+   */
+  public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
+  /**
+   * Pf = Punctuation, Final Quote (Informative).
+   *
+   * @since 1.4
+   */
+  public static final byte FINAL_QUOTE_PUNCTUATION = 30;
+  /**
+   * Po = Punctuation, Other (Informative).
+   *
+   * @since 1.1
+   */
+  public static final byte OTHER_PUNCTUATION = 24;
+  /**
+   * Sm = Symbol, Math (Informative).
+   *
+   * @since 1.1
+   */
+  public static final byte MATH_SYMBOL = 25;
+  /**
+   * Sc = Symbol, Currency (Informative).
+   *
+   * @since 1.1
+   */
+  public static final byte CURRENCY_SYMBOL = 26;
+  /**
+   * Sk = Symbol, Modifier (Informative).
+   *
+   * @since 1.1
+   */
+  public static final byte MODIFIER_SYMBOL = 27;
+  /**
+   * So = Symbol, Other (Informative).
+   *
+   * @since 1.1
+   */
+  public static final byte OTHER_SYMBOL = 28;
+  /**
+   * Undefined bidirectional character type. Undefined char values have
+   * undefined directionality in the Unicode specification.
+   *
+   * @since 1.4
+   */
+  public static final byte DIRECTIONALITY_UNDEFINED = -1;
+  /**
+   * Strong bidirectional character type "L".
+   *
+   * @since 1.4
+   */
+  public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
+  /**
+   * Strong bidirectional character type "R".
+   *
+   * @since 1.4
+   */
+  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
+  /**
+   * Strong bidirectional character type "AL".
+   *
+   * @since 1.4
+   */
+  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
+  /**
+   * Weak bidirectional character type "EN".
+   *
+   * @since 1.4
+   */
+  public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
+  /**
+   * Weak bidirectional character type "ES".
+   *
+   * @since 1.4
+   */
+  public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
+  /**
+   * Weak bidirectional character type "ET".
+   *
+   * @since 1.4
+   */
+  public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
+  /**
+   * Weak bidirectional character type "AN".
+   *
+   * @since 1.4
+   */
+  public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
+  /**
+   * Weak bidirectional character type "CS".
+   *
+   * @since 1.4
+   */
+  public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
+  /**
+   * Weak bidirectional character type "NSM".
+   *
+   * @since 1.4
+   */
+  public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
+  /**
+   * Weak bidirectional character type "BN".
+   *
+   * @since 1.4
+   */
+  public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
+  /**
+   * Neutral bidirectional character type "B".
+   *
+   * @since 1.4
+   */
+  public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
+  /**
+   * Neutral bidirectional character type "S".
+   *
+   * @since 1.4
+   */
+  public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
+  /**
+   * Neutral bidirectional character type "WS".
+   *
+   * @since 1.4
+   */
+  public static final byte DIRECTIONALITY_WHITESPACE = 12;
+  /**
+   * Neutral bidirectional character type "ON".
+   *
+   * @since 1.4
+   */
+  public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
+  /**
+   * Strong bidirectional character type "LRE".
+   *
+   * @since 1.4
+   */
+  public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
+  /**
+   * Strong bidirectional character type "LRO".
+   *
+   * @since 1.4
+   */
+  public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
+  /**
+   * Strong bidirectional character type "RLE".
+   *
+   * @since 1.4
+   */
+  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
+  /**
+   * Strong bidirectional character type "RLO".
+   *
+   * @since 1.4
+   */
+  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
+  /**
+   * Weak bidirectional character type "PDF".
+   *
+   * @since 1.4
+   */
+  public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
+  /**
+   * Mask for grabbing the type out of the result of readChar.
+   * @see #readChar(char)
+   */
+  private static final int TYPE_MASK = 0x1F;
+  /**
+   * Mask for grabbing the non-breaking space flag out of the result of
+   * readChar.
+   * @see #readChar(char)
+   */
+  private static final int NO_BREAK_MASK = 0x20;
+  /**
+   * Mask for grabbing the mirrored directionality flag out of the result
+   * of readChar.
+   * @see #readChar(char)
+   */
+  private static final int MIRROR_MASK = 0x40;
+  /**
+   * Grabs an attribute offset from the Unicode attribute database. The lower
+   * 5 bits are the character type, the next 2 bits are flags, and the top
+   * 9 bits are the offset into the attribute tables. Note that the top 9
+   * bits are meaningless in this context; they are useful only in the native
+   * code.
+   *
+   * @param ch the character to look up
+   * @return the character's attribute offset and type
+   * @see #TYPE_MASK
+   * @see #NO_BREAK_MASK
+   * @see #MIRROR_MASK
+   */
+  private static native char readChar(char ch);
+  /**
+   * Wraps up a character.
+   *
+   * @param value the character to wrap
+   */
+  public Character(char value)
+  {
+    this.value = value;
+  }
+  /**
+   * Returns the character which has been wrapped by this class.
+   *
+   * @return the character wrapped
+   */
+  public char charValue()
+  {
+    return value;
+  }
+  /**
+   * Returns the numerical value (unsigned) of the wrapped character.
+   * Range of returned values: 0x0000-0xFFFF.
+   *
+   * @return the value of the wrapped character
+   */
+  public int hashCode()
+  {
+    return value;
+  }
+  /**
+   * Determines if an object is equal to this object. This is only true for
+   * another Character object wrapping the same value.
+   *
+   * @param o object to compare
+   * @return true if o is a Character with the same value
+   */
+  public boolean equals(Object o)
+  {
+    return o instanceof Character && value == ((Character) o).value;
+  }
+  /**
+   * Converts the wrapped character into a String.
+   *
+   * @return a String containing one character -- the wrapped character
+   *         of this instance
+   */
+  public String toString()
+  {
+    // This assumes that String.valueOf(char) can create a single-character
+    // String more efficiently than through the public API.
+    return String.valueOf(value);
+  }
+  /**
+   * Returns a String of length 1 representing the specified character.
+   *
+   * @param ch the character to convert
+   * @return a String containing the character
+   * @since 1.4
+   */
+  public static String toString(char ch)
+  {
+    // This is a utility conversion that never touches instance state, and the
+    // 1.4 API it implements is static; declaring it static lets callers write
+    // Character.toString(ch) without constructing a wrapper first.
+    // This assumes that String.valueOf(char) can create a single-character
+    // String more efficiently than through the public API.
+    return String.valueOf(ch);
+  }
+  /**
+   * Determines if a character is a Unicode lowercase letter. For example,
+   * <code>'a'</code> is lowercase.
+   * <br>
+   * lowercase = [Ll]
+   *
+   * @param ch character to test
+   * @return true if ch is a Unicode lowercase letter, else false
+   * @see #isUpperCase(char)
+   * @see #isTitleCase(char)
+   * @see #toLowerCase(char)
+   * @see #getType(char)
+   */
+  public static boolean isLowerCase(char ch)
+  {
+    // Lowercase is exactly the Ll general category.
+    final int category = getType(ch);
+    return category == LOWERCASE_LETTER;
+  }
+  /**
+   * Determines if a character is a Unicode uppercase letter. For example,
+   * <code>'A'</code> is uppercase.
+   * <br>
+   * uppercase = [Lu]
+   *
+   * @param ch character to test
+   * @return true if ch is a Unicode uppercase letter, else false
+   * @see #isLowerCase(char)
+   * @see #isTitleCase(char)
+   * @see #toUpperCase(char)
+   * @see #getType(char)
+   */
+  public static boolean isUpperCase(char ch)
+  {
+    // Uppercase is exactly the Lu general category.
+    final int category = getType(ch);
+    return category == UPPERCASE_LETTER;
+  }
+  /**
+   * Determines if a character is a Unicode titlecase letter. For example,
+   * the character "Lj" (Latin capital L with small letter j) is titlecase.
+   * <br>
+   * titlecase = [Lt]
+   *
+   * @param ch character to test
+   * @return true if ch is a Unicode titlecase letter, else false
+   * @see #isLowerCase(char)
+   * @see #isUpperCase(char)
+   * @see #toTitleCase(char)
+   * @see #getType(char)
+   */
+  public static boolean isTitleCase(char ch)
+  {
+    // Titlecase is exactly the Lt general category.
+    final int category = getType(ch);
+    return category == TITLECASE_LETTER;
+  }
+  /**
+   * Determines if a character is a Unicode decimal digit. For example,
+   * <code>'0'</code> is a digit.
+   * <br>
+   * Unicode decimal digit = [Nd]
+   *
+   * @param ch character to test
+   * @return true if ch is a Unicode decimal digit, else false
+   * @see #digit(char, int)
+   * @see #forDigit(int, int)
+   * @see #getType(char)
+   */
+  public static boolean isDigit(char ch)
+  {
+    // A digit is exactly the Nd general category.
+    final int category = getType(ch);
+    return category == DECIMAL_DIGIT_NUMBER;
+  }
+  /**
+   * Determines if a character is part of the Unicode Standard. This is an
+   * evolving standard, but covers every character in the data file.
+   * <br>
+   * defined = not [Cn]
+   *
+   * @param ch character to test
+   * @return true if ch is a Unicode character, else false
+   * @see #isDigit(char)
+   * @see #isLetter(char)
+   * @see #isLetterOrDigit(char)
+   * @see #isLowerCase(char)
+   * @see #isTitleCase(char)
+   * @see #isUpperCase(char)
+   */
+  public static boolean isDefined(char ch)
+  {
+    // A character is defined when its general category is anything other
+    // than Cn (UNASSIGNED); testing for equality would report exactly the
+    // unassigned code points as defined.
+    return getType(ch) != UNASSIGNED;
+  }
+  /**
+   * Determines if a character is a Unicode letter. Not all letters have case,
+   * so this may return true when isLowerCase and isUpperCase return false.
+   * <br>
+   * letter = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]
+   *
+   * @param ch character to test
+   * @return true if ch is a Unicode letter, else false
+   * @see #isDigit(char)
+   * @see #isJavaIdentifierStart(char)
+   * @see #isJavaLetter(char)
+   * @see #isJavaLetterOrDigit(char)
+   * @see #isLetterOrDigit(char)
+   * @see #isLowerCase(char)
+   * @see #isTitleCase(char)
+   * @see #isUnicodeIdentifierStart(char)
+   * @see #isUpperCase(char)
+   */
+  public static boolean isLetter(char ch)
+  {
+    // One bit per general category that counts as a letter.
+    final int letterMask = (1 << UPPERCASE_LETTER)
+      | (1 << LOWERCASE_LETTER)
+      | (1 << TITLECASE_LETTER)
+      | (1 << MODIFIER_LETTER)
+      | (1 << OTHER_LETTER);
+    final int typeBit = 1 << getType(ch);
+    return (typeBit & letterMask) != 0;
+  }
+  /**
+   * Determines if a character is a Unicode letter or a Unicode digit. This
+   * is the combination of isLetter and isDigit.
+   * <br>
+   * letter or digit = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nd]
+   *
+   * @param ch character to test
+   * @return true if ch is a Unicode letter or a Unicode digit, else false
+   * @see #isDigit(char)
+   * @see #isJavaIdentifierPart(char)
+   * @see #isJavaLetter(char)
+   * @see #isJavaLetterOrDigit(char)
+   * @see #isLetter(char)
+   * @see #isUnicodeIdentifierPart(char)
+   */
+  public static boolean isLetterOrDigit(char ch)
+  {
+    // The letter categories plus Nd, tested with a single type lookup.
+    final int acceptMask = (1 << UPPERCASE_LETTER)
+      | (1 << LOWERCASE_LETTER)
+      | (1 << TITLECASE_LETTER)
+      | (1 << MODIFIER_LETTER)
+      | (1 << OTHER_LETTER)
+      | (1 << DECIMAL_DIGIT_NUMBER);
+    final int typeBit = 1 << getType(ch);
+    return (typeBit & acceptMask) != 0;
+  }
+  /**
+   * Determines if a character can start a Java identifier. This is the
+   * combination of isLetter, any character where getType returns
+   * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
+   * (like '_').
+   *
+   * @param ch character to test
+   * @return true if ch can start a Java identifier, else false
+   * @deprecated Replaced by {@link #isJavaIdentifierStart(char)}
+   * @see #isJavaLetterOrDigit(char)
+   * @see #isJavaIdentifierStart(char)
+   * @see #isJavaIdentifierPart(char)
+   * @see #isLetter(char)
+   * @see #isLetterOrDigit(char)
+   * @see #isUnicodeIdentifierStart(char)
+   */
+  public static boolean isJavaLetter(char ch)
+  {
+    // Deprecated alias: forwards to the replacement method.
+    final boolean canStart = isJavaIdentifierStart(ch);
+    return canStart;
+  }
+  /**
+   * Determines if a character can follow the first letter in
+   * a Java identifier.  This is the combination of isJavaLetter (isLetter,
+   * type of LETTER_NUMBER, currency, connecting punctuation) and digit,
+   * numeric letter (like Roman numerals), combining marks, non-spacing marks,
+   * or isIdentifierIgnorable.
+   *
+   * @param ch character to test
+   * @return true if ch can follow the first letter in a Java identifier
+   * @deprecated Replaced by {@link #isJavaIdentifierPart(char)}
+   * @see #isJavaLetter(char)
+   * @see #isJavaIdentifierStart(char)
+   * @see #isJavaIdentifierPart(char)
+   * @see #isLetter(char)
+   * @see #isLetterOrDigit(char)
+   * @see #isUnicodeIdentifierPart(char)
+   * @see #isIdentifierIgnorable(char)
+   */
+  public static boolean isJavaLetterOrDigit(char ch)
+  {
+    // Deprecated alias: forwards to the replacement method.
+    final boolean canFollow = isJavaIdentifierPart(ch);
+    return canFollow;
+  }
+  /**
+   * Determines if a character can start a Java identifier. This is the
+   * combination of isLetter, any character where getType returns
+   * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
+   * (like '_').
+   * <br>
+   * Java identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]
+   *
+   * @param ch character to test
+   * @return true if ch can start a Java identifier, else false
+   * @see #isJavaIdentifierPart(char)
+   * @see #isLetter(char)
+   * @see #isUnicodeIdentifierStart(char)
+   * @since 1.1
+   */
+  public static boolean isJavaIdentifierStart(char ch)
+  {
+    // Letters, letter numbers, currency symbols and connector punctuation.
+    final int startMask = (1 << UPPERCASE_LETTER)
+      | (1 << LOWERCASE_LETTER)
+      | (1 << TITLECASE_LETTER)
+      | (1 << MODIFIER_LETTER)
+      | (1 << OTHER_LETTER)
+      | (1 << LETTER_NUMBER)
+      | (1 << CURRENCY_SYMBOL)
+      | (1 << CONNECTOR_PUNCTUATION);
+    final int typeBit = 1 << getType(ch);
+    return (typeBit & startMask) != 0;
+  }
+  /**
+   * Determines if a character can follow the first letter in
+   * a Java identifier.  This is the combination of isJavaLetter (isLetter,
+   * type of LETTER_NUMBER, currency, connecting punctuation) and digit,
+   * numeric letter (like Roman numerals), combining marks, non-spacing marks,
+   * or isIdentifierIgnorable.
+   * <br>
+   * Java identifier extender =
+   *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]|[Mn]|[Mc]|[Nd]|[Cf]
+   *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
+   *
+   * @param ch character to test
+   * @return true if ch can follow the first letter in a Java identifier
+   * @see #isIdentifierIgnorable(char)
+   * @see #isJavaIdentifierStart(char)
+   * @see #isLetterOrDigit(char)
+   * @see #isUnicodeIdentifierPart(char)
+   * @since 1.1
+   */
+  public static boolean isJavaIdentifierPart(char ch)
+  {
+    final int type = getType(ch);
+    // Categories that are accepted outright.
+    final int partMask = (1 << UPPERCASE_LETTER)
+      | (1 << LOWERCASE_LETTER)
+      | (1 << TITLECASE_LETTER)
+      | (1 << MODIFIER_LETTER)
+      | (1 << OTHER_LETTER)
+      | (1 << NON_SPACING_MARK)
+      | (1 << COMBINING_SPACING_MARK)
+      | (1 << DECIMAL_DIGIT_NUMBER)
+      | (1 << LETTER_NUMBER)
+      | (1 << CURRENCY_SYMBOL)
+      | (1 << CONNECTOR_PUNCTUATION)
+      | (1 << FORMAT);
+    if (((1 << type) & partMask) != 0)
+      return true;
+    // Control characters qualify only when they are identifier-ignorable.
+    return type == CONTROL && isIdentifierIgnorable(ch);
+  }
+  /**
+   * Determines if a character can start a Unicode identifier.  Only
+   * letters can start a Unicode identifier, but this includes characters
+   * in LETTER_NUMBER.
+   * <br>
+   * Unicode identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]
+   *
+   * @param ch character to test
+   * @return true if ch can start a Unicode identifier, else false
+   * @see #isJavaIdentifierStart(char)
+   * @see #isLetter(char)
+   * @see #isUnicodeIdentifierPart(char)
+   * @since 1.1
+   */
+  public static boolean isUnicodeIdentifierStart(char ch)
+  {
+    // The letter categories plus letter numbers (Nl).
+    final int startMask = (1 << UPPERCASE_LETTER)
+      | (1 << LOWERCASE_LETTER)
+      | (1 << TITLECASE_LETTER)
+      | (1 << MODIFIER_LETTER)
+      | (1 << OTHER_LETTER)
+      | (1 << LETTER_NUMBER);
+    final int typeBit = 1 << getType(ch);
+    return (typeBit & startMask) != 0;
+  }
+  /**
+   * Determines if a character can follow the first letter in
+   * a Unicode identifier. This includes letters, connecting punctuation,
+   * digits, numeric letters, combining marks, non-spacing marks, and
+   * isIdentifierIgnorable.
+   * <br>
+   * Unicode identifier extender =
+   *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Mn]|[Mc]|[Nd]|[Pc]|[Cf]
+   *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
+   *
+   * @param ch character to test
+   * @return true if ch can follow the first letter in a Unicode identifier
+   * @see #isIdentifierIgnorable(char)
+   * @see #isJavaIdentifierPart(char)
+   * @see #isLetterOrDigit(char)
+   * @see #isUnicodeIdentifierStart(char)
+   * @since 1.1
+   */
+  public static boolean isUnicodeIdentifierPart(char ch)
+  {
+    final int type = getType(ch);
+    // Categories that are accepted outright.
+    final int partMask = (1 << UPPERCASE_LETTER)
+      | (1 << LOWERCASE_LETTER)
+      | (1 << TITLECASE_LETTER)
+      | (1 << MODIFIER_LETTER)
+      | (1 << OTHER_LETTER)
+      | (1 << NON_SPACING_MARK)
+      | (1 << COMBINING_SPACING_MARK)
+      | (1 << DECIMAL_DIGIT_NUMBER)
+      | (1 << LETTER_NUMBER)
+      | (1 << CONNECTOR_PUNCTUATION)
+      | (1 << FORMAT);
+    if (((1 << type) & partMask) != 0)
+      return true;
+    // Control characters qualify only when they are identifier-ignorable.
+    return type == CONTROL && isIdentifierIgnorable(ch);
+  }
+  /**
+   * Determines if a character is ignorable in a Unicode identifier. This
+   * includes the non-whitespace ISO control characters (<code>'\u0000'</code>
+   * through <code>'\u0008'</code>, <code>'\u000E'</code> through
+   * <code>'\u001B'</code>, and <code>'\u007F'</code> through
+   * <code>'\u009F'</code>), and FORMAT characters.
+   * <br>
+   * Unicode identifier ignorable = [Cf]|U+0000-U+0008|U+000E-U+001B
+   *    |U+007F-U+009F
+   *
+   * @param ch character to test
+   * @return true if ch is ignorable in a Unicode or Java identifier
+   * @see #isJavaIdentifierPart(char)
+   * @see #isUnicodeIdentifierPart(char)
+   * @since 1.1
+   */
+  public static boolean isIdentifierIgnorable(char ch)
+  {
+    // The ignorable control ranges all lie at or below U+009F, so they are
+    // checked first; only other characters need the type lookup.
+    if (ch <= '\u009F'
+        && (ch < '\t'
+            || ch >= '\u007F'
+            || (ch >= '\u000E' && ch <= '\u001B')))
+      return true;
+    return getType(ch) == FORMAT;
+  }
+  /**
+   * Converts a Unicode character into its lowercase equivalent mapping.
+   * If a mapping does not exist, then the character passed is returned.
+   * Note that isLowerCase(toLowerCase(ch)) does not always return true.
+   *
+   * @param ch character to convert to lowercase
+   * @return lowercase mapping of ch, or ch if lowercase mapping does
+   *         not exist
+   * @see #isLowerCase(char)
+   * @see #isUpperCase(char)
+   * @see #toTitleCase(char)
+   * @see #toUpperCase(char)
+   */
+  public static native char toLowerCase(char ch);
+  /**
+   * Converts a Unicode character into its uppercase equivalent mapping.
+   * If a mapping does not exist, then the character passed is returned.
+   * Note that isUpperCase(toUpperCase(ch)) does not always return true.
+   *
+   * @param ch character to convert to uppercase
+   * @return uppercase mapping of ch, or ch if uppercase mapping does
+   *         not exist
+   * @see #isLowerCase(char)
+   * @see #isUpperCase(char)
+   * @see #toLowerCase(char)
+   * @see #toTitleCase(char)
+   */
+  public static native char toUpperCase(char ch);
+  /**
+   * Converts a Unicode character into its titlecase equivalent mapping.
+   * If a mapping does not exist, then the character passed is returned.
+   * Note that isTitleCase(toTitleCase(ch)) does not always return true.
+   *
+   * @param ch character to convert to titlecase
+   * @return titlecase mapping of ch, or ch if titlecase mapping does
+   *         not exist
+   * @see #isTitleCase(char)
+   * @see #toLowerCase(char)
+   * @see #toUpperCase(char)
+   */
+  public static native char toTitleCase(char ch);
+  /**
+   * Converts a character into a digit of the specified radix. If the radix
+   * is less than MIN_RADIX or greater than MAX_RADIX, or if the result of
+   * getNumericValue(ch) is not less than the radix, or if ch is not a decimal
+   * digit or in the case insensitive set of 'a'-'z', the result is -1.
+   * <br>
+   * character argument boundary = [Nd]|U+0041-U+005A|U+0061-U+007A
+   *    |U+FF21-U+FF3A|U+FF41-U+FF5A
+   *
+   * @param ch character to convert into a digit
+   * @param radix radix in which ch is a digit
+   * @return digit which ch represents in radix, or -1 not a valid digit
+   * @see #MIN_RADIX
+   * @see #MAX_RADIX
+   * @see #forDigit(int, int)
+   * @see #isDigit(char)
+   * @see #getNumericValue(char)
+   */
+  public static native int digit(char ch, int radix);
+  /**
+   * Returns the Unicode numeric value property of a character. For example,
+   * <code>'\\u216C'</code> (the Roman numeral fifty) returns 50.
+   *
+   * <p>This method also returns values for the letters A through Z, (not
+   * specified by Unicode), in these ranges: <code>'\u0041'</code>
+   * through <code>'\u005A'</code> (uppercase); <code>'\u0061'</code>
+   * through <code>'\u007A'</code> (lowercase); and <code>'\uFF21'</code>
+   * through <code>'\uFF3A'</code>, <code>'\uFF41'</code> through
+   * <code>'\uFF5A'</code> (full width variants).
+   *
+   * <p>If the character lacks a numeric value property, -1 is returned.
+   * If the character has a numeric value property which is not representable
+   * as a nonnegative integer, such as a fraction, -2 is returned.
+   *
+   * character argument boundary = [Nd]|[Nl]|[No]|U+0041-U+005A|U+0061-U+007A
+   *    |U+FF21-U+FF3A|U+FF41-U+FF5A
+   *
+   * @param ch character from which the numeric value property will
+   *        be retrieved
+   * @return the numeric value property of ch, or -1 if it does not exist, or
+   *         -2 if it is not representable as a nonnegative integer
+   * @see #forDigit(int, int)
+   * @see #digit(char, int)
+   * @see #isDigit(char)
+   * @since 1.1
+   */
+  public static native int getNumericValue(char ch);
+  /**
+   * Determines if a character is a ISO-LATIN-1 space. This is only the five
+   * characters <code>'\t'</code>, <code>'\n'</code>, <code>'\f'</code>,
+   * <code>'\r'</code>, and <code>' '</code>.
+   * <br>
+   * Java space = U+0020|U+0009|U+000A|U+000C|U+000D
+   *
+   * @param ch character to test
+   * @return true if ch is a space, else false
+   * @deprecated Replaced by {@link #isWhitespace(char)}
+   * @see #isSpaceChar(char)
+   * @see #isWhitespace(char)
+   */
+  public static boolean isSpace(char ch)
+  {
+    // Compare against the five ISO-LATIN-1 space characters directly.
+    // A bit-mask test that decrements ch first is not safe here: NUL wraps
+    // around to 0xFFFF, the shift distance is truncated to 31, and bit 31 is
+    // the very bit that stands for ' ', so NUL would be reported as a space.
+    return ch == ' '
+      || ch == '\t'
+      || ch == '\n'
+      || ch == '\r'
+      || ch == '\f';
+  }
+  /**
+   * Determines if a character is a Unicode space character. This includes
+   * SPACE_SEPARATOR, LINE_SEPARATOR, and PARAGRAPH_SEPARATOR.
+   * <br>
+   * Unicode space = [Zs]|[Zp]|[Zl]
+   *
+   * @param ch character to test
+   * @return true if ch is a Unicode space, else false
+   * @see #isWhitespace(char)
+   * @since 1.1
+   */
+  public static boolean isSpaceChar(char ch)
+  {
+    // The three separator categories: Zs, Zl and Zp.
+    final int separatorMask = (1 << SPACE_SEPARATOR)
+      | (1 << LINE_SEPARATOR)
+      | (1 << PARAGRAPH_SEPARATOR);
+    final int typeBit = 1 << getType(ch);
+    return (typeBit & separatorMask) != 0;
+  }
+  /**
+   * Determines if a character is Java whitespace. This includes Unicode
+   * space characters (SPACE_SEPARATOR, LINE_SEPARATOR, and
+   * PARAGRAPH_SEPARATOR) except the non-breaking spaces
+   * (<code>'\u00A0'</code>, <code>'\u2007'</code>, and <code>'\u202F'</code>);
+   * and these characters: <code>'\u0009'</code>, <code>'\u000A'</code>,
+   * <code>'\u000B'</code>, <code>'\u000C'</code>, <code>'\u000D'</code>,
+   * <code>'\u001C'</code>, <code>'\u001D'</code>, <code>'\u001E'</code>,
+   * and <code>'\u001F'</code>.
+   * <br>
+   * Java whitespace = ([Zs] not Nb)|[Zl]|[Zp]|U+0009-U+000D|U+001C-U+001F
+   *
+   * @param ch character to test
+   * @return true if ch is Java whitespace, else false
+   * @see #isSpaceChar(char)
+   * @since 1.1
+   */
+  public static boolean isWhitespace(char ch)
+  {
+    final int attr = readChar(ch);
+
+    // Separator categories count unless the no-break flag is set.
+    final int separatorMask = (1 << SPACE_SEPARATOR)
+      | (1 << LINE_SEPARATOR)
+      | (1 << PARAGRAPH_SEPARATOR);
+    final boolean isSeparator
+      = ((1 << (attr & TYPE_MASK)) & separatorMask) != 0;
+    if (isSeparator && (attr & NO_BREAK_MASK) == 0)
+      return true;
+
+    // Otherwise only a handful of control characters below U+0020 qualify.
+    if (ch > '\u001F')
+      return false;
+    final int controlMask = (1 << '\t')
+      | (1 << '\n')
+      | (1 << '\u000B')
+      | (1 << '\u000C')
+      | (1 << '\r')
+      | (1 << '\u001C')
+      | (1 << '\u001D')
+      | (1 << '\u001E')
+      | (1 << '\u001F');
+    return ((1 << ch) & controlMask) != 0;
+  }
+  /**
+   * Determines if a character has the ISO Control property.
+   * <br>
+   * ISO Control = [Cc]
+   *
+   * @param ch character to test
+   * @return true if ch is an ISO Control character, else false
+   * @see #isSpaceChar(char)
+   * @see #isWhitespace(char)
+   * @since 1.1
+   */
+  public static boolean isISOControl(char ch)
+  {
+    // ISO control is exactly the Cc general category.
+    final int category = getType(ch);
+    return category == CONTROL;
+  }
+  /**
+   * Returns the Unicode general category property of a character.
+   *
+   * @param ch character from which the general category property will
+   *        be retrieved
+   * @return the character category property of ch as an integer
+   * @see #UNASSIGNED
+   * @see #UPPERCASE_LETTER
+   * @see #LOWERCASE_LETTER
+   * @see #TITLECASE_LETTER
+   * @see #MODIFIER_LETTER
+   * @see #OTHER_LETTER
+   * @see #NON_SPACING_MARK
+   * @see #ENCLOSING_MARK
+   * @see #COMBINING_SPACING_MARK
+   * @see #DECIMAL_DIGIT_NUMBER
+   * @see #LETTER_NUMBER
+   * @see #OTHER_NUMBER
+   * @see #SPACE_SEPARATOR
+   * @see #LINE_SEPARATOR
+   * @see #PARAGRAPH_SEPARATOR
+   * @see #CONTROL
+   * @see #FORMAT
+   * @see #PRIVATE_USE
+   * @see #SURROGATE
+   * @see #DASH_PUNCTUATION
+   * @see #START_PUNCTUATION
+   * @see #END_PUNCTUATION
+   * @see #CONNECTOR_PUNCTUATION
+   * @see #OTHER_PUNCTUATION
+   * @see #MATH_SYMBOL
+   * @see #CURRENCY_SYMBOL
+   * @see #MODIFIER_SYMBOL
+   * @see #INITIAL_QUOTE_PUNCTUATION
+   * @see #FINAL_QUOTE_PUNCTUATION
+   * @since 1.1
+   */
+  public static native int getType(char ch);
+  /**
+   * Converts a digit into a character which represents that digit
+   * in a specified radix. If the radix is less than MIN_RADIX or greater
+   * than MAX_RADIX, or the digit is negative or not less than the radix,
+   * then the null character <code>'\0'</code> is returned.  Otherwise the
+   * return value is in '0'-'9' and 'a'-'z'.
+   * <br>
+   * return value boundary = U+0030-U+0039|U+0061-U+007A
+   *
+   * @param digit digit to be converted into a character
+   * @param radix radix of digit
+   * @return character representing digit in radix, or '\0'
+   * @see #MIN_RADIX
+   * @see #MAX_RADIX
+   * @see #digit(char, int)
+   */
+  public static char forDigit(int digit, int radix)
+  {
+    // Reject radices outside the supported range and digits outside [0, radix).
+    final boolean radixOk = radix >= MIN_RADIX && radix <= MAX_RADIX;
+    if (! radixOk || digit < 0 || digit >= radix)
+      return '\0';
+    // 0-9 map onto '0'-'9'; 10 and above continue from 'a'.
+    if (digit < 10)
+      return (char) ('0' + digit);
+    return (char) ('a' - 10 + digit);
+  }
+  /**
+   * Returns the Unicode directionality property of the character. This
+   * is used in the visual ordering of text.
+   *
+   * @param ch the character to look up
+   * @return the directionality constant, or DIRECTIONALITY_UNDEFINED
+   * @see #DIRECTIONALITY_UNDEFINED
+   * @see #DIRECTIONALITY_LEFT_TO_RIGHT
+   * @see #DIRECTIONALITY_RIGHT_TO_LEFT
+   * @see #DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
+   * @see #DIRECTIONALITY_EUROPEAN_NUMBER
+   * @see #DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
+   * @see #DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
+   * @see #DIRECTIONALITY_ARABIC_NUMBER
+   * @see #DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
+   * @see #DIRECTIONALITY_NONSPACING_MARK
+   * @see #DIRECTIONALITY_BOUNDARY_NEUTRAL
+   * @see #DIRECTIONALITY_PARAGRAPH_SEPARATOR
+   * @see #DIRECTIONALITY_SEGMENT_SEPARATOR
+   * @see #DIRECTIONALITY_WHITESPACE
+   * @see #DIRECTIONALITY_OTHER_NEUTRALS
+   * @see #DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
+   * @see #DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
+   * @see #DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
+   * @see #DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
+   * @see #DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
+   * @since 1.4
+   */
+  public static native byte getDirectionality(char ch);
+  /**
+   * Determines whether the character is mirrored according to Unicode. For
+   * example, <code>\u0028</code> (LEFT PARENTHESIS) appears as '(' in
+   * left-to-right text, but ')' in right-to-left text.
+   *
+   * @param ch the character to look up
+   * @return true if the character is mirrored
+   * @since 1.4
+   */
+  public static boolean isMirrored(char ch)
+  {
+    // The mirrored flag is one bit of the packed attribute word.
+    final int attr = readChar(ch);
+    return (attr & MIRROR_MASK) != 0;
+  }
+  /**
+   * Compares another Character to this Character, numerically.
+   *
+   * @param anotherCharacter Character to compare with this Character
+   * @return a negative integer if this Character is less than
+   *         anotherCharacter, zero if this Character is equal, and
+   *         a positive integer if this Character is greater
+   * @throws NullPointerException if anotherCharacter is null
+   * @since 1.2
+   */
+  public int compareTo(Character anotherCharacter)
+  {
+    // Both operands widen to non-negative ints, so the difference cannot
+    // overflow and its sign gives the ordering.
+    final int mine = value;
+    final int theirs = anotherCharacter.value;
+    return mine - theirs;
+  }
+  /**
+   * Compares an object to this Character.  Assuming the object is a
+   * Character object, this method performs the same comparison as
+   * compareTo(Character).
+   *
+   * @param o object to compare
+   * @return the comparison value
+   * @throws ClassCastException if o is not a Character object
+   * @throws NullPointerException if o is null
+   * @see #compareTo(Character)
+   * @since 1.2
+   */
+  public int compareTo(Object o)
+  {
+    // The cast is what raises ClassCastException for non-Character arguments.
+    return compareTo((Character) o);
  }
-}
+} // class Character
--- a/libjava/java/lang/natCharacter.cc
+++ b/libjava/java/lang/natCharacter.cc
-// natCharacter.cc - Native part of Character class.
+/* java.lang.Character -- Wrapper class for char, and Unicode subsets
+   Copyright (C) 1998, 1999, 2001, 2002 Free Software Foundation, Inc.
-/* Copyright (C) 1998, 1999  Free Software Foundation
+This file is part of GNU Classpath.
-   This file is part of libgcj.
+GNU Classpath is free software; you can redistribute it and/or modify
-This software is copyrighted work licensed under the terms of the
+it under the terms of the GNU General Public License as published by
-Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
+the Free Software Foundation; either version 2, or (at your option)
-details.  */
+any later version.
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
 #include <config.h>
@@ -18,267 +45,69 @@ details.  */
-#define asize(x)  ((sizeof (x)) / sizeof (x[0]))
-static jchar
-to_lower_title (jchar ch)
-{
-  for (unsigned int i = 0; i < asize (title_to_upper_table); ++i)
-    {
-      // We can assume that the entries in the two tables are
-      // parallel.  This is checked in the script.
-      if (title_to_upper_table[i][1] == ch
-	  || title_to_upper_table[i][0] == ch)
-	return title_to_lower_table[i][1];
-    }
-  return ch;
-}
-static jchar
-to_upper_title (jchar ch)
-{
-  for (unsigned int i = 0; i < asize (title_to_lower_table); ++i)
-    {
-      // We can assume that the entries in the two tables are
-      // parallel.  This is checked in the script.
-      if (title_to_lower_table[i][1] == ch
-	  || title_to_lower_table[i][0] == ch)
-	return title_to_upper_table[i][1];
-    }
-  return ch;
-}
-jboolean
-java::lang::Character::isTitleCase (jchar ch)
-{
-  for (unsigned int i = 0; i < asize (title_to_lower_table); ++i)
-    {
-      if (title_to_lower_table[i][0] == ch)
-	return true;
-    }
-  return false;
-}
 jchar
-java::lang::Character::toTitleCase (jchar ch)
+java::lang::Character::readChar(jchar ch)
 {
-  // Both titlecase mapping tables have the same length.  This is
+  // Perform 16-bit addition to find the correct entry in data.
-  // checked in the chartables script.
+  return data[(jchar) (blocks[ch >> SHIFT] + ch)];
-  for (unsigned int i = 0; i < asize (title_to_lower_table); ++i)
-    {
-      if (title_to_lower_table[i][0] == ch)
-	return ch;
-      if (title_to_lower_table[i][1] == ch)
-	return title_to_lower_table[i][0];
-      if (title_to_upper_table[i][1] == ch)
-	return title_to_upper_table[i][0];
-    }
-  return toUpperCase (ch);
-}
-#ifdef COMPACT_CHARACTER
-static int
-table_search (const jchar table[][2], int table_len, jchar ch)
-{
-  int low, high, i, old;
-  low = 0;
-  high = table_len;
-  i = high / 2;
-  while (true)
-    {
-      if (ch < table[i][0])
-	high = i;
-      else if (ch > table[i][1])
-	low = i;
-      else
-	return i;
-      old = i;
-      i = (high + low) / 2;
-      if (i == old)
-	break;
-    }
-  return -1;
-}
-jint
-java::lang::Character::digit_value (jchar ch)
-{
-  int index = table_search (digit_table, asize (digit_table), ch);
-  if (index == -1)
-    return -1;
-  jchar base = digit_table[index][0];
-  // Tamil doesn't have a digit `0'.  So we special-case it here.
-  if (base == TAMIL_DIGIT_ONE)
-    return ch - base + 1;
-  return ch - base;
-}
-jint
-java::lang::Character::getNumericValue (jchar ch)
-{
-  jint d = digit (ch, 36);
-  if (d != -1)
-    return d;
-  for (unsigned int i = 0; i < asize (numeric_table); ++i)
-    {
-      if (numeric_table[i] == ch)
-	return numeric_value[i];
-    }
-  return -1;
 }
 jint
-java::lang::Character::getType (jchar ch)
+java::lang::Character::getType(jchar ch)
 {
-  int index = table_search (all_table, asize (all_table), ch);
+  // Perform 16-bit addition to find the correct entry in data.
-  if (index != -1)
+  return (jint) (data[(jchar) (blocks[ch >> SHIFT] + ch)] & TYPE_MASK);
-    return category_table[index];
-  return UNASSIGNED;
-}
-jboolean
-java::lang::Character::isLowerCase (jchar ch)
-{
-  if (ch >= 0x2000 && ch <= 0x2fff)
-    return false;
-  if (table_search (lower_case_table, asize (lower_case_table), ch) != -1)
-    return true;
-  int low, high, i, old;
-  low = 0;
-  high = asize (lower_anomalous_table);
-  i = high / 2;
-  while (true)
-    {
-      if (ch < lower_anomalous_table[i])
-	high = i;
-      else if (ch > lower_anomalous_table[i])
-	low = i;
-      else
-	return true;
-      old = i;
-      i = (high + low) / 2;
-      if (i == old)
-	break;
-    }
-  return false;
-}
-jboolean
-java::lang::Character::isSpaceChar (jchar ch)
-{
-  return table_search (space_table, asize (space_table), ch) != -1;
-}
-jboolean
-java::lang::Character::isUpperCase (jchar ch)
-{
-  if (ch >= 0x2000 && ch <= 0x2fff)
-    return false;
-  return table_search (upper_case_table, asize (upper_case_table), ch) != -1;
 }
 jchar
-java::lang::Character::toLowerCase (jchar ch)
+java::lang::Character::toLowerCase(jchar ch)
 {
-  int index = table_search (upper_case_table, asize (upper_case_table), ch);
+  return (jchar) (ch + lower[readChar(ch) >> 7]);
-  if (index == -1)
-    return to_lower_title (ch);
-  return (jchar) (ch - upper_case_table[index][0]
-		  + upper_case_map_table[index]);
 }
 jchar
-java::lang::Character::toUpperCase (jchar ch)
+java::lang::Character::toUpperCase(jchar ch)
 {
-  int index = table_search (lower_case_table, asize (lower_case_table), ch);
+  return (jchar) (ch + upper[readChar(ch) >> 7]);
-  if (index == -1)
-    return to_upper_title (ch);
-  return (jchar) (ch - lower_case_table[index][0]
-		  + lower_case_map_table[index]);
 }
-#else /* COMPACT_CHARACTER */
+jchar
+java::lang::Character::toTitleCase(jchar ch)
-jint
-java::lang::Character::digit_value (jchar ch)
 {
-  if (type_table[ch] == DECIMAL_DIGIT_NUMBER)
+  // As title is short, it doesn't hurt to exhaustively iterate over it.
-    return attribute_table[ch];
+  for (int i = title_length - 2; i >= 0; i -= 2)
-  return -1;
+    if (title[i] == ch)
+      return title[i + 1];
+  return toUpperCase(ch);
 }
 jint
-java::lang::Character::getNumericValue (jchar ch)
+java::lang::Character::digit(jchar ch, jint radix)
 {
-  jint d = digit (ch, 36);
+  if (radix < MIN_RADIX || radix > MAX_RADIX)
-  if (d != -1)
+    return (jint) -1;  // unsupported radix
-    return d;
+  jchar attr = readChar(ch);
+  if (((1 << (attr & TYPE_MASK))  // one-bit mask for this character's type, tested against the accepted types
-  // Some characters require two attributes.  We special-case them here.
+       & ((1 << UPPERCASE_LETTER)
-  if (ch >= ROMAN_START && ch <= ROMAN_END)
+          | (1 << LOWERCASE_LETTER)
-    return secondary_attribute_table[ch - ROMAN_START];
+          | (1 << DECIMAL_DIGIT_NUMBER))))
-  if (type_table[ch] == LETTER_NUMBER || type_table[ch] == OTHER_NUMBER)
+    {
-    return attribute_table[ch];
+      // Signedness doesn't matter; 0xffff vs. -1 are both rejected.
-  return -1;
+      jint digit = (jint) numValue[attr >> 7];
+      return (digit >= 0 && digit < radix) ? digit : (jint) -1;  // the value must be a valid digit in this radix
+    }
+  return (jint) -1;  // neither a letter nor a decimal digit
 }
 jint
-java::lang::Character::getType (jchar ch)
+java::lang::Character::getNumericValue(jchar ch)
-{
-  return type_table[ch];
-}
-jboolean
-java::lang::Character::isLowerCase (jchar ch)
-{
-  if (ch >= 0x2000 && ch <= 0x2fff)
-    return false;
-  return type_table[ch] == LOWERCASE_LETTER;
-}
-jboolean
-java::lang::Character::isSpaceChar (jchar ch)
-{
-  return (type_table[ch] == SPACE_SEPARATOR
-	  || type_table[ch] == LINE_SEPARATOR
-	  || type_table[ch] == PARAGRAPH_SEPARATOR);
-}
-jboolean
-java::lang::Character::isUpperCase (jchar ch)
-{
-  if (ch >= 0x2000 && ch <= 0x2fff)
-    return false;
-  return type_table[ch] == UPPERCASE_LETTER;
-}
-jchar
-java::lang::Character::toLowerCase (jchar ch)
 {
-  if (type_table[ch] == UPPERCASE_LETTER)
+  // numValue is stored as an array of jshort, since 10000 is the maximum.
-    return attribute_table[ch];
+  return (jint) numValue[readChar(ch) >> 7];  // table entry is -1 when Unicode defines no value and -2 when the value is not a positive integer
-  return to_lower_title (ch);
 }
-jchar
+jbyte
-java::lang::Character::toUpperCase (jchar ch)
+java::lang::Character::getDirectionality(jchar ch)
 {
-  if (type_table[ch] == LOWERCASE_LETTER)
+  return direction[readChar(ch) >> 7];  // direction[] holds 0 - 18 for a character with a direction, otherwise -1
-    return attribute_table[ch];
-  return to_upper_title (ch);
 }
-#endif /* COMPACT_CHARACTER */
--- a/libjava/scripts/MakeCharTables.java
+++ b/libjava/scripts/MakeCharTables.java
+/* MakeCharTables.java - converts gnu.java.lang.CharData into
+                         include/java-chartables.h
+   Copyright (C) 2002 Free Software Foundation, Inc.
+This file is part of GNU Classpath.
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+import gnu.java.lang.CharData;
+public class MakeCharTables implements CharData
+{
+  /**
+   * Writes the contents of include/java-chartables.h to standard output:
+   * the license header, the <code>SHIFT</code> macro, and one C array for
+   * each string constant inherited from <code>CharData</code>.
+   *
+   * @param args ignored
+   */
+  public static void main(String[] args)
+  {
+    System.out.println("/* java-chartables.h -- Character tables for java.lang.Character -*- c++ -*-\n"
+                       + "   Copyright (C) 2002 Free Software Foundation, Inc.\n"
+                       + "   *** This file is generated by scripts/MakeCharTables.java ***\n"
+                       + "\n"
+                       + "This file is part of GNU Classpath.\n"
+                       + "\n"
+                       + "GNU Classpath is free software; you can redistribute it and/or modify\n"
+                       + "it under the terms of the GNU General Public License as published by\n"
+                       + "the Free Software Foundation; either version 2, or (at your option)\n"
+                       + "any later version.\n"
+                       + "\n"
+                       + "GNU Classpath is distributed in the hope that it will be useful, but\n"
+                       + "WITHOUT ANY WARRANTY; without even the implied warranty of\n"
+                       + "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n"
+                       + "General Public License for more details.\n"
+                       + "\n"
+                       + "You should have received a copy of the GNU General Public License\n"
+                       + "along with GNU Classpath; see the file COPYING.  If not, write to the\n"
+                       + "Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA\n"
+                       + "02111-1307 USA.\n"
+                       + "\n"
+                       + "Linking this library statically or dynamically with other modules is\n"
+                       + "making a combined work based on this library.  Thus, the terms and\n"
+                       + "conditions of the GNU General Public License cover the whole\n"
+                       + "combination.\n"
+                       + "\n"
+                       + "As a special exception, the copyright holders of this library give you\n"
+                       + "permission to link this library with independent modules to produce an\n"
+                       + "executable, regardless of the license terms of these independent\n"
+                       + "modules, and to copy and distribute the resulting executable under\n"
+                       + "terms of your choice, provided that you also meet, for each linked\n"
+                       + "independent module, the terms and conditions of the license of that\n"
+                       + "module.  An independent module is a module which is not derived from\n"
+                       + "or based on this library.  If you modify this library, you may extend\n"
+                       + "this exception to your version of the library, but you are not\n"
+                       + "obligated to do so.  If you do not wish to do so, delete this\n"
+                       + "exception statement from your version. */\n"
+                       + "\n"
+                       + "#ifndef __JAVA_CHARTABLES_H__\n"
+                       + "#define __JAVA_CHARTABLES_H__\n"
+                       + "\n"
+                       + "// These tables are automatically generated by scripts/MakeCharTables.java.\n"
+                       + "// This is in turn parsing gnu.java.lang.CharData, which is generated by\n"
+                       + "// scripts/unicode-muncher.pl.  The Unicode data comes from\n"
+                       + "// www.unicode.org; this header is based on\n"
+                       + "// " + SOURCE + ". JDK 1.4 uses Unicode version 3.0.0.\n"
+                       + "// DO NOT EDIT the tables.  Instead, fix the upstream scripts and run\n"
+                       + "// them again.\n"
+                       + "\n"
+                       + "// The data is stored in C style arrays of the appropriate CNI types, to\n"
+                       + "// guarantee that the data is constant and non-relocatable.  The field\n"
+                       + "// <code>blocks</code> stores the offset of a block of 2<sup>SHIFT</sup>\n"
+                       + "// characters within <code>data</code>. The data field, in turn, stores\n"
+                       + "// information about each character in the low order bits, and an offset\n"
+                       + "// into the attribute tables <code>upper</code>, <code>lower</code>,\n"
+                       + "// <code>numValue</code>, and <code>direction</code>.  Notice that the\n"
+                       + "// attribute tables are much smaller than 0xffff entries; as many characters\n"
+                       + "// in Unicode share common attributes.  Finally, there is a listing for\n"
+                       + "// <code>title</code> exceptions (most characters just have the same title\n"
+                       + "// case as upper case).\n"
+                       + "\n"
+                       + "// This file should only be included by natCharacter.cc\n"
+                       + "\n"
+                       + "/**\n"
+                       + " * The character shift amount to look up the block offset. In other words,\n"
+                       + " * <code>(char) (blocks[ch >> SHIFT] + ch)</code> is the index where\n"
+                       + " * <code>ch</code> is described in <code>data</code>.\n"
+                       + " */\n"
+                       + "#define SHIFT " + SHIFT);
+    // Each table below is emitted with the doc comment that precedes it in
+    // the generated header and the element type it is narrowed to.
+    convertString("/**\n"
+                  + " * The mapping of character blocks to their location in <code>data</code>.\n"
+                  + " * Each entry has been adjusted so that a modulo 16 sum with the desired\n"
+                  + " * character gives the actual index into <code>data</code>.\n"
+                  + " */",
+                  char.class, "blocks", BLOCKS);
+    convertString("/**\n"
+                  + " * Information about each character.  The low order 5 bits form the\n"
+                  + " * character type, the next bit is a flag for non-breaking spaces, and the\n"
+                  + " * next bit is a flag for mirrored directionality.  The high order 9 bits\n"
+                  + " * form the offset into the attribute tables.  Note that this limits the\n"
+                  + " * number of unique character attributes to 512, which is not a problem\n"
+                  + " * as of Unicode version 3.2.0, but may soon become one.\n"
+                  + " */",
+                  char.class, "data", DATA);
+    convertString("/**\n"
+                  + " * This is the attribute table for computing the numeric value of a\n"
+                  + " * character.  The value is -1 if Unicode does not define a value, -2\n"
+                  + " * if the value is not a positive integer, otherwise it is the value.\n"
+                  + " */",
+                  short.class, "numValue", NUM_VALUE);
+    convertString("/**\n"
+                  + " * This is the attribute table for computing the uppercase representation\n"
+                  + " * of a character.  The value is the difference between the character and\n"
+                  + " * its uppercase version.\n"
+                  + " */",
+                  short.class, "upper", UPPER);
+    convertString("/**\n"
+                  + " * This is the attribute table for computing the lowercase representation\n"
+                  + " * of a character.  The value is the difference between the character and\n"
+                  + " * its lowercase version.\n"
+                  + " */",
+                  short.class, "lower", LOWER);
+    convertString("/**\n"
+                  + " * This is the attribute table for computing the directionality class\n"
+                  + " * of a character.  At present, the value is in the range 0 - 18 if the\n"
+                  + " * character has a direction, otherwise it is -1.\n"
+                  + " */",
+                  byte.class, "direction", DIRECTION);
+    convertString("/**\n"
+                  + " * This is the listing of titlecase special cases (all other character\n"
+                  + " * can use <code>upper</code> to determine their titlecase).  The listing\n"
+                  + " * is a sequence of character pairs; converting the first character of the\n"
+                  + " * pair to titlecase produces the second character.\n"
+                  + " */",
+                  char.class, "title", TITLE);
+    System.out.println();
+    System.out.println("#endif /* __JAVA_CHARTABLES_H__ */");
+  }
+  /**
+   * Prints one string constant as a C array definition on standard output,
+   * followed by a <code><i>name</i>_length</code> constant that holds the
+   * number of elements.
+   *
+   * @param header the comment text printed above the array
+   * @param type the primitive element type (char, byte or short); it
+   *        selects the CNI type name, the number of values per output
+   *        line, and how each character of <code>field</code> is narrowed
+   * @param name the C identifier of the array
+   * @param field the encoded table, one array element per character
+   * @throws Error if <code>type</code> is not one of the supported types
+   */
+  private static void convertString(String header, Class type,
+                                    String name, String field)
+  {
+    System.out.println();
+    System.out.println(header);
+    System.out.println("static const j" + type.getName() + " " + name
+                       + "[] = {");
+    char[] data = field.toCharArray();
+    // Number of values printed on each output line for this element type.
+    int wrap;
+    if (type == char.class)
+      wrap = 10;
+    else if (type == byte.class)
+      wrap = 21;
+    else if (type == short.class)
+      wrap = 13;
+    else
+      throw new Error("Unexpected type");
+    for (int i = 0; i < data.length; i += wrap)
+      {
+        System.out.print("   ");
+        // The last row may be shorter than wrap.
+        int end = Math.min(i + wrap, data.length);
+        for (int j = i; j < end; j++)
+          {
+            System.out.print(" ");
+            // char data is printed unsigned; byte and short data is
+            // narrowed first so that it prints as a signed value.
+            if (type == char.class)
+              System.out.print((int) data[j]);
+            else if (type == byte.class)
+              System.out.print((byte) data[j]);
+            else
+              System.out.print((short) data[j]);
+            System.out.print(",");
+          }
+        System.out.println();
+      }
+    System.out.println("  };\n"
+                       + "/** Length of " + name + ". */\n"
+                       + "static const int " + name + "_length = "
+                       + data.length + ";");
+  }
+}
--- a/libjava/scripts/blocks.pl
+++ b/libjava/scripts/blocks.pl
-#! /usr/bin/perl
-if ($ARGV[0] eq '')
-{
-    $file = 'Blocks.txt';
-    if (! -f $file)
-    {
-	# Too painful to figure out how to get Perl to do it.
-	system 'wget -o .wget-log http://www.unicode.org/Public/UNIDATA/Blocks.txt';
-    }
-}
-else
-{
-    $file = $ARGV[0];
-}
-open (INPUT, "< $file") || die "couldn't open $file: $!";
-@array = ();
-while (<INPUT>)
-{
-    next if /^#/;
-    chop;
-    next if /^$/;
-    ($start, $to, $text) = split (/; /);
-    ($symbol = $text) =~ tr/a-z/A-Z/;
-    $symbol =~ s/[- ]/_/g;
-    # Special case for one of the SPECIALS.
-    next if $start eq 'FEFF';
-    # Special case some areas that our heuristic mishandles.
-    if ($symbol eq 'HIGH_SURROGATES')
-    {
-	$symbol = 'SURROGATES_AREA';
-	$text = 'Surrogates Area';
-	$to = 'DFFF';
-    }
-    elsif ($symbol =~ /SURROGATES/)
-    {
-	next;
-    }
-    elsif ($symbol eq 'PRIVATE_USE')
-    {
-	$symbol .= '_AREA';
-	$text = 'Private Use Area';
-    }
-    printf "    public static final UnicodeBlock %s = new UnicodeBlock (\"%s\", '\\u%s', '\\u%s');\n",
-           $symbol, $text, $start, $to;
-    push (@array, $symbol);
-}
-printf "    private static final UnicodeBlock[] blocks = {\n";
-foreach (@array)
-{
-    printf "      %s", $_;
-    printf "," unless $_ eq 'SPECIALS';
-    printf "\n";
-}
-printf "    };\n";
-close (INPUT);
--- a/libjava/scripts/unicode-blocks.pl
+++ b/libjava/scripts/unicode-blocks.pl
+#!/usr/bin/perl -w
+# unicode-blocks.pl -- Script to generate java.lang.Character.UnicodeBlock
+# Copyright (C) 2002 Free Software Foundation, Inc.
+#
+# This file is part of GNU Classpath.
+#
+# GNU Classpath is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# GNU Classpath is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Classpath; see the file COPYING.  If not, write to the
+# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+# 02111-1307 USA.
+#
+# Linking this library statically or dynamically with other modules is
+# making a combined work based on this library.  Thus, the terms and
+# conditions of the GNU General Public License cover the whole
+# combination.
+#
+# As a special exception, the copyright holders of this library give you
+# permission to link this library with independent modules to produce an
+# executable, regardless of the license terms of these independent
+# modules, and to copy and distribute the resulting executable under
+# terms of your choice, provided that you also meet, for each linked
+# independent module, the terms and conditions of the license of that
+# module.  An independent module is a module which is not derived from
+# or based on this library.  If you modify this library, you may extend
+# this exception to your version of the library, but you are not
+# obligated to do so.  If you do not wish to do so, delete this
+# exception statement from your version.
+# Code for reading Blocks.txt and generating (to standard out) the code for
+# java.lang.Character.UnicodeBlock, for pasting into java/lang/Character.java.
+# You should probably check that the results are accurate to the
+# specification, but I made sure it works OOB for Unicode 3.0.0 and JDK 1.4.
+# As the grammar for the Blocks.txt file is changing in Unicode 3.2.0, you
+# will have to tweak this some for future use.  For now, the relevant
+# Unicode definition files are found in libjava/gnu/gcj/convert/.
+#
+# author Eric Blake <ebb9@email.byu.edu>
+#
+# usage: unicode-blocks.pl <blocks.txt>
+#    where <blocks.txt> is obtained from www.unicode.org (named Blocks-3.txt
+#    for Unicode version 3.0.0).
+# Require exactly one argument: the Unicode block definition file.
+die "Usage: $0 <blocks.txt>" unless @ARGV == 1;
+# Three-argument open, so that the file name given on the command line is
+# never interpreted as an open mode or as a pipe command.
+open (BLOCKS, '<', $ARGV[0]) || die "Can't open Unicode block file: $!\n";
+# A hash of added fields and the JDK they were added in, to automatically
+# print @since tags.  Maintaining this is optional (and tedious), but nice.
+my %additions = ("SYRIAC" => "1.4",
+                 "THAANA" => "1.4",
+                 "SINHALA" => "1.4",
+                 "MYANMAR" => "1.4",
+                 "ETHIOPIC" => "1.4",
+                 "CHEROKEE" => "1.4",
+                 "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS" => "1.4",
+                 "OGHAM" => "1.4",
+                 "RUNIC" => "1.4",
+                 "KHMER" => "1.4",
+                 "MONGOLIAN" => "1.4",
+                 "BRAILLE_PATTERNS" => "1.4",
+                 "CJK_RADICALS_SUPPLEMENT" => "1.4",
+                 "KANGXI_RADICALS" => "1.4",
+                 "IDEOGRAPHIC_DESCRIPTION_CHARACTERS" => "1.4",
+                 "BOPOMOFO_EXTENDED" => "1.4",
+                 "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A" => "1.4",
+                 "YI_SYLLABLES" => "1.4",
+                 "YI_RADICALS" => "1.4",
+                 );
+# Print the fixed head of the UnicodeBlock class.  The text is kept in
+# non-interpolating here-documents because it contains Javadoc @tags and
+# '\uFEFF', which must be emitted verbatim; only the one line naming the
+# input file is printed from an interpolated string.
+print <<'EOF';
+  /**
+   * A family of character subsets in the Unicode specification. A character
+   * is in at most one of these blocks.
+   *
+   * This inner class was generated automatically from
+EOF
+print "   * <code>$ARGV[0]</code>, by some perl scripts.\n";
+print <<'EOF';
+   * This Unicode definition file can be found on the
+   * <a href="http://www.unicode.org">http://www.unicode.org</a> website.
+   * JDK 1.4 uses Unicode version 3.0.0.
+   *
+   * @author scripts/unicode-blocks.pl (written by Eric Blake)
+   * @since 1.2
+   */
+  public static final class UnicodeBlock extends Subset
+  {
+    /** The start of the subset. */
+    private final char start;
+    /** The end of the subset. */
+    private final char end;
+    /**
+     * Constructor for strictly defined blocks.
+     *
+     * @param start the start character of the range
+     * @param end the end character of the range
+     * @param name the block name
+     */
+    private UnicodeBlock(char start, char end, String name)
+    {
+      super(name);
+      this.start = start;
+      this.end = end;
+    }
+    /**
+     * Returns the Unicode character block which a character belongs to.
+     *
+     * @param ch the character to look up
+     * @return the set it belongs to, or null if it is not in one
+     */
+    public static UnicodeBlock of(char ch)
+    {
+      // Special case, since SPECIALS contains two ranges.
+      if (ch == '\uFEFF')
+        return SPECIALS;
+      // Simple binary search for the correct block.
+      int low = 0;
+      int hi = sets.length - 1;
+      while (low <= hi)
+        {
+          int mid = (low + hi) >> 1;
+          UnicodeBlock b = sets[mid];
+          if (ch < b.start)
+            hi = mid - 1;
+          else if (ch > b.end)
+            low = mid + 1;
+          else
+            return b;
+        }
+      return null;
+    }
+EOF
+# State for the special cases: whether the first Specials range has been
+# dropped, how many Surrogates ranges have been read so far, and where the
+# first of them started.
+my ($specials_dropped, $surrogate_ranges, $surrogate_first) = (0, 0, 0);
+my @names = ();
+while (defined (my $line = <BLOCKS>)) {
+    # Comment lines carry no block definition.
+    next if $line =~ /^\#/;
+    my ($start, $end, $block) = split (/; /, $line);
+    # Lines without three "; "-separated fields are ignored.
+    next unless defined $block;
+    chomp $block;
+    $block =~ s/ *$//;
+    # SPECIALS is two disjoint ranges; the first one is skipped here and
+    # handled as a special case in the generated code.
+    if ($block =~ /Specials/ and not $specials_dropped) {
+        $specials_dropped = 1;
+        next;
+    }
+    # In Java, SURROGATES_AREA is one range rather than three consecutive
+    # ones: remember where the first began, skip the first two, and let
+    # the third stand for the whole area.
+    if ($block =~ /Surrogates/) {
+        $surrogate_ranges++;
+        $surrogate_first = $start if $surrogate_ranges == 1;
+        next if $surrogate_ranges <= 2;
+        $start = $surrogate_first;
+        $block = "Surrogates Area";
+    }
+    # Special case the name of PRIVATE_USE_AREA.
+    $block =~ s/(Private Use)/$1 Area/;
+    # The Java constant name is the block name in upper case, with spaces
+    # and hyphens turned into underscores.
+    my $name = $block;
+    $name =~ tr/a-z -/A-Z__/;
+    push @names, $name;
+    my $since = "";
+    $since = "\n     * \@since $additions{$name}" if defined $additions{$name};
+    my $extra = "";
+    $extra = "'\\uFEFF', " if $block =~ /Specials/;
+    print <<EOF;
+    /**
+     * $block.
+     * $extra'\\u$start' - '\\u$end'.$since
+     */
+    public final static UnicodeBlock $name
+      = new UnicodeBlock('\\u$start', '\\u$end',
+                         "$name");
+EOF
+}
+# Close the class with the table of all emitted blocks, in the order they
+# were read, which the generated UnicodeBlock.of() searches.
+print "    /**\n",
+      "     * The defined subsets.\n",
+      "     */\n",
+      "    private static final UnicodeBlock sets[] = {\n";
+for my $name (@names) {
+    print "      $name,\n";
+}
+print "    };\n",
+      "  } // class UnicodeBlock\n";
--- a/libjava/scripts/unicode-decomp.pl
+++ b/libjava/scripts/unicode-decomp.pl
+#!/usr/bin/perl -w
+# unicode-decomp.pl - script to generate database for java.text.Collator
+# Copyright (C) 1998, 1999, 2002 Free Software Foundation, Inc.
+#
+# This file is part of libjava.
+# 
+# This software is copyrighted work licensed under the terms of the
+# Libjava License.  Please consult the file "LIBJAVA_LICENSE" for
+# details.
+# Code for reading UnicodeData.txt and generating the decomposition
+# tables in include/java-chardecomp.h.  For now, the relevant Unicode
+# definition files are found in libjava/gnu/gcj/convert/.
+#
+# Usage: ./unicode-decomp.pl [-n] <UnicodeData.txt> <java-chardecomp.h>
+#   where <UnicodeData.txt> is obtained from www.unicode.org (named
+#   UnicodeData-3.0.0.txt for Unicode version 3.0.0), and
+#   <java-chardecomp.h> is the final location of include/java-chardecomp.h.
+#   As of JDK 1.4, use Unicode version 3.0.0 for best results.
+#
+# If this exits with nonzero status, then you must investigate the
+# cause of the problem.
+# Diagnostics and other information to stderr.
+# With -n, the files are not created, but all processing still occurs.
+# These map characters to their decompositions.
+my %canonical_decomposition = ();
+my %full_decomposition = ();
+# Handle `-n': drop the flag and send the output to /dev/null.
+if ($ARGV[0] && $ARGV[0] eq '-n')
+{
+    shift @ARGV;
+    # Only substitute the output name when an input file was given.
+    # Assigning $ARGV[1] on an empty @ARGV would pad it to two elements
+    # and let a bare `-n' slip past the usage check below.
+    $ARGV[1] = '/dev/null' if @ARGV;
+}
+die "Usage: $0 <UnicodeData.txt> <java-chardecomp.h>" unless @ARGV == 2;
+# Three-argument open, so that the file name is taken literally.
+open (UNICODE, '<', $ARGV[0]) || die "Can't open Unicode attribute file: $!\n";
+# Process the Unicode file: record the decomposition of every character
+# that has one.
+$| = 1;
+my $count = 0;
+print STDERR "Parsing attributes file";
+while (defined (my $line = <UNICODE>))
+{
+    # Progress indicator: one dot per thousand input lines.
+    print STDERR "." if $count++ % 1000 == 0;
+    chomp $line;
+    $line =~ s/\r//g;
+    # Only the code point (field 0) and the decomposition mapping
+    # (field 5) are used.
+    my ($code, $mapping) = (split (';', $line, 7))[0, 5];
+    $code = hex ($code);
+    next if $mapping eq '';
+    # A <tag> token sends the mapping to the full table instead of the
+    # canonical one; the remaining tokens are the hexadecimal code points
+    # of the expansion, stored packed as 16-bit values.
+    my @tokens = split (' ', $mapping);
+    my $tagged = grep { /^\<.*\>$/ } @tokens;
+    my @expansion = map { hex ($_) } grep { !/^\<.*\>$/ } @tokens;
+    my $table = $tagged ? \%full_decomposition : \%canonical_decomposition;
+    $table->{$code} = pack ("n*", @expansion);
+}
+# Now generate decomposition tables.
+# Three-argument open, so that the output name is taken literally.
+open (DECOMP, '>', $ARGV[1]) or die "Can't open output file: $!\n";
+print STDERR "\nGenerating tables\n";
+print DECOMP <<EOF;
+// java-chardecomp.h - Decomposition character tables -*- c++ -*-
+#ifndef __JAVA_CHARDECOMP_H__
+#define __JAVA_CHARDECOMP_H__
+// These tables are automatically generated by the $0
+// script.  DO NOT EDIT the tables.  Instead, fix the script
+// and run it again.
+// This file should only be included by natCollator.cc
+struct decomp_entry
+{
+  jchar key;
+  const char *value;
+};
+EOF
+# Keep the `&' call form: the sub is declared with a prototype further
+# down the file, and a plain call before that point draws a prototype
+# warning under -w.
+&write_decompositions;
+print DECOMP "#endif /* __JAVA_CHARDECOMP_H__ */\n";
+# Buffered write errors only surface at close.  Check it, so that an
+# incomplete header makes the script exit with nonzero status.
+close (DECOMP) or die "Can't write output file: $!\n";
+print STDERR "Done\n";
+exit;
+# Write a single decomposition table to DECOMP.  NAME is the prefix of the
+# emitted array name, and TABLE maps a character code to its expansion
+# packed as 16-bit values.  IS_CANON is accepted but not used.
+sub write_single_decomposition($$%)
+{
+    my ($name, $is_canon, %table) = @_;
+    my @entries = ();
+    foreach my $code (0 .. 0xffff)
+    {
+        my $packed = $table{$code};
+        next unless defined $packed;
+        # The expansion is written as a string of bytes, two per
+        # character with the high byte first.  Most expansions are short
+        # but a few are very long (e.g. \uFDFA), so a fixed-size
+        # representation would waste a lot of space.
+        my $bytes = join ('', map { sprintf ("\\x%02x\\x%02x", $_ >> 8, $_ & 0xff) }
+                                  unpack ("n*", $packed));
+        push @entries, sprintf ("  { 0x%04x, \"%s\" }", $code, $bytes);
+    }
+    # Entries are separated by ",\n"; there is no comma after the last one.
+    print DECOMP "static const decomp_entry ${name}_decomposition[] =\n{\n";
+    print DECOMP join (",\n", @entries);
+    print DECOMP "\n};\n\n";
+}
+# Write both decomposition tables to DECOMP: the canonical table first,
+# then the full table.
+sub write_decompositions()
+{
+    foreach my $spec (['canonical', 1, \%canonical_decomposition],
+                      ['full', 0, \%full_decomposition])
+    {
+        my ($name, $is_canon, $table) = @$spec;
+        &write_single_decomposition ($name, $is_canon, %$table);
+    }
+}
--- a/libjava/scripts/unicode-muncher.pl
+++ b/libjava/scripts/unicode-muncher.pl
+#!/usr/bin/perl -w
+# unicode-muncher.pl -- generate Unicode database for java.lang.Character
+# Copyright (C) 1998, 2002 Free Software Foundation, Inc.
+#
+# This file is part of GNU Classpath.
+#
+# GNU Classpath is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# GNU Classpath is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Classpath; see the file COPYING.  If not, write to the
+# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+# 02111-1307 USA.
+#
+# Linking this library statically or dynamically with other modules is
+# making a combined work based on this library.  Thus, the terms and
+# conditions of the GNU General Public License cover the whole
+# combination.
+#
+# As a special exception, the copyright holders of this library give you
+# permission to link this library with independent modules to produce an
+# executable, regardless of the license terms of these independent
+# modules, and to copy and distribute the resulting executable under
+# terms of your choice, provided that you also meet, for each linked
+# independent module, the terms and conditions of the license of that
+# module.  An independent module is a module which is not derived from
+# or based on this library.  If you modify this library, you may extend
+# this exception to your version of the library, but you are not
+# obligated to do so.  If you do not wish to do so, delete this
+# exception statement from your version.
+# Code for reading UnicodeData.txt and generating the code for
+# gnu.java.lang.CharData.  For now, the relevant Unicode definition files
+# are found in libjava/gnu/gcj/convert/.
+#
+# Inspired by code from Jochen Hoenicke.
+# author Eric Blake <ebb9@email.byu.edu>
+#
+# Usage: ./unicode-muncher <UnicodeData.txt> <CharData.java>
+#   where <UnicodeData.txt> is obtained from www.unicode.org (named
+#   UnicodeData-3.0.0.txt for Unicode version 3.0.0), and <CharData.java>
+#   is the final location for the Java interface gnu.java.lang.CharData.
+#   As of JDK 1.4, use Unicode version 3.0.0 for best results.
+##
+## Render a 16-bit value as the text of one character inside a Java string
+## literal.  Values from -0x8000 to -1 are mapped to 0x8000 - 0xffff;
+## anything outside -0x8000 .. 0xffff is a fatal error.
+##
+sub javaChar($) {
+    my $code = shift;
+    if ($code < -0x8000 or $code > 0xffff) {
+        die "Out of range: $code\n";
+    }
+    $code += 0x10000 if $code < 0;
+    # Characters that must be escaped, or that are shorter as ASCII, are
+    # handled first; everything else becomes a \uXXXX escape.
+    if ($code < 0x20) {
+        return sprintf("\\%03o", $code);
+    } elsif ($code == 0x22) {
+        return "\\\"";
+    } elsif ($code == 0x5c) {
+        return "\\\\";
+    } elsif ($code < 0x7f) {
+        return chr($code);
+    }
+    return sprintf("\\u%04x", $code);
+}
+##
+## Convert the text UnicodeData file from www.unicode.org into a Java
+## interface with string constants holding the compressed information.
+##
+# General category codes.  The index of a code in this list is the type
+# number stored for a character.  SKIPPED matches no category; presumably
+# it keeps the following indices aligned with the java.lang.Character type
+# constants -- confirm.
+my @TYPECODES = qw(Cn Lu Ll Lt Lm Lo Mn Me Mc Nd Nl No Zs Zl Zp Cc Cf
+                   SKIPPED Co Cs Pd Ps Pe Pc Po Sm Sc Sk So Pi Pf);
+# Bidirectional category codes, likewise stored as their index.
+my @DIRCODES = qw(L R AL EN ES ET AN CS NSM BN B S WS ON LRE LRO RLE RLO PDF);
+# Flag bits or'ed into the type number.
+my $NOBREAK_FLAG  = 32;
+my $MIRRORED_FLAG = 64;
+my @info = ();
+my $titlecase = "";
+my $count = 0;
+my $range = 0;
+die "Usage: $0 <UnicodeData.txt> <CharData.java>" unless @ARGV == 2;
+# Three-argument open, so that the file name is taken literally.
+open (UNICODE, '<', $ARGV[0]) || die "Can't open Unicode attribute file: $!\n";
+# Stage 1: Parse the attribute file, storing one packed (type, numValue, upper, lower, direction) record per character in @info
+$| = 1;
+print "GNU Classpath Unicode Attribute Database Generator 2.0\n";
+print "Copyright (C) 1998, 2002 Free Software Foundation, Inc.\n";
+print "Parsing attributes file";
+while(<UNICODE>) {
+    print "." unless $count++ % 1000; # one progress dot per 1000 lines
+    chomp;
+    s/\r//g; # strip any carriage returns
+    my ($ch, $name, $category, undef, $bidir, $decomp, undef, undef, $numeric,
+        $mirrored, undef, undef, $upcase, $lowcase, $title) = split ';';
+    $ch = hex($ch);
+    next if $ch > 0xffff; # Ignore surrogate pairs, since Java does
+    my ($type, $numValue, $upperchar, $lowerchar, $direction);
+    $type = 0;
+    while ($category !~ /^$TYPECODES[$type]$/) { # $type ends up as the index of $category in @TYPECODES
+        if (++$type == @TYPECODES) {
+            die "$ch: Unknown type: $category";
+        }
+    }
+    $type |= $NOBREAK_FLAG if ($decomp =~ /noBreak/); # flag bit for a <noBreak> decomposition
+    $type |= $MIRRORED_FLAG if ($mirrored =~ /Y/); # flag bit for mirrored characters
+    if ($numeric =~ /^[0-9]+$/) {
+        $numValue = $numeric;
+        die "numValue too big: $ch, $numValue\n" if $numValue >= 0x7fff;
+    } elsif ($numeric eq "") {
+        # No numeric field: the letters A-Z and a-z at U+0041, U+0061, U+FF21 and U+FF41 get the values 10 - 35, everything else -1
+        if ($ch >= 0x0041 && $ch <= 0x005a) {
+            $numValue = $ch - 0x0037;
+        } elsif ($ch >= 0x0061 && $ch <= 0x007a) {
+            $numValue = $ch - 0x0057;
+        } elsif ($ch >= 0xff21 && $ch <= 0xff3a) {
+            $numValue = $ch - 0xff17;
+        } elsif ($ch >= 0xff41 && $ch <= 0xff5a) {
+            $numValue = $ch - 0xff37;
+        } else {
+            $numValue = -1;
+        }
+    } else {
+        $numValue = -2; # numeric field present, but not a plain string of digits
+    }
+    $upperchar = $upcase ? hex($upcase) - $ch : 0; # case mappings are stored as offsets from $ch, 0 meaning no mapping
+    $lowerchar = $lowcase ? hex($lowcase) - $ch : 0;
+    if ($title ne $upcase) { # only characters whose titlecase field differs from the uppercase field are listed
+        my $titlechar = $title ? hex($title) : $ch;
+        $titlecase .= pack("n2", $ch, $titlechar);
+    }
+    $direction = 0;
+    while ($bidir !~ /^$DIRCODES[$direction]$/) { # $direction ends up as the index of $bidir in @DIRCODES, or -1 if it is not listed
+        if (++$direction == @DIRCODES) {
+            $direction = -1;
+            last;
+        }
+    }
+    if ($range) { # the previous line opened a range with a "First>" entry
+        die "Expecting end of range at $ch\n" unless $name =~ /Last>$/;
+        for ($range + 1 .. $ch - 1) { # fill the characters between the two entries with this entry's attributes
+            $info[$_] = pack("n5", $type, $numValue, $upperchar,
+                             $lowerchar, $direction);
+        }
+        $range = 0;
+    } elsif ($name =~ /First>$/) {
+        $range = $ch; # remember where the range started until its "Last>" entry arrives
+    }
+    $info[$ch] = pack("n5", $type, $numValue, $upperchar, $lowerchar,
+                      $direction);
+}
+close UNICODE;
+# Stage 2: Compress the data structures
+#
+# Turns the per-character records in @info into:
+#   @charinfo   - the distinct attribute rows [numVal, upper, lower, direction]
+#   $info       - a string with one 16-bit big-endian entry per character:
+#                 (row index in @charinfo << 7) | type
+#   $bestblkstr - the entries of $info cut into blocks of 2**$bestshift
+#                 characters, with duplicate blocks dropped and overlapping
+#                 blocks merged
+#   @blocks     - for every block of the character range, the 16-bit offset
+#                 that, added to a character, gives its index in $bestblkstr
+printf "\nCompressing data structures";
+$count = 0;
+# Scalar string buffer, distinct from the @info array; it starts out
+# undefined and is appended to in the loop below.
+my $info = ();
+my %charhash = ();
+my @charinfo = ();
+for my $ch (0 .. 0xffff) {
+    # One progress dot per 0x1000 characters.
+    print "." unless $count++ % 0x1000;
+    # Characters that stage 1 did not set get a default record: type 0,
+    # numeric value -1, no case mappings, direction -1.
+    if (! defined $info[$ch]) {
+        $info[$ch] = pack("n5", 0, -1, 0, 0, -1);
+    }
+    my ($type, $numVal, $upper, $lower, $direction) = unpack("n5", $info[$ch]);
+    # The hash key is the whole packed record, type field included, so two
+    # characters that differ only in type still get separate rows.
+    if (! exists $charhash{$info[$ch]}) {
+        push @charinfo, [ $numVal, $upper, $lower, $direction ];
+        $charhash{$info[$ch]} = $#charinfo;
+    }
+    $info .= pack("n", ($charhash{$info[$ch]} << 7) | $type);
+}
+my $charlen = @charinfo;
+my $bestshift;
+my $bestest = 1000000;
+my $bestblkstr;
+# The row index is shifted left by 7 inside a 16-bit entry, which leaves
+# 9 bits for it.
+die "Too many unique character entries: $charlen\n" if $charlen > 512;
+print "\nUnique character entries: $charlen\n";
+# Try block sizes of 2**3 .. 2**8 characters and keep the one with the
+# smallest estimated total size.
+for my $i (3 .. 8) {
+    my $blksize = 1 << $i;
+    my %blocks = ();
+    my @blkarray = ();
+    my ($j, $k);
+    print "shift: $i";
+    # Cut $info into blocks (two bytes per character) and keep one copy of
+    # each distinct block, in order of first appearance.
+    for ($j = 0; $j < 0x10000; $j += $blksize) {
+        my $blkkey = substr $info, 2 * $j, 2 * $blksize;
+        if (! exists $blocks{$blkkey}) {
+            push @blkarray, $blkkey;
+            $blocks{$blkkey} = $#blkarray;
+        }
+    }
+    my $blknum = @blkarray;
+    # $blocklen counts 16-bit entries, not bytes.
+    my $blocklen = $blknum * $blksize;
+    printf " before %5d", $blocklen;
+    # Now we try to pack the blkarray as tight as possible by finding matching
+    # heads and tails.  $j is the overlap length in entries; the longest
+    # overlaps are tried first.
+    for ($j = $blksize - 1; $j > 0; $j--) {
+        # Map each distinct $j-entry tail to the list of slots ending in it.
+        my %tails = ();
+        for $k (0 .. $#blkarray) {
+            next if ! defined $blkarray[$k];
+            my $len = length $blkarray[$k];
+            my $tail = substr $blkarray[$k], $len - $j * 2;
+            if (exists $tails{$tail}) {
+                push @{$tails{$tail}}, $k;
+            } else {
+                $tails{$tail} = [ $k ];
+            }
+        }
+        # tails are calculated, now calculate the heads and merge.
+      BLOCK:
+        for $k (0 .. $#blkarray) {
+            next if ! defined $blkarray[$k];
+            my $tomerge = $k;
+            while (1) {
+                # Look for a slot whose tail equals the head of this one.
+                my $head = substr($blkarray[$tomerge], 0, $j * 2);
+                my $entry = $tails{$head};
+                next BLOCK if ! defined $entry;
+                my $other = shift @{$entry};
+                if ($other == $tomerge) {
+                    # A slot cannot be merged with itself: put it back at
+                    # the end of the list and take the next candidate, or
+                    # give up on this slot when it is the only one.
+                    if (@{$entry}) {
+                        push @{$entry}, $other;
+                        $other = shift @{$entry};
+                    } else {
+                        push @{$entry}, $other;
+                        next BLOCK;
+                    }
+                }
+                if (@{$entry} == 0) {
+                    delete $tails{$head};
+                }
+                # a match was found: append the part of $tomerge that
+                # follows the shared $j entries to $other.
+                my $merge = $blkarray[$other]
+                    . substr($blkarray[$tomerge], $j * 2);
+                $blocklen -= $j;
+                # $blknum is kept up to date but not read again below.
+                $blknum--;
+                if ($other < $tomerge) {
+                    # Keep the merged string in the lower slot and make the
+                    # tail list of the merged string refer to that slot
+                    # instead of the one just cleared.
+                    $blkarray[$tomerge] = undef;
+                    $blkarray[$other] = $merge;
+                    my $len = length $merge;
+                    my $tail = substr $merge, $len - $j * 2;
+                    $tails{$tail} = [ map { $_ == $tomerge ? $other : $_ }
+                                      @{$tails{$tail}} ];
+                    next BLOCK;
+                }
+                # Otherwise the merged string stays in slot $tomerge; its
+                # head has changed, so loop to look for another match.
+                $blkarray[$tomerge] = $merge;
+                $blkarray[$other] = undef;
+            }
+        }
+    }
+    # Concatenate whatever slots are left.
+    my $blockstr;
+    for $k (0 .. $#blkarray) {
+        $blockstr .= $blkarray[$k] if defined $blkarray[$k];
+    }
+    # Sanity check: the running entry count must match the actual length.
+    die "Unexpected $blocklen" if length($blockstr) != 2 * $blocklen;
+    # Two bytes per data entry plus two bytes for each of the
+    # 0x10000 >> $i block offsets.
+    my $estimate = 2 * $blocklen + (0x20000 >> $i);
+    printf " after merge %5d: %6d bytes\n", $blocklen, $estimate;
+    if ($estimate < $bestest) {
+        $bestest = $estimate;
+        $bestshift = $i;
+        $bestblkstr = $blockstr;
+    }
+}
+# Locate every block of the winning size inside the merged string.
+my @blocks;
+my $blksize = 1 << $bestshift;
+for (my $j = 0; $j < 0x10000; $j += $blksize) {
+    my $blkkey = substr $info, 2 * $j, 2 * $blksize;
+    my $index = index $bestblkstr, $blkkey;
+    # An odd byte offset is a match that straddles two entries, so search
+    # on from there.  A failed search returns -1, whose low bit is also set,
+    # which is why the not-found check lives inside this loop.
+    while ($index & 1) {
+        die "not found: $j" if $index == -1;
+        $index = index $bestblkstr, $blkkey, $index + 1;
+    }
+    # Store the entry index relative to the first character of the block,
+    # reduced to 16 bits.
+    push @blocks, ($index / 2 - $j) & 0xffff;
+}
+# Stage 3: Generate the file
+#
+# Writes the Java source of the gnu.java.lang.CharData interface to the file
+# named by $ARGV[1].  The tables computed above (@blocks, $bestblkstr,
+# @charinfo, $titlecase) are emitted as concatenated String literals whose
+# characters are produced by javaChar().
+#
+# The two checks below abort when a table holds more than 0xffff / 3 units;
+# per their messages they guard a UTF-8 size limit (presumably the size limit
+# of a string constant at up to three bytes per character -- TODO confirm).
+die "UTF-8 limit of blocks may be exceeded: " . scalar(@blocks) . "\n"
+    if @blocks > 0xffff / 3;
+# NOTE(review): length($bestblkstr) is a byte count (two bytes per DATA
+# entry), so this limit is twice as strict as the one applied to @blocks;
+# confirm whether that is intended.
+die "UTF-8 limit of data may be exceeded: " . length($bestblkstr) . "\n"
+    if length($bestblkstr) > 0xffff / 3;
+{
+    print "Generating $ARGV[1] with shift of $bestshift";
+    # Three-argument open: the output file name is taken literally.
+    open(OUTPUT, ">", $ARGV[1]) or die "Failed creating output file: $!\n";
+    # Print the given list of values to OUTPUT as a Java String expression.
+    # The first literal is introduced by "    = " and every following one by
+    # a newline and "    + "; each literal holds at most 11 characters.
+    # Nothing is printed for an empty list.
+    my $emit_table = sub {
+        my @values = @_;
+        my $row = 0;
+        while (@values) {
+            print OUTPUT $row++ ? "\n    + \"" : "    = \"";
+            for my $val (splice(@values, 0, 11)) {
+                print OUTPUT javaChar($val);
+            }
+            print OUTPUT "\"";
+        }
+    };
+    print OUTPUT <<EOF;
+/* gnu/java/lang/CharData -- Database for java.lang.Character Unicode info
+   Copyright (C) 2002 Free Software Foundation, Inc.
+   *** This file is generated by scripts/unicode-muncher.pl ***
+This file is part of GNU Classpath.
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+package gnu.java.lang;
+/**
+ * This contains the info about the unicode characters, that
+ * java.lang.Character needs.  It is generated automatically from
+ * <code>$ARGV[0]</code>, by some
+ * perl scripts. This Unicode definition file can be found on the
+ * <a href="http://www.unicode.org">http://www.unicode.org</a> website.
+ * JDK 1.4 uses Unicode version 3.0.0.
+ *
+ * The data is stored as string constants, but Character will convert these
+ * Strings to their respective <code>char[]</code> components.  The field
+ * <code>BLOCKS</code> stores the offset of a block of 2<sup>SHIFT</sup>
+ * characters within <code>DATA</code>.  The DATA field, in turn, stores
+ * information about each character in the low order bits, and an offset
+ * into the attribute tables <code>UPPER</code>, <code>LOWER</code>,
+ * <code>NUM_VALUE</code>, and <code>DIRECTION</code>.  Notice that the
+ * attribute tables are much smaller than 0xffff entries; as many characters
+ * in Unicode share common attributes.  Finally, there is a listing for
+ * <code>TITLE</code> exceptions (most characters just have the same
+ * title case as upper case).
+ *
+ * \@author scripts/unicode-muncher.pl (written by Jochen Hoenicke,
+ *         Eric Blake)
+ * \@see Character
+ */
+public interface CharData
+{
+  /**
+   * The Unicode definition file that was parsed to build this database.
+   */
+  String SOURCE = \"$ARGV[0]\";
+  /**
+   * The character shift amount to look up the block offset. In other words,
+   * <code>(char) (BLOCKS.value[ch >> SHIFT] + ch)</code> is the index where
+   * <code>ch</code> is described in <code>DATA</code>.
+   */
+  int SHIFT = $bestshift;
+  /**
+   * The mapping of character blocks to their location in <code>DATA</code>.
+   * Each entry has been adjusted so that the 16-bit sum with the desired
+   * character gives the actual index into <code>DATA</code>.
+   */
+  String BLOCKS
+EOF
+    # One character per block offset.
+    $emit_table->(@blocks);
+    print OUTPUT <<EOF;
+;
+  /**
+   * Information about each character.  The low order 5 bits form the
+   * character type, the next bit is a flag for non-breaking spaces, and the
+   * next bit is a flag for mirrored directionality.  The high order 9 bits
+   * form the offset into the attribute tables.  Note that this limits the
+   * number of unique character attributes to 512, which is not a problem
+   * as of Unicode version 3.2.0, but may soon become one.
+   */
+  String DATA
+EOF
+    # $bestblkstr holds one 16-bit big-endian value per DATA character.
+    $emit_table->(unpack("n*", $bestblkstr));
+    print OUTPUT <<EOF;
+;
+  /**
+   * This is the attribute table for computing the numeric value of a
+   * character.  The value is -1 if Unicode does not define a value, -2
+   * if the value is not a positive integer, otherwise it is the value.
+   * Note that this is a signed value, but stored as an unsigned char
+   * since this is a String literal.
+   */
+  String NUM_VALUE
+EOF
+    # Column 0 of every attribute row.
+    $emit_table->(map { $_->[0] } @charinfo);
+    print OUTPUT <<EOF;
+;
+  /**
+   * This is the attribute table for computing the uppercase representation
+   * of a character.  The value is the signed difference between the
+   * character and its uppercase version.  Note that this is stored as an
+   * unsigned char since this is a String literal.
+   */
+  String UPPER
+EOF
+    # Column 1 of every attribute row.
+    $emit_table->(map { $_->[1] } @charinfo);
+    print OUTPUT <<EOF;
+;
+  /**
+   * This is the attribute table for computing the lowercase representation
+   * of a character.  The value is the signed difference between the
+   * character and its lowercase version.  Note that this is stored as an
+   * unsigned char since this is a String literal.
+   */
+  String LOWER
+EOF
+    # Column 2 of every attribute row.
+    $emit_table->(map { $_->[2] } @charinfo);
+    print OUTPUT <<EOF;
+;
+  /**
+   * This is the attribute table for computing the directionality class
+   * of a character.  At present, the value is in the range 0 - 18 if the
+   * character has a direction, otherwise it is -1.  Note that this is
+   * stored as an unsigned char since this is a String literal.
+   */
+  String DIRECTION
+EOF
+    # Column 3 of every attribute row.
+    $emit_table->(map { $_->[3] } @charinfo);
+    print OUTPUT <<EOF;
+;
+  /**
+   * This is the listing of titlecase special cases (all other character
+   * can use <code>UPPER</code> to determine their titlecase).  The listing
+   * is a sequence of character pairs; converting the first character of the
+   * pair to titlecase produces the second character.
+   */
+  String TITLE
+EOF
+    # $titlecase holds packed 16-bit pairs; it stays undefined when no
+    # titlecase exception was recorded, in which case nothing is emitted.
+    $emit_table->(defined $titlecase ? unpack("n*", $titlecase) : ());
+    print OUTPUT ";\n}\n";
+    # Buffered write errors (for example a full disk) only surface when the
+    # handle is closed, so a failing close must not be ignored.
+    close(OUTPUT) or die "Failed writing output file: $!\n";
+}
+print "\nDone.\n";