Commit 1fa78272 by Eric Blake Committed by Eric Blake

unicode-decomp.pl: Move from chartables.pl...

2002-03-04  Eric Blake  <ebb9@email.byu.edu>

	* scripts/unicode-decomp.pl: Move from chartables.pl, and remove
	the code for generating include/java-chartables.h.
	* scripts/unicode-blocks.pl: Move from scripts/blocks.pl, and
	merge with Classpath.
	* scripts/unicode-muncher.pl: Copy from Classpath.
	* scripts/MakeCharTables.java: New file.
	* gnu/gcj/convert/Blocks-3.txt: New file.
	* gnu/gcj/convert/UnicodeData-3.0.0.txt: New file.
	* gnu/gcj/convert/UnicodeCharacterDatabase-3.0.0.html: New file.
	* gnu/java/lang/CharData.java: Copy from Classpath.
	* Makefile.am (ordinary_java_source_files): Add
	gnu/java/lang/CharData.java.
	* configure.in: Remove --enable-fast-character option.
	* java/lang/Character.java: Merge algorithms and Javadoc with
	Classpath.
	* java/lang/natCharacter.cc: Implement Unicode lookup table more
	efficiently.
	* include/java-chardecomp.h: Regenerate.
	* include/java-chartables.h: Regenerate.

From-SVN: r50368
parent b87e4a4c
2002-03-04 Eric Blake <ebb9@email.byu.edu>
* scripts/unicode-decomp.pl: Move from chartables.pl, and remove
the code for generating include/java-chartables.h.
* scripts/unicode-blocks.pl: Move from scripts/blocks.pl, and
merge with Classpath.
* scripts/unicode-muncher.pl: Copy from Classpath.
* scripts/MakeCharTables.java: New file.
* gnu/gcj/convert/Blocks-3.txt: New file.
* gnu/gcj/convert/UnicodeData-3.0.0.txt: New file.
* gnu/gcj/convert/UnicodeCharacterDatabase-3.0.0.html: New file.
* gnu/java/lang/CharData.java: Copy from Classpath.
* Makefile.am (ordinary_java_source_files): Add
gnu/java/lang/CharData.java.
* configure.in: Remove --enable-fast-character option.
* java/lang/Character.java: Merge algorithms and Javadoc with
Classpath.
* java/lang/natCharacter.cc: Implement Unicode lookup table more
efficiently.
* include/java-chardecomp.h: Regenerate.
* include/java-chartables.h: Regenerate.
2002-03-06 Bryce McKinlay <bryce@waitaki.otago.ac.nz>
* java/awt/MediaTracker.java: Implemented.
......
......@@ -1288,6 +1288,7 @@ gnu/java/io/NullOutputStream.java \
gnu/java/io/ObjectIdentityWrapper.java \
gnu/java/lang/ArrayHelper.java \
gnu/java/lang/ClassHelper.java \
gnu/java/lang/CharData.java \
gnu/java/lang/reflect/TypeSignature.java \
gnu/java/locale/Calendar.java \
gnu/java/locale/Calendar_de.java \
......
# chartables.pl - A perl program to generate tables for use by the
# Character class.
# Copyright (C) 1998, 1999 Red Hat, Inc.
#
# This file is part of libjava.
#
# This software is copyrighted work licensed under the terms of the
# Libjava License. Please consult the file "LIBJAVA_LICENSE" for
# details.
# This program requires a `unidata.txt' file of the form distributed
# on the Unicode 2.0 CD ROM. Or, get it more conveniently here:
# ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData-Latest.txt
# Version `2.1.8' of this file was last used to update the Character class.
# Written using "Java Class Libraries", 2nd edition, ISBN 0-201-31002-3
# "The Java Language Specification", ISBN 0-201-63451-1
# plus online API docs for JDK 1.2 beta from http://www.javasoft.com.
# Usage: perl chartables.pl [-n] UnicodeData-VERSION.txt
# If this exits with nonzero status, then you must investigate the
# cause of the problem.
# Diagnostics and other information to stderr.
# This creates the new include/java-chartables.h and
# include/java-chardecomp.h files directly.
# With -n, the files are not created, but all processing
# still occurs.
# Fields in the table.
# Indices of the semicolon-separated fields of a Unicode data record
# that this script consults (see the uses of $fields[...] below).
$CODE = 0;
$NAME = 1;
$CATEGORY = 2;
$DECOMPOSITION = 5;
$DECIMAL = 6;
$DIGIT = 7;
$NUMERIC = 8;
$UPPERCASE = 12;
$LOWERCASE = 13;
$TITLECASE = 14;

# A special case: Tamil digits are exempt from the digit-ordering
# check in update_digit_block.
$TAMIL_DIGIT_ONE = 0x0be7;
$TAMIL_DIGIT_NINE = 0x0bef;

# These are endpoints of legitimate gaps in the tables.  process_gap
# fills such a gap with the properties of the record that ends it.
$CJK_IDEOGRAPH_END = 0x9fa5;
$HANGUL_END = 0xd7a3;
$HIGH_SURROGATE_END = 0xdb7f;
$PRIVATE_HIGH_SURROGATE_END = 0xdbff;
$LOW_SURROGATE_END = 0xdfff;
$PRIVATE_END = 0xf8ff;

# Titlecase mappings, numeric values and character names, all keyed
# by the hexadecimal code string taken from the data file.
%title_to_upper = ();
%title_to_lower = ();
%numerics = ();
%name = ();

# Range tables.  Each table is a set of parallel arrays: entry N of
# the `start' and `end' arrays delimits one contiguous block.
@digit_start = ();
@digit_end = ();
@space_start = ();
@space_end = ();
# @letter_start = ();
# @letter_end = ();
@all_start = ();
@all_end = ();
@all_cats = ();
@upper_start = ();
@upper_end = ();
@upper_map = ();
%upper_anom = ();
@lower_start = ();
@lower_end = ();
@lower_map = ();
%lower_anom = ();

# Per-character attribute, keyed by numeric code.  This is a hash:
# set_attribute stores into $attributes{...} and print_fast_tables
# reads the hash slot of the glob it is handed, so initialize the
# hash rather than an (unused) array of the same name.
%attributes = ();

# There are a few characters which actually need two attributes.
# These are special-cased.
$ROMAN_START = 0x2160;
$ROMAN_END = 0x217f;
%second_attributes = ();

# Code of the previously processed record, used for gap detection,
# and the exit status reported at the end of the run.
$prevcode = -1;
$status = 0;
# Maps the two-letter general category found in the data file to the
# name of the java::lang::Character constant emitted for it.  Note
# that `Pi' and `Pf' share the constants used for `Ps' and `Pe'.
%category_map = (
    # Letters.
    'Lu', 'UPPERCASE_LETTER',
    'Ll', 'LOWERCASE_LETTER',
    'Lt', 'TITLECASE_LETTER',
    'Lm', 'MODIFIER_LETTER',
    'Lo', 'OTHER_LETTER',
    # Marks.
    'Mn', 'NON_SPACING_MARK',
    'Mc', 'COMBINING_SPACING_MARK',
    'Me', 'ENCLOSING_MARK',
    # Numbers.
    'Nd', 'DECIMAL_DIGIT_NUMBER',
    'Nl', 'LETTER_NUMBER',
    'No', 'OTHER_NUMBER',
    # Separators.
    'Zs', 'SPACE_SEPARATOR',
    'Zl', 'LINE_SEPARATOR',
    'Zp', 'PARAGRAPH_SEPARATOR',
    # Control, format, surrogate, private use, unassigned.
    'Cc', 'CONTROL',
    'Cf', 'FORMAT',
    'Cs', 'SURROGATE',
    'Co', 'PRIVATE_USE',
    'Cn', 'UNASSIGNED',
    # Punctuation.
    'Pc', 'CONNECTOR_PUNCTUATION',
    'Pd', 'DASH_PUNCTUATION',
    'Ps', 'START_PUNCTUATION',
    'Pe', 'END_PUNCTUATION',
    'Pi', 'START_PUNCTUATION',
    'Pf', 'END_PUNCTUATION',
    'Po', 'OTHER_PUNCTUATION',
    # Symbols.
    'Sm', 'MATH_SYMBOL',
    'Sc', 'CURRENCY_SYMBOL',
    'Sk', 'MODIFIER_SYMBOL',
    'So', 'OTHER_SYMBOL',
);

# These map characters (by numeric code) to their packed
# decompositions; see add_decomposition.
%canonical_decomposition = ();
%full_decomposition = ();
# Handle `-n' and open output files.  With `-n' both outputs are
# redirected to /dev/null so that all processing still happens but
# nothing is written.
local ($f1, $f2) = ('include/java-chartables.h',
                    'include/java-chardecomp.h');
if ($ARGV[0] eq '-n')
{
    shift @ARGV;
    $f1 = '/dev/null';
    $f2 = '/dev/null';
}
# Fail loudly if an output cannot be created; otherwise every later
# print would be silently discarded and the run would appear to have
# succeeded while producing no tables.
open (CHARTABLE, "> $f1")
    || die "chartables.pl: cannot open $f1 for writing: $!\n";
open (DECOMP, "> $f2")
    || die "chartables.pl: cannot open $f2 for writing: $!\n";
# Process the Unicode file: one record per input line, in ascending
# code order.  Codes missing between consecutive records are handed
# to process_gap before the record itself is processed.
while (<>)
{
    chop;
    # The split limit is deliberately larger than the number of
    # fields we expect: it keeps trailing empty fields and lets us
    # notice records that carry extra ones.
    @fields = split (';', $_, 30);
    # Numeric value of the hexadecimal code field.
    $ncode = hex ($fields[$CODE]);
    unless ($#fields == 14)
    {
        print STDERR ("Entry for \\u", $fields[$CODE],
                      " has wrong number of fields: ", $#fields, "\n");
    }
    $name{$fields[$CODE]} = $fields[$NAME];
    # Anything other than "previous code plus one" means the table
    # has a hole which must be filled in first.
    &process_gap (*fields, $prevcode, $ncode)
        unless $ncode == $prevcode + 1;
    &process_char (*fields, $ncode);
    $prevcode = $ncode;
}
# Fill the tail of the table up to \uffff.  The contents of `fields'
# do not matter for this final call.
&process_gap (*fields, $prevcode, 0x10000)
    unless $prevcode == 0xffff;
# Fixed preamble of the generated java-chartables.h: mode line,
# include guard and the "generated file" notice.
print CHARTABLE
    "// java-chartables.h - Character tables for java.lang.Character -*- c++ -*-\n\n",
    "#ifndef __JAVA_CHARTABLES_H__\n",
    "#define __JAVA_CHARTABLES_H__\n\n",
    "// These tables are automatically generated by the chartables.pl\n",
    "// script. DO NOT EDIT the tables. Instead, fix the script\n",
    "// and run it again.\n\n",
    "// This file should only be included by natCharacter.cc\n\n";
# Running estimate of the size of the compact tables; the print_*
# routines add to it as they emit entries.
$bytes = 0;
# Titlecase mapping tables.
# The mappings are stored in hashes, so compare the number of keys.
# (Comparing $#title_to_lower with $#title_to_upper would look at two
# arrays that are never filled in, and so could never differ.)
@title_lower_keys = keys %title_to_lower;
@title_upper_keys = keys %title_to_upper;
if ($#title_lower_keys != $#title_upper_keys)
{
    # If this fails we need to reimplement toTitleCase.
    print STDERR "titlecase mappings have different sizes\n";
    $status = 1;
}
# Also ensure that the tables are entirely parallel: every key of the
# lower table must be present in the upper table.
foreach $key (sort @title_lower_keys)
{
    if (! defined $title_to_upper{$key})
    {
        print STDERR "titlecase mappings have different entries\n";
        $status = 1;
    }
}
&print_single_map ("title_to_lower_table", %title_to_lower);
&print_single_map ("title_to_upper_table", %title_to_upper);
# The compact tables are emitted inside #ifdef COMPACT_CHARACTER, the
# fast (one entry per character) tables inside the #else branch.
print CHARTABLE "#ifdef COMPACT_CHARACTER\n\n";
printf CHARTABLE "#define TAMIL_DIGIT_ONE 0x%04x\n\n", $TAMIL_DIGIT_ONE;
# All numeric values.
&print_numerics ();
# Digits only.
&print_block ("digit_table", *digit_start, *digit_end);
# Space characters.
&print_block ("space_table", *space_start, *space_end);
# Letters. We used to generate a separate letter table. But this
# doesn't really seem worthwhile. Simply using `all_table' saves us
# about 800 bytes, and only adds 3 table probes to isLetter.
# &print_block ("letter_table", *letter_start, *letter_end);
# Case tables.
&print_case_table ("upper", *upper_start, *upper_end,
                   *upper_map, *upper_anom);
&print_case_table ("lower", *lower_start, *lower_end,
                   *lower_map, *lower_anom);
# Everything else.
&print_all_block (*all_start, *all_end, *all_cats);
print CHARTABLE "#else /* COMPACT_CHARACTER */\n\n";
# Bounds of the range whose characters carry a second attribute.
printf CHARTABLE "#define ROMAN_START 0x%04x\n#define ROMAN_END 0x%04x\n\n",
    $ROMAN_START, $ROMAN_END;
&print_fast_tables (*all_start, *all_end, *all_cats,
                    *attributes, *second_attributes);
print CHARTABLE
    "#endif /* COMPACT_CHARACTER */\n\n",
    "#endif /* __JAVA_CHARTABLES_H__ */\n";
# Report the size estimate accumulated while printing.
printf STDERR "Approximately %d bytes of data generated (compact case)\n",
    $bytes;
# Now generate decomposition tables: preamble, the entry structure,
# both tables, and the closing include guard.
print DECOMP
    "// java-chardecomp.h - Decomposition character tables -*- c++ -*-\n\n",
    "#ifndef __JAVA_CHARDECOMP_H__\n",
    "#define __JAVA_CHARDECOMP_H__\n\n",
    "// These tables are automatically generated by the chartables.pl\n",
    "// script. DO NOT EDIT the tables. Instead, fix the script\n",
    "// and run it again.\n\n",
    "// This file should only be included by natCollator.cc\n\n",
    "struct decomp_entry\n{\n",
    " jchar key;\n",
    " const char *value;\n",
    "};\n\n";
&write_decompositions ();
print DECOMP "#endif /* __JAVA_CHARDECOMP_H__ */\n";
close (CHARTABLE);
close (DECOMP);
# A nonzero status means one of the consistency checks failed.
exit $status;
# Process a gap in the code space.  Arguments are the glob of the
# field array of the record that ends the gap, the last code seen
# before the gap, and the code that ends it.  Every code strictly
# between the two is fed to process_char with synthesized fields.
sub process_gap
{
    local (*fields, $prevcode, $ncode) = @_;
    local (@gap_fields, $i);
    local ($known_gap);

    # Count how many of the well-known gap endpoints this code is.
    $known_gap = grep ($_ == $ncode,
                       $CJK_IDEOGRAPH_END, $HANGUL_END,
                       $HIGH_SURROGATE_END, $PRIVATE_HIGH_SURROGATE_END,
                       $LOW_SURROGATE_END, $PRIVATE_END);
    if ($known_gap)
    {
        # The characters in the gap we just found are known to have
        # the same properties as the character at the end of the gap.
        @gap_fields = @fields;
    }
    else
    {
        # Anything else is unassigned (`Cn').  Reporting each such
        # gap prints too much to be enabled:
        # print STDERR "Gap found at \\u", $fields[$CODE], "\n";
        @gap_fields = ('', '', 'Cn', '', '', '', '', '', '', '', '',
                       '', '', '', '');
    }

    foreach $i ($prevcode + 1 .. $ncode - 1)
    {
        # Give each filler character its own code and a made-up name.
        $gap_fields[$CODE] = sprintf ("%04x", $i);
        $gap_fields[$NAME] = "CHARACTER " . $gap_fields[$CODE];
        &process_char (*gap_fields, $i);
    }
}
# Process a single character.  Arguments are the glob of the array
# holding the record's fields and the numeric character code.  The
# character is entered into the decomposition, titlecase, case,
# numeric, digit, space and `all' tables as appropriate, and its
# attribute is recorded with set_attribute.  Problems are reported on
# stderr; a category `Nd' character that does not qualify for the
# digit table also sets $status.
sub process_char
{
    local (*fields, $ncode) = @_;

    if ($fields[$DECOMPOSITION] ne '')
    {
        &add_decomposition ($ncode, $fields[$DECOMPOSITION]);
    }

    # If this is a titlecase character, mark it.
    if ($fields[$CATEGORY] eq 'Lt')
    {
        $title_to_upper{$fields[$CODE]} = $fields[$UPPERCASE];
        $title_to_lower{$fields[$CODE]} = $fields[$LOWERCASE];
    }
    else
    {
        # For upper and lower case mappings, we try to build compact
        # tables that map range onto range. We specifically want to
        # avoid titlecase characters. Java specifies a range check to
        # make sure the character is not between 0x2000 and 0x2fff.
        # We avoid that here because we need to generate table entries
        # -- toLower and toUpper still work in that range.
        if ($fields[$UPPERCASE] eq ''
            && ($fields[$LOWERCASE] ne ''
                || $fields[$NAME] =~ /CAPITAL (LETTER|LIGATURE)/))
        {
            if ($fields[$LOWERCASE] ne '')
            {
                &update_case_block (*upper_start, *upper_end, *upper_map,
                                    $fields[$CODE], $fields[$LOWERCASE]);
                &set_attribute ($ncode, hex ($fields[$LOWERCASE]));
            }
            else
            {
                # Named as a capital but without a lowercase mapping.
                $upper_anom{$fields[$CODE]} = 1;
            }
        }
        elsif ($fields[$LOWERCASE] ne '')
        {
            print STDERR ("Java missed upper case char \\u",
                          $fields[$CODE], "\n");
        }
        elsif ($fields[$CATEGORY] eq 'Lu')
        {
            # This case is for letters which are marked as upper case
            # but for which there is no lower case equivalent. For
            # instance, LATIN LETTER YR.
        }

        if ($fields[$LOWERCASE] eq ''
            && ($fields[$UPPERCASE] ne ''
                || $fields[$NAME] =~ /SMALL (LETTER|LIGATURE)/))
        {
            if ($fields[$UPPERCASE] ne '')
            {
                &update_case_block (*lower_start, *lower_end, *lower_map,
                                    $fields[$CODE], $fields[$UPPERCASE]);
                &set_attribute ($ncode, hex ($fields[$UPPERCASE]));
            }
            else
            {
                # Named as a small letter but without an uppercase
                # mapping.
                $lower_anom{$fields[$CODE]} = 1;
            }
        }
        elsif ($fields[$UPPERCASE] ne '')
        {
            print STDERR ("Java missed lower case char \\u",
                          $fields[$CODE], "\n");
        }
        elsif ($fields[$CATEGORY] eq 'Ll')
        {
            # This case is for letters which are marked as lower case
            # but for which there is no upper case equivalent. For
            # instance, FEMININE ORDINAL INDICATOR.
        }
    }

    # If we have a non-decimal numeric value, add it to the list.
    if ($fields[$CATEGORY] eq 'Nd'
        && ($ncode < 0x2000 || $ncode > 0x2fff)
        && $fields[$NAME] =~ /DIGIT/)
    {
        # This is a digit character that is handled elsewhere.
    }
    elsif ($fields[$DIGIT] ne '' || $fields[$NUMERIC] ne '')
    {
        # Do a simple check.
        if ($fields[$DECIMAL] ne '')
        {
            # This catches bugs in an earlier implementation of
            # chartables.pl. Now it is here for historical interest
            # only.
            # print STDERR ("Character \\u", $fields[$CODE],
            # " would have been missed as digit\n");
        }

        # Prefer the digit field and fall back to the numeric field.
        local ($val) = $fields[$DIGIT];
        $val = $fields[$NUMERIC] if $val eq '';

        local ($ok) = 1;

        # If we have a value which is not a positive integer, then we
        # set the value to -2 to make life easier for
        # Character.getNumericValue.
        if ($val !~ m/^[0-9]+$/)
        {
            if ($fields[$CATEGORY] ne 'Nl'
                && $fields[$CATEGORY] ne 'No')
            {
                # This shows a few errors in the Unicode table. These
                # characters have a missing Numeric field, and the `N'
                # for the mirrored field shows up there instead. I
                # reported these characters to errata@unicode.org on
                # Thu Sep 10 1998. They said it will be fixed in the
                # 2.1.6 release of the tables.
                # The backslash is doubled so that a literal `\u' is
                # printed; a single `\u' is a case-modifying escape in
                # a double-quoted string and would vanish.
                print STDERR ("Character \\u", $fields[$CODE],
                              " has value but is not numeric; val = '",
                              $val, "'\n");
                # We skip these.
                $ok = 0;
            }
            $val = "-2";
        }

        if ($ok)
        {
            $numerics{$fields[$CODE]} = $val;
            &set_attribute ($ncode, $val);
        }
    }

    # We build a table that lists ranges of ordinary decimal values.
    # At each step we make sure that the digits are in the correct
    # order, with no holes, as this is assumed by Character. If this
    # fails, reimplementation is required. This implementation
    # dovetails nicely with the Java Spec, which has strange rules for
    # what constitutes a decimal value. In particular the Unicode
    # name must contain the word `DIGIT'. The spec doesn't directly
    # say that digits must have type `Nd' (or that their value must be
    # an integer), but that can be inferred from the list of digits in
    # the book(s). Currently the only Unicode characters whose name
    # includes `DIGIT' which would not fit are the Tibetan "half"
    # digits.
    if ($fields[$CATEGORY] eq 'Nd')
    {
        if (($ncode < 0x2000 || $ncode > 0x2fff)
            && $fields[$NAME] =~ /DIGIT/)
        {
            &update_digit_block (*digit_start, *digit_end, $fields[$CODE],
                                 $fields[$DECIMAL]);
            &set_attribute ($ncode, $fields[$DECIMAL]);
        }
        else
        {
            # If this fails then Character.getType will fail. We
            # assume that things in `digit_table' are the only
            # category `Nd' characters.
            # (Doubled backslash for the same reason as above.)
            print STDERR ("Character \\u", $fields[$CODE],
                          " is class Nd but not in digit table\n");
            $status = 1;
        }
    }

    # Keep track of space characters.
    if ($fields[$CATEGORY] =~ /Z[slp]/)
    {
        &update_block (*space_start, *space_end, $fields[$CODE]);
    }

    # Keep track of letters.
    # if ($fields[$CATEGORY] =~ /L[ultmo]/)
    # {
    # &update_letter_block (*letter_start, *letter_end, $fields[$CODE],
    # $fields[$CATEGORY]);
    # }

    # Keep track of all characters. You might think we wouldn't have
    # to do this for uppercase letters, or other characters we already
    # "classify". The problem is that this classification is
    # different. E.g., \u216f is uppercase by Java rules, but is a
    # LETTER_NUMBER here.
    &update_all_block (*all_start, *all_end, *all_cats,
                       $fields[$CODE], $fields[$CATEGORY]);
}
# Called to add a new decomposition.  Arguments are the numeric
# character code and the decomposition field of its record.  A
# leading `<tag>' token marks the decomposition as a full one and is
# dropped; every other token is read as a hexadecimal code.  The
# result is stored, packed, in %full_decomposition or
# %canonical_decomposition under the numeric code.
sub add_decomposition
{
    local ($ncode, $value) = @_;
    local (@tokens) = split (' ', $value);
    local (@decomp) = ();
    local ($is_full) = 0;
    local ($idx);

    for ($idx = 0; $idx <= $#tokens; ++$idx)
    {
        # Only the very first token may be a tag.
        if ($idx == 0 && $tokens[$idx] =~ /^\<.*\>$/)
        {
            $is_full = 1;
            next;
        }
        push (@decomp, hex ($tokens[$idx]));
    }

    # We pack the value into a string because this means we can stick
    # with Perl 4 features.
    local ($s) = pack ("I*", @decomp);
    if ($is_full)
    {
        $full_decomposition{$ncode} = $s;
    }
    else
    {
        $canonical_decomposition{$ncode} = $s;
    }
}
# Write a single decomposition table to DECOMP.  Arguments are the
# prefix of the table's name, a flag saying whether this is the
# canonical table (currently unused), and the table itself as a
# flattened hash keyed by numeric character code.  Entries are
# written in ascending code order.
sub write_single_decomposition
{
    local ($name, $is_canon, %table) = @_;
    local ($key, $char, $sep);

    printf DECOMP "static const decomp_entry ${name}_decomposition[] =\n{\n";

    # Separator written before each entry: nothing before the first
    # one, a comma and newline before all the others.
    $sep = '';
    $key = 0;
    while ($key <= 65535)
    {
        if (defined $table{$key})
        {
            print DECOMP $sep, sprintf (" { 0x%04x, \"", $key);
            # We represent the expansion as a series of bytes,
            # terminated with a double nul. This is ugly, but
            # relatively space-efficient. Most expansions are short,
            # but there are a few that are very long (e.g. \uFDFA).
            # This means that if we chose a fixed-space representation
            # we would waste a lot of space.
            foreach $char (unpack ("I*", $table{$key}))
            {
                # High byte first, then low byte.
                printf DECOMP "\\x%02x\\x%02x", ($char / 256), ($char % 256);
            }
            print DECOMP "\" }";
            $sep = ",\n";
        }
        ++$key;
    }

    print DECOMP "\n};\n\n";
}
# Write both decomposition tables, the canonical one first and then
# the full one.
sub write_decompositions
{
    &write_single_decomposition ('canonical', 1,
                                 %canonical_decomposition);
    &write_single_decomposition ('full', 0,
                                 %full_decomposition);
}
# A set of character ranges is kept as two parallel lists holding the
# first and last code of each range.  This function records one more
# character, given as a hexadecimal string: it either stretches the
# most recent range by one or opens a new single-character range.
# Returns 1 if the character was added to the old range, 0 otherwise.
sub update_block
{
    local (*start, *end, $char) = @_;
    local ($nchar) = hex ($char);
    local ($last) = $#end;

    # No range yet, or the character does not directly follow the
    # last one: open a new range after the existing ones.
    if ($last < 0 || $end[$last] != $nchar - 1)
    {
        $start[$last + 1] = $nchar;
        $end[$last + 1] = $nchar;
        return 0;
    }

    ++$end[$last];
    return 1;
}
# Return true if the given (numeric) character would be appended to
# the last existing range, i.e. if there is a range and it ends on
# the code just before this one.
sub block_append_p
{
    local (*end, $char) = @_;
    local ($last) = $#end;
    return $last >= 0 && $end[$last] == $char - 1;
}
# This updates the digit block. This table is much like an ordinary
# block, but it has an extra constraint: within a block, a digit's
# decimal value must equal its offset from the start of the block.
# Arguments are the globs of the start and end lists, the character
# as a hexadecimal string, and its decimal value.  A violation is
# reported on stderr and sets $status.
sub update_digit_block
{
    local (*start, *end, $char, $value) = @_;

    # Hand the lists on as globs, exactly as update_letter_block
    # does.  update_block unpacks its first two arguments as globs;
    # passing the scalars $start and $end gives it nothing to alias.
    &update_block (*start, *end, $char);
    local ($nchar) = hex ($char);

    # We want to make sure that the new digit's value is correct for
    # its place in the block. However, we special-case Tamil digits,
    # since Tamil does not have a digit `0'.
    local ($count) = $#start;
    if (($nchar < $TAMIL_DIGIT_ONE || $nchar > $TAMIL_DIGIT_NINE)
        && $nchar - $start[$count] != $value)
    {
        # If this fails then Character.digit_value will be wrong.
        print STDERR "Character \\u", $char, " violates digit constraint\n";
        $status = 1;
    }
}
# Update letter table. We could be smart about avoiding upper or
# lower case letters, but it is much simpler to just track them all,
# so the category argument is accepted and ignored.  The result of
# update_block is passed back to the caller.
sub update_letter_block
{
    local (*start, *end, $char, $category) = @_;
    return &update_block (*start, *end, $char);
}
# Update `all' table. This table holds all the characters we don't
# already categorize for other reasons. FIXME: if a given type has
# very few characters, we should just inline the code. E.g., there is
# only one paragraph separator.
# Arguments are the globs of the start, end and category lists, the
# character as a hexadecimal string, and its category.  A range is
# only extended when the character is adjacent AND shares the range's
# category; otherwise a new range is opened.
sub update_all_block
{
    local (*start, *end, *cats, $char, $category) = @_;
    local ($nchar) = hex ($char);
    local ($last) = $#end;
    local ($extends);

    $extends = ($last >= 0
                && $end[$last] == $nchar - 1
                && $cats[$last] eq $category);
    if ($extends)
    {
        ++$end[$last];
    }
    else
    {
        $start[$last + 1] = $nchar;
        $end[$last + 1] = $nchar;
        $cats[$last + 1] = $category;
    }
}
# Update a case table. We handle case tables specially because we
# want to map (e.g.) a block of uppercase characters directly onto the
# corresponding block of lowercase characters. Therefore we generate
# a new entry when the block would no longer map directly.
# Arguments are the globs of the start, end and map lists, followed
# by the character and the character it maps to, both as hexadecimal
# strings.  The map list holds the image of each range's first
# character.
sub update_case_block
{
    local (*start, *end, *map, $char, $mapchar) = @_;
    local ($nchar) = hex ($char);
    local ($nmap) = hex ($mapchar);
    local ($last) = $#end;
    local ($extends);

    # The range can grow only if the character is adjacent and its
    # image lies at the same offset within the mapped range.
    $extends = ($last >= 0
                && $end[$last] == $nchar - 1
                && $nchar - $start[$last] == $nmap - $map[$last]);
    if ($extends)
    {
        ++$end[$last];
    }
    else
    {
        $start[$last + 1] = $nchar;
        $end[$last + 1] = $nchar;
        $map[$last + 1] = $nmap;
    }
}
# Set the attribute value for the character. Each character can have
# only one attribute, except for the characters between $ROMAN_START
# and $ROMAN_END, whose conflicting value goes into
# %second_attributes.  Any other conflict, and any value outside the
# accepted range, is reported on stderr and not stored.
sub set_attribute
{
    local ($ncode, $attr) = @_;
    local ($existing) = $attributes{$ncode};
    local ($conflict, $is_roman);

    $conflict = ($existing ne '' && $existing ne $attr);
    $is_roman = ($ncode >= $ROMAN_START && $ncode <= $ROMAN_END);

    if ($conflict && $is_roman)
    {
        $second_attributes{$ncode} = $attr;
    }
    elsif ($conflict)
    {
        printf STDERR "character \\u%04x already has attribute\n", $ncode;
    }
    # Attributes can be interpreted as unsigned in some situations,
    # so we check against 65535. This could cause errors -- we need
    # to check the interpretation here.
    elsif ($attr < -32768 || $attr > 65535)
    {
        printf STDERR "attribute out of range for character \\u%04x\n", $ncode;
    }
    else
    {
        $attributes{$ncode} = $attr;
    }
}
# Print a block table to CHARTABLE: a two-column jchar array named by
# the first argument, with one { first, last } row per range taken
# from the start and end lists passed as globs.  Each row adds four
# bytes to $bytes.
sub print_block
{
    local ($title, *start, *end) = @_;
    local ($i, $tail);

    print CHARTABLE "static const jchar ", $title, "[][2] =\n", " {\n";
    for ($i = 0; $i <= $#start; ++$i)
    {
        print CHARTABLE " { ";
        &print_char ($start[$i]);
        print CHARTABLE ", ";
        &print_char ($end[$i]);
        # Every row but the last is followed by a comma.
        $tail = ($i != $#start) ? " },\n" : " }\n";
        print CHARTABLE $tail;
        $bytes += 4; # Two bytes per char.
    }
    print CHARTABLE " };\n\n";
}
# Print the numerics table: two parallel arrays on CHARTABLE, first
# the characters that have a numeric value (in sorted key order) and
# then the values themselves.  A value that does not fit in a short
# is reported on stderr and sets $status.
sub print_numerics
{
    local ($i, $key, $count, @keys);

    @keys = sort keys %numerics;
    $count = @keys;

    print CHARTABLE "static const jchar numeric_table[] =\n", " { ";
    # $i counts the entries written so far, including the current one.
    for ($i = 1; $i <= $count; ++$i)
    {
        &print_char (hex ($keys[$i - 1]));
        print CHARTABLE ", " if $i < $count;
        # Print 5 per line.
        print CHARTABLE "\n " if ($i % 5 == 0);
        $bytes += 2; # One character.
    }
    print CHARTABLE " };\n\n";

    print CHARTABLE "static const jshort numeric_value[] =\n", " { ";
    for ($i = 1; $i <= $count; ++$i)
    {
        $key = $keys[$i - 1];
        print CHARTABLE $numerics{$key};
        if ($numerics{$key} > 32767 || $numerics{$key} < -32768)
        {
            # This means our generated type info is incorrect. We
            # could just detect and work around this here, but I'm
            # lazy.
            print STDERR "numeric value won't fit in a short\n";
            $status = 1;
        }
        print CHARTABLE ", " if $i < $count;
        # Print 10 per line.
        print CHARTABLE "\n " if ($i % 10 == 0);
        $bytes += 2; # One short.
    }
    print CHARTABLE " };\n\n";
}
# Print a table that maps one single letter onto another. It assumes
# the map is indexed by char code (as a hexadecimal string).
# Arguments are the name of the table and the map as a flattened
# hash.  Rows are written to CHARTABLE in sorted key order, each
# followed by a comment giving the character's name from %name, and
# each row adds four bytes to $bytes.
sub print_single_map
{
    local ($title, %map) = @_;
    local (@keys) = sort keys %map;
    # Keep the counters private to this routine, like the other
    # printers do, instead of overwriting the caller's $num, $i and
    # $key.
    local ($num, $i, $key);

    $num = @keys;
    print CHARTABLE "static const jchar ", $title, "[][2] =\n";
    print CHARTABLE " {\n";
    $i = 0;
    foreach $key (@keys)
    {
        print CHARTABLE " { ";
        &print_char (hex ($key));
        print CHARTABLE ", ";
        &print_char (hex ($map{$key}));
        print CHARTABLE " }";
        ++$i;
        # Every row but the last is followed by a comma; the last one
        # gets a space so that the trailing comments line up.
        if ($i < $num)
        {
            print CHARTABLE ",";
        }
        else
        {
            print CHARTABLE " ";
        }
        print CHARTABLE " // ", $name{$key}, "\n";
        $bytes += 4; # Two bytes per char.
    }
    print CHARTABLE " };\n\n";
}
# Print the `all' block: the range table itself (via print_block,
# under the name all_table) followed by category_table, which holds
# the category constant of each range.  Arguments are the globs of
# the start, end and category lists.
sub print_all_block
{
    local (*start, *end, *cats) = @_;
    local ($i, $sum);

    &print_block ("all_table", *start, *end);

    # We do this computation just to make sure it isn't cheaper to
    # simply list all the characters individually.
    $sum = 0;
    for ($i = 0; $i <= $#start; ++$i)
    {
        $sum += $end[$i] - $start[$i] + 1;
    }
    printf STDERR ("all_table encodes %d characters in %d entries\n",
                   $sum, $#start + 1);

    print CHARTABLE "static const jbyte category_table[] =\n", " { ";
    for ($i = 0; $i <= $#cats; ++$i)
    {
        # Two consecutive entries may carry the same category. This
        # isn't an error: two ranges can be non-adjacent while the
        # intervening characters are left out of the table for other
        # reasons. We could exploit this to make the table a little
        # smaller.
        # printf STDERR "Duplicate all entry at \\u%04x\n", $start[$i];
        print CHARTABLE 'java::lang::Character::', $category_map{$cats[$i]};
        print CHARTABLE ", " if ($i < $#cats);
        print CHARTABLE "\n ";
        ++$bytes;
    }
    print CHARTABLE " };\n\n";
}
# Print case table.  Arguments are the table prefix (`upper' or
# `lower') and the globs of the start, end, map and anomaly tables.
# Writes TITLE_case_table (via print_block) and TITLE_case_map_table.
# For any prefix other than `upper' the anomalous characters are
# written as TITLE_anomalous_table; for `upper', finding any is an
# error that is reported on stderr and sets $status.
sub print_case_table
{
    local ($title, *start, *end, *map, *anomalous) = @_;
    local ($n, $key, @keys);

    &print_block ($title . '_case_table', *start, *end);

    print CHARTABLE "static const jchar ", $title, "_case_map_table[] =\n";
    print CHARTABLE " { ";
    # $n counts the entries written so far, including the current one.
    for ($n = 1; $n <= $#map + 1; ++$n)
    {
        &print_char ($map[$n - 1]);
        print CHARTABLE ", " if $n <= $#map;
        print CHARTABLE "\n " if $n % 5 == 0;
        $bytes += 2;
    }
    print CHARTABLE " };\n";

    @keys = sort keys %anomalous;
    if ($title ne 'upper')
    {
        print CHARTABLE "\n";
        print CHARTABLE "static const jchar ", $title, "_anomalous_table[] =\n";
        print CHARTABLE " { ";
        $n = 0;
        foreach $key (@keys)
        {
            &print_char (hex ($key));
            print CHARTABLE ", " if $n < $#keys;
            ++$n;
            print CHARTABLE "\n " if $n % 5 == 0;
            $bytes += 2;
        }
        print CHARTABLE " };\n";
    }
    elsif ($#keys >= 0)
    {
        # If these are found we need to change Character.isUpperCase.
        print STDERR "Found anomalous upper case characters\n";
        $status = 1;
    }

    print CHARTABLE "\n";
}
# Print the type table and attributes table for the fast version.
# Arguments are the globs of the `all' start, end and category lists
# and of the primary and secondary attribute hashes.  type_table gets
# one category constant per character of every range;
# attribute_table gets one entry for each code from 0 to 0xffff, with
# missing attributes stored and printed as 0;
# secondary_attribute_table covers $ROMAN_START to $ROMAN_END.
sub print_fast_tables
{
    local (*start, *end, *cats, *atts, *second_atts) = @_;
    local ($i, $j);

    print CHARTABLE "static const jbyte type_table[] =\n{ ";
    foreach $i (0 .. $#cats)
    {
        foreach $j ($start[$i] .. $end[$i])
        {
            print CHARTABLE 'java::lang::Character::', $category_map{$cats[$i]};
            # No comma after the very last character of the last range.
            print CHARTABLE "," if ($i < $#cats || $j < $end[$i]);
            print CHARTABLE "\n ";
        }
    }
    print CHARTABLE "\n };\n\n";

    print CHARTABLE "static const jshort attribute_table[] =\n{ ";
    $i = 0;
    while ($i <= 0xffff)
    {
        $atts{$i} = 0 unless defined $atts{$i};
        print CHARTABLE $atts{$i};
        print CHARTABLE ", " if $i < 0xffff;
        print CHARTABLE "\n " if $i % 5 == 1;
        ++$i;
    }
    print CHARTABLE "\n };\n\n";

    print CHARTABLE "static const jshort secondary_attribute_table[] =\n{ ";
    $i = $ROMAN_START;
    while ($i <= $ROMAN_END)
    {
        print CHARTABLE $second_atts{$i};
        print CHARTABLE ", " if $i < $ROMAN_END;
        print CHARTABLE "\n " if $i % 5 == 1;
        ++$i;
    }
    print CHARTABLE "\n };\n\n";
}
# Print a character constant: the numeric code given as argument is
# written to CHARTABLE as `0x' followed by at least four lowercase
# hexadecimal digits.
sub print_char
{
    local ($ncode) = @_;
    local ($text) = sprintf ("0x%04x", $ncode);
    print CHARTABLE $text;
}
......@@ -42,13 +42,6 @@ AC_SUBST(COMPPATH)
dnl The -no-testsuite modules omit the test subdir.
AM_CONDITIONAL(TESTSUBDIR, test -d $srcdir/testsuite)
dnl See whether the user prefers size or speed for Character.
dnl The default is size.
AC_ARG_ENABLE(fast-character,
[ --enable-fast-character prefer speed over size for Character],
# Nothing
, AC_DEFINE(COMPACT_CHARACTER))
dnl Should the runtime set system properties by examining the
dnl environment variable GCJ_PROPERTIES?
AC_ARG_ENABLE(getenv-properties,
......
# Start Code; End Code; Block Name
0000; 007F; Basic Latin
0080; 00FF; Latin-1 Supplement
0100; 017F; Latin Extended-A
0180; 024F; Latin Extended-B
0250; 02AF; IPA Extensions
02B0; 02FF; Spacing Modifier Letters
0300; 036F; Combining Diacritical Marks
0370; 03FF; Greek
0400; 04FF; Cyrillic
0530; 058F; Armenian
0590; 05FF; Hebrew
0600; 06FF; Arabic
0700; 074F; Syriac
0780; 07BF; Thaana
0900; 097F; Devanagari
0980; 09FF; Bengali
0A00; 0A7F; Gurmukhi
0A80; 0AFF; Gujarati
0B00; 0B7F; Oriya
0B80; 0BFF; Tamil
0C00; 0C7F; Telugu
0C80; 0CFF; Kannada
0D00; 0D7F; Malayalam
0D80; 0DFF; Sinhala
0E00; 0E7F; Thai
0E80; 0EFF; Lao
0F00; 0FFF; Tibetan
1000; 109F; Myanmar
10A0; 10FF; Georgian
1100; 11FF; Hangul Jamo
1200; 137F; Ethiopic
13A0; 13FF; Cherokee
1400; 167F; Unified Canadian Aboriginal Syllabics
1680; 169F; Ogham
16A0; 16FF; Runic
1780; 17FF; Khmer
1800; 18AF; Mongolian
1E00; 1EFF; Latin Extended Additional
1F00; 1FFF; Greek Extended
2000; 206F; General Punctuation
2070; 209F; Superscripts and Subscripts
20A0; 20CF; Currency Symbols
20D0; 20FF; Combining Marks for Symbols
2100; 214F; Letterlike Symbols
2150; 218F; Number Forms
2190; 21FF; Arrows
2200; 22FF; Mathematical Operators
2300; 23FF; Miscellaneous Technical
2400; 243F; Control Pictures
2440; 245F; Optical Character Recognition
2460; 24FF; Enclosed Alphanumerics
2500; 257F; Box Drawing
2580; 259F; Block Elements
25A0; 25FF; Geometric Shapes
2600; 26FF; Miscellaneous Symbols
2700; 27BF; Dingbats
2800; 28FF; Braille Patterns
2E80; 2EFF; CJK Radicals Supplement
2F00; 2FDF; Kangxi Radicals
2FF0; 2FFF; Ideographic Description Characters
3000; 303F; CJK Symbols and Punctuation
3040; 309F; Hiragana
30A0; 30FF; Katakana
3100; 312F; Bopomofo
3130; 318F; Hangul Compatibility Jamo
3190; 319F; Kanbun
31A0; 31BF; Bopomofo Extended
3200; 32FF; Enclosed CJK Letters and Months
3300; 33FF; CJK Compatibility
3400; 4DB5; CJK Unified Ideographs Extension A
4E00; 9FFF; CJK Unified Ideographs
A000; A48F; Yi Syllables
A490; A4CF; Yi Radicals
AC00; D7A3; Hangul Syllables
D800; DB7F; High Surrogates
DB80; DBFF; High Private Use Surrogates
DC00; DFFF; Low Surrogates
E000; F8FF; Private Use
F900; FAFF; CJK Compatibility Ideographs
FB00; FB4F; Alphabetic Presentation Forms
FB50; FDFF; Arabic Presentation Forms-A
FE20; FE2F; Combining Half Marks
FE30; FE4F; CJK Compatibility Forms
FE50; FE6F; Small Form Variants
FE70; FEFE; Arabic Presentation Forms-B
FEFF; FEFF; Specials
FF00; FFEF; Halfwidth and Fullwidth Forms
FFF0; FFFD; Specials
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
"http://www.w3.org/TR/REC-html40/loose.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="Content-Language" content="en-us">
<meta name="GENERATOR" content="Microsoft FrontPage 4.0">
<meta name="ProgId" content="FrontPage.Editor.Document">
<link rel="stylesheet" href="http://www.unicode.org/unicode.css" type="text/css">
<title>Unicode Character Database</title>
</head>
<body>
<h1>UNICODE CHARACTER DATABASE<br>
Version 3.0.0</h1>
<table border="1" cellspacing="2" cellpadding="0" height="87" width="100%">
<tr>
<td valign="TOP" width="144">Revision</td>
<td valign="TOP">3.0.0</td>
</tr>
<tr>
<td valign="TOP" width="144">Authors</td>
<td valign="TOP">Mark Davis and Ken Whistler</td>
</tr>
<tr>
<td valign="TOP" width="144">Date</td>
<td valign="TOP">1999-09-11</td>
</tr>
<tr>
<td valign="TOP" width="144">This Version</td>
<td valign="TOP"><a href="ftp://ftp.unicode.org/Public/3.0-Update/UnicodeCharacterDatabase-3.0.0.html">ftp://ftp.unicode.org/Public/3.0-Update/UnicodeCharacterDatabase-3.0.0.html</a></td>
</tr>
<tr>
<td valign="TOP" width="144">Previous Version</td>
<td valign="TOP">n/a</td>
</tr>
<tr>
<td valign="TOP" width="144">Latest Version</td>
<td valign="TOP"><a href="ftp://ftp.unicode.org/Public/3.0-Update/UnicodeCharacterDatabase-3.0.0.html">ftp://ftp.unicode.org/Public/3.0-Update/UnicodeCharacterDatabase-3.0.0.html</a></td>
</tr>
</table>
<p align="center">Copyright © 1995-1999 Unicode, Inc. All Rights reserved.</p>
<h2>Disclaimer</h2>
<p>The Unicode Character Database is provided as is by Unicode, Inc. No claims
are made as to fitness for any particular purpose. No warranties of any kind are
expressed or implied. The recipient agrees to determine applicability of
information provided. If this file has been purchased on magnetic or optical
media from Unicode, Inc., the sole remedy for any claim will be exchange of
defective media within 90 days of receipt.</p>
<p>This disclaimer is applicable for all other data files accompanying the
Unicode Character Database, some of which have been compiled by the Unicode
Consortium, and some of which have been supplied by other sources.</p>
<h2>Limitations on Rights to Redistribute This Data</h2>
<p>Recipient is granted the right to make copies in any form for internal
distribution and to freely use the information supplied in the creation of
products supporting the Unicode<sup>TM</sup> Standard. The files in the Unicode
Character Database can be redistributed to third parties or other organizations
(whether for profit or not) as long as this notice and the disclaimer notice are
retained. Information can be extracted from these files and used in
documentation or programs, as long as there is an accompanying notice indicating
the source.</p>
<h2>Introduction</h2>
<p>The Unicode Character Database is a set of files that define the Unicode
character properties and internal mappings. For more information about character
properties and mappings, see <i><a href="http://www.unicode.org/unicode/uni2book/u2.html">The
Unicode Standard</a></i>.</p>
<p>The Unicode Character Database has been updated to reflect Version 3.0 of the
Unicode Standard, with many characters added to those published in Version 2.0.
A number of corrections have also been made to case mappings or other errors in
the database noted since the publication of Version 2.0. Normative bidirectional
properties have also been modified to reflect decisions of the Unicode Technical
Committee.</p>
<p>For more information on versions of the Unicode Standard and how to reference
them, see <a href="http://www.unicode.org/unicode/standard/versions/">http://www.unicode.org/unicode/standard/versions/</a>.</p>
<h2>Conformance</h2>
<p>Character properties may be either normative or informative. <i>Normative</i>
means that implementations that claim conformance to the Unicode Standard (at a
particular version) and which make use of a particular property or field must
follow the specifications of the standard for that property or field in order to
be conformant. The term <i>normative</i>, when applied to a property or field of
the Unicode Character Database, does <i>not</i> mean that the value of that
field will never change. Corrections and extensions to the standard in the
future may require minor changes to normative values, even though the Unicode
Technical Committee strives to minimize such changes. An <i>informative</i> property
or field is strongly recommended, but a conformant implementation is free to use
or change such values as it may require while still being conformant to the
standard. Particular implementations may choose to override the properties and
mappings that are not normative. In that case, it is up to the implementer to
establish a protocol to convey that information.</p>
<h2>Files</h2>
<p>The following summarizes the files in the Unicode Character Database. &nbsp;For
more information about these files, see the referenced technical report or
section of Unicode Standard, Version 3.0.</p>
<p><b>UnicodeData.txt (Chapter 4)</b>
<ul>
<li>The main file in the Unicode Character Database.</li>
<li>For detailed information on the format, see <a href="UnicodeData.html">UnicodeData.html</a>.
This file also characterizes which properties are normative and which are
informative.</li>
</ul>
<p><b>PropList.txt (Chapter 4)</b>
<ul>
<li>Additional informative properties list: <i>Alphabetic, Ideographic,</i>
and <i>Mathematical</i>, among others.</li>
</ul>
<p><b>SpecialCasing.txt (Chapter 4)</b>
<ul>
<li>List of informative special casing properties, including one-to-many
mappings such as SHARP S =&gt; &quot;SS&quot;, and locale-specific mappings,
such as for Turkish <i>dotless i</i>.</li>
</ul>
<p><b>Blocks.txt (Chapter 14)</b>
<ul>
<li>List of normative block names.</li>
</ul>
<p><b>Jamo.txt (Chapter 4)</b>
<ul>
<li>List of normative Jamo short names, used in deriving HANGUL SYLLABLE names
algorithmically.</li>
</ul>
<p><b>ArabicShaping.txt (Section 8.2)</b>
<ul>
<li>Basic Arabic and Syriac character shaping properties, such as initial,
medial and final shapes. These properties are normative for minimal shaping
of Arabic and Syriac. </li>
</ul>
<p><b>NamesList.txt (Chapter 14)</b>
<ul>
<li>This file duplicates some of the material in the UnicodeData file, and
adds informative annotations used in the character charts, as printed in the
Unicode Standard. </li>
<li><b>Note: </b>The information in NamesList.txt and Index.txt files matches
the appropriate version of the book. Changes in the Unicode Character
Database since then may not be reflected in these files, since they are
primarily of archival interest.</li>
</ul>
<p><b>Index.txt (Chapter 14)</b>
<ul>
<li>Informative index to Unicode characters, as printed in the Unicode
Standard.</li>
<li><b>Note: </b>The information in NamesList.txt and Index.txt files matches
the appropriate version of the book. Changes in the Unicode Character
Database since then may not be reflected in these files, since they are
primarily of archival interest.</li>
</ul>
<p><b>CompositionExclusions.txt (<a href="http://www.unicode.org/unicode/reports/tr15/">UTR#15
Unicode Normalization Forms</a>)</b>
<ul>
<li>Normative properties for normalization.</li>
</ul>
<p><b>LineBreak.txt (<a href="http://www.unicode.org/unicode/reports/tr14/">UTR
#14: Line Breaking Properties</a>)</b>
<ul>
<li>Normative and informative properties for line breaking. To see which
properties are informative and which are normative, consult UTR#14.</li>
</ul>
<p><b>EastAsianWidth.txt (<a href="http://www.unicode.org/unicode/reports/tr11/">UTR
#11: East Asian Character Width</a>)</b>
<ul>
<li>Informative properties for determining the choice of wide vs. narrow
glyphs in East Asian contexts.</li>
</ul>
<p><b>diffXvY.txt</b>
<ul>
<li>Mechanically-generated informative files containing accumulated
differences between successive versions of UnicodeData.txt.</li>
</ul>
</body>
</html>
This source diff could not be displayed because it is too large. You can view the blob instead.
/* gnu/java/lang/CharData -- Database for java.lang.Character Unicode info
Copyright (C) 2002 Free Software Foundation, Inc.
*** This file is generated by scripts/unicode-muncher.pl ***
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.lang;
/**
* This contains the info about the Unicode characters that
* java.lang.Character needs. It is generated automatically from
* <code>gnu/gcj/convert/UnicodeData-3.0.0.txt</code>, by some
* perl scripts. This Unicode definition file can be found on the
* <a href="http://www.unicode.org">http://www.unicode.org</a> website.
* JDK 1.4 uses Unicode version 3.0.0.
*
* The data is stored as string constants, but Character will convert these
* Strings to their respective <code>char[]</code> components. The field
* <code>BLOCKS</code> stores the offset of a block of 2<sup>SHIFT</sup>
* characters within <code>DATA</code>. The DATA field, in turn, stores
* information about each character in the low order bits, and an offset
* into the attribute tables <code>UPPER</code>, <code>LOWER</code>,
* <code>NUM_VALUE</code>, and <code>DIRECTION</code>. Notice that the
* attribute tables are much smaller than 0xffff entries, as many characters
* in Unicode share common attributes. Finally, there is a listing for
* <code>TITLE</code> exceptions (most characters just have the same
* title case as upper case).
*
* @author scripts/unicode-muncher.pl (written by Jochen Hoenicke,
* Eric Blake)
* @see Character
*/
public interface CharData
{
/**
* The Unicode definition file that was parsed to build this database.
*/
String SOURCE = "gnu/gcj/convert/UnicodeData-3.0.0.txt";
/**
* The character shift amount to look up the block offset. In other words,
* <code>(char) (BLOCKS.value[ch >> SHIFT] + ch)</code> is the index where
* <code>ch</code> is described in <code>DATA</code>.
*/
int SHIFT = 5;
/**
* The mapping of character blocks to their location in <code>DATA</code>.
* Each entry has been adjusted so that the 16-bit sum with the desired
* character gives the actual index into <code>DATA</code>.
*/
String BLOCKS
= "\u01c2\u01c2\u01c1\u012c\u012b\u01a0\u01f8\u027e\u0201\u0290\u01b7"
+ "\u02c9\u0157\u02a9\u023f\u01b8\u0101\u0257\u02f9\u02f9\u0370\u0363"
+ "\ufe8f\u033e\uff85\uff65\ufdb5\ufda1\uffbd\u02be\uffbe\uffe9\ufea8"
+ "\uff2e\u022f\ufea1\uff9d\ufe61\uffa9\ufb20\u00c3\u010d\u00fe\ufb61"
+ "\ufb5a\u0105\u0103\u00f8\ufe8b\u0223\u0258\u01c1\u01f6\u01d6\ufee1"
+ "\u0223\ufdaa\ufb55\u0163\ufe63\u00f6\u00fd\ufe03\ufde3\ufdc3\ufda3"
+ "\ufd83\ufd63\ufd43\ufd23\ufd03\ufce3_J\u00a5\u0133\ufe08"
+ "]\u014f\u01a2\uf669\u0123\u0122\uf8c1\ufe50\u0102\u0101\ufa43"
+ "\ufc88\ufea7\u00c0\ufca1\ufb8f\uf47d\u0099\ufb11\ufe1a\ufd22\ufb29"
+ "5\uf3b9\ufe51\ufcc8\uffb5\uf339\ufa92\uff85\uff35\ufa4e\uf766"
+ "\uff25\ufad1\ufb63\ufc34\ufedf\uf763\ufcaa\ufa82\ufdb9\uf6e3\ufe3f"
+ "\ufcc1\ufe49\uf7eb\uf70f\uf2e8\ufd99\uf5e3\uf964\ufbfc\ufd91\uf563"
+ "\uf543\uf43c\uf436\uef6b\uf864\uf844\ufc52\uf804\uf7e4\ufc09\uf7a4"
+ "\uee9c\ufb8a\uf744\uf7f4\uf704\uf7c4\uf78c\uf46b\uf4cc\uf72c\uf644"
+ "\uf3fb\uf647\uf243\uf5c4\uf5a4\ueca1\uf563\uf544\uf524\uf504\uf4e4"
+ "\uf4c4\uf4a4\uf484\uf464\uf444\uf424\uf404\uf3e4\uf3c4\uf3a4\uf384"
+ "\uf364\uf344\uf324\uf283\uf771\uf2c4\uf2a4\uf731\ueec3\ueea3\uee83"
+ "\uee63\uf1e4\uf49c\uf53f\ued49\uf2d1\uf144\ue8ed\ue81e\uf0e4\uf591"
+ "\ued03\uece3\uecc3\ueca3\uec83\uec63\uec43\uec23\uec03\uebe3\uebc3"
+ "\ueba3\ueb83\ueb63\ueb43\ueb23\ueb03\ueae3\ueac3\ueaa3\uea83\uea63"
+ "\uea43\uea23\uea03\ue9e3\ue9c3\ue9a3\ue983\ue963\ue943\ue923\ue903"
+ "\ue8e3\ue8c3\ue8a3\ue883\ue863\ue843\ue823\ue803\ue7e3\ue501\ue4e1"
+ "\ue4c1\ue4a1\uefd1\ue461\ue441\ue427\uef81\uef51\uef51\uef51\uef51"
+ "\uef41\ue1ad\uef40\uef40\uef40\ue1e0\ue37f\uef00\ue9d3\uebe5\ue4df"
+ "\ueece\uef5f\ue490\uef3f\ueee3\uef76\uee6e\uef6a\uef77\uef72\uee8b"
+ "\uefcd\uee77\uee1f\uee21\uef5f\uefd7\uee90\uefcd\uefdb\uef63\ue223"
+ "\ue203\ue1e3\ued25\ued37\ued13\uef16\uef02\ued41\ued37\ued2d\uec25"
+ "\uec05\uebe5\uebc5\uebc8\uea76\ueb63\ueb66\ue9b0\ueb05\ueae5\ue9e3"
+ "\udf43\udf23\udf03\udee3\ue90a\uea1d\ueb52\ueb49\ueb3f\ueadf\ude03"
+ "\udde3\ue925\ue905\ue8e5\ue8c5\ue8a5\ue885\ue865\ue845\udcc3\udca3"
+ "\udc83\udc63\udc43\udc23\udc03\udbe3\udbc3\udba3\udb83\udb63\udb43"
+ "\udb23\udb03\udae3\udac3\udaa3\uda83\uda63\uda43\uda23\uda03\ud9e3"
+ "\ud9c3\ud9a3\ud983\ud963\ud943\ud923\ud903\ud8e3\ud8c3\ud8a3\ud883"
+ "\ud863\ud843\ud823\ud803\ud7e3\ud7c3\ud7a3\ud783\ud763\ue2ab\ue285"
+ "\ue265\ue26a\ue225\ue205\ue1e5\ue1c5\ue1a5\ue185\ue188\udd89\ue3a2"
+ "\udfcd\ud923\ud904\ue342\ud8c3\ud8a4\udcd1\udcac\ud67b\ud824\ud804"
+ "\ue262\ucede\ud403\ud3e3\ue110\ue0e3\ue1c5\ue0b1\ue0b1\ue158\udf11"
+ "\ue02e\ue00d\udfed\udfcd\ude66\udf8d\udf6d\udf4f\udf2e\ud564\ud544"
+ "\ud524\ud504\ud4e4\ud4c4\ud4a4\ud484\ud464\ud444\ud424\ud404\ud3e4"
+ "\ud3c4\ud3a4\ud384\ud364\ud344\ud324\ud304\ud2e4\ud2c4\ud2a4\ud284"
+ "\ud264\ud244\ud224\ud204\ud1e4\ud1c4\ud1a4\ud184\ud164\ud144\ud124"
+ "\ud104\ud0e4\ud0c4\ud0a4\ud084\ud064\ud044\ud024\ud004\ucfe4\ucfc4"
+ "\ucfa4\ucf84\ucf64\ucf44\ucf24\ucf04\ucee4\ucec4\ucea4\uce84\uce64"
+ "\uce44\uce24\uce04\ucde4\ucdc4\ucda4\ucd84\ucd64\ucd44\ucd24\ucd04"
+ "\ucce4\uccc4\ucca4\ucc84\ucc64\ucc44\ucc24\ucc04\ucbe4\ucbc4\ucba4"
+ "\ucb84\ucb64\ucb44\ucb24\ucb04\ucae4\ucac4\ucaa4\uca84\uca64\uca44"
+ "\uca24\uca04\uc9e4\uc9c4\uc9a4\uc984\uc964\uc944\uc924\uc904\uc8e4"
+ "\uc8c4\uc8a4\uc884\uc864\uc844\uc824\uc804\uc7e4\uc7c4\uc7a4\uc784"
+ "\uc764\uc744\uc724\uc704\uc6e4\uc6c4\uc6a4\uc684\uc664\uc644\uc624"
+ "\uc604\uc5e4\uc5c4\uc5a4\uc584\uc564\uc544\uc524\uc504\uc4e4\uc4c4"
+ "\uc4a4\uc484\uc464\uc444\uc424\uc404\uc3e4\uc3c4\uc3a4\uc384\uc364"
+ "\uc344\uc324\uc304\uc2e4\uc2c4\uc2a4\uc284\uc264\uc244\uc224\uc204"
+ "\uc1e4\uc1c4\uc1a4\uc184\uc164\uc144\uc124\uc104\uc0e4\uc0c4\uc0a4"
+ "\uc084\uc064\uc044\uc024\uc004\ubfe4\ubfc4\ubfa4\ubf84\ubf64\ubf44"
+ "\ubf24\ubf04\ubee4\ubec4\ubea4\ube84\ube64\ube44\ube24\ube04\ubde4"
+ "\ubdc4\ubda4\ubd84\ubd64\ubd44\ubd24\ubd04\ubce4\ubcc4\ubca4\ubc84"
+ "\ubc64\ubc44\ubc24\ubc04\ubbe4\ub2e0\ub803\ub7e3\ubb64\ubb44\ubb24"
+ "\ubb04\ubae4\ubac4\ubaa4\uba84\uba64\uba44\uba24\uba04\ub9e4\ub9c4"
+ "\ub9a4\ub984\ub964\ub944\ub924\ub904\ub8e4\ub8c4\ub8a4\ub884\ub864"
+ "\ub844\ub824\ub804\ub7e4\ub7c4\ub7a4\ub784\ub764\ub744\ub724\ub704"
+ "\ub6e4\ub6c4\ub6a4\ub684\ub664\ub644\ub624\ub604\ub5e4\ub5c4\ub5a4"
+ "\ub584\ub564\ub544\ub524\ub504\ub4e4\ub4c4\ub4a4\ub484\ub464\ub444"
+ "\ub424\ub404\ub3e4\ub3c4\ub3a4\ub384\ub364\ub344\ub324\ub304\ub2e4"
+ "\ub2c4\ub2a4\ub284\ub264\ub244\ub224\ub204\ub1e4\ub1c4\ub1a4\ub184"
+ "\ub164\ub144\ub124\ub104\ub0e4\ub0c4\ub0a4\ub084\ub064\ub044\ub024"
+ "\ub004\uafe4\uafc4\uafa4\uaf84\uaf64\uaf44\uaf24\uaf04\uaee4\uaec4"
+ "\uaea4\uae84\uae64\uae44\uae24\uae04\uade4\uadc4\uada4\uad84\uad64"
+ "\uad44\uad24\uad04\uace4\uacc4\uaca4\uac84\uac64\uac44\uac24\uac04"
+ "\uabe4\uabc4\uaba4\uab84\uab64\uab44\uab24\uab04\uaae4\uaac4\uaaa4"
+ "\uaa84\uaa64\uaa44\uaa24\uaa04\ua9e4\ua9c4\ua9a4\ua984\ua964\ua944"
+ "\ua924\ua904\ua8e4\ua8c4\ua8a4\ua884\ua864\ua844\ua824\ua804\ua7e4"
+ "\ua7c4\ua7a4\ua784\ua764\ua744\ua724\ua704\ua6e4\ua6c4\ua6a4\ua684"
+ "\ua664\ua644\ua624\ua604\ua5e4\ua5c4\ua5a4\ua584\ua564\ua544\ua524"
+ "\ua504\ua4e4\ua4c4\ua4a4\ua484\ua464\ua444\ua424\ua404\ua3e4\ua3c4"
+ "\ua3a4\ua384\ua364\ua344\ua324\ua304\ua2e4\ua2c4\ua2a4\ua284\ua264"
+ "\ua244\ua224\ua204\ua1e4\ua1c4\ua1a4\ua184\ua164\ua144\ua124\ua104"
+ "\ua0e4\ua0c4\ua0a4\ua084\ua064\ua044\ua024\ua004\u9fe4\u9fc4\u9fa4"
+ "\u9f84\u9f64\u9f44\u9f24\u9f04\u9ee4\u9ec4\u9ea4\u9e84\u9e64\u9e44"
+ "\u9e24\u9e04\u9de4\u9dc4\u9da4\u9d84\u9d64\u9d44\u9d24\u9d04\u9ce4"
+ "\u9cc4\u9ca4\u9c84\u9c64\u9c44\u9c24\u9c04\u9be4\u9bc4\u9ba4\u9b84"
+ "\u9b64\u9b44\u9b24\u9b04\u9ae4\u9ac4\u9aa4\u9a84\u9a64\u9a44\u9a24"
+ "\u9a04\u99e4\u99c4\u99a4\u9984\u9964\u9944\u9924\u9904\u98e4\u98c4"
+ "\u98a4\u9884\u9864\u9844\u9824\u9804\u97e4\u97c4\u97a4\u9784\u9764"
+ "\u9744\u9724\u9704\u96e4\u96c4\u96a4\u9684\u9664\u9644\u9624\u9604"
+ "\u95e4\u95c4\u95a4\u9584\u9564\u9544\u9524\u9504\u94e4\u94c4\u94a4"
+ "\u9484\u9464\u9444\u9424\u9404\u93e4\u93c4\u93a4\u9384\u9364\u9344"
+ "\u9324\u9304\u92e4\u92c4\u92a4\u9284\u9264\u9244\u9224\u9204\u91e4"
+ "\u91c4\u91a4\u9184\u9164\u9144\u9124\u9104\u90e4\u90c4\u90a4\u9084"
+ "\u9064\u9044\u9024\u9004\u8fe4\u8fc4\u8fa4\u8f84\u8f64\u8f44\u8f24"
+ "\u8f04\u8ee4\u8ec4\u8ea4\u8e84\u8e64\u8e44\u8e24\u8e04\u8de4\u8dc4"
+ "\u8da4\u8d84\u8d64\u8d44\u8d24\u8d04\u8ce4\u8cc4\u8ca4\u8c84\u8c64"
+ "\u8c44\u8c24\u8c04\u8be4\u8bc4\u8ba4\u8b84\u8b64\u8b44\u8b24\u8b04"
+ "\u8ae4\u8ac4\u8aa4\u8a84\u8a64\u8a44\u8a24\u8a04\u89e4\u89c4\u89a4"
+ "\u8984\u8964\u8944\u8924\u8904\u88e4\u88c4\u88a4\u8884\u8864\u8844"
+ "\u8824\u8804\u87e4\u87c4\u87a4\u8784\u8764\u8744\u8724\u8704\u86e4"
+ "\u86c4\u86a4\u8684\u8664\u8644\u8624\u8604\u85e4\u85c4\u85a4\u8584"
+ "\u8564\u8544\u8524\u8504\u84e4\u84c4\u84a4\u8484\u8464\u8444\u8424"
+ "\u8404\u83e4\u83c4\u83a4\u8384\u8364\u8344\u8324\u8304\u82e4\u82c4"
+ "\u82a4\u8284\u8264\u8244\u8224\u8204\u81e4\u81c4\u81a4\u8184\u8164"
+ "\u8144\u8124\u8104\u80e4\u80c4\u80a4\u8084\u8064\u8044\u8024\u8004"
+ "\u7fe4\u7fc4\u7fa4\u7f84\u7f64\u7f44\u7f24\u7f04\u7ee4\u7ec4\u7ea4"
+ "\u7e84\u7e64\u7e44\u7e24\u7e04\u7de4\u7dc4\u7da4\u7d84\u7d64\u7d44"
+ "\u7d24\u7d04\u7ce4\u7cc4\u7ca4\u7c84\u7c64\u7c44\u7c24\u7c04\u7be4"
+ "\u7bc4\u7ba4\u7b84\u7b64\u7b44\u7b24\u7b04\u7ae4\u7ac4\u7aa4\u7a84"
+ "\u7a64\u7a44\u7a24\u7a04\u79e4\u79c4\u79a4\u7984\u7964\u7944\u7924"
+ "\u7904\u78e4\u78c4\u78a4\u7884\u7864\u7844\u7824\u7804\u77e4\u77c4"
+ "\u77a4\u7784\u7764\u7744\u7724\u7704\u76e4\u76c4\u76a4\u7684\u7664"
+ "\u7644\u7624\u7604\u75e4\u75c4\u75a4\u7584\u7564\u7544\u7524\u7504"
+ "\u74e4\u74c4\u74a4\u7484\u7464\u7444\u7424\u7404\u73e4\u73c4\u73a4"
+ "\u7384\u7364\u7344\u7324\u7304\u72e4\u72c4\u72a4\u7284\u7264\u7244"
+ "\u7224\u7204\u71e4\u71c4\u71a4\u7184\u7164\u7144\u7124\u7104\u70e4"
+ "\u70c4\u70a4\u7084\u7064\u7044\u7024\u7004\u6fe4\u6fc4\u6fa4\u6f84"
+ "\u6f64\u6f44\u6f24\u6f04\u6ee4\u6ec4\u6ea4\u6e84\u6e64\u6e44\u6e24"
+ "\u6e04\u6de4\u6dc4\u6da4\u6d84\u6d64\u6d44\u6d24\u6d04\u6ce4\u6cc4"
+ "\u6ca4\u6c84\u6c64\u6c44\u6c24\u6c04\u6be4\u6bc4\u6ba4\u6b84\u6b64"
+ "\u6b44\u6b24\u6b04\u6ae4\u6ac4\u6aa4\u6a84\u6a64\u6a44\u6a24\u6a04"
+ "\u69e4\u60f0\u6603\u65e3\u6964\u6944\u6924\u6904\u68e4\u68c4\u68a4"
+ "\u6884\u6864\u6844\u6824\u6804\u67e4\u67c4\u67a4\u6784\u6764\u6744"
+ "\u6724\u6704\u66e4\u66c4\u66a4\u6684\u6664\u6644\u6624\u6604\u65e4"
+ "\u65c4\u65a4\u6584\u6564\u6544\u6524\u6504\u6b20\u6ddb\u6e96\u60e3"
+ "\u60c3\u60a3\u6083\u6063\u6043\u6023\u6003\u5fe3\u5fc3\u5fa3\u5f83"
+ "\u5f63\u5f43\u5f23\u5f03\u5ee3\u5ec3\u5ea3\u5e83\u5e63\u5e43\u5e23"
+ "\u5e03\u5de3\u5dc3\u5da3\u5d83\u5d63\u5d43\u5d23\u5d03\u5ce3\u5cc3"
+ "\u5ca3\u5c83\u5c63\u5c43\u5c23\u5c03\u5be3\u5bc3\u5ba3\u5b83\u5b63"
+ "\u5b43\u5b23\u5b03\u5ae3\u5ac3\u5aa3\u5a83\u5a63\u5a43\u5a23\u5a03"
+ "\u59e3\u5d64\u5d44\u5d24\u5d04\u5ce4\u5cc4\u5ca4\u5c84\u5c64\u5c44"
+ "\u5c24\u5c04\u5be4\u5bc4\u5ba4\u5b84\u5b64\u5b44\u5b24\u5b04\u5ae4"
+ "\u5ac4\u5aa4\u5a84\u5a64\u5a44\u5a24\u5a04\u59e4\u59c4\u59a4\u5984"
+ "\u5964\u5944\u5924\u5904\u58e4\u58c4\u58a4\u5884\u5864\u5844\u5824"
+ "\u5804\u57e4\u57c4\u57a4\u5784\u5764\u5744\u5724\u5704\u56e4\u56c4"
+ "\u56a4\u5684\u5664\u5644\u5624\u5604\u55e4\u55c4\u55a4\u5584\u5564"
+ "\u5544\u5524\u5504\u54e4\u54c4\u54a4\u5484\u5464\u5444\u5424\u5404"
+ "\u53e4\u53c4\u53a4\u5384\u5364\u5344\u5324\u5304\u52e4\u52c4\u52a4"
+ "\u5284\u5264\u5244\u5224\u5204\u51e4\u51c4\u51a4\u5184\u5164\u5144"
+ "\u5124\u5104\u50e4\u50c4\u50a4\u5084\u5064\u5044\u5024\u5004\u4fe4"
+ "\u4fc4\u4fa4\u4f84\u4f64\u4f44\u4f24\u4f04\u4ee4\u4ec4\u4ea4\u4e84"
+ "\u4e64\u4e44\u4e24\u4e04\u4de4\u4dc4\u4da4\u4d84\u4d64\u4d44\u4d24"
+ "\u4d04\u4ce4\u4cc4\u4ca4\u4c84\u4c64\u4c44\u4c24\u4c04\u4be4\u4bc4"
+ "\u4ba4\u4b84\u4b64\u4b44\u4b24\u4b04\u4ae4\u4ac4\u4aa4\u4a84\u4a64"
+ "\u4a44\u4a24\u4a04\u49e4\u49c4\u49a4\u4984\u4964\u4944\u4924\u4904"
+ "\u48e4\u48c4\u48a4\u4884\u4864\u4844\u4824\u4804\u47e4\u47c4\u47a4"
+ "\u4784\u4764\u4744\u4724\u4704\u46e4\u46c4\u46a4\u4684\u4664\u4644"
+ "\u4624\u4604\u45e4\u45c4\u45a4\u4584\u4564\u4544\u4524\u4504\u44e4"
+ "\u44c4\u44a4\u4484\u4464\u4444\u4424\u4404\u43e4\u43c4\u43a4\u4384"
+ "\u4364\u4344\u4324\u4304\u42e4\u42c4\u42a4\u4284\u4264\u4244\u4224"
+ "\u4204\u41e4\u41c4\u41a4\u4184\u4164\u4144\u4124\u4104\u40e4\u40c4"
+ "\u40a4\u4084\u4064\u4044\u4024\u4004\u3fe4\u3fc4\u3fa4\u3f84\u3f64"
+ "\u3f44\u3f24\u3f04\u3ee4\u3ec4\u3ea4\u3e84\u3e64\u3e44\u3e24\u3e04"
+ "\u3de4\u3dc4\u3da4\u3d84\u3d64\u3d44\u3d24\u3d04\u3ce4\u3cc4\u3ca4"
+ "\u3c84\u3c64\u3c44\u3c24\u3c04\u3be4\u3bc4\u3ba4\u3b84\u3b64\u3b44"
+ "\u3b24\u3b04\u3ae4\u3ac4\u3aa4\u3a84\u3a64\u3a44\u3a24\u3a04\u39e4"
+ "\u39c4\u39a4\u3984\u3964\u3944\u3924\u3904\u38e4\u38c4\u38a4\u3884"
+ "\u3864\u3844\u3824\u3804\u37e4\u37c4\u37a4\u3784\u3764\u3744\u3724"
+ "\u3704\u36e4\u36c4\u36a4\u3684\u3664\u3644\u3624\u3604\u35e4\u35c4"
+ "\u35a4\u3584\u3564\u3544\u3524\u3504\u34e4\u34c4\u34a4\u3484\u3464"
+ "\u3444\u3424\u3404\u33e4\u33c4\u33a4\u3384\u3364\u3344\u3324\u3304"
+ "\u32e4\u32c4\u32a4\u3284\u3264\u3244\u3224\u3204\u31e4\u28f2\u2e03"
+ "\u2de3\u3c25\u3c05\u3be5\u3bc5\u3ba5\u3b85\u3b65\u3b45\u3b25\u3b05"
+ "\u3ae5\u3ac5\u3aa5\u3a85\u3a65\u3a45\u3a25\u3a05\u39e5\u39c5\u39a5"
+ "\u3985\u3965\u3945\u3925\u3905\u38e5\u38c5\u38a5\u3885\u3865\u3845"
+ "\u3825\u3805\u37e5\u37c5\u37a5\u3785\u3765\u3745\u3725\u3705\u36e5"
+ "\u36c5\u36a5\u3685\u3665\u3645\u3625\u3605\u35e5\u35c5\u35a5\u3585"
+ "\u3565\u3545\u3525\u3505\u34e5\u34c5\u34a5\u3485\u3465\u3445\u3445"
+ "\u3425\u3405\u33e5\u33c5\u33a5\u3385\u3365\u3345\u3325\u3305\u32e5"
+ "\u32c5\u32a5\u3285\u3265\u3245\u3225\u3205\u31e5\u31c5\u31a5\u3185"
+ "\u3165\u3145\u3125\u3105\u30e5\u30c5\u30a5\u3085\u3065\u3045\u3025"
+ "\u3005\u2fe5\u2fc5\u2fa5\u2f85\u2f65\u2f45\u2f25\u2f05\u2ee5\u2ec5"
+ "\u2ea5\u2e85\u2e65\u2e45\u2e25\u2e05\u2de5\u2dc5\u2da5\u2d85\u2d65"
+ "\u2d45\u2d25\u2d05\u2ce5\u2cc5\u2ca5\u2c85\u2c65\u2c45\u2c25\u2c05"
+ "\u2be5\u2bc5\u2ba5\u2b85\u2b65\u2b45\u2b25\u2b05\u2ae5\u2ac5\u2aa5"
+ "\u2a85\u2a65\u2a45\u2a25\u2a05\u29e5\u29c5\u29a5\u2985\u2965\u2945"
+ "\u2925\u2905\u28e5\u28c5\u28a5\u2885\u2865\u2845\u2825\u2805\u27e5"
+ "\u27c5\u27a5\u2785\u2765\u2745\u2725\u2705\u26e5\u26c5\u26a5\u2685"
+ "\u2665\u2645\u2625\u2605\u25e5\u25c5\u25a5\u2585\u2565\u2545\u2525"
+ "\u2505\u24e5\u24c5\u24a5\u2485\u2465\u2445\u2425\u2405\u23e5\u23c5"
+ "\u23a5\u2385\u2365\u2345\u2325\u2305\u22e5\u22c5\u22a5\u2285\u2265"
+ "\u2245\u2225\u2205\u21e5\u21c5\u21a5\u2185\u2165\u2145\u2125\u2105"
+ "\u20e5\u20c5\u20a5\u2085\u2065\u2045\u2025\u2005\u1fe5\u1fc5\u1fa5"
+ "\u1f85\u1f65\u1f45\u1f25\u1f05\u1ee5\u1ec5\u1ea5\u1e85\u1e65\u1e45"
+ "\u1e25\u1e05\u1de5\u1dc5\u1da5\u1d85\u1d65\u1d45\u1d25\u1d05\u1ce5"
+ "\u1cc5\u1ca5\u1c85\u1c65\u1c45\u1c25\u1c05\u1be5\u1bc5\u1ba5\u1b85"
+ "\u1b65\u1064\u1044\u1024\u1004\u0fe4\u0fc4\u0fa4\u0f84\u0f64\u0668"
+ "\u0b83\u0b63\u0b43\u0b23\u0b03\u0ae3\u0a72\u1945\u0a51\u0d16\u0cf6"
+ "\u0cac\u0ca3\u0c96\u0c76\u0c56\u0c36\u0c16\u0bf6\u0bd6\u0bb6\u0b96"
+ "\u0b76\u0b58\u0b26\u0b16\u0ab2\u0ad6\u0a96\u02c2\u07c3\u1665\u1661"
+ "\u03ff\u09f6\u09d6\u09b6\u05dc\u0bb2\u15a1\u0fc0\u01c0\u01b1\u09c5"
+ "\u0826\u127f";
/**
* Information about each character. The low order 5 bits form the
* character type, the next bit is a flag for non-breaking spaces, and the
* next bit is a flag for mirrored directionality. The high order 9 bits
* form the offset into the attribute tables. Note that this limits the
* number of unique character attributes to 512, which is not a problem
* as of Unicode version 3.2.0, but may soon become one.
*/
String DATA
= "\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81"
+ "\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002"
+ "\u3e00\u3e00\u2f81\u3002\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3a05"
+ "\u3a05\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05\u3e00\u3e00\u3e00\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05\u3a05"
+ "\u3e00\u3e00\u3e00\u3e00\u5a88\u5a88\u3e00\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00"
+ "\u3e00\u3e00\u3e00\u5098\u3e00\u3e00\u3e00\u3e00\u4586\u3e00\u3e00"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00\u3e00\u3e00\u3a05"
+ "\u3a05\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305"
+ "\u5305\u5305\u5305\u3e00\u3e00\u3e00\u3e00\u5102\u5102\u5102\u5102"
+ "\u5102\u5102\u5102\u5102\u5102\u5102\u5102\u5102\u5102\u5102\u5102"
+ "\u5102\u5102\u5102\u5102\u5102\u5102\u5102\u5102\u5102\u5102\u5102"
+ "\u5102\u5102\u5102\u5102\u5102\u2902\u3e00\u5098\u2a14\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u4586\u4586\u4586\u4586\u4586\u4586\u4586"
+ "\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4606\u4586\u4586"
+ "\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u1a1b\u1a1b\u3e00\u3e00\u3e00\u3e00\u4504\u3e00\u3e00"
+ "\u3e00\u0298\u3e00\u0298\u6515\u6596\u0298\u1a97\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u4504\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u4504\u4504\u1a1b\u1a1b\u1a1b\u1a1b"
+ "\u1a1b\u1a1b\u1a1b\u1a1b\u1a1b\u1a1b\u1a1b\u1a1b\u1a1b\u1a1b\u4504"
+ "\u4504\u1a1b\u1a1b\u1a1b\u1a1b\u1a1b\u1a1b\u1a1b\u1a1b\u1a1b\u1a1b"
+ "\u1a1b\u1a1b\u1a1b\u1a1b\u2902\u7102\u2902\u3e00\u2902\u2902\u7301"
+ "\u7301\u7301\u7301\u7203\u1a1b\u1a1b\u1a1b\u6c82\u6c82\u2902\u2902"
+ "\u3e00\u3e00\u2902\u2902\u6d01\u6d01\u7381\u7381\u3e00\u1a1b\u1a1b"
+ "\u1a1b\u1b02\u1b82\u1c02\u1c82\u1d02\u1d82\u1e02\u1e82\u1f02\u1f82"
+ "\u2002\u2082\u2102\u2182\u2202\u2282\u2302\u2382\u2402\u2482\u2502"
+ "\u2582\u2602\u2682\u2702\u2782\u0455\u0c99\u04d6\u0c99\017\017"
+ "\017\017\017\u010f\017\017\017\017\017\017\017"
+ "\017\017\017\017\017\017\017\017\017\017\017"
+ "\017\017\017\017\017\017\017\017\u008f\u010f\u008f"
+ "\u018f\u010f\017\017\017\017\017\017\017\017\017"
+ "\017\017\017\017\017\u010f\u010f\u010f\u008f\u020c\u0298"
+ "\u0298\u0318\u039a\u0318\u0298\u0298\u0455\u04d6\u0298\u0519\u0598"
+ "\u0614\u0598\u0698\u0709\u0789\u0809\u0889\u0909\u0989\u0a09\u0a89"
+ "\u0b09\u0b89\u0598\u0298\u0c59\u0c99\u0c59\u0298\u0d01\u0d81\u0e01"
+ "\u0e81\u0f01\u0f81\u1001\u1081\u1101\u1181\u1201\u1281\u1301\u1381"
+ "\u1401\u1481\u1501\u1581\u1601\u1681\u1701\u1781\u1801\u1881\u1901"
+ "\u1981\u0455\u0298\u04d6\u1a1b\u1a97\u0298\u0298\u0298\u0c99\u0455"
+ "\u04d6\u3e00\u0298\u0298\u0298\u0298\u0298\u0298\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u282c\u0298\u039a\u039a\u039a\u039a\u289c"
+ "\u289c\u1a1b\u289c\u2902\u29dd\u0c99\u2a14\u289c\u1a1b\u2a9c\u0519"
+ "\u2b0b\u2b8b\u1a1b\u2c02\u289c\u0298\u1a1b\u2c8b\u2902\u2d5e\u2d8b"
+ "\u2d8b\u2d8b\u0298\u0298\u0519\u0614\u0c99\u0c99\u0c99\u3e00\u0298"
+ "\u039a\u0318\u0298\u3e00\u3e00\u3e00\u3e00\u5305\u5305\u5305\u3e00"
+ "\u5305\u3e00\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305"
+ "\u5305\u5305\u5305\u5305\u3e00\u3e00\u3e00\u4586\u4586\u4586\u4586"
+ "\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586"
+ "\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586"
+ "\u4586\u4586\u4586\u4586\u4586\u4586\u3e00\u4f1c\u4f1c\u4e81\u4e81"
+ "\u4e81\u4e81\u4e81\u4e81\u4e81\u4e81\u4e81\u4e81\u4e81\u4e81\u4e81"
+ "\u4e81\u4e81\u4e81\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01"
+ "\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01"
+ "\u2e01\u2e01\u2e01\u2e01\u0c99\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01"
+ "\u2e01\u2902\u3281\u2f81\u3002\u2f81\u3002\u3301\u2f81\u3002\u3381"
+ "\u3381\u2f81\u3002\u2902\u3401\u3481\u3501\u2f81\u3002\u3381\u3581"
+ "\u3602\u3681\u3701\u2f81\u3002\u2902\u2902\u3681\u3781\u2902\u3801"
+ "\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2902\u2f81"
+ "\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002"
+ "\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81"
+ "\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u5b09\u5b89\u5c09\u5c89\u5d09\u5d89\u5e09"
+ "\u5e89\u5f09\u5f89\u4586\u4586\u3a05\u3a05\u3a05\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u1a1b\u1a1b\u4681"
+ "\u0298\u4701\u4701\u4701\u3e00\u4781\u3e00\u4801\u4801\u2902\u2e01"
+ "\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01"
+ "\u2e01\u2e01\u2e01\u2e01\u2e82\u2e82\u2e82\u2e82\u2e82\u2e82\u2e82"
+ "\u2e82\u2e82\u2e82\u2e82\u2e82\u2e82\u2e82\u2e82\u2e82\u2e82\u2e82"
+ "\u2e82\u2e82\u2e82\u2e82\u2e82\u0c99\u2e82\u2e82\u2e82\u2e82\u2e82"
+ "\u2e82\u2e82\u2f02\u2e82\u2e82\u4982\u2e82\u2e82\u2e82\u2e82\u2e82"
+ "\u2e82\u2e82\u2e82\u2e82\u4a02\u4a82\u4a82\u3e00\u4b02\u4b82\u4c01"
+ "\u4c01\u4c01\u4c82\u4d02\u2902\u3e00\u3e00\u2f81\u3002\u2f81\u3002"
+ "\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81"
+ "\u3002\u2902\u3b01\u3b83\u3c02\u2f81\u3002\u3d01\u3d81\u2f81\u3002"
+ "\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81"
+ "\u3002\u2f81\u3002\u3081\u3102\u2f81\u3002\u2f81\u3002\u2f81\u3002"
+ "\u2902\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002"
+ "\u2f81\u3002\u2f81\u3002\u2f81\u3002\u4d82\u4e02\u3c82\u2902\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u5a10\u5a10\u5a10\u5a10\u5a10\u5a10\u7d8b\u3e00\u3e00\u3e00\u7e0b"
+ "\u7e8b\u7f0b\u7f8b\u800b\u808b\u0519\u0519\u0c99\u0455\u04d6\u2902"
+ "\u3a05\u3a05\u3a05\u3a05\u3b01\u3b83\u3c02\u3b01\u3b83\u3c02\u3b01"
+ "\u3b83\u3c02\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81"
+ "\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u3c82\u2f81\u3002\u4f1c"
+ "\u4586\u4586\u4586\u4586\u3e00\u4f87\u4f87\u3e00\u3e00\u2f81\u3002"
+ "\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81"
+ "\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002"
+ "\u3181\u2f81\u3002\u2f81\u3002\u2f81\u3002\u3202\u2f81\u3002\u2f81"
+ "\u3002\u2f81\u3002\u3881\u2f81\u3002\u3881\u2902\u2902\u2f81\u3002"
+ "\u3881\u2f81\u3002\u3901\u3901\u2f81\u3002\u2f81\u3002\u3981\u2f81"
+ "\u3002\u2902\u3a05\u2f81\u3002\u2902\u3a82\u4c01\u2f81\u3002\u2f81"
+ "\u3002\u3e00\u3e00\u2f81\u3002\u3e00\u3e00\u2f81\u3002\u3e00\u3e00"
+ "\u3e00\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002"
+ "\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u0598\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u5298\u3e00\u3e00\u3e00\u5298\u5298\u5298\u5298\u5298"
+ "\u5298\u5298\u5298\u5298\u5298\u5298\u5298\u5298\u5298\u3e00\u5a10"
+ "\u5305\u4586\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305"
+ "\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305"
+ "\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u3e00\u3e00"
+ "\u5a10\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01"
+ "\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01"
+ "\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01\u4c01"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3a05\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u5b09\u5b89\u5c09\u5c89\u5d09\u5d89\u5e09\u5e89\u5f09\u5f89"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u2902\u2902\u2902\u3e82\u3f02\u2902\u3f82\u3f82\u2902\u4002\u2902"
+ "\u4082\u2902\u2902\u2902\u2902\u3f82\u2902\u2902\u4102\u2902\u2902"
+ "\u2902\u2902\u4182\u4202\u2902\u2902\u2902\u2902\u2902\u4202\u2902"
+ "\u2902\u4282\u2902\u2902\u4302\u2902\u2902\u2902\u2902\u2902\u2902"
+ "\u2902\u2902\u2902\u2902\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u2902\u2902\u2902\u2902\u2902\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u5205\u4586\u5205\u5205\u3e00\u5205\u5205"
+ "\u3e00\u5205\u5205\u5205\u5205\u5205\u5205\u5205\u5205\u5205\u5205"
+ "\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305"
+ "\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5298\u5305"
+ "\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4f87\u4f87\u4586\u4f87"
+ "\u4f87\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u2d8b\u2d8b\u2d8b\u2d8b\u2d8b\u2d8b\u2d8b\u2d8b\u2d8b\u2d8b\u2d8b"
+ "\u2d8b\u828b\u4382\u2902\u2902\u4382\u2902\u2902\u2902\u2902\u4382"
+ "\u2902\u4402\u4402\u2902\u2902\u2902\u2902\u2902\u2902\u4482\u2902"
+ "\u2902\u2902\u2902\u2902\u2902\u2902\u2902\u2902\u2902\u2902\u2902"
+ "\u2902\u2902\u3e00\u3e00\u4504\u4504\u4504\u4504\u4504\u4504\u4504"
+ "\u4504\u4504\u1a1b\u1a1b\u4504\u4504\u4504\u4504\u4504\u1a1b\u1a1b"
+ "\u1a1b\u1a1b\u1a1b\u1a1b\u1a1b\u1a1b\u1a1b\u4504\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u5001\u5001\u5001\u5001\u5001\u5001\u5001\u5001"
+ "\u5001\u5001\u5001\u5001\u5001\u5001\u5001\u5001\u5001\u5001\u5001"
+ "\u5001\u5001\u5001\u5001\u3e00\u3e00\u4504\u5098\u5098\u5098\u5098"
+ "\u5098\u5098\u2e01\u2e01\u3e00\u2e01\u2e01\u2e01\u2e01\u2e01\u2e01"
+ "\u2e01\u2e01\u2e01\u4882\u4902\u4902\u4902\u2902\u2e82\u2e82\u2e82"
+ "\u2e82\u2e82\u2e82\u2e82\u2e82\u2e82\u2e82\u2e82\u2e82\u2e82\u2e82"
+ "\u2e82\u2e82\u4e02\u4e02\u4e02\u4e02\u4e02\u4e02\u4e02\u4e02\u4e02"
+ "\u4e02\u4e02\u4e02\u4e02\u4e02\u4e02\u4e02\u4586\u4586\u4586\u4586"
+ "\u4586\u5098\u4586\u4586\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00\u3e00"
+ "\u3e00\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u3e00\u4586"
+ "\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586"
+ "\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586"
+ "\u3e00\u4586\u4586\u4586\u5198\u4586\u4586\u5198\u4586\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u5205\u5205"
+ "\u5205\u5205\u5205\u5205\u5205\u5205\u5205\u5205\u5205\u5205\u5205"
+ "\u5205\u5205\u5205\u3e00\u3e00\u3e00\u3e00\u3e00\u5205\u5205\u5205"
+ "\u5198\u5198\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u5b89\u5c09\u5c89\u5d09\u5d89\u5e09\u5e89\u5f09\u5f89"
+ "\u630b\u638b\u640b\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u4586\u5a88\u3e00\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05\u3e00\u3a05"
+ "\u3a05\u3a05\u3a05\u3e00\u3e00\u3e00\u3a05\u3a05\u3e00\u3a05\u3e00"
+ "\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586"
+ "\u4586\u4586\u4586\u4586\u4586\u4586\u5a88\u5a88\u5a88\u5a88\u3e00"
+ "\u4586\u4586\u4586\u3e00\u4586\u4586\u4586\u4586\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u4586\u4586\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u4586\u5a88\u5a88\u3e00\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00\u3a05\u3a05\u3e00\u3e00"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3e00\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00\u3e00\u3e00\u5a88\u5a88"
+ "\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00\u3e00"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3e00\u3e00\u3a05\u3a05\u3a05\u3e00\u3e00\u3e00\u3e00\u5b09\u5b89"
+ "\u5c09\u5c89\u5d09\u5d89\u5e09\u5e89\u5f09\u5f89\u4f1c\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u5409\u5489\u5509\u5589\u5609\u5689\u5709\u5789\u5809"
+ "\u5889\u0318\u5918\u5918\u5298\u3e00\u3e00\u4586\u5305\u5305\u5305"
+ "\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305"
+ "\u5305\u5305\u3e00\u3e00\u5305\u5305\u5305\u5305\u5305\u5305\u5305"
+ "\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305"
+ "\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u5305"
+ "\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305"
+ "\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305"
+ "\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u6515\u6596"
+ "\u5384\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305\u5305"
+ "\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u5a88"
+ "\u5a88\u5098\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u4586\u4586\u5a88\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3e00\u3a05\u3e00\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u5098\u5098\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u4586\u4586\u4586"
+ "\u4586\u4586\u5384\u5384\u4586\u4586\u289c\u4586\u4586\u4586\u4586"
+ "\u3e00\u3e00\u0709\u0789\u0809\u0889\u0909\u0989\u0a09\u0a89\u0b09"
+ "\u0b89\u5305\u5305\u5305\u599c\u599c\u3e00\u3a05\u3a05\u3a05\u3e00"
+ "\u3a05\u3e00\u3a05\u3e00\u3e00\u3a05\u3a05\u3e00\u3a05\u3a05\u3a05"
+ "\u3a05\u4586\u3a05\u3a05\u4586\u4586\u4586\u4586\u4586\u4586\u3e00"
+ "\u4586\u4586\u3a05\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00"
+ "\u3e00\u3e00\u4586\u4586\u5a88\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00\u4586\u3a05\u5a88\u5a88"
+ "\u5a88\u5a88\u5a88\u3e00\u4586\u5a88\u5a88\u3e00\u5a88\u5a88\u4586"
+ "\u4586\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u5a88\u5a88\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3a05\u3e00\u5098\u5098\u5098"
+ "\u5098\u5098\u5098\u5098\u5098\u5b89\u5c09\u5c89\u5d09\u5d89\u5e09"
+ "\u5e89\u5f09\u5f89\u630b\u660b\u668b\u670b\u678b\u680b\u688b\u690b"
+ "\u698b\u638b\u6a0b\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3e00\u3a05\u3a05\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05\u3e00"
+ "\u3e00\u4586\u3a05\u5a88\u4586\u4586\u4586\u4586\u4586\u4586\u4586"
+ "\u4586\u5a88\u5a88\u5a88\u5a88\u4586\u3e00\u3e00\u3a05\u4586\u4586"
+ "\u4586\u4586\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3e00\u3a05\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00"
+ "\u4586\u3e00\u5a88\u5a88\u5a88\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3e00\u3a05\u3e00\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3a05\u3e00\u3a05\u3a05"
+ "\u3a05\u3a05\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u4586"
+ "\u3a05\u3a05\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u3e00\u3e00"
+ "\u3e00\u3e00\u039a\u039a\u039a\u039a\u039a\u039a\u039a\u039a\u039a"
+ "\u039a\u039a\u039a\u039a\u039a\u039a\u039a\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3a05\u3a05\u4586\u4586\u5098\u5098\u5b09\u5b89\u5c09\u5c89"
+ "\u5d09\u5d89\u5e09\u5e89\u5f09\u5f89\u5098\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u0298\u0298\u0318\u039a\u0318\u0298\u0298\u6515\u6596\u0298\u0519"
+ "\u0598\u0614\u0598\u0698\u0709\u0789\u0809\u0889\u0909\u0989\u0a09"
+ "\u0a89\u0b09\u0b89\u0598\u0298\u0c99\u0c99\u0c99\u0298\u0298\u0298"
+ "\u0298\u0298\u0298\u2a14\u0298\u0298\u0298\u0298\u5a10\u5a10\u5a10"
+ "\u5a10\u3e00\u5b09\u5b89\u5c09\u5c89\u5d09\u5d89\u5e09\u5e89\u5f09"
+ "\u5f89\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3e00\u3e00\u3e00\u3e00\u5a88\u4586\u4586\u4586\u4586\u3e00\u3e00"
+ "\u5a88\u5a88\u3e00\u3e00\u5a88\u5a88\u4586\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u5a88\u3e00\u3e00\u3e00\u3e00\u3a05"
+ "\u3a05\u3e00\u3a05\u3e00\u3e00\u3a05\u3a05\u3e00\u3a05\u3e00\u3e00"
+ "\u3a05\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05"
+ "\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05\u3e00"
+ "\u3a05\u3a05\u3e00\u3a05\u3a05\u3e00\u3e00\u4586\u3e00\u5a88\u5a88"
+ "\u4586\u4586\u3e00\u3e00\u3e00\u3e00\u4586\u4586\u3e00\u3e00\u4586"
+ "\u4586\u4586\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05\u3e00\u3a05\u3e00\u3a05\u3a05"
+ "\u4586\u4586\u3e00\u3e00\u5b09\u5b89\u5c09\u5c89\u5d09\u5d89\u5e09"
+ "\u5e89\u5f09\u5f89\u3a05\u3a05\u039a\u039a\u600b\u608b\u610b\u618b"
+ "\u620b\u628b\u4f1c\u3e00\u3e00\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05\u3e00\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3e00\u3e00\u4586\u3a05\u5a88\u5a88\u4586\u4586\u4586\u4586"
+ "\u4586\u3e00\u4586\u4586\u5a88\u3e00\u5a88\u5a88\u4586\u3e00\u3e00"
+ "\u3a05\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u5b09\u5b89\u5c09\u5c89\u5d09\u5d89"
+ "\u5e09\u5e89\u5f09\u5f89\u620b\u620b\u620b\u620b\u620b\u620b\u620b"
+ "\u620b\u620b\u620b\u4f1c\u4586\u4f1c\u4586\u4f1c\u4586\u6515\u6596"
+ "\u6515\u6596\u5a88\u5a88\u4586\u4586\u4586\u3e00\u3e00\u3e00\u5a88"
+ "\u5a88\u3e00\u3e00\u5a88\u5a88\u4586\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u4586\u5a88\u3e00\u3e00\u3e00\u3e00\u3a05\u3a05"
+ "\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05\u3e00\u5a88"
+ "\u4586\u4586\u4586\u4586\u5a88\u4586\u3e00\u3e00\u3e00\u4586\u4586"
+ "\u5a88\u4586\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u5a88\u5a88\u5a88\u4586\u4586"
+ "\u4586\u4586\u4586\u4586\u4586\u5a88\u5a88\u3e00\u3e00\u3e00\u5a88"
+ "\u5a88\u5a88\u3e00\u5a88\u5a88\u5a88\u4586\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u5a88\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u4504\u3e00"
+ "\u4586\u4586\u4586\u4586\u4586\u4586\u3e00\u3e00\u5b09\u5b89\u5c09"
+ "\u5c89\u5d09\u5d89\u5e09\u5e89\u5f09\u5f89\u3e00\u3e00\u3a05\u3a05"
+ "\u3e00\u3e00\u3e00\u3e00\u5b09\u5b89\u5c09\u5c89\u5d09\u5d89\u5e09"
+ "\u5e89\u5f09\u5f89\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u4586\u4586\u4586"
+ "\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4f87"
+ "\u4f87\u4f87\u5a88\u4586\u4586\u4586\u3e00\u3e00\u5a88\u5a88\u5a88"
+ "\u3e00\u5a88\u5a88\u5a88\u4586\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u5a88\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00\u3e00"
+ "\u4586\u3e00\u3e00\u3e00\u3e00\u5a88\u5a88\u5a88\u4586\u4586\u4586"
+ "\u3e00\u4586\u3e00\u5a88\u5a88\u5a88\u5a88\u5a88\u5a88\u5a88\u5a88"
+ "\u4586\u5a88\u5a88\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586"
+ "\u4586\u4586\u4586\u5098\u5098\u5098\u5098\u5098\u5098\u5098\u039a"
+ "\u5098\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u4504"
+ "\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u4586\u5098\u5b09\u5b89"
+ "\u5c09\u5c89\u5d09\u5d89\u5e09\u5e89\u5f09\u5f89\u5098\u5098\u3e00"
+ "\u3e00\u3e00\u3e00\u3a05\u4f1c\u4f1c\u4f1c\u5098\u5098\u5098\u5098"
+ "\u5098\u5098\u5098\u5098\u64b8\u5098\u5098\u5098\u5098\u5098\u5098"
+ "\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4586\u4586\u4f1c\u4f1c\u4f1c\u4f1c"
+ "\u4f1c\u4f1c\u4586\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u3e00\u3e00"
+ "\u4f1c\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u289c\u289c\u289c\u289c\u289c"
+ "\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u3e00\u3e00\u3e00\u3e00"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3e00\u3e00\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u1a97\u4504\u4504\u4504\u3e00\u5b09\u5b89\u5c09\u5c89\u5d09"
+ "\u5d89\u5e09\u5e89\u5f09\u5f89\u5098\u5098\u5098\u5098\u5098\u5098"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u5a88\u5a88\u4586\u4586\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u020c\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u6515"
+ "\u6596\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u5098\u5098\u5098\u6a8b\u6b0b\u6b8b\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u4586\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81"
+ "\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002\u2f81\u3002"
+ "\u2f81\u3002\u2902\u2902\u2902\u2902\u2902\u6c02\u3e00\u3e00\u3e00"
+ "\u3e00\u6c82\u6c82\u6c82\u6c82\u6c82\u6c82\u6c82\u6c82\u6d01\u6d01"
+ "\u6d01\u6d01\u6d01\u6d01\u6d01\u6d01\u6c82\u6c82\u6c82\u6c82\u6c82"
+ "\u6c82\u6c82\u6c82\u6d01\u6d01\u6d01\u6d01\u6d01\u6d01\u6d01\u6d01"
+ "\u6c82\u6c82\u6c82\u6c82\u6c82\u6c82\u3e00\u3e00\u6d01\u6d01\u6d01"
+ "\u6d01\u6d01\u6d01\u3e00\u3e00\u2902\u6c82\u2902\u6c82\u2902\u6c82"
+ "\u2902\u6c82\u3e00\u6d01\u3e00\u6d01\u3e00\u6d01\u3e00\u6d01\u6c82"
+ "\u6c82\u6c82\u6c82\u6c82\u6c82\u6c82\u6c82\u6d01\u6d01\u6d01\u6d01"
+ "\u6d01\u6d01\u6d01\u6d01\u6d82\u6d82\u6e02\u6e02\u6e02\u6e02\u6e82"
+ "\u6e82\u6f02\u6f02\u6f82\u6f82\u7002\u7002\u3e00\u3e00\u6c82\u6c82"
+ "\u6c82\u6c82\u6c82\u6c82\u6c82\u6c82\u7083\u7083\u7083\u7083\u7083"
+ "\u7083\u7083\u7083\u6c82\u6c82\u6c82\u6c82\u6c82\u6c82\u6c82\u6c82"
+ "\u7083\u7083\u7083\u7083\u7083\u7083\u7083\u7083\u6c82\u6c82\u2902"
+ "\u7102\u2902\u3e00\u2902\u2902\u6d01\u6d01\u7181\u7181\u7203\u1a1b"
+ "\u7282\u1a1b\u1b02\u1b82\u1c02\u1c82\u1d02\u1d82\u1e02\u1e82\u1f02"
+ "\u1f82\u2002\u2082\u2102\u2182\u2202\u2282\u2302\u2382\u2402\u2482"
+ "\u2502\u2582\u2602\u2682\u2702\u2782\u6515\u0c99\u6596\u0c99\u3e00"
+ "\u6c82\u6c82\u2902\u2902\u2902\u7402\u2902\u2902\u6d01\u6d01\u7481"
+ "\u7481\u7501\u1a1b\u1a1b\u1a1b\u3e00\u3e00\u2902\u7102\u2902\u3e00"
+ "\u2902\u2902\u7581\u7581\u7601\u7601\u7203\u1a1b\u1a1b\u3e00\u020c"
+ "\u020c\u020c\u020c\u020c\u020c\u020c\u76ac\u020c\u020c\u020c\u770c"
+ "\u5a10\u5a10\u7790\u7810\u2a14\u78b4\u2a14\u2a14\u2a14\u2a14\u0298"
+ "\u0298\u791d\u799e\u6515\u791d\u791d\u799e\u6515\u791d\u0298\u0298"
+ "\u0298\u0298\u0298\u0298\u0298\u0298\u7a0d\u7a8e\u7b10\u7b90\u7c10"
+ "\u7c90\u7d10\u76ac\u0318\u0318\u0318\u0318\u0318\u0298\u0298\u0298"
+ "\u0298\u29dd\u2d5e\u0298\u0298\u0298\u0298\u1a97\u7d8b\u2c8b\u2b0b"
+ "\u2b8b\u7e0b\u7e8b\u7f0b\u7f8b\u800b\u808b\u0519\u0519\u0c99\u0455"
+ "\u04d6\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00"
+ "\u3e00\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c"
+ "\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u289c\u289c\u289c\u289c\u289c\u289c\u289c"
+ "\u4c01\u289c\u289c\u289c\u289c\u4c01\u289c\u289c\u2902\u4c01\u4c01"
+ "\u4c01\u2902\u2902\u4c01\u4c01\u4c01\u2902\u289c\u4c01\u289c\u289c"
+ "\u289c\u4c01\u4c01\u4c01\u4c01\u4c01\u289c\u289c\ua08a\ua10a\ua18a"
+ "\ua20a\ua28a\ua30a\ua38a\ua40a\ua48a\u4586\u4586\u4586\u4586\u4586"
+ "\u4586\u2a14\u4504\u4504\u4504\u4504\u4504\u289c\u289c\ua50a\ua58a"
+ "\ua60a\u3e00\u3e00\u3e00\u289c\u289c\u289c\u289c\u3e00\u289c\u289c"
+ "\u289c\u289c\u3e00\u3e00\u289c\u289c\u289c\u289c\u289c\u289c\u289c"
+ "\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c"
+ "\u289c\u289c\u289c\u289c\u289c\u0c99\u289c\u289c\u289c\u289c\u289c"
+ "\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u0c99\u0c99"
+ "\u289c\u289c\u0c99\u289c\u0c99\u289c\u289c\u289c\u289c\u289c\u289c"
+ "\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u0c99\u289c"
+ "\u289c\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u930a\u938a\u940a\u948a\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u0c99\u0c99"
+ "\u0c99\u0c99\u0c99\u289c\u289c\u289c\u289c\u289c\u0c99\u0c99\u289c"
+ "\u289c\u289c\u289c\u4c01\u289c\u8101\u289c\u4c01\u289c\u8181\u8201"
+ "\u4c01\u4c01\u2a9c\u2902\u4c01\u4c01\u289c\u4c01\u2902\u3a05\u3a05"
+ "\u3a05\u3a05\u2902\u289c\u3e00\u3e00\u3e00\u3e00\u3e00\u830a\u838a"
+ "\u840a\u848a\u850a\u858a\u860a\u868a\u870a\u878a\u880a\u888a\u890a"
+ "\u898a\u8a0a\u8a8a\u8b0a\u8b8a\u8c0a\u8c8a\u8d0a\u8d8a\u8e0a\u8e8a"
+ "\u8f0a\u8f8a\u900a\u908a\u910a\u918a\u920a\u928a\u0c99\u0c99\u0c59"
+ "\u0c59\u0c99\u0c99\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59"
+ "\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59"
+ "\u0c99\u0c99\u0c99\u0c99\u0c99\u0c59\u0c59\u0c59\u0c59\u0c99\u0c99"
+ "\u0c99\u0c99\u0c99\u0c99\u0c99\u0c99\u0c99\u0c59\u0c59\u0c59\u0c59"
+ "\u0c59\u0c99\u0c99\u0c59\u0c59\u0c99\u0c99\u0c99\u0c99\u0c59\u0c59"
+ "\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59"
+ "\u0c99\u0c99\u0c59\u0c59\u0c59\u0c59\u0c99\u0c99\u0c99\u0c99\u0c99"
+ "\u0c59\u0c99\u0c99\u0c99\u0c99\u0c99\u0c99\u0c99\u289c\u289c\u0c99"
+ "\u289c\u289c\u0c99\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u0c99"
+ "\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c"
+ "\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c"
+ "\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u3e00"
+ "\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c"
+ "\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c"
+ "\u289c\u289c\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u0c99\u0c59\u0c59\u0c59\u0c59\u0c99"
+ "\u0c99\u0c99\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c99\u0c99\u0c99"
+ "\u0c59\u0519\u0519\u0c99\u0c59\u0c59\u0c99\u0c99\u0c99\u0c59\u0c59"
+ "\u0c59\u0c59\u0c99\u0c59\u0c99\u0c59\u0c99\u0c99\u0c99\u0c99\u0c59"
+ "\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c99\u0c99\u0c99"
+ "\u0c99\u0c99\u0c59\u0c99\u0c59\u0c59\u0c59\u0c59\u0c59\u289c\u289c"
+ "\u289c\u289c\u289c\u289c\u289c\u0455\u04d6\u289c\u289c\u289c\u289c"
+ "\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u4f1c\u4f1c\u4f1c\u4f1c"
+ "\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c"
+ "\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u3e00\u3e00\u3e00"
+ "\u3e00\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c"
+ "\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c"
+ "\u4f1c\u9a9c\u9a9c\u9a9c\u9a9c\u9a9c\u9a9c\u9a9c\u9a9c\u9a9c\u9a9c"
+ "\u9a9c\u9a9c\u9a9c\u9a9c\u9a9c\u9a9c\u9b1c\u9b1c\u9b1c\u9b1c\u9b1c"
+ "\u9b1c\u9b1c\u9b1c\u9b1c\u9b1c\u9b1c\u9b1c\u9b1c\u9b1c\u9b1c\u9b1c"
+ "\u7d8b\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u0c59\u0c99\u0c59\u0c99\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59"
+ "\u0c59\u0c99\u0c99\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59"
+ "\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c59\u0c99"
+ "\u0c99\u0c59\u0c59\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u039a\u039a\u0c99\u1a1b\u289c"
+ "\u039a\u039a\u3e00\u289c\u0c99\u0c99\u0c99\u0c99\u289c\u289c\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u5a10\u5a10"
+ "\u5a10\u289c\u289c\u3e00\u3e00\u289c\u289c\u289c\u289c\u289c\u289c"
+ "\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u3e00"
+ "\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c"
+ "\u289c\u289c\u289c\u3e00\u289c\u3e00\u289c\u289c\u289c\u289c\u3e00"
+ "\u3e00\u3e00\u289c\u3e00\u289c\u289c\u289c\u289c\u289c\u289c\u289c"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u828b\u9b8b\u9c0b\u9c8b\u9d0b\u9d8b\u9e0b\u9e8b"
+ "\u9f0b\u9f8b\u828b\u9b8b\u9c0b\u9c8b\u9d0b\u9d8b\u9e0b\u9e8b\u9f0b"
+ "\u9f8b\u289c\u3e00\u3e00\u3e00\u289c\u289c\u289c\u289c\u289c\u289c"
+ "\u289c\u289c\u0c59\u0c59\u0c59\u0c59\u289c\u289c\u289c\u289c\u289c"
+ "\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c\u289c"
+ "\u289c\u289c\u289c\u289c\u289c\u4f1c\u289c\u289c\u289c\u289c\u289c"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u620b\u620b\u620b\u620b\u620b\u620b"
+ "\u620b\u620b\u620b\u620b\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c"
+ "\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c"
+ "\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c"
+ "\u4f1c\u4f1c\u4f1c\u3e00\u3e00\u3e00\u4f1c\u600b\u608b\u610b\u618b"
+ "\ua68b\ua70b\ua78b\ua80b\ua88b\u630b\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c"
+ "\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c"
+ "\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c"
+ "\u289c\u3e00\u289c\u289c\u289c\u3e00\u289c\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u2c8b"
+ "\u2b0b\u2b8b\u7e0b\u7e8b\u7f0b\u7f8b\u800b\u808b\u950b\u958b\u960b"
+ "\u968b\u970b\u978b\u980b\u988b\u990b\u998b\u9a0b\u2c8b\u2b0b\u2b8b"
+ "\u7e0b\u7e8b\u7f0b\u7f8b\u800b\u808b\u950b\u958b\u960b\u968b\u970b"
+ "\u978b\u980b\u988b\u990b\u998b\u9a0b\u4f1c\u4f1c\u4f1c\u4f1c\u020c"
+ "\u0298\u0298\u0298\u289c\u4504\u3a05\ua00a\u0455\u04d6\u0455\u04d6"
+ "\u0455\u04d6\u0455\u04d6\u0455\u04d6\u289c\u289c\u0455\u04d6\u0455"
+ "\u04d6\u0455\u04d6\u0455\u04d6\u2a14\u6515\u6596\u6596\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3e00\u3e00\u3e00"
+ "\u3e00\u4586\u4586\u1a1b\u1a1b\u4504\u4504\u3e00\u3a05\u3a05\u3a05"
+ "\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05\u3a05"
+ "\u3a05\u3e00\u4f1c\u4f1c\u620b\u620b\u620b\u620b\u4f1c\u4f1c\u4f1c"
+ "\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c\u4f1c"
+ "\u4f1c\u4f1c\u4f1c\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\ua913\ua913"
+ "\ua913\ua913\ua913\ua913\ua913\ua913\ua913\ua913\ua913\ua913\ua913"
+ "\ua913\ua913\ua913\ua913\ua913\ua913\ua913\ua913\ua913\ua913\ua913"
+ "\ua913\ua913\ua913\ua913\ua913\ua913\ua913\ua913\ua992\ua992\ua992"
+ "\ua992\ua992\ua992\ua992\ua992\ua992\ua992\ua992\ua992\ua992\ua992"
+ "\ua992\ua992\ua992\ua992\ua992\ua992\ua992\ua992\ua992\ua992\ua992"
+ "\ua992\ua992\ua992\ua992\ua992\ua992\ua992\u5205\u5205\u5205\u5205"
+ "\u5205\u5205\u5205\u5205\u5205\u0519\u5205\u5205\u5205\u5205\u5205"
+ "\u5205\u5205\u5205\u5205\u5205\u5205\u5205\u5205\u3e00\u5205\u5205"
+ "\u5205\u5205\u5205\u3e00\u5205\u3e00\u4586\u4586\u4586\u4586\u3e00"
+ "\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00\u3e00"
+ "\u0298\u2a14\u2a14\u1a97\u1a97\u6515\u6596\u6515\u6596\u6515\u6596"
+ "\u6515\u6596\u6515\u6596\u6515\u6596\u3e00\u3e00\u3e00\u3e00\u0298"
+ "\u0298\u0298\u0298\u1a97\u1a97\u1a97\u0598\u0298\u0598\u3e00\u0298"
+ "\u0598\u0298\u0298\u2a14\u6515\u6596\u6515\u6596\u6515\u6596\u0318"
+ "\u0298\u0d01\u0d81\u0e01\u0e81\u0f01\u0f81\u1001\u1081\u1101\u1181"
+ "\u1201\u1281\u1301\u1381\u1401\u1481\u1501\u1581\u1601\u1681\u1701"
+ "\u1781\u1801\u1881\u1901\u1981\u6515\u0298\u6596\u1a1b\u1a97";
/**
* This is the attribute table for computing the numeric value of a
* character. Each entry is -1 if Unicode does not define a numeric value,
* -2 if the value cannot be represented as a non-negative integer, and
* otherwise the value itself (which may be zero). Note that the entries
* are signed values, but each one is stored as an unsigned 16-bit char
* since this is a String literal; -1 and -2 therefore appear in the
* literal as 0xffff and 0xfffe.
*/
String NUM_VALUE
= "\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff"
+ "\uffff\uffff\uffff\000\001\002\003\004\005\006\007"
+ "\010\011\uffff\uffff\012\013\014\015\016\017\020"
+ "\021\022\023\024\025\026\027\030\031\032\033"
+ "\034\035\036\037 !\"#\uffff\uffff\012"
+ "\013\014\015\016\017\020\021\022\023\024\025"
+ "\026\027\030\031\032\033\034\035\036\037 "
+ "!\"#\uffff\uffff\uffff\uffff\uffff\uffff\002\003"
+ "\uffff\001\uffff\ufffe\uffff\uffff\uffff\uffff\uffff\uffff\uffff"
+ "\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff"
+ "\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff"
+ "\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff"
+ "\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff"
+ "\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff"
+ "\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff"
+ "\uffff\uffff\uffff\000\001\002\003\004\005\006\007"
+ "\010\011\uffff\uffff\uffff\uffff\000\001\002\003\004"
+ "\005\006\007\010\011\001\002\003\004\uffff\020"
+ "\012d\u03e8\uffff\uffff\uffff\024\036(2<"
+ "FPZ\u2710\021\022\023\uffff\uffff\uffff\uffff"
+ "\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff"
+ "\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff"
+ "\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffff\000\004"
+ "\005\006\007\010\011\uffff\uffff\uffff\001\001\002"
+ "\003\004\005\006\007\010\011\012\013\0142"
+ "d\u01f4\u03e8\001\002\003\004\005\006\007\010"
+ "\011\012\013\0142d\u01f4\u03e8\u03e8\u1388\u2710"
+ "\uffff\012\013\014\015\016\017\020\021\022\023"
+ "\024\uffff\uffff\002\003\004\005\006\007\010\011"
+ "\012\000\001\002\003\004\005\006\007\010\011"
+ "\012\024\036\005\006\007\010\011\uffff\uffff";
/**
* This is the attribute table for computing the uppercase representation
* of a character. The value is the signed difference between the
* character and its uppercase version; an entry of zero means there is
* no difference. Note that this is stored as an unsigned 16-bit char
* since this is a String literal, so a negative difference appears in
* the literal as a large value (for example, -32 is written as 0xffe0).
* NOTE(review): presumably the entry is the amount added to the character
* to obtain its uppercase form -- confirm against the code that reads
* this table.
*/
String UPPER
= "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\uffe0"
+ "\uffe0\uffe0\uffe0\uffe0\uffe0\uffe0\uffe0\uffe0\uffe0\uffe0\uffe0"
+ "\uffe0\uffe0\uffe0\uffe0\uffe0\uffe0\uffe0\uffe0\uffe0\uffe0\uffe0"
+ "\uffe0\uffe0\uffe0\000\000\000\000\000\000\000\000"
+ "\u02e7\000\000\000\000\uffe0y\000\uffff\000\uff18"
+ "\000\ufed4\000\000\000\000\000\000\000a\000"
+ "\000\000\000\000\000\000\0008\000\uffff\ufffe"
+ "\uffb1\000\000\000\uff2e\uff32\uff33\uff36\uff35\uff31\uff2f"
+ "\uff2d\uff2b\uff2a\uff26\uff27\uff25\000\000T\000\000"
+ "\000\000\uffda\uffdb\uffe1\uffc0\uffc1\uffc2\uffc7\000\uffd1"
+ "\uffca\uffaa\uffb0\000\000\000\000\000\uffd0\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\uffc5\010\000J"
+ "Vd\u0080p~\000\011\000\000\ue3db\000"
+ "\000\007\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\ufff0\ufff0\ufff0\ufff0\ufff0\ufff0\ufff0\ufff0"
+ "\ufff0\ufff0\ufff0\ufff0\ufff0\ufff0\ufff0\ufff0\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\uffe6\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000";
/**
* This is the attribute table for computing the lowercase representation
* of a character. The value is the signed difference between the
* character and its lowercase version; an entry of zero means there is
* no difference. Note that this is stored as an unsigned 16-bit char
* since this is a String literal, so a negative difference appears in
* the literal as a large value (for example, -8 is written as 0xfff8).
* NOTE(review): presumably the entry is the amount added to the character
* to obtain its lowercase form -- confirm against the code that reads
* this table.
*/
String LOWER
= "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000 "
+ " "
+ " \000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000 \000\000\001\000\uff39\000"
+ "\uff87\000\u00d2\u00ce\u00cdO\u00ca\u00cb\u00cf\000\u00d3"
+ "\u00d1\u00d5\u00d6\u00da\u00d9\u00db\000\000\002\001\000"
+ "\000\uff9f\uffc8\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000&%"
+ "@?\000\000\000\000\000\000\000\000\000"
+ "\000\000\000P\000\0000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\ufff8\000"
+ "\000\000\000\000\000\ufff8\000\uffb6\ufff7\000\uffaa"
+ "\uff9c\000\uff90\ufff9\uff80\uff82\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\ue2a3\udf41\udfba\000\020\020"
+ "\020\020\020\020\020\020\020\020\020\020\020"
+ "\020\020\020\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\032\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000";
/**
* This is the attribute table for computing the directionality class
* of a character. At present, the value is in the range 0 to 18 if the
* character has a direction, otherwise it is -1. Note that this is
* stored as an unsigned 16-bit char since this is a String literal, so
* -1 appears in the literal as 0xffff.
*/
String DIRECTION
= "\011\013\012\014\014\015\005\005\015\015\005"
+ "\007\005\004\003\003\003\003\003\003\003\003"
+ "\003\003\015\015\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\015\015\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\007\015\000\015\015\005\003\003"
+ "\000\003\015\015\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\uffff\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\010\010\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\010\000\000\000\001\001"
+ "\002\002\002\006\006\006\006\006\006\006\006"
+ "\006\006\006\002\011\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\015\015\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\014\011\000\001\015"
+ "\015\015\014\012\016\020\022\017\021\003\003"
+ "\003\003\003\003\003\000\000\000\015\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000\000"
+ "\000\003\003\003\003\003\003\003\003\003\003"
+ "\003\000\000\015\015\015\015\015\015\015\015"
+ "\015\000\000\000\000\000\000\000\000\000\000"
+ "\000\000\000\000\000\000\000\000\000\000";
/**
* This is the listing of titlecase special cases (all other characters
* can use <code>UPPER</code> to determine their titlecase). The listing
* is a flat sequence of character pairs; converting the first character
* of a pair to titlecase produces the second character. For example,
* the first pair maps U+01C4 to U+01C5, and the second pair maps U+01C5
* to itself.
*/
String TITLE
= "\u01c4\u01c5\u01c5\u01c5\u01c6\u01c5\u01c7\u01c8\u01c8\u01c8\u01c9"
+ "\u01c8\u01ca\u01cb\u01cb\u01cb\u01cc\u01cb\u01f1\u01f2\u01f2\u01f2"
+ "\u01f3\u01f2";
}
......@@ -3,7 +3,8 @@
#ifndef __JAVA_CHARDECOMP_H__
#define __JAVA_CHARDECOMP_H__
// These tables are automatically generated by the chartables.pl
// These tables are automatically generated by the scripts/unicode-decomp.pl
// script. DO NOT EDIT the tables. Instead, fix the script
// and run it again.
......@@ -200,8 +201,8 @@ static const decomp_entry canonical_decomposition[] =
{ 0x01dc, "\x00\xfc\x03\x00" },
{ 0x01de, "\x00\xc4\x03\x04" },
{ 0x01df, "\x00\xe4\x03\x04" },
{ 0x01e0, "\x00\x41\x03\x07\x03\x04" },
{ 0x01e1, "\x00\x61\x03\x07\x03\x04" },
{ 0x01e0, "\x02\x26\x03\x04" },
{ 0x01e1, "\x02\x27\x03\x04" },
{ 0x01e2, "\x00\xc6\x03\x04" },
{ 0x01e3, "\x00\xe6\x03\x04" },
{ 0x01e6, "\x00\x47\x03\x0c" },
......@@ -217,6 +218,8 @@ static const decomp_entry canonical_decomposition[] =
{ 0x01f0, "\x00\x6a\x03\x0c" },
{ 0x01f4, "\x00\x47\x03\x01" },
{ 0x01f5, "\x00\x67\x03\x01" },
{ 0x01f8, "\x00\x4e\x03\x00" },
{ 0x01f9, "\x00\x6e\x03\x00" },
{ 0x01fa, "\x00\xc5\x03\x01" },
{ 0x01fb, "\x00\xe5\x03\x01" },
{ 0x01fc, "\x00\xc6\x03\x01" },
......@@ -247,6 +250,26 @@ static const decomp_entry canonical_decomposition[] =
{ 0x0215, "\x00\x75\x03\x0f" },
{ 0x0216, "\x00\x55\x03\x11" },
{ 0x0217, "\x00\x75\x03\x11" },
{ 0x0218, "\x00\x53\x03\x26" },
{ 0x0219, "\x00\x73\x03\x26" },
{ 0x021a, "\x00\x54\x03\x26" },
{ 0x021b, "\x00\x74\x03\x26" },
{ 0x021e, "\x00\x48\x03\x0c" },
{ 0x021f, "\x00\x68\x03\x0c" },
{ 0x0226, "\x00\x41\x03\x07" },
{ 0x0227, "\x00\x61\x03\x07" },
{ 0x0228, "\x00\x45\x03\x27" },
{ 0x0229, "\x00\x65\x03\x27" },
{ 0x022a, "\x00\xd6\x03\x04" },
{ 0x022b, "\x00\xf6\x03\x04" },
{ 0x022c, "\x00\xd5\x03\x04" },
{ 0x022d, "\x00\xf5\x03\x04" },
{ 0x022e, "\x00\x4f\x03\x07" },
{ 0x022f, "\x00\x6f\x03\x07" },
{ 0x0230, "\x02\x2e\x03\x04" },
{ 0x0231, "\x02\x2f\x03\x04" },
{ 0x0232, "\x00\x59\x03\x04" },
{ 0x0233, "\x00\x79\x03\x04" },
{ 0x0340, "\x03\x00" },
{ 0x0341, "\x03\x01" },
{ 0x0343, "\x03\x13" },
......@@ -277,17 +300,21 @@ static const decomp_entry canonical_decomposition[] =
{ 0x03ce, "\x03\xc9\x03\x01" },
{ 0x03d3, "\x03\xd2\x03\x01" },
{ 0x03d4, "\x03\xd2\x03\x08" },
{ 0x0400, "\x04\x15\x03\x00" },
{ 0x0401, "\x04\x15\x03\x08" },
{ 0x0403, "\x04\x13\x03\x01" },
{ 0x0407, "\x04\x06\x03\x08" },
{ 0x040c, "\x04\x1a\x03\x01" },
{ 0x040d, "\x04\x18\x03\x00" },
{ 0x040e, "\x04\x23\x03\x06" },
{ 0x0419, "\x04\x18\x03\x06" },
{ 0x0439, "\x04\x38\x03\x06" },
{ 0x0450, "\x04\x35\x03\x00" },
{ 0x0451, "\x04\x35\x03\x08" },
{ 0x0453, "\x04\x33\x03\x01" },
{ 0x0457, "\x04\x56\x03\x08" },
{ 0x045c, "\x04\x3a\x03\x01" },
{ 0x045d, "\x04\x38\x03\x00" },
{ 0x045e, "\x04\x43\x03\x06" },
{ 0x0476, "\x04\x74\x03\x0f" },
{ 0x0477, "\x04\x75\x03\x0f" },
......@@ -313,6 +340,8 @@ static const decomp_entry canonical_decomposition[] =
{ 0x04e7, "\x04\x3e\x03\x08" },
{ 0x04ea, "\x04\xe8\x03\x08" },
{ 0x04eb, "\x04\xe9\x03\x08" },
{ 0x04ec, "\x04\x2d\x03\x08" },
{ 0x04ed, "\x04\x4d\x03\x08" },
{ 0x04ee, "\x04\x23\x03\x04" },
{ 0x04ef, "\x04\x43\x03\x04" },
{ 0x04f0, "\x04\x23\x03\x08" },
......@@ -323,6 +352,14 @@ static const decomp_entry canonical_decomposition[] =
{ 0x04f5, "\x04\x47\x03\x08" },
{ 0x04f8, "\x04\x2b\x03\x08" },
{ 0x04f9, "\x04\x4b\x03\x08" },
{ 0x0622, "\x06\x27\x06\x53" },
{ 0x0623, "\x06\x27\x06\x54" },
{ 0x0624, "\x06\x48\x06\x54" },
{ 0x0625, "\x06\x27\x06\x55" },
{ 0x0626, "\x06\x4a\x06\x54" },
{ 0x06c0, "\x06\xd5\x06\x54" },
{ 0x06c2, "\x06\xc1\x06\x54" },
{ 0x06d3, "\x06\xd2\x06\x54" },
{ 0x0929, "\x09\x28\x09\x3c" },
{ 0x0931, "\x09\x30\x09\x3c" },
{ 0x0934, "\x09\x33\x09\x3c" },
......@@ -334,23 +371,22 @@ static const decomp_entry canonical_decomposition[] =
{ 0x095d, "\x09\x22\x09\x3c" },
{ 0x095e, "\x09\x2b\x09\x3c" },
{ 0x095f, "\x09\x2f\x09\x3c" },
{ 0x09b0, "\x09\xac\x09\xbc" },
{ 0x09cb, "\x09\xc7\x09\xbe" },
{ 0x09cc, "\x09\xc7\x09\xd7" },
{ 0x09dc, "\x09\xa1\x09\xbc" },
{ 0x09dd, "\x09\xa2\x09\xbc" },
{ 0x09df, "\x09\xaf\x09\xbc" },
{ 0x0a33, "\x0a\x32\x0a\x3c" },
{ 0x0a36, "\x0a\x38\x0a\x3c" },
{ 0x0a59, "\x0a\x16\x0a\x3c" },
{ 0x0a5a, "\x0a\x17\x0a\x3c" },
{ 0x0a5b, "\x0a\x1c\x0a\x3c" },
{ 0x0a5c, "\x0a\x21\x0a\x3c" },
{ 0x0a5e, "\x0a\x2b\x0a\x3c" },
{ 0x0b48, "\x0b\x47\x0b\x56" },
{ 0x0b4b, "\x0b\x47\x0b\x3e" },
{ 0x0b4c, "\x0b\x47\x0b\x57" },
{ 0x0b5c, "\x0b\x21\x0b\x3c" },
{ 0x0b5d, "\x0b\x22\x0b\x3c" },
{ 0x0b5f, "\x0b\x2f\x0b\x3c" },
{ 0x0b94, "\x0b\x92\x0b\xd7" },
{ 0x0bca, "\x0b\xc6\x0b\xbe" },
{ 0x0bcb, "\x0b\xc7\x0b\xbe" },
......@@ -364,27 +400,28 @@ static const decomp_entry canonical_decomposition[] =
{ 0x0d4a, "\x0d\x46\x0d\x3e" },
{ 0x0d4b, "\x0d\x47\x0d\x3e" },
{ 0x0d4c, "\x0d\x46\x0d\x57" },
{ 0x0e33, "\x0e\x4d\x0e\x32" },
{ 0x0eb3, "\x0e\xcd\x0e\xb2" },
{ 0x0dda, "\x0d\xd9\x0d\xca" },
{ 0x0ddc, "\x0d\xd9\x0d\xcf" },
{ 0x0ddd, "\x0d\xdc\x0d\xca" },
{ 0x0dde, "\x0d\xd9\x0d\xdf" },
{ 0x0f43, "\x0f\x42\x0f\xb7" },
{ 0x0f4d, "\x0f\x4c\x0f\xb7" },
{ 0x0f52, "\x0f\x51\x0f\xb7" },
{ 0x0f57, "\x0f\x56\x0f\xb7" },
{ 0x0f5c, "\x0f\x5b\x0f\xb7" },
{ 0x0f69, "\x0f\x40\x0f\xb5" },
{ 0x0f73, "\x0f\x72\x0f\x71" },
{ 0x0f73, "\x0f\x71\x0f\x72" },
{ 0x0f75, "\x0f\x71\x0f\x74" },
{ 0x0f76, "\x0f\xb2\x0f\x80" },
{ 0x0f77, "\x0f\x76\x0f\x71" },
{ 0x0f78, "\x0f\xb3\x0f\x80" },
{ 0x0f79, "\x0f\x78\x0f\x71" },
{ 0x0f81, "\x0f\x80\x0f\x71" },
{ 0x0f81, "\x0f\x71\x0f\x80" },
{ 0x0f93, "\x0f\x92\x0f\xb7" },
{ 0x0f9d, "\x0f\x9c\x0f\xb7" },
{ 0x0fa2, "\x0f\xa1\x0f\xb7" },
{ 0x0fa7, "\x0f\xa6\x0f\xb7" },
{ 0x0fac, "\x0f\xab\x0f\xb7" },
{ 0x0fb9, "\x0f\x90\x0f\xb5" },
{ 0x1026, "\x10\x25\x10\x2e" },
{ 0x1e00, "\x00\x41\x03\x25" },
{ 0x1e01, "\x00\x61\x03\x25" },
{ 0x1e02, "\x00\x42\x03\x07" },
......@@ -413,8 +450,8 @@ static const decomp_entry canonical_decomposition[] =
{ 0x1e19, "\x00\x65\x03\x2d" },
{ 0x1e1a, "\x00\x45\x03\x30" },
{ 0x1e1b, "\x00\x65\x03\x30" },
{ 0x1e1c, "\x01\x14\x03\x27" },
{ 0x1e1d, "\x01\x15\x03\x27" },
{ 0x1e1c, "\x02\x28\x03\x06" },
{ 0x1e1d, "\x02\x29\x03\x06" },
{ 0x1e1e, "\x00\x46\x03\x07" },
{ 0x1e1f, "\x00\x66\x03\x07" },
{ 0x1e20, "\x00\x47\x03\x04" },
......@@ -552,8 +589,8 @@ static const decomp_entry canonical_decomposition[] =
{ 0x1ea9, "\x00\xe2\x03\x09" },
{ 0x1eaa, "\x00\xc2\x03\x03" },
{ 0x1eab, "\x00\xe2\x03\x03" },
{ 0x1eac, "\x00\xc2\x03\x23" },
{ 0x1ead, "\x00\xe2\x03\x23" },
{ 0x1eac, "\x1e\xa0\x03\x02" },
{ 0x1ead, "\x1e\xa1\x03\x02" },
{ 0x1eae, "\x01\x02\x03\x01" },
{ 0x1eaf, "\x01\x03\x03\x01" },
{ 0x1eb0, "\x01\x02\x03\x00" },
......@@ -562,8 +599,8 @@ static const decomp_entry canonical_decomposition[] =
{ 0x1eb3, "\x01\x03\x03\x09" },
{ 0x1eb4, "\x01\x02\x03\x03" },
{ 0x1eb5, "\x01\x03\x03\x03" },
{ 0x1eb6, "\x01\x02\x03\x23" },
{ 0x1eb7, "\x01\x03\x03\x23" },
{ 0x1eb6, "\x1e\xa0\x03\x06" },
{ 0x1eb7, "\x1e\xa1\x03\x06" },
{ 0x1eb8, "\x00\x45\x03\x23" },
{ 0x1eb9, "\x00\x65\x03\x23" },
{ 0x1eba, "\x00\x45\x03\x09" },
......@@ -578,8 +615,8 @@ static const decomp_entry canonical_decomposition[] =
{ 0x1ec3, "\x00\xea\x03\x09" },
{ 0x1ec4, "\x00\xca\x03\x03" },
{ 0x1ec5, "\x00\xea\x03\x03" },
{ 0x1ec6, "\x00\xca\x03\x23" },
{ 0x1ec7, "\x00\xea\x03\x23" },
{ 0x1ec6, "\x1e\xb8\x03\x02" },
{ 0x1ec7, "\x1e\xb9\x03\x02" },
{ 0x1ec8, "\x00\x49\x03\x09" },
{ 0x1ec9, "\x00\x69\x03\x09" },
{ 0x1eca, "\x00\x49\x03\x23" },
......@@ -596,8 +633,8 @@ static const decomp_entry canonical_decomposition[] =
{ 0x1ed5, "\x00\xf4\x03\x09" },
{ 0x1ed6, "\x00\xd4\x03\x03" },
{ 0x1ed7, "\x00\xf4\x03\x03" },
{ 0x1ed8, "\x00\xd4\x03\x23" },
{ 0x1ed9, "\x00\xf4\x03\x23" },
{ 0x1ed8, "\x1e\xcc\x03\x02" },
{ 0x1ed9, "\x1e\xcd\x03\x02" },
{ 0x1eda, "\x01\xa0\x03\x01" },
{ 0x1edb, "\x01\xa1\x03\x01" },
{ 0x1edc, "\x01\xa0\x03\x00" },
......@@ -864,12 +901,18 @@ static const decomp_entry canonical_decomposition[] =
{ 0x2126, "\x03\xa9" },
{ 0x212a, "\x00\x4b" },
{ 0x212b, "\x00\xc5" },
{ 0x219a, "\x21\x90\x03\x38" },
{ 0x219b, "\x21\x92\x03\x38" },
{ 0x21ae, "\x21\x94\x03\x38" },
{ 0x21cd, "\x21\xd0\x03\x38" },
{ 0x21ce, "\x21\xd4\x03\x38" },
{ 0x21cf, "\x21\xd2\x03\x38" },
{ 0x2204, "\x22\x03\x03\x38" },
{ 0x2209, "\x22\x08\x03\x38" },
{ 0x220c, "\x22\x0b\x03\x38" },
{ 0x2224, "\x22\x23\x03\x38" },
{ 0x2226, "\x22\x25\x03\x38" },
{ 0x2241, "\x00\x7e\x03\x38" },
{ 0x2241, "\x22\x3c\x03\x38" },
{ 0x2244, "\x22\x43\x03\x38" },
{ 0x2247, "\x22\x45\x03\x38" },
{ 0x2249, "\x22\x48\x03\x38" },
......@@ -1252,6 +1295,7 @@ static const decomp_entry canonical_decomposition[] =
{ 0xfa2b, "\x98\xfc" },
{ 0xfa2c, "\x99\x28" },
{ 0xfa2d, "\x9d\xb4" },
{ 0xfb1d, "\x05\xd9\x05\xb4" },
{ 0xfb1f, "\x05\xf2\x05\xb7" },
{ 0xfb2a, "\x05\xe9\x05\xc1" },
{ 0xfb2b, "\x05\xe9\x05\xc2" },
......@@ -1289,9 +1333,6 @@ static const decomp_entry canonical_decomposition[] =
static const decomp_entry full_decomposition[] =
{
{ 0x005e, "\x00\x20\x03\x02" },
{ 0x005f, "\x00\x20\x03\x32" },
{ 0x0060, "\x00\x20\x03\x00" },
{ 0x00a0, "\x00\x20" },
{ 0x00a8, "\x00\x20\x03\x08" },
{ 0x00aa, "\x00\x61" },
......@@ -1346,196 +1387,26 @@ static const decomp_entry full_decomposition[] =
{ 0x02e4, "\x02\x95" },
{ 0x037a, "\x00\x20\x03\x45" },
{ 0x0384, "\x00\x20\x03\x01" },
{ 0x03d0, "\x03\xb2" },
{ 0x03d1, "\x03\xb8" },
{ 0x03d2, "\x03\xa5" },
{ 0x03d5, "\x03\xc6" },
{ 0x03d6, "\x03\xc0" },
{ 0x03f0, "\x03\xba" },
{ 0x03f1, "\x03\xc1" },
{ 0x03f2, "\x03\xc2" },
{ 0x0587, "\x05\x65\x05\x82" },
{ 0x0675, "\x06\x27\x06\x74" },
{ 0x0676, "\x06\x48\x06\x74" },
{ 0x0677, "\x06\xc7\x06\x74" },
{ 0x0678, "\x06\x4a\x06\x74" },
{ 0x0e33, "\x0e\x4d\x0e\x32" },
{ 0x0eb3, "\x0e\xcd\x0e\xb2" },
{ 0x0edc, "\x0e\xab\x0e\x99" },
{ 0x0edd, "\x0e\xab\x0e\xa1" },
{ 0x1101, "\x11\x00\x11\x00" },
{ 0x1104, "\x11\x03\x11\x03" },
{ 0x1108, "\x11\x07\x11\x07" },
{ 0x110a, "\x11\x09\x11\x09" },
{ 0x110d, "\x11\x0c\x11\x0c" },
{ 0x1113, "\x11\x02\x11\x00" },
{ 0x1114, "\x11\x02\x11\x02" },
{ 0x1115, "\x11\x02\x11\x03" },
{ 0x1116, "\x11\x02\x11\x07" },
{ 0x1117, "\x11\x03\x11\x00" },
{ 0x1118, "\x11\x05\x11\x02" },
{ 0x1119, "\x11\x05\x11\x05" },
{ 0x111a, "\x11\x05\x11\x12" },
{ 0x111b, "\x11\x05\x11\x0b" },
{ 0x111c, "\x11\x06\x11\x07" },
{ 0x111d, "\x11\x06\x11\x0b" },
{ 0x111e, "\x11\x07\x11\x00" },
{ 0x111f, "\x11\x07\x11\x02" },
{ 0x1120, "\x11\x07\x11\x03" },
{ 0x1121, "\x11\x07\x11\x09" },
{ 0x1122, "\x11\x07\x11\x09\x11\x00" },
{ 0x1123, "\x11\x07\x11\x09\x11\x03" },
{ 0x1124, "\x11\x07\x11\x09\x11\x07" },
{ 0x1125, "\x11\x07\x11\x09\x11\x09" },
{ 0x1126, "\x11\x07\x11\x09\x11\x0c" },
{ 0x1127, "\x11\x07\x11\x0c" },
{ 0x1128, "\x11\x07\x11\x0e" },
{ 0x1129, "\x11\x07\x11\x10" },
{ 0x112a, "\x11\x07\x11\x11" },
{ 0x112b, "\x11\x07\x11\x0b" },
{ 0x112c, "\x11\x07\x11\x07\x11\x0b" },
{ 0x112d, "\x11\x09\x11\x00" },
{ 0x112e, "\x11\x09\x11\x02" },
{ 0x112f, "\x11\x09\x11\x03" },
{ 0x1130, "\x11\x09\x11\x05" },
{ 0x1131, "\x11\x09\x11\x06" },
{ 0x1132, "\x11\x09\x11\x07" },
{ 0x1133, "\x11\x09\x11\x07\x11\x00" },
{ 0x1134, "\x11\x09\x11\x09\x11\x09" },
{ 0x1135, "\x11\x09\x11\x0b" },
{ 0x1136, "\x11\x09\x11\x0c" },
{ 0x1137, "\x11\x09\x11\x0e" },
{ 0x1138, "\x11\x09\x11\x0f" },
{ 0x1139, "\x11\x09\x11\x10" },
{ 0x113a, "\x11\x09\x11\x11" },
{ 0x113b, "\x11\x09\x11\x12" },
{ 0x113d, "\x11\x3c\x11\x3c" },
{ 0x113f, "\x11\x3e\x11\x3e" },
{ 0x1141, "\x11\x0b\x11\x00" },
{ 0x1142, "\x11\x0b\x11\x03" },
{ 0x1143, "\x11\x0b\x11\x06" },
{ 0x1144, "\x11\x0b\x11\x07" },
{ 0x1145, "\x11\x0b\x11\x09" },
{ 0x1146, "\x11\x0b\x11\x40" },
{ 0x1147, "\x11\x0b\x11\x0b" },
{ 0x1148, "\x11\x0b\x11\x0c" },
{ 0x1149, "\x11\x0b\x11\x0e" },
{ 0x114a, "\x11\x0b\x11\x10" },
{ 0x114b, "\x11\x0b\x11\x11" },
{ 0x114d, "\x11\x0c\x11\x0b" },
{ 0x114f, "\x11\x4e\x11\x4e" },
{ 0x1151, "\x11\x50\x11\x50" },
{ 0x1152, "\x11\x0e\x11\x0f" },
{ 0x1153, "\x11\x0e\x11\x12" },
{ 0x1156, "\x11\x11\x11\x07" },
{ 0x1157, "\x11\x11\x11\x0b" },
{ 0x1158, "\x11\x12\x11\x12" },
{ 0x1162, "\x11\x61\x11\x75" },
{ 0x1164, "\x11\x63\x11\x75" },
{ 0x1166, "\x11\x65\x11\x75" },
{ 0x1168, "\x11\x67\x11\x75" },
{ 0x116a, "\x11\x69\x11\x61" },
{ 0x116b, "\x11\x69\x11\x61\x11\x75" },
{ 0x116c, "\x11\x69\x11\x75" },
{ 0x116f, "\x11\x6e\x11\x65" },
{ 0x1170, "\x11\x6e\x11\x65\x11\x75" },
{ 0x1171, "\x11\x6e\x11\x75" },
{ 0x1174, "\x11\x73\x11\x75" },
{ 0x1176, "\x11\x61\x11\x69" },
{ 0x1177, "\x11\x61\x11\x6e" },
{ 0x1178, "\x11\x63\x11\x69" },
{ 0x1179, "\x11\x63\x11\x6d" },
{ 0x117a, "\x11\x65\x11\x69" },
{ 0x117b, "\x11\x65\x11\x6e" },
{ 0x117c, "\x11\x65\x11\x73" },
{ 0x117d, "\x11\x67\x11\x69" },
{ 0x117e, "\x11\x67\x11\x6e" },
{ 0x117f, "\x11\x69\x11\x65" },
{ 0x1180, "\x11\x69\x11\x66" },
{ 0x1181, "\x11\x69\x11\x68" },
{ 0x1182, "\x11\x69\x11\x69" },
{ 0x1183, "\x11\x69\x11\x6e" },
{ 0x1184, "\x11\x6d\x11\x63" },
{ 0x1185, "\x11\x6d\x11\x64" },
{ 0x1186, "\x11\x6d\x11\x67" },
{ 0x1187, "\x11\x6d\x11\x69" },
{ 0x1188, "\x11\x6d\x11\x75" },
{ 0x1189, "\x11\x6e\x11\x61" },
{ 0x118a, "\x11\x6e\x11\x62" },
{ 0x118b, "\x11\x6e\x11\x65\x11\x73" },
{ 0x118c, "\x11\x6e\x11\x68" },
{ 0x118d, "\x11\x6e\x11\x6e" },
{ 0x118e, "\x11\x72\x11\x61" },
{ 0x118f, "\x11\x72\x11\x65" },
{ 0x1190, "\x11\x72\x11\x66" },
{ 0x1191, "\x11\x72\x11\x67" },
{ 0x1192, "\x11\x72\x11\x68" },
{ 0x1193, "\x11\x72\x11\x6e" },
{ 0x1194, "\x11\x72\x11\x75" },
{ 0x1195, "\x11\x73\x11\x6e" },
{ 0x1196, "\x11\x73\x11\x73" },
{ 0x1197, "\x11\x74\x11\x6e" },
{ 0x1198, "\x11\x75\x11\x61" },
{ 0x1199, "\x11\x75\x11\x63" },
{ 0x119a, "\x11\x75\x11\x69" },
{ 0x119b, "\x11\x75\x11\x6e" },
{ 0x119c, "\x11\x75\x11\x73" },
{ 0x119d, "\x11\x75\x11\x9e" },
{ 0x119f, "\x11\x9e\x11\x65" },
{ 0x11a0, "\x11\x9e\x11\x6e" },
{ 0x11a1, "\x11\x9e\x11\x75" },
{ 0x11a2, "\x11\x9e\x11\x9e" },
{ 0x11a9, "\x11\xa8\x11\xa8" },
{ 0x11aa, "\x11\xa8\x11\xba" },
{ 0x11ac, "\x11\xab\x11\xbd" },
{ 0x11ad, "\x11\xab\x11\xc2" },
{ 0x11b0, "\x11\xaf\x11\xa8" },
{ 0x11b1, "\x11\xaf\x11\xb7" },
{ 0x11b2, "\x11\xaf\x11\xb8" },
{ 0x11b3, "\x11\xaf\x11\xba" },
{ 0x11b4, "\x11\xaf\x11\xc0" },
{ 0x11b5, "\x11\xaf\x11\xc1" },
{ 0x11b6, "\x11\xaf\x11\xc2" },
{ 0x11b9, "\x11\xb8\x11\xba" },
{ 0x11bb, "\x11\xba\x11\xba" },
{ 0x11c3, "\x11\xa8\x11\xaf" },
{ 0x11c4, "\x11\xa8\x11\xba\x11\xa8" },
{ 0x11c5, "\x11\xab\x11\xa8" },
{ 0x11c6, "\x11\xab\x11\xae" },
{ 0x11c7, "\x11\xab\x11\xba" },
{ 0x11c8, "\x11\xab\x11\xeb" },
{ 0x11c9, "\x11\xab\x11\xc0" },
{ 0x11ca, "\x11\xae\x11\xa8" },
{ 0x11cb, "\x11\xae\x11\xaf" },
{ 0x11cc, "\x11\xaf\x11\xa8\x11\xba" },
{ 0x11cd, "\x11\xaf\x11\xab" },
{ 0x11ce, "\x11\xaf\x11\xae" },
{ 0x11cf, "\x11\xaf\x11\xae\x11\xc2" },
{ 0x11d0, "\x11\xaf\x11\xaf" },
{ 0x11d1, "\x11\xaf\x11\xb7\x11\xa8" },
{ 0x11d2, "\x11\xaf\x11\xb7\x11\xba" },
{ 0x11d3, "\x11\xaf\x11\xb8\x11\xba" },
{ 0x11d4, "\x11\xaf\x11\xb8\x11\xc2" },
{ 0x11d5, "\x11\xaf\x11\xb8\x11\xbc" },
{ 0x11d6, "\x11\xaf\x11\xba\x11\xba" },
{ 0x11d7, "\x11\xaf\x11\xeb" },
{ 0x11d8, "\x11\xaf\x11\xbf" },
{ 0x11d9, "\x11\xaf\x11\xf9" },
{ 0x11da, "\x11\xb7\x11\xa8" },
{ 0x11db, "\x11\xb7\x11\xaf" },
{ 0x11dc, "\x11\xb7\x11\xb8" },
{ 0x11dd, "\x11\xb7\x11\xba" },
{ 0x11de, "\x11\xb7\x11\xba\x11\xba" },
{ 0x11df, "\x11\xb7\x11\xeb" },
{ 0x11e0, "\x11\xb7\x11\xbe" },
{ 0x11e1, "\x11\xb7\x11\xc2" },
{ 0x11e2, "\x11\xb7\x11\xbc" },
{ 0x11e3, "\x11\xb8\x11\xaf" },
{ 0x11e4, "\x11\xb8\x11\xc1" },
{ 0x11e5, "\x11\xb8\x11\xc2" },
{ 0x11e6, "\x11\xb8\x11\xbc" },
{ 0x11e7, "\x11\xba\x11\xa8" },
{ 0x11e8, "\x11\xba\x11\xae" },
{ 0x11e9, "\x11\xba\x11\xaf" },
{ 0x11ea, "\x11\xba\x11\xb8" },
{ 0x11ec, "\x11\xbc\x11\xa8" },
{ 0x11ed, "\x11\xbc\x11\xa8\x11\xa8" },
{ 0x11ee, "\x11\xbc\x11\xbc" },
{ 0x11ef, "\x11\xbc\x11\xbf" },
{ 0x11f1, "\x11\xf0\x11\xba" },
{ 0x11f2, "\x11\xf0\x11\xeb" },
{ 0x11f3, "\x11\xc1\x11\xb8" },
{ 0x11f4, "\x11\xc1\x11\xbc" },
{ 0x11f5, "\x11\xc2\x11\xab" },
{ 0x11f6, "\x11\xc2\x11\xaf" },
{ 0x11f7, "\x11\xc2\x11\xb7" },
{ 0x11f8, "\x11\xc2\x11\xb8" },
{ 0x0f0c, "\x0f\x0b" },
{ 0x0f77, "\x0f\xb2\x0f\x81" },
{ 0x0f79, "\x0f\xb3\x0f\x81" },
{ 0x1e9a, "\x00\x61\x02\xbe" },
{ 0x1fbd, "\x00\x20\x03\x13" },
{ 0x1fbf, "\x00\x20\x03\x13" },
......@@ -1555,12 +1426,15 @@ static const decomp_entry full_decomposition[] =
{ 0x2024, "\x00\x2e" },
{ 0x2025, "\x00\x2e\x00\x2e" },
{ 0x2026, "\x00\x2e\x00\x2e\x00\x2e" },
{ 0x202f, "\x00\x20" },
{ 0x2033, "\x20\x32\x20\x32" },
{ 0x2034, "\x20\x32\x20\x32\x20\x32" },
{ 0x2036, "\x20\x35\x20\x35" },
{ 0x2037, "\x20\x35\x20\x35\x20\x35" },
{ 0x203c, "\x00\x21\x00\x21" },
{ 0x203e, "\x00\x20\x03\x05" },
{ 0x2048, "\x00\x3f\x00\x21" },
{ 0x2049, "\x00\x21\x00\x3f" },
{ 0x2070, "\x00\x30" },
{ 0x2074, "\x00\x34" },
{ 0x2075, "\x00\x35" },
......@@ -1631,6 +1505,7 @@ static const decomp_entry full_decomposition[] =
{ 0x2136, "\x05\xd1" },
{ 0x2137, "\x05\xd2" },
{ 0x2138, "\x05\xd3" },
{ 0x2139, "\x00\x69" },
{ 0x2153, "\x00\x31\x20\x44\x00\x33" },
{ 0x2154, "\x00\x32\x20\x44\x00\x33" },
{ 0x2155, "\x00\x31\x20\x44\x00\x35" },
......@@ -1819,8 +1694,227 @@ static const decomp_entry full_decomposition[] =
{ 0x24e8, "\x00\x79" },
{ 0x24e9, "\x00\x7a" },
{ 0x24ea, "\x00\x30" },
{ 0x2e9f, "\x6b\xcd" },
{ 0x2ef3, "\x9f\x9f" },
{ 0x2f00, "\x4e\x00" },
{ 0x2f01, "\x4e\x28" },
{ 0x2f02, "\x4e\x36" },
{ 0x2f03, "\x4e\x3f" },
{ 0x2f04, "\x4e\x59" },
{ 0x2f05, "\x4e\x85" },
{ 0x2f06, "\x4e\x8c" },
{ 0x2f07, "\x4e\xa0" },
{ 0x2f08, "\x4e\xba" },
{ 0x2f09, "\x51\x3f" },
{ 0x2f0a, "\x51\x65" },
{ 0x2f0b, "\x51\x6b" },
{ 0x2f0c, "\x51\x82" },
{ 0x2f0d, "\x51\x96" },
{ 0x2f0e, "\x51\xab" },
{ 0x2f0f, "\x51\xe0" },
{ 0x2f10, "\x51\xf5" },
{ 0x2f11, "\x52\x00" },
{ 0x2f12, "\x52\x9b" },
{ 0x2f13, "\x52\xf9" },
{ 0x2f14, "\x53\x15" },
{ 0x2f15, "\x53\x1a" },
{ 0x2f16, "\x53\x38" },
{ 0x2f17, "\x53\x41" },
{ 0x2f18, "\x53\x5c" },
{ 0x2f19, "\x53\x69" },
{ 0x2f1a, "\x53\x82" },
{ 0x2f1b, "\x53\xb6" },
{ 0x2f1c, "\x53\xc8" },
{ 0x2f1d, "\x53\xe3" },
{ 0x2f1e, "\x56\xd7" },
{ 0x2f1f, "\x57\x1f" },
{ 0x2f20, "\x58\xeb" },
{ 0x2f21, "\x59\x02" },
{ 0x2f22, "\x59\x0a" },
{ 0x2f23, "\x59\x15" },
{ 0x2f24, "\x59\x27" },
{ 0x2f25, "\x59\x73" },
{ 0x2f26, "\x5b\x50" },
{ 0x2f27, "\x5b\x80" },
{ 0x2f28, "\x5b\xf8" },
{ 0x2f29, "\x5c\x0f" },
{ 0x2f2a, "\x5c\x22" },
{ 0x2f2b, "\x5c\x38" },
{ 0x2f2c, "\x5c\x6e" },
{ 0x2f2d, "\x5c\x71" },
{ 0x2f2e, "\x5d\xdb" },
{ 0x2f2f, "\x5d\xe5" },
{ 0x2f30, "\x5d\xf1" },
{ 0x2f31, "\x5d\xfe" },
{ 0x2f32, "\x5e\x72" },
{ 0x2f33, "\x5e\x7a" },
{ 0x2f34, "\x5e\x7f" },
{ 0x2f35, "\x5e\xf4" },
{ 0x2f36, "\x5e\xfe" },
{ 0x2f37, "\x5f\x0b" },
{ 0x2f38, "\x5f\x13" },
{ 0x2f39, "\x5f\x50" },
{ 0x2f3a, "\x5f\x61" },
{ 0x2f3b, "\x5f\x73" },
{ 0x2f3c, "\x5f\xc3" },
{ 0x2f3d, "\x62\x08" },
{ 0x2f3e, "\x62\x36" },
{ 0x2f3f, "\x62\x4b" },
{ 0x2f40, "\x65\x2f" },
{ 0x2f41, "\x65\x34" },
{ 0x2f42, "\x65\x87" },
{ 0x2f43, "\x65\x97" },
{ 0x2f44, "\x65\xa4" },
{ 0x2f45, "\x65\xb9" },
{ 0x2f46, "\x65\xe0" },
{ 0x2f47, "\x65\xe5" },
{ 0x2f48, "\x66\xf0" },
{ 0x2f49, "\x67\x08" },
{ 0x2f4a, "\x67\x28" },
{ 0x2f4b, "\x6b\x20" },
{ 0x2f4c, "\x6b\x62" },
{ 0x2f4d, "\x6b\x79" },
{ 0x2f4e, "\x6b\xb3" },
{ 0x2f4f, "\x6b\xcb" },
{ 0x2f50, "\x6b\xd4" },
{ 0x2f51, "\x6b\xdb" },
{ 0x2f52, "\x6c\x0f" },
{ 0x2f53, "\x6c\x14" },
{ 0x2f54, "\x6c\x34" },
{ 0x2f55, "\x70\x6b" },
{ 0x2f56, "\x72\x2a" },
{ 0x2f57, "\x72\x36" },
{ 0x2f58, "\x72\x3b" },
{ 0x2f59, "\x72\x3f" },
{ 0x2f5a, "\x72\x47" },
{ 0x2f5b, "\x72\x59" },
{ 0x2f5c, "\x72\x5b" },
{ 0x2f5d, "\x72\xac" },
{ 0x2f5e, "\x73\x84" },
{ 0x2f5f, "\x73\x89" },
{ 0x2f60, "\x74\xdc" },
{ 0x2f61, "\x74\xe6" },
{ 0x2f62, "\x75\x18" },
{ 0x2f63, "\x75\x1f" },
{ 0x2f64, "\x75\x28" },
{ 0x2f65, "\x75\x30" },
{ 0x2f66, "\x75\x8b" },
{ 0x2f67, "\x75\x92" },
{ 0x2f68, "\x76\x76" },
{ 0x2f69, "\x76\x7d" },
{ 0x2f6a, "\x76\xae" },
{ 0x2f6b, "\x76\xbf" },
{ 0x2f6c, "\x76\xee" },
{ 0x2f6d, "\x77\xdb" },
{ 0x2f6e, "\x77\xe2" },
{ 0x2f6f, "\x77\xf3" },
{ 0x2f70, "\x79\x3a" },
{ 0x2f71, "\x79\xb8" },
{ 0x2f72, "\x79\xbe" },
{ 0x2f73, "\x7a\x74" },
{ 0x2f74, "\x7a\xcb" },
{ 0x2f75, "\x7a\xf9" },
{ 0x2f76, "\x7c\x73" },
{ 0x2f77, "\x7c\xf8" },
{ 0x2f78, "\x7f\x36" },
{ 0x2f79, "\x7f\x51" },
{ 0x2f7a, "\x7f\x8a" },
{ 0x2f7b, "\x7f\xbd" },
{ 0x2f7c, "\x80\x01" },
{ 0x2f7d, "\x80\x0c" },
{ 0x2f7e, "\x80\x12" },
{ 0x2f7f, "\x80\x33" },
{ 0x2f80, "\x80\x7f" },
{ 0x2f81, "\x80\x89" },
{ 0x2f82, "\x81\xe3" },
{ 0x2f83, "\x81\xea" },
{ 0x2f84, "\x81\xf3" },
{ 0x2f85, "\x81\xfc" },
{ 0x2f86, "\x82\x0c" },
{ 0x2f87, "\x82\x1b" },
{ 0x2f88, "\x82\x1f" },
{ 0x2f89, "\x82\x6e" },
{ 0x2f8a, "\x82\x72" },
{ 0x2f8b, "\x82\x78" },
{ 0x2f8c, "\x86\x4d" },
{ 0x2f8d, "\x86\x6b" },
{ 0x2f8e, "\x88\x40" },
{ 0x2f8f, "\x88\x4c" },
{ 0x2f90, "\x88\x63" },
{ 0x2f91, "\x89\x7e" },
{ 0x2f92, "\x89\x8b" },
{ 0x2f93, "\x89\xd2" },
{ 0x2f94, "\x8a\x00" },
{ 0x2f95, "\x8c\x37" },
{ 0x2f96, "\x8c\x46" },
{ 0x2f97, "\x8c\x55" },
{ 0x2f98, "\x8c\x78" },
{ 0x2f99, "\x8c\x9d" },
{ 0x2f9a, "\x8d\x64" },
{ 0x2f9b, "\x8d\x70" },
{ 0x2f9c, "\x8d\xb3" },
{ 0x2f9d, "\x8e\xab" },
{ 0x2f9e, "\x8e\xca" },
{ 0x2f9f, "\x8f\x9b" },
{ 0x2fa0, "\x8f\xb0" },
{ 0x2fa1, "\x8f\xb5" },
{ 0x2fa2, "\x90\x91" },
{ 0x2fa3, "\x91\x49" },
{ 0x2fa4, "\x91\xc6" },
{ 0x2fa5, "\x91\xcc" },
{ 0x2fa6, "\x91\xd1" },
{ 0x2fa7, "\x95\x77" },
{ 0x2fa8, "\x95\x80" },
{ 0x2fa9, "\x96\x1c" },
{ 0x2faa, "\x96\xb6" },
{ 0x2fab, "\x96\xb9" },
{ 0x2fac, "\x96\xe8" },
{ 0x2fad, "\x97\x51" },
{ 0x2fae, "\x97\x5e" },
{ 0x2faf, "\x97\x62" },
{ 0x2fb0, "\x97\x69" },
{ 0x2fb1, "\x97\xcb" },
{ 0x2fb2, "\x97\xed" },
{ 0x2fb3, "\x97\xf3" },
{ 0x2fb4, "\x98\x01" },
{ 0x2fb5, "\x98\xa8" },
{ 0x2fb6, "\x98\xdb" },
{ 0x2fb7, "\x98\xdf" },
{ 0x2fb8, "\x99\x96" },
{ 0x2fb9, "\x99\x99" },
{ 0x2fba, "\x99\xac" },
{ 0x2fbb, "\x9a\xa8" },
{ 0x2fbc, "\x9a\xd8" },
{ 0x2fbd, "\x9a\xdf" },
{ 0x2fbe, "\x9b\x25" },
{ 0x2fbf, "\x9b\x2f" },
{ 0x2fc0, "\x9b\x32" },
{ 0x2fc1, "\x9b\x3c" },
{ 0x2fc2, "\x9b\x5a" },
{ 0x2fc3, "\x9c\xe5" },
{ 0x2fc4, "\x9e\x75" },
{ 0x2fc5, "\x9e\x7f" },
{ 0x2fc6, "\x9e\xa5" },
{ 0x2fc7, "\x9e\xbb" },
{ 0x2fc8, "\x9e\xc3" },
{ 0x2fc9, "\x9e\xcd" },
{ 0x2fca, "\x9e\xd1" },
{ 0x2fcb, "\x9e\xf9" },
{ 0x2fcc, "\x9e\xfd" },
{ 0x2fcd, "\x9f\x0e" },
{ 0x2fce, "\x9f\x13" },
{ 0x2fcf, "\x9f\x20" },
{ 0x2fd0, "\x9f\x3b" },
{ 0x2fd1, "\x9f\x4a" },
{ 0x2fd2, "\x9f\x52" },
{ 0x2fd3, "\x9f\x8d" },
{ 0x2fd4, "\x9f\x9c" },
{ 0x2fd5, "\x9f\xa0" },
{ 0x3000, "\x00\x20" },
{ 0x3036, "\x30\x12" },
{ 0x3038, "\x53\x41" },
{ 0x3039, "\x53\x44" },
{ 0x303a, "\x53\x45" },
{ 0x309b, "\x00\x20\x30\x99" },
{ 0x309c, "\x00\x20\x30\x9a" },
{ 0x3131, "\x11\x00" },
......@@ -2641,11 +2735,11 @@ static const decomp_entry full_decomposition[] =
{ 0xfc5b, "\x06\x30\x06\x70" },
{ 0xfc5c, "\x06\x31\x06\x70" },
{ 0xfc5d, "\x06\x49\x06\x70" },
{ 0xfc5e, "\x00\x20\x06\x51\x06\x4c" },
{ 0xfc5f, "\x00\x20\x06\x51\x06\x4d" },
{ 0xfc60, "\x00\x20\x06\x51\x06\x4e" },
{ 0xfc61, "\x00\x20\x06\x51\x06\x4f" },
{ 0xfc62, "\x00\x20\x06\x51\x06\x50" },
{ 0xfc5e, "\x00\x20\x06\x4c\x06\x51" },
{ 0xfc5f, "\x00\x20\x06\x4d\x06\x51" },
{ 0xfc60, "\x00\x20\x06\x4e\x06\x51" },
{ 0xfc61, "\x00\x20\x06\x4f\x06\x51" },
{ 0xfc62, "\x00\x20\x06\x50\x06\x51" },
{ 0xfc63, "\x00\x20\x06\x51\x06\x70" },
{ 0xfc64, "\x06\x26\x06\x31" },
{ 0xfc65, "\x06\x26\x06\x32" },
......@@ -2789,9 +2883,9 @@ static const decomp_entry full_decomposition[] =
{ 0xfcef, "\x06\x46\x06\x47" },
{ 0xfcf0, "\x06\x4a\x06\x45" },
{ 0xfcf1, "\x06\x4a\x06\x47" },
{ 0xfcf2, "\x06\x40\x06\x51\x06\x4e" },
{ 0xfcf3, "\x06\x40\x06\x51\x06\x4f" },
{ 0xfcf4, "\x06\x40\x06\x51\x06\x50" },
{ 0xfcf2, "\x06\x40\x06\x4e\x06\x51" },
{ 0xfcf3, "\x06\x40\x06\x4f\x06\x51" },
{ 0xfcf4, "\x06\x40\x06\x50\x06\x51" },
{ 0xfcf5, "\x06\x37\x06\x49" },
{ 0xfcf6, "\x06\x37\x06\x4a" },
{ 0xfcf7, "\x06\x39\x06\x49" },
......
This source diff could not be displayed because it is too large. You can view the blob instead.
// Character.java - Character class.
/* java.lang.Character -- Wrapper class for char, and Unicode subsets
Copyright (C) 1998, 1999, 2001, 2002 Free Software Foundation, Inc.
/* Copyright (C) 1998, 1999, 2000 Free Software Foundation
This file is part of GNU Classpath.
This file is part of libgcj.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This software is copyrighted work licensed under the terms of the
Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
details. */
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
/*
* Note: This class must not be merged with Classpath. Gcj uses C-style
* arrays (see include/java-chartables.h) to store the Unicode character
* database, whereas Classpath uses Java objects (char[] extracted from
* String constants) in gnu.java.lang.CharData. Gcj's approach is more
* efficient, because there is no vtable or data relocation to worry about.
* However, despite the difference in the database interface, the two
* versions share identical algorithms.
*/
package java.lang;
import java.io.Serializable;
/**
* Wrapper class for the primitive char data type. In addition, this class
* allows one to retrieve property information and perform transformations
* on the 57,707 defined characters in the Unicode Standard, Version 3.0.0.
* java.lang.Character is designed to be very dynamic, and as such, it
* retrieves information on the Unicode character set from a separate
* database, gnu.java.lang.CharData, which can be easily upgraded.
*
* <p>For predicates, boundaries are used to describe
* the set of characters for which the method will return true.
* This syntax uses fairly normal regular expression notation.
* See 5.13 of the Unicode Standard, Version 3.0, for the
* boundary specification.
*
* <p>See <a href="http://www.unicode.org">http://www.unicode.org</a>
* for more information on the Unicode Standard.
*
* @author Tom Tromey <tromey@cygnus.com>
* @date September 10, 1998
* @author Paul N. Fisher
* @author Jochen Hoenicke
* @author Eric Blake <ebb9@email.byu.edu>
* @since 1.0
* @status updated to 1.4
*/
/* Written using "Java Class Libraries", 2nd edition, ISBN 0-201-31002-3
* "The Java Language Specification", ISBN 0-201-63451-1,
* online API docs for JDK 1.2 beta from http://www.javasoft.com,
* and The Unicode Standard Version 2.0.
* Status: Believed complete and correct for JDK 1.1; 1.2 methods
* unimplemented.
*/
public final class Character implements Serializable, Comparable
{
/** The smallest value a <code>char</code> can hold, U+0000. */
public static final char MIN_VALUE = '\u0000';
/** The largest value a <code>char</code> can hold, U+FFFF. */
public static final char MAX_VALUE = '\uffff';
/**
* A subset of Unicode blocks.
*
* @author Paul N. Fisher
* @author Eric Blake <ebb9@email.byu.edu>
* @since 1.2
*/
public static class Subset
{
/**
 * The name of the subset. Never null: the constructor dereferences its
 * argument before storing it here.
 */
private final String name;
/** Smallest radix accepted by <code>digit</code> and <code>forDigit</code>. */
public static final int MIN_RADIX = 2;
/** Largest radix accepted by <code>digit</code> and <code>forDigit</code>. */
public static final int MAX_RADIX = 36;
/**
 * Creates a named subset of characters.
 *
 * @param name the name by which the subset is known
 * @throws NullPointerException if <code>name</code> is null
 */
protected Subset(String name)
{
  // toString() on a String yields that same String, so this call only
  // serves to reject a null name before it is stored.
  String checked = name.toString();
  this.name = checked;
}
/**
 * The <code>Class</code> object that VMClassLoader hands back for the
 * type code 'C' (the JVM descriptor letter for <code>char</code>).
 */
public static final Class TYPE = VMClassLoader.getPrimitiveClass('C');
/**
 * Identity comparison: a Subset is equal only to itself. The method is
 * <code>final</code>, so subclasses cannot substitute a looser notion of
 * equality than the <code>==</code> operator.
 *
 * @param o the object to compare against
 * @return true only if o is this very object
 */
public final boolean equals(Object o)
{
  return this == o;
}
// Separator categories: space, line and paragraph.
public static final byte SPACE_SEPARATOR = 12;
public static final byte LINE_SEPARATOR = 13;
public static final byte PARAGRAPH_SEPARATOR = 14;
/**
 * Fixes the hash code at the one inherited from Object, so that it stays
 * consistent with the identity-based <code>equals</code>. Being
 * <code>final</code>, it cannot be overridden by subclasses.
 *
 * @return the hash code of this object
 */
public final int hashCode()
{
  int inherited = super.hashCode();
  return inherited;
}
// Letter categories: uppercase, lowercase, titlecase, modifier and other.
public static final byte UPPERCASE_LETTER = 1;
public static final byte LOWERCASE_LETTER = 2;
public static final byte TITLECASE_LETTER = 3;
public static final byte MODIFIER_LETTER = 4;
public static final byte OTHER_LETTER = 5;
/**
 * Gives the name that was supplied when this subset was constructed.
 *
 * @return the subset's name
 */
public final String toString()
{
  return this.name;
}
} // class Subset
// Number categories: decimal digits, letter numbers and other numbers.
public static final byte DECIMAL_DIGIT_NUMBER = 9;
public static final byte LETTER_NUMBER = 10;
public static final byte OTHER_NUMBER = 11;
/**
* A family of character subsets in the Unicode specification. A character
* is in at most one of these blocks.
*
* This inner class was generated automatically from
* <code>libjava/gnu/gcj/convert/Blocks-3.txt</code>, by some perl scripts.
* This Unicode definition file can be found on the
* <a href="http://www.unicode.org">http://www.unicode.org</a> website.
* JDK 1.4 uses Unicode version 3.0.0.
*
* @author scripts/unicode-blocks.pl (written by Eric Blake)
* @since 1.2
*/
public static final class UnicodeBlock extends Subset
{
/** The first character of the block; <code>of</code> treats it as inclusive. */
private final char start;
// Mark categories: non-spacing, enclosing and combining spacing marks.
public static final byte NON_SPACING_MARK = 6;
public static final byte ENCLOSING_MARK = 7;
public static final byte COMBINING_SPACING_MARK = 8;
/** The last character of the block; <code>of</code> treats it as inclusive. */
private final char end;
// Punctuation categories: dash, start, end, connector and other.
public static final byte DASH_PUNCTUATION = 20;
public static final byte START_PUNCTUATION = 21;
public static final byte END_PUNCTUATION = 22;
public static final byte CONNECTOR_PUNCTUATION = 23;
public static final byte OTHER_PUNCTUATION = 24;
/**
 * Creates a block that covers a single contiguous range of characters.
 *
 * @param start the first character of the range
 * @param end the last character of the range
 * @param name the block name, handed on to the Subset constructor
 */
private UnicodeBlock(char start, char end, String name)
{
  super(name);
  this.end = end;
  this.start = start;
}
// Symbol categories: math, currency, modifier and other symbols.
public static final byte MATH_SYMBOL = 25;
public static final byte CURRENCY_SYMBOL = 26;
public static final byte MODIFIER_SYMBOL = 27;
public static final byte OTHER_SYMBOL = 28;
/**
 * Finds the Unicode block that contains a given character.
 *
 * @param ch the character to look up
 * @return the block containing ch, or null if no block contains it
 */
public static UnicodeBlock of(char ch)
{
  // SPECIALS contains two ranges, so U+FEFF is matched directly instead
  // of through the table search below.
  if ('\uFEFF' == ch)
    return SPECIALS;

  // Binary search: narrow [first, last] until ch falls inside a block or
  // the interval becomes empty.
  // NOTE(review): relies on sets being ordered by range -- confirm
  // against the table definition.
  int first = 0;
  int last = sets.length - 1;
  while (last >= first)
    {
      int probe = (first + last) >> 1;
      UnicodeBlock candidate = sets[probe];
      if (candidate.start > ch)
        last = probe - 1;
      else if (candidate.end < ch)
        first = probe + 1;
      else
        return candidate;
    }
  return null;
}
// Control and format categories.
public static final byte CONTROL = 15;
// Note: The JCL book says that both FORMAT and PRIVATE_USE are 18.
// However, FORMAT is actually 16; only PRIVATE_USE is 18.
public static final byte FORMAT = 16;
/**
 * Basic Latin.
 * Covers U+0000 through U+007F.
 */
public static final UnicodeBlock BASIC_LATIN
  = new UnicodeBlock('\u0000', '\u007F', "BASIC_LATIN");
// Remaining categories: unassigned, private use and surrogate.
public static final byte UNASSIGNED = 0;
public static final byte PRIVATE_USE = 18;
public static final byte SURROGATE = 19;
/**
 * Latin-1 Supplement.
 * Covers U+0080 through U+00FF.
 */
public static final UnicodeBlock LATIN_1_SUPPLEMENT
  = new UnicodeBlock('\u0080', '\u00FF', "LATIN_1_SUPPLEMENT");
private static final long serialVersionUID = 3786198910865385080L;
/**
* Latin Extended-A.
* '\u0100' - '\u017F'.
*/
public final static UnicodeBlock LATIN_EXTENDED_A
= new UnicodeBlock('\u0100', '\u017F',
"LATIN_EXTENDED_A");
/**
 * Wraps a primitive char in a Character object.
 *
 * @param ch the char value this object will hold
 */
public Character (char ch)
{
  this.value = ch;
}
/**
* Latin Extended-B.
* '\u0180' - '\u024F'.
*/
public final static UnicodeBlock LATIN_EXTENDED_B
= new UnicodeBlock('\u0180', '\u024F',
"LATIN_EXTENDED_B");
/**
 * Returns the char held by this object.
 *
 * @return the wrapped char value
 */
public char charValue ()
{
  return this.value;
}
/**
* IPA Extensions.
* '\u0250' - '\u02AF'.
*/
public final static UnicodeBlock IPA_EXTENSIONS
= new UnicodeBlock('\u0250', '\u02AF',
"IPA_EXTENSIONS");
/**
 * Native lookup of a character's digit value.
 *
 * @param ch the character to examine
 * @return the corresponding value if ch is a digit, otherwise -1
 */
private static native int digit_value (char ch);
/**
* Spacing Modifier Letters.
* '\u02B0' - '\u02FF'.
*/
public final static UnicodeBlock SPACING_MODIFIER_LETTERS
= new UnicodeBlock('\u02B0', '\u02FF',
"SPACING_MODIFIER_LETTERS");
/**
 * Converts a character to the digit it stands for in a given radix.
 *
 * @param ch the character to convert
 * @param radix the radix to interpret ch in
 * @return the digit value, or -1 if the radix is outside
 *         MIN_RADIX..MAX_RADIX or ch is not a digit below the radix
 */
public static int digit (char ch, int radix)
{
  if (radix < MIN_RADIX || radix > MAX_RADIX)
    return -1;

  int result = digit_value (ch);
  if (result == -1)
    {
      // Not a digit character: the Latin letters, in either case,
      // stand for the values from 10 upward.
      if ('A' <= ch && ch <= 'Z')
        result = ch - 'A' + 10;
      else if ('a' <= ch && ch <= 'z')
        result = ch - 'a' + 10;
      else
        return -1;
    }

  // A value that does not fit in the radix is not a digit of it.
  return result < radix ? result : -1;
}
/**
* Combining Diacritical Marks.
* '\u0300' - '\u036F'.
*/
public final static UnicodeBlock COMBINING_DIACRITICAL_MARKS
= new UnicodeBlock('\u0300', '\u036F',
"COMBINING_DIACRITICAL_MARKS");
/**
 * Compares this object with another for equality.
 *
 * @param obj the object to compare with
 * @return true if obj is a Character holding the same char value
 */
public boolean equals (Object obj)
{
  // instanceof is false for null, so no separate null test is needed.
  return obj instanceof Character && ((Character) obj).value == value;
}
/**
* Greek.
* '\u0370' - '\u03FF'.
*/
public final static UnicodeBlock GREEK
= new UnicodeBlock('\u0370', '\u03FF',
"GREEK");
/**
 * Converts a digit value to the character that represents it.
 *
 * @param d the digit value
 * @param rdx the radix the digit belongs to
 * @return the character for d, or the NUL character if rdx is outside
 *         MIN_RADIX..MAX_RADIX or d is not in the range 0..rdx-1
 */
public static char forDigit (int d, int rdx)
{
  boolean radixOk = rdx >= MIN_RADIX && rdx <= MAX_RADIX;
  boolean digitOk = d >= 0 && d < rdx;
  if (! (radixOk && digitOk))
    return '\u0000';

  // The Java Language Spec asks for lowercase letters while the JCL
  // asks for uppercase; this implementation follows the former.
  return (char) (d < 10 ? '0' + d : 'a' + d - 10);
}
/**
* Cyrillic.
* '\u0400' - '\u04FF'.
*/
public final static UnicodeBlock CYRILLIC
= new UnicodeBlock('\u0400', '\u04FF',
"CYRILLIC");
// Native method; no Java body here.  Presumably yields the numeric value
// of a character -- confirm against the native implementation.
public static native int getNumericValue (char ch);
// Native method; no Java body here.  The predicates in this class compare
// its result against the category constants such as UNASSIGNED and
// UPPERCASE_LETTER.
public static native int getType (char ch);
/**
* Armenian.
* '\u0530' - '\u058F'.
*/
public final static UnicodeBlock ARMENIAN
= new UnicodeBlock('\u0530', '\u058F',
"ARMENIAN");
/**
 * Returns a hash code for this object.
 *
 * @return the wrapped char value, widened to int
 */
public int hashCode ()
{
  return (int) this.value;
}
/**
* Hebrew.
* '\u0590' - '\u05FF'.
*/
public final static UnicodeBlock HEBREW
= new UnicodeBlock('\u0590', '\u05FF',
"HEBREW");
/**
 * Tells whether a character has a type other than UNASSIGNED.
 *
 * @param ch the character to test
 * @return true unless getType reports UNASSIGNED for ch
 */
public static boolean isDefined (char ch)
{
  int category = getType (ch);
  return category != UNASSIGNED;
}
/**
* Arabic.
* '\u0600' - '\u06FF'.
*/
public final static UnicodeBlock ARABIC
= new UnicodeBlock('\u0600', '\u06FF',
"ARABIC");
/**
 * Tells whether a character is a digit.
 *
 * @param ch the character to test
 * @return true if the native digit lookup finds a value for ch
 */
public static boolean isDigit (char ch)
{
  // digit_value answers -1 for anything that is not a digit.
  int digitValue = digit_value (ch);
  return digitValue != -1;
}
/**
* Syriac.
* '\u0700' - '\u074F'.
* @since 1.4
*/
public final static UnicodeBlock SYRIAC
= new UnicodeBlock('\u0700', '\u074F',
"SYRIAC");
/**
 * Tells whether a character may be ignored inside an identifier.
 * The JCL book gives the argument type as Character; that is wrong.
 *
 * The ranges come from the Unicode Standard and are written out by hand
 * because they do not appear in the unidata table.
 *
 * @param ch the character to test
 * @return true if ch falls in one of the ignorable ranges
 */
public static boolean isIdentifierIgnorable (char ch)
{
  // A char is never below zero, so only the upper bound needs a test.
  if (ch <= '\u0008')
    return true;
  if (ch >= '\u000e' && ch <= '\u001b')
    return true;
  // The JDK 1.2 docs say that these are ignorable; the Unicode Standard
  // is somewhat ambiguous on the issue.
  if (ch >= '\u007f' && ch <= '\u009f')
    return true;
  if (ch >= '\u200c' && ch <= '\u200f')
    return true;
  // The JCL says 200a through 200e, but that is a typo: the Unicode
  // Standard puts the bidi controls at 202a through 202e.
  if (ch >= '\u202a' && ch <= '\u202e')
    return true;
  if (ch >= '\u206a' && ch <= '\u206f')
    return true;
  return ch == '\ufeff';
}
/**
* Thaana.
* '\u0780' - '\u07BF'.
* @since 1.4
*/
public final static UnicodeBlock THAANA
= new UnicodeBlock('\u0780', '\u07BF',
"THAANA");
/**
 * Tells whether a character lies in one of the two control ranges,
 * 0000 through 001f or 007f through 009f.
 *
 * @param c the character to test
 * @return true if c is in either range
 */
public static boolean isISOControl (char c)
{
  // A char is never below zero, so the first range needs one bound only.
  if (c <= '\u001f')
    return true;
  return '\u007f' <= c && c <= '\u009f';
}
/**
* Devanagari.
* '\u0900' - '\u097F'.
*/
public final static UnicodeBlock DEVANAGARI
= new UnicodeBlock('\u0900', '\u097F',
"DEVANAGARI");
/**
 * Tells whether a character may appear after the first character of a
 * Java identifier.
 *
 * @param ch the character to test
 * @return true if ch is identifier-ignorable, a digit, or has one of the
 *         mark, currency, connector, letter or letter-number types
 */
public static boolean isJavaIdentifierPart (char ch)
{
  if (isIdentifierIgnorable (ch))
    return true;
  if (isDigit (ch))
    return true;

  int category = getType (ch);
  boolean mark = category == COMBINING_SPACING_MARK
    || category == NON_SPACING_MARK;
  boolean symbol = category == CURRENCY_SYMBOL
    || category == CONNECTOR_PUNCTUATION;
  boolean letter = category == UPPERCASE_LETTER
    || category == LOWERCASE_LETTER
    || category == TITLECASE_LETTER
    || category == MODIFIER_LETTER
    || category == OTHER_LETTER;
  return mark || symbol || letter || category == LETTER_NUMBER;
}
/**
* Bengali.
* '\u0980' - '\u09FF'.
*/
public final static UnicodeBlock BENGALI
= new UnicodeBlock('\u0980', '\u09FF',
"BENGALI");
/**
 * Tells whether a character may begin a Java identifier.
 *
 * @param ch the character to test
 * @return true if ch has the currency, connector or one of the letter
 *         types
 */
public static boolean isJavaIdentifierStart (char ch)
{
  int category = getType (ch);
  boolean symbol = category == CURRENCY_SYMBOL
    || category == CONNECTOR_PUNCTUATION;
  boolean letter = category == UPPERCASE_LETTER
    || category == LOWERCASE_LETTER
    || category == TITLECASE_LETTER
    || category == MODIFIER_LETTER
    || category == OTHER_LETTER;
  return symbol || letter;
}
/**
* Gurmukhi.
* '\u0A00' - '\u0A7F'.
*/
public final static UnicodeBlock GURMUKHI
= new UnicodeBlock('\u0A00', '\u0A7F',
"GURMUKHI");
// Deprecated in 1.2.
// Accepts the dollar sign, the underscore and anything isLetter accepts.
public static boolean isJavaLetter (char ch)
{
  if (ch == '$' || ch == '_')
    return true;
  return isLetter (ch);
}
/**
* Gujarati.
* '\u0A80' - '\u0AFF'.
*/
public final static UnicodeBlock GUJARATI
= new UnicodeBlock('\u0A80', '\u0AFF',
"GUJARATI");
// Deprecated in 1.2.
// Accepts the dollar sign, the underscore and anything isLetterOrDigit
// accepts.
public static boolean isJavaLetterOrDigit (char ch)
{
  if (ch == '$' || ch == '_')
    return true;
  return isLetterOrDigit (ch);
}
/**
* Oriya.
* '\u0B00' - '\u0B7F'.
*/
public final static UnicodeBlock ORIYA
= new UnicodeBlock('\u0B00', '\u0B7F',
"ORIYA");
/**
 * Tells whether a character is a letter.
 *
 * @param ch the character to test
 * @return true if getType reports one of the five letter types for ch
 */
public static boolean isLetter (char ch)
{
  int category = getType (ch);
  boolean cased = category == UPPERCASE_LETTER
    || category == LOWERCASE_LETTER
    || category == TITLECASE_LETTER;
  boolean uncased = category == MODIFIER_LETTER
    || category == OTHER_LETTER;
  return cased || uncased;
}
/**
* Tamil.
* '\u0B80' - '\u0BFF'.
*/
public final static UnicodeBlock TAMIL
= new UnicodeBlock('\u0B80', '\u0BFF',
"TAMIL");
/**
 * Tells whether a character is a digit or a letter.
 *
 * @param ch the character to test
 * @return true if isDigit or isLetter accepts ch
 */
public static boolean isLetterOrDigit (char ch)
{
  // Digits are tried first; letters are consulted only if that fails.
  if (isDigit (ch))
    return true;
  return isLetter (ch);
}
/**
* Telugu.
* '\u0C00' - '\u0C7F'.
*/
public final static UnicodeBlock TELUGU
= new UnicodeBlock('\u0C00', '\u0C7F',
"TELUGU");
// Native method; no Java body here.  Presumably tests for a lowercase
// letter -- confirm against the native implementation.
public static native boolean isLowerCase (char ch);
/**
* Kannada.
* '\u0C80' - '\u0CFF'.
*/
public final static UnicodeBlock KANNADA
= new UnicodeBlock('\u0C80', '\u0CFF',
"KANNADA");
// Deprecated in JCL.
// Recognizes exactly five characters: newline, tab, form feed, carriage
// return and the plain space.
public static boolean isSpace (char ch)
{
  switch (ch)
    {
    case '\n':
    case '\t':
    case '\f':
    case '\r':
    case ' ':
      return true;
    default:
      return false;
    }
}
/**
* Malayalam.
* '\u0D00' - '\u0D7F'.
*/
public final static UnicodeBlock MALAYALAM
= new UnicodeBlock('\u0D00', '\u0D7F',
"MALAYALAM");
// Native method; no Java body here.  isWhitespace uses it as its general
// test once U+00A0 and U+FEFF have been ruled out.
public static native boolean isSpaceChar (char ch);
// Native method; no Java body here.  Presumably tests for a titlecase
// letter -- confirm against the native implementation.
public static native boolean isTitleCase (char ch);
/**
* Sinhala.
* '\u0D80' - '\u0DFF'.
* @since 1.4
*/
public final static UnicodeBlock SINHALA
= new UnicodeBlock('\u0D80', '\u0DFF',
"SINHALA");
/**
 * Tells whether a character may appear after the first character of a
 * Unicode identifier.
 *
 * @param ch the character to test
 * @return true if ch is identifier-ignorable, a digit, or has one of the
 *         connector, letter-number, mark or letter types
 */
public static boolean isUnicodeIdentifierPart (char ch)
{
  if (isIdentifierIgnorable (ch))
    return true;
  if (isDigit (ch))
    return true;

  int category = getType (ch);
  boolean mark = category == COMBINING_SPACING_MARK
    || category == NON_SPACING_MARK;
  boolean letter = category == UPPERCASE_LETTER
    || category == LOWERCASE_LETTER
    || category == TITLECASE_LETTER
    || category == MODIFIER_LETTER
    || category == OTHER_LETTER;
  return category == CONNECTOR_PUNCTUATION
    || category == LETTER_NUMBER
    || mark
    || letter;
}
/**
* Thai.
* '\u0E00' - '\u0E7F'.
*/
public final static UnicodeBlock THAI
= new UnicodeBlock('\u0E00', '\u0E7F',
"THAI");
/**
 * Tells whether a character may begin a Unicode identifier.
 *
 * @param ch the character to test
 * @return exactly what isLetter returns for ch
 */
public static boolean isUnicodeIdentifierStart (char ch)
{
  boolean letter = isLetter (ch);
  return letter;
}
/**
* Lao.
* '\u0E80' - '\u0EFF'.
*/
public final static UnicodeBlock LAO
= new UnicodeBlock('\u0E80', '\u0EFF',
"LAO");
// Native method; no Java body here.  Presumably tests for an uppercase
// letter -- confirm against the native implementation.
public static native boolean isUpperCase (char ch);
/**
* Tibetan.
* '\u0F00' - '\u0FFF'.
*/
public final static UnicodeBlock TIBETAN
= new UnicodeBlock('\u0F00', '\u0FFF',
"TIBETAN");
/**
 * Tells whether a character counts as whitespace.
 *
 * @param ch the character to test
 * @return true if ch is in 0009 through 000d or 001c through 001f, or if
 *         isSpaceChar accepts it and it is neither 00a0 nor feff
 */
public static boolean isWhitespace (char ch)
{
  if (ch >= '\u0009' && ch <= '\r')
    return true;
  if (ch >= '\u001c' && ch <= '\u001f')
    return true;
  // These two are rejected outright, before isSpaceChar is consulted.
  if (ch == '\u00a0' || ch == '\ufeff')
    return false;
  return isSpaceChar (ch);
}
/**
* Myanmar.
* '\u1000' - '\u109F'.
* @since 1.4
*/
public final static UnicodeBlock MYANMAR
= new UnicodeBlock('\u1000', '\u109F',
"MYANMAR");
// Native case-mapping methods; none of them has a Java body here.
// Presumably each maps ch to its lowercase, titlecase or uppercase
// counterpart -- confirm against the native implementation.
public static native char toLowerCase (char ch);
public static native char toTitleCase (char ch);
public static native char toUpperCase (char ch);
/**
* Georgian.
* '\u10A0' - '\u10FF'.
*/
public final static UnicodeBlock GEORGIAN
= new UnicodeBlock('\u10A0', '\u10FF',
"GEORGIAN");
/**
 * Returns this object's char as a String.
 *
 * @return a String built from the wrapped char value
 */
public String toString ()
{
  final char held = this.value;
  return String.valueOf(held);
}
/**
* Hangul Jamo.
* '\u1100' - '\u11FF'.
*/
public final static UnicodeBlock HANGUL_JAMO
= new UnicodeBlock('\u1100', '\u11FF',
"HANGUL_JAMO");
/**
 * Compares two Characters by their char values.
 *
 * @param anotherCharacter the Character to compare with
 * @return this object's value minus the other's, so the sign gives the
 *         ordering and zero means equal values
 */
public int compareTo (Character anotherCharacter)
{
  // Both operands are widened to int, so the subtraction cannot wrap.
  int mine = this.value;
  int theirs = anotherCharacter.value;
  return mine - theirs;
}
/**
* Ethiopic.
* '\u1200' - '\u137F'.
* @since 1.4
*/
public final static UnicodeBlock ETHIOPIC
= new UnicodeBlock('\u1200', '\u137F',
"ETHIOPIC");
/**
 * Compares this object with another object, which must be a Character.
 *
 * @param o the object to compare with
 * @return the result of compareTo(Character) on o
 */
public int compareTo (Object o)
{
  // The cast is unchecked by any prior test; a non-Character fails here.
  Character other = (Character) o;
  return compareTo (other);
}
/**
* Cherokee.
* '\u13A0' - '\u13FF'.
* @since 1.4
*/
public final static UnicodeBlock CHEROKEE
= new UnicodeBlock('\u13A0', '\u13FF',
"CHEROKEE");
// Private data.
// The char wrapped by this object: assigned by the constructor and read by
// charValue, equals, hashCode, toString and compareTo.
private char value;
/**
* Unified Canadian Aboriginal Syllabics.
* '\u1400' - '\u167F'.
* @since 1.4
*/
public final static UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
= new UnicodeBlock('\u1400', '\u167F',
"UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS");
public static class Subset
{
/**
 * Creates a subset with the given name.
 *
 * @param name the name that toString will report
 */
protected Subset (String name)
{
  this.name = name;
}
/**
* Ogham.
* '\u1680' - '\u169F'.
* @since 1.4
*/
public final static UnicodeBlock OGHAM
= new UnicodeBlock('\u1680', '\u169F',
"OGHAM");
/**
 * Compares by identity: a subset is equal only to itself.
 *
 * @param obj the object to compare with
 * @return true if obj is this very object
 */
public final boolean equals (Object obj)
{
  return this == obj;
}
/**
* Runic.
* '\u16A0' - '\u16FF'.
* @since 1.4
*/
public final static UnicodeBlock RUNIC
= new UnicodeBlock('\u16A0', '\u16FF',
"RUNIC");
/**
 * Returns the hash code supplied by the superclass; declared here so
 * that it is final.
 *
 * @return the superclass hash code of this object
 */
public final int hashCode ()
{
  final int inherited = super.hashCode ();
  return inherited;
}
/**
* Khmer.
* '\u1780' - '\u17FF'.
* @since 1.4
*/
public final static UnicodeBlock KHMER
= new UnicodeBlock('\u1780', '\u17FF',
"KHMER");
/**
 * Returns the name of this subset.
 *
 * @return the name given to the constructor
 */
public final String toString ()
{
  return this.name;
}
/**
* Mongolian.
* '\u1800' - '\u18AF'.
* @since 1.4
*/
public final static UnicodeBlock MONGOLIAN
= new UnicodeBlock('\u1800', '\u18AF',
"MONGOLIAN");
// Name of this subset; assigned by the constructor and returned by
// toString.
private String name;
}
/**
* Latin Extended Additional.
* '\u1E00' - '\u1EFF'.
*/
public final static UnicodeBlock LATIN_EXTENDED_ADDITIONAL
= new UnicodeBlock('\u1E00', '\u1EFF',
"LATIN_EXTENDED_ADDITIONAL");
public static final class UnicodeBlock extends Subset
{
// Creates a block with the given name covering the characters from
// start through end.
private UnicodeBlock (String name, char start, char end)
{
  super (name);
  // The two bounds are independent of each other.
  this.end = end;
  this.start = start;
}
/**
* Greek Extended.
* '\u1F00' - '\u1FFF'.
*/
public final static UnicodeBlock GREEK_EXTENDED
= new UnicodeBlock('\u1F00', '\u1FFF',
"GREEK_EXTENDED");
public static UnicodeBlock of (char c)
{
// A special case we need.
if (c == '\uFEFF')
return SPECIALS;
// Do a binary search to find the correct subset.
int hi = blocks.length;
int lo = 0;
while (hi > lo)
{
int mid = (hi + lo) / 2;
UnicodeBlock ub = blocks[mid];
if (c < ub.start)
hi = mid;
else if (c > ub.end)
lo = mid;
else
return ub;
}
/**
* General Punctuation.
* '\u2000' - '\u206F'.
*/
public final static UnicodeBlock GENERAL_PUNCTUATION
= new UnicodeBlock('\u2000', '\u206F',
"GENERAL_PUNCTUATION");
return null;
}
/**
* Superscripts and Subscripts.
* '\u2070' - '\u209F'.
*/
public final static UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
= new UnicodeBlock('\u2070', '\u209F',
"SUPERSCRIPTS_AND_SUBSCRIPTS");
/**
* Currency Symbols.
* '\u20A0' - '\u20CF'.
*/
public final static UnicodeBlock CURRENCY_SYMBOLS
= new UnicodeBlock('\u20A0', '\u20CF',
"CURRENCY_SYMBOLS");
/**
* Combining Marks for Symbols.
* '\u20D0' - '\u20FF'.
*/
public final static UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
= new UnicodeBlock('\u20D0', '\u20FF',
"COMBINING_MARKS_FOR_SYMBOLS");
/**
* Letterlike Symbols.
* '\u2100' - '\u214F'.
*/
public final static UnicodeBlock LETTERLIKE_SYMBOLS
= new UnicodeBlock('\u2100', '\u214F',
"LETTERLIKE_SYMBOLS");
/**
* Number Forms.
* '\u2150' - '\u218F'.
*/
public final static UnicodeBlock NUMBER_FORMS
= new UnicodeBlock('\u2150', '\u218F',
"NUMBER_FORMS");
/**
* Arrows.
* '\u2190' - '\u21FF'.
*/
public final static UnicodeBlock ARROWS
= new UnicodeBlock('\u2190', '\u21FF',
"ARROWS");
/**
* Mathematical Operators.
* '\u2200' - '\u22FF'.
*/
public final static UnicodeBlock MATHEMATICAL_OPERATORS
= new UnicodeBlock('\u2200', '\u22FF',
"MATHEMATICAL_OPERATORS");
/**
* Miscellaneous Technical.
* '\u2300' - '\u23FF'.
*/
public final static UnicodeBlock MISCELLANEOUS_TECHNICAL
= new UnicodeBlock('\u2300', '\u23FF',
"MISCELLANEOUS_TECHNICAL");
/**
* Control Pictures.
* '\u2400' - '\u243F'.
*/
public final static UnicodeBlock CONTROL_PICTURES
= new UnicodeBlock('\u2400', '\u243F',
"CONTROL_PICTURES");
/**
* Optical Character Recognition.
* '\u2440' - '\u245F'.
*/
public final static UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
= new UnicodeBlock('\u2440', '\u245F',
"OPTICAL_CHARACTER_RECOGNITION");
// Start and end characters.  of() treats both bounds as inclusive: a
// character matches a block when it is neither below start nor above end.
private char start, end;
// Everything from here to the end of UnicodeBlock is
// automatically generated by the blocks.pl script.
public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock ("Basic Latin", '\u0000', '\u007F');
public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock ("Latin-1 Supplement", '\u0080', '\u00FF');
public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock ("Latin Extended-A", '\u0100', '\u017F');
public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock ("Latin Extended-B", '\u0180', '\u024F');
public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock ("IPA Extensions", '\u0250', '\u02AF');
public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock ("Spacing Modifier Letters", '\u02B0', '\u02FF');
public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock ("Combining Diacritical Marks", '\u0300', '\u036F');
public static final UnicodeBlock GREEK = new UnicodeBlock ("Greek", '\u0370', '\u03FF');
public static final UnicodeBlock CYRILLIC = new UnicodeBlock ("Cyrillic", '\u0400', '\u04FF');
public static final UnicodeBlock ARMENIAN = new UnicodeBlock ("Armenian", '\u0530', '\u058F');
public static final UnicodeBlock HEBREW = new UnicodeBlock ("Hebrew", '\u0590', '\u05FF');
public static final UnicodeBlock ARABIC = new UnicodeBlock ("Arabic", '\u0600', '\u06FF');
public static final UnicodeBlock SYRIAC__ = new UnicodeBlock ("Syriac ", '\u0700', '\u074F');
public static final UnicodeBlock THAANA = new UnicodeBlock ("Thaana", '\u0780', '\u07BF');
public static final UnicodeBlock DEVANAGARI = new UnicodeBlock ("Devanagari", '\u0900', '\u097F');
public static final UnicodeBlock BENGALI = new UnicodeBlock ("Bengali", '\u0980', '\u09FF');
public static final UnicodeBlock GURMUKHI = new UnicodeBlock ("Gurmukhi", '\u0A00', '\u0A7F');
public static final UnicodeBlock GUJARATI = new UnicodeBlock ("Gujarati", '\u0A80', '\u0AFF');
public static final UnicodeBlock ORIYA = new UnicodeBlock ("Oriya", '\u0B00', '\u0B7F');
public static final UnicodeBlock TAMIL = new UnicodeBlock ("Tamil", '\u0B80', '\u0BFF');
public static final UnicodeBlock TELUGU = new UnicodeBlock ("Telugu", '\u0C00', '\u0C7F');
public static final UnicodeBlock KANNADA = new UnicodeBlock ("Kannada", '\u0C80', '\u0CFF');
public static final UnicodeBlock MALAYALAM = new UnicodeBlock ("Malayalam", '\u0D00', '\u0D7F');
public static final UnicodeBlock SINHALA = new UnicodeBlock ("Sinhala", '\u0D80', '\u0DFF');
public static final UnicodeBlock THAI = new UnicodeBlock ("Thai", '\u0E00', '\u0E7F');
public static final UnicodeBlock LAO = new UnicodeBlock ("Lao", '\u0E80', '\u0EFF');
public static final UnicodeBlock TIBETAN = new UnicodeBlock ("Tibetan", '\u0F00', '\u0FFF');
public static final UnicodeBlock MYANMAR_ = new UnicodeBlock ("Myanmar ", '\u1000', '\u109F');
public static final UnicodeBlock GEORGIAN = new UnicodeBlock ("Georgian", '\u10A0', '\u10FF');
public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock ("Hangul Jamo", '\u1100', '\u11FF');
public static final UnicodeBlock ETHIOPIC = new UnicodeBlock ("Ethiopic", '\u1200', '\u137F');
public static final UnicodeBlock CHEROKEE = new UnicodeBlock ("Cherokee", '\u13A0', '\u13FF');
public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock ("Unified Canadian Aboriginal Syllabics", '\u1400', '\u167F');
public static final UnicodeBlock OGHAM = new UnicodeBlock ("Ogham", '\u1680', '\u169F');
public static final UnicodeBlock RUNIC = new UnicodeBlock ("Runic", '\u16A0', '\u16FF');
public static final UnicodeBlock KHMER = new UnicodeBlock ("Khmer", '\u1780', '\u17FF');
public static final UnicodeBlock MONGOLIAN = new UnicodeBlock ("Mongolian", '\u1800', '\u18AF');
public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock ("Latin Extended Additional", '\u1E00', '\u1EFF');
public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock ("Greek Extended", '\u1F00', '\u1FFF');
public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock ("General Punctuation", '\u2000', '\u206F');
public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock ("Superscripts and Subscripts", '\u2070', '\u209F');
public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock ("Currency Symbols", '\u20A0', '\u20CF');
public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock ("Combining Marks for Symbols", '\u20D0', '\u20FF');
public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock ("Letterlike Symbols", '\u2100', '\u214F');
public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock ("Number Forms", '\u2150', '\u218F');
public static final UnicodeBlock ARROWS = new UnicodeBlock ("Arrows", '\u2190', '\u21FF');
public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock ("Mathematical Operators", '\u2200', '\u22FF');
public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock ("Miscellaneous Technical", '\u2300', '\u23FF');
public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock ("Control Pictures", '\u2400', '\u243F');
public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock ("Optical Character Recognition", '\u2440', '\u245F');
public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock ("Enclosed Alphanumerics", '\u2460', '\u24FF');
public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock ("Box Drawing", '\u2500', '\u257F');
public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock ("Block Elements", '\u2580', '\u259F');
public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock ("Geometric Shapes", '\u25A0', '\u25FF');
public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock ("Miscellaneous Symbols", '\u2600', '\u26FF');
public static final UnicodeBlock DINGBATS = new UnicodeBlock ("Dingbats", '\u2700', '\u27BF');
public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock ("Braille Patterns", '\u2800', '\u28FF');
public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock ("CJK Radicals Supplement", '\u2E80', '\u2EFF');
public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock ("Kangxi Radicals", '\u2F00', '\u2FDF');
public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock ("Ideographic Description Characters", '\u2FF0', '\u2FFF');
public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock ("CJK Symbols and Punctuation", '\u3000', '\u303F');
public static final UnicodeBlock HIRAGANA = new UnicodeBlock ("Hiragana", '\u3040', '\u309F');
public static final UnicodeBlock KATAKANA = new UnicodeBlock ("Katakana", '\u30A0', '\u30FF');
public static final UnicodeBlock BOPOMOFO = new UnicodeBlock ("Bopomofo", '\u3100', '\u312F');
public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock ("Hangul Compatibility Jamo", '\u3130', '\u318F');
public static final UnicodeBlock KANBUN = new UnicodeBlock ("Kanbun", '\u3190', '\u319F');
public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock ("Bopomofo Extended", '\u31A0', '\u31BF');
public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock ("Enclosed CJK Letters and Months", '\u3200', '\u32FF');
public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock ("CJK Compatibility", '\u3300', '\u33FF');
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock ("CJK Unified Ideographs Extension A", '\u3400', '\u4DB5');
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock ("CJK Unified Ideographs", '\u4E00', '\u9FFF');
public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock ("Yi Syllables", '\uA000', '\uA48F');
public static final UnicodeBlock YI_RADICALS = new UnicodeBlock ("Yi Radicals", '\uA490', '\uA4CF');
public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock ("Hangul Syllables", '\uAC00', '\uD7A3');
public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock ("Surrogates Area", '\uD800', '\uDFFF');
public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock ("Private Use Area", '\uE000', '\uF8FF');
public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock ("CJK Compatibility Ideographs", '\uF900', '\uFAFF');
public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock ("Alphabetic Presentation Forms", '\uFB00', '\uFB4F');
public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock ("Arabic Presentation Forms-A", '\uFB50', '\uFDFF');
public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock ("Combining Half Marks", '\uFE20', '\uFE2F');
public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock ("CJK Compatibility Forms", '\uFE30', '\uFE4F');
public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock ("Small Form Variants", '\uFE50', '\uFE6F');
public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock ("Arabic Presentation Forms-B", '\uFE70', '\uFEFE');
public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock ("Halfwidth and Fullwidth Forms", '\uFF00', '\uFFEF');
public static final UnicodeBlock SPECIALS = new UnicodeBlock ("Specials", '\uFFF0', '\uFFFD');
private static final UnicodeBlock[] blocks = {
/**
* Enclosed Alphanumerics.
* '\u2460' - '\u24FF'.
*/
public final static UnicodeBlock ENCLOSED_ALPHANUMERICS
= new UnicodeBlock('\u2460', '\u24FF',
"ENCLOSED_ALPHANUMERICS");
/**
* Box Drawing.
* '\u2500' - '\u257F'.
*/
public final static UnicodeBlock BOX_DRAWING
= new UnicodeBlock('\u2500', '\u257F',
"BOX_DRAWING");
/**
* Block Elements.
* '\u2580' - '\u259F'.
*/
public final static UnicodeBlock BLOCK_ELEMENTS
= new UnicodeBlock('\u2580', '\u259F',
"BLOCK_ELEMENTS");
/**
* Geometric Shapes.
* '\u25A0' - '\u25FF'.
*/
public final static UnicodeBlock GEOMETRIC_SHAPES
= new UnicodeBlock('\u25A0', '\u25FF',
"GEOMETRIC_SHAPES");
/**
* Miscellaneous Symbols.
* '\u2600' - '\u26FF'.
*/
public final static UnicodeBlock MISCELLANEOUS_SYMBOLS
= new UnicodeBlock('\u2600', '\u26FF',
"MISCELLANEOUS_SYMBOLS");
/**
* Dingbats.
* '\u2700' - '\u27BF'.
*/
public final static UnicodeBlock DINGBATS
= new UnicodeBlock('\u2700', '\u27BF',
"DINGBATS");
/**
* Braille Patterns.
* '\u2800' - '\u28FF'.
* @since 1.4
*/
public final static UnicodeBlock BRAILLE_PATTERNS
= new UnicodeBlock('\u2800', '\u28FF',
"BRAILLE_PATTERNS");
/**
* CJK Radicals Supplement.
* '\u2E80' - '\u2EFF'.
* @since 1.4
*/
public final static UnicodeBlock CJK_RADICALS_SUPPLEMENT
= new UnicodeBlock('\u2E80', '\u2EFF',
"CJK_RADICALS_SUPPLEMENT");
/**
* Kangxi Radicals.
* '\u2F00' - '\u2FDF'.
* @since 1.4
*/
public final static UnicodeBlock KANGXI_RADICALS
= new UnicodeBlock('\u2F00', '\u2FDF',
"KANGXI_RADICALS");
/**
* Ideographic Description Characters.
* '\u2FF0' - '\u2FFF'.
* @since 1.4
*/
public final static UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
= new UnicodeBlock('\u2FF0', '\u2FFF',
"IDEOGRAPHIC_DESCRIPTION_CHARACTERS");
/**
* CJK Symbols and Punctuation.
* '\u3000' - '\u303F'.
*/
public final static UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
= new UnicodeBlock('\u3000', '\u303F',
"CJK_SYMBOLS_AND_PUNCTUATION");
/**
* Hiragana.
* '\u3040' - '\u309F'.
*/
public final static UnicodeBlock HIRAGANA
= new UnicodeBlock('\u3040', '\u309F',
"HIRAGANA");
/**
* Katakana.
* '\u30A0' - '\u30FF'.
*/
public final static UnicodeBlock KATAKANA
= new UnicodeBlock('\u30A0', '\u30FF',
"KATAKANA");
/**
* Bopomofo.
* '\u3100' - '\u312F'.
*/
public final static UnicodeBlock BOPOMOFO
= new UnicodeBlock('\u3100', '\u312F',
"BOPOMOFO");
/**
* Hangul Compatibility Jamo.
* '\u3130' - '\u318F'.
*/
public final static UnicodeBlock HANGUL_COMPATIBILITY_JAMO
= new UnicodeBlock('\u3130', '\u318F',
"HANGUL_COMPATIBILITY_JAMO");
/**
* Kanbun.
* '\u3190' - '\u319F'.
*/
public final static UnicodeBlock KANBUN
= new UnicodeBlock('\u3190', '\u319F',
"KANBUN");
/**
* Bopomofo Extended.
* '\u31A0' - '\u31BF'.
* @since 1.4
*/
public final static UnicodeBlock BOPOMOFO_EXTENDED
= new UnicodeBlock('\u31A0', '\u31BF',
"BOPOMOFO_EXTENDED");
/**
* Enclosed CJK Letters and Months.
* '\u3200' - '\u32FF'.
*/
public final static UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
= new UnicodeBlock('\u3200', '\u32FF',
"ENCLOSED_CJK_LETTERS_AND_MONTHS");
/**
* CJK Compatibility.
* '\u3300' - '\u33FF'.
*/
public final static UnicodeBlock CJK_COMPATIBILITY
= new UnicodeBlock('\u3300', '\u33FF',
"CJK_COMPATIBILITY");
/**
* CJK Unified Ideographs Extension A.
* '\u3400' - '\u4DB5'.
* @since 1.4
*/
public final static UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
= new UnicodeBlock('\u3400', '\u4DB5',
"CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A");
/**
* CJK Unified Ideographs.
* '\u4E00' - '\u9FFF'.
*/
public final static UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
= new UnicodeBlock('\u4E00', '\u9FFF',
"CJK_UNIFIED_IDEOGRAPHS");
/**
* Yi Syllables.
* '\uA000' - '\uA48F'.
* @since 1.4
*/
public final static UnicodeBlock YI_SYLLABLES
= new UnicodeBlock('\uA000', '\uA48F',
"YI_SYLLABLES");
/**
* Yi Radicals.
* '\uA490' - '\uA4CF'.
* @since 1.4
*/
public final static UnicodeBlock YI_RADICALS
= new UnicodeBlock('\uA490', '\uA4CF',
"YI_RADICALS");
/**
* Hangul Syllables.
* '\uAC00' - '\uD7A3'.
*/
public final static UnicodeBlock HANGUL_SYLLABLES
= new UnicodeBlock('\uAC00', '\uD7A3',
"HANGUL_SYLLABLES");
/**
* Surrogates Area.
* '\uD800' - '\uDFFF'.
*/
public final static UnicodeBlock SURROGATES_AREA
= new UnicodeBlock('\uD800', '\uDFFF',
"SURROGATES_AREA");
/**
* Private Use Area.
* '\uE000' - '\uF8FF'.
*/
public final static UnicodeBlock PRIVATE_USE_AREA
= new UnicodeBlock('\uE000', '\uF8FF',
"PRIVATE_USE_AREA");
/**
* CJK Compatibility Ideographs.
* '\uF900' - '\uFAFF'.
*/
public final static UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
= new UnicodeBlock('\uF900', '\uFAFF',
"CJK_COMPATIBILITY_IDEOGRAPHS");
/**
* Alphabetic Presentation Forms.
* '\uFB00' - '\uFB4F'.
*/
public final static UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
= new UnicodeBlock('\uFB00', '\uFB4F',
"ALPHABETIC_PRESENTATION_FORMS");
/**
* Arabic Presentation Forms-A.
* '\uFB50' - '\uFDFF'.
*/
public final static UnicodeBlock ARABIC_PRESENTATION_FORMS_A
= new UnicodeBlock('\uFB50', '\uFDFF',
"ARABIC_PRESENTATION_FORMS_A");
/**
* Combining Half Marks.
* '\uFE20' - '\uFE2F'.
*/
public final static UnicodeBlock COMBINING_HALF_MARKS
= new UnicodeBlock('\uFE20', '\uFE2F',
"COMBINING_HALF_MARKS");
/**
* CJK Compatibility Forms.
* '\uFE30' - '\uFE4F'.
*/
public final static UnicodeBlock CJK_COMPATIBILITY_FORMS
= new UnicodeBlock('\uFE30', '\uFE4F',
"CJK_COMPATIBILITY_FORMS");
/**
* Small Form Variants.
* '\uFE50' - '\uFE6F'.
*/
public final static UnicodeBlock SMALL_FORM_VARIANTS
= new UnicodeBlock('\uFE50', '\uFE6F',
"SMALL_FORM_VARIANTS");
/**
* Arabic Presentation Forms-B.
* '\uFE70' - '\uFEFE'.
*/
public final static UnicodeBlock ARABIC_PRESENTATION_FORMS_B
= new UnicodeBlock('\uFE70', '\uFEFE',
"ARABIC_PRESENTATION_FORMS_B");
/**
* Halfwidth and Fullwidth Forms.
* '\uFF00' - '\uFFEF'.
*/
public final static UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
= new UnicodeBlock('\uFF00', '\uFFEF',
"HALFWIDTH_AND_FULLWIDTH_FORMS");
/**
* Specials.
* '\uFEFF', '\uFFF0' - '\uFFFD'.
*/
public final static UnicodeBlock SPECIALS
= new UnicodeBlock('\uFFF0', '\uFFFD',
"SPECIALS");
/**
* The defined subsets, searched by of() with a binary search on each
* block's start and end characters; the entries shown here are listed in
* ascending character order, which that search depends on.
*/
private static final UnicodeBlock sets[] = {
BASIC_LATIN,
LATIN_1_SUPPLEMENT,
LATIN_EXTENDED_A,
......@@ -445,7 +912,7 @@ public final class Character implements Serializable, Comparable
ARMENIAN,
HEBREW,
ARABIC,
SYRIAC__,
SYRIAC,
THAANA,
DEVANAGARI,
BENGALI,
......@@ -460,7 +927,7 @@ public final class Character implements Serializable, Comparable
THAI,
LAO,
TIBETAN,
MYANMAR_,
MYANMAR,
GEORGIAN,
HANGUL_JAMO,
ETHIOPIC,
......@@ -517,7 +984,1180 @@ public final class Character implements Serializable, Comparable
SMALL_FORM_VARIANTS,
ARABIC_PRESENTATION_FORMS_B,
HALFWIDTH_AND_FULLWIDTH_FORMS,
SPECIALS
SPECIALS,
};
} // class UnicodeBlock
/**
* The immutable value of this Character.
*
* @serial the value of this Character
*/
private final char value;
/**
* Compatible with JDK 1.0+.
*/
private static final long serialVersionUID = 3786198910865385080L;
/**
* Smallest value allowed for radix arguments in Java. This value is 2.
*
* @see #digit(char, int)
* @see #forDigit(int, int)
* @see Integer#toString(int, int)
* @see Integer#valueOf(String)
*/
public static final int MIN_RADIX = 2;
/**
* Largest value allowed for radix arguments in Java. This value is 36.
*
* @see #digit(char, int)
* @see #forDigit(int, int)
* @see Integer#toString(int, int)
* @see Integer#valueOf(String)
*/
public static final int MAX_RADIX = 36;
/**
* The minimum value the char data type can hold.
* This value is <code>'\\u0000'</code>.
*/
public static final char MIN_VALUE = '\u0000';
/**
* The maximum value the char data type can hold.
* This value is <code>'\\uFFFF'</code>.
*/
public static final char MAX_VALUE = '\uFFFF';
/**
* Class object representing the primitive char data type.
*
* @since 1.1
*/
public static final Class TYPE = VMClassLoader.getPrimitiveClass('C');
// General category codes, as listed in the documentation of getType(char).
// Several predicates in this class (isLetter, isSpaceChar, isWhitespace, ...)
// use these values as shift counts in (1 << type) bit masks, so every value
// must stay in the range 0-31; that also keeps them within the five bits
// selected by TYPE_MASK. The numeric order does not follow the declaration
// order below (for example COMBINING_SPACING_MARK is 8 while ENCLOSING_MARK
// is 7), and the value 17 is not assigned to any constant in this group.
/**
* Lu = Letter, Uppercase (Informative).
*
* @since 1.1
*/
public static final byte UPPERCASE_LETTER = 1;
/**
* Ll = Letter, Lowercase (Informative).
*
* @since 1.1
*/
public static final byte LOWERCASE_LETTER = 2;
/**
* Lt = Letter, Titlecase (Informative).
*
* @since 1.1
*/
public static final byte TITLECASE_LETTER = 3;
/**
* Mn = Mark, Non-Spacing (Normative).
*
* @since 1.1
*/
public static final byte NON_SPACING_MARK = 6;
/**
* Mc = Mark, Spacing Combining (Normative).
*
* @since 1.1
*/
public static final byte COMBINING_SPACING_MARK = 8;
/**
* Me = Mark, Enclosing (Normative).
*
* @since 1.1
*/
public static final byte ENCLOSING_MARK = 7;
/**
* Nd = Number, Decimal Digit (Normative).
*
* @since 1.1
*/
public static final byte DECIMAL_DIGIT_NUMBER = 9;
/**
* Nl = Number, Letter (Normative).
*
* @since 1.1
*/
public static final byte LETTER_NUMBER = 10;
/**
* No = Number, Other (Normative).
*
* @since 1.1
*/
public static final byte OTHER_NUMBER = 11;
/**
* Zs = Separator, Space (Normative).
*
* @since 1.1
*/
public static final byte SPACE_SEPARATOR = 12;
/**
* Zl = Separator, Line (Normative).
*
* @since 1.1
*/
public static final byte LINE_SEPARATOR = 13;
/**
* Zp = Separator, Paragraph (Normative).
*
* @since 1.1
*/
public static final byte PARAGRAPH_SEPARATOR = 14;
/**
* Cc = Other, Control (Normative).
*
* @since 1.1
*/
public static final byte CONTROL = 15;
/**
* Cf = Other, Format (Normative).
*
* @since 1.1
*/
public static final byte FORMAT = 16;
/**
* Cs = Other, Surrogate (Normative).
*
* @since 1.1
*/
public static final byte SURROGATE = 19;
/**
* Co = Other, Private Use (Normative).
*
* @since 1.1
*/
public static final byte PRIVATE_USE = 18;
/**
* Cn = Other, Not Assigned (Normative).
*
* @since 1.1
*/
public static final byte UNASSIGNED = 0;
/**
* Lm = Letter, Modifier (Informative).
*
* @since 1.1
*/
public static final byte MODIFIER_LETTER = 4;
/**
* Lo = Letter, Other (Informative).
*
* @since 1.1
*/
public static final byte OTHER_LETTER = 5;
/**
* Pc = Punctuation, Connector (Informative).
*
* @since 1.1
*/
public static final byte CONNECTOR_PUNCTUATION = 23;
/**
* Pd = Punctuation, Dash (Informative).
*
* @since 1.1
*/
public static final byte DASH_PUNCTUATION = 20;
/**
* Ps = Punctuation, Open (Informative).
*
* @since 1.1
*/
public static final byte START_PUNCTUATION = 21;
/**
* Pe = Punctuation, Close (Informative).
*
* @since 1.1
*/
public static final byte END_PUNCTUATION = 22;
/**
* Pi = Punctuation, Initial Quote (Informative).
*
* @since 1.4
*/
public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
/**
* Pf = Punctuation, Final Quote (Informative).
*
* @since 1.4
*/
public static final byte FINAL_QUOTE_PUNCTUATION = 30;
/**
* Po = Punctuation, Other (Informative).
*
* @since 1.1
*/
public static final byte OTHER_PUNCTUATION = 24;
/**
* Sm = Symbol, Math (Informative).
*
* @since 1.1
*/
public static final byte MATH_SYMBOL = 25;
/**
* Sc = Symbol, Currency (Informative).
*
* @since 1.1
*/
public static final byte CURRENCY_SYMBOL = 26;
/**
* Sk = Symbol, Modifier (Informative).
*
* @since 1.1
*/
public static final byte MODIFIER_SYMBOL = 27;
/**
* So = Symbol, Other (Informative).
*
* @since 1.1
*/
public static final byte OTHER_SYMBOL = 28;
// Bidirectional character type codes, as listed in the documentation of
// getDirectionality(char).
/**
* Undefined bidirectional character type. Undefined char values have
* undefined directionality in the Unicode specification.
*
* @since 1.4
*/
public static final byte DIRECTIONALITY_UNDEFINED = -1;
/**
* Strong bidirectional character type "L".
*
* @since 1.4
*/
public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
/**
* Strong bidirectional character type "R".
*
* @since 1.4
*/
public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
/**
* Strong bidirectional character type "AL".
*
* @since 1.4
*/
public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
/**
* Weak bidirectional character type "EN".
*
* @since 1.4
*/
public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
/**
* Weak bidirectional character type "ES".
*
* @since 1.4
*/
public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
/**
* Weak bidirectional character type "ET".
*
* @since 1.4
*/
public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
/**
* Weak bidirectional character type "AN".
*
* @since 1.4
*/
public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
/**
* Weak bidirectional character type "CS".
*
* @since 1.4
*/
public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
/**
* Weak bidirectional character type "NSM".
*
* @since 1.4
*/
public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
/**
* Weak bidirectional character type "BN".
*
* @since 1.4
*/
public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
/**
* Neutral bidirectional character type "B".
*
* @since 1.4
*/
public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
/**
* Neutral bidirectional character type "S".
*
* @since 1.4
*/
public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
/**
* Neutral bidirectional character type "WS".
*
* @since 1.4
*/
public static final byte DIRECTIONALITY_WHITESPACE = 12;
/**
* Neutral bidirectional character type "ON".
*
* @since 1.4
*/
public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
/**
* Strong bidirectional character type "LRE".
*
* @since 1.4
*/
public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
/**
* Strong bidirectional character type "LRO".
*
* @since 1.4
*/
public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
/**
* Strong bidirectional character type "RLE".
*
* @since 1.4
*/
public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
/**
* Strong bidirectional character type "RLO".
*
* @since 1.4
*/
public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
/**
* Weak bidirectional character type "PDF".
*
* @since 1.4
*/
public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
/**
* Mask for grabbing the type out of the result of readChar. It selects
* the low five bits (0-4), which hold one of the general category
* constants such as UPPERCASE_LETTER.
* @see #readChar(char)
*/
private static final int TYPE_MASK = 0x1F;
/**
* Mask for grabbing the non-breaking space flag out of the result of
* readChar. This is bit 5; isWhitespace treats a separator with this bit
* set as not being whitespace.
* @see #readChar(char)
*/
private static final int NO_BREAK_MASK = 0x20;
/**
* Mask for grabbing the mirrored directionality flag out of the result
* of readChar. This is bit 6; isMirrored returns true when it is set.
* @see #readChar(char)
*/
private static final int MIRROR_MASK = 0x40;
/**
* Grabs an attribute offset from the Unicode attribute database. The lower
* 5 bits are the character type, the next 2 bits are flags, and the top
* 9 bits are the offset into the attribute tables. Note that the top 9
* bits are meaningless in this context; they are useful only in the native
* code.
*
* <p>Java callers in this class only ever combine the result with
* TYPE_MASK, NO_BREAK_MASK or MIRROR_MASK. The 5 + 2 + 9 bits together
* fill the 16-bit char return type exactly.
*
* @param ch the character to look up
* @return the character's attribute offset and type
* @see #TYPE_MASK
* @see #NO_BREAK_MASK
* @see #MIRROR_MASK
*/
private static native char readChar(char ch);
/**
 * Creates a Character object that holds the given primitive char.
 *
 * @param value the char this object will wrap
 */
public Character(char value)
{
  this.value = value;
}
/**
 * Unwraps this object.
 *
 * @return the primitive char held by this Character
 */
public char charValue()
{
  return this.value;
}
/**
 * Computes the hash code of this Character, which is simply the wrapped
 * char widened to an int. The result therefore lies in 0x0000-0xFFFF and
 * is never negative.
 *
 * @return the wrapped character as an unsigned numerical value
 */
public int hashCode()
{
  return (int) value;
}
/**
 * Tests this Character against an arbitrary object for equality. The
 * result is true only when the argument is itself a Character and wraps
 * the same char; null and objects of any other class yield false.
 *
 * @param o object to compare
 * @return true if o is a Character with the same value
 */
public boolean equals(Object o)
{
  if (! (o instanceof Character))
    return false;
  Character other = (Character) o;
  return other.value == value;
}
/**
 * Produces the String form of this Character.
 *
 * @return a String of length one whose only character is the wrapped
 *         char of this instance
 */
public String toString()
{
  // Delegate to String.valueOf(char), on the assumption that it can build
  // a one-character String more cheaply than the rest of the public API.
  String result = String.valueOf(value);
  return result;
}
/**
 * Returns a String of length 1 representing the specified character.
 *
 * <p>This is a utility that does not look at any instance state, so it is
 * declared static; callers can write <code>Character.toString(ch)</code>
 * without first creating a Character. Existing calls made through an
 * instance continue to compile.
 *
 * @param ch the character to convert
 * @return a String containing the character
 * @since 1.4
 */
public static String toString(char ch)
{
  // This assumes that String.valueOf(char) can create a single-character
  // String more efficiently than through the public API.
  return String.valueOf(ch);
}
/**
 * Tests whether a character is a lowercase letter according to Unicode,
 * i.e. whether its general category is Ll. <code>'a'</code> is one such
 * character.
 * <br>
 * lowercase = [Ll]
 *
 * @param ch character to test
 * @return true if ch is a Unicode lowercase letter, else false
 * @see #isUpperCase(char)
 * @see #isTitleCase(char)
 * @see #toLowerCase(char)
 * @see #getType(char)
 */
public static boolean isLowerCase(char ch)
{
  int category = getType(ch);
  return category == LOWERCASE_LETTER;
}
/**
 * Tests whether a character is an uppercase letter according to Unicode,
 * i.e. whether its general category is Lu. <code>'A'</code> is one such
 * character.
 * <br>
 * uppercase = [Lu]
 *
 * @param ch character to test
 * @return true if ch is a Unicode uppercase letter, else false
 * @see #isLowerCase(char)
 * @see #isTitleCase(char)
 * @see #toUpperCase(char)
 * @see #getType(char)
 */
public static boolean isUpperCase(char ch)
{
  int category = getType(ch);
  return category == UPPERCASE_LETTER;
}
/**
 * Tests whether a character is a titlecase letter according to Unicode,
 * i.e. whether its general category is Lt. An example is the single
 * character "Lj" (Latin capital L with small letter j).
 * <br>
 * titlecase = [Lt]
 *
 * @param ch character to test
 * @return true if ch is a Unicode titlecase letter, else false
 * @see #isLowerCase(char)
 * @see #isUpperCase(char)
 * @see #toTitleCase(char)
 * @see #getType(char)
 */
public static boolean isTitleCase(char ch)
{
  int category = getType(ch);
  return category == TITLECASE_LETTER;
}
/**
 * Tests whether a character is a decimal digit according to Unicode,
 * i.e. whether its general category is Nd. <code>'0'</code> is one such
 * character.
 * <br>
 * Unicode decimal digit = [Nd]
 *
 * @param ch character to test
 * @return true if ch is a Unicode decimal digit, else false
 * @see #digit(char, int)
 * @see #forDigit(int, int)
 * @see #getType(char)
 */
public static boolean isDigit(char ch)
{
  int category = getType(ch);
  return category == DECIMAL_DIGIT_NUMBER;
}
/**
 * Determines if a character is part of the Unicode Standard. This is an
 * evolving standard, but covers every character in the data file.
 * <br>
 * defined = not [Cn]
 *
 * @param ch character to test
 * @return true if ch is a Unicode character, else false
 * @see #isDigit(char)
 * @see #isLetter(char)
 * @see #isLetterOrDigit(char)
 * @see #isLowerCase(char)
 * @see #isTitleCase(char)
 * @see #isUpperCase(char)
 */
public static boolean isDefined(char ch)
{
  // A character is defined exactly when its category is anything other
  // than Cn (UNASSIGNED); testing for equality here would invert the
  // documented result.
  return getType(ch) != UNASSIGNED;
}
/**
 * Tests whether a character is a letter according to Unicode. Letters
 * need not have case, so this can be true for characters on which both
 * isLowerCase and isUpperCase are false.
 * <br>
 * letter = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]
 *
 * @param ch character to test
 * @return true if ch is a Unicode letter, else false
 * @see #isDigit(char)
 * @see #isJavaIdentifierStart(char)
 * @see #isJavaLetter(char)
 * @see #isJavaLetterOrDigit(char)
 * @see #isLetterOrDigit(char)
 * @see #isLowerCase(char)
 * @see #isTitleCase(char)
 * @see #isUnicodeIdentifierStart(char)
 * @see #isUpperCase(char)
 */
public static boolean isLetter(char ch)
{
  // One bit per accepted general category.
  int letterMask = (1 << UPPERCASE_LETTER)
                   | (1 << LOWERCASE_LETTER)
                   | (1 << TITLECASE_LETTER)
                   | (1 << MODIFIER_LETTER)
                   | (1 << OTHER_LETTER);
  int categoryBit = 1 << getType(ch);
  return (categoryBit & letterMask) != 0;
}
/**
 * Tests whether a character is either a Unicode letter or a Unicode
 * decimal digit, i.e. whether isLetter or isDigit would accept it.
 * <br>
 * letter or digit = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nd]
 *
 * @param ch character to test
 * @return true if ch is a Unicode letter or a Unicode digit, else false
 * @see #isDigit(char)
 * @see #isJavaIdentifierPart(char)
 * @see #isJavaLetter(char)
 * @see #isJavaLetterOrDigit(char)
 * @see #isLetter(char)
 * @see #isUnicodeIdentifierPart(char)
 */
public static boolean isLetterOrDigit(char ch)
{
  // One bit per accepted general category.
  int acceptedMask = (1 << UPPERCASE_LETTER)
                     | (1 << LOWERCASE_LETTER)
                     | (1 << TITLECASE_LETTER)
                     | (1 << MODIFIER_LETTER)
                     | (1 << OTHER_LETTER)
                     | (1 << DECIMAL_DIGIT_NUMBER);
  int categoryBit = 1 << getType(ch);
  return (categoryBit & acceptedMask) != 0;
}
/**
 * Tests whether a character may begin a Java identifier. The accepted
 * set is everything isLetter accepts, plus characters whose getType is
 * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
 * (like '_'). This method simply forwards to isJavaIdentifierStart.
 *
 * @param ch character to test
 * @return true if ch can start a Java identifier, else false
 * @deprecated Replaced by {@link #isJavaIdentifierStart(char)}
 * @see #isJavaLetterOrDigit(char)
 * @see #isJavaIdentifierStart(char)
 * @see #isJavaIdentifierPart(char)
 * @see #isLetter(char)
 * @see #isLetterOrDigit(char)
 * @see #isUnicodeIdentifierStart(char)
 */
public static boolean isJavaLetter(char ch)
{
  boolean canStart = isJavaIdentifierStart(ch);
  return canStart;
}
/**
 * Tests whether a character may appear after the first character of a
 * Java identifier. The accepted set is everything isJavaLetter accepts
 * (isLetter, type of LETTER_NUMBER, currency, connecting punctuation)
 * plus digits, numeric letters (like Roman numerals), combining marks,
 * non-spacing marks, and anything isIdentifierIgnorable accepts. This
 * method simply forwards to isJavaIdentifierPart.
 *
 * @param ch character to test
 * @return true if ch can follow the first letter in a Java identifier
 * @deprecated Replaced by {@link #isJavaIdentifierPart(char)}
 * @see #isJavaLetter(char)
 * @see #isJavaIdentifierStart(char)
 * @see #isJavaIdentifierPart(char)
 * @see #isLetter(char)
 * @see #isLetterOrDigit(char)
 * @see #isUnicodeIdentifierPart(char)
 * @see #isIdentifierIgnorable(char)
 */
public static boolean isJavaLetterOrDigit(char ch)
{
  boolean canContinue = isJavaIdentifierPart(ch);
  return canContinue;
}
/**
 * Tests whether a character may begin a Java identifier. The accepted
 * set is everything isLetter accepts, plus characters whose getType is
 * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
 * (like '_').
 * <br>
 * Java identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]
 *
 * @param ch character to test
 * @return true if ch can start a Java identifier, else false
 * @see #isJavaIdentifierPart(char)
 * @see #isLetter(char)
 * @see #isUnicodeIdentifierStart(char)
 * @since 1.1
 */
public static boolean isJavaIdentifierStart(char ch)
{
  // One bit per accepted general category.
  int startMask = (1 << UPPERCASE_LETTER)
                  | (1 << LOWERCASE_LETTER)
                  | (1 << TITLECASE_LETTER)
                  | (1 << MODIFIER_LETTER)
                  | (1 << OTHER_LETTER)
                  | (1 << LETTER_NUMBER)
                  | (1 << CURRENCY_SYMBOL)
                  | (1 << CONNECTOR_PUNCTUATION);
  int categoryBit = 1 << getType(ch);
  return (categoryBit & startMask) != 0;
}
/**
 * Tests whether a character may appear after the first character of a
 * Java identifier. The accepted set is everything isJavaLetter accepts
 * (isLetter, type of LETTER_NUMBER, currency, connecting punctuation)
 * plus digits, numeric letters (like Roman numerals), combining marks,
 * non-spacing marks, and anything isIdentifierIgnorable accepts.
 * <br>
 * Java identifier extender =
 *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]|[Mn]|[Mc]|[Nd]|[Cf]
 *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
 *
 * @param ch character to test
 * @return true if ch can follow the first letter in a Java identifier
 * @see #isIdentifierIgnorable(char)
 * @see #isJavaIdentifierStart(char)
 * @see #isLetterOrDigit(char)
 * @see #isUnicodeIdentifierPart(char)
 * @since 1.1
 */
public static boolean isJavaIdentifierPart(char ch)
{
  // One bit per general category that is accepted outright.
  int partMask = (1 << UPPERCASE_LETTER)
                 | (1 << LOWERCASE_LETTER)
                 | (1 << TITLECASE_LETTER)
                 | (1 << MODIFIER_LETTER)
                 | (1 << OTHER_LETTER)
                 | (1 << NON_SPACING_MARK)
                 | (1 << COMBINING_SPACING_MARK)
                 | (1 << DECIMAL_DIGIT_NUMBER)
                 | (1 << LETTER_NUMBER)
                 | (1 << CURRENCY_SYMBOL)
                 | (1 << CONNECTOR_PUNCTUATION)
                 | (1 << FORMAT);
  int category = getType(ch);
  if (((1 << category) & partMask) != 0)
    return true;
  // Control characters qualify only when they are identifier-ignorable.
  return category == CONTROL && isIdentifierIgnorable(ch);
}
/**
 * Tests whether a character may begin a Unicode identifier. Only letters
 * qualify, where "letter" here also covers the LETTER_NUMBER category.
 * <br>
 * Unicode identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]
 *
 * @param ch character to test
 * @return true if ch can start a Unicode identifier, else false
 * @see #isJavaIdentifierStart(char)
 * @see #isLetter(char)
 * @see #isUnicodeIdentifierPart(char)
 * @since 1.1
 */
public static boolean isUnicodeIdentifierStart(char ch)
{
  // One bit per accepted general category.
  int startMask = (1 << UPPERCASE_LETTER)
                  | (1 << LOWERCASE_LETTER)
                  | (1 << TITLECASE_LETTER)
                  | (1 << MODIFIER_LETTER)
                  | (1 << OTHER_LETTER)
                  | (1 << LETTER_NUMBER);
  int categoryBit = 1 << getType(ch);
  return (categoryBit & startMask) != 0;
}
/**
 * Tests whether a character may appear after the first character of a
 * Unicode identifier. The accepted set is letters, connecting
 * punctuation, digits, numeric letters, combining marks, non-spacing
 * marks, and anything isIdentifierIgnorable accepts.
 * <br>
 * Unicode identifier extender =
 *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Mn]|[Mc]|[Nd]|[Pc]|[Cf]
 *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
 *
 * @param ch character to test
 * @return true if ch can follow the first letter in a Unicode identifier
 * @see #isIdentifierIgnorable(char)
 * @see #isJavaIdentifierPart(char)
 * @see #isLetterOrDigit(char)
 * @see #isUnicodeIdentifierStart(char)
 * @since 1.1
 */
public static boolean isUnicodeIdentifierPart(char ch)
{
  // One bit per general category that is accepted outright.
  int partMask = (1 << UPPERCASE_LETTER)
                 | (1 << LOWERCASE_LETTER)
                 | (1 << TITLECASE_LETTER)
                 | (1 << MODIFIER_LETTER)
                 | (1 << OTHER_LETTER)
                 | (1 << NON_SPACING_MARK)
                 | (1 << COMBINING_SPACING_MARK)
                 | (1 << DECIMAL_DIGIT_NUMBER)
                 | (1 << LETTER_NUMBER)
                 | (1 << CONNECTOR_PUNCTUATION)
                 | (1 << FORMAT);
  int category = getType(ch);
  if (((1 << category) & partMask) != 0)
    return true;
  // Control characters qualify only when they are identifier-ignorable.
  return category == CONTROL && isIdentifierIgnorable(ch);
}
/**
 * Tests whether a character is ignorable inside a Unicode identifier.
 * The ignorable characters are the non-whitespace ISO controls (U+0000
 * through U+0008, U+000E through U+001B, and U+007F through U+009F)
 * together with every character of the FORMAT category.
 * <br>
 * Unicode identifier ignorable = [Cf]|U+0000-U+0008|U+000E-U+001B
 *    |U+007F-U+009F
 *
 * @param ch character to test
 * @return true if ch is ignorable in a Unicode or Java identifier
 * @see #isJavaIdentifierPart(char)
 * @see #isUnicodeIdentifierPart(char)
 * @since 1.1
 */
public static boolean isIdentifierIgnorable(char ch)
{
  if (ch <= '\u009F')
    {
      // Below tab, or from DEL up to the end of the C1 controls.
      if (ch < '\t' || ch >= '\u007F')
        return true;
      if (ch >= '\u000E' && ch <= '\u001B')
        return true;
    }
  // Anything else is ignorable only if it is a format character.
  return getType(ch) == FORMAT;
}
/**
* Converts a Unicode character into its lowercase equivalent mapping.
* If a mapping does not exist, then the character passed is returned.
* Note that isLowerCase(toLowerCase(ch)) does not always return true.
*
* <p>This method is declared native; its implementation is not part of
* this Java source.
*
* @param ch character to convert to lowercase
* @return lowercase mapping of ch, or ch if lowercase mapping does
* not exist
* @see #isLowerCase(char)
* @see #isUpperCase(char)
* @see #toTitleCase(char)
* @see #toUpperCase(char)
*/
public static native char toLowerCase(char ch);
/**
* Converts a Unicode character into its uppercase equivalent mapping.
* If a mapping does not exist, then the character passed is returned.
* Note that isUpperCase(toUpperCase(ch)) does not always return true.
*
* <p>This method is declared native; its implementation is not part of
* this Java source.
*
* @param ch character to convert to uppercase
* @return uppercase mapping of ch, or ch if uppercase mapping does
* not exist
* @see #isLowerCase(char)
* @see #isUpperCase(char)
* @see #toLowerCase(char)
* @see #toTitleCase(char)
*/
public static native char toUpperCase(char ch);
/**
* Converts a Unicode character into its titlecase equivalent mapping.
* If a mapping does not exist, then the character passed is returned.
* Note that isTitleCase(toTitleCase(ch)) does not always return true.
*
* <p>This method is declared native; its implementation is not part of
* this Java source.
*
* @param ch character to convert to titlecase
* @return titlecase mapping of ch, or ch if titlecase mapping does
* not exist
* @see #isTitleCase(char)
* @see #toLowerCase(char)
* @see #toUpperCase(char)
*/
public static native char toTitleCase(char ch);
/**
* Converts a character into a digit of the specified radix. The result is
* -1 if the radix is less than MIN_RADIX or greater than MAX_RADIX, or if
* the result of getNumericValue(ch) is not less than the radix, or if ch
* is not a decimal digit or in the case insensitive set of 'a'-'z'.
* <br>
* character argument boundary = [Nd]|U+0041-U+005A|U+0061-U+007A
* |U+FF21-U+FF3A|U+FF41-U+FF5A
*
* <p>This method is declared native; its implementation is not part of
* this Java source.
*
* @param ch character to convert into a digit
* @param radix radix in which ch is a digit
* @return digit which ch represents in radix, or -1 if ch is not a valid
* digit in that radix
* @see #MIN_RADIX
* @see #MAX_RADIX
* @see #forDigit(int, int)
* @see #isDigit(char)
* @see #getNumericValue(char)
*/
public static native int digit(char ch, int radix);
/**
* Returns the Unicode numeric value property of a character. For example,
* <code>'\\u216C'</code> (the Roman numeral fifty) returns 50.
*
* <p>This method also returns values for the letters A through Z, (not
* specified by Unicode), in these ranges: <code>'\u0041'</code>
* through <code>'\u005A'</code> (uppercase); <code>'\u0061'</code>
* through <code>'\u007A'</code> (lowercase); and <code>'\uFF21'</code>
* through <code>'\uFF3A'</code>, <code>'\uFF41'</code> through
* <code>'\uFF5A'</code> (full width variants).
*
* <p>If the character lacks a numeric value property, -1 is returned.
* If the character has a numeric value property which is not representable
* as a nonnegative integer, such as a fraction, -2 is returned.
*
* <p>character argument boundary = [Nd]|[Nl]|[No]|U+0041-U+005A|U+0061-U+007A
* |U+FF21-U+FF3A|U+FF41-U+FF5A
*
* <p>This method is declared native; its implementation is not part of
* this Java source.
*
* @param ch character from which the numeric value property will
* be retrieved
* @return the numeric value property of ch, or -1 if it does not exist, or
* -2 if it is not representable as a nonnegative integer
* @see #forDigit(int, int)
* @see #digit(char, int)
* @see #isDigit(char)
* @since 1.1
*/
public static native int getNumericValue(char ch);
/**
 * Determines if a character is an ISO-LATIN-1 space. This is only the five
 * characters <code>'\t'</code>, <code>'\n'</code>, <code>'\f'</code>,
 * <code>'\r'</code>, and <code>' '</code>.
 * <br>
 * Java space = U+0020|U+0009|U+000A|U+000C|U+000D
 *
 * @param ch character to test
 * @return true if ch is a space, else false
 * @deprecated Replaced by {@link #isWhitespace(char)}
 * @see #isSpaceChar(char)
 * @see #isWhitespace(char)
 */
public static boolean isSpace(char ch)
{
  // Compare directly instead of shifting by (ch - 1): for U+0000 that
  // decrement wraps to U+FFFF, and because an int shift uses only the low
  // five bits of its count, the result landed on the bit reserved for
  // ' ', so NUL was wrongly reported as a space.
  return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\f' || ch == '\r';
}
/**
 * Tests whether a character is a space character according to Unicode,
 * i.e. whether its general category is SPACE_SEPARATOR, LINE_SEPARATOR,
 * or PARAGRAPH_SEPARATOR.
 * <br>
 * Unicode space = [Zs]|[Zp]|[Zl]
 *
 * @param ch character to test
 * @return true if ch is a Unicode space, else false
 * @see #isWhitespace(char)
 * @since 1.1
 */
public static boolean isSpaceChar(char ch)
{
  // One bit per accepted general category.
  int separatorMask = (1 << SPACE_SEPARATOR)
                      | (1 << LINE_SEPARATOR)
                      | (1 << PARAGRAPH_SEPARATOR);
  int categoryBit = 1 << getType(ch);
  return (categoryBit & separatorMask) != 0;
}
/**
 * Tests whether a character is whitespace in the Java sense. Whitespace
 * consists of the Unicode space characters (SPACE_SEPARATOR,
 * LINE_SEPARATOR, and PARAGRAPH_SEPARATOR) other than the non-breaking
 * spaces (U+00A0, U+2007, and U+202F), together with the control
 * characters U+0009 through U+000D and U+001C through U+001F.
 * <br>
 * Java whitespace = ([Zs] not Nb)|[Zl]|[Zp]|U+0009-U+000D|U+001C-U+001F
 *
 * @param ch character to test
 * @return true if ch is Java whitespace, else false
 * @see #isSpaceChar(char)
 * @since 1.1
 */
public static boolean isWhitespace(char ch)
{
  int attr = readChar(ch);
  int separatorMask = (1 << SPACE_SEPARATOR)
                      | (1 << LINE_SEPARATOR)
                      | (1 << PARAGRAPH_SEPARATOR);
  // A separator counts unless it carries the no-break flag.
  boolean isSeparator = ((1 << (attr & TYPE_MASK)) & separatorMask) != 0;
  if (isSeparator && (attr & NO_BREAK_MASK) == 0)
    return true;
  // Otherwise only a handful of characters at or below U+001F qualify.
  if (ch > '\u001F')
    return false;
  int controlMask = (1 << '\t')
                    | (1 << '\n')
                    | (1 << '\u000B')
                    | (1 << '\u000C')
                    | (1 << '\r')
                    | (1 << '\u001C')
                    | (1 << '\u001D')
                    | (1 << '\u001E')
                    | (1 << '\u001F');
  return ((1 << ch) & controlMask) != 0;
}
/**
 * Tests whether a character has the ISO Control property, i.e. whether
 * its general category is Cc.
 * <br>
 * ISO Control = [Cc]
 *
 * @param ch character to test
 * @return true if ch is an ISO Control character, else false
 * @see #isSpaceChar(char)
 * @see #isWhitespace(char)
 * @since 1.1
 */
public static boolean isISOControl(char ch)
{
  int category = getType(ch);
  return category == CONTROL;
}
/**
* Returns the Unicode general category property of a character. The
* result is compared against the category constants listed below by the
* predicates of this class, many of which also use it as a shift count in
* a <code>1 &lt;&lt; type</code> bit mask.
*
* <p>This method is declared native; its implementation is not part of
* this Java source.
*
* @param ch character from which the general category property will
* be retrieved
* @return the character category property of ch as an integer
* @see #UNASSIGNED
* @see #UPPERCASE_LETTER
* @see #LOWERCASE_LETTER
* @see #TITLECASE_LETTER
* @see #MODIFIER_LETTER
* @see #OTHER_LETTER
* @see #NON_SPACING_MARK
* @see #ENCLOSING_MARK
* @see #COMBINING_SPACING_MARK
* @see #DECIMAL_DIGIT_NUMBER
* @see #LETTER_NUMBER
* @see #OTHER_NUMBER
* @see #SPACE_SEPARATOR
* @see #LINE_SEPARATOR
* @see #PARAGRAPH_SEPARATOR
* @see #CONTROL
* @see #FORMAT
* @see #PRIVATE_USE
* @see #SURROGATE
* @see #DASH_PUNCTUATION
* @see #START_PUNCTUATION
* @see #END_PUNCTUATION
* @see #CONNECTOR_PUNCTUATION
* @see #OTHER_PUNCTUATION
* @see #MATH_SYMBOL
* @see #CURRENCY_SYMBOL
* @see #MODIFIER_SYMBOL
* @see #INITIAL_QUOTE_PUNCTUATION
* @see #FINAL_QUOTE_PUNCTUATION
* @since 1.1
*/
public static native int getType(char ch);
/**
 * Maps a numeric digit to the character that represents it in the given
 * radix. The null character <code>'\0'</code> is returned when the radix
 * is less than MIN_RADIX or greater than MAX_RADIX, or when the digit is
 * negative or not less than the radix. Otherwise the result is one of
 * '0'-'9' or 'a'-'z'.
 * <br>
 * return value boundary = U+0030-U+0039|U+0061-U+007A
 *
 * @param digit digit to be converted into a character
 * @param radix radix of digit
 * @return character representing digit in radix, or '\0'
 * @see #MIN_RADIX
 * @see #MAX_RADIX
 * @see #digit(char, int)
 */
public static char forDigit(int digit, int radix)
{
  boolean radixInRange = radix >= MIN_RADIX && radix <= MAX_RADIX;
  if (! radixInRange || digit < 0 || digit >= radix)
    return '\0';
  // Digits 0-9 map onto '0'-'9'; 10 and above continue from 'a'.
  if (digit < 10)
    return (char) ('0' + digit);
  return (char) ('a' - 10 + digit);
}
/**
* Returns the Unicode directionality property of the character. This
* is used in the visual ordering of text.
*
* <p>This method is declared native; its implementation is not part of
* this Java source.
*
* @param ch the character to look up
* @return the directionality constant, or DIRECTIONALITY_UNDEFINED
* @see #DIRECTIONALITY_UNDEFINED
* @see #DIRECTIONALITY_LEFT_TO_RIGHT
* @see #DIRECTIONALITY_RIGHT_TO_LEFT
* @see #DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
* @see #DIRECTIONALITY_EUROPEAN_NUMBER
* @see #DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
* @see #DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
* @see #DIRECTIONALITY_ARABIC_NUMBER
* @see #DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
* @see #DIRECTIONALITY_NONSPACING_MARK
* @see #DIRECTIONALITY_BOUNDARY_NEUTRAL
* @see #DIRECTIONALITY_PARAGRAPH_SEPARATOR
* @see #DIRECTIONALITY_SEGMENT_SEPARATOR
* @see #DIRECTIONALITY_WHITESPACE
* @see #DIRECTIONALITY_OTHER_NEUTRALS
* @see #DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
* @see #DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
* @see #DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
* @see #DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
* @see #DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
* @since 1.4
*/
public static native byte getDirectionality(char ch);
/**
 * Tests whether Unicode marks the character as mirrored. For example,
 * U+0028 (LEFT PARENTHESIS) appears as '(' in left-to-right text, but as
 * ')' in right-to-left text.
 *
 * @param ch the character to look up
 * @return true if the character is mirrored
 * @since 1.4
 */
public static boolean isMirrored(char ch)
{
  // The mirrored flag is a single bit of the packed attribute word.
  int attr = readChar(ch);
  return (attr & MIRROR_MASK) != 0;
}
/**
 * Orders this Character relative to another one by numeric char value.
 *
 * @param anotherCharacter Character to compare with this Character
 * @return a negative integer if this Character is less than
 *         anotherCharacter, zero if this Character is equal, and
 *         a positive integer if this Character is greater
 * @throws NullPointerException if anotherCharacter is null
 * @since 1.2
 */
public int compareTo(Character anotherCharacter)
{
  // Both operands widen to int and lie in 0-0xFFFF, so the difference
  // cannot overflow.
  int mine = value;
  int theirs = anotherCharacter.value;
  return mine - theirs;
}
/**
 * Orders this Character relative to an arbitrary object. The argument is
 * cast to Character and the work is handed to compareTo(Character), so
 * the result is the same as for that method.
 *
 * @param o object to compare
 * @return the comparison value
 * @throws ClassCastException if o is not a Character object
 * @throws NullPointerException if o is null
 * @see #compareTo(Character)
 * @since 1.2
 */
public int compareTo(Object o)
{
  Character other = (Character) o;
  return compareTo(other);
}
}
} // class Character
// natCharacter.cc - Native part of Character class.
/* Copyright (C) 1998, 1999 Free Software Foundation
This file is part of libgcj.
This software is copyrighted work licensed under the terms of the
Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
details. */
/* java.lang.Character -- Wrapper class for char, and Unicode subsets
Copyright (C) 1998, 1999, 2001, 2002 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
#include <config.h>
......@@ -18,267 +45,69 @@ details. */
#define asize(x) ((sizeof (x)) / sizeof (x[0]))
static jchar
to_lower_title (jchar ch)
{
for (unsigned int i = 0; i < asize (title_to_upper_table); ++i)
{
// We can assume that the entries in the two tables are
// parallel. This is checked in the script.
if (title_to_upper_table[i][1] == ch
|| title_to_upper_table[i][0] == ch)
return title_to_lower_table[i][1];
}
return ch;
}
static jchar
to_upper_title (jchar ch)
{
for (unsigned int i = 0; i < asize (title_to_lower_table); ++i)
{
// We can assume that the entries in the two tables are
// parallel. This is checked in the script.
if (title_to_lower_table[i][1] == ch
|| title_to_lower_table[i][0] == ch)
return title_to_upper_table[i][1];
}
return ch;
}
jboolean
java::lang::Character::isTitleCase (jchar ch)
{
for (unsigned int i = 0; i < asize (title_to_lower_table); ++i)
{
if (title_to_lower_table[i][0] == ch)
return true;
}
return false;
}
jchar
java::lang::Character::toTitleCase (jchar ch)
java::lang::Character::readChar(jchar ch)
{
// Both titlecase mapping tables have the same length. This is
// checked in the chartables script.
for (unsigned int i = 0; i < asize (title_to_lower_table); ++i)
{
if (title_to_lower_table[i][0] == ch)
return ch;
if (title_to_lower_table[i][1] == ch)
return title_to_lower_table[i][0];
if (title_to_upper_table[i][1] == ch)
return title_to_upper_table[i][0];
}
return toUpperCase (ch);
}
#ifdef COMPACT_CHARACTER
static int
table_search (const jchar table[][2], int table_len, jchar ch)
{
int low, high, i, old;
low = 0;
high = table_len;
i = high / 2;
while (true)
{
if (ch < table[i][0])
high = i;
else if (ch > table[i][1])
low = i;
else
return i;
old = i;
i = (high + low) / 2;
if (i == old)
break;
}
return -1;
}
jint
java::lang::Character::digit_value (jchar ch)
{
int index = table_search (digit_table, asize (digit_table), ch);
if (index == -1)
return -1;
jchar base = digit_table[index][0];
// Tamil doesn't have a digit `0'. So we special-case it here.
if (base == TAMIL_DIGIT_ONE)
return ch - base + 1;
return ch - base;
}
jint
java::lang::Character::getNumericValue (jchar ch)
{
jint d = digit (ch, 36);
if (d != -1)
return d;
for (unsigned int i = 0; i < asize (numeric_table); ++i)
{
if (numeric_table[i] == ch)
return numeric_value[i];
}
return -1;
// Perform 16-bit addition to find the correct entry in data.
return data[(jchar) (blocks[ch >> SHIFT] + ch)];
}
jint
java::lang::Character::getType (jchar ch)
java::lang::Character::getType(jchar ch)
{
int index = table_search (all_table, asize (all_table), ch);
if (index != -1)
return category_table[index];
return UNASSIGNED;
}
jboolean
java::lang::Character::isLowerCase (jchar ch)
{
if (ch >= 0x2000 && ch <= 0x2fff)
return false;
if (table_search (lower_case_table, asize (lower_case_table), ch) != -1)
return true;
int low, high, i, old;
low = 0;
high = asize (lower_anomalous_table);
i = high / 2;
while (true)
{
if (ch < lower_anomalous_table[i])
high = i;
else if (ch > lower_anomalous_table[i])
low = i;
else
return true;
old = i;
i = (high + low) / 2;
if (i == old)
break;
}
return false;
}
jboolean
java::lang::Character::isSpaceChar (jchar ch)
{
return table_search (space_table, asize (space_table), ch) != -1;
}
// Report whether CH is an upper case character: false for anything in
// 0x2000..0x2fff, otherwise true when CH lies inside an upper_case_table
// range.
// NOTE(review): the statements after the first unconditional `return'
// can never execute as written; they look like lines from a different
// revision of this file -- confirm against the real file.
jboolean
java::lang::Character::isUpperCase (jchar ch)
{
if (ch >= 0x2000 && ch <= 0x2fff)
return false;
return table_search (upper_case_table, asize (upper_case_table), ch) != -1;
// Perform 16-bit addition to find the correct entry in data.
return (jint) (data[(jchar) (blocks[ch >> SHIFT] + ch)] & TYPE_MASK);
}
// Map CH to lower case.  When CH lies inside an upper_case_table range
// the result is CH's offset within that range added to the matching
// upper_case_map_table entry; otherwise the result of to_lower_title (ch)
// is returned.
// NOTE(review): two signature lines appear back to back and a further
// `return' follows the unconditional one; this looks like two revisions
// interleaved -- confirm against the real file.
jchar
java::lang::Character::toLowerCase (jchar ch)
java::lang::Character::toLowerCase(jchar ch)
{
int index = table_search (upper_case_table, asize (upper_case_table), ch);
if (index == -1)
return to_lower_title (ch);
return (jchar) (ch - upper_case_table[index][0]
+ upper_case_map_table[index]);
return (jchar) (ch + lower[readChar(ch) >> 7]);
}
// Map CH to upper case.  When CH lies inside a lower_case_table range the
// result is CH's offset within that range added to the matching
// lower_case_map_table entry; otherwise the result of to_upper_title (ch)
// is returned.
// NOTE(review): two signature lines appear back to back and a further
// `return' follows the unconditional one; this looks like two revisions
// interleaved -- confirm against the real file.
jchar
java::lang::Character::toUpperCase (jchar ch)
java::lang::Character::toUpperCase(jchar ch)
{
int index = table_search (lower_case_table, asize (lower_case_table), ch);
if (index == -1)
return to_upper_title (ch);
return (jchar) (ch - lower_case_table[index][0]
+ lower_case_map_table[index]);
return (jchar) (ch + upper[readChar(ch) >> 7]);
}
#else /* COMPACT_CHARACTER */
// NOTE(review): this span carries two function headers (digit_value and
// toTitleCase) in front of a single body, and the body has statements
// after an unconditional `return -1;'.  It looks like two revisions
// interleaved -- confirm against the real file before relying on it.
//
// As written, the reachable part returns attribute_table[ch] when
// type_table[ch] is DECIMAL_DIGIT_NUMBER and -1 otherwise.  The remaining
// statements scan the title array two entries at a time, from the end,
// for CH and fall back to toUpperCase (ch).
jint
java::lang::Character::digit_value (jchar ch)
jchar
java::lang::Character::toTitleCase(jchar ch)
{
if (type_table[ch] == DECIMAL_DIGIT_NUMBER)
return attribute_table[ch];
return -1;
// As title is short, it doesn't hurt to exhaustively iterate over it.
for (int i = title_length - 2; i >= 0; i -= 2)
if (title[i] == ch)
return title[i + 1];
return toUpperCase(ch);
}
// NOTE(review): this span opens getNumericValue, and then a second
// function header (digit) appears before the first body is closed.  It
// looks like two revisions interleaved -- confirm against the real file.
//
// First part: returns digit (ch, 36) when that is not -1, then the
// secondary_attribute_table entry for characters in
// ROMAN_START..ROMAN_END, then attribute_table[ch] for LETTER_NUMBER and
// OTHER_NUMBER characters, else -1.
// Second part: rejects a RADIX outside MIN_RADIX..MAX_RADIX, and for
// characters whose type is an upper case letter, lower case letter or
// decimal digit returns the numValue entry when it is in [0, radix);
// every other case yields -1.
jint
java::lang::Character::getNumericValue (jchar ch)
{
jint d = digit (ch, 36);
if (d != -1)
return d;
// Some characters require two attributes. We special-case them here.
if (ch >= ROMAN_START && ch <= ROMAN_END)
return secondary_attribute_table[ch - ROMAN_START];
if (type_table[ch] == LETTER_NUMBER || type_table[ch] == OTHER_NUMBER)
return attribute_table[ch];
return -1;
java::lang::Character::digit(jchar ch, jint radix)
{
if (radix < MIN_RADIX || radix > MAX_RADIX)
return (jint) -1;
jchar attr = readChar(ch);
// Build a one-bit mask from the character's type and test it against the
// three accepted types in a single AND.
if (((1 << (attr & TYPE_MASK))
& ((1 << UPPERCASE_LETTER)
| (1 << LOWERCASE_LETTER)
| (1 << DECIMAL_DIGIT_NUMBER))))
{
// Signedness doesn't matter; 0xffff vs. -1 are both rejected.
jint digit = (jint) numValue[attr >> 7];
return (digit >= 0 && digit < radix) ? digit : (jint) -1;
}
return (jint) -1;
}
// Return the type_table entry for CH.
jint
java::lang::Character::getType (jchar ch)
{
  const jint category = type_table[ch];
  return category;
}
// Report whether CH is a lower case letter according to type_table.
// Characters in 0x2000..0x2fff are always rejected.
jboolean
java::lang::Character::isLowerCase (jchar ch)
{
  const bool excluded = ! (ch < 0x2000 || ch > 0x2fff);
  return ! excluded && type_table[ch] == LOWERCASE_LETTER;
}
// Report whether type_table classifies CH as a space, line or paragraph
// separator.
jboolean
java::lang::Character::isSpaceChar (jchar ch)
{
  if (type_table[ch] == SPACE_SEPARATOR)
    return true;
  if (type_table[ch] == LINE_SEPARATOR)
    return true;
  return type_table[ch] == PARAGRAPH_SEPARATOR;
}
// Report whether CH is an upper case letter according to type_table.
// Characters in 0x2000..0x2fff are always rejected.
jboolean
java::lang::Character::isUpperCase (jchar ch)
{
  const bool excluded = ! (ch < 0x2000 || ch > 0x2fff);
  return ! excluded && type_table[ch] == UPPERCASE_LETTER;
}
// NOTE(review): this span carries two function headers (toLowerCase and
// getNumericValue) in front of a single body, and the body has a
// statement after an unconditional `return'.  It looks like two
// revisions interleaved -- confirm against the real file.
//
// As written, the reachable part returns attribute_table[ch] for an
// UPPERCASE_LETTER and to_lower_title (ch) otherwise; the final statement
// reads the numValue entry selected by readChar (ch) >> 7.
jchar
java::lang::Character::toLowerCase (jchar ch)
java::lang::Character::getNumericValue(jchar ch)
{
if (type_table[ch] == UPPERCASE_LETTER)
return attribute_table[ch];
return to_lower_title (ch);
// numValue is stored as an array of jshort, since 10000 is the maximum.
return (jint) numValue[readChar(ch) >> 7];
}
// NOTE(review): this span carries two function headers (toUpperCase and
// getDirectionality) in front of a single body, and the body has a
// statement after an unconditional `return'.  It looks like two
// revisions interleaved -- confirm against the real file.
//
// As written, the reachable part returns attribute_table[ch] for a
// LOWERCASE_LETTER and to_upper_title (ch) otherwise; the final statement
// reads the direction entry selected by readChar (ch) >> 7.
jchar
java::lang::Character::toUpperCase (jchar ch)
jbyte
java::lang::Character::getDirectionality(jchar ch)
{
if (type_table[ch] == LOWERCASE_LETTER)
return attribute_table[ch];
return to_upper_title (ch);
return direction[readChar(ch) >> 7];
}
#endif /* COMPACT_CHARACTER */
/* MakeCharTables.java - converts gnu.java.lang.CharData into
include/java-chartables.h
Copyright (C) 2002 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
import gnu.java.lang.CharData;
/**
 * Converts the string constants of <code>gnu.java.lang.CharData</code> into
 * C arrays and prints the resulting <code>java-chartables.h</code> header on
 * standard output.
 *
 * The class implements <code>CharData</code> only to bring its constants
 * (<code>SOURCE</code>, <code>SHIFT</code>, <code>BLOCKS</code>,
 * <code>DATA</code>, ...) into scope.
 */
public class MakeCharTables implements CharData
{
  /**
   * Prints the complete header: the license and explanatory preamble, the
   * <code>SHIFT</code> macro, one array per CharData table, and the closing
   * include guard.
   *
   * @param args ignored
   */
  public static void main(String[] args)
  {
    System.out.println("/* java-chartables.h -- Character tables for java.lang.Character -*- c++ -*-\n"
                       + " Copyright (C) 2002 Free Software Foundation, Inc.\n"
                       + " *** This file is generated by scripts/MakeCharTables.java ***\n"
                       + "\n"
                       + "This file is part of GNU Classpath.\n"
                       + "\n"
                       + "GNU Classpath is free software; you can redistribute it and/or modify\n"
                       + "it under the terms of the GNU General Public License as published by\n"
                       + "the Free Software Foundation; either version 2, or (at your option)\n"
                       + "any later version.\n"
                       + "\n"
                       + "GNU Classpath is distributed in the hope that it will be useful, but\n"
                       + "WITHOUT ANY WARRANTY; without even the implied warranty of\n"
                       + "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n"
                       + "General Public License for more details.\n"
                       + "\n"
                       + "You should have received a copy of the GNU General Public License\n"
                       + "along with GNU Classpath; see the file COPYING. If not, write to the\n"
                       + "Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA\n"
                       + "02111-1307 USA.\n"
                       + "\n"
                       + "Linking this library statically or dynamically with other modules is\n"
                       + "making a combined work based on this library. Thus, the terms and\n"
                       + "conditions of the GNU General Public License cover the whole\n"
                       + "combination.\n"
                       + "\n"
                       + "As a special exception, the copyright holders of this library give you\n"
                       + "permission to link this library with independent modules to produce an\n"
                       + "executable, regardless of the license terms of these independent\n"
                       + "modules, and to copy and distribute the resulting executable under\n"
                       + "terms of your choice, provided that you also meet, for each linked\n"
                       + "independent module, the terms and conditions of the license of that\n"
                       + "module. An independent module is a module which is not derived from\n"
                       + "or based on this library. If you modify this library, you may extend\n"
                       + "this exception to your version of the library, but you are not\n"
                       + "obligated to do so. If you do not wish to do so, delete this\n"
                       + "exception statement from your version. */\n"
                       + "\n"
                       + "#ifndef __JAVA_CHARTABLES_H__\n"
                       + "#define __JAVA_CHARTABLES_H__\n"
                       + "\n"
                       + "// These tables are automatically generated by scripts/MakeCharTables.java.\n"
                       + "// This is in turn parsing gnu.java.lang.CharData, which is generated by\n"
                       + "// scripts/unicode-muncher.pl. The Unicode data comes from\n"
                       + "// www.unicode.org; this header is based on\n"
                       + "// " + SOURCE + ". JDK 1.4 uses Unicode version 3.0.0.\n"
                       + "// DO NOT EDIT the tables. Instead, fix the upstream scripts and run\n"
                       + "// them again.\n"
                       + "\n"
                       + "// The data is stored in C style arrays of the appropriate CNI types, to\n"
                       + "// guarantee that the data is constant and non-relocatable. The field\n"
                       // Fixed: the opening tag used to read "<supSHIFT", which is
                       // broken markup in the generated header.
                       + "// <code>blocks</code> stores the offset of a block of 2<sup>SHIFT</sup>\n"
                       + "// characters within <code>data</code>. The data field, in turn, stores\n"
                       + "// information about each character in the low order bits, and an offset\n"
                       + "// into the attribute tables <code>upper</code>, <code>lower</code>,\n"
                       + "// <code>numValue</code>, and <code>direction</code>. Notice that the\n"
                       + "// attribute tables are much smaller than 0xffff entries; as many characters\n"
                       + "// in Unicode share common attributes. Finally, there is a listing for\n"
                       + "// <code>title</code> exceptions (most characters just have the same title\n"
                       + "// case as upper case).\n"
                       + "\n"
                       + "// This file should only be included by natCharacter.cc\n"
                       + "\n"
                       + "/**\n"
                       + " * The character shift amount to look up the block offset. In other words,\n"
                       + " * <code>(char) (blocks[ch >> SHIFT] + ch)</code> is the index where\n"
                       + " * <code>ch</code> is described in <code>data</code>.\n"
                       + " */\n"
                       + "#define SHIFT " + SHIFT);
    convertString("/**\n"
                  + " * The mapping of character blocks to their location in <code>data</code>.\n"
                  // Fixed: this used to say "modulo 16 sum"; the lookup is a
                  // 16-bit (char) addition, as the SHIFT text above shows.
                  + " * Each entry has been adjusted so that a 16-bit sum with the desired\n"
                  + " * character gives the actual index into <code>data</code>.\n"
                  + " */",
                  char.class, "blocks", BLOCKS);
    convertString("/**\n"
                  + " * Information about each character. The low order 5 bits form the\n"
                  + " * character type, the next bit is a flag for non-breaking spaces, and the\n"
                  + " * next bit is a flag for mirrored directionality. The high order 9 bits\n"
                  + " * form the offset into the attribute tables. Note that this limits the\n"
                  + " * number of unique character attributes to 512, which is not a problem\n"
                  + " * as of Unicode version 3.2.0, but may soon become one.\n"
                  + " */",
                  char.class, "data", DATA);
    convertString("/**\n"
                  + " * This is the attribute table for computing the numeric value of a\n"
                  + " * character. The value is -1 if Unicode does not define a value, -2\n"
                  + " * if the value is not a positive integer, otherwise it is the value.\n"
                  + " */",
                  short.class, "numValue", NUM_VALUE);
    convertString("/**\n"
                  + " * This is the attribute table for computing the uppercase representation\n"
                  + " * of a character. The value is the difference between the character and\n"
                  + " * its uppercase version.\n"
                  + " */",
                  short.class, "upper", UPPER);
    convertString("/**\n"
                  + " * This is the attribute table for computing the lowercase representation\n"
                  + " * of a character. The value is the difference between the character and\n"
                  + " * its lowercase version.\n"
                  + " */",
                  short.class, "lower", LOWER);
    convertString("/**\n"
                  + " * This is the attribute table for computing the directionality class\n"
                  + " * of a character. At present, the value is in the range 0 - 18 if the\n"
                  + " * character has a direction, otherwise it is -1.\n"
                  + " */",
                  byte.class, "direction", DIRECTION);
    convertString("/**\n"
                  + " * This is the listing of titlecase special cases (all other character\n"
                  + " * can use <code>upper</code> to determine their titlecase). The listing\n"
                  + " * is a sequence of character pairs; converting the first character of the\n"
                  + " * pair to titlecase produces the second character.\n"
                  + " */",
                  char.class, "title", TITLE);
    System.out.println();
    System.out.println("#endif /* __JAVA_CHARTABLES_H__ */");
  }

  /**
   * Prints one table as a <code>static const</code> C array followed by a
   * <code><i>name</i>_length</code> constant.
   *
   * Every char of <code>field</code> becomes one array element: printed as
   * an unsigned int for <code>char.class</code>, or narrowed to a signed
   * byte or short for <code>byte.class</code> and <code>short.class</code>.
   *
   * @param header comment text printed in front of the array
   * @param type the element type: char.class, byte.class or short.class
   * @param name the C identifier of the array
   * @param field the table contents, one element per char
   * @throws Error if <code>type</code> is none of the three supported types
   */
  private static void convertString(String header, Class type,
                                    String name, String field)
  {
    System.out.println();
    System.out.println(header);
    System.out.println("static const j" + type.getName() + " " + name
                       + "[] = {");
    char[] data = field.toCharArray();

    // Number of elements per output line, chosen per element type.
    int wrap;
    if (type == char.class)
      wrap = 10;
    else if (type == byte.class)
      wrap = 21;
    else if (type == short.class)
      wrap = 13;
    else
      throw new Error("Unexpected type");

    for (int rowStart = 0; rowStart < data.length; rowStart += wrap)
      {
        System.out.print(" ");
        int rowEnd = Math.min(rowStart + wrap, data.length);
        for (int k = rowStart; k < rowEnd; k++)
          {
            System.out.print(" ");
            if (type == char.class)
              System.out.print((int) data[k]);
            else if (type == byte.class)
              System.out.print((byte) data[k]);
            else if (type == short.class)
              System.out.print((short) data[k]);
            System.out.print(",");
          }
        System.out.println();
      }
    System.out.println(" };\n"
                       + "/** Length of " + name + ". */\n"
                       + "static const int " + name + "_length = "
                       + data.length + ";");
  }
}
#! /usr/bin/perl
use strict;
use warnings;

# Read a Unicode Blocks.txt file and print, on standard output, one Java
# UnicodeBlock constant per block followed by the `blocks' array that
# lists them.
#
# The input file is the first command line argument.  Without one,
# Blocks.txt in the current directory is used, and fetched with wget
# first when it does not exist.
my $file;
if (!defined $ARGV[0] || $ARGV[0] eq '')
{
    $file = 'Blocks.txt';
    if (! -f $file)
    {
        # Too painful to figure out how to get Perl to do it.
        # List form: no shell is involved.  A failed download is reported
        # by the open below.
        system('wget', '-o', '.wget-log',
               'http://www.unicode.org/Public/UNIDATA/Blocks.txt');
    }
}
else
{
    $file = $ARGV[0];
}

open(my $input, '<', $file) or die "couldn't open $file: $!";

my @symbols = ();
while (my $line = <$input>)
{
    next if $line =~ /^#/;
    # chomp, not chop: a final line without a newline must keep its last
    # character.
    chomp $line;
    next if $line =~ /^$/;

    my ($start, $to, $text) = split(/; /, $line);
    # Skip lines that do not have all three fields.
    next unless defined $text;

    (my $symbol = $text) =~ tr/a-z/A-Z/;
    $symbol =~ s/[- ]/_/g;

    # Special case for one of the SPECIALS.
    next if $start eq 'FEFF';

    # Special case some areas that our heuristic mishandles.
    if ($symbol eq 'HIGH_SURROGATES')
    {
        $symbol = 'SURROGATES_AREA';
        $text = 'Surrogates Area';
        $to = 'DFFF';
    }
    elsif ($symbol =~ /SURROGATES/)
    {
        next;
    }
    elsif ($symbol eq 'PRIVATE_USE')
    {
        $symbol .= '_AREA';
        $text = 'Private Use Area';
    }

    printf " public static final UnicodeBlock %s = new UnicodeBlock (\"%s\", '\\u%s', '\\u%s');\n",
        $symbol, $text, $start, $to;
    push(@symbols, $symbol);
}

print " private static final UnicodeBlock[] blocks = {\n";
foreach my $symbol (@symbols)
{
    print " $symbol";
    # SPECIALS is expected to be the last entry, so it gets no comma.
    print "," unless $symbol eq 'SPECIALS';
    print "\n";
}
print " };\n";

close($input);
#!/usr/bin/perl -w
# unicode-blocks.pl -- Script to generate java.lang.Character.UnicodeBlock
# Copyright (C) 2002 Free Software Foundation, Inc.
#
# This file is part of GNU Classpath.
#
# GNU Classpath is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# GNU Classpath is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNU Classpath; see the file COPYING. If not, write to the
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# 02111-1307 USA.
#
# Linking this library statically or dynamically with other modules is
# making a combined work based on this library. Thus, the terms and
# conditions of the GNU General Public License cover the whole
# combination.
#
# As a special exception, the copyright holders of this library give you
# permission to link this library with independent modules to produce an
# executable, regardless of the license terms of these independent
# modules, and to copy and distribute the resulting executable under
# terms of your choice, provided that you also meet, for each linked
# independent module, the terms and conditions of the license of that
# module. An independent module is a module which is not derived from
# or based on this library. If you modify this library, you may extend
# this exception to your version of the library, but you are not
# obligated to do so. If you do not wish to do so, delete this
# exception statement from your version.
# Code for reading Blocks.txt and generating (to standard out) the code for
# java.lang.Character.UnicodeBlock, for pasting into java/lang/Character.java.
# You should probably check that the results are accurate to the
# specification, but I made sure it works OOB for Unicode 3.0.0 and JDK 1.4.
# As the grammar for the Blocks.txt file is changing in Unicode 3.2.0, you
# will have to tweak this some for future use. For now, the relevant
# Unicode definition files are found in libjava/gnu/gcj/convert/.
#
# author Eric Blake <ebb9@email.byu.edu>
#
# usage: unicode-blocks.pl <blocks.txt>
# where <blocks.txt> is obtained from www.unicode.org (named Blocks-3.txt
# for Unicode version 3.0.0).
use strict;

die "Usage: $0 <blocks.txt>" unless @ARGV == 1;
# Three-argument open: the file name is never parsed for a mode or a pipe.
open(my $blocks_fh, '<', $ARGV[0])
    or die "Can't open Unicode block file: $!\n";

# A hash of added fields and the JDK they were added in, to automatically
# print @since tags. Maintaining this is optional (and tedious), but nice.
my %additions = ("SYRIAC" => "1.4",
                 "THAANA" => "1.4",
                 "SINHALA" => "1.4",
                 "MYANMAR" => "1.4",
                 "ETHIOPIC" => "1.4",
                 "CHEROKEE" => "1.4",
                 "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS" => "1.4",
                 "OGHAM" => "1.4",
                 "RUNIC" => "1.4",
                 "KHMER" => "1.4",
                 "MONGOLIAN" => "1.4",
                 "BRAILLE_PATTERNS" => "1.4",
                 "CJK_RADICALS_SUPPLEMENT" => "1.4",
                 "KANGXI_RADICALS" => "1.4",
                 "IDEOGRAPHIC_DESCRIPTION_CHARACTERS" => "1.4",
                 "BOPOMOFO_EXTENDED" => "1.4",
                 "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A" => "1.4",
                 "YI_SYLLABLES" => "1.4",
                 "YI_RADICALS" => "1.4",
                 );

# The fixed part of the class is printed from non-interpolating heredocs,
# because the text contains `@param', `\uFEFF' and similar sequences that
# Perl would otherwise expand.  The one line that names the input file is
# therefore printed separately, from an interpolating string; it used to
# sit inside the heredoc and came out as a literal `$ARGV[0]'.
print <<'EOF';
/**
* A family of character subsets in the Unicode specification. A character
* is in at most one of these blocks.
*
* This inner class was generated automatically from
EOF
print "* <code>$ARGV[0]</code>, by some perl scripts.\n";
print <<'EOF';
* This Unicode definition file can be found on the
* <a href="http://www.unicode.org">http://www.unicode.org</a> website.
* JDK 1.4 uses Unicode version 3.0.0.
*
* @author scripts/unicode-blocks.pl (written by Eric Blake)
* @since 1.2
*/
public static final class UnicodeBlock extends Subset
{
/** The start of the subset. */
private final char start;
/** The end of the subset. */
private final char end;
/**
* Constructor for strictly defined blocks.
*
* @param start the start character of the range
* @param end the end character of the range
* @param name the block name
*/
private UnicodeBlock(char start, char end, String name)
{
super(name);
this.start = start;
this.end = end;
}
/**
* Returns the Unicode character block which a character belongs to.
*
* @param ch the character to look up
* @return the set it belongs to, or null if it is not in one
*/
public static UnicodeBlock of(char ch)
{
// Special case, since SPECIALS contains two ranges.
if (ch == '\uFEFF')
return SPECIALS;
// Simple binary search for the correct block.
int low = 0;
int hi = sets.length - 1;
while (low <= hi)
{
int mid = (low + hi) >> 1;
UnicodeBlock b = sets[mid];
if (ch < b.start)
hi = mid - 1;
else if (ch > b.end)
low = mid + 1;
else
return b;
}
return null;
}
EOF

my $seenSpecials = 0;
my $seenSurrogates = 0;
my $surrogateStart = 0;
my @names = ();

# One constant is printed per block line of the input; comment lines and
# lines without three `; '-separated fields are skipped.
while (my $line = <$blocks_fh>) {
    next if $line =~ /^\#/;
    my ($start, $end, $block) = split(/; /, $line);
    next unless defined $block;
    chomp $block;
    $block =~ s/ *$//;
    if (! $seenSpecials and $block =~ /Specials/) {
        # Special case SPECIALS, since it is two disjoint ranges
        $seenSpecials = 1;
        next;
    }
    if ($block =~ /Surrogates/) {
        # Special case SURROGATES_AREA, since it is one range, not three
        # consecutive, in Java
        $seenSurrogates++;
        if ($seenSurrogates == 1) {
            # Remember where the first surrogate block starts.
            $surrogateStart = $start;
            next;
        } elsif ($seenSurrogates == 2) {
            next;
        } else {
            # Third block: emit one range from the first start to this end.
            $start = $surrogateStart;
            $block = "Surrogates Area";
        }
    }
    # Special case the name of PRIVATE_USE_AREA.
    $block =~ s/(Private Use)/$1 Area/;
    (my $name = $block) =~ tr/a-z -/A-Z__/;
    push @names, $name;
    my $since = (defined $additions{$name}
                 ? "\n * \@since $additions{$name}" : "");
    my $extra = ($block =~ /Specials/ ? "'\\uFEFF', " : "");
    print <<EOF;
/**
* $block.
* $extra'\\u$start' - '\\u$end'.$since
*/
public final static UnicodeBlock $name
= new UnicodeBlock('\\u$start', '\\u$end',
"$name");
EOF
}
close($blocks_fh);

print <<EOF;
/**
* The defined subsets.
*/
private static final UnicodeBlock sets[] = {
EOF
foreach my $name (@names) {
    print " $name,\n";
}
print <<EOF;
};
} // class UnicodeBlock
EOF
#!/usr/bin/perl -w
# unicode-decomp.pl - script to generate database for java.text.Collator
# Copyright (C) 1998, 1999, 2002 Free Software Foundation, Inc.
#
# This file is part of libjava.
#
# This software is copyrighted work licensed under the terms of the
# Libjava License. Please consult the file "LIBJAVA_LICENSE" for
# details.
# Code for reading UnicodeData.txt and generating the decomposition
# tables in include/java-chardecomp.h. For now, the relevant Unicode
# definition files are found in libjava/gnu/gcj/convert/.
#
# Usage: ./unicode-decomp.pl [-n] <UnicodeData.txt> <decomp.h>
# where <UnicodeData.txt> is obtained from www.unicode.org (named
# UnicodeData-3.0.0.txt for Unicode version 3.0.0), and <decomp.h>
# is the final location of include/java-chardecomp.h.
# As of JDK 1.4, use Unicode version 3.0.0 for best results.
#
# If this exits with nonzero status, then you must investigate the
# cause of the problem.
# Diagnostics and other information to stderr.
# With -n, the files are not created, but all processing still occurs.
use strict;

# These map characters to their decompositions.  Keys are numeric code
# points; values are the decomposition packed as 16-bit big-endian words.
my %canonical_decomposition = ();
my %full_decomposition = ();

# Handle `-n' and open output files.
if ($ARGV[0] && $ARGV[0] eq '-n')
{
    shift @ARGV;
    # All processing still happens; the output is simply discarded.
    $ARGV[1] = '/dev/null';
}
die "Usage: $0 <UnicodeData.txt> <java-chardecomp.h>" unless @ARGV == 2;
# Three-argument open: the file name is never parsed for a mode or a pipe.
open(my $unicode, '<', $ARGV[0])
    or die "Can't open Unicode attribute file: $!\n";

# Process the Unicode file.
$| = 1;
my $count = 0;
print STDERR "Parsing attributes file";
while (my $line = <$unicode>)
{
    print STDERR "." unless $count++ % 1000;
    chomp $line;
    $line =~ s/\r//g;
    # Only the code point (field 0) and the decomposition (field 5) matter.
    my ($ch, undef, undef, undef, undef, $decomp) = split(';', $line);
    $ch = hex($ch);
    if (defined $decomp && $decomp ne '')
    {
        my $is_full = 0;
        my @decomp = ();
        foreach my $item (split(' ', $decomp))
        {
            # A `<tag>' item marks the entry for the full table rather
            # than the canonical one; it is not part of the expansion.
            if ($item =~ /^\<.*\>$/)
            {
                $is_full = 1;
                next;
            }
            push(@decomp, hex($item));
        }
        my $packed = pack "n*", @decomp;
        if ($is_full)
        {
            $full_decomposition{$ch} = $packed;
        }
        else
        {
            $canonical_decomposition{$ch} = $packed;
        }
    }
}
close($unicode);

# Now generate decomposition tables.  DECOMP stays a bareword handle
# because the writer subroutines print to it by name.
open(DECOMP, '>', $ARGV[1]) or die "Can't open output file: $!\n";
print STDERR "\nGenerating tables\n";
print DECOMP <<EOF;
// java-chardecomp.h - Decomposition character tables -*- c++ -*-
#ifndef __JAVA_CHARDECOMP_H__
#define __JAVA_CHARDECOMP_H__
// These tables are automatically generated by the $0
// script. DO NOT EDIT the tables. Instead, fix the script
// and run it again.
// This file should only be included by natCollator.cc
struct decomp_entry
{
jchar key;
const char *value;
};
EOF
# Called with `&' and no parentheses, exactly as before: the subroutine is
# defined further down with a prototype.
&write_decompositions;
print DECOMP "#endif /* __JAVA_CHARDECOMP_H__ */\n";
# Buffered write errors only show up when the handle is closed.
close(DECOMP) or die "Can't write output file: $!\n";
print STDERR "Done\n";
exit;
# Write a single decomposition table to the DECOMP file handle.
#
# Arguments: the table name (prefix of the generated C array name), a
# flag that is accepted but not used here, and the table itself as a
# flattened hash of code point => packed 16-bit big-endian expansion.
# Only code points 0 .. 0xffff are written, in ascending order.
sub write_single_decomposition($$%)
{
    my ($name, $is_canon, %table) = @_;

    my @entries = ();
    foreach my $code (0 .. 0xffff)
    {
        my $packed = $table{$code};
        next unless defined $packed;

        # We represent the expansion as a series of bytes, terminated
        # with a double nul. This is ugly, but relatively
        # space-efficient. Most expansions are short, but there are a
        # few that are very long (e.g. \uFDFA). This means that if we
        # chose a fixed-space representation we would waste a lot of
        # space.
        my $bytes = join('',
                         map { sprintf("\\x%02x\\x%02x", ($_ / 256), ($_ % 256)) }
                         unpack("n*", $packed));
        push(@entries, sprintf(" { 0x%04x, \"%s\" }", $code, $bytes));
    }

    print DECOMP "static const decomp_entry ${name}_decomposition[] =\n{\n";
    print DECOMP join(",\n", @entries);
    print DECOMP "\n};\n\n";
}
# Write both decomposition tables: first the canonical one, then the
# full one.
sub write_decompositions()
{
    write_single_decomposition('canonical', 1, %canonical_decomposition);
    write_single_decomposition('full', 0, %full_decomposition);
}
#!/usr/bin/perl -w
# unicode-muncher.pl -- generate Unicode database for java.lang.Character
# Copyright (C) 1998, 2002 Free Software Foundation, Inc.
#
# This file is part of GNU Classpath.
#
# GNU Classpath is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# GNU Classpath is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNU Classpath; see the file COPYING. If not, write to the
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# 02111-1307 USA.
#
# Linking this library statically or dynamically with other modules is
# making a combined work based on this library. Thus, the terms and
# conditions of the GNU General Public License cover the whole
# combination.
#
# As a special exception, the copyright holders of this library give you
# permission to link this library with independent modules to produce an
# executable, regardless of the license terms of these independent
# modules, and to copy and distribute the resulting executable under
# terms of your choice, provided that you also meet, for each linked
# independent module, the terms and conditions of the license of that
# module. An independent module is a module which is not derived from
# or based on this library. If you modify this library, you may extend
# this exception to your version of the library, but you are not
# obligated to do so. If you do not wish to do so, delete this
# exception statement from your version.
# Code for reading UnicodeData.txt and generating the code for
# gnu.java.lang.CharData. For now, the relevant Unicode definition files
# are found in libjava/gnu/gcj/convert/.
#
# Inspired by code from Jochen Hoenicke.
# author Eric Blake <ebb9@email.byu.edu>
#
# Usage: ./unicode-muncher <UnicodeData.txt> <CharData.java>
# where <UnicodeData.txt> is obtained from www.unicode.org (named
# UnicodeData-3.0.0.txt for Unicode version 3.0.0), and <CharData.java>
# is the final location for the Java interface gnu.java.lang.CharData.
# As of JDK 1.4, use Unicode version 3.0.0 for best results.
##
## Convert a 16-bit integer to a Java source code String literal character.
## Accepts -0x8000 .. 0xffff (negative values are taken modulo 0x10000) and
## dies on anything outside that range.
##
sub javaChar($) {
    my ($char) = @_;
    die "Out of range: $char\n" if $char < -0x8000 or $char > 0xffff;
    my $unit = $char < 0 ? $char + 0x10000 : $char;

    # Special case characters that must be escaped, or are shorter as ASCII
    if ($unit < 0x20) {
        # Control characters become three-digit octal escapes.
        return sprintf("\\%03o", $unit);
    }
    if ($unit == 0x22) {
        return "\\\"";
    }
    if ($unit == 0x5c) {
        return "\\\\";
    }
    if ($unit < 0x7f) {
        # Remaining printable ASCII is emitted as itself.
        return chr($unit);
    }
    return sprintf("\\u%04x", $unit);
}
##
## Convert the text UnicodeData file from www.unicode.org into a Java
## interface with string constants holding the compressed information.
##

# General category codes; the position of a code in this list is the
# numeric type stored for a character.
my @TYPECODES = qw(Cn Lu Ll Lt Lm Lo Mn Me Mc Nd Nl No Zs Zl Zp Cc Cf
                   SKIPPED Co Cs Pd Ps Pe Pc Po Sm Sc Sk So Pi Pf);
# Bidirectional category codes; the position in this list is the stored
# direction, and -1 is stored for a code that is not listed.
my @DIRCODES = qw(L R AL EN ES ET AN CS NSM BN B S WS ON LRE LRO RLE RLO PDF);
# Flag bits OR-ed into the type value.
my $NOBREAK_FLAG = 32;
my $MIRRORED_FLAG = 64;

# @info holds one packed record per character; $titlecase collects packed
# (character, titlecase) pairs for characters whose titlecase field
# differs from their uppercase field.
my @info = ();
my $titlecase = "";
my $count = 0;
my $range = 0;

die "Usage: $0 <UnicodeData.txt> <CharData.java>" unless @ARGV == 2;
# Three-argument open: the file name is never parsed for a mode or a pipe.
open(UNICODE, '<', $ARGV[0])
    or die "Can't open Unicode attribute file: $!\n";

# Stage 1: Parse the attribute file
$| = 1;
print "GNU Classpath Unicode Attribute Database Generator 2.0\n";
print "Copyright (C) 1998, 2002 Free Software Foundation, Inc.\n";
print "Parsing attributes file";
while (<UNICODE>) {
    print "." unless $count++ % 1000;
    chomp;
    s/\r//g;
    # A negative limit keeps trailing empty fields, so the case mapping
    # fields at the end of the line are empty strings rather than undef
    # and the comparisons below do not warn under -w.
    my ($ch, $name, $category, undef, $bidir, $decomp, undef, undef, $numeric,
        $mirrored, undef, undef, $upcase, $lowcase, $title)
        = split(/;/, $_, -1);
    $ch = hex($ch);
    next if $ch > 0xffff; # Ignore surrogate pairs, since Java does

    my ($type, $numValue, $upperchar, $lowerchar, $direction);

    # Find the index of the category code.
    $type = 0;
    while ($category !~ /^$TYPECODES[$type]$/) {
        if (++$type == @TYPECODES) {
            die "$ch: Unknown type: $category";
        }
    }
    $type |= $NOBREAK_FLAG if ($decomp =~ /noBreak/);
    $type |= $MIRRORED_FLAG if ($mirrored =~ /Y/);

    # Numeric value: the field itself when it is a plain non-negative
    # integer, -1 when it is empty, -2 for anything else.
    if ($numeric =~ /^[0-9]+$/) {
        $numValue = $numeric;
        die "numValue too big: $ch, $numValue\n" if $numValue >= 0x7fff;
    } elsif ($numeric eq "") {
        # Special case sequences of 'a'-'z'
        if ($ch >= 0x0041 && $ch <= 0x005a) {
            $numValue = $ch - 0x0037;
        } elsif ($ch >= 0x0061 && $ch <= 0x007a) {
            $numValue = $ch - 0x0057;
        } elsif ($ch >= 0xff21 && $ch <= 0xff3a) {
            $numValue = $ch - 0xff17;
        } elsif ($ch >= 0xff41 && $ch <= 0xff5a) {
            $numValue = $ch - 0xff37;
        } else {
            $numValue = -1;
        }
    } else {
        $numValue = -2;
    }

    # Case mappings are stored as offsets from the character itself.
    $upperchar = $upcase ? hex($upcase) - $ch : 0;
    $lowerchar = $lowcase ? hex($lowcase) - $ch : 0;
    if ($title ne $upcase) {
        my $titlechar = $title ? hex($title) : $ch;
        $titlecase .= pack("n2", $ch, $titlechar);
    }

    # Find the index of the bidirectional code, or -1 if it is unknown.
    $direction = 0;
    while ($bidir !~ /^$DIRCODES[$direction]$/) {
        if (++$direction == @DIRCODES) {
            $direction = -1;
            last;
        }
    }

    # A name ending in `First>' opens a range; the next line must end in
    # `Last>', and every character in between gets that line's record.
    if ($range) {
        die "Expecting end of range at $ch\n" unless $name =~ /Last>$/;
        for ($range + 1 .. $ch - 1) {
            $info[$_] = pack("n5", $type, $numValue, $upperchar,
                             $lowerchar, $direction);
        }
        $range = 0;
    } elsif ($name =~ /First>$/) {
        $range = $ch;
    }
    $info[$ch] = pack("n5", $type, $numValue, $upperchar, $lowerchar,
                      $direction);
}
close UNICODE;
# Stage 2: Compress the data structures
# Builds $info, a string with one 16-bit big-endian word per character
# 0 .. 0xffff, and @charinfo, the list of distinct attribute rows those
# words refer to.
printf "\nCompressing data structures";
$count = 0;
# Assigning the empty list leaves $info undefined; the `.=' in the loop
# below starts the string.
my $info = ();
my %charhash = ();
my @charinfo = ();
for my $ch (0 .. 0xffff) {
print "." unless $count++ % 0x1000;
# Characters that stage 1 did not see get type 0, numeric value -1, no
# case offsets and direction -1.
if (! defined $info[$ch]) {
$info[$ch] = pack("n5", 0, -1, 0, 0, -1);
}
my ($type, $numVal, $upper, $lower, $direction) = unpack("n5", $info[$ch]);
# %charhash is keyed on the whole packed record (type included), so every
# distinct record gets its own row in @charinfo.
if (! exists $charhash{$info[$ch]}) {
push @charinfo, [ $numVal, $upper, $lower, $direction ];
$charhash{$info[$ch]} = $#charinfo;
}
# Word layout: the @charinfo row index shifted left by 7, OR-ed with the
# type value (which carries the flag bits set in stage 1).
$info .= pack("n", ($charhash{$info[$ch]} << 7) | $type);
}
my $charlen = @charinfo;
my $bestshift;
my $bestest = 1000000;
my $bestblkstr;
die "Too many unique character entries: $charlen\n" if $charlen > 512;
print "\nUnique character entries: $charlen\n";
# Try block sizes of 2**3 .. 2**8 characters and keep the shift whose
# estimated size is smallest.
for my $i (3 .. 8) {
my $blksize = 1 << $i;
my %blocks = ();
my @blkarray = ();
my ($j, $k);
print "shift: $i";
# Cut $info into blocks of $blksize words (two bytes each) and keep one
# copy of each distinct block, in order of first appearance.
for ($j = 0; $j < 0x10000; $j += $blksize) {
my $blkkey = substr $info, 2 * $j, 2 * $blksize;
if (! exists $blocks{$blkkey}) {
push @blkarray, $blkkey;
$blocks{$blkkey} = $#blkarray;
}
}
my $blknum = @blkarray;
my $blocklen = $blknum * $blksize;
printf " before %5d", $blocklen;
# Now we try to pack the blkarray as tight as possible by finding matching
# heads and tails.
# $j is the overlap length in words, tried from the longest possible
# overlap down to a single word.
for ($j = $blksize - 1; $j > 0; $j--) {
# Map each $j-word tail to the list of surviving blocks that end with it.
my %tails = ();
for $k (0 .. $#blkarray) {
next if ! defined $blkarray[$k];
my $len = length $blkarray[$k];
my $tail = substr $blkarray[$k], $len - $j * 2;
if (exists $tails{$tail}) {
push @{$tails{$tail}}, $k;
} else {
$tails{$tail} = [ $k ];
}
}
# tails are calculated, now calculate the heads and merge.
BLOCK:
for $k (0 .. $#blkarray) {
next if ! defined $blkarray[$k];
my $tomerge = $k;
while (1) {
# Look for another block whose tail equals this block's $j-word head.
my $head = substr($blkarray[$tomerge], 0, $j * 2);
my $entry = $tails{$head};
next BLOCK if ! defined $entry;
my $other = shift @{$entry};
# A block cannot be merged with itself: take the next candidate if there
# is one, otherwise put it back and give up on this block.
if ($other == $tomerge) {
if (@{$entry}) {
push @{$entry}, $other;
$other = shift @{$entry};
} else {
push @{$entry}, $other;
next BLOCK;
}
}
if (@{$entry} == 0) {
delete $tails{$head};
}
# a match was found
# The merged string is $other followed by the part of $tomerge that comes
# after the shared $j words.
my $merge = $blkarray[$other]
. substr($blkarray[$tomerge], $j * 2);
$blocklen -= $j;
$blknum--;
# The merged string is kept in the lower-numbered slot.  When that is
# $other, the tail list for the merged string's tail is rewritten to name
# $other instead of $tomerge, and this block is finished.
if ($other < $tomerge) {
$blkarray[$tomerge] = undef;
$blkarray[$other] = $merge;
my $len = length $merge;
my $tail = substr $merge, $len - $j * 2;
$tails{$tail} = [ map { $_ == $tomerge ? $other : $_ }
@{$tails{$tail}} ];
next BLOCK;
}
# Otherwise the merged string stays in $tomerge and the loop tries to
# extend it further at its new head.
$blkarray[$tomerge] = $merge;
$blkarray[$other] = undef;
}
}
}
# Concatenate the surviving strings into the candidate data table.
my $blockstr;
for $k (0 .. $#blkarray) {
$blockstr .= $blkarray[$k] if defined $blkarray[$k];
}
die "Unexpected $blocklen" if length($blockstr) != 2 * $blocklen;
# Two bytes per data word plus two bytes for each of the 0x10000 >> $i
# block table entries.
my $estimate = 2 * $blocklen + (0x20000 >> $i);
printf " after merge %5d: %6d bytes\n", $blocklen, $estimate;
if ($estimate < $bestest) {
$bestest = $estimate;
$bestshift = $i;
$bestblkstr = $blockstr;
}
}
# Build the block table for the winning shift: for every block of $info,
# find where its words sit inside $bestblkstr.
my @blocks;
my $blksize = 1 << $bestshift;
for (my $j = 0; $j < 0x10000; $j += $blksize) {
my $blkkey = substr $info, 2 * $j, 2 * $blksize;
my $index = index $bestblkstr, $blkkey;
# An odd byte offset is not on a word boundary, so search again from the
# next byte.  A failed search returns -1, which is also odd and is caught
# by the die.
while ($index & 1) {
die "not found: $j" if $index == -1;
$index = index $bestblkstr, $blkkey, $index + 1;
}
# Store the word index minus the block's first character, modulo 2**16.
push @blocks, ($index / 2 - $j) & 0xffff;
}
# Phase 3: Generate the file
# Refuse to continue when either table is larger than 0xffff / 3.
# NOTE(review): presumably this keeps the generated Java string constants
# within a 0xffff-byte limit at up to three encoded bytes per character --
# confirm.  The second check compares the byte length of $bestblkstr, not
# its word count.
die "UTF-8 limit of blocks may be exceeded: " . scalar(@blocks) . "\n"
if @blocks > 0xffff / 3;
die "UTF-8 limit of data may be exceeded: " . length($bestblkstr) . "\n"
if length($bestblkstr) > 0xffff / 3;
# Write the generated Java interface gnu.java.lang.CharData to the file named
# by $ARGV[1].  The interface consists of the SOURCE and SHIFT constants
# followed by seven string tables (BLOCKS, DATA, NUM_VALUE, UPPER, LOWER,
# DIRECTION, TITLE).  Reads @blocks, $bestblkstr, @charinfo, $titlecase and
# $bestshift, all of which are computed earlier in this script.
# Dies if the output file cannot be created or cannot be completely written.
{
    print "Generating $ARGV[1] with shift of $bestshift";

    # Three-argument open with a lexical handle: the file name comes from the
    # command line, so it must not be parsed for mode or pipe characters.
    open my $out, '>', $ARGV[1]
        or die "Failed creating output file: $!\n";

    # Emit the initializer of one Java String constant.
    #   $fh     - handle to print to
    #   @values - the table entries, one per Java char
    # The entries are written as a concatenation of string literals holding
    # 11 characters each; every entry is rendered by javaChar() (defined
    # elsewhere in this script).  An empty table is written as = "" so that
    # the generated declaration still has an initializer.
    my $emit_table = sub {
        my ($fh, @values) = @_;
        if (!@values) {
            print {$fh} " = \"\"";
            return;
        }
        my $first = 1;
        while (@values) {
            my @chunk = splice @values, 0, 11;
            print {$fh} $first ? " = \"" : "\n + \"";
            $first = 0;
            for my $val (@chunk) {
                print {$fh} javaChar($val);
            }
            print {$fh} "\"";
        }
        return;
    };

    print {$out} <<EOF;
/* gnu/java/lang/CharData -- Database for java.lang.Character Unicode info
Copyright (C) 2002 Free Software Foundation, Inc.
*** This file is generated by scripts/unicode-muncher.pl ***
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.lang;
/**
* This contains the info about the unicode characters, that
* java.lang.Character needs. It is generated automatically from
* <code>$ARGV[0]</code>, by some
* perl scripts. This Unicode definition file can be found on the
* <a href="http://www.unicode.org">http://www.unicode.org</a> website.
* JDK 1.4 uses Unicode version 3.0.0.
*
* The data is stored as string constants, but Character will convert these
* Strings to their respective <code>char[]</code> components. The field
* <code>BLOCKS</code> stores the offset of a block of 2<sup>SHIFT</sup>
* characters within <code>DATA</code>. The DATA field, in turn, stores
* information about each character in the low order bits, and an offset
* into the attribute tables <code>UPPER</code>, <code>LOWER</code>,
* <code>NUM_VALUE</code>, and <code>DIRECTION</code>. Notice that the
* attribute tables are much smaller than 0xffff entries; as many characters
* in Unicode share common attributes. Finally, there is a listing for
* <code>TITLE</code> exceptions (most characters just have the same
* title case as upper case).
*
* \@author scripts/unicode-muncher.pl (written by Jochen Hoenicke,
* Eric Blake)
* \@see Character
*/
public interface CharData
{
/**
* The Unicode definition file that was parsed to build this database.
*/
String SOURCE = \"$ARGV[0]\";
/**
* The character shift amount to look up the block offset. In other words,
* <code>(char) (BLOCKS.value[ch >> SHIFT] + ch)</code> is the index where
* <code>ch</code> is described in <code>DATA</code>.
*/
int SHIFT = $bestshift;
/**
* The mapping of character blocks to their location in <code>DATA</code>.
* Each entry has been adjusted so that the 16-bit sum with the desired
* character gives the actual index into <code>DATA</code>.
*/
String BLOCKS
EOF
    $emit_table->($out, @blocks);

    print {$out} <<EOF;
;
/**
* Information about each character. The low order 5 bits form the
* character type, the next bit is a flag for non-breaking spaces, and the
* next bit is a flag for mirrored directionality. The high order 9 bits
* form the offset into the attribute tables. Note that this limits the
* number of unique character attributes to 512, which is not a problem
* as of Unicode version 3.2.0, but may soon become one.
*/
String DATA
EOF
    # $bestblkstr is a packed sequence of 16-bit big-endian values ("n").
    $emit_table->($out, unpack("n*", $bestblkstr));

    print {$out} <<EOF;
;
/**
* This is the attribute table for computing the numeric value of a
* character. The value is -1 if Unicode does not define a value, -2
* if the value is not a positive integer, otherwise it is the value.
* Note that this is a signed value, but stored as an unsigned char
* since this is a String literal.
*/
String NUM_VALUE
EOF
    # Each @charinfo entry is an array reference; column 0 feeds NUM_VALUE,
    # column 1 UPPER, column 2 LOWER and column 3 DIRECTION.
    $emit_table->($out, map { $_->[0] } @charinfo);

    print {$out} <<EOF;
;
/**
* This is the attribute table for computing the uppercase representation
* of a character. The value is the signed difference between the
* character and its uppercase version. Note that this is stored as an
* unsigned char since this is a String literal.
*/
String UPPER
EOF
    $emit_table->($out, map { $_->[1] } @charinfo);

    print {$out} <<EOF;
;
/**
* This is the attribute table for computing the lowercase representation
* of a character. The value is the signed difference between the
* character and its lowercase version. Note that this is stored as an
* unsigned char since this is a String literal.
*/
String LOWER
EOF
    $emit_table->($out, map { $_->[2] } @charinfo);

    print {$out} <<EOF;
;
/**
* This is the attribute table for computing the directionality class
* of a character. At present, the value is in the range 0 - 18 if the
* character has a direction, otherwise it is -1. Note that this is
* stored as an unsigned char since this is a String literal.
*/
String DIRECTION
EOF
    $emit_table->($out, map { $_->[3] } @charinfo);

    print {$out} <<EOF;
;
/**
* This is the listing of titlecase special cases (all other character
* can use <code>UPPER</code> to determine their titlecase). The listing
* is a sequence of character pairs; converting the first character of the
* pair to titlecase produces the second character.
*/
String TITLE
EOF
    # $titlecase uses the same packed 16-bit big-endian layout as $bestblkstr.
    $emit_table->($out, unpack("n*", $titlecase));

    print {$out} ";\n}\n";

    # Buffered write errors (e.g. a full disk) only surface at close time.
    close $out or die "Failed writing output file: $!\n";
}
print "\nDone.\n";
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment