Character.java 20.6 KB
Newer Older
Tom Tromey committed
1 2
// Character.java - Character class.

3
/* Copyright (C) 1998, 1999, 2000  Free Software Foundation
Tom Tromey committed
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35

   This file is part of libgcj.

This software is copyrighted work licensed under the terms of the
Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
details.  */

package java.lang;

import java.io.Serializable;

/**
 * @author Tom Tromey <tromey@cygnus.com>
 * @date September 10, 1998 
 */

/* Written using "Java Class Libraries", 2nd edition, ISBN 0-201-31002-3
 * "The Java Language Specification", ISBN 0-201-63451-1,
 * online API docs for JDK 1.2 beta from http://www.javasoft.com,
 * and The Unicode Standard Version 2.0.
 * Status: Believed complete and correct for JDK 1.1; 1.2 methods
 * unimplemented.
 */

public final class Character implements Serializable, Comparable
{
  public static final char MIN_VALUE = '\u0000';
  public static final char MAX_VALUE = '\uffff';

  public static final int MIN_RADIX = 2;
  public static final int MAX_RADIX = 36;

36
  public static final Class TYPE = VMClassLoader.getPrimitiveClass('C');
Tom Tromey committed
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83

  // Space.
  public static final byte SPACE_SEPARATOR     = 12;
  public static final byte LINE_SEPARATOR      = 13;
  public static final byte PARAGRAPH_SEPARATOR = 14;

  // Letters.
  public static final byte UPPERCASE_LETTER = 1;
  public static final byte LOWERCASE_LETTER = 2;
  public static final byte TITLECASE_LETTER = 3;
  public static final byte MODIFIER_LETTER  = 4;
  public static final byte OTHER_LETTER     = 5;

  // Numbers.
  public static final byte DECIMAL_DIGIT_NUMBER =  9;
  public static final byte LETTER_NUMBER        = 10;
  public static final byte OTHER_NUMBER         = 11;

  // Marks.
  public static final byte NON_SPACING_MARK     = 6;
  public static final byte ENCLOSING_MARK       = 7;
  public static final byte COMBINING_SPACING_MARK = 8;

  // Punctuation.
  public static final byte DASH_PUNCTUATION      = 20;
  public static final byte START_PUNCTUATION     = 21;
  public static final byte END_PUNCTUATION       = 22;
  public static final byte CONNECTOR_PUNCTUATION = 23;
  public static final byte OTHER_PUNCTUATION     = 24;

  // Symbols.
  public static final byte MATH_SYMBOL     = 25;
  public static final byte CURRENCY_SYMBOL = 26;
  public static final byte MODIFIER_SYMBOL = 27;
  public static final byte OTHER_SYMBOL    = 28;

  // Format controls.
  public static final byte CONTROL = 15;
  // Note: The JCL book says that both FORMAT and PRIVATE_USE are 18.
  // However, FORMAT is actually 16.
  public static final byte FORMAT  = 16;

  // Others.
  public static final byte UNASSIGNED  = 0;
  public static final byte PRIVATE_USE = 18;
  public static final byte SURROGATE   = 19;

84
  private static final long serialVersionUID = 3786198910865385080L;
Tom Tromey committed
85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284

  public Character (char ch)
  {
    value = ch;
  }

  public char charValue ()
  {
    return value;
  }

  // See if a character is a digit.  If so, return the corresponding
  // value.  Otherwise return -1.
  private static native int digit_value (char ch);

  public static int digit (char ch, int radix)
  {
    if (radix < MIN_RADIX || radix > MAX_RADIX)
      return -1;

    int d = digit_value (ch);
    if (d == -1)
      {
	if (ch >= 'A' && ch <= 'Z')
	  d = ch - 'A' + 10;
	else if (ch >= 'a' && ch <= 'z')
	  d = ch - 'a' + 10;
	else
	  return -1;
      }
    return d >= radix ? -1 : d;
  }

  public boolean equals (Object obj)
  {
    // Don't need to compare OBJ to null as instanceof will do this.
    if (obj instanceof Character)
      return value == ((Character) obj).value;
    return false;
  }

  public static char forDigit (int d, int rdx)
  {
    if (d < 0 || d >= rdx || rdx < MIN_RADIX || rdx > MAX_RADIX)
      return '\u0000';
    if (d < 10)
      return (char) ('0' + d);
    // The Java Language Spec says to use lowercase, while the JCL
    // says to use uppercase.  We go with the former.
    return (char) ('a' + d - 10);
  }

  public static native int getNumericValue (char ch);
  public static native int getType (char ch);

  public int hashCode ()
  {
    return value;
  }

  public static boolean isDefined (char ch)
  {
    return getType (ch) != UNASSIGNED;
  }

  public static boolean isDigit (char ch)
  {
    return digit_value (ch) != -1;
  }

  // The JCL book says that the argument here is a Character.  That is
  // wrong.
  public static boolean isIdentifierIgnorable (char ch)
  {
    // This information comes from the Unicode Standard.  It isn't
    // auto-generated as it doesn't appear in the unidata table.
    return ((ch >= '\u0000' && ch <= '\u0008')
	    || (ch >= '\u000e' && ch <= '\u001b')
	    // JDK 1.2 docs say that these are ignorable.  The Unicode
	    // Standard is somewhat ambiguous on this issue.
	    || (ch >= '\u007f' && ch <= '\u009f')
	    || (ch >= '\u200c' && ch <= '\u200f')
	    // JCl says 200a through 200e, but that is a typo.  The
	    // Unicode standard says the bidi controls are 202a
	    // through 202e.
	    || (ch >= '\u202a' && ch <= '\u202e')
	    || (ch >= '\u206a' && ch <= '\u206f')
	    || ch == '\ufeff');
  }

  public static boolean isISOControl (char c)
  {
    return ((c >= '\u0000' && c <= '\u001f')
	    || (c >= '\u007f' && c <= '\u009f'));
  }

  public static boolean isJavaIdentifierPart (char ch)
  {
    if (isIdentifierIgnorable (ch) || isDigit (ch))
      return true;
    int type = getType (ch);
    return (type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK
	    || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION
	    || type == UPPERCASE_LETTER || type == LOWERCASE_LETTER
	    || type == TITLECASE_LETTER || type == MODIFIER_LETTER
	    || type == OTHER_LETTER || type == LETTER_NUMBER);
  }

  public static boolean isJavaIdentifierStart (char ch)
  {
    int type = getType (ch);
    return (type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION
	    || type == UPPERCASE_LETTER || type == LOWERCASE_LETTER
	    || type == TITLECASE_LETTER || type == MODIFIER_LETTER
	    || type == OTHER_LETTER);
  }

  // Deprecated in 1.2.
  public static boolean isJavaLetter (char ch)
  {
    return ch == '$' || ch == '_' || isLetter (ch);
  }

  // Deprecated in 1.2.
  public static boolean isJavaLetterOrDigit (char ch)
  {
    return ch == '$' || ch == '_' || isLetterOrDigit (ch);
  }

  public static boolean isLetter (char ch)
  {
    int type = getType (ch);
    return (type == UPPERCASE_LETTER || type == LOWERCASE_LETTER
	    || type == TITLECASE_LETTER || type == MODIFIER_LETTER
	    || type == OTHER_LETTER);
  }

  public static boolean isLetterOrDigit (char ch)
  {
    return isDigit (ch) || isLetter (ch);
  }

  public static native boolean isLowerCase (char ch);

  // Deprecated in JCL.
  public static boolean isSpace (char ch)
  {
    return ch == '\n' || ch == '\t' || ch == '\f' || ch == '\r' || ch == ' ';
  }

  public static native boolean isSpaceChar (char ch);
  public static native boolean isTitleCase (char ch);

  public static boolean isUnicodeIdentifierPart (char ch)
  {
    if (isIdentifierIgnorable (ch) || isDigit (ch))
      return true;
    int type = getType (ch);
    return (type == CONNECTOR_PUNCTUATION || type == LETTER_NUMBER
	    || type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK
	    || type == UPPERCASE_LETTER || type == LOWERCASE_LETTER
	    || type == TITLECASE_LETTER || type == MODIFIER_LETTER
	    || type == OTHER_LETTER);
  }

  public static boolean isUnicodeIdentifierStart (char ch)
  {
    return isLetter (ch);
  }

  public static native boolean isUpperCase (char ch);

  public static boolean isWhitespace (char ch)
  {
    return ((ch >= '\u0009' && ch <= '\r')
	    || (ch >= '\u001c' && ch <= '\u001f')
	    || (ch != '\u00a0' && ch != '\ufeff' && isSpaceChar (ch)));
  }

  public static native char toLowerCase (char ch);
  public static native char toTitleCase (char ch);
  public static native char toUpperCase (char ch);

  public String toString ()
  {
    return String.valueOf(value);
  }

  public int compareTo (Character anotherCharacter)
  {
    return value - anotherCharacter.value;
  }

  public int compareTo (Object o)
  {
    return compareTo ((Character) o);
  }

  // Private data.
  private char value;
285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361

  public static class Subset
  {
    protected Subset (String name)
    {
      this.name = name;
    }

    public final boolean equals (Object obj)
    {
      return obj == this;
    }

    public final int hashCode ()
    {
      return super.hashCode ();
    }

    public final String toString ()
    {
      return name;
    }

    // Name of this subset.
    private String name;
  }

  public static final class UnicodeBlock extends Subset
  {
    private UnicodeBlock (String name, char start, char end)
    {
      super (name);
      this.start = start;
      this.end = end;
    }

    public static UnicodeBlock of (char c)
    {
      // A special case we need.
      if (c == '\uFEFF')
	return SPECIALS;

      // Do a binary search to find the correct subset.
      int hi = blocks.length;
      int lo = 0;
      while (hi > lo)
	{
	  int mid = (hi + lo) / 2;
	  UnicodeBlock ub = blocks[mid];
	  if (c < ub.start)
	    hi = mid;
	  else if (c > ub.end)
	    lo = mid;
	  else
	    return ub;
	}

      return null;
    }

    // Start and end characters.
    private char start, end;

    // Everything from here to the end of UnicodeBlock is
    // automatically generated by the blocks.pl script.
    public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock ("Basic Latin", '\u0000', '\u007F');
    public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock ("Latin-1 Supplement", '\u0080', '\u00FF');
    public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock ("Latin Extended-A", '\u0100', '\u017F');
    public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock ("Latin Extended-B", '\u0180', '\u024F');
    public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock ("IPA Extensions", '\u0250', '\u02AF');
    public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock ("Spacing Modifier Letters", '\u02B0', '\u02FF');
    public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock ("Combining Diacritical Marks", '\u0300', '\u036F');
    public static final UnicodeBlock GREEK = new UnicodeBlock ("Greek", '\u0370', '\u03FF');
    public static final UnicodeBlock CYRILLIC = new UnicodeBlock ("Cyrillic", '\u0400', '\u04FF');
    public static final UnicodeBlock ARMENIAN = new UnicodeBlock ("Armenian", '\u0530', '\u058F');
    public static final UnicodeBlock HEBREW = new UnicodeBlock ("Hebrew", '\u0590', '\u05FF');
    public static final UnicodeBlock ARABIC = new UnicodeBlock ("Arabic", '\u0600', '\u06FF');
362 363
    public static final UnicodeBlock SYRIAC__ = new UnicodeBlock ("Syriac  ", '\u0700', '\u074F');
    public static final UnicodeBlock THAANA = new UnicodeBlock ("Thaana", '\u0780', '\u07BF');
364 365 366 367 368 369 370 371 372
    public static final UnicodeBlock DEVANAGARI = new UnicodeBlock ("Devanagari", '\u0900', '\u097F');
    public static final UnicodeBlock BENGALI = new UnicodeBlock ("Bengali", '\u0980', '\u09FF');
    public static final UnicodeBlock GURMUKHI = new UnicodeBlock ("Gurmukhi", '\u0A00', '\u0A7F');
    public static final UnicodeBlock GUJARATI = new UnicodeBlock ("Gujarati", '\u0A80', '\u0AFF');
    public static final UnicodeBlock ORIYA = new UnicodeBlock ("Oriya", '\u0B00', '\u0B7F');
    public static final UnicodeBlock TAMIL = new UnicodeBlock ("Tamil", '\u0B80', '\u0BFF');
    public static final UnicodeBlock TELUGU = new UnicodeBlock ("Telugu", '\u0C00', '\u0C7F');
    public static final UnicodeBlock KANNADA = new UnicodeBlock ("Kannada", '\u0C80', '\u0CFF');
    public static final UnicodeBlock MALAYALAM = new UnicodeBlock ("Malayalam", '\u0D00', '\u0D7F');
373
    public static final UnicodeBlock SINHALA = new UnicodeBlock ("Sinhala", '\u0D80', '\u0DFF');
374 375
    public static final UnicodeBlock THAI = new UnicodeBlock ("Thai", '\u0E00', '\u0E7F');
    public static final UnicodeBlock LAO = new UnicodeBlock ("Lao", '\u0E80', '\u0EFF');
376 377
    public static final UnicodeBlock TIBETAN = new UnicodeBlock ("Tibetan", '\u0F00', '\u0FFF');
    public static final UnicodeBlock MYANMAR_ = new UnicodeBlock ("Myanmar ", '\u1000', '\u109F');
378 379
    public static final UnicodeBlock GEORGIAN = new UnicodeBlock ("Georgian", '\u10A0', '\u10FF');
    public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock ("Hangul Jamo", '\u1100', '\u11FF');
380 381 382 383 384 385 386
    public static final UnicodeBlock ETHIOPIC = new UnicodeBlock ("Ethiopic", '\u1200', '\u137F');
    public static final UnicodeBlock CHEROKEE = new UnicodeBlock ("Cherokee", '\u13A0', '\u13FF');
    public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock ("Unified Canadian Aboriginal Syllabics", '\u1400', '\u167F');
    public static final UnicodeBlock OGHAM = new UnicodeBlock ("Ogham", '\u1680', '\u169F');
    public static final UnicodeBlock RUNIC = new UnicodeBlock ("Runic", '\u16A0', '\u16FF');
    public static final UnicodeBlock KHMER = new UnicodeBlock ("Khmer", '\u1780', '\u17FF');
    public static final UnicodeBlock MONGOLIAN = new UnicodeBlock ("Mongolian", '\u1800', '\u18AF');
387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405
    public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock ("Latin Extended Additional", '\u1E00', '\u1EFF');
    public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock ("Greek Extended", '\u1F00', '\u1FFF');
    public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock ("General Punctuation", '\u2000', '\u206F');
    public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock ("Superscripts and Subscripts", '\u2070', '\u209F');
    public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock ("Currency Symbols", '\u20A0', '\u20CF');
    public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock ("Combining Marks for Symbols", '\u20D0', '\u20FF');
    public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock ("Letterlike Symbols", '\u2100', '\u214F');
    public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock ("Number Forms", '\u2150', '\u218F');
    public static final UnicodeBlock ARROWS = new UnicodeBlock ("Arrows", '\u2190', '\u21FF');
    public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock ("Mathematical Operators", '\u2200', '\u22FF');
    public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock ("Miscellaneous Technical", '\u2300', '\u23FF');
    public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock ("Control Pictures", '\u2400', '\u243F');
    public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock ("Optical Character Recognition", '\u2440', '\u245F');
    public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock ("Enclosed Alphanumerics", '\u2460', '\u24FF');
    public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock ("Box Drawing", '\u2500', '\u257F');
    public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock ("Block Elements", '\u2580', '\u259F');
    public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock ("Geometric Shapes", '\u25A0', '\u25FF');
    public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock ("Miscellaneous Symbols", '\u2600', '\u26FF');
    public static final UnicodeBlock DINGBATS = new UnicodeBlock ("Dingbats", '\u2700', '\u27BF');
406 407 408 409
    public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock ("Braille Patterns", '\u2800', '\u28FF');
    public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock ("CJK Radicals Supplement", '\u2E80', '\u2EFF');
    public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock ("Kangxi Radicals", '\u2F00', '\u2FDF');
    public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock ("Ideographic Description Characters", '\u2FF0', '\u2FFF');
410 411 412 413 414 415
    public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock ("CJK Symbols and Punctuation", '\u3000', '\u303F');
    public static final UnicodeBlock HIRAGANA = new UnicodeBlock ("Hiragana", '\u3040', '\u309F');
    public static final UnicodeBlock KATAKANA = new UnicodeBlock ("Katakana", '\u30A0', '\u30FF');
    public static final UnicodeBlock BOPOMOFO = new UnicodeBlock ("Bopomofo", '\u3100', '\u312F');
    public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock ("Hangul Compatibility Jamo", '\u3130', '\u318F');
    public static final UnicodeBlock KANBUN = new UnicodeBlock ("Kanbun", '\u3190', '\u319F');
416
    public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock ("Bopomofo Extended", '\u31A0', '\u31BF');
417 418
    public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock ("Enclosed CJK Letters and Months", '\u3200', '\u32FF');
    public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock ("CJK Compatibility", '\u3300', '\u33FF');
419
    public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock ("CJK Unified Ideographs Extension A", '\u3400', '\u4DB5');
420
    public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock ("CJK Unified Ideographs", '\u4E00', '\u9FFF');
421 422
    public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock ("Yi Syllables", '\uA000', '\uA48F');
    public static final UnicodeBlock YI_RADICALS = new UnicodeBlock ("Yi Radicals", '\uA490', '\uA4CF');
423
    public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock ("Hangul Syllables", '\uAC00', '\uD7A3');
424 425
    public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock ("Surrogates Area", '\uD800', '\uDFFF');
    public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock ("Private Use Area", '\uE000', '\uF8FF');
426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447
    public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock ("CJK Compatibility Ideographs", '\uF900', '\uFAFF');
    public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock ("Alphabetic Presentation Forms", '\uFB00', '\uFB4F');
    public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock ("Arabic Presentation Forms-A", '\uFB50', '\uFDFF');
    public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock ("Combining Half Marks", '\uFE20', '\uFE2F');
    public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock ("CJK Compatibility Forms", '\uFE30', '\uFE4F');
    public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock ("Small Form Variants", '\uFE50', '\uFE6F');
    public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock ("Arabic Presentation Forms-B", '\uFE70', '\uFEFE');
    public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock ("Halfwidth and Fullwidth Forms", '\uFF00', '\uFFEF');
    public static final UnicodeBlock SPECIALS = new UnicodeBlock ("Specials", '\uFFF0', '\uFFFD');
    private static final UnicodeBlock[] blocks = {
      BASIC_LATIN,
      LATIN_1_SUPPLEMENT,
      LATIN_EXTENDED_A,
      LATIN_EXTENDED_B,
      IPA_EXTENSIONS,
      SPACING_MODIFIER_LETTERS,
      COMBINING_DIACRITICAL_MARKS,
      GREEK,
      CYRILLIC,
      ARMENIAN,
      HEBREW,
      ARABIC,
448 449
      SYRIAC__,
      THAANA,
450 451 452 453 454 455 456 457 458
      DEVANAGARI,
      BENGALI,
      GURMUKHI,
      GUJARATI,
      ORIYA,
      TAMIL,
      TELUGU,
      KANNADA,
      MALAYALAM,
459
      SINHALA,
460 461 462
      THAI,
      LAO,
      TIBETAN,
463
      MYANMAR_,
464 465
      GEORGIAN,
      HANGUL_JAMO,
466 467 468 469 470 471 472
      ETHIOPIC,
      CHEROKEE,
      UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
      OGHAM,
      RUNIC,
      KHMER,
      MONGOLIAN,
473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491
      LATIN_EXTENDED_ADDITIONAL,
      GREEK_EXTENDED,
      GENERAL_PUNCTUATION,
      SUPERSCRIPTS_AND_SUBSCRIPTS,
      CURRENCY_SYMBOLS,
      COMBINING_MARKS_FOR_SYMBOLS,
      LETTERLIKE_SYMBOLS,
      NUMBER_FORMS,
      ARROWS,
      MATHEMATICAL_OPERATORS,
      MISCELLANEOUS_TECHNICAL,
      CONTROL_PICTURES,
      OPTICAL_CHARACTER_RECOGNITION,
      ENCLOSED_ALPHANUMERICS,
      BOX_DRAWING,
      BLOCK_ELEMENTS,
      GEOMETRIC_SHAPES,
      MISCELLANEOUS_SYMBOLS,
      DINGBATS,
492 493 494 495
      BRAILLE_PATTERNS,
      CJK_RADICALS_SUPPLEMENT,
      KANGXI_RADICALS,
      IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
496 497 498 499 500 501
      CJK_SYMBOLS_AND_PUNCTUATION,
      HIRAGANA,
      KATAKANA,
      BOPOMOFO,
      HANGUL_COMPATIBILITY_JAMO,
      KANBUN,
502
      BOPOMOFO_EXTENDED,
503 504
      ENCLOSED_CJK_LETTERS_AND_MONTHS,
      CJK_COMPATIBILITY,
505
      CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
506
      CJK_UNIFIED_IDEOGRAPHS,
507 508
      YI_SYLLABLES,
      YI_RADICALS,
509
      HANGUL_SYLLABLES,
510 511
      SURROGATES_AREA,
      PRIVATE_USE_AREA,
512 513 514 515 516 517 518 519 520 521 522
      CJK_COMPATIBILITY_IDEOGRAPHS,
      ALPHABETIC_PRESENTATION_FORMS,
      ARABIC_PRESENTATION_FORMS_A,
      COMBINING_HALF_MARKS,
      CJK_COMPATIBILITY_FORMS,
      SMALL_FORM_VARIANTS,
      ARABIC_PRESENTATION_FORMS_B,
      HALFWIDTH_AND_FULLWIDTH_FORMS,
      SPECIALS
    };
  }
Tom Tromey committed
523
}