Commit c686e630 by Robert Schuster Committed by Anthony Green

ISO_8859_1.java, [...]: Fixed canonical names and aliases according to...

2005-02-07  Robert Schuster  <thebohemian@gmx.net>

        * gnu/java/nio/charset/ISO_8859_1.java,
        gnu/java/nio/charset/US_ASCII.java,
        gnu/java/nio/charset/UTF_16.java,
        gnu/java/nio/charset/UTF_16LE.java,
        gnu/java/nio/charset/UTF_16BE.java,
        gnu/java/nio/charset/UTF_8.java: Fixed canonical names
         and aliases according to
         "http://www.iana.org/assignments/character-sets",
         "http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html"
         and "http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL".
        * gnu/java/nio/charset/Provider.java: Made charset lookup
         case-insensitive which fixes bug #11740.

From-SVN: r94711
parent f0d87cda
2005-02-07 Robert Schuster <thebohemian@gmx.net>
* gnu/java/nio/charset/ISO_8859_1.java,
gnu/java/nio/charset/US_ASCII.java,
gnu/java/nio/charset/UTF_16.java,
gnu/java/nio/charset/UTF_16LE.java,
gnu/java/nio/charset/UTF_16BE.java,
gnu/java/nio/charset/UTF_8.java: Fixed canonical names
and aliases according to
"http://www.iana.org/assignments/character-sets",
"http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html"
and "http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL".
* gnu/java/nio/charset/Provider.java: Made charset lookup
case-insensitive which fixes bug #11740.
2005-02-07 Tom Tromey <tromey@redhat.com> 2005-02-07 Tom Tromey <tromey@redhat.com>
PR libgcj/19611: PR libgcj/19611:
......
/* ISO_8859_1.java -- /* ISO_8859_1.java --
Copyright (C) 2002, 2004 Free Software Foundation, Inc. Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
This file is part of GNU Classpath. This file is part of GNU Classpath.
...@@ -53,7 +53,28 @@ final class ISO_8859_1 extends Charset ...@@ -53,7 +53,28 @@ final class ISO_8859_1 extends Charset
{ {
ISO_8859_1 () ISO_8859_1 ()
{ {
super ("ISO-8859-1", new String[]{"ISO-LATIN-1"}); /* Canonical charset name chosen according to:
* http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
*/
super ("ISO-8859-1", new String[] {
/* These names are provided by
* http://www.iana.org/assignments/character-sets
*/
"iso-ir-100",
"ISO_8859-1",
"latin1",
"l1",
"IBM819",
"CP819",
"csISOLatin1",
"8859_1",
/* These names are provided by
* http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
*/
"ISO8859_1", "ISO_8859_1", "ibm-819", "ISO_8859-1:1987",
"819"
});
} }
public boolean contains (Charset cs) public boolean contains (Charset cs)
......
/* Provider.java -- /* Provider.java --
Copyright (C) 2002 Free Software Foundation, Inc. Copyright (C) 2002, 2005 Free Software Foundation, Inc.
This file is part of GNU Classpath. This file is part of GNU Classpath.
...@@ -48,6 +48,7 @@ import java.util.Iterator; ...@@ -48,6 +48,7 @@ import java.util.Iterator;
* {@link Charset#charsetForName} and * {@link Charset#availableCharsets}. * {@link Charset#charsetForName} and * {@link Charset#availableCharsets}.
* *
* @author Jesse Rosenstock * @author Jesse Rosenstock
* @author Robert Schuster (thebohemian@gmx.net)
* @see Charset * @see Charset
*/ */
public final class Provider extends CharsetProvider public final class Provider extends CharsetProvider
...@@ -63,12 +64,14 @@ public final class Provider extends CharsetProvider ...@@ -63,12 +64,14 @@ public final class Provider extends CharsetProvider
} }
/** /**
* Map from charset name to charset canonical name. * Map from charset name to charset canonical name. The strings
* are all lower-case to allow case-insensitive retrieval of
* Charset instances.
*/ */
private final HashMap canonicalNames; private final HashMap canonicalNames;
/** /**
* Map from canonical name to Charset. * Map from lower-case canonical name to Charset.
* TODO: We may want to use soft references. We would then need to keep * TODO: We may want to use soft references. We would then need to keep
* track of the class name to regenerate the object. * track of the class name to regenerate the object.
*/ */
...@@ -76,8 +79,6 @@ public final class Provider extends CharsetProvider ...@@ -76,8 +79,6 @@ public final class Provider extends CharsetProvider
private Provider () private Provider ()
{ {
// FIXME: We might need to make the name comparison case insensitive.
// Verify this with the Sun JDK.
canonicalNames = new HashMap (); canonicalNames = new HashMap ();
charsets = new HashMap (); charsets = new HashMap ();
...@@ -106,24 +107,42 @@ public final class Provider extends CharsetProvider ...@@ -106,24 +107,42 @@ public final class Provider extends CharsetProvider
.iterator (); .iterator ();
} }
/**
* Returns a Charset instance by converting the given
* name to lower-case, looking up the canonical charset
* name and finally looking up the Charset with that name.
*
* <p>The lookup is therefore case-insensitive.</p>
*
* @return The Charset having <code>charsetName</code>
* as its canonical name or alias, or null if no such Charset exists.
*/
public Charset charsetForName (String charsetName) public Charset charsetForName (String charsetName)
{ {
return (Charset) charsets.get (canonicalize (charsetName)); return (Charset) charsets.get(canonicalNames.get(charsetName.toLowerCase()));
}
private Object canonicalize (String charsetName)
{
Object o = canonicalNames.get (charsetName);
return o == null ? charsetName : o;
} }
/**
* Puts a Charset under its canonical name into the 'charsets' map.
* Then puts a mapping from all its alias names to the canonical name.
*
* <p>All names are converted to lower-case.</p>
*
* @param cs the Charset to register under its canonical name and aliases
*/
private void addCharset (Charset cs) private void addCharset (Charset cs)
{ {
String canonicalName = cs.name (); String canonicalName = cs.name().toLowerCase();
charsets.put (canonicalName, cs); charsets.put (canonicalName, cs);
/* Adds a mapping from the canonical name to
* itself, so that a lookup using that name
* needs no special case.
*/
canonicalNames.put(canonicalName, canonicalName);
for (Iterator i = cs.aliases ().iterator (); i.hasNext (); ) for (Iterator i = cs.aliases ().iterator (); i.hasNext (); )
canonicalNames.put (i.next (), canonicalName); canonicalNames.put (((String) i.next()).toLowerCase(), canonicalName);
} }
public static synchronized Provider provider () public static synchronized Provider provider ()
......
/* US_ASCII.java -- /* US_ASCII.java --
Copyright (C) 2002, 2004 Free Software Foundation, Inc. Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
This file is part of GNU Classpath. This file is part of GNU Classpath.
...@@ -53,7 +53,29 @@ final class US_ASCII extends Charset ...@@ -53,7 +53,29 @@ final class US_ASCII extends Charset
{ {
US_ASCII () US_ASCII ()
{ {
super ("US-ASCII", new String[]{"ISO646-US"}); /* Canonical charset name chosen according to:
* http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
*/
super ("US-ASCII", new String[] {
/* These names are provided by
* http://www.iana.org/assignments/character-sets
*/
"iso-ir-6",
"ANSI_X3.4-1986",
"ISO_646.irv:1991",
"ASCII",
"ISO646-US",
"ASCII",
"us",
"IBM367",
"cp367",
"csASCII",
/* These names are provided by
* http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
*/
"ANSI_X3.4-1968", "iso_646.irv:1983", "ascii7", "646",
"windows-20127"
});
} }
public boolean contains (Charset cs) public boolean contains (Charset cs)
......
/* UTF_16.java -- /* UTF_16.java --
Copyright (C) 2002, 2004 Free Software Foundation, Inc. Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
This file is part of GNU Classpath. This file is part of GNU Classpath.
...@@ -51,7 +51,14 @@ final class UTF_16 extends Charset ...@@ -51,7 +51,14 @@ final class UTF_16 extends Charset
{ {
UTF_16 () UTF_16 ()
{ {
super ("UTF-16", null); super ("UTF-16", new String[] {
// witnessed by the internet
"UTF16",
/* These names are provided by
* http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
*/
"ISO-10646-UCS-2", "unicode", "csUnicode", "ucs-2"
});
} }
public boolean contains (Charset cs) public boolean contains (Charset cs)
......
/* UTF_16BE.java -- /* UTF_16BE.java --
Copyright (C) 2002, 2004 Free Software Foundation, Inc. Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
This file is part of GNU Classpath. This file is part of GNU Classpath.
...@@ -51,7 +51,18 @@ final class UTF_16BE extends Charset ...@@ -51,7 +51,18 @@ final class UTF_16BE extends Charset
{ {
UTF_16BE () UTF_16BE ()
{ {
super ("UTF-16BE", null); super ("UTF-16BE", new String[] {
// witnessed by the internet
"UTF16BE",
/* These names are provided by
* http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
*/
"x-utf-16be", "ibm-1200", "ibm-1201", "ibm-5297",
"ibm-13488", "ibm-17584", "windows-1201", "cp1200", "cp1201",
"UTF16_BigEndian",
// see http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
"UnicodeBigUnmarked"
});
} }
public boolean contains (Charset cs) public boolean contains (Charset cs)
......
/* UTF_16LE.java -- /* UTF_16LE.java --
Copyright (C) 2002, 2004 Free Software Foundation, Inc. Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
This file is part of GNU Classpath. This file is part of GNU Classpath.
...@@ -51,7 +51,17 @@ final class UTF_16LE extends Charset ...@@ -51,7 +51,17 @@ final class UTF_16LE extends Charset
{ {
UTF_16LE () UTF_16LE ()
{ {
super ("UTF-16LE", null); super ("UTF-16LE", new String[] {
// witnessed by the internet
"UTF16LE",
/* These names are provided by
* http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
*/
"x-utf-16le", "ibm-1202", "ibm-13490", "ibm-17586",
"UTF16_LittleEndian",
// see http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
"UnicodeLittleUnmarked"
});
} }
public boolean contains (Charset cs) public boolean contains (Charset cs)
......
/* UTF_8.java -- /* UTF_8.java --
Copyright (C) 2002, 2004 Free Software Foundation, Inc. Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
This file is part of GNU Classpath. This file is part of GNU Classpath.
...@@ -62,7 +62,15 @@ final class UTF_8 extends Charset ...@@ -62,7 +62,15 @@ final class UTF_8 extends Charset
{ {
UTF_8 () UTF_8 ()
{ {
super ("UTF-8", null); super ("UTF-8", new String[] {
/* These names are provided by
* http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
*/
"ibm-1208", "ibm-1209", "ibm-5304", "ibm-5305",
"windows-65001", "cp1208",
// see http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
"UTF8"
});
} }
public boolean contains (Charset cs) public boolean contains (Charset cs)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment