Commit c01b7cdf by Tom Tromey Committed by Tom Tromey

re PR java/2319 (invalid UTF-8 sequences should be rejected)

	* lex.c (java_read_char): Disallow invalid and overlong
	sequences.  Fixes PR java/2319.

From-SVN: r43475
parent 5d291213
2001-06-19 Tom Tromey <tromey@redhat.com>
* lex.c (java_read_char): Disallow invalid and overlong
sequences. Fixes PR java/2319.
2001-06-05 Jeff Sturm <jsturm@one-point.com>
* decl.c (create_primitive_vtable): Don't call make_decl_rtl.
......
......@@ -454,15 +454,21 @@ java_read_char (lex)
if (c == EOF)
return UEOF;
if (c < 128)
return (unicode_t)c;
return (unicode_t) c;
else
{
if ((c & 0xe0) == 0xc0)
{
c1 = getc (lex->finput);
if ((c1 & 0xc0) == 0x80)
return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
c = c1;
{
unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f));
/* Check for valid 2-byte characters. We explicitly
allow \0 because this encoding is common in the
Java world. */
if (r == 0 || (r >= 0x80 && r <= 0x7ff))
return r;
}
}
else if ((c & 0xf0) == 0xe0)
{
......@@ -471,16 +477,23 @@ java_read_char (lex)
{
c2 = getc (lex->finput);
if ((c2 & 0xc0) == 0x80)
return (unicode_t)(((c & 0xf) << 12) +
(( c1 & 0x3f) << 6) + (c2 & 0x3f));
else
c = c2;
{
unicode_t r = (unicode_t)(((c & 0xf) << 12) +
(( c1 & 0x3f) << 6)
+ (c2 & 0x3f));
/* Check for valid 3-byte characters.
Don't allow surrogate, \ufffe or \uffff. */
if (r >= 0x800 && r <= 0xffff
&& ! (r >= 0xd800 && r <= 0xdfff)
&& r != 0xfffe && r != 0xffff)
return r;
}
}
else
c = c1;
}
/* We simply don't support invalid characters. */
/* We simply don't support invalid characters. We also
don't support 4-, 5-, or 6-byte UTF-8 sequences, as these
cannot be valid Java characters. */
java_lex_error ("malformed UTF-8 character", 0);
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment