Commit 747800ee authored by Tom Tromey, committed by Tom Tromey

lex.h (_JAVA_IDENTIFIER_IGNORABLE): New macro.

	* lex.h (_JAVA_IDENTIFIER_IGNORABLE): New macro.
	(JAVA_ID_CHAR_P): Also try java_ignorable_control_p.
	* lex.c (java_read_unicode): Removed `term_context' argument.
	Recognize any number of `u' in `\u'.
	(java_read_unicode_collapsing_terminators): New function.
	(java_get_unicode): Use it.
	(java_lineterminator): Removed.
	(yylex): Produce error if character literal is newline or single
	quote.  Return if eof found in middle of `//' comment.  EOF in
	`//' comment is only an error if pedantic.
	(java_ignorable_control_p): New function.
	(java_parse_end_comment): Return if eof found in middle of
	comment.
	Include flags.h.
	* jv-scan.c (pedantic): New global.

From-SVN: r37232
parent 35e9340f
2000-11-03 Tom Tromey <tromey@cygnus.com>
* lex.h (_JAVA_IDENTIFIER_IGNORABLE): New macro.
(JAVA_ID_CHAR_P): Also try java_ignorable_control_p.
* lex.c (java_read_unicode): Removed `term_context' argument.
Recognize any number of `u' in `\u'.
(java_read_unicode_collapsing_terminators): New function.
(java_get_unicode): Use it.
(java_lineterminator): Removed.
(yylex): Produce error if character literal is newline or single
quote. Return if eof found in middle of `//' comment. EOF in
`//' comment is only an error if pedantic.
(java_ignorable_control_p): New function.
(java_parse_end_comment): Return if eof found in middle of
comment.
Include flags.h.
* jv-scan.c (pedantic): New global.
2000-10-27 Zack Weinberg <zack@wolery.stanford.edu>
* Make-lang.in: Move all build rules here from Makefile.in,
......
...@@ -62,6 +62,8 @@ int flag_find_main = 0; ...@@ -62,6 +62,8 @@ int flag_find_main = 0;
int flag_dump_class = 0; int flag_dump_class = 0;
int flag_list_filename = 0; int flag_list_filename = 0;
int pedantic = 0;
/* This is used to mark options with no short value. */ /* This is used to mark options with no short value. */
......
...@@ -35,9 +35,9 @@ The Free Software Foundation is independent of Sun Microsystems, Inc. */ ...@@ -35,9 +35,9 @@ The Free Software Foundation is independent of Sun Microsystems, Inc. */
Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */ Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
#include "keyword.h" #include "keyword.h"
#include "flags.h"
/* Function declaration */ /* Function declaration */
static int java_lineterminator PARAMS ((unicode_t));
static char *java_sprint_unicode PARAMS ((struct java_line *, int)); static char *java_sprint_unicode PARAMS ((struct java_line *, int));
static void java_unicode_2_utf8 PARAMS ((unicode_t)); static void java_unicode_2_utf8 PARAMS ((unicode_t));
static void java_lex_error PARAMS ((const char *, int)); static void java_lex_error PARAMS ((const char *, int));
...@@ -48,10 +48,13 @@ static tree build_wfl_node PARAMS ((tree)); ...@@ -48,10 +48,13 @@ static tree build_wfl_node PARAMS ((tree));
static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int)); static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
static unicode_t java_parse_escape_sequence PARAMS ((void)); static unicode_t java_parse_escape_sequence PARAMS ((void));
static int java_letter_or_digit_p PARAMS ((unicode_t)); static int java_letter_or_digit_p PARAMS ((unicode_t));
static int java_ignorable_control_p PARAMS ((unicode_t));
static int java_parse_doc_section PARAMS ((unicode_t)); static int java_parse_doc_section PARAMS ((unicode_t));
static void java_parse_end_comment PARAMS ((unicode_t)); static void java_parse_end_comment PARAMS ((unicode_t));
static unicode_t java_get_unicode PARAMS ((void)); static unicode_t java_get_unicode PARAMS ((void));
static unicode_t java_read_unicode PARAMS ((java_lexer *, int, int *)); static unicode_t java_read_unicode PARAMS ((java_lexer *, int *));
static unicode_t java_read_unicode_collapsing_terminators
PARAMS ((java_lexer *, int *));
static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int)); static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
static unicode_t java_read_char PARAMS ((java_lexer *)); static unicode_t java_read_char PARAMS ((java_lexer *));
static void java_allocate_new_line PARAMS ((void)); static void java_allocate_new_line PARAMS ((void));
...@@ -494,9 +497,8 @@ java_store_unicode (l, c, unicode_escape_p) ...@@ -494,9 +497,8 @@ java_store_unicode (l, c, unicode_escape_p)
} }
static unicode_t static unicode_t
java_read_unicode (lex, term_context, unicode_escape_p) java_read_unicode (lex, unicode_escape_p)
java_lexer *lex; java_lexer *lex;
int term_context;
int *unicode_escape_p; int *unicode_escape_p;
{ {
unicode_t c; unicode_t c;
...@@ -507,9 +509,7 @@ java_read_unicode (lex, term_context, unicode_escape_p) ...@@ -507,9 +509,7 @@ java_read_unicode (lex, term_context, unicode_escape_p)
if (c != '\\') if (c != '\\')
{ {
lex->bs_count = 0; lex->bs_count = 0;
return (term_context ? c : (java_lineterminator (c) return c;
? '\n'
: (unicode_t) c));
} }
++lex->bs_count; ++lex->bs_count;
...@@ -532,13 +532,17 @@ java_read_unicode (lex, term_context, unicode_escape_p) ...@@ -532,13 +532,17 @@ java_read_unicode (lex, term_context, unicode_escape_p)
unicode |= (unicode_t)((c-'0') << shift); unicode |= (unicode_t)((c-'0') << shift);
else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift); unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift);
else if (c == 'u')
{
/* Recognize any number of u in \u. */
shift += 4;
}
else else
java_lex_error ("Non hex digit in Unicode escape sequence", 0); java_lex_error ("Non hex digit in Unicode escape sequence", 0);
} }
lex->bs_count = 0; lex->bs_count = 0;
*unicode_escape_p = 1; *unicode_escape_p = 1;
return (term_context return unicode;
? unicode : (java_lineterminator (c) ? '\n' : unicode));
} }
lex->unget_value = c; lex->unget_value = c;
} }
...@@ -546,6 +550,28 @@ java_read_unicode (lex, term_context, unicode_escape_p) ...@@ -546,6 +550,28 @@ java_read_unicode (lex, term_context, unicode_escape_p)
} }
static unicode_t static unicode_t
java_read_unicode_collapsing_terminators (lex, unicode_escape_p)
java_lexer *lex;
int *unicode_escape_p;
{
unicode_t c = java_read_unicode (lex, unicode_escape_p);
if (c == '\r')
{
/* We have to read ahead to see if we got \r\n. In that case we
return a single line terminator. */
int dummy;
c = java_read_unicode (lex, &dummy);
if (c != '\n')
lex->unget_value = c;
/* In either case we must return a newline. */
c = '\n';
}
return c;
}
static unicode_t
java_get_unicode () java_get_unicode ()
{ {
/* It's time to read a line when... */ /* It's time to read a line when... */
...@@ -554,54 +580,28 @@ java_get_unicode () ...@@ -554,54 +580,28 @@ java_get_unicode ()
unicode_t c; unicode_t c;
java_allocate_new_line (); java_allocate_new_line ();
if (ctxp->c_line->line[0] != '\n') if (ctxp->c_line->line[0] != '\n')
for (;;) {
{ for (;;)
int unicode_escape_p; {
c = java_read_unicode (ctxp->lexer, 0, &unicode_escape_p); int unicode_escape_p;
java_store_unicode (ctxp->c_line, c, unicode_escape_p); c = java_read_unicode_collapsing_terminators (ctxp->lexer,
if (ctxp->c_line->white_space_only &unicode_escape_p);
&& !JAVA_WHITE_SPACE_P (c) && c!='\n') java_store_unicode (ctxp->c_line, c, unicode_escape_p);
ctxp->c_line->white_space_only = 0; if (ctxp->c_line->white_space_only
if ((c == '\n') || (c == UEOF)) && !JAVA_WHITE_SPACE_P (c)
break; && c != '\n'
} && c != UEOF)
ctxp->c_line->white_space_only = 0;
if ((c == '\n') || (c == UEOF))
break;
}
}
} }
ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0); ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]); JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
return ctxp->c_line->line [ctxp->c_line->current++]; return ctxp->c_line->line [ctxp->c_line->current++];
} }
static int
java_lineterminator (c)
unicode_t c;
{
if (c == '\n') /* LF */
return 1;
else if (c == '\r') /* CR */
{
int unicode_escape_p;
c = java_read_unicode (ctxp->lexer, 1, &unicode_escape_p);
if (c == '\r')
{
/* In this case we will have another terminator. For some
reason the lexer has several different unget methods. We
can't use the `ahead' method because then the \r will end
up in the actual text of the line, causing an error. So
instead we choose a very low-level method. FIXME: this
is incredibly ugly. */
ctxp->lexer->unget_value = c;
}
else if (c != '\n')
{
ctxp->c_line->ahead [0] = c;
ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p;
}
return 1;
}
else
return 0;
}
/* Parse the end of a C style comment. /* Parse the end of a C style comment.
* C is the first character following the '/' and '*'. */ * C is the first character following the '/' and '*'. */
static void static void
...@@ -615,11 +615,13 @@ java_parse_end_comment (c) ...@@ -615,11 +615,13 @@ java_parse_end_comment (c)
{ {
case UEOF: case UEOF:
java_lex_error ("Comment not terminated at end of input", 0); java_lex_error ("Comment not terminated at end of input", 0);
return;
case '*': case '*':
switch (c = java_get_unicode ()) switch (c = java_get_unicode ())
{ {
case UEOF: case UEOF:
java_lex_error ("Comment not terminated at end of input", 0); java_lex_error ("Comment not terminated at end of input", 0);
return;
case '/': case '/':
return; return;
case '*': /* reparse only '*' */ case '*': /* reparse only '*' */
...@@ -692,6 +694,14 @@ java_letter_or_digit_p (c) ...@@ -692,6 +694,14 @@ java_letter_or_digit_p (c)
return _JAVA_LETTER_OR_DIGIT_P (c); return _JAVA_LETTER_OR_DIGIT_P (c);
} }
/* This function is to be used only by JAVA_ID_CHAR_P (). */
static int
java_ignorable_control_p (c)
unicode_t c;
{
return _JAVA_IDENTIFIER_IGNORABLE (c);
}
static unicode_t static unicode_t
java_parse_escape_sequence () java_parse_escape_sequence ()
{ {
...@@ -747,7 +757,7 @@ java_parse_escape_sequence () ...@@ -747,7 +757,7 @@ java_parse_escape_sequence ()
case '\n': case '\n':
return '\n'; /* ULT, caught latter as a specific error */ return '\n'; /* ULT, caught latter as a specific error */
default: default:
java_lex_error ("Illegal character in escape sequence", 0); java_lex_error ("Invalid character in escape sequence", 0);
return JAVA_CHAR_ERROR; return JAVA_CHAR_ERROR;
} }
} }
...@@ -839,7 +849,14 @@ java_lex (java_lval) ...@@ -839,7 +849,14 @@ java_lex (java_lval)
{ {
c = java_get_unicode (); c = java_get_unicode ();
if (c == UEOF) if (c == UEOF)
java_lex_error ("Comment not terminated at end of input", 0); {
/* It is ok to end a `//' comment with EOF, unless
we're being pedantic. */
if (pedantic)
java_lex_error ("Comment not terminated at end of input",
0);
return 0;
}
if (c == '\n') /* ULT */ if (c == '\n') /* ULT */
goto step1; goto step1;
} }
...@@ -1134,6 +1151,7 @@ java_lex (java_lval) ...@@ -1134,6 +1151,7 @@ java_lex (java_lval)
} }
ctxp->minus_seen = 0; ctxp->minus_seen = 0;
/* Character literals */ /* Character literals */
if (c == '\'') if (c == '\'')
{ {
...@@ -1141,10 +1159,14 @@ java_lex (java_lval) ...@@ -1141,10 +1159,14 @@ java_lex (java_lval)
if ((c = java_get_unicode ()) == '\\') if ((c = java_get_unicode ()) == '\\')
char_lit = java_parse_escape_sequence (); char_lit = java_parse_escape_sequence ();
else else
char_lit = c; {
if (c == '\n' || c == '\'')
java_lex_error ("Invalid character literal", 0);
char_lit = c;
}
c = java_get_unicode (); c = java_get_unicode ();
if ((c == '\n') || (c == UEOF)) if ((c == '\n') || (c == UEOF))
java_lex_error ("Character literal not terminated at end of line", 0); java_lex_error ("Character literal not terminated at end of line", 0);
if (c != '\'') if (c != '\'')
...@@ -1509,7 +1531,7 @@ java_lex (java_lval) ...@@ -1509,7 +1531,7 @@ java_lex (java_lval)
/* Everything else is an invalid character in the input */ /* Everything else is an invalid character in the input */
{ {
char lex_error_buffer [128]; char lex_error_buffer [128];
sprintf (lex_error_buffer, "Invalid character '%s' in input", sprintf (lex_error_buffer, "Invalid character `%s' in input",
java_sprint_unicode (ctxp->c_line, ctxp->c_line->current)); java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
java_lex_error (lex_error_buffer, 1); java_lex_error (lex_error_buffer, 1);
} }
......
...@@ -256,6 +256,7 @@ extern void set_float_handler PARAMS ((jmp_buf)); ...@@ -256,6 +256,7 @@ extern void set_float_handler PARAMS ((jmp_buf));
RANGE (c, '0', '9') || \ RANGE (c, '0', '9') || \
c == '_' || \ c == '_' || \
c == '$')) || \ c == '$')) || \
java_ignorable_control_p (c) || \
(c > 127 && java_letter_or_digit_p (c))) (c > 127 && java_letter_or_digit_p (c)))
#define JAVA_ASCII_DIGIT(c) RANGE(c,'0', '9') #define JAVA_ASCII_DIGIT(c) RANGE(c,'0', '9')
#define JAVA_ASCII_OCTDIGIT(c) RANGE(c,'0', '7') #define JAVA_ASCII_OCTDIGIT(c) RANGE(c,'0', '7')
...@@ -552,6 +553,20 @@ extern void set_float_handler PARAMS ((jmp_buf)); ...@@ -552,6 +553,20 @@ extern void set_float_handler PARAMS ((jmp_buf));
RANGE (c, 0xFFD2, 0xFFD7) || \ RANGE (c, 0xFFD2, 0xFFD7) || \
RANGE (c, 0xFFDA, 0xFFDC)) RANGE (c, 0xFFDA, 0xFFDC))
/* Identifier-ignorable characters. This should not be used
standalone. Note that the JCL says 200a->200e. That is a typo.
The correct values are 202a->202e. Note also that we test against
0x0000 separately to avoid a warning. */
#define _JAVA_IDENTIFIER_IGNORABLE(c) \
(c == 0x0000 \
|| RANGE (c, 0x0001, 0x0008) \
|| RANGE (c, 0x000e, 0x001b) \
|| RANGE (c, 0x007f, 0x009f) \
|| RANGE (c, 0x200c, 0x200f) \
|| RANGE (c, 0x202a, 0x202e) \
|| RANGE (c, 0x206a, 0x206f) \
|| c == 0xfeff)
/* Constants */ /* Constants */
#define JAVA_CHAR_ERROR 0xFFC1 /* This is an illegal unicode!?! FIXME */ #define JAVA_CHAR_ERROR 0xFFC1 /* This is an illegal unicode!?! FIXME */
#define JAVA_READ_BUFFER 256 #define JAVA_READ_BUFFER 256
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment