Commit 3f27e3f8 by Tom Tromey Committed by Tom Tromey

Make-lang.in (JAVA_LEX_C): Added chartables.h.

	* Make-lang.in (JAVA_LEX_C): Added chartables.h.
	* lex.c (java_ignorable_control_p): Removed.
	(java_letter_or_digit_p): Removed.
	(java_start_char_p): New function.
	(java_read_char): Return `int', not `unicode_t'.  Changed
	callers.
	(java_read_unicode): Likewise.
	(java_read_unicode_collapsing_terminators): Likewise.
	(java_get_unicode): Likewise.
	(java_new_lexer): Initialize hit_eof.
	(java_parse_end_comment): Take `int' argument.
	(java_parse_doc_section): Likewise.
	(java_parse_escape_sequence): Don't allow backslash-newline.
	Return `int'.
	* lex.h (JAVA_DIGIT_P): Removed.
	(_JAVA_LETTER_OR_DIGIT_P): Removed.
	(_JAVA_IDENTIFIER_IGNORABLE): Removed.
	(JAVA_START_CHAR_P): Renamed from JAVA_ID_CHAR_P.
	(JAVA_PART_CHAR_P): New macro.
	(UEOF): Now -1.
	(JAVA_CHAR_ERROR): Now -2.
	(java_lexer): New field `hit_eof'.
	* chartables.h: New file.
	* gen-table.pl: New file.

From-SVN: r38237
parent 568aac9c
2000-11-07 Tom Tromey <tromey@cygnus.com>
* Make-lang.in (JAVA_LEX_C): Added chartables.h.
* lex.c (java_ignorable_control_p): Removed.
(java_letter_or_digit_p): Removed.
(java_start_char_p): New function.
(java_read_char): Return `int', not `unicode_t'. Changed
callers.
(java_read_unicode): Likewise.
(java_read_unicode_collapsing_terminators): Likewise.
(java_get_unicode): Likewise.
(java_new_lexer): Initialize hit_eof.
(java_parse_end_comment): Take `int' argument.
(java_parse_doc_section): Likewise.
(java_parse_escape_sequence): Don't allow backslash-newline.
Return `int'.
* lex.h (JAVA_DIGIT_P): Removed.
(_JAVA_LETTER_OR_DIGIT_P): Removed.
(_JAVA_IDENTIFIER_IGNORABLE): Removed.
(JAVA_START_CHAR_P): Renamed from JAVA_ID_CHAR_P.
(JAVA_PART_CHAR_P): New macro.
(UEOF): Now -1.
(JAVA_CHAR_ERROR): Now -2.
(java_lexer): New field `hit_eof'.
* chartables.h: New file.
* gen-table.pl: New file.
2000-11-20 Tom Tromey <tromey@cygnus.com>
Alexandre Petit-Bianco <apbianco@cygnus.com>
......
......@@ -214,7 +214,7 @@ java.stage4: stage4-start
#
# .o:.h dependencies.
JAVA_TREE_H = $(TREE_H) java/java-tree.h java/java-tree.def
JAVA_LEX_C = java/lex.c java/keyword.h
JAVA_LEX_C = java/lex.c java/keyword.h java/chartables.h
java/parse.o: java/parse.c java/jcf-reader.c $(CONFIG_H) system.h \
function.h $(JAVA_TREE_H) $(JAVA_LEX_C) java/parse.h java/lex.h $(GGC_H)
......
This source diff could not be displayed because it is too large. You can view the blob instead.
#! /usr/bin/perl
# Copyright (C) 2000 Free Software Foundation
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA.
# gen-table.pl - Generate tables for gcj from Unicode data.
# Usage: perl gen-table.pl DATA-FILE
# Column index of each semicolon-separated field in one line of the
# Unicode data table.  The names are assigned 0 through 14 in order.
($CODE, $NAME, $CATEGORY, $COMBINING_CLASSES, $BIDI_CATEGORY,
 $DECOMPOSITION, $DECIMAL_VALUE, $DIGIT_VALUE, $NUMERIC_VALUE,
 $MIRRORED, $OLD_NAME, $COMMENT, $UPPER, $LOWER, $TITLE) = (0 .. 14);

# Start of special-cased gaps in the Unicode data table, keyed by code
# point.  The value is only a human-readable label for the range.
%gaps = (
    0x4e00, "CJK",
    0xac00, "Hangul",
    0xd800, "Unassigned High Surrogate",
    0xdb80, "Private Use High Surrogate",
    0xdc00, "Low Surrogate",
    0xe000, "Private Use",
);

# Control characters which are also considered whitespace: 0x0009
# through 0x000d and 0x001c through 0x001f.  This is a somewhat odd
# list, taken from the JCL definition of
# Character.isIdentifierIgnorable.
%whitespace_controls =
    map { ($_, 1) } (0x0009 .. 0x000d, 0x001c .. 0x001f);
# Main driver.  Read the Unicode data table named on the command line,
# classify every code point from 0 through 0xFFFF with process_one
# (filling holes in the table as we go), then write the result with
# print_tables.  Exits with status 1 if the data file cannot be read.
my $data_file = $ARGV[0];
if (! defined $data_file)
{
    print STDERR "Usage: perl gen-table.pl DATA-FILE\n";
    exit 1;
}

my $input;
if (! open ($input, '<', $data_file))
{
    print STDERR "gen-table.pl: cannot open $data_file: $!\n";
    exit 1;
}

$last_code = -1;
while (<$input>)
{
    # chomp, not chop: a final line without a newline must not lose
    # its last character.
    chomp;

    # A blank line carries no code point; treating it as code 0 would
    # rewind $last_code and overwrite the table.
    next if $_ eq '';

    @fields = split (';', $_, 30);
    if ($#fields != 14)
    {
        print STDERR "Entry for $fields[$CODE] has wrong number of fields\n";
    }

    $code = hex ($fields[$CODE]);

    # The generated table has 256 pages of 256 entries, so anything
    # beyond 0xFFFF cannot be represented and is ignored.
    next if $code > 0xffff;

    if ($code > $last_code + 1)
    {
        # Found a gap.
        # NOTE(review): %gaps is described as keyed by the *start* of
        # each special range, yet this lookup uses the code point that
        # ends the gap.  Confirm against the data file layout before
        # changing it.
        if (defined $gaps{$code})
        {
            # Fill the gap with the last character read.
            @gfields = @fields;
        }
        else
        {
            # The gap represents undefined characters.  Only the type
            # matters.
            @gfields = ('', '', 'Cn', '0', '', '', '', '', '', '', '',
                        '', '', '', '');
        }

        for (++$last_code; $last_code < $code; ++$last_code)
        {
            # Element of the @gfields array (this used to write to an
            # unrelated hash named %gfields).
            $gfields[$CODE] = sprintf ("%04x", $last_code);
            process_one ($last_code, @gfields);
        }
    }

    process_one ($code, @fields);
    $last_code = $code;
}
close ($input);

# Everything after the last entry in the data file is unassigned.
@gfields = ('', '', 'Cn', '0', '', '', '', '', '', '', '',
            '', '', '', '');
for (++$last_code; $last_code < 0x10000; ++$last_code)
{
    $gfields[$CODE] = sprintf ("%04x", $last_code);
    process_one ($last_code, @gfields);
}
--$last_code;                   # Want last to be 0xFFFF.

print_tables ($last_code);
exit 0;
# Classify one character and record the result in the global @map.
#
#   $code    numeric code point; used as the index into @map.
#   @fields  the fields of the character's data-table entry; only the
#            general category ($fields[$CATEGORY]) is examined.
#
# $map[$code] becomes the text of a C expression: '0' when the
# character may not appear in an identifier, '(LETTER_PART)' when it
# may appear but not start one, or '(LETTER_PART | LETTER_START)'.
sub process_one
{
    my ($code, @fields) = @_;
    my $type = $fields[$CATEGORY];

    # Valid identifier start: letter, connecting punctuation, or
    # currency symbol.
    my $is_start = ($type =~ /L./
                    || $type eq 'Pc'
                    || $type eq 'Sc');

    # Valid identifier member: anything that can start one, plus
    # decimal and letter numbers, non-spacing and combining marks,
    # controls that are not whitespace, and a few fixed ranges.
    my $is_part = ($is_start
                   || $type =~ /N[dl]/
                   || $type =~ /M[nc]/
                   || ($type eq 'Cc'
                       && ! defined $whitespace_controls{$code})
                   # Join controls.
                   || ($code >= 0x200c && $code <= 0x200f)
                   # Bidi controls -- note that there is a typo in the
                   # JCL where these are concerned.
                   || ($code >= 0x202a && $code <= 0x202e)
                   # Format controls.
                   || ($code >= 0x206a && $code <= 0x206f)
                   # ZWNBSP.
                   || $code == 0xfeff);

    my @flags;
    push (@flags, 'LETTER_PART') if $is_part;
    push (@flags, 'LETTER_START') if $is_start;

    $map[$code] = @flags ? '(' . join (' | ', @flags) . ')' : '0';
}
# Write chartables.h in the current directory from the global @map.
#
#   $last  highest code point to emit.
#
# The header defines LETTER_START and LETTER_PART, one 256-entry array
# for each page that print_row chooses to emit, and a `type_table'
# array holding one entry per page.  Dies if chartables.h cannot be
# created or fully written; on success reports the byte total on
# standard output.
sub print_tables
{
    my ($last) = @_;

    # `local' rather than `my': print_row adds to this running total,
    # which it can only see through dynamic scoping.
    local ($bytes_out) = 0;

    open (OUT, '>', 'chartables.h')
        or die "gen-table.pl: cannot create chartables.h: $!\n";

    print OUT "/* This file is automatically generated. DO NOT EDIT!\n";
    print OUT " Instead, edit gen-table.pl and re-run. */\n\n";
    print OUT "#ifndef CHARTABLES_H\n";
    print OUT "#define CHARTABLES_H\n\n";
    print OUT "#define LETTER_START 1\n";
    print OUT "#define LETTER_PART 2\n\n";

    # Emit the per-page arrays first, remembering what each page's
    # entry in the top-level table should be.
    for ($count = 0; $count <= $last; $count += 256)
    {
        $row[$count / 256] = print_row ($count, '(char *) ', 'char', 1,
                                        'page');
    }

    print OUT "static char *type_table[256] = {\n";
    for ($count = 0; $count <= $last; $count += 256)
    {
        print OUT ",\n" if $count > 0;
        print OUT " ", $row[$count / 256];
        # Each top-level entry is counted as 4 bytes -- presumably the
        # size of a pointer on the intended target.
        $bytes_out += 4;
    }
    print OUT "\n};\n\n";
    print OUT "#endif /* CHARTABLES_H */\n";

    # Buffered write errors only show up when the handle is closed.
    close (OUT)
        or die "gen-table.pl: error writing chartables.h: $!\n";

    printf "Generated %d bytes\n", $bytes_out;
}
# Emit one "row" (a page of 256 consecutive entries) of a two-level
# table.
#
#   $start    index into the global @map of the page's first entry.
#   $def_pfx  text placed in front of the shared value when every
#             entry of the page is the same.
#   $typname  C element type of the emitted array.
#   $typsize  size of one element, used only for the byte total.
#   $name     prefix of the array's name; the page number is appended.
#
# Returns the text that stands for this page in the top-level table:
# $def_pfx followed by the common value when all 256 entries are equal
# (nothing is written in that case), otherwise the name of the array
# just written to the OUT handle.  In the latter case the caller's
# $bytes_out is increased by 256 * $typsize.
sub print_row
{
    my ($start, $def_pfx, $typname, $typsize, $name) = @_;
    my $page = $start / 256;
    my @values = @map[$start .. $start + 255];

    # A uniform page needs no array of its own.
    unless (grep { $_ ne $values[0] } @values)
    {
        return $def_pfx . $values[0];
    }

    printf OUT "static %s %s%d[256] = {\n ", $typname, $name, $page;

    my $column = 2;
    my $first = 1;
    foreach my $text (@values)
    {
        # The separator is written before deciding whether to wrap, so
        # it stays at the end of the line being left.
        print OUT ", " unless $first;
        $first = 0;

        if (length ($text) + $column + 2 > 78)
        {
            print OUT "\n ";
            $column = 2;
        }
        print OUT $text;
        $column += length ($text) + 2;
    }
    print OUT "\n};\n\n";

    $bytes_out += 256 * $typsize;
    return sprintf "%s%d", $name, $page;
}
......@@ -36,6 +36,7 @@ The Free Software Foundation is independent of Sun Microsystems, Inc. */
#include "keyword.h"
#include "flags.h"
#include "chartables.h"
/* Function declaration */
static char *java_sprint_unicode PARAMS ((struct java_line *, int));
......@@ -46,17 +47,17 @@ static int java_is_eol PARAMS ((FILE *, int));
static tree build_wfl_node PARAMS ((tree));
#endif
static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
static unicode_t java_parse_escape_sequence PARAMS ((void));
static int java_letter_or_digit_p PARAMS ((unicode_t));
static int java_ignorable_control_p PARAMS ((unicode_t));
static int java_parse_doc_section PARAMS ((unicode_t));
static void java_parse_end_comment PARAMS ((unicode_t));
static unicode_t java_get_unicode PARAMS ((void));
static unicode_t java_read_unicode PARAMS ((java_lexer *, int *));
static unicode_t java_read_unicode_collapsing_terminators
PARAMS ((java_lexer *, int *));
static int java_parse_escape_sequence PARAMS ((void));
static int java_start_char_p PARAMS ((unicode_t));
static int java_part_char_p PARAMS ((unicode_t));
static int java_parse_doc_section PARAMS ((int));
static void java_parse_end_comment PARAMS ((int));
static int java_get_unicode PARAMS ((void));
static int java_read_unicode PARAMS ((java_lexer *, int *));
static int java_read_unicode_collapsing_terminators PARAMS ((java_lexer *,
int *));
static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
static unicode_t java_read_char PARAMS ((java_lexer *));
static int java_read_char PARAMS ((java_lexer *));
static void java_allocate_new_line PARAMS ((void));
static void java_unget_unicode PARAMS ((void));
static unicode_t java_sneak_unicode PARAMS ((void));
......@@ -217,6 +218,7 @@ java_new_lexer (finput, encoding)
lex->finput = finput;
lex->bs_count = 0;
lex->unget_value = 0;
lex->hit_eof = 0;
#ifdef HAVE_ICONV
lex->handle = iconv_open ("UCS-2", encoding);
......@@ -298,7 +300,7 @@ java_destroy_lexer (lex)
free (lex);
}
static unicode_t
static int
java_read_char (lex)
java_lexer *lex;
{
......@@ -496,12 +498,12 @@ java_store_unicode (l, c, unicode_escape_p)
l->unicode_escape_p [l->size++] = unicode_escape_p;
}
static unicode_t
static int
java_read_unicode (lex, unicode_escape_p)
java_lexer *lex;
int *unicode_escape_p;
{
unicode_t c;
int c;
c = java_read_char (lex);
*unicode_escape_p = 0;
......@@ -549,12 +551,12 @@ java_read_unicode (lex, unicode_escape_p)
return (unicode_t) '\\';
}
static unicode_t
static int
java_read_unicode_collapsing_terminators (lex, unicode_escape_p)
java_lexer *lex;
int *unicode_escape_p;
{
unicode_t c = java_read_unicode (lex, unicode_escape_p);
int c = java_read_unicode (lex, unicode_escape_p);
if (c == '\r')
{
......@@ -571,13 +573,18 @@ java_read_unicode_collapsing_terminators (lex, unicode_escape_p)
return c;
}
static unicode_t
static int
java_get_unicode ()
{
/* It's time to read a line when... */
if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
{
unicode_t c;
int c;
int found_chars = 0;
if (ctxp->lexer->hit_eof)
return UEOF;
java_allocate_new_line ();
if (ctxp->c_line->line[0] != '\n')
{
......@@ -586,15 +593,24 @@ java_get_unicode ()
int unicode_escape_p;
c = java_read_unicode_collapsing_terminators (ctxp->lexer,
&unicode_escape_p);
java_store_unicode (ctxp->c_line, c, unicode_escape_p);
if (ctxp->c_line->white_space_only
&& !JAVA_WHITE_SPACE_P (c)
&& c != '\n'
&& c != UEOF)
ctxp->c_line->white_space_only = 0;
if (c != UEOF)
{
found_chars = 1;
java_store_unicode (ctxp->c_line, c, unicode_escape_p);
if (ctxp->c_line->white_space_only
&& !JAVA_WHITE_SPACE_P (c)
&& c != '\n')
ctxp->c_line->white_space_only = 0;
}
if ((c == '\n') || (c == UEOF))
break;
}
if (c == UEOF && ! found_chars)
{
ctxp->lexer->hit_eof = 1;
return UEOF;
}
}
}
ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
......@@ -606,9 +622,8 @@ java_get_unicode ()
* C is the first character following the '/' and '*'. */
static void
java_parse_end_comment (c)
unicode_t c;
int c;
{
for ( ;; c = java_get_unicode ())
{
switch (c)
......@@ -637,7 +652,7 @@ java_parse_end_comment (c)
static int
java_parse_doc_section (c)
unicode_t c;
int c;
{
int valid_tag = 0, seen_star = 0;
......@@ -655,10 +670,10 @@ java_parse_doc_section (c)
}
c = java_get_unicode();
}
if (c == UEOF)
java_lex_error ("Comment not terminated at end of input", 0);
if (seen_star && (c == '/'))
return 1; /* Goto step1 in caller */
......@@ -673,7 +688,7 @@ java_parse_doc_section (c)
c = java_get_unicode ();
tag [tag_index++] = c;
}
if (c == UEOF)
java_lex_error ("Comment not terminated at end of input", 0);
tag [tag_index] = '\0';
......@@ -685,28 +700,51 @@ java_parse_doc_section (c)
return 0;
}
/* This function to be used only by JAVA_ID_CHAR_P (), otherwise it
will return a wrong result. */
/* Return true if C is a valid start character for a Java identifier.
This is only called if C >= 128 -- smaller values are handled
inline. However, this function handles all values anyway. */
static int
java_letter_or_digit_p (c)
java_start_char_p (c)
unicode_t c;
{
return _JAVA_LETTER_OR_DIGIT_P (c);
unsigned int hi = c / 256;
char *page = type_table[hi];
unsigned long val = (unsigned long) page;
int flags;
if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
flags = page[c & 255];
else
flags = val;
return flags & LETTER_START;
}
/* This function to be used only by JAVA_ID_CHAR_P (). */
/* Return true if C is a valid part character for a Java identifier.
This is only called if C >= 128 -- smaller values are handled
inline. However, this function handles all values anyway. */
static int
java_ignorable_control_p (c)
java_part_char_p (c)
unicode_t c;
{
return _JAVA_IDENTIFIER_IGNORABLE (c);
unsigned int hi = c / 256;
char *page = type_table[hi];
unsigned long val = (unsigned long) page;
int flags;
if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
flags = page[c & 255];
else
flags = val;
return flags & LETTER_PART;
}
static unicode_t
static int
java_parse_escape_sequence ()
{
unicode_t char_lit;
unicode_t c;
int c;
switch (c = java_get_unicode ())
{
......@@ -754,8 +792,6 @@ java_parse_escape_sequence ()
return char_lit;
}
case '\n':
return '\n'; /* ULT, caught latter as a specific error */
default:
java_lex_error ("Invalid character in escape sequence", 0);
return JAVA_CHAR_ERROR;
......@@ -840,7 +876,8 @@ java_lex (java_lval)
#endif
YYSTYPE *java_lval;
{
unicode_t c, first_unicode;
int c;
unicode_t first_unicode;
int ascii_index, all_ascii;
char *string;
......@@ -863,7 +900,7 @@ java_lex (java_lval)
if ((c = java_get_unicode ()) == UEOF)
return 0; /* Ok here */
else
java_unget_unicode (); /* Caught latter at the end the function */
java_unget_unicode (); /* Caught later, at the end of the function */
}
/* Handle EOF here */
if (c == UEOF) /* Should probably do something here... */
......@@ -1189,7 +1226,7 @@ java_lex (java_lval)
/* Character literals */
if (c == '\'')
{
unicode_t char_lit;
int char_lit;
if ((c = java_get_unicode ()) == '\\')
char_lit = java_parse_escape_sequence ();
else
......@@ -1206,7 +1243,7 @@ java_lex (java_lval)
if (c != '\'')
java_lex_error ("Syntax error in character literal", 0);
if (c == JAVA_CHAR_ERROR)
if (char_lit == JAVA_CHAR_ERROR)
char_lit = 0; /* We silently convert it to zero */
JAVA_LEX_CHAR_LIT (char_lit);
......@@ -1225,7 +1262,11 @@ java_lex (java_lval)
{
if (c == '\\')
c = java_parse_escape_sequence ();
no_error &= (c != JAVA_CHAR_ERROR ? 1 : 0);
if (c == JAVA_CHAR_ERROR)
{
no_error = 0;
c = 0; /* We silently convert it to zero. */
}
java_unicode_2_utf8 (c);
}
if (c == '\n' || c == UEOF) /* ULT */
......@@ -1469,7 +1510,7 @@ java_lex (java_lval)
/* Keyword, boolean literal or null literal */
for (first_unicode = c, all_ascii = 1, ascii_index = 0;
JAVA_ID_CHAR_P (c); c = java_get_unicode ())
JAVA_PART_CHAR_P (c); c = java_get_unicode ())
{
java_unicode_2_utf8 (c);
if (all_ascii && c >= 128)
......@@ -1554,8 +1595,8 @@ java_lex (java_lval)
}
}
/* We may have and ID here */
if (JAVA_ID_CHAR_P(first_unicode) && !JAVA_DIGIT_P (first_unicode))
/* We may have an ID here */
if (JAVA_START_CHAR_P (first_unicode))
{
JAVA_LEX_ID (string);
java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment