Commit 1613e52b by Neil Booth Committed by Neil Booth

Makefile.in (LIBCPP_OBJS): Add cppcharset.o.

	* Makefile.in (LIBCPP_OBJS): Add cppcharset.o.
	(cppcharset.o): New target.
	* c-lex.c (is_extended_char): Move to cppcharset.c.
	(utf8_extend_token): Delete.
	* cppcharset.c: New file.
	* cpphash.h (_cpp_valid_ucn): New.
	* cpplex.c (lex_identifier): Update prototype.
	(continues_identifier_p): Rename forms_identifier_p.  Handle UCN
	escapes.
	(maybe_read_ucs): Rename maybe_read_ucn.  Update to use code
	in cppcharset.c.
	(lex_number, lex_identifier, cpp_parse_escape): Update.
	(_cpp_lex_direct): Update to handle UCNs.
	(cpp_avoid_paste): Don't paste to form a UCN.
testsuite:
	* ucs.c: Update diagnostic messages.

From-SVN: r65845
parent 0a45ec5c
2003-04-20 Neil Booth <neil@daikokuya.co.uk>
* Makefile.in (LIBCPP_OBJS): Add cppcharset.o.
(cppcharset.o): New target.
* c-lex.c (is_extended_char): Move to cppcharset.c.
(utf8_extend_token): Delete.
* cppcharset.c: New file.
* cpphash.h (_cpp_valid_ucn): New.
* cpplex.c (lex_identifier): Update prototype.
(continues_identifier_p): Rename forms_identifier_p. Handle UCN
escapes.
(maybe_read_ucs): Rename maybe_read_ucn. Update to use code
in cppcharset.c.
(lex_number, lex_identifier, cpp_parse_escape): Update.
(_cpp_lex_direct): Update to handle UCNs.
(cpp_avoid_paste): Don't paste to form a UCN.
2003-04-19 Roger Sayle <roger@eyesopen.com> 2003-04-19 Roger Sayle <roger@eyesopen.com>
* builtins.c (expand_builtin): Don't expand a pure or const * builtins.c (expand_builtin): Don't expand a pure or const
......
...@@ -2320,7 +2320,7 @@ PREPROCESSOR_DEFINES = \ ...@@ -2320,7 +2320,7 @@ PREPROCESSOR_DEFINES = \
@TARGET_SYSTEM_ROOT_DEFINE@ @TARGET_SYSTEM_ROOT_DEFINE@
LIBCPP_OBJS = cpplib.o cpplex.o cppmacro.o cppexp.o cppfiles.o cpptrad.o \ LIBCPP_OBJS = cpplib.o cpplex.o cppmacro.o cppexp.o cppfiles.o cpptrad.o \
cpphash.o cpperror.o cppinit.o \ cpphash.o cpperror.o cppinit.o cppcharset.o \
hashtable.o line-map.o mkdeps.o mbchar.o cpppch.o hashtable.o line-map.o mkdeps.o mbchar.o cpppch.o
LIBCPP_DEPS = $(CPPLIB_H) cpphash.h line-map.h hashtable.h intl.h \ LIBCPP_DEPS = $(CPPLIB_H) cpphash.h line-map.h hashtable.h intl.h \
...@@ -2333,6 +2333,7 @@ libcpp.a: $(LIBCPP_OBJS) ...@@ -2333,6 +2333,7 @@ libcpp.a: $(LIBCPP_OBJS)
$(AR) $(AR_FLAGS) libcpp.a $(LIBCPP_OBJS) $(AR) $(AR_FLAGS) libcpp.a $(LIBCPP_OBJS)
-$(RANLIB) libcpp.a -$(RANLIB) libcpp.a
cppcharset.o: cppcharset.c $(LIBCPP_DEPS)
cpperror.o: cpperror.c $(LIBCPP_DEPS) cpperror.o: cpperror.c $(LIBCPP_DEPS)
cppexp.o: cppexp.c $(LIBCPP_DEPS) cppexp.o: cppexp.c $(LIBCPP_DEPS)
cpplex.o: cpplex.c $(LIBCPP_DEPS) mbchar.h cpplex.o: cpplex.c $(LIBCPP_DEPS) mbchar.h
......
...@@ -322,315 +322,6 @@ cb_undef (pfile, line, node) ...@@ -322,315 +322,6 @@ cb_undef (pfile, line, node)
(*debug_hooks->undef) (SOURCE_LINE (map, line), (*debug_hooks->undef) (SOURCE_LINE (map, line),
(const char *) NODE_NAME (node)); (const char *) NODE_NAME (node));
} }
#if 0 /* not yet */
/* Classify the code point C for use in an identifier.

   Returns 0 when C is below 0x7f, or unconditionally when
   TARGET_EBCDIC is defined.  Returns 1 for every other value; in that
   case an error is also issued through error () unless C falls in one
   of the ranges tested below, which are the characters permitted in
   identifiers as per [extendid].  None of those lie outside the Basic
   Multilingual Plane, so anything above 0xffff is diagnosed at once
   using the eight-digit \U spelling.

   Note that extended character support in identifiers has not yet been
   implemented.  It is the original author's personal opinion that this
   is not a desirable feature.  Portable code cannot count on support
   for more than the basic identifier character set.  */
static inline int
is_extended_char (c)
     int c;
{
#ifdef TARGET_EBCDIC
  return 0;
#else
  /* ASCII.  NOTE(review): 0x7f itself is not caught by this test and
     falls through to the final diagnostic.  */
  if (c < 0x7f)
    return 0;

  /* None of the valid chars are outside the Basic Multilingual Plane (the
     low 16 bits).  */
  if (c > 0xffff)
    {
      error ("universal-character-name '\\U%08x' not valid in identifier", c);
      return 1;
    }

  /* Latin */
  if ((c >= 0x00c0 && c <= 0x00d6)
      || (c >= 0x00d8 && c <= 0x00f6)
      || (c >= 0x00f8 && c <= 0x01f5)
      || (c >= 0x01fa && c <= 0x0217)
      || (c >= 0x0250 && c <= 0x02a8)
      || (c >= 0x1e00 && c <= 0x1e9a)
      || (c >= 0x1ea0 && c <= 0x1ef9))
    return 1;

  /* Greek */
  if ((c == 0x0384)
      || (c >= 0x0388 && c <= 0x038a)
      || (c == 0x038c)
      || (c >= 0x038e && c <= 0x03a1)
      || (c >= 0x03a3 && c <= 0x03ce)
      || (c >= 0x03d0 && c <= 0x03d6)
      || (c == 0x03da)
      || (c == 0x03dc)
      || (c == 0x03de)
      || (c == 0x03e0)
      || (c >= 0x03e2 && c <= 0x03f3)
      || (c >= 0x1f00 && c <= 0x1f15)
      || (c >= 0x1f18 && c <= 0x1f1d)
      || (c >= 0x1f20 && c <= 0x1f45)
      || (c >= 0x1f48 && c <= 0x1f4d)
      || (c >= 0x1f50 && c <= 0x1f57)
      || (c == 0x1f59)
      || (c == 0x1f5b)
      || (c == 0x1f5d)
      || (c >= 0x1f5f && c <= 0x1f7d)
      || (c >= 0x1f80 && c <= 0x1fb4)
      || (c >= 0x1fb6 && c <= 0x1fbc)
      || (c >= 0x1fc2 && c <= 0x1fc4)
      || (c >= 0x1fc6 && c <= 0x1fcc)
      || (c >= 0x1fd0 && c <= 0x1fd3)
      || (c >= 0x1fd6 && c <= 0x1fdb)
      || (c >= 0x1fe0 && c <= 0x1fec)
      || (c >= 0x1ff2 && c <= 0x1ff4)
      || (c >= 0x1ff6 && c <= 0x1ffc))
    return 1;

  /* Cyrillic */
  if ((c >= 0x0401 && c <= 0x040d)
      || (c >= 0x040f && c <= 0x044f)
      || (c >= 0x0451 && c <= 0x045c)
      || (c >= 0x045e && c <= 0x0481)
      || (c >= 0x0490 && c <= 0x04c4)
      || (c >= 0x04c7 && c <= 0x04c8)
      || (c >= 0x04cb && c <= 0x04cc)
      || (c >= 0x04d0 && c <= 0x04eb)
      || (c >= 0x04ee && c <= 0x04f5)
      || (c >= 0x04f8 && c <= 0x04f9))
    return 1;

  /* Armenian */
  if ((c >= 0x0531 && c <= 0x0556)
      || (c >= 0x0561 && c <= 0x0587))
    return 1;

  /* Hebrew */
  if ((c >= 0x05d0 && c <= 0x05ea)
      || (c >= 0x05f0 && c <= 0x05f4))
    return 1;

  /* Arabic */
  if ((c >= 0x0621 && c <= 0x063a)
      || (c >= 0x0640 && c <= 0x0652)
      || (c >= 0x0670 && c <= 0x06b7)
      || (c >= 0x06ba && c <= 0x06be)
      || (c >= 0x06c0 && c <= 0x06ce)
      || (c >= 0x06e5 && c <= 0x06e7))
    return 1;

  /* Devanagari */
  if ((c >= 0x0905 && c <= 0x0939)
      || (c >= 0x0958 && c <= 0x0962))
    return 1;

  /* Bengali */
  if ((c >= 0x0985 && c <= 0x098c)
      || (c >= 0x098f && c <= 0x0990)
      || (c >= 0x0993 && c <= 0x09a8)
      || (c >= 0x09aa && c <= 0x09b0)
      || (c == 0x09b2)
      || (c >= 0x09b6 && c <= 0x09b9)
      || (c >= 0x09dc && c <= 0x09dd)
      || (c >= 0x09df && c <= 0x09e1)
      || (c >= 0x09f0 && c <= 0x09f1))
    return 1;

  /* Gurmukhi */
  if ((c >= 0x0a05 && c <= 0x0a0a)
      || (c >= 0x0a0f && c <= 0x0a10)
      || (c >= 0x0a13 && c <= 0x0a28)
      || (c >= 0x0a2a && c <= 0x0a30)
      || (c >= 0x0a32 && c <= 0x0a33)
      || (c >= 0x0a35 && c <= 0x0a36)
      || (c >= 0x0a38 && c <= 0x0a39)
      || (c >= 0x0a59 && c <= 0x0a5c)
      || (c == 0x0a5e))
    return 1;

  /* Gujarati */
  if ((c >= 0x0a85 && c <= 0x0a8b)
      || (c == 0x0a8d)
      || (c >= 0x0a8f && c <= 0x0a91)
      || (c >= 0x0a93 && c <= 0x0aa8)
      || (c >= 0x0aaa && c <= 0x0ab0)
      || (c >= 0x0ab2 && c <= 0x0ab3)
      || (c >= 0x0ab5 && c <= 0x0ab9)
      || (c == 0x0ae0))
    return 1;

  /* Oriya */
  if ((c >= 0x0b05 && c <= 0x0b0c)
      || (c >= 0x0b0f && c <= 0x0b10)
      || (c >= 0x0b13 && c <= 0x0b28)
      || (c >= 0x0b2a && c <= 0x0b30)
      || (c >= 0x0b32 && c <= 0x0b33)
      || (c >= 0x0b36 && c <= 0x0b39)
      || (c >= 0x0b5c && c <= 0x0b5d)
      || (c >= 0x0b5f && c <= 0x0b61))
    return 1;

  /* Tamil */
  if ((c >= 0x0b85 && c <= 0x0b8a)
      || (c >= 0x0b8e && c <= 0x0b90)
      || (c >= 0x0b92 && c <= 0x0b95)
      || (c >= 0x0b99 && c <= 0x0b9a)
      || (c == 0x0b9c)
      || (c >= 0x0b9e && c <= 0x0b9f)
      || (c >= 0x0ba3 && c <= 0x0ba4)
      || (c >= 0x0ba8 && c <= 0x0baa)
      || (c >= 0x0bae && c <= 0x0bb5)
      || (c >= 0x0bb7 && c <= 0x0bb9))
    return 1;

  /* Telugu */
  if ((c >= 0x0c05 && c <= 0x0c0c)
      || (c >= 0x0c0e && c <= 0x0c10)
      || (c >= 0x0c12 && c <= 0x0c28)
      || (c >= 0x0c2a && c <= 0x0c33)
      || (c >= 0x0c35 && c <= 0x0c39)
      || (c >= 0x0c60 && c <= 0x0c61))
    return 1;

  /* Kannada */
  if ((c >= 0x0c85 && c <= 0x0c8c)
      || (c >= 0x0c8e && c <= 0x0c90)
      || (c >= 0x0c92 && c <= 0x0ca8)
      || (c >= 0x0caa && c <= 0x0cb3)
      || (c >= 0x0cb5 && c <= 0x0cb9)
      || (c >= 0x0ce0 && c <= 0x0ce1))
    return 1;

  /* Malayalam */
  if ((c >= 0x0d05 && c <= 0x0d0c)
      || (c >= 0x0d0e && c <= 0x0d10)
      || (c >= 0x0d12 && c <= 0x0d28)
      || (c >= 0x0d2a && c <= 0x0d39)
      || (c >= 0x0d60 && c <= 0x0d61))
    return 1;

  /* Thai */
  if ((c >= 0x0e01 && c <= 0x0e30)
      || (c >= 0x0e32 && c <= 0x0e33)
      || (c >= 0x0e40 && c <= 0x0e46)
      || (c >= 0x0e4f && c <= 0x0e5b))
    return 1;

  /* Lao.  0x0e8d is the code point C99 Annex D lists between 0x0e8a
     and 0x0e94.  The 0x0e0d entry is retained for compatibility even
     though the Thai range above already accepts it, so it never
     decides the outcome here.  */
  if ((c >= 0x0e81 && c <= 0x0e82)
      || (c == 0x0e84)
      || (c == 0x0e87)
      || (c == 0x0e88)
      || (c == 0x0e8a)
      || (c == 0x0e8d)
      || (c == 0x0e0d)
      || (c >= 0x0e94 && c <= 0x0e97)
      || (c >= 0x0e99 && c <= 0x0e9f)
      || (c >= 0x0ea1 && c <= 0x0ea3)
      || (c == 0x0ea5)
      || (c == 0x0ea7)
      || (c == 0x0eaa)
      || (c == 0x0eab)
      || (c >= 0x0ead && c <= 0x0eb0)
      || (c == 0x0eb2)
      || (c == 0x0eb3)
      || (c == 0x0ebd)
      || (c >= 0x0ec0 && c <= 0x0ec4)
      || (c == 0x0ec6))
    return 1;

  /* Georgian */
  if ((c >= 0x10a0 && c <= 0x10c5)
      || (c >= 0x10d0 && c <= 0x10f6))
    return 1;

  /* Hiragana */
  if ((c >= 0x3041 && c <= 0x3094)
      || (c >= 0x309b && c <= 0x309e))
    return 1;

  /* Katakana */
  if ((c >= 0x30a1 && c <= 0x30fe))
    return 1;

  /* Bopomofo */
  if ((c >= 0x3105 && c <= 0x312c))
    return 1;

  /* Hangul */
  if ((c >= 0x1100 && c <= 0x1159)
      || (c >= 0x1161 && c <= 0x11a2)
      || (c >= 0x11a8 && c <= 0x11f9))
    return 1;

  /* CJK Unified Ideographs */
  if ((c >= 0xf900 && c <= 0xfa2d)
      || (c >= 0xfb1f && c <= 0xfb36)
      || (c >= 0xfb38 && c <= 0xfb3c)
      || (c == 0xfb3e)
      || (c >= 0xfb40 && c <= 0xfb41)
      || (c >= 0xfb42 && c <= 0xfb44)
      || (c >= 0xfb46 && c <= 0xfbb1)
      || (c >= 0xfbd3 && c <= 0xfd3f)
      || (c >= 0xfd50 && c <= 0xfd8f)
      || (c >= 0xfd92 && c <= 0xfdc7)
      || (c >= 0xfdf0 && c <= 0xfdfb)
      || (c >= 0xfe70 && c <= 0xfe72)
      || (c == 0xfe74)
      || (c >= 0xfe76 && c <= 0xfefc)
      || (c >= 0xff21 && c <= 0xff3a)
      || (c >= 0xff41 && c <= 0xff5a)
      || (c >= 0xff66 && c <= 0xffbe)
      || (c >= 0xffc2 && c <= 0xffc7)
      || (c >= 0xffca && c <= 0xffcf)
      || (c >= 0xffd2 && c <= 0xffd7)
      || (c >= 0xffda && c <= 0xffdc)
      || (c >= 0x4e00 && c <= 0x9fa5))
    return 1;

  /* Not in any permitted range: diagnose it, but still report it as an
     extended character so the caller does not treat it as ASCII.  */
  error ("universal-character-name '\\u%04x' not valid in identifier", c);
  return 1;
#endif
}
/* Add the UTF-8 representation of C to the token_buffer, one byte at a
   time through extend_token ().

   Values up to 0x7f are emitted as a single byte.  Larger values get a
   lead byte made of MASK (the length marker) plus the topmost payload
   bits, followed by one continuation byte per remaining group of six
   bits.  Sequences of up to six bytes are produced, so the whole
   non-negative range of C is covered.  */
static void
utf8_extend_token (c)
     int c;
{
  int shift, mask;

  if (c <= 0x0000007f)
    {
      extend_token (c);
      return;
    }
  else if (c <= 0x000007ff)
    shift = 6, mask = 0xc0;
  else if (c <= 0x0000ffff)
    shift = 12, mask = 0xe0;
  else if (c <= 0x001fffff)
    shift = 18, mask = 0xf0;
  else if (c <= 0x03ffffff)
    shift = 24, mask = 0xf8;
  else
    shift = 30, mask = 0xfc;

  /* Lead byte: the range tests above guarantee that C >> SHIFT fits in
     the bits MASK leaves free.  */
  extend_token (mask | (c >> shift));

  do
    {
      shift -= 6;
      /* A continuation byte is 10xxxxxx: keep only the six payload bits
	 for this position.  Without the mask, higher bits of C leak into
	 the byte and corrupt the 10 marker.  */
      extend_token ((unsigned char) (0x80 | ((c >> shift) & 0x3f)));
    }
  while (shift);
}
#endif
int int
c_lex (value) c_lex (value)
......
...@@ -555,6 +555,10 @@ extern bool _cpp_expansions_different_trad PARAMS ((const cpp_macro *, ...@@ -555,6 +555,10 @@ extern bool _cpp_expansions_different_trad PARAMS ((const cpp_macro *,
extern uchar *_cpp_copy_replacement_text PARAMS ((const cpp_macro *, uchar *)); extern uchar *_cpp_copy_replacement_text PARAMS ((const cpp_macro *, uchar *));
extern size_t _cpp_replacement_text_len PARAMS ((const cpp_macro *)); extern size_t _cpp_replacement_text_len PARAMS ((const cpp_macro *));
/* In cppcharset.c. */
/* Callers in cpplex.c pass the address of their read pointer and treat
   a zero return as "no valid UCN here"; a nonzero return is used as the
   character value.  NOTE(review): the exact meaning of the IDENTIFIER_P
   values (callers pass 0, 1 and 2) should be confirmed against the
   definition.  */
extern cppchar_t _cpp_valid_ucn PARAMS ((cpp_reader *, const uchar **,
					 int identifier_p));
/* Utility routines and macros. */ /* Utility routines and macros. */
#define DSC(str) (const uchar *)str, sizeof str - 1 #define DSC(str) (const uchar *)str, sizeof str - 1
#define xnew(T) (T *) xmalloc (sizeof(T)) #define xnew(T) (T *) xmalloc (sizeof(T))
......
...@@ -59,15 +59,14 @@ static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE }; ...@@ -59,15 +59,14 @@ static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
static void add_line_note PARAMS ((cpp_buffer *, const uchar *, unsigned int)); static void add_line_note PARAMS ((cpp_buffer *, const uchar *, unsigned int));
static int skip_line_comment PARAMS ((cpp_reader *)); static int skip_line_comment PARAMS ((cpp_reader *));
static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t)); static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
static cpp_hashnode *lex_identifier PARAMS ((cpp_reader *)); static cpp_hashnode *lex_identifier PARAMS ((cpp_reader *, const uchar *));
static void lex_number PARAMS ((cpp_reader *, cpp_string *)); static void lex_number PARAMS ((cpp_reader *, cpp_string *));
static bool continues_identifier_p PARAMS ((cpp_reader *)); static bool forms_identifier_p PARAMS ((cpp_reader *, int));
static void lex_string PARAMS ((cpp_reader *, cpp_token *)); static void lex_string PARAMS ((cpp_reader *, cpp_token *));
static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *, static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
cppchar_t)); cppchar_t));
static int name_p PARAMS ((cpp_reader *, const cpp_string *)); static int name_p PARAMS ((cpp_reader *, const cpp_string *));
static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **, static cppchar_t maybe_read_ucn PARAMS ((cpp_reader *, const uchar **));
const unsigned char *, cppchar_t *));
static tokenrun *next_tokenrun PARAMS ((tokenrun *)); static tokenrun *next_tokenrun PARAMS ((tokenrun *));
static unsigned int hex_digit_value PARAMS ((unsigned int)); static unsigned int hex_digit_value PARAMS ((unsigned int));
...@@ -361,33 +360,53 @@ name_p (pfile, string) ...@@ -361,33 +360,53 @@ name_p (pfile, string)
} }
/* Returns TRUE if the sequence starting at buffer->cur is valid in /* Returns TRUE if the sequence starting at buffer->cur is valid in
an identifier. */ an identifier. FIRST is TRUE if this starts an identifier. */
static bool static bool
continues_identifier_p (pfile) forms_identifier_p (pfile, first)
cpp_reader *pfile; cpp_reader *pfile;
int first;
{ {
if (*pfile->buffer->cur != '$' || !CPP_OPTION (pfile, dollars_in_ident)) cpp_buffer *buffer = pfile->buffer;
return false;
if (CPP_PEDANTIC (pfile) && !pfile->state.skipping && !pfile->warned_dollar) if (*buffer->cur == '$')
{ {
pfile->warned_dollar = true; if (!CPP_OPTION (pfile, dollars_in_ident))
cpp_error (pfile, DL_PEDWARN, "'$' in identifier or number"); return false;
buffer->cur++;
if (CPP_PEDANTIC (pfile)
&& !pfile->state.skipping
&& !pfile->warned_dollar)
{
pfile->warned_dollar = true;
cpp_error (pfile, DL_PEDWARN, "'$' in identifier or number");
}
return true;
} }
pfile->buffer->cur++;
return true; /* Is this a syntactically valid UCN? */
if (0 && *buffer->cur == '\\'
&& (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
{
buffer->cur += 2;
if (_cpp_valid_ucn (pfile, &buffer->cur, 1 + !first))
return true;
buffer->cur -= 2;
}
return false;
} }
/* Lex an identifier starting at BUFFER->CUR - 1. */ /* Lex an identifier starting at BUFFER->CUR - 1. */
static cpp_hashnode * static cpp_hashnode *
lex_identifier (pfile) lex_identifier (pfile, base)
cpp_reader *pfile; cpp_reader *pfile;
const uchar *base;
{ {
cpp_hashnode *result; cpp_hashnode *result;
const uchar *cur, *base; const uchar *cur;
base = pfile->buffer->cur - 1;
do do
{ {
cur = pfile->buffer->cur; cur = pfile->buffer->cur;
...@@ -398,7 +417,7 @@ lex_identifier (pfile) ...@@ -398,7 +417,7 @@ lex_identifier (pfile)
pfile->buffer->cur = cur; pfile->buffer->cur = cur;
} }
while (continues_identifier_p (pfile)); while (forms_identifier_p (pfile, false));
result = (cpp_hashnode *) result = (cpp_hashnode *)
ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC); ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
...@@ -444,7 +463,7 @@ lex_number (pfile, number) ...@@ -444,7 +463,7 @@ lex_number (pfile, number)
pfile->buffer->cur = cur; pfile->buffer->cur = cur;
} }
while (continues_identifier_p (pfile)); while (forms_identifier_p (pfile, false));
number->len = cur - base; number->len = cur - base;
dest = _cpp_unaligned_alloc (pfile, number->len + 1); dest = _cpp_unaligned_alloc (pfile, number->len + 1);
...@@ -803,7 +822,6 @@ _cpp_lex_direct (pfile) ...@@ -803,7 +822,6 @@ _cpp_lex_direct (pfile)
} }
/* Fall through. */ /* Fall through. */
start_ident:
case '_': case '_':
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
...@@ -816,7 +834,7 @@ _cpp_lex_direct (pfile) ...@@ -816,7 +834,7 @@ _cpp_lex_direct (pfile)
case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
case 'Y': case 'Z': case 'Y': case 'Z':
result->type = CPP_NAME; result->type = CPP_NAME;
result->val.node = lex_identifier (pfile); result->val.node = lex_identifier (pfile, buffer->cur - 1);
/* Convert named operators to their proper types. */ /* Convert named operators to their proper types. */
if (result->val.node->flags & NODE_OPERATOR) if (result->val.node->flags & NODE_OPERATOR)
...@@ -1044,14 +1062,23 @@ _cpp_lex_direct (pfile) ...@@ -1044,14 +1062,23 @@ _cpp_lex_direct (pfile)
case '@': result->type = CPP_ATSIGN; break; case '@': result->type = CPP_ATSIGN; break;
case '$': case '$':
if (CPP_OPTION (pfile, dollars_in_ident)) case '\\':
goto start_ident; {
/* Fall through... */ const uchar *base = --buffer->cur;
default: if (forms_identifier_p (pfile, true))
result->type = CPP_OTHER; {
result->val.c = c; result->type = CPP_NAME;
break; result->val.node = lex_identifier (pfile, base);
break;
}
buffer->cur++;
default:
result->type = CPP_OTHER;
result->val.c = c;
break;
}
} }
return result; return result;
...@@ -1321,9 +1348,11 @@ cpp_avoid_paste (pfile, token1, token2) ...@@ -1321,9 +1348,11 @@ cpp_avoid_paste (pfile, token1, token2)
|| b == CPP_CHAR || b == CPP_STRING); /* L */ || b == CPP_CHAR || b == CPP_STRING); /* L */
case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
|| c == '.' || c == '+' || c == '-'); || c == '.' || c == '+' || c == '-');
case CPP_OTHER: return (CPP_OPTION (pfile, objc) /* UCNs */
&& token1->val.c == '@' case CPP_OTHER: return ((token1->val.c == '\\' && b == CPP_NAME)
&& (b == CPP_NAME || b == CPP_STRING)); || (CPP_OPTION (pfile, objc)
&& token1->val.c == '@'
&& (b == CPP_NAME || b == CPP_STRING)));
default: break; default: break;
} }
...@@ -1363,93 +1392,31 @@ hex_digit_value (c) ...@@ -1363,93 +1392,31 @@ hex_digit_value (c)
abort (); abort ();
} }
/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate /* Read a possible universal character name starting at *PSTR. */
failure if cpplib is not parsing C++ or C99. Such failure is static cppchar_t
silent, and no variables are updated. Otherwise returns 0, and maybe_read_ucn (pfile, pstr)
warns if -Wtraditional.
[lex.charset]: The character designated by the universal character
name \UNNNNNNNN is that character whose character short name in
ISO/IEC 10646 is NNNNNNNN; the character designated by the
universal character name \uNNNN is that character whose character
short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
for a universal character name is less than 0x20 or in the range
0x7F-0x9F (inclusive), or if the universal character name
designates a character in the basic source character set, then the
program is ill-formed.
We assume that wchar_t is Unicode, so we don't need to do any
mapping. Is this ever wrong?
PC points to the 'u' or 'U', PSTR is points to the byte after PC,
LIMIT is the end of the string or charconst. PSTR is updated to
point after the UCS on return, and the UCS is written into PC. */
static int
maybe_read_ucs (pfile, pstr, limit, pc)
cpp_reader *pfile; cpp_reader *pfile;
const unsigned char **pstr; const uchar **pstr;
const unsigned char *limit;
cppchar_t *pc;
{ {
const unsigned char *p = *pstr; cppchar_t result, c = (*pstr)[-1];
unsigned int code = 0;
unsigned int c = *pc, length;
/* Only attempt to interpret a UCS for C++ and C99. */
if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
return 1;
if (CPP_WTRADITIONAL (pfile)) result = _cpp_valid_ucn (pfile, pstr, false);
cpp_error (pfile, DL_WARNING, if (result)
"the meaning of '\\%c' is different in traditional C", c);
length = (c == 'u' ? 4: 8);
if ((size_t) (limit - p) < length)
{
cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
/* Skip to the end to avoid more diagnostics. */
p = limit;
}
else
{ {
for (; length; length--, p++) if (CPP_WTRADITIONAL (pfile))
cpp_error (pfile, DL_WARNING,
"the meaning of '\\%c' is different in traditional C",
(int) c);
if (CPP_OPTION (pfile, EBCDIC))
{ {
c = *p; cpp_error (pfile, DL_ERROR,
if (ISXDIGIT (c)) "universal character with an EBCDIC target");
code = (code << 4) + hex_digit_value (c); result = 0x3f; /* EBCDIC invalid character */
else
{
cpp_error (pfile, DL_ERROR,
"non-hex digit '%c' in universal-character-name", c);
/* We shouldn't skip in case there are multibyte chars. */
break;
}
} }
} }
if (CPP_OPTION (pfile, EBCDIC)) return result;
{
cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
code = 0x3f; /* EBCDIC invalid character */
}
/* True extended characters are OK. */
else if (code >= 0xa0
&& !(code & 0x80000000)
&& !(code >= 0xD800 && code <= 0xDFFF))
;
/* The standard permits $, @ and ` to be specified as UCNs. We use
hex escapes so that this also works with EBCDIC hosts. */
else if (code == 0x24 || code == 0x40 || code == 0x60)
;
/* Don't give another error if one occurred above. */
else if (length == 0)
cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
*pstr = p;
*pc = code;
return 0;
} }
/* Returns the value of an escape sequence, truncated to the correct /* Returns the value of an escape sequence, truncated to the correct
...@@ -1470,7 +1437,7 @@ cpp_parse_escape (pfile, pstr, limit, wide) ...@@ -1470,7 +1437,7 @@ cpp_parse_escape (pfile, pstr, limit, wide)
int unknown = 0; int unknown = 0;
const unsigned char *str = *pstr, *charconsts; const unsigned char *str = *pstr, *charconsts;
cppchar_t c, mask; cppchar_t c, ucn, mask;
unsigned int width; unsigned int width;
if (CPP_OPTION (pfile, EBCDIC)) if (CPP_OPTION (pfile, EBCDIC))
...@@ -1519,7 +1486,11 @@ cpp_parse_escape (pfile, pstr, limit, wide) ...@@ -1519,7 +1486,11 @@ cpp_parse_escape (pfile, pstr, limit, wide)
break; break;
case 'u': case 'U': case 'u': case 'U':
unknown = maybe_read_ucs (pfile, &str, limit, &c); ucn = maybe_read_ucn (pfile, &str);
if (ucn)
c = ucn;
else
unknown = true;
break; break;
case 'x': case 'x':
......
2003-04-20 Neil Booth <neil@daikokuya.co.uk>
* ucs.c: Update diagnostic messages.
2003-04-19 Neil Booth <neil@daikokuya.co.uk> 2003-04-19 Neil Booth <neil@daikokuya.co.uk>
* gcc.dg/cpp/truefalse.cpp: New test. * gcc.dg/cpp/truefalse.cpp: New test.
......
...@@ -51,7 +51,7 @@ void foo () ...@@ -51,7 +51,7 @@ void foo ()
c = L'\ubad'; /* { dg-error "incomplete" "incompete UCN 1" } */ c = L'\ubad'; /* { dg-error "incomplete" "incompete UCN 1" } */
c = L"\U1234"[0]; /* { dg-error "incomplete" "incompete UCN 2" } */ c = L"\U1234"[0]; /* { dg-error "incomplete" "incompete UCN 2" } */
c = L'\u000x'; /* { dg-error "non-hex" "non-hex digit in UCN" } */ c = L'\u000x'; /* { dg-error "incomplete" "non-hex digit in UCN" } */
/* If sizeof(HOST_WIDE_INT) > sizeof(wchar_t), we can get a multi-character /* If sizeof(HOST_WIDE_INT) > sizeof(wchar_t), we can get a multi-character
constant warning even for wide characters. */ constant warning even for wide characters. */
/* { dg-warning "too long|multi-character" "" { target *-*-* } 54 } */ /* { dg-warning "too long|multi-character" "" { target *-*-* } 54 } */
...@@ -61,7 +61,7 @@ void foo () ...@@ -61,7 +61,7 @@ void foo ()
c = '\u00a0'; /* { dg-bogus "invalid" "00a0 is a valid UCN" } */ c = '\u00a0'; /* { dg-bogus "invalid" "00a0 is a valid UCN" } */
c = '\U00000060'; /* { dg-bogus "invalid" "0060 is a valid UCN" } */ c = '\U00000060'; /* { dg-bogus "invalid" "0060 is a valid UCN" } */
c = '\u0025'; /* { dg-error "range" "0025 is an invalid UCN" } */ c = '\u0025'; /* { dg-error "not a valid" "0025 invalid UCN" } */
c = L"\uD800"[0]; /* { dg-error "range" "D800 is an invalid UCN" } */ c = L"\uD800"[0]; /* { dg-error "not a valid" "D800 invalid UCN" } */
c = L'\U0000DFFF'; /* { dg-error "range" "DFFF is an invalid UCN" } */ c = L'\U0000DFFF'; /* { dg-error "not a valid" "DFFF invalid UCN" } */
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment