Commit 1613e52b by Neil Booth Committed by Neil Booth

Makefile.in (LIBCPP_OBJS): Add cppcharset.o.

	* Makefile.in (LIBCPP_OBJS): Add cppcharset.o.
	(cppcharset.o): New target.
	* c-lex.c (is_extended_char): Move to cppcharset.c.
	(utf8_extend_token): Delete.
	* cppcharset.c: New file.
	* cpphash.h (_cpp_valid_ucn): New.
	* cpplex.c (lex_identifier): Update prototype.
	(continues_identifier_p): Rename forms_identifier_p.  Handle UCN
	escapes.
	(maybe_read_ucs): Rename maybe_read_ucn.  Update to use code
	in cppcharset.c.
	(lex_number, lex_identifier, cpp_parse_escape): Update.
	(_cpp_lex_direct): Update to handle UCNs.
	(cpp_avoid_paste): Don't paste to form a UCN.
testsuite:
	* ucs.c: Update diagnostic messages.

From-SVN: r65845
parent 0a45ec5c
2003-04-20 Neil Booth <neil@daikokuya.co.uk>
* Makefile.in (LIBCPP_OBJS): Add cppcharset.o.
(cppcharset.o): New target.
* c-lex.c (is_extended_char): Move to cppcharset.c.
(utf8_extend_token): Delete.
* cppcharset.c: New file.
* cpphash.h (_cpp_valid_ucn): New.
* cpplex.c (lex_identifier): Update prototype.
(continues_identifier_p): Rename forms_identifier_p. Handle UCN
escapes.
(maybe_read_ucs): Rename maybe_read_ucn. Update to use code
in cppcharset.c.
(lex_number, lex_identifier, cpp_parse_escape): Update.
(_cpp_lex_direct): Update to handle UCNs.
(cpp_avoid_paste): Don't paste to form a UCN.
2003-04-19 Roger Sayle <roger@eyesopen.com>
* builtins.c (expand_builtin): Don't expand a pure or const
......
......@@ -2320,7 +2320,7 @@ PREPROCESSOR_DEFINES = \
@TARGET_SYSTEM_ROOT_DEFINE@
LIBCPP_OBJS = cpplib.o cpplex.o cppmacro.o cppexp.o cppfiles.o cpptrad.o \
cpphash.o cpperror.o cppinit.o \
cpphash.o cpperror.o cppinit.o cppcharset.o \
hashtable.o line-map.o mkdeps.o mbchar.o cpppch.o
LIBCPP_DEPS = $(CPPLIB_H) cpphash.h line-map.h hashtable.h intl.h \
......@@ -2333,6 +2333,7 @@ libcpp.a: $(LIBCPP_OBJS)
$(AR) $(AR_FLAGS) libcpp.a $(LIBCPP_OBJS)
-$(RANLIB) libcpp.a
cppcharset.o: cppcharset.c $(LIBCPP_DEPS)
cpperror.o: cpperror.c $(LIBCPP_DEPS)
cppexp.o: cppexp.c $(LIBCPP_DEPS)
cpplex.o: cpplex.c $(LIBCPP_DEPS) mbchar.h
......
......@@ -323,315 +323,6 @@ cb_undef (pfile, line, node)
(const char *) NODE_NAME (node));
}
#if 0 /* not yet */
/* Returns nonzero if C is a universal-character-name. Give an error if it
is not one which may appear in an identifier, as per [extendid].
In detail: the result is 0 when C is below 0x7f, and always 0 when
TARGET_EBCDIC is defined. Every other value yields 1. A value above
0xffff is diagnosed with the '\U' form of the message; a value that
matches none of the script ranges below is diagnosed with the '\u' form;
a value inside one of the ranges is accepted silently.
Note that extended character support in identifiers has not yet been
implemented. It is my personal opinion that this is not a desirable
feature. Portable code cannot count on support for more than the basic
identifier character set. */
static inline int
is_extended_char (c)
int c;
{
#ifdef TARGET_EBCDIC
/* No classification is attempted for EBCDIC targets. */
return 0;
#else
/* ASCII. Only values strictly below 0x7f are rejected here; 0x7f itself
falls through to the checks below. */
if (c < 0x7f)
return 0;
/* None of the valid chars are outside the Basic Multilingual Plane (the
low 16 bits). The value is still reported as a UCN (result 1) after
the error. */
if (c > 0xffff)
{
error ("universal-character-name '\\U%08x' not valid in identifier", c);
return 1;
}
/* Latin */
if ((c >= 0x00c0 && c <= 0x00d6)
|| (c >= 0x00d8 && c <= 0x00f6)
|| (c >= 0x00f8 && c <= 0x01f5)
|| (c >= 0x01fa && c <= 0x0217)
|| (c >= 0x0250 && c <= 0x02a8)
|| (c >= 0x1e00 && c <= 0x1e9a)
|| (c >= 0x1ea0 && c <= 0x1ef9))
return 1;
/* Greek */
if ((c == 0x0384)
|| (c >= 0x0388 && c <= 0x038a)
|| (c == 0x038c)
|| (c >= 0x038e && c <= 0x03a1)
|| (c >= 0x03a3 && c <= 0x03ce)
|| (c >= 0x03d0 && c <= 0x03d6)
|| (c == 0x03da)
|| (c == 0x03dc)
|| (c == 0x03de)
|| (c == 0x03e0)
|| (c >= 0x03e2 && c <= 0x03f3)
|| (c >= 0x1f00 && c <= 0x1f15)
|| (c >= 0x1f18 && c <= 0x1f1d)
|| (c >= 0x1f20 && c <= 0x1f45)
|| (c >= 0x1f48 && c <= 0x1f4d)
|| (c >= 0x1f50 && c <= 0x1f57)
|| (c == 0x1f59)
|| (c == 0x1f5b)
|| (c == 0x1f5d)
|| (c >= 0x1f5f && c <= 0x1f7d)
|| (c >= 0x1f80 && c <= 0x1fb4)
|| (c >= 0x1fb6 && c <= 0x1fbc)
|| (c >= 0x1fc2 && c <= 0x1fc4)
|| (c >= 0x1fc6 && c <= 0x1fcc)
|| (c >= 0x1fd0 && c <= 0x1fd3)
|| (c >= 0x1fd6 && c <= 0x1fdb)
|| (c >= 0x1fe0 && c <= 0x1fec)
|| (c >= 0x1ff2 && c <= 0x1ff4)
|| (c >= 0x1ff6 && c <= 0x1ffc))
return 1;
/* Cyrillic */
if ((c >= 0x0401 && c <= 0x040d)
|| (c >= 0x040f && c <= 0x044f)
|| (c >= 0x0451 && c <= 0x045c)
|| (c >= 0x045e && c <= 0x0481)
|| (c >= 0x0490 && c <= 0x04c4)
|| (c >= 0x04c7 && c <= 0x04c8)
|| (c >= 0x04cb && c <= 0x04cc)
|| (c >= 0x04d0 && c <= 0x04eb)
|| (c >= 0x04ee && c <= 0x04f5)
|| (c >= 0x04f8 && c <= 0x04f9))
return 1;
/* Armenian */
if ((c >= 0x0531 && c <= 0x0556)
|| (c >= 0x0561 && c <= 0x0587))
return 1;
/* Hebrew */
if ((c >= 0x05d0 && c <= 0x05ea)
|| (c >= 0x05f0 && c <= 0x05f4))
return 1;
/* Arabic */
if ((c >= 0x0621 && c <= 0x063a)
|| (c >= 0x0640 && c <= 0x0652)
|| (c >= 0x0670 && c <= 0x06b7)
|| (c >= 0x06ba && c <= 0x06be)
|| (c >= 0x06c0 && c <= 0x06ce)
|| (c >= 0x06e5 && c <= 0x06e7))
return 1;
/* Devanagari */
if ((c >= 0x0905 && c <= 0x0939)
|| (c >= 0x0958 && c <= 0x0962))
return 1;
/* Bengali */
if ((c >= 0x0985 && c <= 0x098c)
|| (c >= 0x098f && c <= 0x0990)
|| (c >= 0x0993 && c <= 0x09a8)
|| (c >= 0x09aa && c <= 0x09b0)
|| (c == 0x09b2)
|| (c >= 0x09b6 && c <= 0x09b9)
|| (c >= 0x09dc && c <= 0x09dd)
|| (c >= 0x09df && c <= 0x09e1)
|| (c >= 0x09f0 && c <= 0x09f1))
return 1;
/* Gurmukhi */
if ((c >= 0x0a05 && c <= 0x0a0a)
|| (c >= 0x0a0f && c <= 0x0a10)
|| (c >= 0x0a13 && c <= 0x0a28)
|| (c >= 0x0a2a && c <= 0x0a30)
|| (c >= 0x0a32 && c <= 0x0a33)
|| (c >= 0x0a35 && c <= 0x0a36)
|| (c >= 0x0a38 && c <= 0x0a39)
|| (c >= 0x0a59 && c <= 0x0a5c)
|| (c == 0x0a5e))
return 1;
/* Gujarati */
if ((c >= 0x0a85 && c <= 0x0a8b)
|| (c == 0x0a8d)
|| (c >= 0x0a8f && c <= 0x0a91)
|| (c >= 0x0a93 && c <= 0x0aa8)
|| (c >= 0x0aaa && c <= 0x0ab0)
|| (c >= 0x0ab2 && c <= 0x0ab3)
|| (c >= 0x0ab5 && c <= 0x0ab9)
|| (c == 0x0ae0))
return 1;
/* Oriya */
if ((c >= 0x0b05 && c <= 0x0b0c)
|| (c >= 0x0b0f && c <= 0x0b10)
|| (c >= 0x0b13 && c <= 0x0b28)
|| (c >= 0x0b2a && c <= 0x0b30)
|| (c >= 0x0b32 && c <= 0x0b33)
|| (c >= 0x0b36 && c <= 0x0b39)
|| (c >= 0x0b5c && c <= 0x0b5d)
|| (c >= 0x0b5f && c <= 0x0b61))
return 1;
/* Tamil */
if ((c >= 0x0b85 && c <= 0x0b8a)
|| (c >= 0x0b8e && c <= 0x0b90)
|| (c >= 0x0b92 && c <= 0x0b95)
|| (c >= 0x0b99 && c <= 0x0b9a)
|| (c == 0x0b9c)
|| (c >= 0x0b9e && c <= 0x0b9f)
|| (c >= 0x0ba3 && c <= 0x0ba4)
|| (c >= 0x0ba8 && c <= 0x0baa)
|| (c >= 0x0bae && c <= 0x0bb5)
|| (c >= 0x0bb7 && c <= 0x0bb9))
return 1;
/* Telugu */
if ((c >= 0x0c05 && c <= 0x0c0c)
|| (c >= 0x0c0e && c <= 0x0c10)
|| (c >= 0x0c12 && c <= 0x0c28)
|| (c >= 0x0c2a && c <= 0x0c33)
|| (c >= 0x0c35 && c <= 0x0c39)
|| (c >= 0x0c60 && c <= 0x0c61))
return 1;
/* Kannada */
if ((c >= 0x0c85 && c <= 0x0c8c)
|| (c >= 0x0c8e && c <= 0x0c90)
|| (c >= 0x0c92 && c <= 0x0ca8)
|| (c >= 0x0caa && c <= 0x0cb3)
|| (c >= 0x0cb5 && c <= 0x0cb9)
|| (c >= 0x0ce0 && c <= 0x0ce1))
return 1;
/* Malayalam */
if ((c >= 0x0d05 && c <= 0x0d0c)
|| (c >= 0x0d0e && c <= 0x0d10)
|| (c >= 0x0d12 && c <= 0x0d28)
|| (c >= 0x0d2a && c <= 0x0d39)
|| (c >= 0x0d60 && c <= 0x0d61))
return 1;
/* Thai */
if ((c >= 0x0e01 && c <= 0x0e30)
|| (c >= 0x0e32 && c <= 0x0e33)
|| (c >= 0x0e40 && c <= 0x0e46)
|| (c >= 0x0e4f && c <= 0x0e5b))
return 1;
/* Lao */
/* NOTE(review): the 0x0e0d test below can never be the deciding one,
because 0x0e0d already matched the Thai range 0x0e01-0x0e30 above.
Every neighbouring value is in the 0x0e8x block, so 0x0e8d may have
been intended -- confirm against the standard's table before
changing it. */
if ((c >= 0x0e81 && c <= 0x0e82)
|| (c == 0x0e84)
|| (c == 0x0e87)
|| (c == 0x0e88)
|| (c == 0x0e8a)
|| (c == 0x0e0d)
|| (c >= 0x0e94 && c <= 0x0e97)
|| (c >= 0x0e99 && c <= 0x0e9f)
|| (c >= 0x0ea1 && c <= 0x0ea3)
|| (c == 0x0ea5)
|| (c == 0x0ea7)
|| (c == 0x0eaa)
|| (c == 0x0eab)
|| (c >= 0x0ead && c <= 0x0eb0)
|| (c == 0x0eb2)
|| (c == 0x0eb3)
|| (c == 0x0ebd)
|| (c >= 0x0ec0 && c <= 0x0ec4)
|| (c == 0x0ec6))
return 1;
/* Georgian */
if ((c >= 0x10a0 && c <= 0x10c5)
|| (c >= 0x10d0 && c <= 0x10f6))
return 1;
/* Hiragana */
if ((c >= 0x3041 && c <= 0x3094)
|| (c >= 0x309b && c <= 0x309e))
return 1;
/* Katakana */
if ((c >= 0x30a1 && c <= 0x30fe))
return 1;
/* Bopomofo */
if ((c >= 0x3105 && c <= 0x312c))
return 1;
/* Hangul */
if ((c >= 0x1100 && c <= 0x1159)
|| (c >= 0x1161 && c <= 0x11a2)
|| (c >= 0x11a8 && c <= 0x11f9))
return 1;
/* CJK Unified Ideographs. Only the final range, 0x4e00-0x9fa5, is the
ideograph block proper; the 0xf900-0xffdc ranges listed first are
grouped under the same heading here. */
if ((c >= 0xf900 && c <= 0xfa2d)
|| (c >= 0xfb1f && c <= 0xfb36)
|| (c >= 0xfb38 && c <= 0xfb3c)
|| (c == 0xfb3e)
|| (c >= 0xfb40 && c <= 0xfb41)
|| (c >= 0xfb42 && c <= 0xfb44)
|| (c >= 0xfb46 && c <= 0xfbb1)
|| (c >= 0xfbd3 && c <= 0xfd3f)
|| (c >= 0xfd50 && c <= 0xfd8f)
|| (c >= 0xfd92 && c <= 0xfdc7)
|| (c >= 0xfdf0 && c <= 0xfdfb)
|| (c >= 0xfe70 && c <= 0xfe72)
|| (c == 0xfe74)
|| (c >= 0xfe76 && c <= 0xfefc)
|| (c >= 0xff21 && c <= 0xff3a)
|| (c >= 0xff41 && c <= 0xff5a)
|| (c >= 0xff66 && c <= 0xffbe)
|| (c >= 0xffc2 && c <= 0xffc7)
|| (c >= 0xffca && c <= 0xffcf)
|| (c >= 0xffd2 && c <= 0xffd7)
|| (c >= 0xffda && c <= 0xffdc)
|| (c >= 0x4e00 && c <= 0x9fa5))
return 1;
/* C matched none of the ranges: diagnose it, but still report it as a
universal-character-name. */
error ("universal-character-name '\\u%04x' not valid in identifier", c);
return 1;
#endif
}
/* Add the UTF-8 representation of C to the token_buffer.

   A value of at most 0x7f is appended as a single byte.  Anything larger
   is appended as a lead byte (a length marker OR-ed with the topmost
   bits of C) followed by one continuation byte for each remaining 6-bit
   group, most significant group first.  Each byte is handed to
   extend_token in order.  */
static void
utf8_extend_token (c)
     int c;
{
  /* SHIFT is the bit position of the group carried by the lead byte;
     MASK is the lead byte's length marker.  */
  int shift, mask;

  if (c <= 0x0000007f)
    {
      extend_token (c);
      return;
    }
  else if (c <= 0x000007ff)
    shift = 6, mask = 0xc0;
  else if (c <= 0x0000ffff)
    shift = 12, mask = 0xe0;
  else if (c <= 0x001fffff)
    shift = 18, mask = 0xf0;
  else if (c <= 0x03ffffff)
    shift = 24, mask = 0xf8;
  else
    shift = 30, mask = 0xfc;

  extend_token (mask | (c >> shift));
  do
    {
      shift -= 6;
      /* A continuation byte is 10xxxxxx: keep only the six payload bits
	 of this group.  Without the 0x3f mask, higher bits of C leak into
	 the top two bits of the byte (e.g. 0x7ff would end in 0xff rather
	 than 0xbf).  */
      extend_token ((unsigned char) (0x80 | ((c >> shift) & 0x3f)));
    }
  while (shift);
}
#endif
int
c_lex (value)
tree *value;
......
......@@ -555,6 +555,10 @@ extern bool _cpp_expansions_different_trad PARAMS ((const cpp_macro *,
extern uchar *_cpp_copy_replacement_text PARAMS ((const cpp_macro *, uchar *));
extern size_t _cpp_replacement_text_len PARAMS ((const cpp_macro *));
/* In cppcharset.c. */
cppchar_t _cpp_valid_ucn PARAMS ((cpp_reader *, const uchar **,
int identifer_p));
/* Utility routines and macros. */
#define DSC(str) (const uchar *)str, sizeof str - 1
#define xnew(T) (T *) xmalloc (sizeof(T))
......
......@@ -59,15 +59,14 @@ static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
static void add_line_note PARAMS ((cpp_buffer *, const uchar *, unsigned int));
static int skip_line_comment PARAMS ((cpp_reader *));
static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
static cpp_hashnode *lex_identifier PARAMS ((cpp_reader *));
static cpp_hashnode *lex_identifier PARAMS ((cpp_reader *, const uchar *));
static void lex_number PARAMS ((cpp_reader *, cpp_string *));
static bool continues_identifier_p PARAMS ((cpp_reader *));
static bool forms_identifier_p PARAMS ((cpp_reader *, int));
static void lex_string PARAMS ((cpp_reader *, cpp_token *));
static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
cppchar_t));
static int name_p PARAMS ((cpp_reader *, const cpp_string *));
static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
const unsigned char *, cppchar_t *));
static cppchar_t maybe_read_ucn PARAMS ((cpp_reader *, const uchar **));
static tokenrun *next_tokenrun PARAMS ((tokenrun *));
static unsigned int hex_digit_value PARAMS ((unsigned int));
......@@ -361,33 +360,53 @@ name_p (pfile, string)
}
/* Returns TRUE if the sequence starting at buffer->cur is valid in
an identifier. */
an identifier. FIRST is TRUE if this starts an identifier. */
static bool
continues_identifier_p (pfile)
forms_identifier_p (pfile, first)
cpp_reader *pfile;
int first;
{
if (*pfile->buffer->cur != '$' || !CPP_OPTION (pfile, dollars_in_ident))
cpp_buffer *buffer = pfile->buffer;
if (*buffer->cur == '$')
{
if (!CPP_OPTION (pfile, dollars_in_ident))
return false;
if (CPP_PEDANTIC (pfile) && !pfile->state.skipping && !pfile->warned_dollar)
buffer->cur++;
if (CPP_PEDANTIC (pfile)
&& !pfile->state.skipping
&& !pfile->warned_dollar)
{
pfile->warned_dollar = true;
cpp_error (pfile, DL_PEDWARN, "'$' in identifier or number");
}
pfile->buffer->cur++;
return true;
}
/* Is this a syntactically valid UCN? */
if (0 && *buffer->cur == '\\'
&& (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
{
buffer->cur += 2;
if (_cpp_valid_ucn (pfile, &buffer->cur, 1 + !first))
return true;
buffer->cur -= 2;
}
return false;
}
/* Lex an identifier starting at BUFFER->CUR - 1. */
static cpp_hashnode *
lex_identifier (pfile)
lex_identifier (pfile, base)
cpp_reader *pfile;
const uchar *base;
{
cpp_hashnode *result;
const uchar *cur, *base;
const uchar *cur;
base = pfile->buffer->cur - 1;
do
{
cur = pfile->buffer->cur;
......@@ -398,7 +417,7 @@ lex_identifier (pfile)
pfile->buffer->cur = cur;
}
while (continues_identifier_p (pfile));
while (forms_identifier_p (pfile, false));
result = (cpp_hashnode *)
ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
......@@ -444,7 +463,7 @@ lex_number (pfile, number)
pfile->buffer->cur = cur;
}
while (continues_identifier_p (pfile));
while (forms_identifier_p (pfile, false));
number->len = cur - base;
dest = _cpp_unaligned_alloc (pfile, number->len + 1);
......@@ -803,7 +822,6 @@ _cpp_lex_direct (pfile)
}
/* Fall through. */
start_ident:
case '_':
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
......@@ -816,7 +834,7 @@ _cpp_lex_direct (pfile)
case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
case 'Y': case 'Z':
result->type = CPP_NAME;
result->val.node = lex_identifier (pfile);
result->val.node = lex_identifier (pfile, buffer->cur - 1);
/* Convert named operators to their proper types. */
if (result->val.node->flags & NODE_OPERATOR)
......@@ -1044,15 +1062,24 @@ _cpp_lex_direct (pfile)
case '@': result->type = CPP_ATSIGN; break;
case '$':
if (CPP_OPTION (pfile, dollars_in_ident))
goto start_ident;
/* Fall through... */
case '\\':
{
const uchar *base = --buffer->cur;
if (forms_identifier_p (pfile, true))
{
result->type = CPP_NAME;
result->val.node = lex_identifier (pfile, base);
break;
}
buffer->cur++;
default:
result->type = CPP_OTHER;
result->val.c = c;
break;
}
}
return result;
}
......@@ -1321,9 +1348,11 @@ cpp_avoid_paste (pfile, token1, token2)
|| b == CPP_CHAR || b == CPP_STRING); /* L */
case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
|| c == '.' || c == '+' || c == '-');
case CPP_OTHER: return (CPP_OPTION (pfile, objc)
/* UCNs */
case CPP_OTHER: return ((token1->val.c == '\\' && b == CPP_NAME)
|| (CPP_OPTION (pfile, objc)
&& token1->val.c == '@'
&& (b == CPP_NAME || b == CPP_STRING));
&& (b == CPP_NAME || b == CPP_STRING)));
default: break;
}
......@@ -1363,93 +1392,31 @@ hex_digit_value (c)
abort ();
}
/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
failure if cpplib is not parsing C++ or C99. Such failure is
silent, and no variables are updated. Otherwise returns 0, and
warns if -Wtraditional.
[lex.charset]: The character designated by the universal character
name \UNNNNNNNN is that character whose character short name in
ISO/IEC 10646 is NNNNNNNN; the character designated by the
universal character name \uNNNN is that character whose character
short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
for a universal character name is less than 0x20 or in the range
0x7F-0x9F (inclusive), or if the universal character name
designates a character in the basic source character set, then the
program is ill-formed.
We assume that wchar_t is Unicode, so we don't need to do any
mapping. Is this ever wrong?
PC points to the 'u' or 'U', PSTR points to the byte after PC,
LIMIT is the end of the string or charconst. PSTR is updated to
point after the UCS on return, and the UCS is written into PC. */
static int
maybe_read_ucs (pfile, pstr, limit, pc)
/* Read a possible universal character name starting at *PSTR. */
static cppchar_t
maybe_read_ucn (pfile, pstr)
cpp_reader *pfile;
const unsigned char **pstr;
const unsigned char *limit;
cppchar_t *pc;
const uchar **pstr;
{
const unsigned char *p = *pstr;
unsigned int code = 0;
unsigned int c = *pc, length;
/* Only attempt to interpret a UCS for C++ and C99. */
if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
return 1;
cppchar_t result, c = (*pstr)[-1];
result = _cpp_valid_ucn (pfile, pstr, false);
if (result)
{
if (CPP_WTRADITIONAL (pfile))
cpp_error (pfile, DL_WARNING,
"the meaning of '\\%c' is different in traditional C", c);
"the meaning of '\\%c' is different in traditional C",
(int) c);
length = (c == 'u' ? 4: 8);
if ((size_t) (limit - p) < length)
{
cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
/* Skip to the end to avoid more diagnostics. */
p = limit;
}
else
{
for (; length; length--, p++)
{
c = *p;
if (ISXDIGIT (c))
code = (code << 4) + hex_digit_value (c);
else
if (CPP_OPTION (pfile, EBCDIC))
{
cpp_error (pfile, DL_ERROR,
"non-hex digit '%c' in universal-character-name", c);
/* We shouldn't skip in case there are multibyte chars. */
break;
}
"universal character with an EBCDIC target");
result = 0x3f; /* EBCDIC invalid character */
}
}
if (CPP_OPTION (pfile, EBCDIC))
{
cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
code = 0x3f; /* EBCDIC invalid character */
}
/* True extended characters are OK. */
else if (code >= 0xa0
&& !(code & 0x80000000)
&& !(code >= 0xD800 && code <= 0xDFFF))
;
/* The standard permits $, @ and ` to be specified as UCNs. We use
hex escapes so that this also works with EBCDIC hosts. */
else if (code == 0x24 || code == 0x40 || code == 0x60)
;
/* Don't give another error if one occurred above. */
else if (length == 0)
cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
*pstr = p;
*pc = code;
return 0;
return result;
}
/* Returns the value of an escape sequence, truncated to the correct
......@@ -1470,7 +1437,7 @@ cpp_parse_escape (pfile, pstr, limit, wide)
int unknown = 0;
const unsigned char *str = *pstr, *charconsts;
cppchar_t c, mask;
cppchar_t c, ucn, mask;
unsigned int width;
if (CPP_OPTION (pfile, EBCDIC))
......@@ -1519,7 +1486,11 @@ cpp_parse_escape (pfile, pstr, limit, wide)
break;
case 'u': case 'U':
unknown = maybe_read_ucs (pfile, &str, limit, &c);
ucn = maybe_read_ucn (pfile, &str);
if (ucn)
c = ucn;
else
unknown = true;
break;
case 'x':
......
2003-04-20 Neil Booth <neil@daikokuya.co.uk>
* ucs.c: Update diagnostic messages.
2003-04-19 Neil Booth <neil@daikokuya.co.uk>
* gcc.dg/cpp/truefalse.cpp: New test.
......
......@@ -51,7 +51,7 @@ void foo ()
c = L'\ubad'; /* { dg-error "incomplete" "incompete UCN 1" } */
c = L"\U1234"[0]; /* { dg-error "incomplete" "incompete UCN 2" } */
c = L'\u000x'; /* { dg-error "non-hex" "non-hex digit in UCN" } */
c = L'\u000x'; /* { dg-error "incomplete" "non-hex digit in UCN" } */
/* If sizeof(HOST_WIDE_INT) > sizeof(wchar_t), we can get a multi-character
constant warning even for wide characters. */
/* { dg-warning "too long|multi-character" "" { target *-*-* } 54 } */
......@@ -61,7 +61,7 @@ void foo ()
c = '\u00a0'; /* { dg-bogus "invalid" "00a0 is a valid UCN" } */
c = '\U00000060'; /* { dg-bogus "invalid" "0060 is a valid UCN" } */
c = '\u0025'; /* { dg-error "range" "0025 is an invalid UCN" } */
c = L"\uD800"[0]; /* { dg-error "range" "D800 is an invalid UCN" } */
c = L'\U0000DFFF'; /* { dg-error "range" "DFFF is an invalid UCN" } */
c = '\u0025'; /* { dg-error "not a valid" "0025 invalid UCN" } */
c = L"\uD800"[0]; /* { dg-error "not a valid" "D800 invalid UCN" } */
c = L'\U0000DFFF'; /* { dg-error "not a valid" "DFFF invalid UCN" } */
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment