Commit fbb22910 by Paolo Carlini

re PR preprocessor/53690 ([C++11] \u0000 and \U00000000 are wrongly encoded as U+0001.)

/libcpp
2015-07-02  Paolo Carlini  <paolo.carlini@oracle.com>

	PR c++/53690
	* charset.c (_cpp_valid_ucn): Add cppchar_t * parameter and change
	return type to bool.  Fix encoding of \u0000 and \U00000000 in C++.
	(convert_ucn): Adjust call.
	* lex.c (forms_identifier_p): Likewise.
	* internal.h (_cpp_valid_ucn): Adjust declaration.

/gcc/testsuite
2015-07-02  Paolo Carlini  <paolo.carlini@oracle.com>

	PR c++/53690
	* g++.dg/cpp/pr53690.C: New.

From-SVN: r225353
parent a05d02b2
2015-07-02 Paolo Carlini <paolo.carlini@oracle.com>
PR c++/53690
* g++.dg/cpp/pr53690.C: New.
2015-07-02 Bill Schmidt <wschmidt@linux.vnet.ibm.com> 2015-07-02 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
* gcc.target/powerpc/vec-cmp.c: New test. * gcc.target/powerpc/vec-cmp.c: New test.
......
// PR c++/53690
// { dg-do compile { target c++11 } }
// Regression test: the universal character names \u0000 and \U00000000
// must evaluate to 0 when used in U'...' and u'...' character literals.
// Each declaration below gets array size 1 when the literal compares
// equal to 0 and size -1 otherwise, so a wrongly encoded literal makes
// this file fail to compile.
int array1[U'\U00000000' == 0 ? 1 : -1];
int array2[U'\u0000' == 0 ? 1 : -1];
int array3[u'\U00000000' == 0 ? 1 : -1];
int array4[u'\u0000' == 0 ? 1 : -1];
2015-07-02 Paolo Carlini <paolo.carlini@oracle.com>
PR c++/53690
* charset.c (_cpp_valid_ucn): Add cppchar_t * parameter and change
return type to bool. Fix encoding of \u0000 and \U00000000 in C++.
(convert_ucn): Adjust call.
* lex.c (forms_identifier_p): Likewise.
* internal.h (_cpp_valid_ucn): Adjust declaration.
2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net> 2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net>
Implement N4197 - Adding u8 character literals Implement N4197 - Adding u8 character literals
......
...@@ -972,21 +972,20 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c, ...@@ -972,21 +972,20 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
or 0060 (`), nor one in the range D800 through DFFF inclusive. or 0060 (`), nor one in the range D800 through DFFF inclusive.
*PSTR must be preceded by "\u" or "\U"; it is assumed that the *PSTR must be preceded by "\u" or "\U"; it is assumed that the
buffer end is delimited by a non-hex digit. Returns zero if the buffer end is delimited by a non-hex digit. Returns false if the
UCN has not been consumed. UCN has not been consumed, true otherwise.
Otherwise the nonzero value of the UCN, whether valid or invalid, The value of the UCN, whether valid or invalid, is returned in *CP.
is returned. Diagnostics are emitted for invalid values. PSTR Diagnostics are emitted for invalid values. PSTR is updated to point
is updated to point one beyond the UCN, or to the syntactically one beyond the UCN, or to the syntactically invalid character.
invalid character.
IDENTIFIER_POS is 0 when not in an identifier, 1 for the start of IDENTIFIER_POS is 0 when not in an identifier, 1 for the start of
an identifier, or 2 otherwise. */ an identifier, or 2 otherwise. */
cppchar_t bool
_cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr, _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
const uchar *limit, int identifier_pos, const uchar *limit, int identifier_pos,
struct normalize_state *nst) struct normalize_state *nst, cppchar_t *cp)
{ {
cppchar_t result, c; cppchar_t result, c;
unsigned int length; unsigned int length;
...@@ -1030,7 +1029,10 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr, ...@@ -1030,7 +1029,10 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
multiple tokens in identifiers, so we can't give a helpful multiple tokens in identifiers, so we can't give a helpful
error message in that case. */ error message in that case. */
if (length && identifier_pos) if (length && identifier_pos)
return 0; {
*cp = 0;
return false;
}
*pstr = str; *pstr = str;
if (length) if (length)
...@@ -1079,10 +1081,8 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr, ...@@ -1079,10 +1081,8 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
(int) (str - base), base); (int) (str - base), base);
} }
if (result == 0) *cp = result;
result = 1; return true;
return result;
} }
/* Convert an UCN, pointed to by FROM, to UTF-8 encoding, then translate /* Convert an UCN, pointed to by FROM, to UTF-8 encoding, then translate
...@@ -1100,7 +1100,7 @@ convert_ucn (cpp_reader *pfile, const uchar *from, const uchar *limit, ...@@ -1100,7 +1100,7 @@ convert_ucn (cpp_reader *pfile, const uchar *from, const uchar *limit,
struct normalize_state nst = INITIAL_NORMALIZE_STATE; struct normalize_state nst = INITIAL_NORMALIZE_STATE;
from++; /* Skip u/U. */ from++; /* Skip u/U. */
ucn = _cpp_valid_ucn (pfile, &from, limit, 0, &nst); _cpp_valid_ucn (pfile, &from, limit, 0, &nst, &ucn);
rval = one_cppchar_to_utf8 (ucn, &bufp, &bytesleft); rval = one_cppchar_to_utf8 (ucn, &bufp, &bytesleft);
if (rval) if (rval)
......
...@@ -744,9 +744,10 @@ struct normalize_state ...@@ -744,9 +744,10 @@ struct normalize_state
#define NORMALIZE_STATE_UPDATE_IDNUM(st, c) \ #define NORMALIZE_STATE_UPDATE_IDNUM(st, c) \
((st)->previous = (c), (st)->prev_class = 0) ((st)->previous = (c), (st)->prev_class = 0)
extern cppchar_t _cpp_valid_ucn (cpp_reader *, const unsigned char **, extern bool _cpp_valid_ucn (cpp_reader *, const unsigned char **,
const unsigned char *, int, const unsigned char *, int,
struct normalize_state *state); struct normalize_state *state,
cppchar_t *);
extern void _cpp_destroy_iconv (cpp_reader *); extern void _cpp_destroy_iconv (cpp_reader *);
extern unsigned char *_cpp_convert_input (cpp_reader *, const char *, extern unsigned char *_cpp_convert_input (cpp_reader *, const char *,
unsigned char *, size_t, size_t, unsigned char *, size_t, size_t,
......
...@@ -1244,9 +1244,10 @@ forms_identifier_p (cpp_reader *pfile, int first, ...@@ -1244,9 +1244,10 @@ forms_identifier_p (cpp_reader *pfile, int first,
&& *buffer->cur == '\\' && *buffer->cur == '\\'
&& (buffer->cur[1] == 'u' || buffer->cur[1] == 'U')) && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
{ {
cppchar_t s;
buffer->cur += 2; buffer->cur += 2;
if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first, if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
state)) state, &s))
return true; return true;
buffer->cur -= 2; buffer->cur -= 2;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment