Commit 0900e29c by Eric Botcazou Committed by Eric Botcazou

charset.c (UCS_LIMIT): New macro.

	* charset.c (UCS_LIMIT): New macro.
	(ucn_valid_in_identifier): Use it instead of a hardcoded constant.
	(_cpp_valid_ucn): Issue a pedantic warning for UCNs larger than
	UCS_LIMIT outside of identifiers in C and in C++2a or later.

From-SVN: r276167
parent d7326aaf
2019-09-26 Eric Botcazou <ebotcazou@adacore.com>
* gcc.dg/cpp/ucs.c: Add test for new warning and adjust.
* gcc.dg/cpp/utf8-5byte-1.c: Add -w to the options.
* gcc.dg/attr-alias-5.c: Likewise.
* g++.dg/cpp/ucn-1.C: Add test for new warning.
* g++.dg/cpp2a/ucn1.C: New test.
2019-09-26 Max Filippov <jcmvbkbc@gmail.com> 2019-09-26 Max Filippov <jcmvbkbc@gmail.com>
* gcc.target/xtensa/pr91880.c: New test case. * gcc.target/xtensa/pr91880.c: New test case.
......
...@@ -12,4 +12,6 @@ int main() ...@@ -12,4 +12,6 @@ int main()
int c\u0024c; // { dg-error "not valid in an identifier" "" { target { powerpc-ibm-aix* } } } int c\u0024c; // { dg-error "not valid in an identifier" "" { target { powerpc-ibm-aix* } } }
U"\uD800"; // { dg-error "not a valid universal character" } U"\uD800"; // { dg-error "not a valid universal character" }
U'\U00110000'; // { dg-warning "outside" "110000 outside UCS" { target c++2a } }
} }
// { dg-do compile }
// { dg-options "-std=c++2a" }
int main()
{
U'\U00110000'; // { dg-warning "outside" "110000 outside UCS" }
}
/* Verify diagnostics for aliases to strings containing extended /* Verify diagnostics for aliases to strings containing extended
identifiers or bad characters. */ identifiers or bad characters. */
/* { dg-do compile } */ /* { dg-do compile } */
/* { dg-options "-std=gnu99" } */ /* { dg-options "-std=gnu99 -w" } */
/* { dg-require-alias "" } */ /* { dg-require-alias "" } */
/* { dg-require-ascii-locale "" } */ /* { dg-require-ascii-locale "" } */
/* { dg-skip-if "" { powerpc*-*-aix* } } */ /* { dg-skip-if "" { powerpc*-*-aix* } } */
......
...@@ -39,7 +39,7 @@ ...@@ -39,7 +39,7 @@
#endif #endif
#if WCHAR_MAX >= 0x7ffffff #if WCHAR_MAX >= 0x7ffffff
# if L'\U1234abcd' != 0x1234abcd # if L'\U1234abcd' != 0x1234abcd /* { dg-warning "outside" "" } */
# error bad long ucs /* { dg-bogus "bad" "bad U1234abcd evaluation" } */ # error bad long ucs /* { dg-bogus "bad" "bad U1234abcd evaluation" } */
# endif # endif
#endif #endif
...@@ -49,7 +49,7 @@ void foo () ...@@ -49,7 +49,7 @@ void foo ()
int c; int c;
c = L'\ubad'; /* { dg-error "incomplete" "incomplete UCN 1" } */ c = L'\ubad'; /* { dg-error "incomplete" "incomplete UCN 1" } */
c = L"\U1234"[0]; /* { dg-error "incomplete" "incomplete UCN 2" } */ c = L"\U1234"[0]; /* { dg-error "incomplete" "incomplete UCN 2" } */
c = L'\u000x'; /* { dg-error "incomplete" "non-hex digit in UCN" } */ c = L'\u000x'; /* { dg-error "incomplete" "non-hex digit in UCN" } */
/* If sizeof(HOST_WIDE_INT) > sizeof(wchar_t), we can get a multi-character /* If sizeof(HOST_WIDE_INT) > sizeof(wchar_t), we can get a multi-character
...@@ -64,4 +64,6 @@ void foo () ...@@ -64,4 +64,6 @@ void foo ()
c = '\u0025'; /* { dg-error "not a valid" "0025 invalid UCN" } */ c = '\u0025'; /* { dg-error "not a valid" "0025 invalid UCN" } */
c = L"\uD800"[0]; /* { dg-error "not a valid" "D800 invalid UCN" } */ c = L"\uD800"[0]; /* { dg-error "not a valid" "D800 invalid UCN" } */
c = L'\U0000DFFF'; /* { dg-error "not a valid" "DFFF invalid UCN" } */ c = L'\U0000DFFF'; /* { dg-error "not a valid" "DFFF invalid UCN" } */
c = L'\U00110000'; /* { dg-warning "outside" "110000 outside UCS" } */
} }
/* Test for bug in conversions from 5-byte UTF-8 sequences in /* Test for bug in conversions from 5-byte UTF-8 sequences in
cpplib. */ cpplib. */
/* { dg-do run { target { 4byte_wchar_t } } } */ /* { dg-do run { target { 4byte_wchar_t } } } */
/* { dg-options "-std=gnu99" } */ /* { dg-options "-std=gnu99 -w" } */
extern void abort (void); extern void abort (void);
extern void exit (int); extern void exit (int);
......
2019-09-26 Eric Botcazou <ebotcazou@adacore.com>
* charset.c (UCS_LIMIT): New macro.
(ucn_valid_in_identifier): Use it instead of a hardcoded constant.
(_cpp_valid_ucn): Issue a pedantic warning for UCNs larger than
UCS_LIMIT outside of identifiers in C and in C++2a or later.
2019-09-19 Lewis Hyatt <lhyatt@gmail.com> 2019-09-19 Lewis Hyatt <lhyatt@gmail.com>
PR c/67224 PR c/67224
......
...@@ -901,6 +901,9 @@ struct ucnrange { ...@@ -901,6 +901,9 @@ struct ucnrange {
}; };
#include "ucnid.h" #include "ucnid.h"
/* ISO 10646 defines the UCS codespace as the range 0-0x10FFFF inclusive. */
#define UCS_LIMIT 0x10FFFF
/* Returns 1 if C is valid in an identifier, 2 if C is valid except at /* Returns 1 if C is valid in an identifier, 2 if C is valid except at
the start of an identifier, and 0 if C is not valid in an the start of an identifier, and 0 if C is not valid in an
identifier. We assume C has already gone through the checks of identifier. We assume C has already gone through the checks of
...@@ -915,7 +918,7 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c, ...@@ -915,7 +918,7 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
int mn, mx, md; int mn, mx, md;
unsigned short valid_flags, invalid_start_flags; unsigned short valid_flags, invalid_start_flags;
if (c > 0x10FFFF) if (c > UCS_LIMIT)
return 0; return 0;
mn = 0; mn = 0;
...@@ -1016,6 +1019,10 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c, ...@@ -1016,6 +1019,10 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
whose short identifier is less than 00A0 other than 0024 ($), 0040 (@), whose short identifier is less than 00A0 other than 0024 ($), 0040 (@),
or 0060 (`), nor one in the range D800 through DFFF inclusive. or 0060 (`), nor one in the range D800 through DFFF inclusive.
If the hexadecimal value is larger than the upper bound of the UCS
codespace specified in ISO/IEC 10646, a pedantic warning is issued
in all versions of C and in the C++2a or later versions of C++.
*PSTR must be preceded by "\u" or "\U"; it is assumed that the *PSTR must be preceded by "\u" or "\U"; it is assumed that the
buffer end is delimited by a non-hex digit. Returns false if the buffer end is delimited by a non-hex digit. Returns false if the
UCN has not been consumed, true otherwise. UCN has not been consumed, true otherwise.
...@@ -1135,6 +1142,12 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr, ...@@ -1135,6 +1142,12 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
"universal character %.*s is not valid at the start of an identifier", "universal character %.*s is not valid at the start of an identifier",
(int) (str - base), base); (int) (str - base), base);
} }
else if (result > UCS_LIMIT
&& (!CPP_OPTION (pfile, cplusplus)
|| CPP_OPTION (pfile, lang) > CLK_CXX17))
cpp_error (pfile, CPP_DL_PEDWARN,
"%.*s is outside the UCS codespace",
(int) (str - base), base);
*cp = result; *cp = result;
return true; return true;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment