Commit 62729350 by Neil Booth Committed by Neil Booth

c-lex.c (read_escape, read_ucs): Delete.

	* c-lex.c (read_escape, read_ucs): Delete.
	(lex_string): Use cpp_parse_escape.
	* cpplex.c (read_ucs): Rename maybe_read_ucs.  Do nothing
	unless C++ or C99.  Warn if -Wtraditional.
	(parse_escape): Rename cpp_parse_escape.  Make the bitmask
	unsigned.  Warn for out-of-range escape sequences here.
	Update to use maybe_read_ucs.
	(cpp_interpret_charconst):  Don't warn here.
	* cpplib.h (cpp_parse_escape): New exported function.

From-SVN: r42509
parent ff867905
2001-05-23 Neil Booth <neil@daikokuya.demon.co.uk>
* c-lex.c (read_escape, read_ucs): Delete.
(lex_string): Use cpp_parse_escape.
* cpplex.c (read_ucs): Rename maybe_read_ucs. Do nothing
unless C++ or C99. Warn if -Wtraditional.
(parse_escape): Rename cpp_parse_escape. Make the bitmask
unsigned. Warn for out-of-range escape sequences here.
Update to use maybe_read_ucs.
(cpp_interpret_charconst): Don't warn here.
* cpplib.h (cpp_parse_escape): New exported function.
2001-05-23 Joseph S. Myers <jsm28@cam.ac.uk>
* extend.texi (Attribute Syntax): Clarify application of
......
......@@ -79,10 +79,6 @@ int c_header_level; /* depth in C headers - C++ only */
/* Nonzero tells yylex to ignore \ in string constants. */
static int ignore_escape_flag;
static const char *readescape PARAMS ((const char *, const char *,
unsigned int *));
static const char *read_ucs PARAMS ((const char *, const char *,
unsigned int *, int));
static void parse_float PARAMS ((PTR));
static tree lex_number PARAMS ((const char *, unsigned int));
static tree lex_string PARAMS ((const char *, unsigned int, int));
......@@ -350,230 +346,6 @@ cb_undef (pfile, node)
debug_undef (lineno, (const char *) NODE_NAME (node));
}
/* Decode the hexadecimal digits of a universal-character-name.

   P points just past the 'u' or 'U', LIMIT is the end of the available
   text, and LENGTH is the number of hex digits expected (the caller
   passes 4 for '\uNNNN' and 8 for '\UNNNNNNNN').  The decoded value is
   stored in *CPTR and the advanced input pointer is returned.

   [lex.charset]: \UNNNNNNNN designates the character whose ISO/IEC
   10646 short name is NNNNNNNN, and \uNNNN the one whose short name is
   0000NNNN.  A program is ill-formed if the value is below 0x20, lies
   in 0x7F-0x9F inclusive, or names a character of the basic source
   character set.

   wchar_t is assumed to be Unicode, so no mapping is performed.  Is
   this ever wrong?  */
static const char *
read_ucs (p, limit, cptr, length)
     const char *p;
     const char *limit;
     unsigned int *cptr;
     int length;
{
  unsigned int value = 0;
  int ch;

  while (length != 0)
    {
      if (p >= limit)
        {
          error ("incomplete universal-character-name");
          break;
        }

      /* Peek before consuming, so that an offending character is left
         in the input for the caller.  */
      ch = *p;
      if (! ISXDIGIT (ch))
        {
          error ("non hex digit '%c' in universal-character-name", ch);
          break;
        }
      p++;

      /* Shift in the next nibble.  */
      value <<= 4;
      if (ch >= '0' && ch <= '9')
        value += ch - '0';
      else if (ch >= 'A' && ch <= 'F')
        value += ch - 'A' + 10;
      else if (ch >= 'a' && ch <= 'f')
        value += ch - 'a' + 10;

      length--;
    }

#ifdef TARGET_EBCDIC
  sorry ("universal-character-name on EBCDIC target");
  *cptr = 0x3f;  /* EBCDIC invalid character */
  return p;
#endif

  /* Anything above 0x9f without the top bit set is a true extended
     character and needs no diagnostic.  */
  if (value <= 0x9f || (value & 0x80000000))
    {
      if (value < 0x20 || value >= 0x7f)
        error ("invalid universal-character-name");
      /* ASCII printable character.  The C character set consists of
         all of these except $, @ and `.  Hex values are used so that
         the test also works with EBCDIC hosts.  */
      else if (value != 0x24 && value != 0x40 && value != 0x60)
        error ("universal-character-name used for '%c'", value);
    }

  *cptr = value;
  return p;
}
/* Read an escape sequence and write its character equivalent into *CPTR.
P is the input pointer, which is just after the backslash. LIMIT
is how much text we have.
Returns the updated input pointer.
Diagnostics for -Wtraditional-sensitive, non-ISO, unknown and
out-of-range hex escapes are issued from here. */
static const char *
readescape (p, limit, cptr)
const char *p;
const char *limit;
unsigned int *cptr;
{
unsigned int c, code, count;
unsigned firstdig = 0;
int nonnull;
if (p == limit)
{
/* cpp has already issued an error for this. */
*cptr = 0;
return p;
}
c = *p++;
switch (c)
{
case 'x':
if (warn_traditional && !in_system_header)
warning ("the meaning of `\\x' varies with -traditional");
/* With -traditional, "\x" is simply the letter 'x'. */
if (flag_traditional)
{
*cptr = 'x';
return p;
}
/* CODE accumulates the value. COUNT is the number of digits seen
starting from the first nonzero one, and FIRSTDIG is the value of
that first nonzero digit. NONNULL records whether any hex digit
at all was consumed. */
code = 0;
count = 0;
nonnull = 0;
while (p < limit)
{
c = *p++;
if (! ISXDIGIT (c))
{
/* Not part of the escape; push it back. */
p--;
break;
}
code *= 16;
if (c >= 'a' && c <= 'f')
code += c - 'a' + 10;
if (c >= 'A' && c <= 'F')
code += c - 'A' + 10;
if (c >= '0' && c <= '9')
code += c - '0';
/* Leading zeros do not count as significant digits. */
if (code != 0 || count != 0)
{
if (count == 0)
firstdig = code;
count++;
}
nonnull = 1;
}
if (! nonnull)
{
warning ("\\x used with no following hex digits");
*cptr = 'x';
return p;
}
else if (count == 0)
/* Digits are all 0's. Ok. */
;
/* Out of range if the digits after the leading one already need at
least the full precision of int, or if the leading digit does not
fit in the bits left over above them. */
else if ((count - 1) * 4 >= TYPE_PRECISION (integer_type_node)
|| (count > 1
&& (((unsigned)1
<< (TYPE_PRECISION (integer_type_node)
- (count - 1) * 4))
<= firstdig)))
pedwarn ("hex escape out of range");
*cptr = code;
return p;
/* Octal escape: at most three octal digits. C already holds the
first digit; each iteration folds C into CODE and then reads one
character ahead. */
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7':
code = 0;
for (count = 0; count < 3; count++)
{
if (c < '0' || c > '7')
{
/* The look-ahead character is not octal; push it back. */
p--;
break;
}
code = (code * 8) + (c - '0');
if (p == limit)
break;
c = *p++;
}
/* COUNT reaches 3 only when a look-ahead character was read after
the third digit, so push that character back. */
if (count == 3)
p--;
*cptr = code;
return p;
/* These characters stand for themselves. */
case '\\': case '\'': case '"': case '?':
*cptr = c;
return p;
case 'n': *cptr = TARGET_NEWLINE; return p;
case 't': *cptr = TARGET_TAB; return p;
case 'r': *cptr = TARGET_CR; return p;
case 'f': *cptr = TARGET_FF; return p;
case 'b': *cptr = TARGET_BS; return p;
case 'v': *cptr = TARGET_VT; return p;
case 'a':
if (warn_traditional && !in_system_header)
warning ("the meaning of '\\a' varies with -traditional");
/* With -traditional, "\a" is the letter 'a' rather than the bell. */
*cptr = flag_traditional ? c : TARGET_BELL;
return p;
/* Universal character names are accepted only for C++ and C99; in
other modes fall out of the switch to the unknown-escape
diagnostics below. The language check and the -Wtraditional
warning are done here; read_ucs() parses and validates the
4 ('u') or 8 ('U') hex digits. */
case 'u': case 'U':
if (c_language != clk_cplusplus && !flag_isoc99)
break;
if (warn_traditional && !in_system_header)
warning ("the meaning of '\\%c' varies with -traditional", c);
return read_ucs (p, limit, cptr, c == 'u' ? 4 : 8);
/* '\e' and '\E' are an extension, diagnosed only when pedantic. */
case 'e': case 'E':
if (pedantic)
pedwarn ("non-ISO-standard escape sequence, '\\%c'", c);
*cptr = TARGET_ESC; return p;
/* '\(', etc, are used at beginning of line to avoid confusing Emacs.
'\%' is used to prevent SCCS from getting confused. */
case '(': case '{': case '[': case '%':
if (pedantic)
pedwarn ("unknown escape sequence '\\%c'", c);
*cptr = c;
return p;
}
/* Unknown escape: diagnose it, printing the character itself only if
ISGRAPH accepts it, and let the character stand for itself. */
if (ISGRAPH (c))
pedwarn ("unknown escape sequence '\\%c'", c);
else
pedwarn ("unknown escape sequence: '\\' followed by char 0x%x", c);
*cptr = c;
return p;
}
#if 0 /* not yet */
/* Returns nonzero if C is a universal-character-name. Give an error if it
is not one which may appear in an identifier, as per [extendid].
......@@ -1551,10 +1323,15 @@ lex_string (str, len, wide)
if (c == '\\' && !ignore_escape_flag)
{
p = readescape (p, limit, &c);
if (width < HOST_BITS_PER_INT
&& (unsigned) c >= ((unsigned)1 << width))
pedwarn ("escape sequence out of range for character");
unsigned int mask;
if (width < HOST_BITS_PER_INT)
mask = ((unsigned int) 1 << width) - 1;
else
mask = ~0;
c = cpp_parse_escape (parse_in, (const unsigned char **) &p,
(const unsigned char *) limit,
mask, flag_traditional);
}
/* Add this single character into the buffer either as a wchar_t
......
......@@ -98,11 +98,8 @@ static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
static int name_p PARAMS ((cpp_reader *, const cpp_string *));
static unsigned int parse_escape PARAMS ((cpp_reader *, const unsigned char **,
const unsigned char *, HOST_WIDE_INT,
int));
static unsigned int read_ucs PARAMS ((cpp_reader *, const unsigned char **,
const unsigned char *, unsigned int));
static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
const unsigned char *, unsigned int *));
static cpp_chunk *new_chunk PARAMS ((unsigned int));
static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
......@@ -1670,7 +1667,10 @@ hex_digit_value (c)
abort ();
}
/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence (C++ and C99).
/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
failure if cpplib is not parsing C++ or C99. Such failure is
silent, and no variables are updated. Otherwise returns 0, and
warns if -Wtraditional.
[lex.charset]: The character designated by the universal character
name \UNNNNNNNN is that character whose character short name in
......@@ -1683,19 +1683,31 @@ hex_digit_value (c)
program is ill-formed.
We assume that wchar_t is Unicode, so we don't need to do any
mapping. Is this ever wrong? */
mapping. Is this ever wrong?
static unsigned int
read_ucs (pfile, pstr, limit, length)
PC points to the 'u' or 'U', PSTR points to the byte after PC,
LIMIT is the end of the string or charconst. PSTR is updated to
point after the UCS on return, and the UCS is written into PC. */
static int
maybe_read_ucs (pfile, pstr, limit, pc)
cpp_reader *pfile;
const unsigned char **pstr;
const unsigned char *limit;
unsigned int length;
unsigned int *pc;
{
const unsigned char *p = *pstr;
unsigned int c, code = 0;
unsigned int code = 0;
unsigned int c = *pc, length;
/* Only attempt to interpret a UCS for C++ and C99. */
if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
return 1;
for (; length; --length)
if (CPP_WTRADITIONAL (pfile))
cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
for (length = (c == 'u' ? 4: 8); length; --length)
{
if (p >= limit)
{
......@@ -1737,21 +1749,24 @@ read_ucs (pfile, pstr, limit, length)
#endif
*pstr = p;
return code;
*pc = code;
return 0;
}
/* Interpret an escape sequence, and return its value. PSTR points to
the input pointer, which is just after the backslash. LIMIT is how
much text we have. MASK is the precision for the target type (char
or wchar_t). TRADITIONAL, if true, does not interpret escapes that
did not exist in traditional C. */
much text we have. MASK is a bitmask for the precision for the
destination type (char or wchar_t). TRADITIONAL, if true, does not
interpret escapes that did not exist in traditional C.
static unsigned int
parse_escape (pfile, pstr, limit, mask, traditional)
Handles all relevant diagnostics. */
unsigned int
cpp_parse_escape (pfile, pstr, limit, mask, traditional)
cpp_reader *pfile;
const unsigned char **pstr;
const unsigned char *limit;
HOST_WIDE_INT mask;
unsigned HOST_WIDE_INT mask;
int traditional;
{
int unknown = 0;
......@@ -1787,17 +1802,8 @@ parse_escape (pfile, pstr, limit, mask, traditional)
c = TARGET_ESC;
break;
/* Warnings and support checks handled by read_ucs(). */
case 'u': case 'U':
if (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99))
{
if (CPP_WTRADITIONAL (pfile))
cpp_warning (pfile,
"the meaning of '\\%c' varies with -traditional", c);
c = read_ucs (pfile, &str, limit, c == 'u' ? 4 : 8);
}
else
unknown = 1;
unknown = maybe_read_ucs (pfile, &str, limit, &c);
break;
case 'x':
......@@ -1869,6 +1875,9 @@ parse_escape (pfile, pstr, limit, mask, traditional)
cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
}
if (c > mask)
cpp_pedwarn (pfile, "escape sequence out of range for character");
*pstr = str;
return c;
}
......@@ -1939,11 +1948,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
#endif
if (c == '\\')
{
c = parse_escape (pfile, &str, limit, mask, traditional);
if (width < HOST_BITS_PER_WIDE_INT && c > mask)
cpp_pedwarn (pfile, "escape sequence out of range for character");
}
c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
#ifdef MAP_CHARACTER
if (ISPRINT (c))
......
......@@ -601,6 +601,10 @@ extern int cpp_ideq PARAMS ((const cpp_token *,
extern void cpp_output_line PARAMS ((cpp_reader *, FILE *));
extern void cpp_output_token PARAMS ((const cpp_token *, FILE *));
extern const char *cpp_type2name PARAMS ((enum cpp_ttype));
extern unsigned int cpp_parse_escape PARAMS ((cpp_reader *,
const unsigned char **,
const unsigned char *,
unsigned HOST_WIDE_INT, int));
/* In cpphash.c */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment