Commit 4268e8bb by Neil Booth Committed by Neil Booth

c-lex.c (lex_string): Let cpp_parse_escape handle truncation and sign-extension.

	* c-lex.c (lex_string): Let cpp_parse_escape handle truncation
	and sign-extension.
	(lex_charconst): Update for change in prototype of
	cpp_interpret_charconst.  Extend from cppchar_t to HOST_WIDE_INT
	appropriately.
	* cpphash.h (BITS_PER_CPPCHAR_T): New.
	* cppinit.c (cpp_create_reader): Initialize the new precision
	members for no change in semantics.
	(cpp_post_options): Add sanity checks.
	* cpplex.c (cpp_parse_escape): Handle precision, sign-extension
	and truncation issues.  Calculate in type cppchar_t.
	(MAX_CHAR_TYPE_SIZE, MAX_WCHAR_TYPE_SIZE): Remove.
	(cpp_interpret_charconst): Calculate in type cppchar_t.  Handle
	run-time dependent precision correctly.  Return whether the
	result is signed or not.
	* cpplib.c (dequote_string): Use cppchar_t; update.
	* cpplib.h (cppchar_signed_t): New.
	(struct cpp_options): New precision members.
	(cpp_interpret_charconst, cpp_parse_escape): Update prototypes.

From-SVN: r53152
parent ac5ec768
2002-05-04 Neil Booth <neil@daikokuya.demon.co.uk>
* c-lex.c (lex_string): Let cpp_parse_escape handle truncation
and sign-extension.
(lex_charconst): Update for change in prototype of
cpp_interpret_charconst. Extend from cppchar_t to HOST_WIDE_INT
appropriately.
* cpphash.h (BITS_PER_CPPCHAR_T): New.
* cppinit.c (cpp_create_reader): Initialize the new precision
members for no change in semantics.
(cpp_post_options): Add sanity checks.
* cpplex.c (cpp_parse_escape): Handle precision, sign-extension
and truncation issues. Calculate in type cppchar_t.
(MAX_CHAR_TYPE_SIZE, MAX_WCHAR_TYPE_SIZE): Remove.
(cpp_interpret_charconst): Calculate in type cppchar_t. Handle
run-time dependent precision correctly. Return whether the
result is signed or not.
* cpplib.c (dequote_string): Use cppchar_t; update.
* cpplib.h (cppchar_signed_t): New.
(struct cpp_options): New precision members.
(cpp_interpret_charconst, cpp_parse_escape): Update prototypes.
2002-05-03 David S. Miller <davem@redhat.com> 2002-05-03 David S. Miller <davem@redhat.com>
* config/sparc/sparc-protos.h (sparc_rtx_costs): New. * config/sparc/sparc-protos.h (sparc_rtx_costs): New.
......
...@@ -1238,9 +1238,7 @@ lex_string (str, len, wide) ...@@ -1238,9 +1238,7 @@ lex_string (str, len, wide)
char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1)); char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1));
char *q = buf; char *q = buf;
const unsigned char *p = str, *limit = str + len; const unsigned char *p = str, *limit = str + len;
unsigned int c; cppchar_t c;
unsigned width = wide ? WCHAR_TYPE_SIZE
: TYPE_PRECISION (char_type_node);
#ifdef MULTIBYTE_CHARS #ifdef MULTIBYTE_CHARS
/* Reset multibyte conversion state. */ /* Reset multibyte conversion state. */
...@@ -1270,15 +1268,7 @@ lex_string (str, len, wide) ...@@ -1270,15 +1268,7 @@ lex_string (str, len, wide)
#endif #endif
if (c == '\\' && !ignore_escape_flag) if (c == '\\' && !ignore_escape_flag)
{ c = cpp_parse_escape (parse_in, &p, limit, wide);
unsigned int mask;
if (width < HOST_BITS_PER_INT)
mask = ((unsigned int) 1 << width) - 1;
else
mask = ~0;
c = cpp_parse_escape (parse_in, &p, limit, mask);
}
/* Add this single character into the buffer either as a wchar_t, /* Add this single character into the buffer either as a wchar_t,
a multibyte sequence, or as a single byte. */ a multibyte sequence, or as a single byte. */
...@@ -1345,45 +1335,31 @@ static tree ...@@ -1345,45 +1335,31 @@ static tree
lex_charconst (token) lex_charconst (token)
const cpp_token *token; const cpp_token *token;
{ {
HOST_WIDE_INT result; cppchar_t result;
tree type, value; tree type, value;
unsigned int chars_seen; unsigned int chars_seen;
int unsignedp;
result = cpp_interpret_charconst (parse_in, token, warn_multichar, result = cpp_interpret_charconst (parse_in, token, warn_multichar,
&chars_seen); &chars_seen, &unsignedp);
if (token->type == CPP_WCHAR)
{
value = build_int_2 (result, 0);
type = wchar_type_node;
}
else
{
if (result < 0)
value = build_int_2 (result, -1);
else
value = build_int_2 (result, 0);
/* In C, a character constant has type 'int'.
In C++ 'char', but multi-char charconsts have type 'int'. */
if (c_language == clk_cplusplus && chars_seen <= 1)
type = char_type_node;
else
type = integer_type_node;
}
/* cpp_interpret_charconst issues a warning if the constant /* Cast to cppchar_signed_t to get correct sign-extension of RESULT
overflows, but if the number fits in HOST_WIDE_INT anyway, it before possibly widening to HOST_WIDE_INT for build_int_2. */
will return it un-truncated, which may cause problems down the if (unsignedp || (cppchar_signed_t) result >= 0)
line. So set the type to widest_integer_literal_type, call value = build_int_2 (result, 0);
convert to truncate it to the proper type, then clear else
TREE_OVERFLOW so we don't get a second warning. value = build_int_2 ((cppchar_signed_t) result, -1);
FIXME: cpplib's assessment of overflow may not be accurate on a
platform where the final type can change at (compiler's) runtime. */
TREE_TYPE (value) = widest_integer_literal_type_node; if (token->type == CPP_WCHAR)
value = convert (type, value); type = wchar_type_node;
TREE_OVERFLOW (value) = 0; /* In C, a character constant has type 'int'.
In C++ 'char', but multi-char charconsts have type 'int'. */
else if ((c_language == clk_c || c_language == clk_objective_c)
|| chars_seen > 1)
type = integer_type_node;
else
type = char_type_node;
TREE_TYPE (value) = type;
return value; return value;
} }
...@@ -283,10 +283,10 @@ eval_token (pfile, token) ...@@ -283,10 +283,10 @@ eval_token (pfile, token)
const cpp_token *token; const cpp_token *token;
{ {
unsigned int temp; unsigned int temp;
int unsignedp = 0;
struct op op; struct op op;
op.op = CPP_NUMBER; op.op = CPP_NUMBER;
op.unsignedp = 0;
switch (token->type) switch (token->type)
{ {
...@@ -294,9 +294,8 @@ eval_token (pfile, token) ...@@ -294,9 +294,8 @@ eval_token (pfile, token)
return parse_number (pfile, token); return parse_number (pfile, token);
case CPP_WCHAR: case CPP_WCHAR:
op.unsignedp = WCHAR_UNSIGNED; case CPP_CHAR:
case CPP_CHAR: /* Always unsigned. */ op.value = cpp_interpret_charconst (pfile, token, 1, &temp, &unsignedp);
op.value = cpp_interpret_charconst (pfile, token, 1, &temp);
break; break;
case CPP_NAME: case CPP_NAME:
...@@ -331,6 +330,7 @@ eval_token (pfile, token) ...@@ -331,6 +330,7 @@ eval_token (pfile, token)
op.value = temp; op.value = temp;
} }
op.unsignedp = unsignedp;
return op; return op;
} }
......
...@@ -29,6 +29,8 @@ struct directive; /* Deliberately incomplete. */ ...@@ -29,6 +29,8 @@ struct directive; /* Deliberately incomplete. */
struct pending_option; struct pending_option;
struct op; struct op;
#define BITS_PER_CPPCHAR_T (CHAR_BIT * sizeof (cppchar_t))
/* Test if a sign is valid within a preprocessing number. */ /* Test if a sign is valid within a preprocessing number. */
#define VALID_SIGN(c, prevc) \ #define VALID_SIGN(c, prevc) \
(((c) == '+' || (c) == '-') && \ (((c) == '+' || (c) == '-') && \
......
...@@ -502,6 +502,18 @@ cpp_create_reader (lang) ...@@ -502,6 +502,18 @@ cpp_create_reader (lang)
CPP_OPTION (pfile, pending) = CPP_OPTION (pfile, pending) =
(struct cpp_pending *) xcalloc (1, sizeof (struct cpp_pending)); (struct cpp_pending *) xcalloc (1, sizeof (struct cpp_pending));
/* CPP arithmetic done to existing rules for now. */
#define BITS_PER_HOST_WIDEST_INT (CHAR_BIT * sizeof (HOST_WIDEST_INT))
CPP_OPTION (pfile, precision) = BITS_PER_HOST_WIDEST_INT;
#ifndef MAX_CHAR_TYPE_SIZE
#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
#endif
CPP_OPTION (pfile, char_precision) = MAX_CHAR_TYPE_SIZE;
#ifndef MAX_WCHAR_TYPE_SIZE
#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
#endif
CPP_OPTION (pfile, wchar_precision) = MAX_WCHAR_TYPE_SIZE;
/* It's simplest to just create this struct whether or not it will /* It's simplest to just create this struct whether or not it will
be needed. */ be needed. */
pfile->deps = deps_init (); pfile->deps = deps_init ();
...@@ -1796,6 +1808,27 @@ cpp_post_options (pfile) ...@@ -1796,6 +1808,27 @@ cpp_post_options (pfile)
fputc ('\n', stderr); fputc ('\n', stderr);
} }
#if ENABLE_CHECKING
/* Sanity checks for CPP arithmetic. */
if (CPP_OPTION (pfile, precision) > BITS_PER_HOST_WIDEST_INT)
cpp_error (pfile, DL_FATAL,
"preprocessor arithmetic has maximum precision of %u bits; target requires %u bits",
BITS_PER_HOST_WIDEST_INT, CPP_OPTION (pfile, precision));
if (CPP_OPTION (pfile, char_precision) > BITS_PER_CPPCHAR_T
|| CPP_OPTION (pfile, wchar_precision) > BITS_PER_CPPCHAR_T)
cpp_error (pfile, DL_FATAL,
"CPP cannot handle (wide) character constants over %u bits",
BITS_PER_CPPCHAR_T);
{
cppchar_t test = 0;
test--;
if (test < 1)
cpp_error (pfile, DL_FATAL, "cppchar_t must be an unsigned type");
}
#endif
/* Canonicalize in_fname and out_fname. We guarantee they are not /* Canonicalize in_fname and out_fname. We guarantee they are not
NULL, and that the empty string represents stdin / stdout. */ NULL, and that the empty string represents stdin / stdout. */
if (CPP_OPTION (pfile, in_fname) == NULL if (CPP_OPTION (pfile, in_fname) == NULL
......
...@@ -1710,23 +1710,33 @@ maybe_read_ucs (pfile, pstr, limit, pc) ...@@ -1710,23 +1710,33 @@ maybe_read_ucs (pfile, pstr, limit, pc)
return 0; return 0;
} }
/* Interpret an escape sequence, and return its value. PSTR points to /* Returns the value of an escape sequence, truncated to the correct
the input pointer, which is just after the backslash. LIMIT is how target precision. PSTR points to the input pointer, which is just
much text we have. MASK is a bitmask for the precision for the after the backslash. LIMIT is how much text we have. WIDE is true
destination type (char or wchar_t). if the escape sequence is part of a wide character constant or
string literal. Handles all relevant diagnostics. */
Handles all relevant diagnostics. */ cppchar_t
unsigned int cpp_parse_escape (pfile, pstr, limit, wide)
cpp_parse_escape (pfile, pstr, limit, mask)
cpp_reader *pfile; cpp_reader *pfile;
const unsigned char **pstr; const unsigned char **pstr;
const unsigned char *limit; const unsigned char *limit;
unsigned HOST_WIDE_INT mask; int wide;
{ {
int unknown = 0; int unknown = 0;
const unsigned char *str = *pstr; const unsigned char *str = *pstr;
unsigned int c = *str++; cppchar_t c, mask;
unsigned int width;
if (wide)
width = CPP_OPTION (pfile, wchar_precision);
else
width = CPP_OPTION (pfile, char_precision);
if (width < BITS_PER_CPPCHAR_T)
mask = ((cppchar_t) 1 << width) - 1;
else
mask = ~0;
c = *str++;
switch (c) switch (c)
{ {
case '\\': case '\'': case '"': case '?': break; case '\\': case '\'': case '"': case '?': break;
...@@ -1767,7 +1777,7 @@ cpp_parse_escape (pfile, pstr, limit, mask) ...@@ -1767,7 +1777,7 @@ cpp_parse_escape (pfile, pstr, limit, mask)
"the meaning of '\\x' is different in traditional C"); "the meaning of '\\x' is different in traditional C");
{ {
unsigned int i = 0, overflow = 0; cppchar_t i = 0, overflow = 0;
int digits_found = 0; int digits_found = 0;
while (str < limit) while (str < limit)
...@@ -1798,8 +1808,8 @@ cpp_parse_escape (pfile, pstr, limit, mask) ...@@ -1798,8 +1808,8 @@ cpp_parse_escape (pfile, pstr, limit, mask)
case '0': case '1': case '2': case '3': case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7': case '4': case '5': case '6': case '7':
{ {
unsigned int i = c - '0'; size_t count = 0;
int count = 0; cppchar_t i = c - '0';
while (str < limit && ++count < 3) while (str < limit && ++count < 3)
{ {
...@@ -1834,36 +1844,33 @@ cpp_parse_escape (pfile, pstr, limit, mask) ...@@ -1834,36 +1844,33 @@ cpp_parse_escape (pfile, pstr, limit, mask)
} }
if (c > mask) if (c > mask)
cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for type"); {
cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for type");
c &= mask;
}
*pstr = str; *pstr = str;
return c; return c;
} }
#ifndef MAX_CHAR_TYPE_SIZE
#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
#endif
#ifndef MAX_WCHAR_TYPE_SIZE
#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
#endif
/* Interpret a (possibly wide) character constant in TOKEN. /* Interpret a (possibly wide) character constant in TOKEN.
WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN points WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN
to a variable that is filled in with the number of characters seen. */ points to a variable that is filled in with the number of
HOST_WIDE_INT characters seen, and UNSIGNEDP to a variable that indicates whether
cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen) the result has unsigned type. */
cppchar_t
cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen, unsignedp)
cpp_reader *pfile; cpp_reader *pfile;
const cpp_token *token; const cpp_token *token;
int warn_multi; int warn_multi;
unsigned int *pchars_seen; unsigned int *pchars_seen;
int *unsignedp;
{ {
const unsigned char *str = token->val.str.text; const unsigned char *str = token->val.str.text;
const unsigned char *limit = str + token->val.str.len; const unsigned char *limit = str + token->val.str.len;
unsigned int chars_seen = 0; unsigned int chars_seen = 0;
unsigned int width, max_chars, c; unsigned int width, max_chars;
unsigned HOST_WIDE_INT mask; cppchar_t c, mask, result = 0;
HOST_WIDE_INT result = 0;
bool unsigned_p; bool unsigned_p;
#ifdef MULTIBYTE_CHARS #ifdef MULTIBYTE_CHARS
...@@ -1873,20 +1880,20 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen) ...@@ -1873,20 +1880,20 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
/* Width in bits. */ /* Width in bits. */
if (token->type == CPP_CHAR) if (token->type == CPP_CHAR)
{ {
width = MAX_CHAR_TYPE_SIZE; width = CPP_OPTION (pfile, char_precision);
unsigned_p = CPP_OPTION (pfile, signed_char) == 0; unsigned_p = CPP_OPTION (pfile, signed_char) == 0;
} }
else else
{ {
width = MAX_WCHAR_TYPE_SIZE; width = CPP_OPTION (pfile, wchar_precision);
unsigned_p = WCHAR_UNSIGNED; unsigned_p = WCHAR_UNSIGNED;
} }
if (width < HOST_BITS_PER_WIDE_INT) if (width < BITS_PER_CPPCHAR_T)
mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1; mask = ((cppchar_t) 1 << width) - 1;
else else
mask = ~0; mask = ~0;
max_chars = HOST_BITS_PER_WIDE_INT / width; max_chars = BITS_PER_CPPCHAR_T / width;
while (str < limit) while (str < limit)
{ {
...@@ -1911,7 +1918,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen) ...@@ -1911,7 +1918,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
#endif #endif
if (c == '\\') if (c == '\\')
c = cpp_parse_escape (pfile, &str, limit, mask); c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
#ifdef MAP_CHARACTER #ifdef MAP_CHARACTER
if (ISPRINT (c)) if (ISPRINT (c))
...@@ -1921,7 +1928,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen) ...@@ -1921,7 +1928,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
/* Merge character into result; ignore excess chars. */ /* Merge character into result; ignore excess chars. */
if (++chars_seen <= max_chars) if (++chars_seen <= max_chars)
{ {
if (width < HOST_BITS_PER_WIDE_INT) if (width < BITS_PER_CPPCHAR_T)
result = (result << width) | (c & mask); result = (result << width) | (c & mask);
else else
result = c; result = c;
...@@ -1943,7 +1950,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen) ...@@ -1943,7 +1950,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
{ {
unsigned int nbits = chars_seen * width; unsigned int nbits = chars_seen * width;
mask = (unsigned HOST_WIDE_INT) ~0 >> (HOST_BITS_PER_WIDE_INT - nbits); mask = (cppchar_t) ~0 >> (BITS_PER_CPPCHAR_T - nbits);
if (unsigned_p || ((result >> (nbits - 1)) & 1) == 0) if (unsigned_p || ((result >> (nbits - 1)) & 1) == 0)
result &= mask; result &= mask;
else else
...@@ -1951,6 +1958,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen) ...@@ -1951,6 +1958,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
} }
*pchars_seen = chars_seen; *pchars_seen = chars_seen;
*unsignedp = unsigned_p;
return result; return result;
} }
......
...@@ -726,23 +726,15 @@ dequote_string (pfile, str, len) ...@@ -726,23 +726,15 @@ dequote_string (pfile, str, len)
uchar *result = _cpp_unaligned_alloc (pfile, len + 1); uchar *result = _cpp_unaligned_alloc (pfile, len + 1);
uchar *dst = result; uchar *dst = result;
const uchar *limit = str + len; const uchar *limit = str + len;
unsigned int c; cppchar_t c;
unsigned HOST_WIDE_INT mask;
/* We need the mask to match the host's 'unsigned char', not the
target's. */
if (CHAR_BIT < HOST_BITS_PER_WIDE_INT)
mask = ((unsigned HOST_WIDE_INT) 1 << CHAR_BIT) - 1;
else
mask = ~(unsigned HOST_WIDE_INT)0;
while (str < limit) while (str < limit)
{ {
c = *str++; c = *str++;
if (c != '\\') if (c != '\\')
*dst++ = c; *dst++ = c;
else else
*dst++ = cpp_parse_escape (pfile, (const uchar **)&str, limit, mask); *dst++ = cpp_parse_escape (pfile, &str, limit, 0);
} }
*dst++ = '\0'; *dst++ = '\0';
return result; return result;
......
...@@ -190,9 +190,12 @@ struct cpp_token ...@@ -190,9 +190,12 @@ struct cpp_token
} val; } val;
}; };
/* A standalone character. It is unsigned for the same reason we use /* A type wide enough to hold any multibyte source character.
unsigned char - to avoid signedness issues. */ cpplib's character constant interpreter uses shifts, and so
requires an unsigned type. */
typedef unsigned int cppchar_t; typedef unsigned int cppchar_t;
/* Its signed equivalent. */
typedef int cppchar_signed_t;
/* Values for opts.dump_macros. /* Values for opts.dump_macros.
dump_only means inhibit output of the preprocessed text dump_only means inhibit output of the preprocessed text
...@@ -237,6 +240,10 @@ struct cpp_options ...@@ -237,6 +240,10 @@ struct cpp_options
/* -fleading_underscore sets this to "_". */ /* -fleading_underscore sets this to "_". */
const char *user_label_prefix; const char *user_label_prefix;
/* Precision for target CPP arithmetic, target characters and target
wide characters, respectively. */
size_t precision, char_precision, wchar_precision;
/* The language we're preprocessing. */ /* The language we're preprocessing. */
enum c_lang lang; enum c_lang lang;
...@@ -535,9 +542,9 @@ extern const unsigned char *cpp_macro_definition PARAMS ((cpp_reader *, ...@@ -535,9 +542,9 @@ extern const unsigned char *cpp_macro_definition PARAMS ((cpp_reader *,
extern void _cpp_backup_tokens PARAMS ((cpp_reader *, unsigned int)); extern void _cpp_backup_tokens PARAMS ((cpp_reader *, unsigned int));
/* Evaluate a CPP_CHAR or CPP_WCHAR token. */ /* Evaluate a CPP_CHAR or CPP_WCHAR token. */
extern HOST_WIDE_INT extern cppchar_t
cpp_interpret_charconst PARAMS ((cpp_reader *, const cpp_token *, cpp_interpret_charconst PARAMS ((cpp_reader *, const cpp_token *,
int, unsigned int *)); int, unsigned int *, int *));
extern void cpp_define PARAMS ((cpp_reader *, const char *)); extern void cpp_define PARAMS ((cpp_reader *, const char *));
extern void cpp_assert PARAMS ((cpp_reader *, const char *)); extern void cpp_assert PARAMS ((cpp_reader *, const char *));
...@@ -600,10 +607,15 @@ extern int cpp_ideq PARAMS ((const cpp_token *, ...@@ -600,10 +607,15 @@ extern int cpp_ideq PARAMS ((const cpp_token *,
extern void cpp_output_line PARAMS ((cpp_reader *, FILE *)); extern void cpp_output_line PARAMS ((cpp_reader *, FILE *));
extern void cpp_output_token PARAMS ((const cpp_token *, FILE *)); extern void cpp_output_token PARAMS ((const cpp_token *, FILE *));
extern const char *cpp_type2name PARAMS ((enum cpp_ttype)); extern const char *cpp_type2name PARAMS ((enum cpp_ttype));
extern unsigned int cpp_parse_escape PARAMS ((cpp_reader *, /* Returns the value of an escape sequence, truncated to the correct
const unsigned char **, target precision. PSTR points to the input pointer, which is just
const unsigned char *, after the backslash. LIMIT is how much text we have. WIDE is true
unsigned HOST_WIDE_INT)); if the escape sequence is part of a wide character constant or
string literal. Handles all relevant diagnostics. */
extern cppchar_t cpp_parse_escape PARAMS ((cpp_reader *,
const unsigned char ** pstr,
const unsigned char *limit,
int wide));
/* In cpphash.c */ /* In cpphash.c */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment