c-lex.c (lex_string): Let cpp_parse_escape handle truncation and sign-extension.

* c-lex.c (lex_string): Let cpp_parse_escape handle truncation and sign-extension. (lex_charconst): Update for change in prototype of cpp_interpret_charconst. Extend from cppchar_t to HOST_WIDE_INT appropriately. * cpphash.h (BITS_PER_CPPCHAR_T): New. * cppinit.c (cpp_create_reader): Initialize them for no change in semantics. (cpp_post_options): Add sanity checks. * cpplex.c (cpp_parse_escape): Handle precision, sign-extension and truncation issues. Calculate in type cppchar_t. (MAX_CHAR_TYPE_SIZE, MAX_WCHAR_TYPE_SIZE): Remove. (cpp_interpret_charconst): Calculate in type cppchar_t. Handle run-time dependent precision correctly. Return whether the result is signed or not. * cpplib.c (dequote_string): Use cppchar_t; update. * cpplib.h (cppchar_signed_t): New. (struct cpp_options): New precision members. (cpp_interpret_charconst, cpp_parse_escape): Update prototypes. From-SVN: r53152

c-lex.c (lex_string): Let cpp_parse_escape handle truncation and sign-extension.
* c-lex.c (lex_string): Let cpp_parse_escape handle truncation and sign-extension. (lex_charconst): Update for change in prototype of cpp_interpret_charconst. Extend from cppchar_t to HOST_WIDE_INT appropriately. * cpphash.h (BITS_PER_CPPCHAR_T): New. * cppinit.c (cpp_create_reader): Initialize them for no change in semantics. (cpp_post_options): Add sanity checks. * cpplex.c (cpp_parse_escape): Handle precision, sign-extension and truncation issues. Calculate in type cppchar_t. (MAX_CHAR_TYPE_SIZE, MAX_WCHAR_TYPE_SIZE): Remove. (cpp_interpret_charconst): Calculate in type cppchar_t. Handle run-time dependent precision correctly. Return whether the result is signed or not. * cpplib.c (dequote_string): Use cppchar_t; update. * cpplib.h (cppchar_signed_t): New. (struct cpp_options): New precision members. (cpp_interpret_charconst, cpp_parse_escape): Update prototypes. From-SVN: r53152
4268e8bb · Neil Booth · Neil Booth · ac5ec768 · 4268e8bb · 4268e8bb
Commit 4268e8bb authored May 04, 2002 by Neil Booth Committed by Neil Booth May 04, 2002
Hide whitespace changes
Inline Side-by-side

Showing with 149 additions and 104 deletions

gcc/ChangeLog
+22 -0

gcc/c-lex.c
+21 -45

gcc/cppexp.c
+4 -4

gcc/cpphash.h
+2 -0

gcc/cppinit.c
+33 -0

gcc/cpplex.c
+45 -37

gcc/cpplib.c
+2 -10

gcc/cpplib.h
+20 -8

No files found.
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
+2002-05-04  Neil Booth  <neil@daikokuya.demon.co.uk>
+	* c-lex.c (lex_string): Let cpp_parse_escape handle truncation
+	and sign-extension.
+	(lex_charconst): Update for change in prototype of
+	cpp_interpret_charconst.  Extend from cppchar_t to HOST_WIDE_INT
+	appropriately.
+	* cpphash.h (BITS_PER_CPPCHAR_T): New.
+	* cppinit.c (cpp_create_reader): Initialize them for no
+	change in semantics.
+	(cpp_post_options): Add sanity checks.
+	* cpplex.c (cpp_parse_escape): Handle precision, sign-extension
+	and truncation issues.  Calculate in type cppchar_t.
+	(MAX_CHAR_TYPE_SIZE, MAX_WCHAR_TYPE_SIZE): Remove.
+	(cpp_interpret_charconst): Calculate in type cppchar_t.  Handle
+	run-time dependent precision correctly.  Return whether the
+	result is signed or not.
+	* cpplib.c (dequote_string): Use cppchar_t; update.
+	* cpplib.h (cppchar_signed_t): New.
+	(struct cpp_options): New precision members.
+	(cpp_interpret_charconst, cpp_parse_escape): Update prototypes.
 2002-05-03  David S. Miller  <davem@redhat.com>
 	* config/sparc/sparc-protos.h (sparc_rtx_costs): New.

--- a/gcc/c-lex.c
+++ b/gcc/c-lex.c
@@ -1238,9 +1238,7 @@ lex_string (str, len, wide)
  char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1));
  char *q = buf;
  const unsigned char *p = str, *limit = str + len;
-  unsigned int c;
+  cppchar_t c;
-  unsigned width = wide ? WCHAR_TYPE_SIZE
-			: TYPE_PRECISION (char_type_node);
 #ifdef MULTIBYTE_CHARS
  /* Reset multibyte conversion state.  */
@@ -1270,15 +1268,7 @@ lex_string (str, len, wide)
 #endif
      if (c == '\\' && !ignore_escape_flag)
-	{
+	c = cpp_parse_escape (parse_in, &p, limit, wide);
-	  unsigned int mask;
-	  if (width < HOST_BITS_PER_INT)
-	    mask = ((unsigned int) 1 << width) - 1;
-	  else
-	    mask = ~0;
-	  c = cpp_parse_escape (parse_in, &p, limit, mask);
-	}
      /* Add this single character into the buffer either as a wchar_t,
 	 a multibyte sequence, or as a single byte.  */
@@ -1345,45 +1335,31 @@ static tree
 lex_charconst (token)
     const cpp_token *token;
 {
-  HOST_WIDE_INT result;
+  cppchar_t result;
  tree type, value;
  unsigned int chars_seen;
+  int unsignedp;
  result = cpp_interpret_charconst (parse_in, token, warn_multichar,
- 				    &chars_seen);
+ 				    &chars_seen, &unsignedp);
-  if (token->type == CPP_WCHAR)
-    {
-      value = build_int_2 (result, 0);
-      type = wchar_type_node;
-    }
-  else
-    {
-      if (result < 0)
- 	value = build_int_2 (result, -1);
-      else
- 	value = build_int_2 (result, 0);
-      /* In C, a character constant has type 'int'.
- 	 In C++ 'char', but multi-char charconsts have type 'int'.  */
-      if (c_language == clk_cplusplus && chars_seen <= 1)
-	type = char_type_node;
-      else
-	type = integer_type_node;
-    }
-  /* cpp_interpret_charconst issues a warning if the constant
+  /* Cast to cppchar_signed_t to get correct sign-extension of RESULT
-     overflows, but if the number fits in HOST_WIDE_INT anyway, it
+     before possibly widening to HOST_WIDE_INT for build_int_2.  */
-     will return it un-truncated, which may cause problems down the
+  if (unsignedp || (cppchar_signed_t) result >= 0)
-     line.  So set the type to widest_integer_literal_type, call
+    value = build_int_2 (result, 0);
-     convert to truncate it to the proper type, then clear
+  else
-     TREE_OVERFLOW so we don't get a second warning.
+    value = build_int_2 ((cppchar_signed_t) result, -1);
-     FIXME: cpplib's assessment of overflow may not be accurate on a
-     platform where the final type can change at (compiler's) runtime.  */
-  TREE_TYPE (value) = widest_integer_literal_type_node;
+  if (token->type == CPP_WCHAR)
-  value = convert (type, value);
+    type = wchar_type_node;
-  TREE_OVERFLOW (value) = 0;
+  /* In C, a character constant has type 'int'.
+     In C++ 'char', but multi-char charconsts have type 'int'.  */
+  else if ((c_language == clk_c || c_language == clk_objective_c)
+	   || chars_seen > 1)
+    type = integer_type_node;
+  else
+    type = char_type_node;
+  TREE_TYPE (value) = type;
  return value;
 }
--- a/gcc/cppexp.c
+++ b/gcc/cppexp.c
@@ -283,10 +283,10 @@ eval_token (pfile, token)
     const cpp_token *token;
 {
  unsigned int temp;
+  int unsignedp = 0;
  struct op op;
  op.op = CPP_NUMBER;
-  op.unsignedp = 0;
  switch (token->type)
    {
@@ -294,9 +294,8 @@ eval_token (pfile, token)
      return parse_number (pfile, token);
    case CPP_WCHAR:
-      op.unsignedp = WCHAR_UNSIGNED;
+    case CPP_CHAR:
-    case CPP_CHAR:		/* Always unsigned.  */
+      op.value = cpp_interpret_charconst (pfile, token, 1, &temp, &unsignedp);
-      op.value = cpp_interpret_charconst (pfile, token, 1, &temp);
      break;
    case CPP_NAME:
@@ -331,6 +330,7 @@ eval_token (pfile, token)
      op.value = temp;
    }
+  op.unsignedp = unsignedp;
  return op;
 }

--- a/gcc/cpphash.h
+++ b/gcc/cpphash.h
@@ -29,6 +29,8 @@ struct directive;		/* Deliberately incomplete.  */
 struct pending_option;
 struct op;
+#define BITS_PER_CPPCHAR_T (CHAR_BIT * sizeof (cppchar_t))
 /* Test if a sign is valid within a preprocessing number.  */
 #define VALID_SIGN(c, prevc) \
  (((c) == '+' || (c) == '-') && \

--- a/gcc/cppinit.c
+++ b/gcc/cppinit.c
@@ -502,6 +502,18 @@ cpp_create_reader (lang)
  CPP_OPTION (pfile, pending) =
    (struct cpp_pending *) xcalloc (1, sizeof (struct cpp_pending));
+  /* CPP arithmetic done to existing rules for now.  */
+#define BITS_PER_HOST_WIDEST_INT (CHAR_BIT * sizeof (HOST_WIDEST_INT))
+  CPP_OPTION (pfile, precision) = BITS_PER_HOST_WIDEST_INT;
+#ifndef MAX_CHAR_TYPE_SIZE
+#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
+#endif
+  CPP_OPTION (pfile, char_precision) = MAX_CHAR_TYPE_SIZE;
+#ifndef MAX_WCHAR_TYPE_SIZE
+#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
+#endif
+  CPP_OPTION (pfile, wchar_precision) = MAX_WCHAR_TYPE_SIZE;
  /* It's simplest to just create this struct whether or not it will
     be needed.  */
  pfile->deps = deps_init ();
@@ -1796,6 +1808,27 @@ cpp_post_options (pfile)
      fputc ('\n', stderr);
    }
+#if ENABLE_CHECKING
+  /* Sanity checks for CPP arithmetic.  */
+  if (CPP_OPTION (pfile, precision) > BITS_PER_HOST_WIDEST_INT)
+    cpp_error (pfile, DL_FATAL,
+	       "preprocessor arithmetic has maximum precision of %u bits; target requires %u bits",
+	       BITS_PER_HOST_WIDEST_INT, CPP_OPTION (pfile, precision));
+  if (CPP_OPTION (pfile, char_precision) > BITS_PER_CPPCHAR_T
+      || CPP_OPTION (pfile, wchar_precision) > BITS_PER_CPPCHAR_T)
+    cpp_error (pfile, DL_FATAL,
+	       "CPP cannot handle (wide) character constants over %u bits",
+	       BITS_PER_CPPCHAR_T);
+  {
+    cppchar_t test = 0;
+    test--;
+    if (test < 1)
+      cpp_error (pfile, DL_FATAL, "cppchar_t must be an unsigned type");
+  }
+#endif
  /* Canonicalize in_fname and out_fname.  We guarantee they are not
     NULL, and that the empty string represents stdin / stdout.  */
  if (CPP_OPTION (pfile, in_fname) == NULL

--- a/gcc/cpplex.c
+++ b/gcc/cpplex.c
@@ -1710,23 +1710,33 @@ maybe_read_ucs (pfile, pstr, limit, pc)
  return 0;
 }
-/* Interpret an escape sequence, and return its value.  PSTR points to
+/* Returns the value of an escape sequence, truncated to the correct
-   the input pointer, which is just after the backslash.  LIMIT is how
+   target precision.  PSTR points to the input pointer, which is just
-   much text we have.  MASK is a bitmask for the precision for the
+   after the backslash.  LIMIT is how much text we have.  WIDE is true
-   destination type (char or wchar_t).
+   if the escape sequence is part of a wide character constant or
+   string literal.  Handles all relevant diagnostics.  */
-   Handles all relevant diagnostics.  */
+cppchar_t
-unsigned int
+cpp_parse_escape (pfile, pstr, limit, wide)
-cpp_parse_escape (pfile, pstr, limit, mask)
     cpp_reader *pfile;
     const unsigned char **pstr;
     const unsigned char *limit;
-     unsigned HOST_WIDE_INT mask;
+     int wide;
 {
  int unknown = 0;
  const unsigned char *str = *pstr;
-  unsigned int c = *str++;
+  cppchar_t c, mask;
+  unsigned int width;
+  if (wide)
+    width = CPP_OPTION (pfile, wchar_precision);
+  else
+    width = CPP_OPTION (pfile, char_precision);
+  if (width < BITS_PER_CPPCHAR_T)
+    mask = ((cppchar_t) 1 << width) - 1;
+  else
+    mask = ~0;
+  c = *str++;
  switch (c)
    {
    case '\\': case '\'': case '"': case '?': break;
@@ -1767,7 +1777,7 @@ cpp_parse_escape (pfile, pstr, limit, mask)
 		   "the meaning of '\\x' is different in traditional C");
 	{
-	  unsigned int i = 0, overflow = 0;
+	  cppchar_t i = 0, overflow = 0;
 	  int digits_found = 0;
 	  while (str < limit)
@@ -1798,8 +1808,8 @@ cpp_parse_escape (pfile, pstr, limit, mask)
    case '0':  case '1':  case '2':  case '3':
    case '4':  case '5':  case '6':  case '7':
      {
-	unsigned int i = c - '0';
+	size_t count = 0;
-	int count = 0;
+	cppchar_t i = c - '0';
 	while (str < limit && ++count < 3)
 	  {
@@ -1834,36 +1844,33 @@ cpp_parse_escape (pfile, pstr, limit, mask)
    }
  if (c > mask)
-    cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for type");
+    {
+      cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for type");
+      c &= mask;
+    }
  *pstr = str;
  return c;
 }
-#ifndef MAX_CHAR_TYPE_SIZE
-#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
-#endif
-#ifndef MAX_WCHAR_TYPE_SIZE
-#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
-#endif
 /* Interpret a (possibly wide) character constant in TOKEN.
-   WARN_MULTI warns about multi-character charconsts.  PCHARS_SEEN points
+   WARN_MULTI warns about multi-character charconsts.  PCHARS_SEEN
-   to a variable that is filled in with the number of characters seen.  */
+   points to a variable that is filled in with the number of
-HOST_WIDE_INT
+   characters seen, and UNSIGNEDP to a variable that indicates whether
-cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
+   the result has unsigned type.  */
+cppchar_t
+cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen, unsignedp)
     cpp_reader *pfile;
     const cpp_token *token;
     int warn_multi;
     unsigned int *pchars_seen;
+     int *unsignedp;
 {
  const unsigned char *str = token->val.str.text;
  const unsigned char *limit = str + token->val.str.len;
  unsigned int chars_seen = 0;
-  unsigned int width, max_chars, c;
+  unsigned int width, max_chars;
-  unsigned HOST_WIDE_INT mask;
+  cppchar_t c, mask, result = 0;
-  HOST_WIDE_INT result = 0;
  bool unsigned_p;
 #ifdef MULTIBYTE_CHARS
@@ -1873,20 +1880,20 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
  /* Width in bits.  */
  if (token->type == CPP_CHAR)
    {
-      width = MAX_CHAR_TYPE_SIZE;
+      width = CPP_OPTION (pfile, char_precision);
      unsigned_p = CPP_OPTION (pfile, signed_char) == 0;
    }
  else
    {
-      width = MAX_WCHAR_TYPE_SIZE;
+      width = CPP_OPTION (pfile, wchar_precision);
      unsigned_p = WCHAR_UNSIGNED;
    }
-  if (width < HOST_BITS_PER_WIDE_INT)
+  if (width < BITS_PER_CPPCHAR_T)
-    mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
+    mask = ((cppchar_t) 1 << width) - 1;
  else
    mask = ~0;
-  max_chars = HOST_BITS_PER_WIDE_INT / width;
+  max_chars = BITS_PER_CPPCHAR_T / width;
  while (str < limit)
    {
@@ -1911,7 +1918,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
 #endif
      if (c == '\\')
-	c = cpp_parse_escape (pfile, &str, limit, mask);
+	c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
 #ifdef MAP_CHARACTER
      if (ISPRINT (c))
@@ -1921,7 +1928,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
      /* Merge character into result; ignore excess chars.  */
      if (++chars_seen <= max_chars)
 	{
-	  if (width < HOST_BITS_PER_WIDE_INT)
+	  if (width < BITS_PER_CPPCHAR_T)
 	    result = (result << width) | (c & mask);
 	  else
 	    result = c;
@@ -1943,7 +1950,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
    {
      unsigned int nbits = chars_seen * width;
-      mask = (unsigned HOST_WIDE_INT) ~0 >> (HOST_BITS_PER_WIDE_INT - nbits);
+      mask = (cppchar_t) ~0 >> (BITS_PER_CPPCHAR_T - nbits);
      if (unsigned_p || ((result >> (nbits - 1)) & 1) == 0)
 	result &= mask;
      else
@@ -1951,6 +1958,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
    }
  *pchars_seen = chars_seen;
+  *unsignedp = unsigned_p;
  return result;
 }

--- a/gcc/cpplib.c
+++ b/gcc/cpplib.c
@@ -726,23 +726,15 @@ dequote_string (pfile, str, len)
  uchar *result = _cpp_unaligned_alloc (pfile, len + 1);
  uchar *dst = result;
  const uchar *limit = str + len;
-  unsigned int c;
+  cppchar_t c;
-  unsigned HOST_WIDE_INT mask;
-  /* We need the mask to match the host's 'unsigned char', not the
-     target's.  */
-  if (CHAR_BIT < HOST_BITS_PER_WIDE_INT)
-    mask = ((unsigned HOST_WIDE_INT) 1 << CHAR_BIT) - 1;
-  else
-    mask = ~(unsigned HOST_WIDE_INT)0;
  while (str < limit)
    {
      c = *str++;
      if (c != '\\')
 	*dst++ = c;
      else
-	*dst++ = cpp_parse_escape (pfile, (const uchar **)&str, limit, mask);
+	*dst++ = cpp_parse_escape (pfile, &str, limit, 0);
    }
  *dst++ = '\0';
  return result;

--- a/gcc/cpplib.h
+++ b/gcc/cpplib.h
@@ -190,9 +190,12 @@ struct cpp_token
  } val;
 };
-/* A standalone character.  It is unsigned for the same reason we use
+/* A type wide enough to hold any multibyte source character.
-   unsigned char - to avoid signedness issues.  */
+   cpplib's character constant interpreter uses shifts, and so
+   requires an unsigned type.  */
 typedef unsigned int cppchar_t;
+/* Its signed equivalent.  */
+typedef int cppchar_signed_t;
 /* Values for opts.dump_macros.
  dump_only means inhibit output of the preprocessed text
@@ -237,6 +240,10 @@ struct cpp_options
  /* -fleading_underscore sets this to "_".  */
  const char *user_label_prefix;
+  /* Precision for target CPP arithmetic, target characters and target
+     wide characters, respectively.  */
+  size_t precision, char_precision, wchar_precision;
  /* The language we're preprocessing.  */
  enum c_lang lang;
@@ -535,9 +542,9 @@ extern const unsigned char *cpp_macro_definition PARAMS ((cpp_reader *,
 extern void _cpp_backup_tokens PARAMS ((cpp_reader *, unsigned int));
 /* Evaluate a CPP_CHAR or CPP_WCHAR token.  */
-extern HOST_WIDE_INT
+extern cppchar_t
 cpp_interpret_charconst PARAMS ((cpp_reader *, const cpp_token *,
-				 int, unsigned int *));
+				 int, unsigned int *, int *));
 extern void cpp_define PARAMS ((cpp_reader *, const char *));
 extern void cpp_assert PARAMS ((cpp_reader *, const char *));
@@ -600,10 +607,15 @@ extern int cpp_ideq			PARAMS ((const cpp_token *,
 extern void cpp_output_line		PARAMS ((cpp_reader *, FILE *));
 extern void cpp_output_token		PARAMS ((const cpp_token *, FILE *));
 extern const char *cpp_type2name	PARAMS ((enum cpp_ttype));
-extern unsigned int cpp_parse_escape	PARAMS ((cpp_reader *,
+/* Returns the value of an escape sequence, truncated to the correct
-						 const unsigned char **,
+   target precision.  PSTR points to the input pointer, which is just
-						 const unsigned char *,
+   after the backslash.  LIMIT is how much text we have.  WIDE is true
-						 unsigned HOST_WIDE_INT));
+   if the escape sequence is part of a wide character constant or
+   string literal.  Handles all relevant diagnostics.  */
+extern cppchar_t cpp_parse_escape	PARAMS ((cpp_reader *,
+						 const unsigned char ** pstr,
+						 const unsigned char *limit,
+						 int wide));
 /* In cpphash.c */