Commit f8710242 by Neil Booth Committed by Neil Booth

cpp.texi: Update for handling of charconsts.

	* cpp.texi: Update for handling of charconsts.
	* cpplex.c (maybe_read_ucs): Don't accept D800-DFFF.  Update
	diagnostics.  Skip to the end if the UCS is too short.
	(cpp_interpret_charconst): Long charconsts issue a warning
	not an error.

	* gcc.dg/cpp/charconst.c: New tests.
	* gcc.dg/cpp/escape.c: New tests.
	* gcc.dg/cpp/escape-1.c: New tests.
	* gcc.dg/cpp/escape-2.c: New tests.
	* gcc.dg/cpp/ucs.c: New tests.

From-SVN: r42514
parent 75f3e3c8
2001-05-23 Neil Booth <neil@daikokuya.demon.co.uk>
* cpp.texi: Update for handling of charconsts.
* cpplex.c (maybe_read_ucs): Don't accept D800-DFFF. Update
diagnostics. Skip to the end if the UCS is too short.
(cpp_interpret_charconst): Long charconsts issue a warning
not an error.
2001-05-23 Richard Henderson <rth@redhat.com> 2001-05-23 Richard Henderson <rth@redhat.com>
* doc/install.texi (alpha-linux): Require binutils 2.11. * doc/install.texi (alpha-linux): Require binutils 2.11.
......
...@@ -2889,22 +2889,17 @@ same column as it did in the original source file. ...@@ -2889,22 +2889,17 @@ same column as it did in the original source file.
@item The numeric value of character constants in preprocessor expressions. @item The numeric value of character constants in preprocessor expressions.
The preprocessor interprets character constants in preprocessing The preprocessor and compiler interpret character constants in the same
directives on the host machine. Expressions outside preprocessing way; escape sequences such as @code{\a} are given the values they would
directives are compiled to be interpreted on the target machine. In the have on the target machine.
normal case of a native compiler, these two environments are the same
and so character constants will be evaluated identically in both cases.
However, in the case of a cross compiler, the values may be different.
Multi-character character constants are interpreted a character at a Multi-character character constants are interpreted a character at a
time, shifting the previous result left by the number of bits per time, shifting the previous result left by the number of bits per
character on the host, and adding the new character. For example, 'ab' character on the host, and adding the new character. For example, 'ab'
on an 8-bit host would be interpreted as 'a' * 256 + 'b'. If there are on an 8-bit host would be interpreted as 'a' * 256 + 'b'. If there are
more characters in the constant than can fit in the widest native more characters in the constant than can fit in the widest native
integer type on the host, usually a @samp{long}, the behavior is integer type on the host, usually a @samp{long}, the excess characters
undefined. are ignored and a diagnostic is given.
Evaluation of wide character constants is not properly implemented yet.
@item Source file inclusion. @item Source file inclusion.
......
...@@ -1706,46 +1706,48 @@ maybe_read_ucs (pfile, pstr, limit, pc) ...@@ -1706,46 +1706,48 @@ maybe_read_ucs (pfile, pstr, limit, pc)
if (CPP_WTRADITIONAL (pfile)) if (CPP_WTRADITIONAL (pfile))
cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c); cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
for (length = (c == 'u' ? 4: 8); length; --length)
{
if (p >= limit)
{
cpp_error (pfile, "incomplete universal-character-name");
break;
}
c = *p; length = (c == 'u' ? 4: 8);
if (ISXDIGIT (c))
{ if ((size_t) (limit - p) < length)
code = (code << 4) + hex_digit_value (c); {
p++; cpp_error (pfile, "incomplete universal-character-name");
} /* Skip to the end to avoid more diagnostics. */
else p = limit;
}
else
{
for (; length; length--, p++)
{ {
cpp_error (pfile, c = *p;
"non-hex digit '%c' in universal-character-name", c); if (ISXDIGIT (c))
break; code = (code << 4) + hex_digit_value (c);
else
{
cpp_error (pfile,
"non-hex digit '%c' in universal-character-name", c);
/* We shouldn't skip in case there are multibyte chars. */
break;
}
} }
} }
#ifdef TARGET_EBCDIC #ifdef TARGET_EBCDIC
cpp_error (pfile, "universal-character-name on EBCDIC target"); cpp_error (pfile, "universal-character-name on EBCDIC target");
code = 0x3f; /* EBCDIC invalid character */ code = 0x3f; /* EBCDIC invalid character */
#else #else
if (code > 0x9f && !(code & 0x80000000)) /* True extended characters are OK. */
; /* True extended character, OK. */ if (code >= 0xa0
else if (code >= 0x20 && code < 0x7f) && !(code & 0x80000000)
{ && !(code >= 0xD800 && code <= 0xDFFF))
/* ASCII printable character. The C character set consists of all of ;
these except $, @ and `. We use hex escapes so that this also /* The standard permits $, @ and ` to be specified as UCNs. We use
works with EBCDIC hosts. */ hex escapes so that this also works with EBCDIC hosts. */
if (code != 0x24 && code != 0x40 && code != 0x60) else if (code == 0x24 || code == 0x40 || code == 0x60)
cpp_error (pfile, "universal-character-name used for '%c'", code); ;
} /* Don't give another error if one occurred above. */
else else if (length == 0)
cpp_error (pfile, "invalid universal-character-name"); cpp_error (pfile, "universal-character-name out of range");
#endif #endif
*pstr = p; *pstr = p;
...@@ -1970,7 +1972,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen) ...@@ -1970,7 +1972,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
else if (chars_seen > max_chars) else if (chars_seen > max_chars)
{ {
chars_seen = max_chars; chars_seen = max_chars;
cpp_error (pfile, "character constant too long"); cpp_warning (pfile, "character constant too long");
} }
else if (chars_seen > 1 && !traditional && warn_multi) else if (chars_seen > 1 && !traditional && warn_multi)
cpp_warning (pfile, "multi-character character constant"); cpp_warning (pfile, "multi-character character constant");
......
2001-05-23 Neil Booth <neil@daikokuya.demon.co.uk>
* gcc.dg/cpp/charconst.c: New tests.
* gcc.dg/cpp/escape.c: New tests.
* gcc.dg/cpp/escape-1.c: New tests.
* gcc.dg/cpp/escape-2.c: New tests.
* gcc.dg/cpp/ucs.c: New tests.
2001-05-23 David.Billinghurst <David.Billinghurst@riotinto.com> 2001-05-23 David.Billinghurst <David.Billinghurst@riotinto.com>
* gcc.misc-tests/linkage.exp: Pass appropriate flags to * gcc.misc-tests/linkage.exp: Pass appropriate flags to
......
/* Copyright (C) 2001 Free Software Foundation, Inc. */
/* { dg-do compile } */
/* This tests various diagnostics about character constants, for both
the preprocessor and the compiler.
Neil Booth, 22 May 2001. */
/* Preprocessor half of the test: every #if below puts a character
   constant in a directive, and the directive comment on the same line
   names the warning text expected there.  */
#if '' /* { dg-warning "empty" "empty charconst" } */
#endif
#if L'' /* { dg-warning "empty" "empty wide charconst" } */
#endif
#if 'very long' /* { dg-warning "too long" "long charconst" } */
#endif
#if L'very long' /* { dg-warning "too long" "long wide charconst" } */
#endif
/* Don't do this test for L'ab'; it depends upon sizeof (wchar_t). */
#if 'ab' /* { dg-warning "multi-char" "multi-character" } */
#endif
/* Compiler half of the test: the same character constants as in the
   #if directives of this file, this time used in ordinary expressions.
   Each line expects the warning named in its directive comment.  */
void foo ()
{
int c = ''; /* { dg-warning "empty" "empty charconst" } */
c = L''; /* { dg-warning "empty" "empty wide charconst" } */
c = 'very long'; /* { dg-warning "too long" "long charconst" } */
c = L'very long'; /* { dg-warning "too long" "long wide charconst" } */
/* Don't do this test for L'ab'; it depends upon sizeof (wchar_t). */
c = 'ab'; /* { dg-warning "multi-char" "multi-character" } */
}
/* Copyright (C) 2001 Free Software Foundation, Inc. */
/* { dg-do compile } */
/* This tests various diagnostics about escape sequences, for both
the preprocessor and the compiler.
Neil Booth, 22 May 2001. */
/* Preprocessor half of the test: escape sequences inside character
   constants in #if directives.  Lines marked as error or warning must
   produce the quoted diagnostic; lines marked bogus must not.  */
#if '\x' /* { dg-error "no following" "\x with no digits" } */
#endif
#if '\x400' /* { dg-warning "out of range" "\x out of range" } */
#endif
#if '\x0ff' /* { dg-bogus "out of range" "\x out of range" } */
#endif
#if '\400' /* { dg-warning "out of range" "\x out of range" } */
#endif
#if '\377' /* { dg-bogus "out of range" "bogus \x out of range" } */
#endif
/* This one also checks the value: octal 177 must evaluate to 0x7f, so
   the #error below must never be reached.  */
#if '\177' != 0x7f /* { dg-bogus "out of range" "bogus \x out of range" } */
#error bad octal /* { dg-bogus "bad" "bad octal evaluation" } */
#endif
/* Four octal digits: the expected diagnostic is the multi-character
   warning, not an out-of-range one.  */
#if '\0377' /* { dg-warning "multi" "too long octal" } */
#endif
#if '\p' /* { dg-error "unknown escape" "unknown escape seq" } */
#endif
/* Compiler half of the test: escape sequences inside character
   constants used in expressions.  Each line expects (or, for the bogus
   entries, forbids) the diagnostic named in its directive comment.  */
void foo ()
{
int c;
c = '\x'; /* { dg-error "no following" "\x with no digits" } */
c = '\x100'; /* { dg-warning "out of range" "\x out of range" } */
c = '\x0ff'; /* { dg-bogus "out of range" "\x out of range" } */
c = '\400'; /* { dg-warning "out of range" "\x out of range" } */
c = '\377'; /* { dg-bogus "out of range" "bogus \x out of range" } */
c = '\0377'; /* { dg-warning "multi" "too long octal" } */
c = '\p'; /* { dg-error "unknown escape" "unknown escape seq" } */
}
/* Copyright (C) 2001 Free Software Foundation, Inc. */
/* { dg-do compile } */
/* { dg-options "-pedantic -std=c99 -fno-show-column" } */
/* This tests various diagnostics with -pedantic about escape
sequences, for both the preprocessor and the compiler.
Neil Booth, 22 May 2001. */
/* Preprocessor half of the test, run with the -pedantic -std=c99
   options given at the top of this file: \e must be warned about as
   non-ISO, while \u00a0 must not be reported as unknown.  */
#if '\e' /* { dg-warning "non-ISO" "non-ISO \\e" } */
#endif
#if '\u00a0' /* { dg-bogus "unknown" "\\u is known in C99" } */
#endif
/* Compiler half of the test: \E must draw the non-ISO warning and
   \u00a0 must not be reported as unknown.  */
void foo ()
{
int c = '\E'; /* { dg-warning "non-ISO" "non-ISO \\E" } */
c = '\u00a0'; /* { dg-bogus "unknown" "\\u is known in C99" } */
}
/* Copyright (C) 2001 Free Software Foundation, Inc. */
/* { dg-do compile } */
/* { dg-options "-Wtraditional -std=c89 -fno-show-column" } */
/* This tests various diagnostics with -Wtraditional about escape
sequences, for both the preprocessor and the compiler.
Neil Booth, 22 May 2001. */
/* Preprocessor half of the test, run with the -Wtraditional -std=c89
   options given at the top of this file.  */
#if '\a' /* { dg-warning "traditional" "traditional bell" } */
#endif
/* Besides the warning, this checks the value: hex 1a must evaluate to
   26, so the #error below must never be reached.  */
#if '\x1a' != 26 /* { dg-warning "traditional" "traditional hex" } */
#error bad hex /* { dg-bogus "bad" "bad hexadecimal evaluation" } */
#endif
#if '\u' /* { dg-warning "unknown" "\u is unknown in C89" } */
#endif
/* Compiler half of the test: \a and a hex escape must each draw the
   "traditional" warning, and \u must be warned about as unknown.  */
void foo ()
{
int c = '\a'; /* { dg-warning "traditional" "traditional bell" } */
c = '\xa1'; /* { dg-warning "traditional" "traditional hex" } */
c = '\u'; /* { dg-warning "unknown" "\u is unknown in C89" } */
}
/* Copyright (C) 2001 Free Software Foundation, Inc. */
/* { dg-do compile } */
/* { dg-options "-std=c99" } */
/* This tests universal character sequences.
Neil Booth, 22 May 2001. */
#if L'\u1234' != 0x1234
#error bad short ucs /* { dg-bogus "bad" "bad \u1234 evaluation" } */
#endif
#if L'\U1234abcd' != 0x1234abcd
#error bad long ucs /* { dg-bogus "bad" "bad \U1234abcd evaluation" } */
#endif
void foo ()
{
int c; /* Assigned repeatedly and never read. */
c = L'\ubad'; /* { dg-error "incomplete" "incompete UCN 1" } */
c = L"\U1234"[0]; /* { dg-error "incomplete" "incompete UCN 2" } */
c = L'\u000x'; /* { dg-error "non-hex" "non-hex digit in UCN" } */
/* { dg-warning "too long" "" { target *-*-* } 24 } */
/* NOTE(review): the directive above names an absolute line number (24),
   so lines must not be added or removed above it without updating that
   number to match.  */
/* 0024, 0040 and 0060 are $, @ and `, which may be written as
   universal character names, and 00a0 is an extended character; none
   of these may be diagnosed as invalid.  */
c = '\u0024'; /* { dg-bogus "invalid" "0024 is a valid UCN" } */
c = "\u0040"[0]; /* { dg-bogus "invalid" "0040 is a valid UCN" } */
c = '\u00a0'; /* { dg-bogus "invalid" "00a0 is a valid UCN" } */
c = '\U00000060'; /* { dg-bogus "invalid" "0060 is a valid UCN" } */
/* 0025 is below 00a0 and is not one of the three exceptions, and
   D800-DFFF is not accepted; each must give the "range" error.  */
c = '\u0025'; /* { dg-error "range" "0025 is an invalid UCN" } */
c = L"\uD800"[0]; /* { dg-error "range" "D800 is an invalid UCN" } */
c = L'\U0000DFFF'; /* { dg-error "range" "DFFF is an invalid UCN" } */
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment