Commit 00a81b8b, authored and committed by Jason Merrill

More N3077 raw string changes

	More N3077 raw string changes
	* charset.c (cpp_interpret_string): Don't transform UCNs in raw
	strings.
	* lex.c (bufring_append): Split out from...
	(lex_raw_string): ...here.  Undo trigraph and line splicing
	transformations.  Do process line notes in multi-line literals.
	(_cpp_process_line_notes): Ignore notes that were already handled.

From-SVN: r157804
parent 0591d33e
2010-03-29 Jason Merrill <jason@redhat.com> 2010-03-29 Jason Merrill <jason@redhat.com>
N3077
* c-c++-common/raw-string-1.c: Update handling of trigraphs, line
splicing and UCNs.
* c-c++-common/raw-string-2.c: Add trigraph test.
* c-c++-common/raw-string-8.c: New.
* c-c++-common/raw-string-9.c: New.
* c-c++-common/raw-string-10.c: New.
* c-c++-common/raw-string-1.c: Combine C and C++ raw string tests. * c-c++-common/raw-string-1.c: Combine C and C++ raw string tests.
* c-c++-common/raw-string-2.c: Combine C and C++ raw string tests. * c-c++-common/raw-string-2.c: Combine C and C++ raw string tests.
* c-c++-common/raw-string-3.c: Combine C and C++ raw string tests. * c-c++-common/raw-string-3.c: Combine C and C++ raw string tests.
......
// { dg-do run } // { dg-do run }
// { dg-require-effective-target wchar } // { dg-require-effective-target wchar }
// { dg-options "-std=gnu99 -Wno-c++-compat" { target c } } // { dg-options "-std=gnu99 -Wno-c++-compat -trigraphs" { target c } }
// { dg-options "-std=c++0x" { target c++ } } // { dg-options "-std=c++0x" { target c++ } }
#ifndef __cplusplus #ifndef __cplusplus
...@@ -13,57 +13,78 @@ typedef __CHAR32_TYPE__ char32_t; ...@@ -13,57 +13,78 @@ typedef __CHAR32_TYPE__ char32_t;
const char s0[] = R"(a\ const char s0[] = R"(a\
\u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb \u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
c)"; c)";
const char s1[] = "a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc"; const char s1[] = "a\\\n\\u010d\\U0000010D\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
const char s2[] = R"*|*(a\ const char s2[] = R"*|*(a\
b b
c)" c)"
c)*|" c)*|"
c)*|*"; c)*|*";
const char s3[] = "ab\nc)\"\nc)*|\"\nc"; const char s3[] = "a\\\nb\nc)\"\nc)*|\"\nc";
// The ) in ??) below is part of the raw string suffix )".
const char s4[] = R"(??/
??/
??(??<??=??'??!??-??>??)";
const char s5[] = "?\?/\n?\?/\n?\?(?\?<?\?=?\?'?\?!?\?-?\?>?\?";
const char t0[] = u8R"(a\ const char t0[] = u8R"(a\
\u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb \u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
c)"; c)";
const char t1[] = u8"a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc"; const char t1[] = u8"a\\\n\\u010d\\U0000010D\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
const char t2[] = u8R"*|*(a\ const char t2[] = u8R"*|*(a\
b b
c)" c)"
c)*|" c)*|"
c)*|*"; c)*|*";
const char t3[] = u8"ab\nc)\"\nc)*|\"\nc"; const char t3[] = u8"a\\\nb\nc)\"\nc)*|\"\nc";
const char t4[] = u8R"(??/
??/
??(??<??=??'??!??-??>??)";
const char t5[] = u8"?\?/\n?\?/\n?\?(?\?<?\?=?\?'?\?!?\?-?\?>?\?";
const char16_t u0[] = uR"(a\ const char16_t u0[] = uR"(a\
\u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb \u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
c)"; c)";
const char16_t u1[] = u"a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc"; const char16_t u1[] = u"a\\\n\\u010d\\U0000010D\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
const char16_t u2[] = uR"*|*(a\ const char16_t u2[] = uR"*|*(a\
b b
c)" c)"
c)*|" c)*|"
c)*|*"; c)*|*";
const char16_t u3[] = u"ab\nc)\"\nc)*|\"\nc"; const char16_t u3[] = u"a\\\nb\nc)\"\nc)*|\"\nc";
const char16_t u4[] = uR"(??/
??/
??(??<??=??'??!??-??>??)";
const char16_t u5[] = u"?\?/\n?\?/\n?\?(?\?<?\?=?\?'?\?!?\?-?\?>?\?";
const char32_t U0[] = UR"(a\ const char32_t U0[] = UR"(a\
\u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb \u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
c)"; c)";
const char32_t U1[] = U"a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc"; const char32_t U1[] = U"a\\\n\\u010d\\U0000010D\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
const char32_t U2[] = UR"*|*(a\ const char32_t U2[] = UR"*|*(a\
b b
c)" c)"
c)*|" c)*|"
c)*|*"; c)*|*";
const char32_t U3[] = U"ab\nc)\"\nc)*|\"\nc"; const char32_t U3[] = U"a\\\nb\nc)\"\nc)*|\"\nc";
const char32_t U4[] = UR"(??/
??/
??(??<??=??'??!??-??>??)";
const char32_t U5[] = U"?\?/\n?\?/\n?\?(?\?<?\?=?\?'?\?!?\?-?\?>?\?";
const wchar_t L0[] = LR"(a\ const wchar_t L0[] = LR"(a\
\u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb \u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
c)"; c)";
const wchar_t L1[] = L"a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc"; const wchar_t L1[] = L"a\\\n\\u010d\\U0000010D\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
const wchar_t L2[] = LR"*|*(a\ const wchar_t L2[] = LR"*|*(a\
b b
c)" c)"
c)*|" c)*|"
c)*|*"; c)*|*";
const wchar_t L3[] = L"ab\nc)\"\nc)*|\"\nc"; const wchar_t L3[] = L"a\\\nb\nc)\"\nc)*|\"\nc";
const wchar_t L4[] = LR"(??/
??/
??(??<??=??'??!??-??>??)";
const wchar_t L5[] = L"?\?/\n?\?/\n?\?(?\?<?\?=?\?'?\?!?\?-?\?>?\?";
int int
main (void) main (void)
...@@ -74,30 +95,45 @@ main (void) ...@@ -74,30 +95,45 @@ main (void)
if (sizeof (s2) != sizeof (s3) if (sizeof (s2) != sizeof (s3)
|| __builtin_memcmp (s2, s3, sizeof (s2)) != 0) || __builtin_memcmp (s2, s3, sizeof (s2)) != 0)
__builtin_abort (); __builtin_abort ();
if (sizeof (s4) != sizeof (s5)
|| __builtin_memcmp (s4, s5, sizeof (s4)) != 0)
__builtin_abort ();
if (sizeof (t0) != sizeof (t1) if (sizeof (t0) != sizeof (t1)
|| __builtin_memcmp (t0, t1, sizeof (t0)) != 0) || __builtin_memcmp (t0, t1, sizeof (t0)) != 0)
__builtin_abort (); __builtin_abort ();
if (sizeof (t2) != sizeof (t3) if (sizeof (t2) != sizeof (t3)
|| __builtin_memcmp (t2, t3, sizeof (t2)) != 0) || __builtin_memcmp (t2, t3, sizeof (t2)) != 0)
__builtin_abort (); __builtin_abort ();
if (sizeof (t4) != sizeof (t5)
|| __builtin_memcmp (t4, t5, sizeof (t4)) != 0)
__builtin_abort ();
if (sizeof (u0) != sizeof (u1) if (sizeof (u0) != sizeof (u1)
|| __builtin_memcmp (u0, u1, sizeof (u0)) != 0) || __builtin_memcmp (u0, u1, sizeof (u0)) != 0)
__builtin_abort (); __builtin_abort ();
if (sizeof (u2) != sizeof (u3) if (sizeof (u2) != sizeof (u3)
|| __builtin_memcmp (u2, u3, sizeof (u2)) != 0) || __builtin_memcmp (u2, u3, sizeof (u2)) != 0)
__builtin_abort (); __builtin_abort ();
if (sizeof (u4) != sizeof (u5)
|| __builtin_memcmp (u4, u5, sizeof (u4)) != 0)
__builtin_abort ();
if (sizeof (U0) != sizeof (U1) if (sizeof (U0) != sizeof (U1)
|| __builtin_memcmp (U0, U1, sizeof (U0)) != 0) || __builtin_memcmp (U0, U1, sizeof (U0)) != 0)
__builtin_abort (); __builtin_abort ();
if (sizeof (U2) != sizeof (U3) if (sizeof (U2) != sizeof (U3)
|| __builtin_memcmp (U2, U3, sizeof (U2)) != 0) || __builtin_memcmp (U2, U3, sizeof (U2)) != 0)
__builtin_abort (); __builtin_abort ();
if (sizeof (U4) != sizeof (U5)
|| __builtin_memcmp (U4, U5, sizeof (U4)) != 0)
__builtin_abort ();
if (sizeof (L0) != sizeof (L1) if (sizeof (L0) != sizeof (L1)
|| __builtin_memcmp (L0, L1, sizeof (L0)) != 0) || __builtin_memcmp (L0, L1, sizeof (L0)) != 0)
__builtin_abort (); __builtin_abort ();
if (sizeof (L2) != sizeof (L3) if (sizeof (L2) != sizeof (L3)
|| __builtin_memcmp (L2, L3, sizeof (L2)) != 0) || __builtin_memcmp (L2, L3, sizeof (L2)) != 0)
__builtin_abort (); __builtin_abort ();
if (sizeof (L4) != sizeof (L5)
|| __builtin_memcmp (L4, L5, sizeof (L4)) != 0)
__builtin_abort ();
if (sizeof (R"*()*") != 1 if (sizeof (R"*()*") != 1
|| __builtin_memcmp (R"*()*", "", 1) != 0) || __builtin_memcmp (R"*()*", "", 1) != 0)
__builtin_abort (); __builtin_abort ();
......
// Test that we don't revert trigraphs and line splicing when a raw string
// literal is formed by token pasting.
// { dg-options "-std=gnu99 -trigraphs" { target c } }
// { dg-options "-std=c++0x" { target c++ } }
// { dg-do run }
#define PASTE(X,Y) X##Y
const char a[] = PASTE(R,"(??>\
)");
#define TEST(str, val) \
if (sizeof (str) != sizeof (val) \
|| __builtin_memcmp (str, val, sizeof (str)) != 0) \
__builtin_abort ()
/* A was built by pasting R onto an ordinary string literal.  The trigraph
   and the backslash-newline inside that literal must stay translated
   (they are not reverted as they would be in a directly written raw
   string), so only the single replacement character is expected.  */
int main()
{
TEST (a, "}");
}
// { dg-do run } // { dg-do run }
// { dg-require-effective-target wchar } // { dg-require-effective-target wchar }
// { dg-options "-std=gnu99 -Wno-c++-compat" { target c } } // { dg-options "-std=gnu99 -Wno-c++-compat -trigraphs" { target c } }
// { dg-options "-std=c++0x" { target c++ } } // { dg-options "-std=c++0x" { target c++ } }
#ifndef __cplusplus #ifndef __cplusplus
...@@ -32,6 +32,8 @@ const char s08[] = u8R"(a)" R"_{}#[]<>%:;.?*+-(b)_{}#[]<>%:;.?*+-"; ...@@ -32,6 +32,8 @@ const char s08[] = u8R"(a)" R"_{}#[]<>%:;.?*+-(b)_{}#[]<>%:;.?*+-";
const char s09[] = u8R"/^&|~!=,"'(a)/^&|~!=,"'" u8"(b)"; const char s09[] = u8R"/^&|~!=,"'(a)/^&|~!=,"'" u8"(b)";
const char s10[] = u8"(a)" u8R"0123456789abcdef(b)0123456789abcdef"; const char s10[] = u8"(a)" u8R"0123456789abcdef(b)0123456789abcdef";
const char s11[] = u8R"ghijklmnopqrstuv(a)ghijklmnopqrstuv" u8R"w(b)w"; const char s11[] = u8R"ghijklmnopqrstuv(a)ghijklmnopqrstuv" u8R"w(b)w";
const char s12[] = R"??=??(??<??>??)??'??!??-\
(a)#[{}]^|~";
const char16_t u03[] = R"-(a)-" u"(b)"; const char16_t u03[] = R"-(a)-" u"(b)";
const char16_t u04[] = "(a)" uR"MNOPQRSTUVWXYZ(b)MNOPQRSTUVWXYZ"; const char16_t u04[] = "(a)" uR"MNOPQRSTUVWXYZ(b)MNOPQRSTUVWXYZ";
...@@ -42,6 +44,8 @@ const char16_t u08[] = uR"(a)" R"_{}#[]<>%:;.?*+-(b)_{}#[]<>%:;.?*+-"; ...@@ -42,6 +44,8 @@ const char16_t u08[] = uR"(a)" R"_{}#[]<>%:;.?*+-(b)_{}#[]<>%:;.?*+-";
const char16_t u09[] = uR"/^&|~!=,"'(a)/^&|~!=,"'" u"(b)"; const char16_t u09[] = uR"/^&|~!=,"'(a)/^&|~!=,"'" u"(b)";
const char16_t u10[] = u"(a)" uR"0123456789abcdef(b)0123456789abcdef"; const char16_t u10[] = u"(a)" uR"0123456789abcdef(b)0123456789abcdef";
const char16_t u11[] = uR"ghijklmnopqrstuv(a)ghijklmnopqrstuv" uR"w(b)w"; const char16_t u11[] = uR"ghijklmnopqrstuv(a)ghijklmnopqrstuv" uR"w(b)w";
const char16_t u12[] = uR"??=??(??<??>??)??'??!??-\
(a)#[{}]^|~";
const char32_t U03[] = R"-(a)-" U"(b)"; const char32_t U03[] = R"-(a)-" U"(b)";
const char32_t U04[] = "(a)" UR"MNOPQRSTUVWXYZ(b)MNOPQRSTUVWXYZ"; const char32_t U04[] = "(a)" UR"MNOPQRSTUVWXYZ(b)MNOPQRSTUVWXYZ";
...@@ -52,6 +56,8 @@ const char32_t U08[] = UR"(a)" R"_{}#[]<>%:;.?*+-(b)_{}#[]<>%:;.?*+-"; ...@@ -52,6 +56,8 @@ const char32_t U08[] = UR"(a)" R"_{}#[]<>%:;.?*+-(b)_{}#[]<>%:;.?*+-";
const char32_t U09[] = UR"/^&|~!=,"'(a)/^&|~!=,"'" U"(b)"; const char32_t U09[] = UR"/^&|~!=,"'(a)/^&|~!=,"'" U"(b)";
const char32_t U10[] = U"(a)" UR"0123456789abcdef(b)0123456789abcdef"; const char32_t U10[] = U"(a)" UR"0123456789abcdef(b)0123456789abcdef";
const char32_t U11[] = UR"ghijklmnopqrstuv(a)ghijklmnopqrstuv" UR"w(b)w"; const char32_t U11[] = UR"ghijklmnopqrstuv(a)ghijklmnopqrstuv" UR"w(b)w";
const char32_t U12[] = UR"??=??(??<??>??)??'??!??-\
(a)#[{}]^|~";
const wchar_t L03[] = R"-(a)-" L"(b)"; const wchar_t L03[] = R"-(a)-" L"(b)";
const wchar_t L04[] = "(a)" LR"MNOPQRSTUVWXYZ(b)MNOPQRSTUVWXYZ"; const wchar_t L04[] = "(a)" LR"MNOPQRSTUVWXYZ(b)MNOPQRSTUVWXYZ";
...@@ -62,6 +68,8 @@ const wchar_t L08[] = LR"(a)" R"_{}#[]<>%:;.?*+-(b)_{}#[]<>%:;.?*+-"; ...@@ -62,6 +68,8 @@ const wchar_t L08[] = LR"(a)" R"_{}#[]<>%:;.?*+-(b)_{}#[]<>%:;.?*+-";
const wchar_t L09[] = LR"/^&|~!=,"'(a)/^&|~!=,"'" L"(b)"; const wchar_t L09[] = LR"/^&|~!=,"'(a)/^&|~!=,"'" L"(b)";
const wchar_t L10[] = L"(a)" LR"0123456789abcdef(b)0123456789abcdef"; const wchar_t L10[] = L"(a)" LR"0123456789abcdef(b)0123456789abcdef";
const wchar_t L11[] = LR"ghijklmnopqrstuv(a)ghijklmnopqrstuv" LR"w(b)w"; const wchar_t L11[] = LR"ghijklmnopqrstuv(a)ghijklmnopqrstuv" LR"w(b)w";
const wchar_t L12[] = LR"??=??(??<??>??)??'??!??-\
(a)#[{}]^|~";
int int
main (void) main (void)
...@@ -82,6 +90,7 @@ main (void) ...@@ -82,6 +90,7 @@ main (void)
TEST (s09, "a(b)"); TEST (s09, "a(b)");
TEST (s10, "(a)b"); TEST (s10, "(a)b");
TEST (s11, "ab"); TEST (s11, "ab");
TEST (s12, "a");
TEST (u03, u"a(b)"); TEST (u03, u"a(b)");
TEST (u04, u"(a)b"); TEST (u04, u"(a)b");
TEST (u05, u"ab"); TEST (u05, u"ab");
...@@ -91,6 +100,7 @@ main (void) ...@@ -91,6 +100,7 @@ main (void)
TEST (u09, u"a(b)"); TEST (u09, u"a(b)");
TEST (u10, u"(a)b"); TEST (u10, u"(a)b");
TEST (u11, u"ab"); TEST (u11, u"ab");
TEST (u12, u"a");
TEST (U03, U"a(b)"); TEST (U03, U"a(b)");
TEST (U04, U"(a)b"); TEST (U04, U"(a)b");
TEST (U05, U"ab"); TEST (U05, U"ab");
...@@ -100,6 +110,7 @@ main (void) ...@@ -100,6 +110,7 @@ main (void)
TEST (U09, U"a(b)"); TEST (U09, U"a(b)");
TEST (U10, U"(a)b"); TEST (U10, U"(a)b");
TEST (U11, U"ab"); TEST (U11, U"ab");
TEST (U12, U"a");
TEST (L03, L"a(b)"); TEST (L03, L"a(b)");
TEST (L04, L"(a)b"); TEST (L04, L"(a)b");
TEST (L05, L"ab"); TEST (L05, L"ab");
...@@ -109,5 +120,6 @@ main (void) ...@@ -109,5 +120,6 @@ main (void)
TEST (L09, L"a(b)"); TEST (L09, L"a(b)");
TEST (L10, L"(a)b"); TEST (L10, L"(a)b");
TEST (L11, L"ab"); TEST (L11, L"ab");
TEST (L12, L"a");
return 0; return 0;
} }
// Test that we track line numbers properly across newlines
// both escaped and not in raw strings.
// { dg-options "-std=gnu99" { target c } }
// { dg-options "-std=c++0x" { target c++ } }
const char a[] = R"(\
)";
T t; // { dg-error "" }
// Make sure that we properly handle trigraphs in raw strings when
// trigraphs are disabled, too.
// { dg-options "-std=gnu99" { target c } }
// { dg-options "-std=gnu++0x" { target c++ } }
// { dg-do run }
const char b[] = "??>"; // { dg-message "-trigraphs" }
const char a[] = R"(??>??)??/
??)";
#define TEST(str, val) \
if (sizeof (str) != sizeof (val) \
|| __builtin_memcmp (str, val, sizeof (str)) != 0) \
__builtin_abort ()
/* With trigraph conversion disabled, every trigraph-looking sequence in
   the raw string A must come through verbatim, including the one that
   precedes the embedded newline.  The final ')' in the source belongs to
   the closing delimiter and is therefore not part of the expected
   value.  */
int main()
{
TEST (a, "?\?>?\?)?\?/\n?\?");
}
2010-03-29 Jason Merrill <jason@redhat.com> 2010-03-29 Jason Merrill <jason@redhat.com>
More N3077 raw string changes
* charset.c (cpp_interpret_string): Don't transform UCNs in raw
strings.
* lex.c (bufring_append): Split out from...
(lex_raw_string): ...here. Undo trigraph and line splicing
transformations. Do process line notes in multi-line literals.
(_cpp_process_line_notes): Ignore notes that were already handled.
Some raw string changes from N3077 Some raw string changes from N3077
* charset.c (cpp_interpret_string): Change inner delimiters to (). * charset.c (cpp_interpret_string): Change inner delimiters to ().
* lex.c (lex_raw_string): Likewise. Also disallow '\' in delimiter. * lex.c (lex_raw_string): Likewise. Also disallow '\' in delimiter.
......
...@@ -1403,23 +1403,10 @@ cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count, ...@@ -1403,23 +1403,10 @@ cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,
if (limit >= p + (p - prefix) + 1) if (limit >= p + (p - prefix) + 1)
limit -= (p - prefix) + 1; limit -= (p - prefix) + 1;
for (;;) /* Raw strings are all normal characters; these can be fed
{
base = p;
while (p < limit && (*p != '\\' || (p[1] != 'u' && p[1] != 'U')))
p++;
if (p > base)
{
/* We have a run of normal characters; these can be fed
directly to convert_cset. */ directly to convert_cset. */
if (!APPLY_CONVERSION (cvt, base, p - base, &tbuf)) if (!APPLY_CONVERSION (cvt, p, limit - p, &tbuf))
goto fail; goto fail;
}
if (p == limit)
break;
p = convert_ucn (pfile, p + 1, limit, &tbuf, cvt);
}
continue; continue;
} }
......
...@@ -240,7 +240,8 @@ struct _cpp_line_note ...@@ -240,7 +240,8 @@ struct _cpp_line_note
/* Type of note. The 9 'from' trigraph characters represent those /* Type of note. The 9 'from' trigraph characters represent those
trigraphs, '\\' an escaped newline, ' ' an escaped newline with trigraphs, '\\' an escaped newline, ' ' an escaped newline with
intervening space, and anything else is invalid. */ intervening space, 0 represents a note that has already been handled,
and anything else is invalid. */
unsigned int type; unsigned int type;
}; };
......
...@@ -314,6 +314,8 @@ _cpp_process_line_notes (cpp_reader *pfile, int in_comment) ...@@ -314,6 +314,8 @@ _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
} }
} }
} }
else if (note->type == 0)
/* Already processed in lex_raw_string. */;
else else
abort (); abort ();
} }
...@@ -674,8 +676,37 @@ create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base, ...@@ -674,8 +676,37 @@ create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
token->val.str.text = dest; token->val.str.text = dest;
} }
/* Helper for lex_raw_string.  Copy LEN bytes starting at BASE onto the
   end of the buffer chain whose first element is *FIRST_BUFF_P and whose
   last element is *LAST_BUFF_P.

   If the chain is empty (*FIRST_BUFF_P is NULL), a buffer is requested
   from _cpp_get_buff with LEN as its size argument and becomes both the
   first and the last element.  Otherwise, if LEN exceeds BUFF_ROOM of
   the last element, that element is topped up with as many bytes as it
   has room for and the remainder is written to the buffer returned by
   _cpp_append_extend_buff, which becomes the new last element.

   Both *FIRST_BUFF_P and *LAST_BUFF_P are stored back on return.  */
static void
bufring_append (cpp_reader *pfile, const uchar *base, size_t len,
                _cpp_buff **first_buff_p, _cpp_buff **last_buff_p)
{
  _cpp_buff *head = *first_buff_p;
  _cpp_buff *tail = *last_buff_p;

  if (head == NULL)
    {
      /* Nothing accumulated yet: start the chain.  */
      tail = _cpp_get_buff (pfile, len);
      head = tail;
    }
  else
    {
      size_t avail = BUFF_ROOM (tail);

      if (len > avail)
        {
          /* Use up whatever is left in the current tail, then continue
             with the leftover bytes in a follow-on buffer.  */
          memcpy (BUFF_FRONT (tail), base, avail);
          BUFF_FRONT (tail) += avail;
          base += avail;
          len -= avail;
          tail = _cpp_append_extend_buff (pfile, tail, len);
        }
    }

  /* Whatever remains to be copied is written at the front of TAIL.  */
  memcpy (BUFF_FRONT (tail), base, len);
  BUFF_FRONT (tail) += len;

  *first_buff_p = head;
  *last_buff_p = tail;
}
/* Lexes a raw string. The stored string contains the spelling, including /* Lexes a raw string. The stored string contains the spelling, including
double quotes, delimiter string, '[' and ']', any leading double quotes, delimiter string, '(' and ')', any leading
'L', 'u', 'U' or 'u8' and 'R' modifier. It returns the type of the 'L', 'u', 'U' or 'u8' and 'R' modifier. It returns the type of the
literal, or CPP_OTHER if it was not properly terminated. literal, or CPP_OTHER if it was not properly terminated.
...@@ -692,6 +723,7 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base, ...@@ -692,6 +723,7 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
enum cpp_ttype type; enum cpp_ttype type;
size_t total_len = 0; size_t total_len = 0;
_cpp_buff *first_buff = NULL, *last_buff = NULL; _cpp_buff *first_buff = NULL, *last_buff = NULL;
_cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
type = (*base == 'L' ? CPP_WSTRING : type = (*base == 'L' ? CPP_WSTRING :
*base == 'U' ? CPP_STRING32 : *base == 'U' ? CPP_STRING32 :
...@@ -749,7 +781,99 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base, ...@@ -749,7 +781,99 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
cur = raw_prefix + raw_prefix_len + 1; cur = raw_prefix + raw_prefix_len + 1;
for (;;) for (;;)
{ {
cppchar_t c = *cur++; #define BUF_APPEND(STR,LEN) \
do { \
bufring_append (pfile, (const uchar *)(STR), (LEN), \
&first_buff, &last_buff); \
total_len += (LEN); \
} while (0);
cppchar_t c;
/* If we previously performed any trigraph or line splicing
transformations, undo them within the body of the raw string. */
while (note->pos < cur)
++note;
for (; note->pos == cur; ++note)
{
switch (note->type)
{
case '\\':
case ' ':
/* Restore backslash followed by newline. */
BUF_APPEND (base, cur - base);
base = cur;
BUF_APPEND ("\\", 1);
after_backslash:
if (note->type == ' ')
{
/* GNU backslash whitespace newline extension. FIXME
could be any sequence of non-vertical space. When we
can properly restore any such sequence, we should mark
this note as handled so _cpp_process_line_notes
doesn't warn. */
BUF_APPEND (" ", 1);
}
BUF_APPEND ("\n", 1);
break;
case 0:
/* Already handled. */
break;
default:
if (_cpp_trigraph_map[note->type])
{
/* Don't warn about this trigraph in
_cpp_process_line_notes, since trigraphs show up as
trigraphs in raw strings. */
unsigned type = note->type;
note->type = 0;
if (!CPP_OPTION (pfile, trigraphs))
/* If we didn't convert the trigraph in the first
place, don't do anything now either. */
break;
BUF_APPEND (base, cur - base);
base = cur;
BUF_APPEND ("??", 2);
/* ??/ followed by newline gets two line notes, one for
the trigraph and one for the backslash/newline. */
if (type == '/' && note[1].pos == cur)
{
if (note[1].type != '\\'
&& note[1].type != ' ')
abort ();
BUF_APPEND ("/", 1);
++note;
goto after_backslash;
}
/* The ) from ??) could be part of the suffix. */
else if (type == ')'
&& strncmp ((const char *) cur+1,
(const char *) raw_prefix,
raw_prefix_len) == 0
&& cur[raw_prefix_len+1] == '"')
{
cur += raw_prefix_len+2;
goto break_outer_loop;
}
else
{
/* Skip the replacement character. */
base = ++cur;
BUF_APPEND (&type, 1);
}
}
else
abort ();
break;
}
}
c = *cur++;
if (c == ')' if (c == ')'
&& strncmp ((const char *) cur, (const char *) raw_prefix, && strncmp ((const char *) cur, (const char *) raw_prefix,
...@@ -772,39 +896,14 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base, ...@@ -772,39 +896,14 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
break; break;
} }
/* raw strings allow embedded non-escaped newlines, which BUF_APPEND (base, cur - base);
complicates this routine a lot. */
if (first_buff == NULL)
{
total_len = cur - base;
first_buff = last_buff = _cpp_get_buff (pfile, total_len);
memcpy (BUFF_FRONT (last_buff), base, total_len);
raw_prefix = BUFF_FRONT (last_buff) + (raw_prefix - base);
BUFF_FRONT (last_buff) += total_len;
}
else
{
size_t len = cur - base;
size_t cur_len = len > BUFF_ROOM (last_buff)
? BUFF_ROOM (last_buff) : len;
total_len += len;
memcpy (BUFF_FRONT (last_buff), base, cur_len);
BUFF_FRONT (last_buff) += cur_len;
if (len > cur_len)
{
last_buff = _cpp_append_extend_buff (pfile, last_buff,
len - cur_len);
memcpy (BUFF_FRONT (last_buff), base + cur_len,
len - cur_len);
BUFF_FRONT (last_buff) += len - cur_len;
}
}
if (pfile->buffer->cur < pfile->buffer->rlimit) if (pfile->buffer->cur < pfile->buffer->rlimit)
CPP_INCREMENT_LINE (pfile, 0); CPP_INCREMENT_LINE (pfile, 0);
pfile->buffer->need_line = true; pfile->buffer->need_line = true;
pfile->buffer->cur = cur-1;
_cpp_process_line_notes (pfile, false);
if (!_cpp_get_fresh_line (pfile)) if (!_cpp_get_fresh_line (pfile))
{ {
source_location src_loc = token->src_loc; source_location src_loc = token->src_loc;
...@@ -820,11 +919,13 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base, ...@@ -820,11 +919,13 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
} }
cur = base = pfile->buffer->cur; cur = base = pfile->buffer->cur;
note = &pfile->buffer->notes[pfile->buffer->cur_note];
} }
else if (c == '\0' && !saw_NUL) else if (c == '\0' && !saw_NUL)
LINEMAP_POSITION_FOR_COLUMN (saw_NUL, pfile->line_table, LINEMAP_POSITION_FOR_COLUMN (saw_NUL, pfile->line_table,
CPP_BUF_COLUMN (pfile->buffer, cur)); CPP_BUF_COLUMN (pfile->buffer, cur));
} }
break_outer_loop:
if (saw_NUL && !pfile->state.skipping) if (saw_NUL && !pfile->state.skipping)
cpp_error_with_line (pfile, CPP_DL_WARNING, saw_NUL, 0, cpp_error_with_line (pfile, CPP_DL_WARNING, saw_NUL, 0,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment