Commit a3998c2f by David Malcolm Committed by David Malcolm

Fix use-after-free lexing unterminated raw strings (PR preprocessor/78811)

gcc/ChangeLog:
	PR preprocessor/78680
	PR preprocessor/78811
	* input.c (struct selftest::lexer_test): Add field
	m_implicitly_expect_EOF.
	(selftest::lexer_error_sink): New class.
	(selftest::lexer_error_sink::s_singleton): New global.
	(selftest::lexer_test::lexer_test): Initialize new field
	"m_implicitly_expect_EOF".
	(selftest::lexer_test::~lexer_test): Conditionalize the
	check for the EOF token on the new field.
	(selftest::test_lexer_string_locations_raw_string_unterminated):
	New function.
	(selftest::input_c_tests): Call the new test.

libcpp/ChangeLog:
	PR preprocessor/78680
	PR preprocessor/78811
	* lex.c (_cpp_lex_direct): Only determine the end-location of
	the token and build a range for non-reserved start locations.
	Do not do it for EOF tokens.

From-SVN: r243721
parent a3038e19
2016-12-15 David Malcolm <dmalcolm@redhat.com>
PR preprocessor/78680
PR preprocessor/78811
* input.c (struct selftest::lexer_test): Add field
m_implicitly_expect_EOF.
(selftest::lexer_error_sink): New class.
(selftest::lexer_error_sink::s_singleton): New global.
(selftest::lexer_test::lexer_test): Initialize new field
"m_implicitly_expect_EOF".
(selftest::lexer_test::~lexer_test): Conditionalize the
check for the EOF token on the new field.
(selftest::test_lexer_string_locations_raw_string_unterminated):
New function.
(selftest::input_c_tests): Call the new test.
2016-12-15 Wilco Dijkstra <wdijkstr@arm.com> 2016-12-15 Wilco Dijkstra <wdijkstr@arm.com>
* config/arm/arm.h (TARGET_BACKTRACE): Use crtl->is_leaf. * config/arm/arm.h (TARGET_BACKTRACE): Use crtl->is_leaf.
...@@ -1985,6 +1985,7 @@ struct lexer_test ...@@ -1985,6 +1985,7 @@ struct lexer_test
cpp_reader_ptr m_parser; cpp_reader_ptr m_parser;
temp_source_file m_tempfile; temp_source_file m_tempfile;
string_concat_db m_concats; string_concat_db m_concats;
bool m_implicitly_expect_EOF;
}; };
/* Use an EBCDIC encoding for the execution charset, specifically /* Use an EBCDIC encoding for the execution charset, specifically
...@@ -2046,6 +2047,54 @@ class ebcdic_execution_charset : public lexer_test_options ...@@ -2046,6 +2047,54 @@ class ebcdic_execution_charset : public lexer_test_options
ebcdic_execution_charset *ebcdic_execution_charset::s_singleton; ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
/* A lexer_test_options subclass that records a list of error
messages emitted by the lexer. */
class lexer_error_sink : public lexer_test_options
{
public:
lexer_error_sink ()
{
gcc_assert (s_singleton == NULL);
s_singleton = this;
}
~lexer_error_sink ()
{
gcc_assert (s_singleton == this);
s_singleton = NULL;
int i;
char *str;
FOR_EACH_VEC_ELT (m_errors, i, str)
free (str);
}
void apply (lexer_test &test) FINAL OVERRIDE
{
cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
callbacks->error = on_error;
}
static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED,
int level ATTRIBUTE_UNUSED,
int reason ATTRIBUTE_UNUSED,
rich_location *richloc ATTRIBUTE_UNUSED,
const char *msgid, va_list *ap)
ATTRIBUTE_FPTR_PRINTF(5,0)
{
char *msg = xvasprintf (msgid, *ap);
s_singleton->m_errors.safe_push (msg);
return true;
}
auto_vec<char *> m_errors;
private:
static lexer_error_sink *s_singleton;
};
lexer_error_sink *lexer_error_sink::s_singleton;
/* Constructor. Override line_table with a new instance based on CASE_, /* Constructor. Override line_table with a new instance based on CASE_,
and write CONTENT to a tempfile. Create a cpp_reader, and use it to and write CONTENT to a tempfile. Create a cpp_reader, and use it to
start parsing the tempfile. */ start parsing the tempfile. */
...@@ -2056,7 +2105,8 @@ lexer_test::lexer_test (const line_table_case &case_, const char *content, ...@@ -2056,7 +2105,8 @@ lexer_test::lexer_test (const line_table_case &case_, const char *content,
m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)), m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
/* Create a tempfile and write the text to it. */ /* Create a tempfile and write the text to it. */
m_tempfile (SELFTEST_LOCATION, ".c", content), m_tempfile (SELFTEST_LOCATION, ".c", content),
m_concats () m_concats (),
m_implicitly_expect_EOF (true)
{ {
if (options) if (options)
options->apply (*this); options->apply (*this);
...@@ -2069,16 +2119,19 @@ lexer_test::lexer_test (const line_table_case &case_, const char *content, ...@@ -2069,16 +2119,19 @@ lexer_test::lexer_test (const line_table_case &case_, const char *content,
ASSERT_NE (fname, NULL); ASSERT_NE (fname, NULL);
} }
/* Destructor. Verify that the next token in m_parser is EOF. */ /* Destructor. By default, verify that the next token in m_parser is EOF. */
lexer_test::~lexer_test () lexer_test::~lexer_test ()
{ {
location_t loc; location_t loc;
const cpp_token *tok; const cpp_token *tok;
tok = cpp_get_token_with_location (m_parser, &loc); if (m_implicitly_expect_EOF)
ASSERT_NE (tok, NULL); {
ASSERT_EQ (tok->type, CPP_EOF); tok = cpp_get_token_with_location (m_parser, &loc);
ASSERT_NE (tok, NULL);
ASSERT_EQ (tok->type, CPP_EOF);
}
} }
/* Get the next token from m_parser. */ /* Get the next token from m_parser. */
...@@ -3247,6 +3300,31 @@ test_lexer_string_locations_raw_string_multiline (const line_table_case &case_) ...@@ -3247,6 +3300,31 @@ test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
"range endpoints are on different lines"); "range endpoints are on different lines");
} }
/* Verify the lexer's handling of a raw string that is never
   terminated: the first token obtained should be EOF, and exactly one
   error should have been reported.  */

static void
test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
{
  /* The sink is declared before the lexer_test so that it is still
     alive when the lexer_test is destroyed.  */
  lexer_error_sink errors;

  /* The opening delimiter is "ouch" but the text after the closing
     paren is "ouCh", so the raw string has no matching terminator.  */
  lexer_test test (case_, "R\"ouch()ouCh\" /* etc */", &errors);

  /* Turn off the EOF check that lexer_test otherwise performs on
     destruction; EOF is checked for explicitly below.  */
  test.m_implicitly_expect_EOF = false;

  /* Try to lex the raw string.  */
  const cpp_token *tok = test.get_token ();
  ASSERT_EQ (tok->type, CPP_EOF);

  ASSERT_EQ (1, errors.m_errors.length ());

  /* The message is expected to be "unterminated raw string", looked up
     in the "cpplib" translation domain.  Since dgettext may not be
     available on every supported host, the check below is left
     commented-out for now.
     ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
     errors.m_errors[0]);
  */
}
/* Test of lexing char constants. */ /* Test of lexing char constants. */
static void static void
...@@ -3390,6 +3468,7 @@ input_c_tests () ...@@ -3390,6 +3468,7 @@ input_c_tests ()
for_each_line_table_case (test_lexer_string_locations_long_line); for_each_line_table_case (test_lexer_string_locations_long_line);
for_each_line_table_case (test_lexer_string_locations_raw_string_one_line); for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
for_each_line_table_case (test_lexer_string_locations_raw_string_multiline); for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
for_each_line_table_case (test_lexer_char_constants); for_each_line_table_case (test_lexer_char_constants);
test_reading_source_line (); test_reading_source_line ();
......
2016-12-15 David Malcolm <dmalcolm@redhat.com>
PR preprocessor/78680
PR preprocessor/78811
* lex.c (_cpp_lex_direct): Only determine the end-location of
the token and build a range for non-reserved start locations.
Do not do it for EOF tokens.
2016-12-12 David Malcolm <dmalcolm@redhat.com> 2016-12-12 David Malcolm <dmalcolm@redhat.com>
PR preprocessor/78680 PR preprocessor/78680
......
...@@ -3089,25 +3089,27 @@ _cpp_lex_direct (cpp_reader *pfile) ...@@ -3089,25 +3089,27 @@ _cpp_lex_direct (cpp_reader *pfile)
break; break;
} }
/* Ensure that any line notes are processed, so that we have the /* Potentially convert the location of the token to a range. */
correct physical line/column for the end-point of the token even if (result->src_loc >= RESERVED_LOCATION_COUNT
when a logical line is split via one or more backslashes. */ && result->type != CPP_EOF)
if (buffer->cur >= buffer->notes[buffer->cur_note].pos {
&& !pfile->overlaid_buffer) /* Ensure that any line notes are processed, so that we have the
_cpp_process_line_notes (pfile, false); correct physical line/column for the end-point of the token even
when a logical line is split via one or more backslashes. */
source_range tok_range; if (buffer->cur >= buffer->notes[buffer->cur_note].pos
tok_range.m_start = result->src_loc; && !pfile->overlaid_buffer)
if (result->src_loc >= RESERVED_LOCATION_COUNT) _cpp_process_line_notes (pfile, false);
tok_range.m_finish
= linemap_position_for_column (pfile->line_table, source_range tok_range;
CPP_BUF_COLUMN (buffer, buffer->cur)); tok_range.m_start = result->src_loc;
else tok_range.m_finish
tok_range.m_finish = tok_range.m_start; = linemap_position_for_column (pfile->line_table,
CPP_BUF_COLUMN (buffer, buffer->cur));
result->src_loc = COMBINE_LOCATION_DATA (pfile->line_table,
result->src_loc, result->src_loc = COMBINE_LOCATION_DATA (pfile->line_table,
tok_range, NULL); result->src_loc,
tok_range, NULL);
}
return result; return result;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment