Commit 688e7a53 by Tom Tromey Committed by Tom Tromey

re PR preprocessor/33415 (Can't compile .cpp file with UTF-8 BOM.)

libcpp
	PR libcpp/33415:
	* charset.c (_cpp_convert_input): Add buffer_start argument.
	Ignore UTF-8 BOM if seen.
	* internal.h (_cpp_convert_input): Add argument.
	* files.c (struct _cpp_file) <buffer_start>: New field.
	(destroy_cpp_file): Free buffer_start, not buffer.
	(_cpp_pop_file_buffer): Likewise.
	(read_file_guts): Update.
gcc/testsuite
	PR libcpp/33415:
	* gcc.dg/cpp/pr33415.c: New file.

From-SVN: r134507
parent 009890be
2008-04-21 Tom Tromey <tromey@redhat.com>
PR libcpp/33415:
* gcc.dg/cpp/pr33415.c: New file.
2008-04-21 Olivier Hainque <hainque@adacore.com> 2008-04-21 Olivier Hainque <hainque@adacore.com>
* gnat.dg/bltins.adb: New testcase. * gnat.dg/bltins.adb: New testcase.
 /* Test case for PR 33415. Note that the first bytes of this file
are a UTF-8 BOM. */
/* { dg-do compile } */
int f(void) { return 5; }
2008-04-21 Tom Tromey <tromey@redhat.com>
PR libcpp/33415:
* charset.c (_cpp_convert_input): Add buffer_start argument.
Ignore UTF-8 BOM if seen.
* internal.h (_cpp_convert_input): Add argument.
* files.c (struct _cpp_file) <buffer_start>: New field.
(destroy_cpp_file): Free buffer_start, not buffer.
(_cpp_pop_file_buffer): Likewise.
(read_file_guts): Update.
2008-04-18 Kris Van Hees <kris.van.hees@oracle.com> 2008-04-18 Kris Van Hees <kris.van.hees@oracle.com>
* include/cpp-id-data.h (UC): Was U, conflicts with U"..." literal. * include/cpp-id-data.h (UC): Was U, conflicts with U"..." literal.
......
/* CPP Library - charsets /* CPP Library - charsets
Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006, 2008
Free Software Foundation, Inc. Free Software Foundation, Inc.
Broken out of c-lex.c Apr 2003, adding valid C99 UCN ranges. Broken out of c-lex.c Apr 2003, adding valid C99 UCN ranges.
...@@ -1637,18 +1637,24 @@ _cpp_interpret_identifier (cpp_reader *pfile, const uchar *id, size_t len) ...@@ -1637,18 +1637,24 @@ _cpp_interpret_identifier (cpp_reader *pfile, const uchar *id, size_t len)
source file) from INPUT_CHARSET to the source character set. INPUT source file) from INPUT_CHARSET to the source character set. INPUT
points to the input buffer, SIZE is its allocated size, and LEN is points to the input buffer, SIZE is its allocated size, and LEN is
the length of the meaningful data within the buffer. The the length of the meaningful data within the buffer. The
translated buffer is returned, and *ST_SIZE is set to the length of translated buffer is returned, *ST_SIZE is set to the length of
the meaningful data within the translated buffer. the meaningful data within the translated buffer, and *BUFFER_START
is set to the start of the returned buffer. *BUFFER_START may
INPUT is expected to have been allocated with xmalloc. This function differ from the return value in the case of a BOM or other ignored
will either return INPUT, or free it and return a pointer to another marker information.
xmalloc-allocated block of memory. */
INPUT is expected to have been allocated with xmalloc. This
function will either set *BUFFER_START to INPUT, or free it and set
*BUFFER_START to a pointer to another xmalloc-allocated block of
memory. */
uchar * uchar *
_cpp_convert_input (cpp_reader *pfile, const char *input_charset, _cpp_convert_input (cpp_reader *pfile, const char *input_charset,
uchar *input, size_t size, size_t len, off_t *st_size) uchar *input, size_t size, size_t len,
const unsigned char **buffer_start, off_t *st_size)
{ {
struct cset_converter input_cset; struct cset_converter input_cset;
struct _cpp_strbuf to; struct _cpp_strbuf to;
unsigned char *buffer;
input_cset = init_iconv_desc (pfile, SOURCE_CHARSET, input_charset); input_cset = init_iconv_desc (pfile, SOURCE_CHARSET, input_charset);
if (input_cset.func == convert_no_conversion) if (input_cset.func == convert_no_conversion)
...@@ -1689,8 +1695,24 @@ _cpp_convert_input (cpp_reader *pfile, const char *input_charset, ...@@ -1689,8 +1695,24 @@ _cpp_convert_input (cpp_reader *pfile, const char *input_charset,
else else
to.text[to.len] = '\n'; to.text[to.len] = '\n';
buffer = to.text;
*st_size = to.len; *st_size = to.len;
return to.text; #if HOST_CHARSET == HOST_CHARSET_ASCII
/* The HOST_CHARSET test just above ensures that the source charset
is UTF-8. So, ignore a UTF-8 BOM if we see one. Note that
glibc's UTF-8 iconv() provider (as of glibc 2.7) does not ignore a
BOM -- however, even if it did, we would still need this code due
to the 'convert_no_conversion' case. */
if (to.len >= 3 && to.text[0] == 0xef && to.text[1] == 0xbb
&& to.text[2] == 0xbf)
{
*st_size -= 3;
buffer += 3;
}
#endif
*buffer_start = to.text;
return buffer;
} }
/* Decide on the default encoding to assume for input files. */ /* Decide on the default encoding to assume for input files. */
......
...@@ -74,6 +74,10 @@ struct _cpp_file ...@@ -74,6 +74,10 @@ struct _cpp_file
/* The contents of NAME after calling read_file(). */ /* The contents of NAME after calling read_file(). */
const uchar *buffer; const uchar *buffer;
/* Pointer to the real start of BUFFER. read_file() might increment
BUFFER; when freeing, this pointer must be used instead. */
const uchar *buffer_start;
/* The macro, if any, preventing re-inclusion. */ /* The macro, if any, preventing re-inclusion. */
const cpp_hashnode *cmacro; const cpp_hashnode *cmacro;
...@@ -635,8 +639,11 @@ read_file_guts (cpp_reader *pfile, _cpp_file *file) ...@@ -635,8 +639,11 @@ read_file_guts (cpp_reader *pfile, _cpp_file *file)
cpp_error (pfile, CPP_DL_WARNING, cpp_error (pfile, CPP_DL_WARNING,
"%s is shorter than expected", file->path); "%s is shorter than expected", file->path);
file->buffer = _cpp_convert_input (pfile, CPP_OPTION (pfile, input_charset), file->buffer = _cpp_convert_input (pfile,
buf, size, total, &file->st.st_size); CPP_OPTION (pfile, input_charset),
buf, size, total,
&file->buffer_start,
&file->st.st_size);
file->buffer_valid = true; file->buffer_valid = true;
return true; return true;
...@@ -969,8 +976,8 @@ make_cpp_file (cpp_reader *pfile, cpp_dir *dir, const char *fname) ...@@ -969,8 +976,8 @@ make_cpp_file (cpp_reader *pfile, cpp_dir *dir, const char *fname)
static void static void
destroy_cpp_file (_cpp_file *file) destroy_cpp_file (_cpp_file *file)
{ {
if (file->buffer) if (file->buffer_start)
free ((void *) file->buffer); free ((void *) file->buffer_start);
free ((void *) file->name); free ((void *) file->name);
free (file); free (file);
} }
...@@ -1302,9 +1309,10 @@ _cpp_pop_file_buffer (cpp_reader *pfile, _cpp_file *file) ...@@ -1302,9 +1309,10 @@ _cpp_pop_file_buffer (cpp_reader *pfile, _cpp_file *file)
/* Invalidate control macros in the #including file. */ /* Invalidate control macros in the #including file. */
pfile->mi_valid = false; pfile->mi_valid = false;
if (file->buffer) if (file->buffer_start)
{ {
free ((void *) file->buffer); free ((void *) file->buffer_start);
file->buffer_start = NULL;
file->buffer = NULL; file->buffer = NULL;
file->buffer_valid = false; file->buffer_valid = false;
} }
......
/* Part of CPP library. /* Part of CPP library.
Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007 Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008
Free Software Foundation, Inc. Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify it This program is free software; you can redistribute it and/or modify it
...@@ -644,7 +644,7 @@ extern cppchar_t _cpp_valid_ucn (cpp_reader *, const unsigned char **, ...@@ -644,7 +644,7 @@ extern cppchar_t _cpp_valid_ucn (cpp_reader *, const unsigned char **,
extern void _cpp_destroy_iconv (cpp_reader *); extern void _cpp_destroy_iconv (cpp_reader *);
extern unsigned char *_cpp_convert_input (cpp_reader *, const char *, extern unsigned char *_cpp_convert_input (cpp_reader *, const char *,
unsigned char *, size_t, size_t, unsigned char *, size_t, size_t,
off_t *); const unsigned char **, off_t *);
extern const char *_cpp_default_encoding (void); extern const char *_cpp_default_encoding (void);
extern cpp_hashnode * _cpp_interpret_identifier (cpp_reader *pfile, extern cpp_hashnode * _cpp_interpret_identifier (cpp_reader *pfile,
const unsigned char *id, const unsigned char *id,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment