Commit cf551fba by Eric Christopher

cppcharset.c (one_iso88591_to_utf8): New function.

2004-01-16  Eric Christopher  <echristo@redhat.com>
	    Chandrakala Chavva <cchavva@redhat.com>

	* cppcharset.c (one_iso88591_to_utf8): New function.
	(convert_iso88591_utf8): Ditto. Use.
	(conversion_tab): Use.
	(_cpp_input_to_utf8): New function.
	(_cpp_init_iconv_buffer): Ditto.
	(_cpp_close_iconv_buffer): Ditto.
	* cpphash.h: Prototype new functions.
	(cpp_buffer): Add input_cset_desc.
	* cppinit.c: Add input_charset default.
	* cpplib.c (cpp_push_buffer): Support init and
	close of iconv.
	* cpplib.h (cpp_options): Add input_charset.

From-SVN: r76000
parent 2f9c39f8
2004-01-16 Eric Christopher <echristo@redhat.com>
Chandrakala Chavva <cchavva@redhat.com>
* cppcharset.c (one_iso88591_to_utf8): New function.
(convert_iso88591_utf8): Ditto. Use.
(conversion_tab): Use.
(_cpp_input_to_utf8): New function.
(_cpp_init_iconv_buffer): Ditto.
(_cpp_close_iconv_buffer): Ditto.
* cpphash.h: Prototype new functions.
(cpp_buffer): Add input_cset_desc.
* cppinit.c: Add input_charset default.
* cpplib.c (cpp_push_buffer): Support init and
close of iconv.
* cpplib.h (cpp_options): Add input_charset.
2004-01-16 Kazu Hirata <kazu@cs.umass.edu> 2004-01-16 Kazu Hirata <kazu@cs.umass.edu>
* system.h (ASM_OUTPUT_SECTION_NAME): Poison. * system.h (ASM_OUTPUT_SECTION_NAME): Poison.
......
...@@ -446,6 +446,31 @@ one_utf16_to_utf8 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp, ...@@ -446,6 +446,31 @@ one_utf16_to_utf8 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
return 0; return 0;
} }
/* Convert a single ISO 8859-1 character to UTF-8.

   Every ISO 8859-1 code point has the same numeric value as the
   corresponding Unicode code point, so the incoming byte can be
   passed directly to one_cppchar_to_utf8 (which expects a Unicode
   value).

   BIGEND is ignored; it exists only so this routine has the same
   signature as the other one_*_to_utf8 converters.  *INBUFP and
   *INBYTESLEFTP describe the remaining input, *OUTBUFP and
   *OUTBYTESLEFTP the remaining output space.

   Returns 0 on success, after consuming exactly one input byte.
   Returns EINVAL if no input is left.  Any nonzero status from
   one_cppchar_to_utf8 is returned unchanged, and in that case the
   input position is not advanced.  */
static int
one_iso88591_to_utf8 (iconv_t bigend ATTRIBUTE_UNUSED, const uchar **inbufp,
                      size_t *inbytesleftp, uchar **outbufp,
                      size_t *outbytesleftp)
{
  int rval;

  /* One byte is needed to make a character.  The test must reject an
     empty input, not a long one: convert_iso88591_utf8 hands the whole
     FLEN-byte buffer to the conversion loop, and with nothing left the
     decrement below would wrap *INBYTESLEFTP around.  */
  if (*inbytesleftp < 1)
    return EINVAL;

  rval = one_cppchar_to_utf8 ((cppchar_t) **inbufp, outbufp, outbytesleftp);
  if (rval)
    return rval;

  *inbufp += 1;
  *inbytesleftp -= 1;
  return 0;
}
/* Helper routine for the next few functions. The 'const' on /* Helper routine for the next few functions. The 'const' on
one_conversion means that we promise not to modify what function is one_conversion means that we promise not to modify what function is
pointed to, which lets the inliner see through it. */ pointed to, which lets the inliner see through it. */
...@@ -529,6 +554,14 @@ convert_utf32_utf8 (iconv_t cd, const uchar *from, size_t flen, ...@@ -529,6 +554,14 @@ convert_utf32_utf8 (iconv_t cd, const uchar *from, size_t flen,
return conversion_loop (one_utf32_to_utf8, cd, from, flen, to); return conversion_loop (one_utf32_to_utf8, cd, from, flen, to);
} }
/* Convert the FLEN bytes at FROM from ISO 8859-1 to UTF-8 into TO by
   running conversion_loop with one_iso88591_to_utf8 as the
   per-character step.  CD is forwarded untouched.  Returns whatever
   conversion_loop returns.  */
static bool
convert_iso88591_utf8 (iconv_t cd, const uchar *from, size_t flen,
                       struct _cpp_strbuf *to)
{
  bool converted;

  converted = conversion_loop (one_iso88591_to_utf8, cd, from, flen, to);
  return converted;
}
/* Identity conversion, used when we have no alternative. */ /* Identity conversion, used when we have no alternative. */
static bool static bool
convert_no_conversion (iconv_t cd ATTRIBUTE_UNUSED, convert_no_conversion (iconv_t cd ATTRIBUTE_UNUSED,
...@@ -606,6 +639,7 @@ static const struct conversion conversion_tab[] = { ...@@ -606,6 +639,7 @@ static const struct conversion conversion_tab[] = {
{ "UTF-32BE/UTF-8", convert_utf32_utf8, (iconv_t)1 }, { "UTF-32BE/UTF-8", convert_utf32_utf8, (iconv_t)1 },
{ "UTF-16LE/UTF-8", convert_utf16_utf8, (iconv_t)0 }, { "UTF-16LE/UTF-8", convert_utf16_utf8, (iconv_t)0 },
{ "UTF-16BE/UTF-8", convert_utf16_utf8, (iconv_t)1 }, { "UTF-16BE/UTF-8", convert_utf16_utf8, (iconv_t)1 },
{ "ISO-8859-1/UTF-8", convert_iso88591_utf8, (iconv_t)0 },
}; };
/* Subroutine of cpp_init_iconv: initialize and return a /* Subroutine of cpp_init_iconv: initialize and return a
...@@ -649,7 +683,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from) ...@@ -649,7 +683,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
if (ret.cd == (iconv_t) -1) if (ret.cd == (iconv_t) -1)
{ {
if (errno == EINVAL) if (errno == EINVAL)
cpp_error (pfile, CPP_DL_ERROR, /* XXX should be DL_SORRY */ cpp_error (pfile, CPP_DL_ERROR, /* FIXME should be DL_SORRY */
"conversion from %s to %s not supported by iconv", "conversion from %s to %s not supported by iconv",
from, to); from, to);
else else
...@@ -660,7 +694,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from) ...@@ -660,7 +694,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
} }
else else
{ {
cpp_error (pfile, CPP_DL_ERROR, /* XXX should be DL_SORRY */ cpp_error (pfile, CPP_DL_ERROR, /* FIXME: should be DL_SORRY */
"no iconv implementation, cannot convert from %s to %s", "no iconv implementation, cannot convert from %s to %s",
from, to); from, to);
ret.func = convert_no_conversion; ret.func = convert_no_conversion;
...@@ -1352,3 +1386,46 @@ cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token, ...@@ -1352,3 +1386,46 @@ cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
return result; return result;
} }
/* Convert the LENGTH bytes at INPUT using the converter stored in the
   current buffer (pfile->buffer->input_cset_desc) and terminate the
   result with a newline.

   Returns a heap-allocated buffer holding the converted text followed
   by '\n'; nothing in this function releases it on success.  If the
   conversion fails, an error is reported through cpp_error, the
   working buffer is freed, and NULL is returned.  */
uchar *
_cpp_input_to_utf8 (cpp_reader *pfile, const uchar *input, cppchar_t length)
{
  struct _cpp_strbuf tbuf;
  struct cset_converter cvt = pfile->buffer->input_cset_desc;
  size_t end;

  tbuf.asize = MAX (OUTBUF_BLOCK_SIZE, length);
  tbuf.text = xmalloc (tbuf.asize);
  tbuf.len = 0;

  if (!APPLY_CONVERSION (cvt, input, length, &tbuf))
    {
      cpp_error (pfile, CPP_DL_ERROR, "converting input to source character set.");
      /* Do not leak the working buffer on the failure path.  */
      free (tbuf.text);
      return NULL;
    }

  /* The terminator goes right after the converted text, or at the
     very start when there was no input at all.  */
  end = length ? tbuf.len : 0;

  /* The converted text may fill the buffer exactly; make room for the
     terminator instead of writing one byte past the allocation.  */
  if (end >= tbuf.asize)
    {
      tbuf.asize = end + 1;
      tbuf.text = xrealloc (tbuf.text, tbuf.asize);
    }

  tbuf.text[end] = '\n';
  return tbuf.text;
}
/* Set up the current buffer's input converter: build a descriptor
   that converts from the character set named FROM to SOURCE_CHARSET
   and store it in pfile->buffer->input_cset_desc.  At present the
   caller is expected to pass the configured input character set,
   which defaults to ISO 8859-1.  */
void
_cpp_init_iconv_buffer (cpp_reader *pfile, const char *from)
{
  struct cset_converter desc = init_iconv_desc (pfile, SOURCE_CHARSET, from);

  pfile->buffer->input_cset_desc = desc;
}
/* Release the iconv descriptor held by the current buffer's input
   converter.  Nothing is done unless iconv support is compiled in
   and the converter's function is convert_using_iconv, since only
   then is the stored descriptor passed to iconv_close.  */
void
_cpp_close_iconv_buffer (cpp_reader *pfile)
{
  if (!HAVE_ICONV)
    return;

  if (pfile->buffer->input_cset_desc.func != convert_using_iconv)
    return;

  iconv_close (pfile->buffer->input_cset_desc.cd);
}
...@@ -313,6 +313,10 @@ struct cpp_buffer ...@@ -313,6 +313,10 @@ struct cpp_buffer
/* Used for buffer overlays by cpptrad.c. */ /* Used for buffer overlays by cpptrad.c. */
const uchar *saved_cur, *saved_rlimit; const uchar *saved_cur, *saved_rlimit;
/* Descriptor for converting from the input character set to the
source character set. */
struct cset_converter input_cset_desc;
}; };
/* A cpp_reader encapsulates the "state" of a pre-processor run. /* A cpp_reader encapsulates the "state" of a pre-processor run.
...@@ -557,6 +561,9 @@ extern void _cpp_init_internal_pragmas (cpp_reader *); ...@@ -557,6 +561,9 @@ extern void _cpp_init_internal_pragmas (cpp_reader *);
extern void _cpp_do_file_change (cpp_reader *, enum lc_reason, const char *, extern void _cpp_do_file_change (cpp_reader *, enum lc_reason, const char *,
unsigned int, unsigned int); unsigned int, unsigned int);
extern void _cpp_pop_buffer (cpp_reader *); extern void _cpp_pop_buffer (cpp_reader *);
extern uchar *_cpp_input_to_utf8 (cpp_reader *, const unsigned char *, cppchar_t);
extern void _cpp_init_iconv_buffer (cpp_reader *, const char *);
extern void _cpp_close_iconv_buffer (cpp_reader *);
/* In cpptrad.c. */ /* In cpptrad.c. */
extern bool _cpp_scan_out_logical_line (cpp_reader *, cpp_macro *); extern bool _cpp_scan_out_logical_line (cpp_reader *, cpp_macro *);
......
...@@ -161,6 +161,9 @@ cpp_create_reader (enum c_lang lang, hash_table *table) ...@@ -161,6 +161,9 @@ cpp_create_reader (enum c_lang lang, hash_table *table)
CPP_OPTION (pfile, narrow_charset) = 0; CPP_OPTION (pfile, narrow_charset) = 0;
CPP_OPTION (pfile, wide_charset) = 0; CPP_OPTION (pfile, wide_charset) = 0;
/* Default the input character set to iso-8859-1 for now. */
CPP_OPTION (pfile, input_charset) = "ISO-8859-1";
/* A fake empty "directory" used as the starting point for files /* A fake empty "directory" used as the starting point for files
looked up without a search path. Name cannot be '/' because we looked up without a search path. Name cannot be '/' because we
don't want to prepend anything at all to filenames using it. All don't want to prepend anything at all to filenames using it. All
......
...@@ -1925,6 +1925,7 @@ cpp_push_buffer (cpp_reader *pfile, const uchar *buffer, size_t len, ...@@ -1925,6 +1925,7 @@ cpp_push_buffer (cpp_reader *pfile, const uchar *buffer, size_t len,
int from_stage3) int from_stage3)
{ {
cpp_buffer *new = xobnew (&pfile->buffer_ob, cpp_buffer); cpp_buffer *new = xobnew (&pfile->buffer_ob, cpp_buffer);
const char *input = CPP_OPTION (pfile, input_charset);
/* Clears, amongst other things, if_stack and mi_cmacro. */ /* Clears, amongst other things, if_stack and mi_cmacro. */
memset (new, 0, sizeof (cpp_buffer)); memset (new, 0, sizeof (cpp_buffer));
...@@ -1936,6 +1937,8 @@ cpp_push_buffer (cpp_reader *pfile, const uchar *buffer, size_t len, ...@@ -1936,6 +1937,8 @@ cpp_push_buffer (cpp_reader *pfile, const uchar *buffer, size_t len,
new->need_line = true; new->need_line = true;
pfile->buffer = new; pfile->buffer = new;
_cpp_init_iconv_buffer (pfile, input);
return new; return new;
} }
...@@ -1957,6 +1960,8 @@ _cpp_pop_buffer (cpp_reader *pfile) ...@@ -1957,6 +1960,8 @@ _cpp_pop_buffer (cpp_reader *pfile)
/* In case of a missing #endif. */ /* In case of a missing #endif. */
pfile->state.skipping = 0; pfile->state.skipping = 0;
_cpp_close_iconv_buffer (pfile);
/* _cpp_do_file_change expects pfile->buffer to be the new one. */ /* _cpp_do_file_change expects pfile->buffer to be the new one. */
pfile->buffer = buffer->prev; pfile->buffer = buffer->prev;
......
...@@ -332,6 +332,9 @@ struct cpp_options ...@@ -332,6 +332,9 @@ struct cpp_options
/* Holds the name of the target wide character set. */ /* Holds the name of the target wide character set. */
const char *wide_charset; const char *wide_charset;
/* Holds the name of the input character set. */
const char *input_charset;
/* True to warn about precompiled header files we couldn't use. */ /* True to warn about precompiled header files we couldn't use. */
bool warn_invalid_pch; bool warn_invalid_pch;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment