Commit 16dd5cfe by Eric Christopher

[multiple changes]

2004-02-02  Eric Christopher  <echristo@redhat.com>
            Zack Weinberg  <zack@codesourcery.com>

        * c-opts.c (c_common_handle_option): Add -finput-charset.
        * c.opt: Ditto.
        * cppcharset.c (one_iso88591_to_utf8): Remove.
        (convert_iso88591_utf8): Ditto.
        (conversion_tab): Remove 8859-1 converter.
        (_cpp_input_to_utf8): Remove.
        (_cpp_init_iconv_buffer): Ditto.
        (_cpp_close_iconv_buffer): Ditto.
        (_cpp_convert_input): New function.
        (_cpp_default_encoding): Ditto.
        * cpphash.h: Add/remove prototypes for above.
        * cppfiles.c (read_file_guts): Use _cpp_convert_input.
        * cppinit.c (cpp_create_reader): Use _cpp_default_encoding
        for narrow execution and input character sets.
        * cpplib.c (cpp_push_buffer): Delete uses of removed functions.
        * doc/cppopts.texi: Document -finput-charset.

2004-02-02 Eric Christopher  <echristo@redhat.com>
           Zack Weinberg  <zack@codesourcery.com>

        * gcc.c-torture/execute/wchar_t-1.c: Add -finput-charset.

2004-01-29  Eric Christopher  <echristo@redhat.com>
            Zack Weinberg  <zack@codesourcery.com>

        * testsuite/22_locale/collate/compare/wchar_t/2.cc: Remove xfail. Use
        -finput-charset.
        * testsuite/22_locale/collate/compare/wchar_t/wrapped_env.cc: Ditto.
        * testsuite/22_locale/collate/compare/wchar_t/wrapped_locale.cc: Ditto.
        * testsuite/22_locale/collate/hash/wchar_t/2.cc: Ditto.
        * testsuite/22_locale/collate/hash/wchar_t/wrapped_env.cc: Ditto.
        * testsuite/22_locale/collate/hash/wchar_t/wrapped_locale.cc: Ditto.
        * testsuite/22_locale/collate/transform/wchar_t/2.cc: Ditto.
        * testsuite/22_locale/collate/transform/wchar_t/wrapped_env.cc: Ditto.
        * testsuite/22_locale/collate/transform/wchar_t/wrapped_locale.cc:
        Ditto.

From-SVN: r77136
parent 6699d593
2004-02-02 Eric Christopher <echristo@redhat.com>
Zack Weinberg <zack@codesourcery.com>
* c-opts.c (c_common_handle_option): Add -finput-charset.
* c.opt: Ditto.
* cppcharset.c (one_iso88591_to_utf8): Remove.
(convert_iso88591_utf8): Ditto.
(conversion_tab): Remove 8859-1 converter.
(_cpp_input_to_utf8): Remove.
(_cpp_init_iconv_buffer): Ditto.
(_cpp_close_iconv_buffer): Ditto.
(_cpp_convert_input): New function.
(_cpp_default_encoding): Ditto.
* cpphash.h: Add/remove prototypes for above.
* cppfiles.c (read_file_guts): Use _cpp_convert_input.
* cppinit.c (cpp_create_reader): Use _cpp_default_encoding
for narrow execution and input character sets.
* cpplib.c (cpp_push_buffer): Delete uses of removed functions.
* doc/cppopts.texi: Document -finput-charset.
2004-02-02 David Edelsohn <edelsohn@gnu.org>
* rtlanal.c (refers_to_regno_p): Test regno, not inner_regno,
......@@ -3984,4 +4004,3 @@
* invoke.texi (-O1): Document change.
See ChangeLog.10 for earlier changes.
......@@ -904,6 +904,10 @@ c_common_handle_option (size_t scode, const char *arg, int value)
cpp_opts->wide_charset = arg;
break;
case OPT_finput_charset_:
cpp_opts->input_charset = arg;
break;
case OPT_ftemplate_depth_:
max_tinst_depth = value;
break;
......
......@@ -482,6 +482,11 @@ fexec-charset=
C ObjC C++ ObjC++ Joined RejectNegative
-fexec-charset=<cset> Convert all strings and character constants to character set <cset>
finput-charset=
C ObjC C++ ObjC++ Joined RejectNegative
-finput-charset=<cset> Specify the default character set for source files.
fexternal-templates
C++ ObjC++
......
......@@ -446,31 +446,6 @@ one_utf16_to_utf8 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
return 0;
}
/* The first 256 code points of ISO 8859.1 have the same numeric
values as the first 256 code points of Unicode, therefore the
incoming ISO 8859.1 character can be passed directly to
one_cppchar_to_utf8 (which expects a Unicode value). */
static int
one_iso88591_to_utf8 (iconv_t bigend ATTRIBUTE_UNUSED, const uchar **inbufp,
size_t *inbytesleftp, uchar **outbufp, size_t *outbytesleftp)
{
const uchar *inbuf = *inbufp;
int rval;
if (*inbytesleftp > 1)
return EINVAL;
rval = one_cppchar_to_utf8 ((cppchar_t)*inbuf, outbufp, outbytesleftp);
if (rval)
return rval;
*inbufp += 1;
*inbytesleftp -= 1;
return 0;
}
/* Helper routine for the next few functions. The 'const' on
one_conversion means that we promise not to modify what function is
pointed to, which lets the inliner see through it. */
......@@ -554,14 +529,6 @@ convert_utf32_utf8 (iconv_t cd, const uchar *from, size_t flen,
return conversion_loop (one_utf32_to_utf8, cd, from, flen, to);
}
static bool
convert_iso88591_utf8 (iconv_t cd, const uchar *from, size_t flen,
struct _cpp_strbuf *to)
{
return conversion_loop (one_iso88591_to_utf8, cd, from, flen, to);
}
/* Identity conversion, used when we have no alternative. */
static bool
convert_no_conversion (iconv_t cd ATTRIBUTE_UNUSED,
......@@ -639,7 +606,6 @@ static const struct conversion conversion_tab[] = {
{ "UTF-32BE/UTF-8", convert_utf32_utf8, (iconv_t)1 },
{ "UTF-16LE/UTF-8", convert_utf16_utf8, (iconv_t)0 },
{ "UTF-16BE/UTF-8", convert_utf16_utf8, (iconv_t)1 },
{ "ISO-8859-1/UTF-8", convert_iso88591_utf8, (iconv_t)0 },
};
/* Subroutine of cpp_init_iconv: initialize and return a
......@@ -1388,44 +1354,58 @@ cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
}
uchar *
_cpp_input_to_utf8 (cpp_reader *pfile, const uchar *input, cppchar_t length)
_cpp_convert_input (cpp_reader *pfile, const char *input_charset,
uchar *input, size_t size, size_t len, off_t *st_size)
{
struct _cpp_strbuf tbuf;
struct cset_converter cvt = pfile->buffer->input_cset_desc;
tbuf.asize = MAX (OUTBUF_BLOCK_SIZE, length);
tbuf.text = xmalloc (tbuf.asize);
tbuf.len = 0;
struct cset_converter input_cset;
struct _cpp_strbuf to;
if (!APPLY_CONVERSION (cvt, input, length, &tbuf))
input_cset = init_iconv_desc (pfile, SOURCE_CHARSET, input_charset);
if (input_cset.func == convert_no_conversion)
{
cpp_error (pfile, CPP_DL_ERROR, "converting input to source character set.");
return NULL;
to.text = input;
to.asize = size;
to.len = len;
}
if (length)
tbuf.text[tbuf.len] = '\n';
else
tbuf.text[0] = '\n';
{
to.asize = MAX (65536, len);
to.text = xmalloc (to.asize);
to.len = 0;
return tbuf.text;
}
if (!APPLY_CONVERSION (input_cset, input, len, &to))
cpp_error (pfile, CPP_DL_ERROR,
"failure to convert %s to %s",
CPP_OPTION (pfile, input_charset), SOURCE_CHARSET);
/* Check the input file format. At present assuming the input file
is in iso-8859-1 format. Convert this input character set to
source character set format (UTF-8). */
free (input);
}
void
_cpp_init_iconv_buffer (cpp_reader *pfile, const char *from)
{
pfile->buffer->input_cset_desc = init_iconv_desc (pfile, SOURCE_CHARSET,
from);
/* Clean up the mess. */
if (input_cset.func == convert_using_iconv)
iconv_close (input_cset.cd);
/* Resize buffer if we allocated substantially too much, or if we
haven't enough space for the \n-terminator. */
if (to.len + 4096 < to.asize || to.len >= to.asize)
to.text = xrealloc (to.text, to.len + 1);
to.text[to.len] = '\n';
*st_size = to.len;
return to.text;
}
void
_cpp_close_iconv_buffer (cpp_reader *pfile)
const char *
_cpp_default_encoding (void)
{
if (HAVE_ICONV
&& pfile->buffer->input_cset_desc.func == convert_using_iconv)
iconv_close (pfile->buffer->input_cset_desc.cd);
const char *current_encoding = NULL;
#if defined (HAVE_LOCALE_H) && defined (HAVE_LANGINFO_CODESET)
setlocale (LC_CTYPE, "");
current_encoding = nl_langinfo (CODESET);
#endif
if (current_encoding == NULL || *current_encoding == '\0')
current_encoding = SOURCE_CHARSET;
return current_encoding;
}
......@@ -514,15 +514,8 @@ read_file_guts (cpp_reader *pfile, _cpp_file *file)
cpp_error (pfile, CPP_DL_WARNING,
"%s is shorter than expected", file->path);
/* Shrink buffer if we allocated substantially too much. */
if (total + 4096 < size)
buf = xrealloc (buf, total + 1);
/* The lexer requires that the buffer be \n-terminated. */
buf[total] = '\n';
file->buffer = buf;
file->st.st_size = total;
file->buffer = _cpp_convert_input (pfile, CPP_OPTION (pfile, input_charset),
buf, size, total, &file->st.st_size);
file->buffer_valid = true;
return true;
......
......@@ -563,9 +563,6 @@ extern void _cpp_init_internal_pragmas (cpp_reader *);
extern void _cpp_do_file_change (cpp_reader *, enum lc_reason, const char *,
unsigned int, unsigned int);
extern void _cpp_pop_buffer (cpp_reader *);
extern uchar *_cpp_input_to_utf8 (cpp_reader *, const unsigned char *, cppchar_t);
extern void _cpp_init_iconv_buffer (cpp_reader *, const char *);
extern void _cpp_close_iconv_buffer (cpp_reader *);
/* In cpptrad.c. */
extern bool _cpp_scan_out_logical_line (cpp_reader *, cpp_macro *);
......@@ -582,8 +579,12 @@ extern size_t _cpp_replacement_text_len (const cpp_macro *);
extern cppchar_t _cpp_valid_ucn (cpp_reader *, const uchar **,
const uchar *, int);
extern void _cpp_destroy_iconv (cpp_reader *);
extern bool _cpp_interpret_string_notranslate (cpp_reader *, const cpp_string *,
extern bool _cpp_interpret_string_notranslate (cpp_reader *,
const cpp_string *,
cpp_string *);
extern uchar *_cpp_convert_input (cpp_reader *, const char *, uchar *,
size_t, size_t, off_t *);
extern const char *_cpp_default_encoding (void);
/* Utility routines and macros. */
#define DSC(str) (const uchar *)str, sizeof str - 1
......
......@@ -159,11 +159,11 @@ cpp_create_reader (enum c_lang lang, hash_table *table,
CPP_OPTION (pfile, bytes_big_endian) = 1; /* does not matter */
/* Default to no charset conversion. */
CPP_OPTION (pfile, narrow_charset) = 0;
CPP_OPTION (pfile, narrow_charset) = _cpp_default_encoding ();
CPP_OPTION (pfile, wide_charset) = 0;
/* Default the input character set to iso-8859-1 for now. */
CPP_OPTION (pfile, input_charset) = "ISO-8859-1";
/* Default the input character set to the locale's encoding, falling back to the source character set. */
CPP_OPTION (pfile, input_charset) = _cpp_default_encoding ();
/* A fake empty "directory" used as the starting point for files
looked up without a search path. Name cannot be '/' because we
......
......@@ -1925,7 +1925,6 @@ cpp_push_buffer (cpp_reader *pfile, const uchar *buffer, size_t len,
int from_stage3)
{
cpp_buffer *new = xobnew (&pfile->buffer_ob, cpp_buffer);
const char *input = CPP_OPTION (pfile, input_charset);
/* Clears, amongst other things, if_stack and mi_cmacro. */
memset (new, 0, sizeof (cpp_buffer));
......@@ -1937,7 +1936,6 @@ cpp_push_buffer (cpp_reader *pfile, const uchar *buffer, size_t len,
new->need_line = true;
pfile->buffer = new;
_cpp_init_iconv_buffer (pfile, input);
return new;
}
......@@ -1960,8 +1958,6 @@ _cpp_pop_buffer (cpp_reader *pfile)
/* In case of a missing #endif. */
pfile->state.skipping = 0;
_cpp_close_iconv_buffer (pfile);
/* _cpp_do_file_change expects pfile->buffer to be the new one. */
pfile->buffer = buffer->prev;
......
......@@ -511,6 +511,16 @@ corresponds to the width of @code{wchar_t}. As with
by the system's @code{iconv} library routine; however, you will have
problems with encodings that do not fit exactly in @code{wchar_t}.
@item -finput-charset=@var{charset}
@opindex finput-charset
Set the input character set, used for translation from the character
set of the input file to the source character set used by GCC. If the
locale does not specify one, or GCC cannot get this information from the
locale, the default is UTF-8. This can be overridden by either the locale
or this command line option. Currently the command line option takes
precedence if there's a conflict. @var{charset} can be any encoding
supported by the system's @code{iconv} library routine.
@item -fworking-directory
@opindex fworking-directory
@opindex fno-working-directory
......
2004-02-02 Eric Christopher <echristo@redhat.com>
Zack Weinberg <zack@codesourcery.com>
* gcc.c-torture/execute/wchar_t-1.c: Add -finput-charset.
2004-02-02 Zack Weinberg <zack@codesourcery.com>
* g++.dg/eh/forced1.C, g++.dg/eh/forced2.C, g++.dg/eh/forced3.C
......@@ -20556,5 +20561,3 @@ rlsruhe.de>
correspond to c-torture 1.11.
* New file.
/* { dg-options "-finput-charset=utf-8" } */
typedef __WCHAR_TYPE__ wchar_t;
wchar_t x[] = L"Ä";
wchar_t y = L'Ä';
......
2004-01-29 Eric Christopher <echristo@redhat.com>
Zack Weinberg <zack@codesourcery.com>
* testsuite/22_locale/collate/compare/wchar_t/2.cc: Remove xfail. Use
-finput-charset.
* testsuite/22_locale/collate/compare/wchar_t/wrapped_env.cc: Ditto.
* testsuite/22_locale/collate/compare/wchar_t/wrapped_locale.cc: Ditto.
* testsuite/22_locale/collate/hash/wchar_t/2.cc: Ditto.
* testsuite/22_locale/collate/hash/wchar_t/wrapped_env.cc: Ditto.
* testsuite/22_locale/collate/hash/wchar_t/wrapped_locale.cc: Ditto.
* testsuite/22_locale/collate/transform/wchar_t/2.cc: Ditto.
* testsuite/22_locale/collate/transform/wchar_t/wrapped_env.cc: Ditto.
* testsuite/22_locale/collate/transform/wchar_t/wrapped_locale.cc:
Ditto.
2004-02-02 Paolo Carlini <pcarlini@suse.de>
* include/bits/stl_function.h: Additional minor tweaks.
......
......@@ -18,9 +18,8 @@
// Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
// USA.
// Doesn't work due to use of literal ISO8859.1 characters. PR 11439
// { dg-do compile { xfail *-*-* } } should be run
// { dg-excess-errors "" }
// { dg-do run }
// { dg-options "-finput-charset=iso-8859-1" }
// 22.2.4.1.1 collate members
......
......@@ -20,9 +20,8 @@
// 22.2.4.1.1 collate members
// Doesn't work due to use of literal ISO8859.1 characters. PR 11439
// { dg-do compile { xfail *-*-* } } should be run
// { dg-excess-errors "" }
// { dg-do run }
// { dg-options "-finput-charset=iso-8859-1" }
#include <testsuite_hooks.h>
......
......@@ -20,9 +20,8 @@
// 22.2.4.1.1 collate members
// Doesn't work due to use of literal ISO8859.1 characters. PR 11439
// { dg-do compile { xfail *-*-* } } should be run
// { dg-excess-errors "" }
// { dg-do run }
// { dg-options "-finput-charset=iso-8859-1" }
#include <testsuite_hooks.h>
......
......@@ -20,9 +20,8 @@
// 22.2.4.1.1 collate members
// Doesn't work due to use of literal ISO8859.1 characters. PR 11439
// { dg-do compile { xfail *-*-* } } should be run
// { dg-excess-errors "" }
// { dg-do run }
// { dg-options "-finput-charset=iso-8859-1" }
#include <locale>
#include <testsuite_hooks.h>
......
......@@ -20,9 +20,8 @@
// 22.2.4.1.1 collate members
// Doesn't work due to use of literal ISO8859.1 characters. PR 11439
// { dg-do compile { xfail *-*-* } } should be run
// { dg-excess-errors "" }
// { dg-do run }
// { dg-options "-finput-charset=iso-8859-1" }
#include <testsuite_hooks.h>
......
......@@ -20,9 +20,8 @@
// 22.2.4.1.1 collate members
// Doesn't work due to use of literal ISO8859.1 characters. PR 11439
// { dg-do compile { xfail *-*-* } } should be run
// { dg-excess-errors "" }
// { dg-do run }
// { dg-options "-finput-charset=iso-8859-1" }
#include <testsuite_hooks.h>
......
......@@ -20,9 +20,8 @@
// 22.2.4.1.1 collate members
// Doesn't work due to use of literal ISO8859.1 characters. PR 11439
// { dg-do compile { xfail *-*-* } } should be run
// { dg-excess-errors "" }
// { dg-do run }
// { dg-options "-finput-charset=iso-8859-1" }
#include <locale>
#include <testsuite_hooks.h>
......
......@@ -20,9 +20,8 @@
// 22.2.4.1.1 collate members
// Doesn't work due to use of literal ISO8859.1 characters. PR 11439
// { dg-do compile { xfail *-*-* } } should be run
// { dg-excess-errors "" }
// { dg-do run }
// { dg-options "-finput-charset=iso-8859-1" }
#include <testsuite_hooks.h>
......
......@@ -20,9 +20,8 @@
// 22.2.4.1.1 collate members
// Doesn't work due to use of literal ISO8859.1 characters. PR 11439
// { dg-do compile { xfail *-*-* } } should be run
// { dg-excess-errors "" }
// { dg-do run }
// { dg-options "-finput-charset=iso-8859-1" }
#include <testsuite_hooks.h>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment