Commit ae658d47 by Benjamin Kosnik

locale-inst.cc: Add codecvt<unicode_t...


2000-08-22  Benjamin Kosnik  <bkoz@purist.soma.redhat.com>

	* src/locale-inst.cc: Add codecvt<unicode_t, wchar_t,
	__enc_traits> instantiations for has_facet and use_facet.
	* testsuite/22_locale/codecvt_unicode_wchar_t.cc: New file, for
	testing two-byte unicode encodings converted to four-byte UCS4
	encodings.

	* bits/codecvt.h (codecvt<__enc_traits>): Fix do_unshift.
	* testsuite/22_locale/codecvt_unicode_char.cc (test01): Add
	correct state/encoding information.

	* acinclude.m4 (GLIBCPP_CHECK_WCHAR_T_SUPPORT): Add wcsrtombs,
	mbsrtowcs checks as codecvt<wchar_t, char, mbstate_t> needs them
	now.
	Add checks for langinfo.h, nl_langinfo function call.
	* libio/iofwide.c (_IO_fwide): Simplify, as nl_langinfo is assumed.

From-SVN: r35897
parent 62432849
2000-08-22 Benjamin Kosnik <bkoz@purist.soma.redhat.com>
* src/locale-inst.cc: Add codecvt<unicode_t, wchar_t,
__enc_traits> instantiations for has_facet and use_facet.
* testsuite/22_locale/codecvt_unicode_wchar_t.cc: New file, for
testing two-byte unicode encodings converted to four-byte UCS4
encodings.
* bits/codecvt.h (codecvt<__enc_traits>): Fix do_unshift.
* testsuite/22_locale/codecvt_unicode_char.cc (test01): Add
correct state/encoding information.
* acinclude.m4 (GLIBCPP_CHECK_WCHAR_T_SUPPORT): Add wcsrtombs,
mbsrtowcs checks as codecvt<wchar_t, char, mbstate_t> needs them
now.
Add checks for langinfo.h, nl_langinfo function call.
* libio/iofwide.c (_IO_fwide): Simplify, as nl_langinfo is assumed.
2000-08-22 Richard B. Kreckel <Richard.Kreckel@Uni-Mainz.DE>
* config/cpu/alpha/bits/atomicity.h: Change __attribute__
......
dnl
dnl Initialize configure bits.
dnl
......@@ -936,10 +935,12 @@ AC_DEFUN(GLIBCPP_CHECK_WCHAR_T_SUPPORT, [
AC_MSG_RESULT($has_weof)
dnl Tests for wide character functions used in char_traits<wchar_t>.
AC_CHECK_FUNCS(wcslen wmemchr wmemcmp wmemcpy wmemmove wmemset, ac_wfuncs=yes, ac_wfuncs=no)
AC_CHECK_FUNCS(wcslen wmemchr wmemcmp wmemcpy wmemmove wmemset \
wcsrtombs mbsrtowcs, ac_wfuncs=yes, ac_wfuncs=no)
AC_MSG_CHECKING([for ISO C9X wchar_t support])
if test x"$has_weof" = xyes && test x"$has_wchar_minmax" = xyes && test x"$ac_wfuncs" = xyes; then
if test x"$has_weof" = xyes && test x"$has_wchar_minmax" = xyes \
&& test x"$ac_wfuncs" = xyes; then
ac_isoC9X_wchar_t=yes
else
ac_isoC9X_wchar_t=no
......@@ -949,10 +950,13 @@ AC_DEFUN(GLIBCPP_CHECK_WCHAR_T_SUPPORT, [
dnl Use iconv for wchar_t to char conversions. As such, check for
dnl X/Open Portability Guide, version 2 features (XPG2).
AC_CHECK_HEADER(iconv.h, ac_has_iconv_h=yes, ac_has_iconv_h=no)
AC_CHECK_FUNCS(iconv_open iconv_close iconv, ac_XPG2funcs=yes, ac_XPG2funcs=no)
AC_CHECK_HEADER(langinfo.h, ac_has_langinfo_h=yes, ac_has_langinfo_h=no)
AC_CHECK_FUNCS(iconv_open iconv_close iconv nl_langinfo, \
ac_XPG2funcs=yes, ac_XPG2funcs=no)
AC_MSG_CHECKING([for XPG2 wchar_t support])
if test x"$ac_has_iconv_h" = xyes && test x"$ac_XPG2funcs" = xyes; then
if test x"$ac_has_iconv_h" = xyes && test x"$ac_has_langinfo_h" = xyes \
&& test x"$ac_XPG2funcs" = xyes; then
ac_XPG2_wchar_t=yes
else
ac_XPG2_wchar_t=no
......@@ -961,12 +965,9 @@ AC_DEFUN(GLIBCPP_CHECK_WCHAR_T_SUPPORT, [
dnl At the moment, only enable wchar_t specializations if all the
dnl above support is present.
dnl 2000-07-07-bkoz-hack-xxx
# ac_isoC9X_wchar_t=no
dnl 2000-07-07-bkoz-hack-xxx
AC_MSG_CHECKING([for enabled wchar_t specializations])
if test x"$ac_isoC9X_wchar_t" = xyes && test x"$ac_XPG2_wchar_t" = xyes; then
if test x"$ac_isoC9X_wchar_t" = xyes \
&& test x"$ac_XPG2_wchar_t" = xyes; then
libinst_wstring_la="libinst-wstring.la"
AC_DEFINE(_GLIBCPP_USE_WCHAR_T)
AC_MSG_RESULT("yes")
......
......@@ -10,7 +10,6 @@ dnl but WITHOUT ANY WARRANTY, to the extent permitted by law; without
dnl even the implied warranty of MERCHANTABILITY or FITNESS FOR A
dnl PARTICULAR PURPOSE.
dnl
dnl Initialize configure bits.
dnl
......@@ -948,10 +947,12 @@ AC_DEFUN(GLIBCPP_CHECK_WCHAR_T_SUPPORT, [
AC_MSG_RESULT($has_weof)
dnl Tests for wide character functions used in char_traits<wchar_t>.
AC_CHECK_FUNCS(wcslen wmemchr wmemcmp wmemcpy wmemmove wmemset, ac_wfuncs=yes, ac_wfuncs=no)
AC_CHECK_FUNCS(wcslen wmemchr wmemcmp wmemcpy wmemmove wmemset \
wcsrtombs mbsrtowcs, ac_wfuncs=yes, ac_wfuncs=no)
AC_MSG_CHECKING([for ISO C9X wchar_t support])
if test x"$has_weof" = xyes && test x"$has_wchar_minmax" = xyes && test x"$ac_wfuncs" = xyes; then
if test x"$has_weof" = xyes && test x"$has_wchar_minmax" = xyes \
&& test x"$ac_wfuncs" = xyes; then
ac_isoC9X_wchar_t=yes
else
ac_isoC9X_wchar_t=no
......@@ -961,10 +962,13 @@ AC_DEFUN(GLIBCPP_CHECK_WCHAR_T_SUPPORT, [
dnl Use iconv for wchar_t to char conversions. As such, check for
dnl X/Open Portability Guide, version 2 features (XPG2).
AC_CHECK_HEADER(iconv.h, ac_has_iconv_h=yes, ac_has_iconv_h=no)
AC_CHECK_FUNCS(iconv_open iconv_close iconv, ac_XPG2funcs=yes, ac_XPG2funcs=no)
AC_CHECK_HEADER(langinfo.h, ac_has_langinfo_h=yes, ac_has_langinfo_h=no)
AC_CHECK_FUNCS(iconv_open iconv_close iconv nl_langinfo, \
ac_XPG2funcs=yes, ac_XPG2funcs=no)
AC_MSG_CHECKING([for XPG2 wchar_t support])
if test x"$ac_has_iconv_h" = xyes && test x"$ac_XPG2funcs" = xyes; then
if test x"$ac_has_iconv_h" = xyes && test x"$ac_has_langinfo_h" = xyes \
&& test x"$ac_XPG2funcs" = xyes; then
ac_XPG2_wchar_t=yes
else
ac_XPG2_wchar_t=no
......@@ -973,12 +977,9 @@ AC_DEFUN(GLIBCPP_CHECK_WCHAR_T_SUPPORT, [
dnl At the moment, only enable wchar_t specializations if all the
dnl above support is present.
dnl 2000-07-07-bkoz-hack-xxx
# ac_isoC9X_wchar_t=no
dnl 2000-07-07-bkoz-hack-xxx
AC_MSG_CHECKING([for enabled wchar_t specializations])
if test x"$ac_isoC9X_wchar_t" = xyes && test x"$ac_XPG2_wchar_t" = xyes; then
if test x"$ac_isoC9X_wchar_t" = xyes \
&& test x"$ac_XPG2_wchar_t" = xyes; then
libinst_wstring_la="libinst-wstring.la"
AC_DEFINE(_GLIBCPP_USE_WCHAR_T)
AC_MSG_RESULT("yes")
......
......@@ -416,7 +416,12 @@ namespace std
if (__conv != size_t(-1))
{
__to_next = reinterpret_cast<extern_type*>(__cto);
__ret = ok;
if (__tlen == __tmultiple * (__to_end - __to))
__ret = noconv;
else if (__tlen == 0)
__ret = ok;
else
__ret = partial;
}
else
__ret = error;
......
......@@ -510,6 +510,9 @@
/* Define if you have the logl function. */
#undef HAVE_LOGL
/* Define if you have the mbsrtowcs function. */
#undef HAVE_MBSRTOWCS
/* Define if you have the modff function. */
#undef HAVE_MODFF
......@@ -519,6 +522,9 @@
/* Define if you have the nan function. */
#undef HAVE_NAN
/* Define if you have the nl_langinfo function. */
#undef HAVE_NL_LANGINFO
/* Define if you have the powf function. */
#undef HAVE_POWF
......@@ -579,6 +585,9 @@
/* Define if you have the wcslen function. */
#undef HAVE_WCSLEN
/* Define if you have the wcsrtombs function. */
#undef HAVE_WCSRTOMBS
/* Define if you have the wmemchr function. */
#undef HAVE_WMEMCHR
......
2000-08-22 Benjamin Kosnik <bkoz@gnu.org>
* iofwide.c (_IO_fwide): Simplify, as nl_langinfo is assumed.
2000-08-14 Benjamin Kosnik <bkoz@gnu.org>
* *: Merge with mainline glibc sources.
......
......@@ -27,15 +27,15 @@
#ifdef _LIBC
# include <dlfcn.h>
# include <wchar.h>
# include <locale/localeinfo.h>
# include <wcsmbs/wcsmbsload.h>
# include <iconv/gconv_int.h>
#endif
#include <stdlib.h>
#include <string.h>
#ifdef _LIBC
#if defined(_LIBC) || defined(_GLIBCPP_USE_WCHAR_T)
# include <langinfo.h>
# include <locale/localeinfo.h>
# include <wcsmbs/wcsmbsload.h>
# include <iconv/gconv_int.h>
#endif
......@@ -156,21 +156,14 @@ _IO_fwide (fp, mode)
should come up with a solution for the determination of the
currently used internal character set. */
const char *internal_ccs = _G_INTERNAL_CCS;
const char *external_ccs = NULL;
const char *external_ccs = nl_langinfo(CODESET);
# ifdef HAVE_NL_LANGINFO
external_ccs = nl_langinfo (CODESET);
# endif
if (external_ccs == NULL)
external_ccs = "ISO-8859-1";
cc->__cd_in = iconv_open (internal_ccs, external_ccs);
if (cc->__cd_in != (iconv_t) -1)
cc->__cd_out = iconv_open (external_ccs, internal_ccs);
if (cc->__cd_in == (iconv_t) -1 || cc->__cd_out == (iconv_t) -1)
/* XXX */
abort ();
}
# else
# error "somehow determine this from LC_CTYPE"
......
......@@ -136,13 +136,26 @@ namespace std {
template class __codecvt_abstract_base<char, char, mbstate_t>;
template class __codecvt_abstract_base<wchar_t, char, mbstate_t>;
#ifdef _GLIBCPP_USE_WCHAR_T
// XXX This should not be necessary. Unfortunately, the has_facet
// and use_facet defines are not in the headers, and are instead in
// locale_facets.tcc for the time being, as they use std::vector and
// thus compile times double when they are pushed up to the top-level
// includes.
typedef unsigned short unicode_t;
template
const codecvt<unicode_t, char, __enc_traits>&
use_facet<codecvt<unicode_t, char, __enc_traits> >(const locale&);
template
bool
has_facet<codecvt<unicode_t, char, __enc_traits> >(const locale &);
template
const codecvt<unicode_t, wchar_t, __enc_traits>&
use_facet<codecvt<unicode_t, wchar_t, __enc_traits> >(const locale&);
template
bool
has_facet<codecvt<unicode_t, wchar_t, __enc_traits> >(const locale &);
#endif
// collate
......
......@@ -97,8 +97,6 @@ void test01()
bool test = true;
const ext_type* e_lit = "black pearl jasmine tea";
const ext_type* efrom_next;
const int_type* ifrom_next;
int size = strlen(e_lit);
int_type i_lit_base[24] =
......@@ -108,6 +106,8 @@ void test01()
};
const int_type* i_lit = i_lit_base;
const ext_type* efrom_next;
const int_type* ifrom_next;
ext_type* e_arr = new ext_type[size + 1];
ext_type* eto_next;
int_type* i_arr = new int_type[size + 1];
......@@ -130,7 +130,7 @@ void test01()
VERIFY( ito_next == i_arr + size );
// out
unicode_codecvt::state_type state02;
unicode_codecvt::state_type state02("UNICODE", "ISO_8859-1");
initialize_state(state02);
result r2 = cvt.out(state02, i_lit, i_lit + size, ifrom_next,
e_arr, e_arr + size, eto_next);
......@@ -141,7 +141,7 @@ void test01()
// unshift
ext_traits::copy(e_arr, e_lit, size);
unicode_codecvt::state_type state03;
unicode_codecvt::state_type state03("UNICODE", "ISO_8859-1");
initialize_state(state03);
result r3 = cvt.unshift(state03, e_arr, e_arr + size, eto_next);
VERIFY( r3 == codecvt_base::noconv );
......@@ -153,7 +153,7 @@ void test01()
VERIFY( !cvt.always_noconv() );
unicode_codecvt::state_type state04;
unicode_codecvt::state_type state04("UNICODE", "ISO_8859-1");
initialize_state(state04);
int j = cvt.length(state03, e_lit, e_lit + size, 5);
VERIFY( j == 5 );
......
// 2000-08-22 Benjamin Kosnik <bkoz@cygnus.com>
// Copyright (C) 2000 Free Software Foundation
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 2, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING. If not, write to the Free
// Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
// USA.
// 22.2.1.5 - Template class codecvt [lib.locale.codecvt]
#include <locale>
#include <debug_assert.h>
using namespace std;
// Prepare STATE for a conversion by invoking its _M_init member.
void
initialize_state(__enc_traits& state)
{
  state._M_init();
}
// Partial specialization using __enc_traits.
// codecvt<unicode_t, wchar_t, __enc_traits>
// Exercises codecvt<unicode_t, wchar_t, __enc_traits>: converts a wide
// string literal to and from the unsigned short code units listed in
// i_lit_base, then checks the facet's unshift, encoding, always_noconv,
// length and max_length members.  Every conversion step gets its own
// freshly initialized state object.
void test01()
{
  typedef codecvt_base::result                    result;
  typedef unsigned short                          unicode_t;
  typedef unicode_t                               int_type;
  typedef wchar_t                                 ext_type;
  typedef __enc_traits                            enc_type;
  typedef codecvt<int_type, ext_type, enc_type>   unicode_codecvt;
  typedef char_traits<int_type>                   int_traits;
  typedef char_traits<ext_type>                   ext_traits;

  // NOTE(review): presumably referenced by the VERIFY macro -- confirm.
  bool test = true;

  const ext_type* e_lit = L"black pearl jasmine tea";
  int size = ext_traits::length(e_lit);

  // Expected internal representation of e_lit.  Each entry is the
  // character's code multiplied by 256 (e.g. 25088 == 0x6200 for 'b').
  // NOTE(review): presumably byte-swapped two-byte unicode -- confirm.
  // Only the first `size' entries are ever compared.
  int_type i_lit_base[24] =
  { 25088, 27648, 24832, 25344, 27392, 8192, 28672, 25856, 24832, 29184,
    27648, 8192, 27136, 24832, 29440, 27904, 26880, 28160, 25856, 8192, 29696,
    25856, 24832, 2560
  };
  const int_type* i_lit = i_lit_base;

  const ext_type* efrom_next;
  const int_type* ifrom_next;
  ext_type* e_arr = new ext_type[size + 1];
  ext_type* eto_next;
  int_type* i_arr = new int_type[size + 1];
  int_type* ito_next;

  // construct a locale object with the specialized facet.
  locale loc(locale::classic(), new unicode_codecvt);
  // sanity check the constructed locale has the specialized facet.
  VERIFY( has_facet<unicode_codecvt>(loc) );
  const unicode_codecvt& cvt = use_facet<unicode_codecvt>(loc);

  // in
  unicode_codecvt::state_type state01("UNICODE", "UCS4");
  initialize_state(state01);
  result r1 = cvt.in(state01, e_lit, e_lit + size, efrom_next,
                     i_arr, i_arr + size, ito_next);
  VERIFY( r1 == codecvt_base::ok );
  VERIFY( !int_traits::compare(i_arr, i_lit, size) );
  VERIFY( efrom_next == e_lit + size );
  VERIFY( ito_next == i_arr + size );

  // out
  unicode_codecvt::state_type state02("UNICODE", "UCS4");
  initialize_state(state02);
  result r2 = cvt.out(state02, i_lit, i_lit + size, ifrom_next,
                      e_arr, e_arr + size, eto_next);
  VERIFY( r2 == codecvt_base::ok );
  VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
  VERIFY( ifrom_next == i_lit + size );
  VERIFY( eto_next == e_arr + size );

  // unshift
  ext_traits::copy(e_arr, e_lit, size);
  unicode_codecvt::state_type state03("UNICODE", "UCS4");
  initialize_state(state03);
  result r3 = cvt.unshift(state03, e_arr, e_arr + size, eto_next);
  VERIFY( r3 == codecvt_base::noconv );
  VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
  VERIFY( eto_next == e_arr );

  int i = cvt.encoding();
  VERIFY( i == 0 );

  VERIFY( !cvt.always_noconv() );

  // length: use the state prepared for this step.  The original passed
  // state03 (already consumed by unshift above), leaving state04 unused.
  unicode_codecvt::state_type state04("UNICODE", "UCS4");
  initialize_state(state04);
  int j = cvt.length(state04, e_lit, e_lit + size, 5);
  VERIFY( j == 5 );

  int k = cvt.max_length();
  VERIFY( k == 1 );

  delete [] e_arr;
  delete [] i_arr;
}
// Test driver: run the single test case, then return 0.
int
main()
{
  test01();
  return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment