locale-inst.cc: Add codecvt<unicode_t...

2000-08-22 Benjamin Kosnik <bkoz@purist.soma.redhat.com> * src/locale-inst.cc: Add codecvt<unicode_t, wchar_t, __enc_traits> instantiations for has_facet and use_facet. * testsuite/22_locale/codecvt_unicode_wchar_t.cc: New file, for testing two-byte unicode encodings converted to four-byte UCS4 encodings. * bits/codecvt.h (codecvt<__enc_traits>): Fix do_unshift. * testsuite/22_locale/codecvt_unicode_char.cc (test01): Add correct state/encoding information. * acinclude.m4 (GLIBCPP_CHECK_WCHAR_T_SUPPORT): Add wcsrtombs, mbsrtowcs checks as codecvt<wchar_t, char, mbstate_t> needs them now. Add checks for langinfo.h, nl_langinfo function call. * libio/iofwide.c (_IO_fwide): Simplify, as nl_langinfo is assumed. From-SVN: r35897

locale-inst.cc: Add codecvt<unicode_t...
2000-08-22 Benjamin Kosnik <bkoz@purist.soma.redhat.com> * src/locale-inst.cc: Add codecvt<unicode_t, wchar_t, __enc_traits> instantiations for has_facet and use_facet. * testsuite/22_locale/codecvt_unicode_wchar_t.cc: New file, for testing two-byte unicode encodings converted to four-byte UCS4 encodings. * bits/codecvt.h (codecvt<__enc_traits>): Fix do_unshift. * testsuite/22_locale/codecvt_unicode_char.cc (test01): Add correct state/encoding information. * acinclude.m4 (GLIBCPP_CHECK_WCHAR_T_SUPPORT): Add wcsrtombs, mbsrtowcs checks as codecvt<wchar_t, char, mbstate_t> needs them now. Add checks for langinfo.h, nl_langinfo function call. * libio/iofwide.c (_IO_fwide): Simplify, as nl_langinfo is assumed. From-SVN: r35897
ae658d47 · Benjamin Kosnik · 62432849 · ae658d47 · ae658d47 · ae658d47
Commit ae658d47 authored Aug 22, 2000 by Benjamin Kosnik
11 changed files
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
+2000-08-22  Benjamin Kosnik  <bkoz@purist.soma.redhat.com>
+
+	* src/locale-inst.cc: Add codecvt<unicode_t, wchar_t,
+	__enc_traits> instantiations for has_facet and use_facet.
+	* testsuite/22_locale/codecvt_unicode_wchar_t.cc: New file, for
+	testing two-byte unicode encodings converted to four-byte UCS4
+	encodings.
+
+	* bits/codecvt.h (codecvt<__enc_traits>): Fix do_unshift.
+	* testsuite/22_locale/codecvt_unicode_char.cc (test01): Add
+	correct state/encoding information.
+
+	* acinclude.m4 (GLIBCPP_CHECK_WCHAR_T_SUPPORT): Add wcsrtombs,
+	mbsrtowcs checks as codecvt<wchar_t, char, mbstate_t> needs them
+	now.
+	Add checks for langinfo.h, nl_langinfo function call.
+	* libio/iofwide.c (_IO_fwide): Simplify, as nl_langinfo is assumed.
+
 2000-08-22  Richard B. Kreckel  <Richard.Kreckel@Uni-Mainz.DE>

 	* config/cpu/alpha/bits/atomicity.h: Change __attribute__

--- a/libstdc++-v3/acinclude.m4
+++ b/libstdc++-v3/acinclude.m4
-
 dnl
 dnl Initialize configure bits.
 dnl
@@ -936,10 +935,12 @@ AC_DEFUN(GLIBCPP_CHECK_WCHAR_T_SUPPORT, [
    AC_MSG_RESULT($has_weof)

    dnl Tests for wide character functions used in char_traits<wchar_t>.
-    AC_CHECK_FUNCS(wcslen wmemchr wmemcmp wmemcpy wmemmove wmemset, ac_wfuncs=yes, ac_wfuncs=no)
+    AC_CHECK_FUNCS(wcslen wmemchr wmemcmp wmemcpy wmemmove wmemset \
+    wcsrtombs mbsrtowcs, ac_wfuncs=yes, ac_wfuncs=no)

    AC_MSG_CHECKING([for ISO C9X wchar_t support])
-    if test x"$has_weof" = xyes && test x"$has_wchar_minmax" = xyes && test x"$ac_wfuncs" = xyes; then
+    if test x"$has_weof" = xyes && test x"$has_wchar_minmax" = xyes \
+       && test x"$ac_wfuncs" = xyes; then
      ac_isoC9X_wchar_t=yes
    else
      ac_isoC9X_wchar_t=no
@@ -949,10 +950,13 @@ AC_DEFUN(GLIBCPP_CHECK_WCHAR_T_SUPPORT, [
    dnl Use iconv for wchar_t to char conversions. As such, check for 
    dnl X/Open Portability Guide, version 2 features (XPG2).
    AC_CHECK_HEADER(iconv.h, ac_has_iconv_h=yes, ac_has_iconv_h=no)
-    AC_CHECK_FUNCS(iconv_open iconv_close iconv, ac_XPG2funcs=yes, ac_XPG2funcs=no)
+    AC_CHECK_HEADER(langinfo.h, ac_has_langinfo_h=yes, ac_has_langinfo_h=no)
+    AC_CHECK_FUNCS(iconv_open iconv_close iconv nl_langinfo, \
+    ac_XPG2funcs=yes, ac_XPG2funcs=no)

    AC_MSG_CHECKING([for XPG2 wchar_t support])
-    if test x"$ac_has_iconv_h" = xyes && test x"$ac_XPG2funcs" = xyes; then
+    if test x"$ac_has_iconv_h" = xyes && test x"$ac_has_langinfo_h" = xyes \
+       && test x"$ac_XPG2funcs" = xyes; then
      ac_XPG2_wchar_t=yes
    else
      ac_XPG2_wchar_t=no
@@ -961,12 +965,9 @@ AC_DEFUN(GLIBCPP_CHECK_WCHAR_T_SUPPORT, [

    dnl At the moment, only enable wchar_t specializations if all the
    dnl above support is present.
-    dnl 2000-07-07-bkoz-hack-xxx
-#    ac_isoC9X_wchar_t=no
-    dnl 2000-07-07-bkoz-hack-xxx
-
    AC_MSG_CHECKING([for enabled wchar_t specializations])
-    if test x"$ac_isoC9X_wchar_t" = xyes && test x"$ac_XPG2_wchar_t" = xyes; then
+    if test x"$ac_isoC9X_wchar_t" = xyes \
+       && test x"$ac_XPG2_wchar_t" = xyes; then
      libinst_wstring_la="libinst-wstring.la"
      AC_DEFINE(_GLIBCPP_USE_WCHAR_T)
      AC_MSG_RESULT("yes")

--- a/libstdc++-v3/aclocal.m4
+++ b/libstdc++-v3/aclocal.m4
@@ -10,7 +10,6 @@ dnl but WITHOUT ANY WARRANTY, to the extent permitted by law; without
 dnl even the implied warranty of MERCHANTABILITY or FITNESS FOR A
 dnl PARTICULAR PURPOSE.

-
 dnl
 dnl Initialize configure bits.
 dnl
@@ -948,10 +947,12 @@ AC_DEFUN(GLIBCPP_CHECK_WCHAR_T_SUPPORT, [
    AC_MSG_RESULT($has_weof)

    dnl Tests for wide character functions used in char_traits<wchar_t>.
-    AC_CHECK_FUNCS(wcslen wmemchr wmemcmp wmemcpy wmemmove wmemset, ac_wfuncs=yes, ac_wfuncs=no)
+    AC_CHECK_FUNCS(wcslen wmemchr wmemcmp wmemcpy wmemmove wmemset \
+    wcsrtombs mbsrtowcs, ac_wfuncs=yes, ac_wfuncs=no)

    AC_MSG_CHECKING([for ISO C9X wchar_t support])
-    if test x"$has_weof" = xyes && test x"$has_wchar_minmax" = xyes && test x"$ac_wfuncs" = xyes; then
+    if test x"$has_weof" = xyes && test x"$has_wchar_minmax" = xyes \
+       && test x"$ac_wfuncs" = xyes; then
      ac_isoC9X_wchar_t=yes
    else
      ac_isoC9X_wchar_t=no
@@ -961,10 +962,13 @@ AC_DEFUN(GLIBCPP_CHECK_WCHAR_T_SUPPORT, [
    dnl Use iconv for wchar_t to char conversions. As such, check for 
    dnl X/Open Portability Guide, version 2 features (XPG2).
    AC_CHECK_HEADER(iconv.h, ac_has_iconv_h=yes, ac_has_iconv_h=no)
-    AC_CHECK_FUNCS(iconv_open iconv_close iconv, ac_XPG2funcs=yes, ac_XPG2funcs=no)
+    AC_CHECK_HEADER(langinfo.h, ac_has_langinfo_h=yes, ac_has_langinfo_h=no)
+    AC_CHECK_FUNCS(iconv_open iconv_close iconv nl_langinfo, \
+    ac_XPG2funcs=yes, ac_XPG2funcs=no)

    AC_MSG_CHECKING([for XPG2 wchar_t support])
-    if test x"$ac_has_iconv_h" = xyes && test x"$ac_XPG2funcs" = xyes; then
+    if test x"$ac_has_iconv_h" = xyes && test x"$ac_has_langinfo_h" = xyes \
+       && test x"$ac_XPG2funcs" = xyes; then
      ac_XPG2_wchar_t=yes
    else
      ac_XPG2_wchar_t=no
@@ -973,12 +977,9 @@ AC_DEFUN(GLIBCPP_CHECK_WCHAR_T_SUPPORT, [

    dnl At the moment, only enable wchar_t specializations if all the
    dnl above support is present.
-    dnl 2000-07-07-bkoz-hack-xxx
-#    ac_isoC9X_wchar_t=no
-    dnl 2000-07-07-bkoz-hack-xxx
-
    AC_MSG_CHECKING([for enabled wchar_t specializations])
-    if test x"$ac_isoC9X_wchar_t" = xyes && test x"$ac_XPG2_wchar_t" = xyes; then
+    if test x"$ac_isoC9X_wchar_t" = xyes \
+       && test x"$ac_XPG2_wchar_t" = xyes; then
      libinst_wstring_la="libinst-wstring.la"
      AC_DEFINE(_GLIBCPP_USE_WCHAR_T)
      AC_MSG_RESULT("yes")

--- a/libstdc++-v3/bits/codecvt.h
+++ b/libstdc++-v3/bits/codecvt.h
@@ -416,7 +416,12 @@ namespace std
 	  if (__conv != size_t(-1))
 	    {
 	      __to_next = reinterpret_cast<extern_type*>(__cto);
-	      __ret = ok;
+	      if (__tlen == __tmultiple * (__to_end - __to))
+		__ret = noconv;
+	      else if (__tlen == 0)
+		__ret = ok;
+	      else
+		__ret = partial;
 	    }
 	  else 
 	    __ret = error;

--- a/libstdc++-v3/config.h.in
+++ b/libstdc++-v3/config.h.in
@@ -510,6 +510,9 @@
 /* Define if you have the logl function.  */
 #undef HAVE_LOGL

+/* Define if you have the mbsrtowcs function.  */
+#undef HAVE_MBSRTOWCS
+
 /* Define if you have the modff function.  */
 #undef HAVE_MODFF

@@ -519,6 +522,9 @@
 /* Define if you have the nan function.  */
 #undef HAVE_NAN

+/* Define if you have the nl_langinfo function.  */
+#undef HAVE_NL_LANGINFO
+
 /* Define if you have the powf function.  */
 #undef HAVE_POWF

@@ -579,6 +585,9 @@
 /* Define if you have the wcslen function.  */
 #undef HAVE_WCSLEN

+/* Define if you have the wcsrtombs function.  */
+#undef HAVE_WCSRTOMBS
+
 /* Define if you have the wmemchr function.  */
 #undef HAVE_WMEMCHR


--- a/libstdc++-v3/configure
+++ b/libstdc++-v3/configure
--- a/libstdc++-v3/libio/ChangeLog
+++ b/libstdc++-v3/libio/ChangeLog
+2000-08-22  Benjamin Kosnik  <bkoz@gnu.org>
+
+	* iofwide.c (_IO_fwide): Simplify, as nl_langinfo is assumed.
+
 2000-08-14  Benjamin Kosnik  <bkoz@gnu.org>

 	* *: Merge with mainline glibc sources.

--- a/libstdc++-v3/libio/iofwide.c
+++ b/libstdc++-v3/libio/iofwide.c
@@ -27,15 +27,15 @@
 #ifdef _LIBC
 # include <dlfcn.h>
 # include <wchar.h>
+# include <locale/localeinfo.h>
+# include <wcsmbs/wcsmbsload.h>
+# include <iconv/gconv_int.h>
 #endif
 #include <stdlib.h>
 #include <string.h>

-#ifdef _LIBC
+#if defined(_LIBC) || defined(_GLIBCPP_USE_WCHAR_T)
 # include <langinfo.h>
-# include <locale/localeinfo.h>
-# include <wcsmbs/wcsmbsload.h>
-# include <iconv/gconv_int.h>
 #endif


@@ -156,21 +156,14 @@ _IO_fwide (fp, mode)
 	   should come up with a solution for the determination of the
 	   currently used internal character set.  */
 	const char *internal_ccs = _G_INTERNAL_CCS;
-	const char *external_ccs = NULL;
+	const char *external_ccs = nl_langinfo(CODESET);

-#  ifdef HAVE_NL_LANGINFO
-	external_ccs = nl_langinfo (CODESET);
-#  endif
 	if (external_ccs == NULL)
 	  external_ccs = "ISO-8859-1";

 	cc->__cd_in = iconv_open (internal_ccs, external_ccs);
 	if (cc->__cd_in != (iconv_t) -1)
 	  cc->__cd_out = iconv_open (external_ccs, internal_ccs);
-
-	if (cc->__cd_in == (iconv_t) -1 || cc->__cd_out == (iconv_t) -1)
-	  /* XXX */
-	  abort ();
      }
 # else
 #  error "somehow determine this from LC_CTYPE"

--- a/libstdc++-v3/src/locale-inst.cc
+++ b/libstdc++-v3/src/locale-inst.cc
@@ -136,13 +136,26 @@ namespace std {
  template class __codecvt_abstract_base<char, char, mbstate_t>;
  template class __codecvt_abstract_base<wchar_t, char, mbstate_t>;
 #ifdef _GLIBCPP_USE_WCHAR_T
+  // XXX This should not be necessary. Unfortunately, the has_facet
+  // and use_facet definitions are not in the headers, and are instead
+  // in locale_facets.tcc for the time being, as they use std::vector
+  // and thus compile times double when they are pushed up to the
+  // top-level includes.
  typedef unsigned short			unicode_t;
+
  template
    const codecvt<unicode_t, char, __enc_traits>& 
    use_facet<codecvt<unicode_t, char, __enc_traits> >(const locale&);
  template 
    bool
    has_facet<codecvt<unicode_t, char, __enc_traits> >(const locale &);
+
+  template
+    const codecvt<unicode_t, wchar_t, __enc_traits>& 
+    use_facet<codecvt<unicode_t, wchar_t, __enc_traits> >(const locale&);
+  template 
+    bool
+    has_facet<codecvt<unicode_t, wchar_t, __enc_traits> >(const locale &);
 #endif

  // collate

--- a/libstdc++-v3/testsuite/22_locale/codecvt_unicode_char.cc
+++ b/libstdc++-v3/testsuite/22_locale/codecvt_unicode_char.cc
@@ -97,8 +97,6 @@ void test01()

  bool 			test = true;
  const ext_type* 	e_lit = "black pearl jasmine tea";
-  const ext_type*       efrom_next;
-  const int_type*       ifrom_next;
  int 			size = strlen(e_lit);

  int_type 		i_lit_base[24] = 
@@ -108,6 +106,8 @@ void test01()
  };
  const int_type* 	i_lit = i_lit_base;

+  const ext_type*       efrom_next;
+  const int_type*       ifrom_next;
  ext_type* 		e_arr = new ext_type[size + 1];
  ext_type*		eto_next;
  int_type* 		i_arr = new int_type[size + 1];
@@ -130,7 +130,7 @@ void test01()
  VERIFY( ito_next == i_arr + size );

  // out
-  unicode_codecvt::state_type state02;
+  unicode_codecvt::state_type state02("UNICODE", "ISO_8859-1");
  initialize_state(state02);  
  result r2 = cvt.out(state02, i_lit, i_lit + size, ifrom_next, 
 		       e_arr, e_arr + size, eto_next);
@@ -141,7 +141,7 @@ void test01()

  // unshift
  ext_traits::copy(e_arr, e_lit, size);
-  unicode_codecvt::state_type state03;
+  unicode_codecvt::state_type state03("UNICODE", "ISO_8859-1");
  initialize_state(state03);
  result r3 = cvt.unshift(state03, e_arr, e_arr + size, eto_next);
  VERIFY( r3 == codecvt_base::noconv );
@@ -153,7 +153,7 @@ void test01()

  VERIFY( !cvt.always_noconv() );

-  unicode_codecvt::state_type state04;
+  unicode_codecvt::state_type state04("UNICODE", "ISO_8859-1");
  initialize_state(state04);
  int j = cvt.length(state03, e_lit, e_lit + size, 5);
  VERIFY( j == 5 );

--- a/libstdc++-v3/testsuite/22_locale/codecvt_unicode_wchar_t.cc
+++ b/libstdc++-v3/testsuite/22_locale/codecvt_unicode_wchar_t.cc
+// 2000-08-22 Benjamin Kosnik <bkoz@cygnus.com>
+
+// Copyright (C) 2000 Free Software Foundation
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 2, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING.  If not, write to the Free
+// Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+// USA.
+
+// 22.2.1.5 - Template class codecvt [lib.locale.codecvt]
+
+#include <locale>
+#include <debug_assert.h>
+
+using namespace std;
+
+// Prepare an __enc_traits conversion state for use by calling its
+// _M_init member.  Each state object in test01 is passed through here
+// right after construction and before it is handed to the facet.
+void
+initialize_state(__enc_traits& state)
+{ state._M_init(); }
+
+// Partial specialization using __enc_traits.
+// codecvt<unicode_t, wchar_t, __enc_traits>
+//
+// Installs the specialized facet in a locale and exercises in, out,
+// unshift, encoding, always_noconv, length and max_length, using
+// conversion states built from the "UNICODE" and "UCS4" encoding names.
+void test01()
+{
+  typedef codecvt_base::result			result;
+  typedef unsigned short			unicode_t;
+  typedef unicode_t				int_type;
+  typedef wchar_t				ext_type;
+  typedef __enc_traits				enc_type;
+  typedef codecvt<int_type, ext_type, enc_type>	unicode_codecvt;
+  typedef char_traits<int_type>			int_traits;
+  typedef char_traits<ext_type>			ext_traits;
+
+  bool 			test = true;
+  const ext_type* 	e_lit = L"black pearl jasmine tea";
+  int 			size = ext_traits::length(e_lit);
+
+  // Expected internal (two-byte) form of e_lit.  Each value carries the
+  // character code in its high byte, e.g. 25088 == 0x6200 for 'b' (0x62).
+  int_type 		i_lit_base[24] = 
+  { 25088, 27648, 24832, 25344, 27392, 8192, 28672, 25856, 24832, 29184, 
+    27648, 8192, 27136, 24832, 29440, 27904, 26880, 28160, 25856, 8192, 29696,
+    25856, 24832, 2560
+  };
+  const int_type* 	i_lit = i_lit_base;
+
+  const ext_type*       efrom_next;
+  const int_type*       ifrom_next;
+  ext_type* 		e_arr = new ext_type[size + 1];
+  ext_type*		eto_next;
+  int_type* 		i_arr = new int_type[size + 1];
+  int_type*		ito_next;
+
+  // construct a locale object with the specialized facet.
+  locale 		loc(locale::classic(), new unicode_codecvt);
+  // sanity check the constructed locale has the specialized facet.
+  VERIFY( has_facet<unicode_codecvt>(loc) );
+  const unicode_codecvt&	cvt = use_facet<unicode_codecvt>(loc); 
+
+  // in: external (wchar_t) -> internal (unicode_t); the whole input
+  // must be consumed and the output must match i_lit.
+  unicode_codecvt::state_type state01("UNICODE", "UCS4");
+  initialize_state(state01);
+  result r1 = cvt.in(state01, e_lit, e_lit + size, efrom_next, 
+		     i_arr, i_arr + size, ito_next);
+  VERIFY( r1 == codecvt_base::ok );
+  VERIFY( !int_traits::compare(i_arr, i_lit, size) ); 
+  VERIFY( efrom_next == e_lit + size );
+  VERIFY( ito_next == i_arr + size );
+
+  // out: internal (unicode_t) -> external (wchar_t); the round trip
+  // must reproduce e_lit.
+  unicode_codecvt::state_type state02("UNICODE", "UCS4");
+  initialize_state(state02);  
+  result r2 = cvt.out(state02, i_lit, i_lit + size, ifrom_next, 
+		       e_arr, e_arr + size, eto_next);
+  VERIFY( r2 == codecvt_base::ok );
+  VERIFY( !ext_traits::compare(e_arr, e_lit, size) ); 
+  VERIFY( ifrom_next == i_lit + size );
+  VERIFY( eto_next == e_arr + size );
+
+  // unshift: with a fresh state nothing should be written, so the
+  // buffer stays untouched and eto_next stays at the start.
+  ext_traits::copy(e_arr, e_lit, size);
+  unicode_codecvt::state_type state03("UNICODE", "UCS4");
+  initialize_state(state03);
+  result r3 = cvt.unshift(state03, e_arr, e_arr + size, eto_next);
+  VERIFY( r3 == codecvt_base::noconv );
+  VERIFY( !ext_traits::compare(e_arr, e_lit, size) ); 
+  VERIFY( eto_next == e_arr );
+
+  int i = cvt.encoding();
+  VERIFY( i == 0 );
+
+  VERIFY( !cvt.always_noconv() );
+
+  // length: use the freshly initialized state04 rather than state03,
+  // which was already handed to unshift above.
+  unicode_codecvt::state_type state04("UNICODE", "UCS4");
+  initialize_state(state04);
+  int j = cvt.length(state04, e_lit, e_lit + size, 5);
+  VERIFY( j == 5 );
+
+  int k = cvt.max_length();
+  VERIFY( k == 1 );
+
+  delete [] e_arr;
+  delete [] i_arr;
+}
+
+// Test driver: runs test01 and returns 0.
+int main ()
+{
+  test01();
+
+  return 0;
+}
+
+
+