* libc/ctype/iswalpha.c: Handle all wchar_t as Unicode on _MB_CAPABLE systems. * libc/ctype/iswblank.c: Ditto. * libc/ctype/iswcntrl.c: Ditto. * libc/ctype/iswprint.c: Ditto. * libc/ctype/iswpunct.c: Ditto. * libc/ctype/iswspace.c: Ditto. * libc/ctype/jp2uc.c (__jp2uc): On Cygwin, just return c. Explain why. * libc/ctype/towlower.c: Ditto. * libc/ctype/towupper.c: Ditto. * libc/include/sys/config.h: Define _MB_EXTENDED_CHARSETS_ISO and _MB_EXTENDED_CHARSETS_WINDOWS if _MB_EXTENDED_CHARSETS_ALL is defined. Define _MB_EXTENDED_CHARSETS_ALL on Cygwin only for now. * libc/include/sys/reent.h (struct _reent): Mark _current_category and _current_locale as unused. * libc/locale/locale.c: Add new charset support to documentation. Include ../stdio/local.h from here. (lc_ctype_charset): Set to "ASCII" by default. (lc_message_charset): Ditto. (_setlocale_r): Don't set _current_category and _current_locale. (loadlocale): Add Cygwin codepage support. On _MB_CAPABLE systems, set the __mbtowc and __wctomb function pointers to the functions corresponding to the current charset. Don't allow non-existent ISO-8859-12 charset. Add support for Windows singlebyte codepages. On Cygwin, add support for GBK, CP949, and BIG5. On Cygwin, call __set_ctype() in case the category is LC_CTYPE. Don't set _current_category and _current_locale. * libc/stdlib/Makefile.am (GENERAL_SOURCES): Add sb_charsets.c. * libc/stdlib/Makefile.in: Regenerate. * libc/stdlib/local.h: Add prototype for __locale_charset. Add prototypes for __mbtowc and __wctomb pointers. Add prototypes for charset-specific _wctomb_r and _mbtowc_r functions. Declare tables and functions from sb_charsets.c. * libc/stdlib/mbtowc_r.c (__mbtowc): Define. Set to __ascii_mbtowc by default. (_mbtowc_r): Just call __mbtowc from here. (__ascii_mbtowc): New function. (__iso_mbtowc): New function. (__cp_mbtowc): New function. (__utf8_mbtowc): New function. (__sjis_mbtowc): New function. Disable on Cygwin. (__eucjp_mbtowc): New function. Disable on Cygwin. (__jis_mbtowc): New function. Disable on Cygwin.
* libc/stdlib/sb_charsets.c: New file, adding singlebyte to UTF conversion tables for all ISO and CP charsets. (__iso_8859_index): New function. (__cp_index): New function. * libc/stdlib/wctomb_r.c (__wctomb): Define. Set to __ascii_wctomb by default. (_wctomb_r): Just call __wctomb from here. (__ascii_wctomb): New function. (__utf8_wctomb): New function. (__sjis_wctomb): New function. Disable on Cygwin. (__eucjp_wctomb): New function. Disable on Cygwin. (__jis_wctomb): New function. Disable on Cygwin. (__iso_wctomb): New function. (__cp_wctomb): New function.
author: Corinna Vinschen <corinna@vinschen.de> 2009-03-24 10:13:27 +0000
committer: Corinna Vinschen <corinna@vinschen.de> 2009-03-24 10:13:27 +0000
commit: 28186e81d947a830d9895cecc2d8e836a3cbccd0 (patch)
tree: 8ef79212f3476f0b1fbbef8fa46b97ea297c6fe3 /newlib/libc/stdlib/wctomb_r.c
parent: 0258b687228f0d9d5191615ba0a13f7496f09d3b (diff)
download: cygnal-28186e81d947a830d9895cecc2d8e836a3cbccd0.tar.gz
cygnal-28186e81d947a830d9895cecc2d8e836a3cbccd0.tar.bz2
cygnal-28186e81d947a830d9895cecc2d8e836a3cbccd0.zip
1 files changed, 301 insertions, 163 deletions
diff --git a/newlib/libc/stdlib/wctomb_r.c b/newlib/libc/stdlib/wctomb_r.c
index 8d6d3fc92..64210f232 100644
--- a/newlib/libc/stdlib/wctomb_r.c
+++ b/newlib/libc/stdlib/wctomb_r.c
@@ -4,11 +4,11 @@
 #include <wchar.h>
 #include <locale.h>
 #include "mbctype.h"
+#include "local.h"
 
-extern char *__locale_charset ();
-
-/* for some conversions, we use the __count field as a place to store a state value */
-#define __state __count
+int (*__wctomb) (struct _reent *, char *, wchar_t, const char *charset,
+		 mbstate_t *)
+    = __ascii_wctomb;
 
 int
 _DEFUN (_wctomb_r, (r, s, wchar, state),
@@ -17,196 +17,287 @@ _DEFUN (_wctomb_r, (r, s, wchar, state),
         wchar_t        _wchar _AND
         mbstate_t     *state)
 {
+  return __wctomb (r, s, _wchar, __locale_charset (), state);
+}
+
+int
+_DEFUN (__ascii_wctomb, (r, s, wchar, charset, state),
+        struct _reent *r       _AND 
+        char          *s       _AND
+        wchar_t        _wchar  _AND
+	const char    *charset _AND
+        mbstate_t     *state)
+{
   /* Avoids compiler warnings about comparisons that are always false
      due to limited range when sizeof(wchar_t) is 2 but sizeof(wint_t)
      is 4, as is the case on cygwin.  */
   wint_t wchar = _wchar;
 
-  if (strlen (__locale_charset ()) <= 1)
-    { /* fall-through */ }
-  else if (!strcmp (__locale_charset (), "UTF-8"))
+  if (s == NULL)
+    return 0;
+ 
+  if ((size_t)wchar >= 0x100)
     {
-      if (s == NULL)
-        return 0; /* UTF-8 encoding is not state-dependent */
+      r->_errno = EILSEQ;
+      return -1;
+    }
+
+  *s = (char) wchar;
+  return 1;
+}
 
-      if (state->__count == -4 && (wchar < 0xdc00 || wchar >= 0xdfff))
+#ifdef _MB_CAPABLE
+/* for some conversions, we use the __count field as a place to store a state value */
+#define __state __count
+
+int
+_DEFUN (__utf8_wctomb, (r, s, wchar, charset, state),
+        struct _reent *r       _AND 
+        char          *s       _AND
+        wchar_t        _wchar  _AND
+	const char    *charset _AND
+        mbstate_t     *state)
+{
+  wint_t wchar = _wchar;
+
+  if (s == NULL)
+    return 0; /* UTF-8 encoding is not state-dependent */
+
+  if (state->__count == -4 && (wchar < 0xdc00 || wchar >= 0xdfff))
+    {
+      /* At this point only the second half of a surrogate pair is valid. */
+      r->_errno = EILSEQ;
+      return -1;
+    }
+  if (wchar <= 0x7f)
+    {
+      *s = wchar;
+      return 1;
+    }
+  if (wchar >= 0x80 && wchar <= 0x7ff)
+    {
+      *s++ = 0xc0 | ((wchar & 0x7c0) >> 6);
+      *s   = 0x80 |  (wchar &  0x3f);
+      return 2;
+    }
+  if (wchar >= 0x800 && wchar <= 0xffff)
+    {
+      if (wchar >= 0xd800 && wchar <= 0xdfff)
 	{
-	  /* At this point only the second half of a surrogate pair is valid. */
-	  r->_errno = EILSEQ;
-	  return -1;
-	}
-      if (wchar <= 0x7f)
-        {
-          *s = wchar;
-          return 1;
-        }
-      else if (wchar >= 0x80 && wchar <= 0x7ff)
-        {
-          *s++ = 0xc0 | ((wchar & 0x7c0) >> 6);
-          *s   = 0x80 |  (wchar &  0x3f);
-          return 2;
-        }
-      else if (wchar >= 0x800 && wchar <= 0xffff)
-        {
-          if (wchar >= 0xd800 && wchar <= 0xdfff)
+	  wint_t tmp;
+	  /* UTF-16 surrogates -- must not occur in normal UCS-4 data */
+	  if (sizeof (wchar_t) != 2)
+	    {
+	      r->_errno = EILSEQ;
+	      return -1;
+	    }
+	  if (wchar >= 0xdc00)
 	    {
-	      wint_t tmp;
-	      /* UTF-16 surrogates -- must not occur in normal UCS-4 data */
-	      if (sizeof (wchar_t) != 2)
+	      /* Second half of a surrogate pair. It's not valid if
+		 we don't have already read a first half of a surrogate
+		 before. */
+	      if (state->__count != -4)
 		{
 		  r->_errno = EILSEQ;
 		  return -1;
 		}
-	      if (wchar >= 0xdc00)
-		{
-		  /* Second half of a surrogate pair. It's not valid if
-		     we don't have already read a first half of a surrogate
-		     before. */
-		  if (state->__count != -4)
-		    {
-		      r->_errno = EILSEQ;
-		      return -1;
-		    }
-		  /* If it's valid, reconstruct the full Unicode value and
-		     return the trailing three bytes of the UTF-8 char. */
-		  tmp = (state->__value.__wchb[0] << 16)
-			| (state->__value.__wchb[1] << 8)
-			| (wchar & 0x3ff);
-		  state->__count = 0;
-		  *s++ = 0x80 | ((tmp &  0x3f000) >> 12);
-		  *s++ = 0x80 | ((tmp &    0xfc0) >> 6);
-		  *s   = 0x80 |  (tmp &     0x3f);
-		  return 3;
-	      	}
-	      /* First half of a surrogate pair.  Store the state and return
-	         the first byte of the UTF-8 char. */
-	      tmp = ((wchar & 0x3ff) << 10) + 0x10000;
-	      state->__value.__wchb[0] = (tmp >> 16) & 0xff;
-	      state->__value.__wchb[1] = (tmp >> 8) & 0xff;
-	      state->__count = -4;
-	      *s = (0xf0 | ((tmp & 0x1c0000) >> 18));
-	      return 1;
+	      /* If it's valid, reconstruct the full Unicode value and
+		 return the trailing three bytes of the UTF-8 char. */
+	      tmp = (state->__value.__wchb[0] << 16)
+		    | (state->__value.__wchb[1] << 8)
+		    | (wchar & 0x3ff);
+	      state->__count = 0;
+	      *s++ = 0x80 | ((tmp &  0x3f000) >> 12);
+	      *s++ = 0x80 | ((tmp &    0xfc0) >> 6);
+	      *s   = 0x80 |  (tmp &     0x3f);
+	      return 3;
 	    }
-          *s++ = 0xe0 | ((wchar & 0xf000) >> 12);
-          *s++ = 0x80 | ((wchar &  0xfc0) >> 6);
-          *s   = 0x80 |  (wchar &   0x3f);
-          return 3;
-        }
-      else if (wchar >= 0x10000 && wchar <= 0x10ffff)
-        {
-          *s++ = 0xf0 | ((wchar & 0x1c0000) >> 18);
-          *s++ = 0x80 | ((wchar &  0x3f000) >> 12);
-          *s++ = 0x80 | ((wchar &    0xfc0) >> 6);
-          *s   = 0x80 |  (wchar &     0x3f);
-          return 4;
-        }
+	  /* First half of a surrogate pair.  Store the state and return
+	     the first byte of the UTF-8 char. */
+	  tmp = ((wchar & 0x3ff) << 10) + 0x10000;
+	  state->__value.__wchb[0] = (tmp >> 16) & 0xff;
+	  state->__value.__wchb[1] = (tmp >> 8) & 0xff;
+	  state->__count = -4;
+	  *s = (0xf0 | ((tmp & 0x1c0000) >> 18));
+	  return 1;
+	}
+      *s++ = 0xe0 | ((wchar & 0xf000) >> 12);
+      *s++ = 0x80 | ((wchar &  0xfc0) >> 6);
+      *s   = 0x80 |  (wchar &   0x3f);
+      return 3;
+    }
+  if (wchar >= 0x10000 && wchar <= 0x10ffff)
+    {
+      *s++ = 0xf0 | ((wchar & 0x1c0000) >> 18);
+      *s++ = 0x80 | ((wchar &  0x3f000) >> 12);
+      *s++ = 0x80 | ((wchar &    0xfc0) >> 6);
+      *s   = 0x80 |  (wchar &     0x3f);
+      return 4;
+    }
+
+  r->_errno = EILSEQ;
+  return -1;
+}
+
+/* Cygwin defines its own doublebyte charset conversion functions 
+   because the underlying OS requires wchar_t == UTF-16. */
+#ifndef __CYGWIN__
+int
+_DEFUN (__sjis_wctomb, (r, s, wchar, charset, state),
+        struct _reent *r       _AND 
+        char          *s       _AND
+        wchar_t        _wchar  _AND
+	const char    *charset _AND
+        mbstate_t     *state)
+{
+  wint_t wchar = _wchar;
+
+  unsigned char char2 = (unsigned char)wchar;
+  unsigned char char1 = (unsigned char)(wchar >> 8);
+
+  if (s == NULL)
+    return 0;  /* not state-dependent */
+
+  if (char1 != 0x00)
+    {
+    /* first byte is non-zero..validate multi-byte char */
+      if (_issjis1(char1) && _issjis2(char2)) 
+	{
+	  *s++ = (char)char1;
+	  *s = (char)char2;
+	  return 2;
+	}
       else
 	{
 	  r->_errno = EILSEQ;
 	  return -1;
 	}
     }
-  else if (!strcmp (__locale_charset (), "SJIS"))
+  *s = (char) wchar;
+  return 1;
+}
+
+int
+_DEFUN (__eucjp_wctomb, (r, s, wchar, charset, state),
+        struct _reent *r       _AND 
+        char          *s       _AND
+        wchar_t        _wchar  _AND
+	const char    *charset _AND
+        mbstate_t     *state)
+{
+  wint_t wchar = _wchar;
+  unsigned char char2 = (unsigned char)wchar;
+  unsigned char char1 = (unsigned char)(wchar >> 8);
+
+  if (s == NULL)
+    return 0;  /* not state-dependent */
+
+  if (char1 != 0x00)
     {
-      unsigned char char2 = (unsigned char)wchar;
-      unsigned char char1 = (unsigned char)(wchar >> 8);
-
-      if (s == NULL)
-        return 0;  /* not state-dependent */
-
-      if (char1 != 0x00)
-        {
-        /* first byte is non-zero..validate multi-byte char */
-          if (_issjis1(char1) && _issjis2(char2)) 
-            {
-              *s++ = (char)char1;
-              *s = (char)char2;
-              return 2;
-            }
-          else
-	    {
-	      r->_errno = EILSEQ;
-	      return -1;
-	    }
-        }
+    /* first byte is non-zero..validate multi-byte char */
+      if (_iseucjp (char1) && _iseucjp (char2)) 
+	{
+	  *s++ = (char)char1;
+	  *s = (char)char2;
+	  return 2;
+	}
+      else
+	{
+	  r->_errno = EILSEQ;
+	  return -1;
+	}
     }
-  else if (!strcmp (__locale_charset (), "EUCJP"))
+  *s = (char) wchar;
+  return 1;
+}
+
+int
+_DEFUN (__jis_wctomb, (r, s, wchar, charset, state),
+        struct _reent *r       _AND 
+        char          *s       _AND
+        wchar_t        _wchar  _AND
+	const char    *charset _AND
+        mbstate_t     *state)
+{
+  wint_t wchar = _wchar;
+  int cnt = 0; 
+  unsigned char char2 = (unsigned char)wchar;
+  unsigned char char1 = (unsigned char)(wchar >> 8);
+
+  if (s == NULL)
+    return 1;  /* state-dependent */
+
+  if (char1 != 0x00)
     {
-      unsigned char char2 = (unsigned char)wchar;
-      unsigned char char1 = (unsigned char)(wchar >> 8);
-
-      if (s == NULL)
-        return 0;  /* not state-dependent */
-
-      if (char1 != 0x00)
-        {
-        /* first byte is non-zero..validate multi-byte char */
-          if (_iseucjp (char1) && _iseucjp (char2)) 
-            {
-              *s++ = (char)char1;
-              *s = (char)char2;
-              return 2;
-            }
-          else
+    /* first byte is non-zero..validate multi-byte char */
+      if (_isjis (char1) && _isjis (char2)) 
+	{
+	  if (state->__state == 0)
 	    {
-	      r->_errno = EILSEQ;
-	      return -1;
+	      /* must switch from ASCII to JIS state */
+	      state->__state = 1;
+	      *s++ = ESC_CHAR;
+	      *s++ = '$';
+	      *s++ = 'B';
+	      cnt = 3;
 	    }
-        }
+	  *s++ = (char)char1;
+	  *s = (char)char2;
+	  return cnt + 2;
+	}
+      r->_errno = EILSEQ;
+      return -1;
     }
-  else if (!strcmp (__locale_charset (), "JIS"))
+  if (state->__state != 0)
     {
-      int cnt = 0; 
-      unsigned char char2 = (unsigned char)wchar;
-      unsigned char char1 = (unsigned char)(wchar >> 8);
-
-      if (s == NULL)
-        return 1;  /* state-dependent */
-
-      if (char1 != 0x00)
-        {
-        /* first byte is non-zero..validate multi-byte char */
-          if (_isjis (char1) && _isjis (char2)) 
-            {
-              if (state->__state == 0)
-                {
-                  /* must switch from ASCII to JIS state */
-                  state->__state = 1;
-                  *s++ = ESC_CHAR;
-                  *s++ = '$';
-                  *s++ = 'B';
-                  cnt = 3;
-                }
-              *s++ = (char)char1;
-              *s = (char)char2;
-              return cnt + 2;
-            }
-          else
-	    {
-	      r->_errno = EILSEQ;
-	      return -1;
-	    }
-        }
-      else
-        {
-          if (state->__state != 0)
-            {
-              /* must switch from JIS to ASCII state */
-              state->__state = 0;
-              *s++ = ESC_CHAR;
-              *s++ = '(';
-              *s++ = 'B';
-              cnt = 3;
-            }
-          *s = (char)char2;
-          return cnt + 1;
-        }
+      /* must switch from JIS to ASCII state */
+      state->__state = 0;
+      *s++ = ESC_CHAR;
+      *s++ = '(';
+      *s++ = 'B';
+      cnt = 3;
     }
+  *s = (char)char2;
+  return cnt + 1;
+}
+#endif /* !__CYGWIN__ */
+
+#ifdef _MB_EXTENDED_CHARSETS_ISO
+int
+_DEFUN (__iso_wctomb, (r, s, wchar, charset, state),
+        struct _reent *r       _AND 
+        char          *s       _AND
+        wchar_t        _wchar  _AND
+	const char    *charset _AND
+        mbstate_t     *state)
+{
+  wint_t wchar = _wchar;
 
   if (s == NULL)
     return 0;
+
+  /* wchars <= 0x9f translate to all ISO charsets directly. */
+  if (wchar >= 0xa0)
+    {
+      int iso_idx = __iso_8859_index (charset + 9);
+      if (iso_idx >= 0)
+	{
+	  unsigned char mb;
+
+	  if (s == NULL)
+	    return 0;
+
+	  for (mb = 0; mb < 0x60; ++mb)
+	    if (__iso_8859_conv[iso_idx][mb] == wchar)
+	      {
+		*s = (char) (mb + 0xa0);
+		return 1;
+	      }
+	  r->_errno = EILSEQ;
+	  return -1;
+	}
+    }
  
-  /* otherwise we are dealing with a single byte character */
   if ((size_t)wchar >= 0x100)
     {
       r->_errno = EILSEQ;
@@ -216,4 +307,51 @@ _DEFUN (_wctomb_r, (r, s, wchar, state),
   *s = (char) wchar;
   return 1;
 }
-    
+#endif /* _MB_EXTENDED_CHARSETS_ISO */
+
+#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
+int
+_DEFUN (__cp_wctomb, (r, s, wchar, charset, state),
+        struct _reent *r       _AND 
+        char          *s       _AND
+        wchar_t        _wchar  _AND
+	const char    *charset _AND
+        mbstate_t     *state)
+{
+  wint_t wchar = _wchar;
+
+  if (s == NULL)
+    return 0;
+
+  if (wchar >= 0x80)
+    {
+      int cp_idx = __cp_index (charset + 2);
+      if (cp_idx >= 0)
+	{
+	  unsigned char mb;
+
+	  if (s == NULL)
+	    return 0;
+
+	  for (mb = 0; mb < 0x80; ++mb)
+	    if (__cp_conv[cp_idx][mb] == wchar)
+	      {
+		*s = (char) (mb + 0x80);
+		return 1;
+	      }
+	  r->_errno = EILSEQ;
+	  return -1;
+	}
+    }
+
+  if ((size_t)wchar >= 0x100)
+    {
+      r->_errno = EILSEQ;
+      return -1;
+    }
+
+  *s = (char) wchar;
+  return 1;
+}
+#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
+#endif /* _MB_CAPABLE */
author	Corinna Vinschen <corinna@vinschen.de>	2009-03-24 10:13:27 +0000
committer	Corinna Vinschen <corinna@vinschen.de>	2009-03-24 10:13:27 +0000
commit	28186e81d947a830d9895cecc2d8e836a3cbccd0 (patch)
tree	8ef79212f3476f0b1fbbef8fa46b97ea297c6fe3 /newlib/libc/stdlib/wctomb_r.c
parent	0258b687228f0d9d5191615ba0a13f7496f09d3b (diff)
download	cygnal-28186e81d947a830d9895cecc2d8e836a3cbccd0.tar.gz cygnal-28186e81d947a830d9895cecc2d8e836a3cbccd0.tar.bz2 cygnal-28186e81d947a830d9895cecc2d8e836a3cbccd0.zip