summary refs log tree commit diff stats
path: root/newlib/libc/stdlib/wctomb_r.c
diff options
context:
space:
mode:
author Corinna Vinschen <corinna@vinschen.de> 2009-03-24 10:13:27 +0000
committer Corinna Vinschen <corinna@vinschen.de> 2009-03-24 10:13:27 +0000
commit 28186e81d947a830d9895cecc2d8e836a3cbccd0 (patch)
tree 8ef79212f3476f0b1fbbef8fa46b97ea297c6fe3 /newlib/libc/stdlib/wctomb_r.c
parent 0258b687228f0d9d5191615ba0a13f7496f09d3b (diff)
download cygnal-28186e81d947a830d9895cecc2d8e836a3cbccd0.tar.gz
cygnal-28186e81d947a830d9895cecc2d8e836a3cbccd0.tar.bz2
cygnal-28186e81d947a830d9895cecc2d8e836a3cbccd0.zip
* libc/ctype/iswalpha.c: Handle all wchar_t as unicode on
_MB_CAPABLE systems. * libc/ctype/iswblank.c: Ditto. * libc/ctype/iswcntrl.c: Ditto. * libc/ctype/iswprint.c: Ditto. * libc/ctype/iswpunct.c: Ditto. * libc/ctype/iswspace.c: Ditto. * libc/ctype/jp2uc.c (__jp2uc): On Cygwin, just return c. Explain why. * libc/ctype/towlower.c: Ditto. * libc/ctype/towupper.c: Ditto. * libc/include/sys/config.h: Define _MB_EXTENDED_CHARSETS_ISO and _MB_EXTENDED_CHARSETS_WINDOWS if _MB_EXTENDED_CHARSETS_ALL is defined. Define _MB_EXTENDED_CHARSETS_ALL on Cygwin only for now. * libc/include/sys/reent.h (struct _reent): Mark _current_category and _current_locale as unused. * libc/locale/locale.c: Add new charset support to documentation. Include ../stdio/local.h from here. (lc_ctype_charset): Set to "ASCII" by default. (lc_message_charset): Ditto. (_setlocale_r): Don't set _current_category and _current_locale. (loadlocale): Add Cygwin codepage support. On _MB_CAPABLE systems, set __mbtowc and __wctomb function pointers to the functions corresponding to the current charset. Don't allow non-existent ISO-8859-12 charset. Add support for Windows singlebyte codepages. On Cygwin, add support for GBK, CP949, and BIG5. On Cygwin, call __set_ctype() in case the category is LC_CTYPE. Don't set _current_category and _current_locale. * libc/stdlib/Makefile.am (GENERAL_SOURCES): Add sb_charsets.c. * libc/stdlib/Makefile.in: Regenerate. * libc/stdlib/local.h: Add prototype for __locale_charset. Add prototypes for __mbtowc and __wctomb pointers. Add prototypes for charset-specific _wctomb_r and _mbtowc_r functions. Declare tables and functions from sb_charsets.c. * libc/stdlib/mbtowc_r.c (__mbtowc): Define. Set to __ascii_mbtowc by default. (_mbtowc_r): Just call __mbtowc from here. (__ascii_mbtowc): New function. (__iso_mbtowc): New function. (__cp_mbtowc): New function. (__utf8_mbtowc): New function. (__sjis_mbtowc): New function. Disable on Cygwin. (__eucjp_mbtowc): New function. Disable on Cygwin. (__jis_mbtowc): New function. Disable on Cygwin.
* libc/stdlib/sb_charsets.c: New file, adding singlebyte to UTF conversion tables for all ISO and CP charsets. (__iso_8859_index): New function. (__cp_index): New function. * libc/stdlib/wctomb_r.c (__wctomb): Define. Set to __ascii_wctomb by default. (_wctomb_r): Just call __wctomb from here. (__ascii_wctomb): New function. (__utf8_wctomb): New function. (__sjis_wctomb): New function. Disable on Cygwin. (__eucjp_wctomb): New function. Disable on Cygwin. (__jis_wctomb): New function. Disable on Cygwin. (__iso_wctomb): New function. (__cp_wctomb): New function.
Diffstat (limited to 'newlib/libc/stdlib/wctomb_r.c')
-rw-r--r-- newlib/libc/stdlib/wctomb_r.c 464
1 files changed, 301 insertions, 163 deletions
diff --git a/newlib/libc/stdlib/wctomb_r.c b/newlib/libc/stdlib/wctomb_r.c
index 8d6d3fc92..64210f232 100644
--- a/newlib/libc/stdlib/wctomb_r.c
+++ b/newlib/libc/stdlib/wctomb_r.c
@@ -4,11 +4,11 @@
#include <wchar.h>
#include <locale.h>
#include "mbctype.h"
+#include "local.h"
-extern char *__locale_charset ();
-
-/* for some conversions, we use the __count field as a place to store a state value */
-#define __state __count
+int (*__wctomb) (struct _reent *, char *, wchar_t, const char *charset,
+ mbstate_t *)
+ = __ascii_wctomb;
int
_DEFUN (_wctomb_r, (r, s, wchar, state),
@@ -17,196 +17,287 @@ _DEFUN (_wctomb_r, (r, s, wchar, state),
wchar_t _wchar _AND
mbstate_t *state)
{
+ return __wctomb (r, s, _wchar, __locale_charset (), state);
+}
+
+int
+_DEFUN (__ascii_wctomb, (r, s, wchar, charset, state),
+ struct _reent *r _AND
+ char *s _AND
+ wchar_t _wchar _AND
+ const char *charset _AND
+ mbstate_t *state)
+{
/* Avoids compiler warnings about comparisons that are always false
due to limited range when sizeof(wchar_t) is 2 but sizeof(wint_t)
is 4, as is the case on cygwin. */
wint_t wchar = _wchar;
- if (strlen (__locale_charset ()) <= 1)
- { /* fall-through */ }
- else if (!strcmp (__locale_charset (), "UTF-8"))
+ if (s == NULL)
+ return 0;
+
+ if ((size_t)wchar >= 0x100)
{
- if (s == NULL)
- return 0; /* UTF-8 encoding is not state-dependent */
+ r->_errno = EILSEQ;
+ return -1;
+ }
+
+ *s = (char) wchar;
+ return 1;
+}
- if (state->__count == -4 && (wchar < 0xdc00 || wchar >= 0xdfff))
+#ifdef _MB_CAPABLE
+/* for some conversions, we use the __count field as a place to store a state value */
+#define __state __count
+
+int
+_DEFUN (__utf8_wctomb, (r, s, wchar, charset, state),
+ struct _reent *r _AND
+ char *s _AND
+ wchar_t _wchar _AND
+ const char *charset _AND
+ mbstate_t *state)
+{
+ wint_t wchar = _wchar;
+
+ if (s == NULL)
+ return 0; /* UTF-8 encoding is not state-dependent */
+
+ if (state->__count == -4 && (wchar < 0xdc00 || wchar >= 0xdfff))
+ {
+ /* At this point only the second half of a surrogate pair is valid. */
+ r->_errno = EILSEQ;
+ return -1;
+ }
+ if (wchar <= 0x7f)
+ {
+ *s = wchar;
+ return 1;
+ }
+ if (wchar >= 0x80 && wchar <= 0x7ff)
+ {
+ *s++ = 0xc0 | ((wchar & 0x7c0) >> 6);
+ *s = 0x80 | (wchar & 0x3f);
+ return 2;
+ }
+ if (wchar >= 0x800 && wchar <= 0xffff)
+ {
+ if (wchar >= 0xd800 && wchar <= 0xdfff)
{
- /* At this point only the second half of a surrogate pair is valid. */
- r->_errno = EILSEQ;
- return -1;
- }
- if (wchar <= 0x7f)
- {
- *s = wchar;
- return 1;
- }
- else if (wchar >= 0x80 && wchar <= 0x7ff)
- {
- *s++ = 0xc0 | ((wchar & 0x7c0) >> 6);
- *s = 0x80 | (wchar & 0x3f);
- return 2;
- }
- else if (wchar >= 0x800 && wchar <= 0xffff)
- {
- if (wchar >= 0xd800 && wchar <= 0xdfff)
+ wint_t tmp;
+ /* UTF-16 surrogates -- must not occur in normal UCS-4 data */
+ if (sizeof (wchar_t) != 2)
+ {
+ r->_errno = EILSEQ;
+ return -1;
+ }
+ if (wchar >= 0xdc00)
{
- wint_t tmp;
- /* UTF-16 surrogates -- must not occur in normal UCS-4 data */
- if (sizeof (wchar_t) != 2)
+ /* Second half of a surrogate pair. It's not valid if
+ we don't have already read a first half of a surrogate
+ before. */
+ if (state->__count != -4)
{
r->_errno = EILSEQ;
return -1;
}
- if (wchar >= 0xdc00)
- {
- /* Second half of a surrogate pair. It's not valid if
- we don't have already read a first half of a surrogate
- before. */
- if (state->__count != -4)
- {
- r->_errno = EILSEQ;
- return -1;
- }
- /* If it's valid, reconstruct the full Unicode value and
- return the trailing three bytes of the UTF-8 char. */
- tmp = (state->__value.__wchb[0] << 16)
- | (state->__value.__wchb[1] << 8)
- | (wchar & 0x3ff);
- state->__count = 0;
- *s++ = 0x80 | ((tmp & 0x3f000) >> 12);
- *s++ = 0x80 | ((tmp & 0xfc0) >> 6);
- *s = 0x80 | (tmp & 0x3f);
- return 3;
- }
- /* First half of a surrogate pair. Store the state and return
- the first byte of the UTF-8 char. */
- tmp = ((wchar & 0x3ff) << 10) + 0x10000;
- state->__value.__wchb[0] = (tmp >> 16) & 0xff;
- state->__value.__wchb[1] = (tmp >> 8) & 0xff;
- state->__count = -4;
- *s = (0xf0 | ((tmp & 0x1c0000) >> 18));
- return 1;
+ /* If it's valid, reconstruct the full Unicode value and
+ return the trailing three bytes of the UTF-8 char. */
+ tmp = (state->__value.__wchb[0] << 16)
+ | (state->__value.__wchb[1] << 8)
+ | (wchar & 0x3ff);
+ state->__count = 0;
+ *s++ = 0x80 | ((tmp & 0x3f000) >> 12);
+ *s++ = 0x80 | ((tmp & 0xfc0) >> 6);
+ *s = 0x80 | (tmp & 0x3f);
+ return 3;
}
- *s++ = 0xe0 | ((wchar & 0xf000) >> 12);
- *s++ = 0x80 | ((wchar & 0xfc0) >> 6);
- *s = 0x80 | (wchar & 0x3f);
- return 3;
- }
- else if (wchar >= 0x10000 && wchar <= 0x10ffff)
- {
- *s++ = 0xf0 | ((wchar & 0x1c0000) >> 18);
- *s++ = 0x80 | ((wchar & 0x3f000) >> 12);
- *s++ = 0x80 | ((wchar & 0xfc0) >> 6);
- *s = 0x80 | (wchar & 0x3f);
- return 4;
- }
+ /* First half of a surrogate pair. Store the state and return
+ the first byte of the UTF-8 char. */
+ tmp = ((wchar & 0x3ff) << 10) + 0x10000;
+ state->__value.__wchb[0] = (tmp >> 16) & 0xff;
+ state->__value.__wchb[1] = (tmp >> 8) & 0xff;
+ state->__count = -4;
+ *s = (0xf0 | ((tmp & 0x1c0000) >> 18));
+ return 1;
+ }
+ *s++ = 0xe0 | ((wchar & 0xf000) >> 12);
+ *s++ = 0x80 | ((wchar & 0xfc0) >> 6);
+ *s = 0x80 | (wchar & 0x3f);
+ return 3;
+ }
+ if (wchar >= 0x10000 && wchar <= 0x10ffff)
+ {
+ *s++ = 0xf0 | ((wchar & 0x1c0000) >> 18);
+ *s++ = 0x80 | ((wchar & 0x3f000) >> 12);
+ *s++ = 0x80 | ((wchar & 0xfc0) >> 6);
+ *s = 0x80 | (wchar & 0x3f);
+ return 4;
+ }
+
+ r->_errno = EILSEQ;
+ return -1;
+}
+
+/* Cygwin defines its own doublebyte charset conversion functions
+ because the underlying OS requires wchar_t == UTF-16. */
+#ifndef __CYGWIN__
+int
+_DEFUN (__sjis_wctomb, (r, s, wchar, charset, state),
+ struct _reent *r _AND
+ char *s _AND
+ wchar_t _wchar _AND
+ const char *charset _AND
+ mbstate_t *state)
+{
+ wint_t wchar = _wchar;
+
+ unsigned char char2 = (unsigned char)wchar;
+ unsigned char char1 = (unsigned char)(wchar >> 8);
+
+ if (s == NULL)
+ return 0; /* not state-dependent */
+
+ if (char1 != 0x00)
+ {
+ /* first byte is non-zero..validate multi-byte char */
+ if (_issjis1(char1) && _issjis2(char2))
+ {
+ *s++ = (char)char1;
+ *s = (char)char2;
+ return 2;
+ }
else
{
r->_errno = EILSEQ;
return -1;
}
}
- else if (!strcmp (__locale_charset (), "SJIS"))
+ *s = (char) wchar;
+ return 1;
+}
+
+int
+_DEFUN (__eucjp_wctomb, (r, s, wchar, charset, state),
+ struct _reent *r _AND
+ char *s _AND
+ wchar_t _wchar _AND
+ const char *charset _AND
+ mbstate_t *state)
+{
+ wint_t wchar = _wchar;
+ unsigned char char2 = (unsigned char)wchar;
+ unsigned char char1 = (unsigned char)(wchar >> 8);
+
+ if (s == NULL)
+ return 0; /* not state-dependent */
+
+ if (char1 != 0x00)
{
- unsigned char char2 = (unsigned char)wchar;
- unsigned char char1 = (unsigned char)(wchar >> 8);
-
- if (s == NULL)
- return 0; /* not state-dependent */
-
- if (char1 != 0x00)
- {
- /* first byte is non-zero..validate multi-byte char */
- if (_issjis1(char1) && _issjis2(char2))
- {
- *s++ = (char)char1;
- *s = (char)char2;
- return 2;
- }
- else
- {
- r->_errno = EILSEQ;
- return -1;
- }
- }
+ /* first byte is non-zero..validate multi-byte char */
+ if (_iseucjp (char1) && _iseucjp (char2))
+ {
+ *s++ = (char)char1;
+ *s = (char)char2;
+ return 2;
+ }
+ else
+ {
+ r->_errno = EILSEQ;
+ return -1;
+ }
}
- else if (!strcmp (__locale_charset (), "EUCJP"))
+ *s = (char) wchar;
+ return 1;
+}
+
+int
+_DEFUN (__jis_wctomb, (r, s, wchar, charset, state),
+ struct _reent *r _AND
+ char *s _AND
+ wchar_t _wchar _AND
+ const char *charset _AND
+ mbstate_t *state)
+{
+ wint_t wchar = _wchar;
+ int cnt = 0;
+ unsigned char char2 = (unsigned char)wchar;
+ unsigned char char1 = (unsigned char)(wchar >> 8);
+
+ if (s == NULL)
+ return 1; /* state-dependent */
+
+ if (char1 != 0x00)
{
- unsigned char char2 = (unsigned char)wchar;
- unsigned char char1 = (unsigned char)(wchar >> 8);
-
- if (s == NULL)
- return 0; /* not state-dependent */
-
- if (char1 != 0x00)
- {
- /* first byte is non-zero..validate multi-byte char */
- if (_iseucjp (char1) && _iseucjp (char2))
- {
- *s++ = (char)char1;
- *s = (char)char2;
- return 2;
- }
- else
+ /* first byte is non-zero..validate multi-byte char */
+ if (_isjis (char1) && _isjis (char2))
+ {
+ if (state->__state == 0)
{
- r->_errno = EILSEQ;
- return -1;
+ /* must switch from ASCII to JIS state */
+ state->__state = 1;
+ *s++ = ESC_CHAR;
+ *s++ = '$';
+ *s++ = 'B';
+ cnt = 3;
}
- }
+ *s++ = (char)char1;
+ *s = (char)char2;
+ return cnt + 2;
+ }
+ r->_errno = EILSEQ;
+ return -1;
}
- else if (!strcmp (__locale_charset (), "JIS"))
+ if (state->__state != 0)
{
- int cnt = 0;
- unsigned char char2 = (unsigned char)wchar;
- unsigned char char1 = (unsigned char)(wchar >> 8);
-
- if (s == NULL)
- return 1; /* state-dependent */
-
- if (char1 != 0x00)
- {
- /* first byte is non-zero..validate multi-byte char */
- if (_isjis (char1) && _isjis (char2))
- {
- if (state->__state == 0)
- {
- /* must switch from ASCII to JIS state */
- state->__state = 1;
- *s++ = ESC_CHAR;
- *s++ = '$';
- *s++ = 'B';
- cnt = 3;
- }
- *s++ = (char)char1;
- *s = (char)char2;
- return cnt + 2;
- }
- else
- {
- r->_errno = EILSEQ;
- return -1;
- }
- }
- else
- {
- if (state->__state != 0)
- {
- /* must switch from JIS to ASCII state */
- state->__state = 0;
- *s++ = ESC_CHAR;
- *s++ = '(';
- *s++ = 'B';
- cnt = 3;
- }
- *s = (char)char2;
- return cnt + 1;
- }
+ /* must switch from JIS to ASCII state */
+ state->__state = 0;
+ *s++ = ESC_CHAR;
+ *s++ = '(';
+ *s++ = 'B';
+ cnt = 3;
}
+ *s = (char)char2;
+ return cnt + 1;
+}
+#endif /* !__CYGWIN__ */
+
+#ifdef _MB_EXTENDED_CHARSETS_ISO
+int
+_DEFUN (__iso_wctomb, (r, s, wchar, charset, state),
+ struct _reent *r _AND
+ char *s _AND
+ wchar_t _wchar _AND
+ const char *charset _AND
+ mbstate_t *state)
+{
+ wint_t wchar = _wchar;
if (s == NULL)
return 0;
+
+ /* wchars <= 0x9f translate to all ISO charsets directly. */
+ if (wchar >= 0xa0)
+ {
+ int iso_idx = __iso_8859_index (charset + 9);
+ if (iso_idx >= 0)
+ {
+ unsigned char mb;
+
+ if (s == NULL)
+ return 0;
+
+ for (mb = 0; mb < 0x60; ++mb)
+ if (__iso_8859_conv[iso_idx][mb] == wchar)
+ {
+ *s = (char) (mb + 0xa0);
+ return 1;
+ }
+ r->_errno = EILSEQ;
+ return -1;
+ }
+ }
- /* otherwise we are dealing with a single byte character */
if ((size_t)wchar >= 0x100)
{
r->_errno = EILSEQ;
@@ -216,4 +307,51 @@ _DEFUN (_wctomb_r, (r, s, wchar, state),
*s = (char) wchar;
return 1;
}
-
+#endif /* _MB_EXTENDED_CHARSETS_ISO */
+
+#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
+int
+_DEFUN (__cp_wctomb, (r, s, wchar, charset, state),
+ struct _reent *r _AND
+ char *s _AND
+ wchar_t _wchar _AND
+ const char *charset _AND
+ mbstate_t *state)
+{
+ wint_t wchar = _wchar;
+
+ if (s == NULL)
+ return 0;
+
+ if (wchar >= 0x80)
+ {
+ int cp_idx = __cp_index (charset + 2);
+ if (cp_idx >= 0)
+ {
+ unsigned char mb;
+
+ if (s == NULL)
+ return 0;
+
+ for (mb = 0; mb < 0x80; ++mb)
+ if (__cp_conv[cp_idx][mb] == wchar)
+ {
+ *s = (char) (mb + 0x80);
+ return 1;
+ }
+ r->_errno = EILSEQ;
+ return -1;
+ }
+ }
+
+ if ((size_t)wchar >= 0x100)
+ {
+ r->_errno = EILSEQ;
+ return -1;
+ }
+
+ *s = (char) wchar;
+ return 1;
+}
+#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
+#endif /* _MB_CAPABLE */