diff options
author | Thomas Fitzsimmons <fitzsim@redhat.com> | 2002-04-24 20:53:30 +0000 |
---|---|---|
committer | Thomas Fitzsimmons <fitzsim@redhat.com> | 2002-04-24 20:53:30 +0000 |
commit | eaa75b70e31aed8ba2823c7b37ff92230dd8c451 (patch) | |
tree | 255a744fbdffc3197c3f971ed1edae29c71a6134 /newlib/libc/stdlib/mbtowc_r.c | |
parent | b9f9f699372fae9f3b7926fa165d07fe68a949fb (diff) | |
download | cygnal-eaa75b70e31aed8ba2823c7b37ff92230dd8c451.tar.gz cygnal-eaa75b70e31aed8ba2823c7b37ff92230dd8c451.tar.bz2 cygnal-eaa75b70e31aed8ba2823c7b37ff92230dd8c451.zip |
* Makefile.am (check-DEJAGNU): New target.
(site.exp): Likewise.
* acinclude.m4 (NEWLIB_CONFIGURE): Replace AC_CANONICAL_HOST
with AC_CANONICAL_SYSTEM. Remove AC_CANONICAL_BUILD.
* libc/locale/locale.c (_setlocale_r): Add UTF-8 support.
* libc/stdlib/mbtowc_r.c (_mbtowc_r): Likewise.
* libc/stdlib/wctomb_r.c (_wctomb_r): Likewise.
* testsuite: New directory.
* testsuite/config: Likewise.
* testsuite/lib: Likewise.
* testsuite/newlib.locale: Likewise.
* testsuite/newlib.string: Likewise.
* testsuite/config/default.exp: New file.
* testsuite/lib/checkoutput.exp: New file.
* testsuite/lib/newlib.exp: New file.
* testsuite/lib/passfail.exp: New file.
* testsuite/newlib.locale/UTF-8.c: New file.
* testsuite/newlib.locale/UTF-8.exp: New file.
* testsuite/newlib.locale/locale.exp: New file.
* testsuite/newlib.string/string.exp: New file.
* testsuite/newlib.string/tstring.c: New file.
Diffstat (limited to 'newlib/libc/stdlib/mbtowc_r.c')
-rw-r--r-- | newlib/libc/stdlib/mbtowc_r.c | 187 |
1 files changed, 187 insertions, 0 deletions
diff --git a/newlib/libc/stdlib/mbtowc_r.c b/newlib/libc/stdlib/mbtowc_r.c index 4bf302359..866789f16 100644 --- a/newlib/libc/stdlib/mbtowc_r.c +++ b/newlib/libc/stdlib/mbtowc_r.c @@ -66,6 +66,193 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state), if (r->_current_locale == NULL || (strlen (r->_current_locale) <= 1)) { /* fall-through */ } + else if (!strcmp (r->_current_locale, "UTF-8")) + { + wchar_t char1 = 0; + + if (s == NULL) + return 0; /* UTF-8 character encodings are not state-dependent */ + + /* we know n >= 1 if we get here */ + *pwc = 0; + char1 = (wchar_t)*t; + + if (char1 == '\0') + return 0; /* s points to the null character */ + + if (char1 >= 0x0 && char1 <= 0x7f) + { + /* single-byte sequence */ + *pwc = char1; + return 1; + } + else if (char1 >= 0xc0 && char1 <= 0xdf) + { + /* two-byte sequence */ + if (n >= 2) + { + wchar_t char2 = (wchar_t)*(t+1); + + if (char2 < 0x80 || char2 > 0xbf) + return -1; + + if (char1 < 0xc2) + /* overlong UTF-8 sequence */ + return -1; + + *pwc = ((char1 & 0x1f) << 6) + | (char2 & 0x3f); + return 2; + } + else + return -1; + } + else if (char1 >= 0xe0 && char1 <= 0xef) + { + /* three-byte sequence */ + if (n >= 3) + { + wchar_t char2 = (wchar_t)*(t+1); + wchar_t char3 = (wchar_t)*(t+2); + + if (char2 < 0x80 || char2 > 0xbf) + return -1; + if (char3 < 0x80 || char3 > 0xbf) + return -1; + + if (char1 == 0xe0) + { + if (char2 < 0xa0) + /* overlong UTF-8 sequence */ + return -1; + } + + *pwc = ((char1 & 0x0f) << 12) + | ((char2 & 0x3f) << 6) + | (char3 & 0x3f); + + if (*pwc >= 0xd800 && *pwc <= 0xdfff) + { + return -1; + } + else + return 3; + } + else + return -1; + } + else if (char1 >= 0xf0 && char1 <= 0xf7) + { + /* four-byte sequence */ + if (n >= 4) + { + wchar_t char2 = (wchar_t)*(t+1); + wchar_t char3 = (wchar_t)*(t+2); + wchar_t char4 = (wchar_t)*(t+3); + + if (char2 < 0x80 || char2 > 0xbf) + return -1; + if (char3 < 0x80 || char3 > 0xbf) + return -1; + if (char4 < 0x80 || char4 > 0xbf) + return -1; + + if (char1 == 0xf0) + { + if (char2 < 0x90) + /* overlong UTF-8 sequence */ + return -1; + } + + *pwc = ((char1 & 0x07) << 18) + | ((char2 & 0x3f) << 12) + | ((char3 & 0x3f) << 6) + | (char4 & 0x3f); + + return 4; + } + else + return -1; + } + else if (char1 >= 0xf8 && char1 <= 0xfb) + { + /* five-byte sequence */ + if (n >= 5) + { + wchar_t char2 = (wchar_t)*(t+1); + wchar_t char3 = (wchar_t)*(t+2); + wchar_t char4 = (wchar_t)*(t+3); + wchar_t char5 = (wchar_t)*(t+4); + + if (char2 < 0x80 || char2 > 0xbf) + return -1; + if (char3 < 0x80 || char3 > 0xbf) + return -1; + if (char4 < 0x80 || char4 > 0xbf) + return -1; + if (char5 < 0x80 || char5 > 0xbf) + return -1; + + if (char1 == 0xf8) + { + if (char2 < 0x88) + /* overlong UTF-8 sequence */ + return -1; + } + + *pwc = ((char1 & 0x03) << 24) + | ((char2 & 0x3f) << 18) + | ((char3 & 0x3f) << 12) + | ((char4 & 0x3f) << 6) + | (char5 & 0x3f); + return 5; + } + else + return -1; + } + else if (char1 >= 0xfc && char1 <= 0xfd) + { + /* six-byte sequence */ + if (n >= 6) + { + wchar_t char2 = (wchar_t)*(t+1); + wchar_t char3 = (wchar_t)*(t+2); + wchar_t char4 = (wchar_t)*(t+3); + wchar_t char5 = (wchar_t)*(t+4); + wchar_t char6 = (wchar_t)*(t+5); + + if (char2 < 0x80 || char2 > 0xbf) + return -1; + if (char3 < 0x80 || char3 > 0xbf) + return -1; + if (char4 < 0x80 || char4 > 0xbf) + return -1; + if (char5 < 0x80 || char5 > 0xbf) + return -1; + if (char6 < 0x80 || char6 > 0xbf) + return -1; + + if (char1 == 0xfc) + { + if (char2 < 0x84) + /* overlong UTF-8 sequence */ + return -1; + } + + *pwc = ((char1 & 0x01) << 30) + | ((char2 & 0x3f) << 24) + | ((char3 & 0x3f) << 18) + | ((char4 & 0x3f) << 12) + | ((char5 & 0x3f) << 6) + | (char6 & 0x3f); + return 6; + } + else + return -1; + } + else + return -1; + } else if (!strcmp (r->_current_locale, "C-SJIS")) { int char1; |