diff options
author | Corinna Vinschen <corinna@vinschen.de> | 2010-02-13 12:26:06 +0000 |
---|---|---|
committer | Corinna Vinschen <corinna@vinschen.de> | 2010-02-13 12:26:06 +0000 |
commit | 15a9e176569e91b319b6af59d8c874bb539c32a6 (patch) | |
tree | dad28077d60eab31c0912ccb2a546d03011371f3 /winsup/cygwin/regex/regcomp.c | |
parent | 03ac74c1686c75614511450541296c74150cb066 (diff) | |
download | cygnal-15a9e176569e91b319b6af59d8c874bb539c32a6.tar.gz cygnal-15a9e176569e91b319b6af59d8c874bb539c32a6.tar.bz2 cygnal-15a9e176569e91b319b6af59d8c874bb539c32a6.zip |
* regex/regcomp.c (xwcrtomb): Don't always convert Unicode chars outside
the base plane to UTF-8. Call wcsnrtombs instead to allow arbitrary
multibyte charsets.
Diffstat (limited to 'winsup/cygwin/regex/regcomp.c')
-rw-r--r-- | winsup/cygwin/regex/regcomp.c | 19 |
1 files changed, 11 insertions, 8 deletions
```diff
diff --git a/winsup/cygwin/regex/regcomp.c b/winsup/cygwin/regex/regcomp.c
index 3556dddd8..98459e0e3 100644
--- a/winsup/cygwin/regex/regcomp.c
+++ b/winsup/cygwin/regex/regcomp.c
@@ -1171,14 +1171,17 @@ xwcrtomb (char *s, wint_t wc, mbstate_t *ps)
 {
   if (sizeof (wchar_t) == 2 && wc >= 0x10000)
     {
-      /* UTF-16 systems can't handle these values directly. Since the
-         rest of the code isn't surrogate pair aware, we handle this here,
-         invisible for the rest of the code. */
-      *s++ = 0xf0 | ((wc & 0x1c0000) >> 18);
-      *s++ = 0x80 | ((wc & 0x3f000) >> 12);
-      *s++ = 0x80 | ((wc & 0xfc0) >> 6);
-      *s = 0x80 | (wc & 0x3f);
-      return 4;
+      /* UTF-16 wcrtomb can't handle these values directly. The rest of the
+         code isn't surrogate pair aware, so we handle this here. Convert
+         value to UTF-16 surrogate and call wcsrtombs to convert the "string"
+         to the correct multibyte representation, if any. */
+      wchar_t ws[2], *wsp = ws;
+      size_t n;
+
+      wc -= 0x10000;
+      ws[0] = 0xd800 | (wc >> 10);
+      ws[1] = 0xdc00 | (wc & 0x3ff);
+      return wcsnrtombs (s, &wsp, 2, MB_CUR_MAX, ps);
     }
   return wcrtomb (s, wc, ps);
 }
```