summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCorinna Vinschen <corinna@vinschen.de>2010-02-06 18:28:33 +0000
committerCorinna Vinschen <corinna@vinschen.de>2010-02-06 18:28:33 +0000
commit5eb556c8497ddd680c28e53e04c4badfe612004a (patch)
treea9d05f6982f2d8cf3b670e1e0d00655d8eaeea99
parent38d9821daf5c631d2c8f12bc772961973edaba0c (diff)
downloadcygnal-5eb556c8497ddd680c28e53e04c4badfe612004a.tar.gz
cygnal-5eb556c8497ddd680c28e53e04c4badfe612004a.tar.bz2
cygnal-5eb556c8497ddd680c28e53e04c4badfe612004a.zip
* libc/ctype/ctype_cp.h (_CTYPE_GEORGIAN_PS_128_254): Define.
(_CTYPE_GEORGIAN_PS_255): Define. (_CTYPE_PT154_128_254): Define. (_CTYPE_PT154_255): Define. (__ctype_cp): Add array members for above ctype definitions. * libc/locale/locale.c (loadlocale): Make TIS-620 charset name available for all targets. Add guards for setting the conversion function pointers. Add support for GEORGIAN-PS and PT154 charsets. Change documentation to reflect current behaviour more closely. * libc/locale/nl_langinfo.c (nl_langinfo): On Cygwin, translate "CP101" to "GEORGIAN-PS" and "CP102" to "PT154". * libc/stdlib/sb_charsets.c (__cp_conv): Add conversion arrays for GEORGIAN-PS and PT154. (__cp_index): Map invalid Windows codepage number 101 to GEORGIAN-PS conversion array, 102 to PT154 conversion array.
-rw-r--r--newlib/ChangeLog18
-rw-r--r--newlib/libc/ctype/ctype_cp.h62
-rw-r--r--newlib/libc/locale/locale.c117
-rw-r--r--newlib/libc/locale/nl_langinfo.c4
-rw-r--r--newlib/libc/stdlib/sb_charsets.c50
5 files changed, 216 insertions, 35 deletions
diff --git a/newlib/ChangeLog b/newlib/ChangeLog
index ee2c6a2be..ee5f3c09c 100644
--- a/newlib/ChangeLog
+++ b/newlib/ChangeLog
@@ -1,3 +1,21 @@
+2010-02-06 Corinna Vinschen <corinna@vinschen.de>
+
+ * libc/ctype/ctype_cp.h (_CTYPE_GEORGIAN_PS_128_254): Define.
+ (_CTYPE_GEORGIAN_PS_255): Define.
+ (_CTYPE_PT154_128_254): Define.
+ (_CTYPE_PT154_255): Define.
+ (__ctype_cp): Add array members for above ctype definitions.
+ * libc/locale/locale.c (loadlocale): Make TIS-620 charset name
+ available for all targets. Add guards for setting the conversion
+ function pointers. Add support for GEORGIAN-PS and PT154 charsets.
+ Change documentation to reflect current behaviour more closely.
+ * libc/locale/nl_langinfo.c (nl_langinfo): On Cygwin, translate
+ "CP101" to "GEORGIAN-PS" and "CP102" to "PT154".
+ * libc/stdlib/sb_charsets.c (__cp_conv): Add conversion arrays
+ for GEORGIAN-PS and PT154.
+ (__cp_index): Map invalid Windows codepage number 101 to
+ GEORGIAN-PS conversion array, 102 to PT154 conversion array.
+
2010-02-06 Ralf Corsepius <ralf.corsepius@rtems.org>
* libc/posix/telldir.c: Remove bogus nested prototype of lseek().
diff --git a/newlib/libc/ctype/ctype_cp.h b/newlib/libc/ctype/ctype_cp.h
index 40ecd206b..7ce0ab33d 100644
--- a/newlib/libc/ctype/ctype_cp.h
+++ b/newlib/libc/ctype/ctype_cp.h
@@ -433,6 +433,42 @@
_U, _U, _U, _U, _U, _U, _U, _U, \
_U, _U, _U, _U, _U, _U, _U
#define _CTYPE_CP21866_255 _U
+#define _CTYPE_GEORGIAN_PS_128_254 \
+ _P, 0, _P, _L, _P, _P, _P, _P, \
+ _P, _P, _U, _P, _U, _U, 0, 0, \
+ 0, _P, _P, _P, _P, _P, _P, _P, \
+ _P, _P, _L, _P, _L, 0, _L, _U, \
+ _S|_B, _P, _P, _P, _P, _P, _P, _P, \
+ _P, _P, _P, _P, _P, _P, _P, _P, \
+ _P, _P, _P, _P, _P, _P, _P, _P, \
+ _P, _P, _P, _P, _P, _P, _P, _P, \
+ _U|_L, _U|_L, _U|_L, _U|_L, _U|_L, _U|_L, _U|_L, _U|_L, \
+ _U|_L, _U|_L, _U|_L, _U|_L, _U|_L, _U|_L, _U|_L, _U|_L, \
+ _U|_L, _U|_L, _U|_L, _U|_L, _U|_L, _U|_L, _U|_L, _U|_L, \
+ _U|_L, _U|_L, _U|_L, _U|_L, _U|_L, _U|_L, _U|_L, _U|_L, \
+ _U|_L, _U|_L, _U|_L, _U|_L, _U|_L, _U|_L, _L, _L, \
+ _L, _L, _L, _L, _L, _L, _L, _L, \
+ _L, _L, _L, _L, _L, _L, _L, _P, \
+ _L, _L, _L, _L, _L, _L, _L
+#define _CTYPE_GEORGIAN_PS_255 _L
+#define _CTYPE_PT154_128_254 \
+ _U, _U, _U, _L, _P, _P, _U, _U, \
+ _U, _L, _U, _U, _U, _U, _U, _U, \
+ _L, _P, _P, _P, _P, _P, _P, _P, \
+ _L, _L, _L, _L, _L, _L, _L, _L, \
+ _S|_B, _U, _L, _U, _U, _U, _U, _P, \
+ _U, _P, _U, _P, _P, _L, _P, _U, \
+ _P, _L, _U, _L, _L, _L, _P, _P, \
+ _L, _P, _L, _P, _L, _U, _L, _L, \
+ _U, _U, _U, _U, _U, _U, _U, _U, \
+ _U, _U, _U, _U, _U, _U, _U, _U, \
+ _U, _U, _U, _U, _U, _U, _U, _U, \
+ _U, _U, _U, _U, _U, _U, _U, _U, \
+ _L, _L, _L, _L, _L, _L, _L, _L, \
+ _L, _L, _L, _L, _L, _L, _L, _L, \
+ _L, _L, _L, _L, _L, _L, _L, _L, \
+ _L, _L, _L, _L, _L, _L, _L
+#define _CTYPE_PT154_255 _L
extern int __cp_index (const char *charset_ext);
@@ -442,7 +478,7 @@ extern int __cp_index (const char *charset_ext);
#ifndef __CYGWIN__
static _CONST
#endif
-char __ctype_cp[24][128 + 256] = {
+char __ctype_cp[26][128 + 256] = {
{ _CTYPE_CP437_128_254,
0,
_CTYPE_DATA_0_127,
@@ -587,11 +623,23 @@ char __ctype_cp[24][128 + 256] = {
_CTYPE_CP21866_128_254,
_CTYPE_CP21866_255
},
+ { _CTYPE_GEORGIAN_PS_128_254,
+ 0,
+ _CTYPE_DATA_0_127,
+ _CTYPE_GEORGIAN_PS_128_254,
+ _CTYPE_GEORGIAN_PS_255
+ },
+ { _CTYPE_PT154_128_254,
+ 0,
+ _CTYPE_DATA_0_127,
+ _CTYPE_PT154_128_254,
+ _CTYPE_PT154_255
+ },
};
#else /* !defined(ALLOW_NEGATIVE_CTYPE_INDEX) */
-static _CONST char __ctype_cp[22][1 + 256] = {
+static _CONST char __ctype_cp[26][1 + 256] = {
{ 0,
_CTYPE_DATA_0_127,
_CTYPE_CP437_128_254,
@@ -712,6 +760,16 @@ static _CONST char __ctype_cp[22][1 + 256] = {
_CTYPE_CP21866_128_254,
_CTYPE_CP21866_255
},
+ { 0,
+ _CTYPE_DATA_0_127,
+ _CTYPE_GEORGIAN_PS_128_254,
+ _CTYPE_GEORGIAN_PS_255
+ },
+ { 0,
+ _CTYPE_DATA_0_127,
+ _CTYPE_PT154_128_254,
+ _CTYPE_PT154_255
+ },
};
#endif /* ALLOW_NEGATIVE_CTYPE_INDEX */
diff --git a/newlib/libc/locale/locale.c b/newlib/libc/locale/locale.c
index 85069aefa..26283c5f4 100644
--- a/newlib/libc/locale/locale.c
+++ b/newlib/libc/locale/locale.c
@@ -56,34 +56,36 @@ for a given language, a three character string per ISO 639-3.
<<"TERRITORY">> is a country code per ISO 3166. For <<"charset">> and
<<"modifier">> see below.
-Additionally to the POSIX specifier, seven extensions are supported for
-backward compatibility with older implementations using newlib:
-<<"C-UTF-8">>, <<"C-JIS">>, <<"C-eucJP">>, <<"C-SJIS">>, <<C-KOI8-R>>,
-<<C-KOI8-U>>, <<"C-ISO-8859-x">> with 1 <= x <= 15, or <<"C-CPxxx">> with
-xxx in [437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866, 874, 932,
-1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258].
-
-Instead of <<"C-">>, you can specify also <<"C.">>. Both variations allow
+Additionally to the POSIX specifier, the following extension is supported
+for backward compatibility with older implementations using newlib:
+<<"C-charset">>.
+Instead of <<"C-">>, you can also specify <<"C.">>. Both variations allow
to specify language neutral locales while using other charsets than ASCII,
for instance <<"C.UTF-8">>, which keeps all settings as in the C locale,
but uses the UTF-8 charset.
-Even when using POSIX locale strings, the only charsets allowed are
+The following charsets are recognized:
<<"UTF-8">>, <<"JIS">>, <<"EUCJP">>, <<"SJIS">>, <<"KOI8-R">>, <<"KOI8-U">>,
-<<"ISO-8859-x">> with 1 <= x <= 15, or <<"CPxxx">> with xxx in
-[437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866, 874, 932, 1125, 1250,
-1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258].
+<<"GEORGIAN-PS">>, <<"PT154">>, <<"TIS-620">>, <<"ISO-8859-x">> with
+1 <= x <= 16, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850, 852, 855,
+857, 858, 862, 866, 874, 932, 1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256,
+1257, 1258].
+
Charsets are case insensitive. For instance, <<"EUCJP">> and <<"eucJP">>
are equivalent. Charset names with dashes can also be written without
dashes, as in <<"UTF8">>, <<"iso88591">> or <<"koi8r">>. <<"EUCJP">> and
<<"EUCKR"> are also recognized with dash, <<"EUC-JP">> and <<"EUC-KR">>.
+Full support for all of the above charsets requires that newlib has been
+built with multibyte support and support for all ISO and Windows Codepages.
+Otherwise all singlebyte charsets are simply mapped to ASCII. Right now,
+only newlib for Cygwin is built with full charset support by default.
+Under Cygwin, this implementation additionally supports the charsets
+<<"GBK">>, <<"eucKR">>, and <<"Big5">>. Cygwin does not support <<"JIS">>.
+
(<<"">> is also accepted; if given, the settings are read from the
corresponding LC_* environment variables and $LANG according to POSIX rules.
-Under Cygwin, this implementation additionally supports the charsets
-<<"GBK">>, <<"eucKR">>, <<"Big5">>, and <<"TIS-620">>.
-
This implementation also supports a single modifier, <<"cjknarrow">>.
Any other modifier is ignored. <<"cjknarrow">>, in conjunction with one
of the language specifiers <<"ja">>, <<"ko">>, and <<"zh">> specifies
@@ -720,38 +722,91 @@ loadlocale(struct _reent *p, int category)
l_mbtowc = __ascii_mbtowc;
#endif
break;
-#ifdef __CYGWIN__
case 'G':
case 'g':
- if (strcasecmp (charset, "GBK"))
- return NULL;
- strcpy (charset, "GBK");
- mbc_max = 2;
+#ifdef __CYGWIN__
+ if (!strcasecmp (charset, "GBK"))
+ {
+ strcpy (charset, "GBK");
+ mbc_max = 2;
#ifdef _MB_CAPABLE
- l_wctomb = __gbk_wctomb;
- l_mbtowc = __gbk_mbtowc;
+ l_wctomb = __gbk_wctomb;
+ l_mbtowc = __gbk_mbtowc;
#endif
+ }
+ else
+#endif /* __CYGWIN__ */
+ /* GEORGIAN-PS and the alias without dash */
+ if (!strncasecmp (charset, "GEORGIAN", 8))
+ {
+ c = charset + 8;
+ if (*c == '-')
+ ++c;
+ if (strcasecmp (c, "PS"))
+ return NULL;
+ strcpy (charset, "CP101");
+ mbc_max = 1;
+#ifdef _MB_CAPABLE
+#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
+ l_wctomb = __cp_wctomb;
+ l_mbtowc = __cp_mbtowc;
+#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
+ l_wctomb = __ascii_wctomb;
+ l_mbtowc = __ascii_mbtowc;
+#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
+#endif
+ }
+ else
+ return NULL;
break;
- case 'B':
- case 'b':
- if (strcasecmp (charset, "BIG5"))
- return NULL;
- strcpy (charset, "BIG5");
- mbc_max = 2;
+ case 'P':
+ case 'p':
+ /* PT154 */
+ if (strcasecmp (charset, "PT154"))
+ return NULL;
+ strcpy (charset, "CP102");
+ mbc_max = 1;
#ifdef _MB_CAPABLE
- l_wctomb = __big5_wctomb;
- l_mbtowc = __big5_mbtowc;
+#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
+ l_wctomb = __cp_wctomb;
+ l_mbtowc = __cp_mbtowc;
+#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
+ l_wctomb = __ascii_wctomb;
+ l_mbtowc = __ascii_mbtowc;
+#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
#endif
break;
case 'T':
case 't':
- if (strcasecmp (charset, "TIS620") && strcasecmp (charset, "TIS-620"))
+ if (strncasecmp (charset, "TIS", 3))
+ return NULL;
+ c = charset + 3;
+ if (*c == '-')
+ ++c;
+ if (strcasecmp (c, "620"))
return NULL;
strcpy (charset, "CP874");
mbc_max = 1;
#ifdef _MB_CAPABLE
+#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
l_wctomb = __cp_wctomb;
l_mbtowc = __cp_mbtowc;
+#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
+ l_wctomb = __ascii_wctomb;
+ l_mbtowc = __ascii_mbtowc;
+#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
+#endif
+ break;
+#ifdef __CYGWIN__
+ case 'B':
+ case 'b':
+ if (strcasecmp (charset, "BIG5"))
+ return NULL;
+ strcpy (charset, "BIG5");
+ mbc_max = 2;
+#ifdef _MB_CAPABLE
+ l_wctomb = __big5_wctomb;
+ l_mbtowc = __big5_mbtowc;
#endif
break;
#endif /* __CYGWIN__ */
diff --git a/newlib/libc/locale/nl_langinfo.c b/newlib/libc/locale/nl_langinfo.c
index cd86c158d..8e8856de7 100644
--- a/newlib/libc/locale/nl_langinfo.c
+++ b/newlib/libc/locale/nl_langinfo.c
@@ -78,6 +78,10 @@ _DEFUN(nl_langinfo, (item),
ret = "KOI8-R";
else if (strcmp (ret + 2, "21866") == 0)
ret = "KOI8-U";
+ else if (strcmp (ret + 2, "101") == 0)
+ ret = "GEORGIAN-PS";
+ else if (strcmp (ret + 2, "102") == 0)
+ ret = "PT154";
}
else if (ret[0] == 'S'/*JIS*/)
{
diff --git a/newlib/libc/stdlib/sb_charsets.c b/newlib/libc/stdlib/sb_charsets.c
index 4ab1de69a..e668c4b83 100644
--- a/newlib/libc/stdlib/sb_charsets.c
+++ b/newlib/libc/stdlib/sb_charsets.c
@@ -203,7 +203,7 @@ wchar_t __iso_8859_conv[14][0x60] = {
value (function __cp_index), the second index is the value of the
incoming character - 0x80.
Values < 0x80 don't have to be converted anyway. */
-wchar_t __cp_conv[24][0x80] = {
+wchar_t __cp_conv[26][0x80] = {
/* CP437 */
{ 0xc7, 0xfc, 0xe9, 0xe2, 0xe4, 0xe0, 0xe5, 0xe7,
0xea, 0xeb, 0xe8, 0xef, 0xee, 0xec, 0xc4, 0xc5,
@@ -611,7 +611,47 @@ wchar_t __cp_conv[24][0x80] = {
0x42e, 0x410, 0x411, 0x426, 0x414, 0x415, 0x424, 0x413,
0x425, 0x418, 0x419, 0x41a, 0x41b, 0x41c, 0x41d, 0x41e,
0x41f, 0x42f, 0x420, 0x421, 0x422, 0x423, 0x416, 0x412,
- 0x42c, 0x42b, 0x417, 0x428, 0x42d, 0x429, 0x427, 0x42a }
+ 0x42c, 0x42b, 0x417, 0x428, 0x42d, 0x429, 0x427, 0x42a },
+ /* The following are not valid Windows codepages, but they fit nicely here.
+ The CP numbers are only used internally and are guaranteed not to clash
+ with valid Windows codepage identifiers. */
+ /* CP101 (GEORGIAN-PS) Georgian charset, used as the default charset in
+ the ka_GE locale (Georgian, Georgia). Apparently derived from Windows
+ CP1252. */
+ { 0x80, 0x81, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021,
+ 0x2c6, 0x2030, 0x160, 0x2039, 0x152, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
+ 0x2dc, 0x2122, 0x161, 0x203a, 0x153, 0x9d, 0x9e, 0x178,
+ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
+ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
+ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
+ 0x10d0, 0x10d1, 0x10d2, 0x10d3, 0x10d4, 0x10d5, 0x10d6, 0x10f1,
+ 0x10d7, 0x10d8, 0x10d9, 0x10da, 0x10db, 0x10dc, 0x10f2, 0x10dd,
+ 0x10de, 0x10df, 0x10e0, 0x10e1, 0x10e2, 0x10f3, 0x10e3, 0x10e4,
+ 0x10e5, 0x10e6, 0x10e7, 0x10e8, 0x10e9, 0x10ea, 0x10eb, 0x10ec,
+ 0x10ed, 0x10ee, 0x10f4, 0x10ef, 0x10f0, 0x10f5, 0xe6, 0xe7,
+ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
+ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
+ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff },
+ /* CP102 (PT154) Cyrillic-Asian charset, used as the default charset in
+ the kk_KZ locale (Kazakh, Kazakhstan). */
+ { 0x496, 0x492, 0x4ee, 0x493, 0x201e, 0x2026, 0x4b6, 0x4ae,
+ 0x4b2, 0x4af, 0x4a0, 0x4e2, 0x4a2, 0x49a, 0x4ba, 0x4b8,
+ 0x497, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
+ 0x4b3, 0x4b7, 0x4a1, 0x4e3, 0x4a3, 0x49b, 0x4bb, 0x4b9,
+ 0xa0, 0x40e, 0x45e, 0x408, 0x4e8, 0x498, 0x4b0, 0xa7,
+ 0x401, 0xa9, 0x4d8, 0xab, 0xac, 0x4ef, 0xae, 0x49c,
+ 0xb0, 0x4b1, 0x406, 0x456, 0x499, 0x4e9, 0xb6, 0xb7,
+ 0x451, 0x2116, 0x4d9, 0xbb, 0x458, 0x4aa, 0x4ab, 0x49d,
+ 0x410, 0x411, 0x412, 0x413, 0x414, 0x415, 0x416, 0x417,
+ 0x418, 0x419, 0x41a, 0x41b, 0x41c, 0x41d, 0x41e, 0x41f,
+ 0x420, 0x421, 0x422, 0x423, 0x424, 0x425, 0x426, 0x427,
+ 0x428, 0x429, 0x42a, 0x42b, 0x42c, 0x42d, 0x42e, 0x42f,
+ 0x430, 0x431, 0x432, 0x433, 0x434, 0x435, 0x436, 0x437,
+ 0x438, 0x439, 0x43a, 0x43b, 0x43c, 0x43d, 0x43e, 0x43f,
+ 0x440, 0x441, 0x442, 0x443, 0x444, 0x445, 0x446, 0x447,
+ 0x448, 0x449, 0x44a, 0x44b, 0x44c, 0x44d, 0x44e, 0x44f }
};
#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
@@ -727,6 +767,12 @@ __cp_index (const char *charset_ext)
case 21866:
cp_idx = 23;
break;
+ case 101:
+ cp_idx = 24;
+ break;
+ case 102:
+ cp_idx = 25;
+ break;
default:
cp_idx = -1;
break;