summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCorinna Vinschen <corinna@vinschen.de>2009-06-18 09:13:39 +0000
committerCorinna Vinschen <corinna@vinschen.de>2009-06-18 09:13:39 +0000
commite53c92a80e1d6acefdcbcf5e16f7f646971b2bef (patch)
treed28581bff46ad43624b21857cb77e437f6c82ea8
parent339dde8fe5edf6e712ae25b54d57fea1ab6dffc8 (diff)
downloadcygnal-e53c92a80e1d6acefdcbcf5e16f7f646971b2bef.tar.gz
cygnal-e53c92a80e1d6acefdcbcf5e16f7f646971b2bef.tar.bz2
cygnal-e53c92a80e1d6acefdcbcf5e16f7f646971b2bef.zip
* libc/locale/locale.c (loadlocale): Add handling of "@cjknarrow"
modifier on _MB_CAPABLE targets. Add comment to explain. Improve documentation.
-rw-r--r--newlib/ChangeLog6
-rw-r--r--newlib/libc/locale/locale.c94
2 files changed, 73 insertions, 27 deletions
diff --git a/newlib/ChangeLog b/newlib/ChangeLog
index 34aa64995..154e0597b 100644
--- a/newlib/ChangeLog
+++ b/newlib/ChangeLog
@@ -1,3 +1,9 @@
+2009-06-18 Corinna Vinschen <corinna@vinschen.de>
+
+ * libc/locale/locale.c (loadlocale): Add handling of "@cjknarrow"
+ modifier on _MB_CAPABLE targets. Add comment to explain. Improve
+ documentation.
+
2009-06-17 Michael Eager <eager@eagercon.com>
* libc/include/pthread.h: Support XMK (Xilinx) BSP, add RTEMS to
diff --git a/newlib/libc/locale/locale.c b/newlib/libc/locale/locale.c
index 3b2ffb9d3..60536146f 100644
--- a/newlib/libc/locale/locale.c
+++ b/newlib/libc/locale/locale.c
@@ -44,29 +44,49 @@ locale.
This is a minimal implementation, supporting only the required <<"POSIX">>
and <<"C">> values for <[locale]>; strings representing other locales are not
-honored unless _MB_CAPABLE is defined in which case POSIX locale strings
-are allowed, plus five extensions supported for backward compatibility with
-older implementations using newlib: <<"C-UTF-8">>, <<"C-JIS">>,
-<<"C-EUCJP">>/<<"C-eucJP">>, <<"C-SJIS">>, <<"C-ISO-8859-x">> with
-1 <= x <= 15, or <<"C-CPxxx">> with xxx in [437, 720, 737, 775, 850, 852,
-855, 857, 858, 862, 866, 874, 1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256,
-1257, 1258]. Even when using POSIX locale strings, the only charsets allowed
-are <<"UTF-8">>, <<"JIS">>, <<"EUCJP">>/<<"eucJP">>, <<"SJIS">>,
-<<"ISO-8859-x">> with 1 <= x <= 15, or <<"CPxxx">> with xxx in [437, 720,
-737, 775, 850, 852, 855, 857, 858, 862, 866, 874, 1125, 1250, 1251, 1252,
-1253, 1254, 1255, 1256, 1257, 1258].
+honored unless _MB_CAPABLE is defined.
+
+If _MB_CAPABLE is defined, POSIX locale strings are allowed, following
+the form
+
+ language[_TERRITORY][.charset][@@modifier]
+
+<<"language">> is a two character string per ISO 639. <<"TERRITORY">> is a
+country code per ISO 3166. For <<"charset">> and <<"modifier">> see below.
+
+Additionally to the POSIX specifier, five extensions are supported for
+backward compatibility with older implementations using newlib:
+<<"C-UTF-8">>, <<"C-JIS">>, <<"C-EUCJP">>/<<"C-eucJP">>, <<"C-SJIS">>,
+<<"C-ISO-8859-x">> with 1 <= x <= 15, or <<"C-CPxxx">> with xxx in [437,
+720, 737, 775, 850, 852, 855, 857, 858, 862, 866, 874, 1125, 1250, 1251,
+1252, 1253, 1254, 1255, 1256, 1257, 1258].
+
+Even when using POSIX locale strings, the only charsets allowed are
+<<"UTF-8">>, <<"JIS">>, <<"EUCJP">>/<<"eucJP">>, <<"SJIS">>, <<"ISO-8859-x">>
+with 1 <= x <= 15, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850,
+852, 855, 857, 858, 862, 866, 874, 1125, 1250, 1251, 1252, 1253, 1254,
+1255, 1256, 1257, 1258].
(<<"">> is also accepted; if given, the settings are read from the
corresponding LC_* environment variables and $LANG according to POSIX rules.
Under Cygwin, this implementation additionally supports the charsets
<<"GBK">>, <<"eucKR">>, and <<"Big5">>.
-If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns
-a pointer to the string representing the current locale (always
-<<"C">> in this implementation). The acceptable values for
-<[category]> are defined in `<<locale.h>>' as macros beginning with
-<<"LC_">>, but this implementation does not check the values you pass
-in the <[category]> argument.
+This implementation also supports a single modifier, <<"cjknarrow">>.
+Any other modifier is ignored. <<"cjknarrow">>, in conjunction with one
+of the language specifiers <<"ja">>, <<"ko">>, and <<"zh">> specifies
+how the functions <<wcwidth>> and <<wcswidth>> handle characters from
+the "CJK Ambiguous Width" character class described in
+http://www.unicode.org/unicode/reports/tr11/. Usually these characters
+have a width of 1, unless you specify one of the aforementioned
+languages, in which case these characters have a width of 2. By
+specifying the <<"cjknarrow">> modifier, these characters will have a
+width of one in the languages <<"ja">>, <<"ko">>, and <<"zh">> as well.
+
+If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns a
+pointer to the string representing the current locale. The acceptable
+values for <[category]> are defined in `<<locale.h>>' as macros
+beginning with <<"LC_">>.
<<localeconv>> returns a pointer to a structure (also defined in
`<<locale.h>>') describing the locale-specific conventions currently
@@ -399,6 +419,9 @@ loadlocale(struct _reent *p, int category)
int (*l_wctomb) (struct _reent *, char *, wchar_t, const char *, mbstate_t *);
int (*l_mbtowc) (struct _reent *, wchar_t *, const char *, size_t,
const char *, mbstate_t *);
+#ifdef _MB_CAPABLE
+ int cjknarrow = 0;
+#endif
/* "POSIX" is translated to "C", as on Linux. */
if (!strcmp (locale, "POSIX"))
@@ -429,10 +452,14 @@ loadlocale(struct _reent *p, int category)
if (c[0] == '.')
{
/* Charset */
- strcpy (charset, c + 1);
- if ((c = strchr (charset, '@')))
+ char *chp;
+
+ ++c;
+ strcpy (charset, c);
+ if ((chp = strchr (charset, '@')))
/* Strip off modifier */
- *c = '\0';
+ *chp = '\0';
+ c += strlen (charset);
}
else if (c[0] == '\0' || c[0] == '@')
/* End of string or just a modifier */
@@ -444,6 +471,17 @@ loadlocale(struct _reent *p, int category)
else
/* Invalid string */
return NULL;
+#ifdef _MB_CAPABLE
+ if (c[0] == '@')
+ {
+ /* Modifier */
+ /* Only one modifier is recognized right now. "cjknarrow" is used
+ to modify the behaviour of wcwidth() for East Asian languages.
+ For details see the comment at the end of this function. */
+ if (!strcmp (c + 1, "cjknarrow"))
+ cjknarrow = 1;
+ }
+#endif
}
/* We only support this subset of charsets. */
switch (charset[0])
@@ -606,13 +644,15 @@ loadlocale(struct _reent *p, int category)
__mbtowc = l_mbtowc;
__set_ctype (charset);
/* Check for the language part of the locale specifier. In case
- of "ja", "ko", or "zh", assume the use of CJK fonts. This is
- stored in lc_ctype_cjk_lang and tested in wcwidth() to figure
- out the width to return (1 or 2) for the "CJK Ambiguous Width"
- category of characters. */
- lc_ctype_cjk_lang = (strncmp (locale, "ja", 2) == 0
- || strncmp (locale, "ko", 2) == 0
- || strncmp (locale, "zh", 2) == 0);
+ of "ja", "ko", or "zh", assume the use of CJK fonts, unless the
+ "@cjknarrow" modifier has been specifed.
+ The result is stored in lc_ctype_cjk_lang and tested in wcwidth()
+ to figure out the width to return (1 or 2) for the "CJK Ambiguous
+ Width" category of characters. */
+ lc_ctype_cjk_lang = !cjknarrow
+ && ((strncmp (locale, "ja", 2) == 0
+ || strncmp (locale, "ko", 2) == 0
+ || strncmp (locale, "zh", 2) == 0));
#endif
}
else if (category == LC_MESSAGES)