aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Arnold D. Robbins <arnold@skeeve.com> 2011-01-19 20:22:04 +0200
committer Arnold D. Robbins <arnold@skeeve.com> 2011-01-19 20:22:04 +0200
commit b4a1aa90519d34c87b3a6699b77a24f39b1b22c1 (patch)
tree 572747fc4d6f256b7383be2c26cfb57dea15c010
parent 32b060d8f0069ad0083ad19d1d095d8ea69f0f45 (diff)
download egawk-b4a1aa90519d34c87b3a6699b77a24f39b1b22c1.tar.gz
egawk-b4a1aa90519d34c87b3a6699b77a24f39b1b22c1.tar.bz2
egawk-b4a1aa90519d34c87b3a6699b77a24f39b1b22c1.zip
Simplify code for do_tolower, do_toupper.
-rw-r--r-- ChangeLog 10
-rw-r--r-- awk.h 1
-rw-r--r-- builtin.c 188
-rw-r--r-- node.c 41
4 files changed, 155 insertions, 85 deletions
diff --git a/ChangeLog b/ChangeLog
index bc3aca64..18067fae 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+Wed Jan 19 20:19:29 2011 Arnold D. Robbins <arnold@skeeve.com>
+
+ * node.c (wstr2str): New function.
+ * awk.h: Declare it.
+ * builtin.c (is_wupper, is_wlower, to_wupper, to_wlower,
+ wide_change_case, wide_tolower, wide_toupper): New functions to
+ simplify wide character case conversions.
+ (do_tolower, do_toupper): Use wide_tolower, wide_toupper in multibyte
+ case.
+
Mon Jan 17 22:48:48 2011 Arnold D. Robbins <arnold@skeeve.com>
* builtin.c (do_bindtextdomain): Change type of `the_result'
diff --git a/awk.h b/awk.h
index 84a7d18e..664b4f7c 100644
--- a/awk.h
+++ b/awk.h
@@ -1321,6 +1321,7 @@ extern void unref(NODE *tmp);
extern int parse_escape(const char **string_ptr);
#ifdef MBS_SUPPORT
extern NODE *str2wstr(NODE *n, size_t **ptr);
+extern NODE *wstr2str(NODE *n);
#define force_wstring(n) str2wstr(n, NULL)
extern const wchar_t *wstrstr(const wchar_t *haystack, size_t hs_len,
const wchar_t *needle, size_t needle_len);
diff --git a/builtin.c b/builtin.c
index 32d97a51..a9ece11a 100644
--- a/builtin.c
+++ b/builtin.c
@@ -740,18 +740,17 @@ do_substr(int nargs)
}
#ifdef MBS_SUPPORT
- if (gawk_mb_cur_max > 1) {
+ /* force_wstring() already called */
+ if (gawk_mb_cur_max == 1 || t1->wstlen == t1->stlen)
+ /* single byte case */
+ r = make_string(t1->stptr + indx, length);
+ else {
/* multibyte case, more work */
size_t result;
wchar_t *wp;
mbstate_t mbs;
char *substr, *cp;
- /* force_wstring() already called */
-
- if (t1->stlen == t1->wstlen)
- goto single_byte_case;
-
/*
* Convert the wide chars in t1->wstptr back into m.b. chars.
* This is pretty grotty, but it's the most straightforward
@@ -769,10 +768,6 @@ do_substr(int nargs)
}
*cp = '\0';
r = make_str_node(substr, cp - substr, ALREADY_MALLOCED);
- } else {
- /* single byte case, easy */
-single_byte_case:
- r = make_string(t1->stptr + indx, length);
}
#else
r = make_string(t1->stptr + indx, length);
@@ -1095,11 +1090,72 @@ do_print_rec(int nargs, int redirtype)
fflush(rp->fp);
}
-/*
- * 11/2010: FIXME: Consider converting the whole string to wide
- * characters, running through and converting to wide lower case
- * and then coverting back. Might be more straightforward code.
- */
+#ifdef MBS_SUPPORT
+
+/* is_wupper --- function version of iswupper for passing function pointers */
+
+static int
+is_wupper(wchar_t c)
+{
+ return iswupper(c);
+}
+
+/* is_wlower --- function version of iswlower for passing function pointers */
+
+static int
+is_wlower(wchar_t c)
+{
+ return iswlower(c);
+}
+
+/* to_wlower --- function version of towlower for passing function pointers */
+
+static int
+to_wlower(wchar_t c)
+{
+ return towlower(c);
+}
+
+/* to_wupper --- function version of towupper for passing function pointers */
+
+static int
+to_wupper(wchar_t c)
+{
+ return towupper(c);
+}
+
+/* wide_change_case --- generic case converter for wide characters */
+
+static void
+wide_change_case(wchar_t *wstr,
+ size_t wlen,
+ int (*is_x)(wchar_t c),
+ int (*to_y)(wchar_t c))
+{
+ size_t i;
+ wchar_t *wcp;
+
+ for (i = 0, wcp = wstr; i < wlen; i++, wcp++)
+ if (is_x(*wcp))
+ *wcp = to_y(*wcp);
+}
+
+/* wide_toupper --- map a wide string to upper case */
+
+static void
+wide_toupper(wchar_t *wstr, size_t wlen)
+{
+ wide_change_case(wstr, wlen, is_wlower, to_wupper);
+}
+
+/* wide_tolower --- map a wide string to lower case */
+
+static void
+wide_tolower(wchar_t *wstr, size_t wlen)
+{
+ wide_change_case(wstr, wlen, is_wupper, to_wlower);
+}
+#endif
/* do_tolower --- lower case a string */
@@ -1107,49 +1163,30 @@ NODE *
do_tolower(int nargs)
{
NODE *t1, *t2;
- unsigned char *cp, *cp2;
-#ifdef MBS_SUPPORT
- size_t mbclen = 0;
- mbstate_t mbs, prev_mbs;
-
- if (gawk_mb_cur_max > 1)
- memset(& mbs, 0, sizeof(mbstate_t));
-#endif
t1 = POP_SCALAR();
if (do_lint && (t1->flags & (STRING|STRCUR)) == 0)
lintwarn(_("tolower: received non-string argument"));
t1 = force_string(t1);
t2 = make_string(t1->stptr, t1->stlen);
- for (cp = (unsigned char *)t2->stptr,
- cp2 = (unsigned char *)(t2->stptr + t2->stlen); cp < cp2; cp++)
+
+ if (gawk_mb_cur_max == 1) {
+ unsigned char *cp, *cp2;
+
+ for (cp = (unsigned char *)t2->stptr,
+ cp2 = (unsigned char *)(t2->stptr + t2->stlen);
+ cp < cp2; cp++)
+ if (isupper(*cp))
+ *cp = tolower(*cp);
+ }
#ifdef MBS_SUPPORT
- if (gawk_mb_cur_max > 1) {
- wchar_t wc;
-
- prev_mbs = mbs;
- mbclen = (size_t) mbrtowc(& wc, (char *) cp, cp2 - cp,
- & mbs);
- if ((mbclen != 1) && (mbclen != (size_t) -1) &&
- (mbclen != (size_t) -2) && (mbclen != 0)) {
- /* a multibyte character. */
- if (iswupper(wc)) {
- wint_t junk;
-
- wc = towlower(wc);
- junk = wcrtomb((char *) cp, wc, & prev_mbs);
- }
- /* Adjust the pointer. */
- cp += mbclen - 1;
- } else {
- /* Otherwise we treat it as a singlebyte character. */
- if (isupper(*cp))
- *cp = tolower(*cp);
- }
- } else
+ else {
+ force_wstring(t2);
+ wide_tolower(t2->wstptr, t2->wstlen);
+ wstr2str(t2);
+ }
#endif
- if (isupper(*cp))
- *cp = tolower(*cp);
+
DEREF(t1);
return t2;
}
@@ -1160,49 +1197,30 @@ NODE *
do_toupper(int nargs)
{
NODE *t1, *t2;
- unsigned char *cp, *cp2;
-#ifdef MBS_SUPPORT
- size_t mbclen = 0;
- mbstate_t mbs, prev_mbs;
-
- if (gawk_mb_cur_max > 1)
- memset(& mbs, 0, sizeof(mbstate_t));
-#endif
t1 = POP_SCALAR();
if (do_lint && (t1->flags & (STRING|STRCUR)) == 0)
lintwarn(_("toupper: received non-string argument"));
t1 = force_string(t1);
t2 = make_string(t1->stptr, t1->stlen);
- for (cp = (unsigned char *)t2->stptr,
- cp2 = (unsigned char *)(t2->stptr + t2->stlen); cp < cp2; cp++)
+
+ if (gawk_mb_cur_max == 1) {
+ unsigned char *cp, *cp2;
+
+ for (cp = (unsigned char *)t2->stptr,
+ cp2 = (unsigned char *)(t2->stptr + t2->stlen);
+ cp < cp2; cp++)
+ if (islower(*cp))
+ *cp = toupper(*cp);
+ }
#ifdef MBS_SUPPORT
- if (gawk_mb_cur_max > 1) {
- wchar_t wc;
-
- prev_mbs = mbs;
- mbclen = (size_t) mbrtowc(& wc, (char *) cp, cp2 - cp,
- & mbs);
- if ((mbclen != 1) && (mbclen != (size_t) -1) &&
- (mbclen != (size_t) -2) && (mbclen != 0)) {
- /* a multibyte character. */
- if (iswlower(wc)) {
- wint_t junk;
-
- wc = towupper(wc);
- junk = wcrtomb((char *) cp, wc, & prev_mbs);
- }
- /* Adjust the pointer. */
- cp += mbclen - 1;
- } else {
- /* Otherwise we treat it as a singlebyte character. */
- if (islower(*cp))
- *cp = toupper(*cp);
- }
- } else
+ else {
+ force_wstring(t2);
+ wide_toupper(t2->wstptr, t2->wstlen);
+ wstr2str(t2);
+ }
#endif
- if (islower(*cp))
- *cp = toupper(*cp);
+
DEREF(t1);
return t2;
}
diff --git a/node.c b/node.c
index bbeb629c..cf16f794 100644
--- a/node.c
+++ b/node.c
@@ -761,6 +761,47 @@ str2wstr(NODE *n, size_t **ptr)
return n;
}
+/* wstr2str --- convert a wide string back into a multibyte one */
+
+NODE *
+wstr2str(NODE *n)
+{
+ size_t result;
+ size_t length;
+ wchar_t *wp;
+ mbstate_t mbs;
+ char *newval, *cp;
+
+ assert(n->valref == 1);
+ assert((n->flags & WSTRCUR) != 0);
+
+ /*
+ * Convert the wide chars in n->wstptr back into m.b. chars.
+ * This is pretty grotty, but it's the most straightforward
+ * way to do things.
+ */
+ memset(& mbs, 0, sizeof(mbs));
+
+ length = n->wstlen;
+ emalloc(newval, char *, (length * gawk_mb_cur_max) + 2, "wstr2str");
+
+ wp = n->wstptr;
+ for (cp = newval; length > 0; length--) {
+ result = wcrtomb(cp, *wp, & mbs);
+ if (result == (size_t) -1) /* what to do? break seems best */
+ break;
+ cp += result;
+ wp++;
+ }
+ *cp = '\0';
+
+ efree(n->stptr);
+ n->stptr = newval;
+ n->stlen = cp - newval;
+
+ return n;
+}
+
/* free_wstr --- release the wide string part of a node */
void