diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2011-01-19 20:22:04 +0200 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2011-01-19 20:22:04 +0200 |
commit | b4a1aa90519d34c87b3a6699b77a24f39b1b22c1 (patch) | |
tree | 572747fc4d6f256b7383be2c26cfb57dea15c010 | |
parent | 32b060d8f0069ad0083ad19d1d095d8ea69f0f45 (diff) | |
download | egawk-b4a1aa90519d34c87b3a6699b77a24f39b1b22c1.tar.gz egawk-b4a1aa90519d34c87b3a6699b77a24f39b1b22c1.tar.bz2 egawk-b4a1aa90519d34c87b3a6699b77a24f39b1b22c1.zip |
Simplify code for do_tolower, do_toupper.
-rw-r--r-- | ChangeLog | 10 | ||||
-rw-r--r-- | awk.h | 1 | ||||
-rw-r--r-- | builtin.c | 188 | ||||
-rw-r--r-- | node.c | 41 |
4 files changed, 155 insertions, 85 deletions
@@ -1,3 +1,13 @@ +Wed Jan 19 20:19:29 2011 Arnold D. Robbins <arnold@skeeve.com> + + * node.c (wstr2str): New function. + * awk.h: Declare it. + * builtin.c (is_wupper, is_wlower, to_wupper, to_wlower, + wide_change_case, wide_tolower, wide_toupper): New functions to + simplify wide character case conversions. + (do_tolower, do_toupper): Use wide_tolower, wide_toupper in multibyte + case. + Mon Jan 17 22:48:48 2011 Arnold D. Robbins <arnold@skeeve.com> * builtin.c (do_bindtextdomain): Change type of `the_result' @@ -1321,6 +1321,7 @@ extern void unref(NODE *tmp); extern int parse_escape(const char **string_ptr); #ifdef MBS_SUPPORT extern NODE *str2wstr(NODE *n, size_t **ptr); +extern NODE *wstr2str(NODE *n); #define force_wstring(n) str2wstr(n, NULL) extern const wchar_t *wstrstr(const wchar_t *haystack, size_t hs_len, const wchar_t *needle, size_t needle_len); @@ -740,18 +740,17 @@ do_substr(int nargs) } #ifdef MBS_SUPPORT - if (gawk_mb_cur_max > 1) { + /* force_wstring() already called */ + if (gawk_mb_cur_max == 1 || t1->wstlen == t1->stlen) + /* single byte case */ + r = make_string(t1->stptr + indx, length); + else { /* multibyte case, more work */ size_t result; wchar_t *wp; mbstate_t mbs; char *substr, *cp; - /* force_wstring() already called */ - - if (t1->stlen == t1->wstlen) - goto single_byte_case; - /* * Convert the wide chars in t1->wstptr back into m.b. chars. * This is pretty grotty, but it's the most straightforward @@ -769,10 +768,6 @@ do_substr(int nargs) } *cp = '\0'; r = make_str_node(substr, cp - substr, ALREADY_MALLOCED); - } else { - /* single byte case, easy */ -single_byte_case: - r = make_string(t1->stptr + indx, length); } #else r = make_string(t1->stptr + indx, length); @@ -1095,11 +1090,72 @@ do_print_rec(int nargs, int redirtype) fflush(rp->fp); } -/* - * 11/2010: FIXME: Consider converting the whole string to wide - * characters, running through and converting to wide lower case - * and then coverting back. 
Might be more straightforward code. - */ +#ifdef MBS_SUPPORT + +/* is_wupper --- function version of iswupper for passing function pointers */ + +static int +is_wupper(wchar_t c) +{ + return iswupper(c); +} + +/* is_wlower --- function version of iswlower for passing function pointers */ + +static int +is_wlower(wchar_t c) +{ + return iswlower(c); +} + +/* to_wlower --- function version of towlower for passing function pointers */ + +static int +to_wlower(wchar_t c) +{ + return towlower(c); +} + +/* to_wupper --- function version of towupper for passing function pointers */ + +static int +to_wupper(wchar_t c) +{ + return towupper(c); +} + +/* wide_change_case --- generic case converter for wide characters */ + +static void +wide_change_case(wchar_t *wstr, + size_t wlen, + int (*is_x)(wchar_t c), + int (*to_y)(wchar_t c)) +{ + size_t i; + wchar_t *wcp; + + for (i = 0, wcp = wstr; i < wlen; i++, wcp++) + if (is_x(*wcp)) + *wcp = to_y(*wcp); +} + +/* wide_toupper --- map a wide string to upper case */ + +static void +wide_toupper(wchar_t *wstr, size_t wlen) +{ + wide_change_case(wstr, wlen, is_wlower, to_wupper); +} + +/* wide_tolower --- map a wide string to lower case */ + +static void +wide_tolower(wchar_t *wstr, size_t wlen) +{ + wide_change_case(wstr, wlen, is_wupper, to_wlower); +} +#endif /* do_tolower --- lower case a string */ @@ -1107,49 +1163,30 @@ NODE * do_tolower(int nargs) { NODE *t1, *t2; - unsigned char *cp, *cp2; -#ifdef MBS_SUPPORT - size_t mbclen = 0; - mbstate_t mbs, prev_mbs; - - if (gawk_mb_cur_max > 1) - memset(& mbs, 0, sizeof(mbstate_t)); -#endif t1 = POP_SCALAR(); if (do_lint && (t1->flags & (STRING|STRCUR)) == 0) lintwarn(_("tolower: received non-string argument")); t1 = force_string(t1); t2 = make_string(t1->stptr, t1->stlen); - for (cp = (unsigned char *)t2->stptr, - cp2 = (unsigned char *)(t2->stptr + t2->stlen); cp < cp2; cp++) + + if (gawk_mb_cur_max == 1) { + unsigned char *cp, *cp2; + + for (cp = (unsigned char *)t2->stptr, + cp2 = 
(unsigned char *)(t2->stptr + t2->stlen); + cp < cp2; cp++) + if (isupper(*cp)) + *cp = tolower(*cp); + } #ifdef MBS_SUPPORT - if (gawk_mb_cur_max > 1) { - wchar_t wc; - - prev_mbs = mbs; - mbclen = (size_t) mbrtowc(& wc, (char *) cp, cp2 - cp, - & mbs); - if ((mbclen != 1) && (mbclen != (size_t) -1) && - (mbclen != (size_t) -2) && (mbclen != 0)) { - /* a multibyte character. */ - if (iswupper(wc)) { - wint_t junk; - - wc = towlower(wc); - junk = wcrtomb((char *) cp, wc, & prev_mbs); - } - /* Adjust the pointer. */ - cp += mbclen - 1; - } else { - /* Otherwise we treat it as a singlebyte character. */ - if (isupper(*cp)) - *cp = tolower(*cp); - } - } else + else { + force_wstring(t2); + wide_tolower(t2->wstptr, t2->wstlen); + wstr2str(t2); + } #endif - if (isupper(*cp)) - *cp = tolower(*cp); + DEREF(t1); return t2; } @@ -1160,49 +1197,30 @@ NODE * do_toupper(int nargs) { NODE *t1, *t2; - unsigned char *cp, *cp2; -#ifdef MBS_SUPPORT - size_t mbclen = 0; - mbstate_t mbs, prev_mbs; - - if (gawk_mb_cur_max > 1) - memset(& mbs, 0, sizeof(mbstate_t)); -#endif t1 = POP_SCALAR(); if (do_lint && (t1->flags & (STRING|STRCUR)) == 0) lintwarn(_("toupper: received non-string argument")); t1 = force_string(t1); t2 = make_string(t1->stptr, t1->stlen); - for (cp = (unsigned char *)t2->stptr, - cp2 = (unsigned char *)(t2->stptr + t2->stlen); cp < cp2; cp++) + + if (gawk_mb_cur_max == 1) { + unsigned char *cp, *cp2; + + for (cp = (unsigned char *)t2->stptr, + cp2 = (unsigned char *)(t2->stptr + t2->stlen); + cp < cp2; cp++) + if (islower(*cp)) + *cp = toupper(*cp); + } #ifdef MBS_SUPPORT - if (gawk_mb_cur_max > 1) { - wchar_t wc; - - prev_mbs = mbs; - mbclen = (size_t) mbrtowc(& wc, (char *) cp, cp2 - cp, - & mbs); - if ((mbclen != 1) && (mbclen != (size_t) -1) && - (mbclen != (size_t) -2) && (mbclen != 0)) { - /* a multibyte character. */ - if (iswlower(wc)) { - wint_t junk; - - wc = towupper(wc); - junk = wcrtomb((char *) cp, wc, & prev_mbs); - } - /* Adjust the pointer. 
*/ - cp += mbclen - 1; - } else { - /* Otherwise we treat it as a singlebyte character. */ - if (islower(*cp)) - *cp = toupper(*cp); - } - } else + else { + force_wstring(t2); + wide_toupper(t2->wstptr, t2->wstlen); + wstr2str(t2); + } #endif - if (islower(*cp)) - *cp = toupper(*cp); + DEREF(t1); return t2; } @@ -761,6 +761,47 @@ str2wstr(NODE *n, size_t **ptr) return n; } +/* wstr2str --- convert a wide string back into multibyte one */ + +NODE * +wstr2str(NODE *n) +{ + size_t result; + size_t length; + wchar_t *wp; + mbstate_t mbs; + char *newval, *cp; + + assert(n->valref == 1); + assert((n->flags & WSTRCUR) != 0); + + /* + * Convert the wide chars in t1->wstptr back into m.b. chars. + * This is pretty grotty, but it's the most straightforward + * way to do things. + */ + memset(& mbs, 0, sizeof(mbs)); + + length = n->wstlen; + emalloc(newval, char *, (length * gawk_mb_cur_max) + 2, "wstr2str"); + + wp = n->wstptr; + for (cp = newval; length > 0; length--) { + result = wcrtomb(cp, *wp, & mbs); + if (result == (size_t) -1) /* what to do? break seems best */ + break; + cp += result; + wp++; + } + *cp = '\0'; + + efree(n->stptr); + n->stptr = newval; + n->stlen = cp - newval; + + return n; +} + /* free_wstr --- release the wide string part of a node */ void |