aboutsummaryrefslogtreecommitdiffstats
path: root/builtin.c
diff options
context:
space:
mode:
Diffstat (limited to 'builtin.c')
-rw-r--r--builtin.c379
1 files changed, 332 insertions, 47 deletions
diff --git a/builtin.c b/builtin.c
index 10f8b49f..f9cb44c9 100644
--- a/builtin.c
+++ b/builtin.c
@@ -3,7 +3,7 @@
*/
/*
- * Copyright (C) 1986, 1988, 1989, 1991-2001 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991-2002 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Programming Language.
@@ -204,6 +204,71 @@ do_fflush(NODE *tree)
return tmp_number((AWKNUM) status);
}
+#ifdef MBS_SUPPORT
+/*
+ * strncasecmpmbs --- like strncasecmp(), but for multibyte strings.
+ *
+ * Compares at most `n' bytes of `s1' and `s2', one character at a time,
+ * ignoring case (via towlower()).  `mbs1' and `mbs2' are the conversion
+ * states to start decoding from; they are received by value, so the
+ * caller's copies are left untouched.
+ *
+ * Returns 0 if the compared characters are all equivalent, otherwise
+ * the difference of the first pair of lowercased characters that differ.
+ */
+int
+strncasecmpmbs(const char *s1, mbstate_t mbs1, const char *s2,
+		mbstate_t mbs2, size_t n)
+{
+	/*
+	 * Offsets and mbrtowc() results are size_t: they are compared
+	 * against `n' and against (size_t) -1 / (size_t) -2 below.
+	 */
+	size_t i1, i2, mbclen1, mbclen2;
+	wchar_t wc1, wc2;
+	int gap;
+
+	for (i1 = i2 = 0; i1 < n && i2 < n; i1 += mbclen1, i2 += mbclen2) {
+		mbclen1 = mbrtowc(&wc1, s1 + i1, n - i1, &mbs1);
+		if (mbclen1 == (size_t) -1 || mbclen1 == (size_t) -2
+		    || mbclen1 == 0) {
+			/*
+			 * We treat it as a singlebyte character.  Go through
+			 * unsigned char so that bytes >= 0x80 do not turn into
+			 * negative wide characters, and restart from the
+			 * initial state since the failed conversion may have
+			 * left `mbs1' unusable.
+			 */
+			mbclen1 = 1;
+			wc1 = (unsigned char) s1[i1];
+			memset(&mbs1, 0, sizeof(mbs1));
+		}
+
+		mbclen2 = mbrtowc(&wc2, s2 + i2, n - i2, &mbs2);
+		if (mbclen2 == (size_t) -1 || mbclen2 == (size_t) -2
+		    || mbclen2 == 0) {
+			/* Same singlebyte fallback as for s1. */
+			mbclen2 = 1;
+			wc2 = (unsigned char) s2[i2];
+			memset(&mbs2, 0, sizeof(mbs2));
+		}
+
+		gap = (int) towlower(wc1) - (int) towlower(wc2);
+		if (gap != 0)
+			/* s1 and s2 are not equivalent. */
+			return gap;
+	}
+
+	/* s1 and s2 are equivalent. */
+	return 0;
+}
+
+/*
+ * index_multibyte_buffer --- for every byte of `src', store in `dest'
+ * the position of that byte inside the character it belongs to.
+ * The caller must allocate `dest' (at least `len' bytes).
+ *
+ * e.g. src  = <mb1(1)>, <mb1(2)>, a, b, <mb2(1)>, <mb2(2)>, <mb2(3)>, c
+ *      where mbN(i) means the `i'-th byte of multibyte character N.
+ *      dest = 1, 2, 1, 1, 1, 2, 3, 1
+ *
+ * Bytes that mbrlen() rejects as invalid are recorded as singlebyte
+ * characters.
+ */
+static void
+index_multibyte_buffer(char *src, char *dest, int len)
+{
+	mbstate_t char_start_state;	/* state at the start of the current character */
+	mbstate_t probe_state;		/* scratch copy handed to mbrlen() */
+	int char_start = 0;		/* offset of the current character's first byte */
+	int pos;
+
+	memset(&char_start_state, 0, sizeof(mbstate_t));
+
+	for (pos = 0; pos < len; pos++) {
+		size_t seen = pos - char_start + 1;	/* bytes of this character so far */
+		size_t mbclen;
+
+		/* Always re-scan from the first byte of the character. */
+		probe_state = char_start_state;
+		mbclen = mbrlen(src + char_start, seen, &probe_state);
+
+		if (mbclen == (size_t) -2) {
+			/* Still inside a multibyte character. */
+			dest[pos] = seen;
+			continue;
+		}
+
+		if (mbclen == (size_t) -1 || mbclen <= 1) {
+			/* Singlebyte character (or treated as one). */
+			dest[pos] = 1;
+		} else {
+			/* Last byte of a multibyte character: keep the new state. */
+			dest[pos] = mbclen;
+			char_start_state = probe_state;
+		}
+
+		/* The next character starts right after this byte. */
+		char_start = pos + 1;
+	}
+}
+#endif
+
/* do_index --- find index of a string */
NODE *
@@ -213,6 +278,14 @@ do_index(NODE *tree)
register char *p1, *p2;
register size_t l1, l2;
long ret;
+#ifdef MBS_SUPPORT
+ size_t mbclen = 0;
+ mbstate_t mbs1, mbs2;
+ if (MB_CUR_MAX > 1) {
+ memset(&mbs1, 0, sizeof(mbstate_t));
+ memset(&mbs2, 0, sizeof(mbstate_t));
+ }
+#endif
s1 = tree_eval(tree->lnode);
@@ -231,11 +304,38 @@ do_index(NODE *tree)
l2 = s2->stlen;
ret = 0;
+ /*
+ * Icky special case, index(foo, "") should return 1,
+ * since both bwk awk and mawk do, and since match("foo", "")
+ * returns 1. This makes index("", "") work, too, fwiw.
+ */
+ if (l2 == 0) {
+ ret = 1;
+ goto out;
+ }
+
/* IGNORECASE will already be false if posix */
if (IGNORECASE) {
while (l1 > 0) {
if (l2 > l1)
break;
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1) {
+ if (strncasecmpmbs(p1, mbs1, p2, mbs2, l2) == 0) {
+ ret = 1 + s1->stlen - l1;
+ break;
+ }
+ /* Update l1, and p1. */
+ mbclen = mbrlen(p1, l1, &mbs1);
+ if ((mbclen == 1) || (mbclen == (size_t) -1)
+ || (mbclen == (size_t) -2) || (mbclen == 0)) {
+ /* We treat it as a singlebyte character. */
+ mbclen = 1;
+ }
+ l1 -= mbclen;
+ p1 += mbclen;
+ } else {
+#endif
if (casetable[(unsigned char)*p1] == casetable[(unsigned char)*p2]
&& (l2 == 1 || strncasecmp(p1, p2, l2) == 0)) {
ret = 1 + s1->stlen - l1;
@@ -243,6 +343,9 @@ do_index(NODE *tree)
}
l1--;
p1++;
+#ifdef MBS_SUPPORT
+ }
+#endif
}
} else {
while (l1 > 0) {
@@ -253,10 +356,27 @@ do_index(NODE *tree)
ret = 1 + s1->stlen - l1;
break;
}
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1) {
+ mbclen = mbrlen(p1, l1, &mbs1);
+ if ((mbclen == 1) || (mbclen == (size_t) -1) ||
+ (mbclen == (size_t) -2) || (mbclen == 0)) {
+ /* We treat it as a singlebyte character. */
+ mbclen = 1;
+ }
+ l1 -= mbclen;
+ p1 += mbclen;
+ } else {
+ l1--;
+ p1++;
+ }
+#else
l1--;
p1++;
+#endif
}
}
+out:
free_temp(s1);
free_temp(s2);
return tmp_number((AWKNUM) ret);
@@ -360,7 +480,7 @@ format_tree(
/* copy one byte from 's' to 'obufout' checking for space in the process */
#define bchunk_one(s) { \
- if (ofre <= 0) { \
+ if (ofre < 1) { \
long olen = obufout - obuf; \
erealloc(obuf, char *, osiz * 2, "format_tree"); \
ofre += osiz; \
@@ -945,14 +1065,14 @@ check_pos:
if (toofew)
fatal("%s\n\t`%s'\n\t%*s%s",
_("not enough arguments to satisfy format string"),
- fmt_string, s1 - fmt_string - 2, "",
+ fmt_string, s1 - fmt_string - 1, "",
_("^ ran out for this one"));
}
if (do_lint) {
if (need_format)
lintwarn(
_("[s]printf: format specifier does not have control letter"));
- if (carg != NULL)
+ if (cur_arg < num_args)
lintwarn(
_("too many arguments supplied for format string"));
}
@@ -1140,7 +1260,7 @@ do_strftime(NODE *tree)
if (tree->lnode != NULL) {
NODE *tmp = tree_eval(tree->lnode);
if (do_lint && (tmp->flags & (STRING|STR)) == 0)
- lintwarn(_("strftime: recieved non-string first argument"));
+ lintwarn(_("strftime: received non-string first argument"));
t1 = force_string(tmp);
format = t1->stptr;
formatlen = t1->stlen;
@@ -1155,7 +1275,7 @@ do_strftime(NODE *tree)
if (tree->rnode != NULL) {
t2 = tree_eval(tree->rnode->lnode);
if (do_lint && (t2->flags & (NUM|NUMBER)) == 0)
- lintwarn(_("strftime: recieved non-numeric second argument"));
+ lintwarn(_("strftime: received non-numeric second argument"));
fclock = (time_t) force_number(t2);
free_temp(t2);
}
@@ -1263,7 +1383,7 @@ do_system(NODE *tree)
(void) flush_io(); /* so output is synchronous with gawk's */
tmp = tree_eval(tree->lnode);
if (do_lint && (tmp->flags & (STRING|STR)) == 0)
- lintwarn(_("system: recieved non-string argument"));
+ lintwarn(_("system: received non-string argument"));
cmd = force_string(tmp)->stptr;
if (cmd && *cmd) {
@@ -1380,14 +1500,42 @@ do_tolower(NODE *tree)
{
NODE *t1, *t2;
register unsigned char *cp, *cp2;
+#ifdef MBS_SUPPORT
+ size_t mbclen = 0;
+ mbstate_t mbs, prev_mbs;
+ if (MB_CUR_MAX > 1)
+ memset(&mbs, 0, sizeof(mbstate_t));
+#endif
t1 = tree_eval(tree->lnode);
if (do_lint && (t1->flags & (STRING|STR)) == 0)
- lintwarn(_("tolower: recieved non-string argument"));
+ lintwarn(_("tolower: received non-string argument"));
t1 = force_string(t1);
t2 = tmp_string(t1->stptr, t1->stlen);
for (cp = (unsigned char *)t2->stptr,
cp2 = (unsigned char *)(t2->stptr + t2->stlen); cp < cp2; cp++)
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1) {
+ wchar_t wc;
+ prev_mbs = mbs;
+ mbclen = (size_t) mbrtowc(&wc, cp, cp2 - cp, &mbs);
+ if ((mbclen != 1) && (mbclen != (size_t) -1) &&
+ (mbclen != (size_t) -2) && (mbclen != 0)) {
+ /* a multibyte character. */
+ if (iswupper(wc))
+ {
+ wc = towlower(wc);
+ wcrtomb(cp, wc, &prev_mbs);
+ }
+ /* Adjust the pointer. */
+ cp += mbclen - 1;
+ } else {
+ /* Otherwise we treat it as a singlebyte character. */
+ if (ISUPPER(*cp))
+ *cp = tolower(*cp);
+ }
+ } else
+#endif
if (ISUPPER(*cp))
*cp = TOLOWER(*cp);
free_temp(t1);
@@ -1401,14 +1549,42 @@ do_toupper(NODE *tree)
{
NODE *t1, *t2;
register unsigned char *cp, *cp2;
+#ifdef MBS_SUPPORT
+ size_t mbclen = 0;
+ mbstate_t mbs, prev_mbs;
+ if (MB_CUR_MAX > 1)
+ memset(&mbs, 0, sizeof(mbstate_t));
+#endif
t1 = tree_eval(tree->lnode);
if (do_lint && (t1->flags & (STRING|STR)) == 0)
- lintwarn(_("toupper: recieved non-string argument"));
+ lintwarn(_("toupper: received non-string argument"));
t1 = force_string(t1);
t2 = tmp_string(t1->stptr, t1->stlen);
for (cp = (unsigned char *)t2->stptr,
cp2 = (unsigned char *)(t2->stptr + t2->stlen); cp < cp2; cp++)
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX > 1) {
+ wchar_t wc;
+ prev_mbs = mbs;
+ mbclen = (size_t) mbrtowc(&wc, cp, cp2 - cp, &mbs);
+ if ((mbclen != 1) && (mbclen != (size_t) -1) &&
+ (mbclen != (size_t) -2) && (mbclen != 0)) {
+ /* a multibyte character. */
+ if (iswlower(wc))
+ {
+ wc = towupper(wc);
+ wcrtomb(cp, wc, &prev_mbs);
+ }
+ /* Adjust the pointer. */
+ cp += mbclen - 1;
+ } else {
+ /* Otherwise we treat it as a singlebyte character. */
+ if (ISLOWER(*cp))
+ *cp = toupper(*cp);
+ }
+ } else
+#endif
if (ISLOWER(*cp))
*cp = TOUPPER(*cp);
free_temp(t1);
@@ -1669,6 +1845,9 @@ sub_common(NODE *tree, int how_many, int backdigs)
int global = (how_many == -1);
long current;
int lastmatchnonzero;
+#ifdef MBS_SUPPORT
+ char *mb_indices;
+#endif
tmp = tree->lnode;
rp = re_update(tmp);
@@ -1712,8 +1891,28 @@ sub_common(NODE *tree, int how_many, int backdigs)
buf[buflen] = '\0';
buf[buflen + 1] = '\0';
ampersands = 0;
+#ifdef MBS_SUPPORT
+ /*
+ * Some systems' malloc() can't handle being called with an
+ * argument of zero. Thus we have to have some special case
+ * code to check for `repllen == 0'. This can occur for
+ * something like:
+ * sub(/foo/, "", mystring)
+ * for example.
+ */
+ if (MB_CUR_MAX > 1 && repllen > 0) {
+ emalloc(mb_indices, char *, repllen * sizeof(char), "sub_common");
+ index_multibyte_buffer(repl, mb_indices, repllen);
+ } else
+ mb_indices = NULL;
+#endif
for (scan = repl; scan < replend; scan++) {
+#ifdef MBS_SUPPORT
+ if ((MB_CUR_MAX == 1 || (repllen > 0 && mb_indices[scan - repl] == 1))
+ && (*scan == '&')) {
+#else
if (*scan == '&') {
+#endif
repllen--;
ampersands++;
} else if (*scan == '\\') {
@@ -1783,10 +1982,22 @@ sub_common(NODE *tree, int how_many, int backdigs)
* making substitutions as we go.
*/
for (scan = repl; scan < replend; scan++)
+#ifdef MBS_SUPPORT
+ if ((MB_CUR_MAX == 1
+ || (repllen > 0 && mb_indices[scan - repl] == 1))
+ && (*scan == '&'))
+#else
if (*scan == '&')
+#endif
for (cp = matchstart; cp < matchend; cp++)
*bp++ = *cp;
+#ifdef MBS_SUPPORT
+ else if ((MB_CUR_MAX == 1
+ || (repllen > 0 && mb_indices[scan - repl] == 1))
+ && (*scan == '\\')) {
+#else
else if (*scan == '\\') {
+#endif
if (backdigs) { /* gensub, behave sanely */
if (ISDIGIT(scan[1])) {
int dig = scan[1] - '0';
@@ -1872,6 +2083,10 @@ sub_common(NODE *tree, int how_many, int backdigs)
(*after_assign)();
t->flags &= ~(NUM|NUMBER);
}
+#ifdef MBS_SUPPORT
+ if (mb_indices != NULL)
+ free(mb_indices);
+#endif
return tmp_number((AWKNUM) matches);
}
@@ -2250,9 +2465,7 @@ do_strtonum(NODE *tree)
tmp = tree_eval(tree->lnode);
- if ((tmp->flags & (NUM|NUMBER)) != 0)
- d = (double) force_number(tmp);
- else if (isnondecimal(tmp->stptr))
+ if (isnondecimal(tmp->stptr))
d = nondec2awknum(tmp->stptr, tmp->stlen);
else
d = (double) force_number(tmp);
@@ -2278,7 +2491,12 @@ nondec2awknum(char *str, size_t len)
char *start = str;
if (*str == '0' && (str[1] == 'x' || str[1] == 'X')) {
- assert(len > 2);
+ /*
+ * User called strtonum("0x") or some such,
+ * so just quit early.
+ */
+ if (len <= 2)
+ return (AWKNUM) 0.0;
for (str += 2, len -= 2; len > 0; len--, str++) {
switch (*str) {
@@ -2336,25 +2554,13 @@ done:
return retval;
}
-/* do_dcgettext --- handle i18n translations */
+/* do_dcgettext, do_dcngettext --- handle i18n translations */
-/*
- * awk usage is
- *
- * str = dcgettext(string [, domain [, category]])
- *
- * Default domain is TEXTDOMAIN, default category is LC_MESSAGES.
- */
+#if ENABLE_NLS && HAVE_LC_MESSAGES && HAVE_DCGETTEXT
-NODE *
-do_dcgettext(NODE *tree)
+static int
+localecategory_from_argument(NODE *tree)
{
- NODE *tmp, *t1, *t2;
- char *string;
- char *the_result;
-#if ENABLE_NLS && HAVE_LC_MESSAGES && HAVE_DCGETTEXT
- int lc_cat = -1;
- char *category, *domain;
static struct category_table {
int val;
char *name;
@@ -2384,27 +2590,13 @@ do_dcgettext(NODE *tree)
{ LC_TIME, "LC_TIME" },
#endif /* LC_TIME */
};
-#endif /* ENABLE_NLS */
- tmp = tree->lnode; /* first argument */
- t1 = force_string(tree_eval(tmp));
- string = t1->stptr;
-
- t2 = NULL;
-#if ENABLE_NLS && HAVE_LC_MESSAGES && HAVE_DCGETTEXT
- tree = tree->rnode; /* second argument */
if (tree != NULL) {
- tmp = tree->lnode;
- t2 = force_string(tree_eval(tmp));
- domain = t2->stptr;
- } else
- domain = TEXTDOMAIN;
-
- if (tree != NULL && tree->rnode != NULL) { /* third argument */
int low, high, i, mid;
- NODE *t;
+ NODE *tmp, *t;
+ char *category;
+ int lc_cat = -1;
- tree = tree->rnode;
tmp = tree->lnode;
t = force_string(tree_eval(tmp));
category = t->stptr;
@@ -2429,6 +2621,49 @@ do_dcgettext(NODE *tree)
fatal(_("dcgettext: `%s' is not a valid locale category"), category);
free_temp(t);
+ return lc_cat;
+ } else
+ return LC_MESSAGES;
+}
+
+#endif
+
+/*
+ * awk usage is
+ *
+ * str = dcgettext(string [, domain [, category]])
+ * str = dcngettext(string1, string2, number [, domain [, category]])
+ *
+ * Default domain is TEXTDOMAIN, default category is LC_MESSAGES.
+ */
+
+NODE *
+do_dcgettext(NODE *tree)
+{
+ NODE *tmp, *t1, *t2;
+ char *string;
+ char *the_result;
+#if ENABLE_NLS && HAVE_LC_MESSAGES && HAVE_DCGETTEXT
+ int lc_cat;
+ char *domain;
+#endif /* ENABLE_NLS */
+
+ tmp = tree->lnode; /* first argument */
+ t1 = force_string(tree_eval(tmp));
+ string = t1->stptr;
+
+ t2 = NULL;
+#if ENABLE_NLS && HAVE_LC_MESSAGES && HAVE_DCGETTEXT
+ tree = tree->rnode; /* second argument */
+ if (tree != NULL) {
+ tmp = tree->lnode;
+ t2 = force_string(tree_eval(tmp));
+ domain = t2->stptr;
+ } else
+ domain = TEXTDOMAIN;
+
+ if (tree && tree->rnode != NULL) { /* third argument */
+ lc_cat = localecategory_from_argument(tree->rnode);
} else
lc_cat = LC_MESSAGES;
@@ -2443,6 +2678,56 @@ do_dcgettext(NODE *tree)
return tmp_string(the_result, strlen(the_result));
}
+/*
+ * do_dcngettext --- handle i18n translations that have plural forms
+ *
+ * awk usage is
+ *
+ * 	str = dcngettext(string1, string2, number [, domain [, category]])
+ *
+ * Default domain is TEXTDOMAIN, default category is LC_MESSAGES.
+ * Without NLS support the result is string1 when number is 1 and
+ * string2 otherwise.
+ */
+NODE *
+do_dcngettext(NODE *tree)
+{
+	NODE *tmp, *t1, *t2, *t3, *tnum, *result;
+	char *string1, *string2;
+	long number;
+	char *the_result;
+#if ENABLE_NLS && HAVE_LC_MESSAGES && HAVE_DCGETTEXT
+	int lc_cat;
+	char *domain;
+#endif /* ENABLE_NLS */
+
+	tmp = tree->lnode;	/* first argument */
+	t1 = force_string(tree_eval(tmp));
+	string1 = t1->stptr;
+
+	tmp = tree->rnode->lnode;	/* second argument */
+	t2 = force_string(tree_eval(tmp));
+	string2 = t2->stptr;
+
+	/*
+	 * Third argument.  Keep the evaluated node so it can be handed
+	 * to free_temp() once converted, the same way do_strftime treats
+	 * its numeric argument.
+	 */
+	tmp = tree->rnode->rnode->lnode;
+	tnum = tree_eval(tmp);
+	number = (long) double_to_int(force_number(tnum));
+	free_temp(tnum);
+
+	t3 = NULL;
+#if ENABLE_NLS && HAVE_LC_MESSAGES && HAVE_DCGETTEXT
+	tree = tree->rnode->rnode->rnode;	/* fourth argument */
+	if (tree != NULL) {
+		tmp = tree->lnode;
+		t3 = force_string(tree_eval(tmp));
+		domain = t3->stptr;
+	} else
+		domain = TEXTDOMAIN;
+
+	if (tree && tree->rnode != NULL) {	/* fifth argument */
+		lc_cat = localecategory_from_argument(tree->rnode);
+	} else
+		lc_cat = LC_MESSAGES;
+
+	the_result = dcngettext(domain, string1, string2, number, lc_cat);
+#else
+	the_result = (number == 1 ? string1 : string2);
+#endif
+	/*
+	 * the_result may point straight into string1 or string2 (it always
+	 * does in the non-NLS branch), so copy it into the result node
+	 * before the argument temporaries are released.
+	 */
+	result = tmp_string(the_result, strlen(the_result));
+
+	free_temp(t1);
+	free_temp(t2);
+	if (t3 != NULL)
+		free_temp(t3);
+
+	return result;
+}
+
/* do_bindtextdomain --- set the directory for a text domain */
/*
@@ -2482,7 +2767,7 @@ do_bindtextdomain(NODE *tree)
free_temp(t1);
if (t2 != NULL)
- free_temp(t1);
+ free_temp(t2);
return tmp_string(the_result, strlen(the_result));
}