diff options
-rw-r--r-- | ChangeLog | 8 | ||||
-rw-r--r-- | array.c | 9 | ||||
-rw-r--r-- | awk.h | 18 | ||||
-rw-r--r-- | awkgram.c | 30 | ||||
-rw-r--r-- | awkgram.y | 30 | ||||
-rw-r--r-- | builtin.c | 84 | ||||
-rw-r--r-- | dfa.c | 71 | ||||
-rw-r--r-- | eval.c | 13 | ||||
-rw-r--r-- | field.c | 57 | ||||
-rw-r--r-- | interpret.h | 2 | ||||
-rw-r--r-- | io.c | 5 | ||||
-rw-r--r-- | main.c | 7 | ||||
-rw-r--r-- | mbsupport.h | 74 | ||||
-rw-r--r-- | mpfr.c | 2 | ||||
-rw-r--r-- | node.c | 18 | ||||
-rw-r--r-- | re.c | 7 | ||||
-rw-r--r-- | regex_internal.h | 8 | ||||
-rw-r--r-- | replace.c | 2 |
18 files changed, 80 insertions, 365 deletions
@@ -1,3 +1,11 @@ +2014-11-15 Arnold D. Robbins <arnold@skeeve.com> + + * array.c, awk.h, awkgram.y, builtin.c, dfa.c, eval.c, field.c, + interpret.h, io.c, main.c, mpfr.c, node.c, re.c, regex_internal.h, + replace.c: Remove all uses of MBS_SUPPORT. + * regex_internal.h: Disable wide characters on DJGPP. + * mbsupport.h: Rework to be needed only for DJGPP. + 2014-11-11 Arnold D. Robbins <arnold@skeeve.com> Don't let memory used increase linearly in the size of @@ -978,14 +978,13 @@ cmp_strings(const NODE *n1, const NODE *n2) const unsigned char *cp1 = (const unsigned char *) s1; const unsigned char *cp2 = (const unsigned char *) s2; -#if MBS_SUPPORT if (gawk_mb_cur_max > 1) { ret = strncasecmpmbs((const unsigned char *) cp1, (const unsigned char *) cp2, lmin); - } else -#endif - for (ret = 0; lmin-- > 0 && ret == 0; cp1++, cp2++) - ret = casetable[*cp1] - casetable[*cp2]; + } else { + for (ret = 0; lmin-- > 0 && ret == 0; cp1++, cp2++) + ret = casetable[*cp1] - casetable[*cp2]; + } if (ret != 0) return ret; /* @@ -95,13 +95,11 @@ extern int errno; #include "missing_d/gawkbool.h" #endif -#include "mbsupport.h" /* defines MBS_SUPPORT */ - -#if MBS_SUPPORT /* We can handle multibyte strings. 
*/ #include <wchar.h> #include <wctype.h> -#endif + +#include "mbsupport.h" /* defines stuff for DJGPP to fake MBS */ #ifdef STDC_HEADERS #include <float.h> @@ -395,10 +393,8 @@ typedef struct exp_node { size_t slen; long sref; int idx; -#if MBS_SUPPORT wchar_t *wsp; size_t wslen; -#endif } val; } sub; NODETYPE type; @@ -1104,11 +1100,7 @@ extern int exit_val; #define do_lint (do_flags & (DO_LINT_INVALID|DO_LINT_ALL)) #define do_lint_old (do_flags & DO_LINT_OLD) #endif -#if MBS_SUPPORT extern int gawk_mb_cur_max; -#else -#define gawk_mb_cur_max (1) -#endif #if defined (HAVE_GETGROUPS) && defined(NGROUPS_MAX) && NGROUPS_MAX > 0 extern GETGROUPS_T *groupset; @@ -1416,10 +1408,8 @@ extern AWKNUM nondec2awknum(char *str, size_t len); extern NODE *do_dcgettext(int nargs); extern NODE *do_dcngettext(int nargs); extern NODE *do_bindtextdomain(int nargs); -#if MBS_SUPPORT extern int strncasecmpmbs(const unsigned char *, const unsigned char *, size_t); -#endif /* eval.c */ extern void PUSH_CODE(INSTRUCTION *cp); extern INSTRUCTION *POP_CODE(void); @@ -1602,7 +1592,6 @@ extern NODE *r_dupnode(NODE *n); extern NODE *make_str_node(const char *s, size_t len, int flags); extern void *more_blocks(int id); extern int parse_escape(const char **string_ptr); -#if MBS_SUPPORT extern NODE *str2wstr(NODE *n, size_t **ptr); extern NODE *wstr2str(NODE *n); #define force_wstring(n) str2wstr(n, NULL) @@ -1616,9 +1605,6 @@ extern wint_t btowc_cache[]; #define btowc_cache(x) btowc_cache[(x)&0xFF] extern void init_btowc_cache(); #define is_valid_character(b) (btowc_cache[(b)&0xFF] != WEOF) -#else -#define free_wstr(NODE) /* empty */ -#endif /* re.c */ extern Regexp *make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal); extern int research(Regexp *rp, char *str, int start, size_t len, int flags); @@ -4255,7 +4255,6 @@ static const struct token tokentab[] = { {"xor", Op_builtin, LEX_BUILTIN, GAWKX, do_xor, MPF(xor)}, }; -#if MBS_SUPPORT /* Variable containing the 
current shift state. */ static mbstate_t cur_mbstate; /* Ring buffer containing current characters. */ @@ -4267,10 +4266,6 @@ static int cur_ring_idx; /* This macro means that last nextc() return a singlebyte character or 1st byte of a multibyte character. */ #define nextc_is_1stbyte (cur_char_ring[cur_ring_idx] == 1) -#else /* MBS_SUPPORT */ -/* a dummy */ -#define nextc_is_1stbyte 1 -#endif /* MBS_SUPPORT */ /* getfname --- return name of a builtin function (for pretty printing) */ @@ -5159,8 +5154,6 @@ check_bad_char(int c) /* nextc --- get the next input character */ -#if MBS_SUPPORT - static int nextc(bool check_for_bad) { @@ -5231,35 +5224,14 @@ again: } } -#else /* MBS_SUPPORT */ - -int -nextc(bool check_for_bad) -{ - do { - if (lexeof) - return END_FILE; - if (lexptr && lexptr < lexend) { - if (check_for_bad) - check_bad_char(*lexptr); - return ((int) (unsigned char) *lexptr++); - } - } while (get_src_buf()); - return END_SRC; -} - -#endif /* MBS_SUPPORT */ - /* pushback --- push a character back on the input */ static inline void pushback(void) { -#if MBS_SUPPORT if (gawk_mb_cur_max > 1) cur_ring_idx = (cur_ring_idx == 0)? RING_BUFFER_SIZE - 1 : cur_ring_idx - 1; -#endif (! lexeof && lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr); } @@ -5468,9 +5440,7 @@ retry: thisline = NULL; tok = tokstart; -#if MBS_SUPPORT if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) -#endif switch (c) { case END_SRC: return 0; @@ -1916,7 +1916,6 @@ static const struct token tokentab[] = { {"xor", Op_builtin, LEX_BUILTIN, GAWKX, do_xor, MPF(xor)}, }; -#if MBS_SUPPORT /* Variable containing the current shift state. */ static mbstate_t cur_mbstate; /* Ring buffer containing current characters. */ @@ -1928,10 +1927,6 @@ static int cur_ring_idx; /* This macro means that last nextc() return a singlebyte character or 1st byte of a multibyte character. 
*/ #define nextc_is_1stbyte (cur_char_ring[cur_ring_idx] == 1) -#else /* MBS_SUPPORT */ -/* a dummy */ -#define nextc_is_1stbyte 1 -#endif /* MBS_SUPPORT */ /* getfname --- return name of a builtin function (for pretty printing) */ @@ -2820,8 +2815,6 @@ check_bad_char(int c) /* nextc --- get the next input character */ -#if MBS_SUPPORT - static int nextc(bool check_for_bad) { @@ -2892,35 +2885,14 @@ again: } } -#else /* MBS_SUPPORT */ - -int -nextc(bool check_for_bad) -{ - do { - if (lexeof) - return END_FILE; - if (lexptr && lexptr < lexend) { - if (check_for_bad) - check_bad_char(*lexptr); - return ((int) (unsigned char) *lexptr++); - } - } while (get_src_buf()); - return END_SRC; -} - -#endif /* MBS_SUPPORT */ - /* pushback --- push a character back on the input */ static inline void pushback(void) { -#if MBS_SUPPORT if (gawk_mb_cur_max > 1) cur_ring_idx = (cur_ring_idx == 0)? RING_BUFFER_SIZE - 1 : cur_ring_idx - 1; -#endif (! lexeof && lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr); } @@ -3129,9 +3101,7 @@ retry: thisline = NULL; tok = tokstart; -#if MBS_SUPPORT if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) -#endif switch (c) { case END_SRC: return 0; @@ -247,7 +247,6 @@ do_fflush(int nargs) return make_number((AWKNUM) status); } -#if MBS_SUPPORT /* strncasecmpmbs --- like strncasecmp (multibyte string version) */ int @@ -327,14 +326,6 @@ index_multibyte_buffer(char* src, char* dest, int len) dest[idx] = mbclen; } } -#else -/* a dummy function */ -static void -index_multibyte_buffer(char* src ATTRIBUTE_UNUSED, char* dest ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) -{ - cant_happen(); -} -#endif /* do_index --- find index of a string */ @@ -345,7 +336,6 @@ do_index(int nargs) const char *p1, *p2; size_t l1, l2; long ret; -#if MBS_SUPPORT bool do_single_byte = false; mbstate_t mbs1, mbs2; @@ -353,7 +343,6 @@ do_index(int nargs) memset(& mbs1, 0, sizeof(mbstate_t)); memset(& mbs2, 0, sizeof(mbstate_t)); } -#endif POP_TWO_SCALARS(s1, s2); @@ -383,7 +372,6 @@ 
do_index(int nargs) goto out; } -#if MBS_SUPPORT if (gawk_mb_cur_max > 1) { s1 = force_wstring(s1); s2 = force_wstring(s2); @@ -394,14 +382,12 @@ do_index(int nargs) do_single_byte = ((s1->wstlen == 0 && s1->stlen > 0) || (s2->wstlen == 0 && s2->stlen > 0)); } -#endif /* IGNORECASE will already be false if posix */ if (IGNORECASE) { while (l1 > 0) { if (l2 > l1) break; -#if MBS_SUPPORT if (! do_single_byte && gawk_mb_cur_max > 1) { const wchar_t *pos; @@ -412,21 +398,18 @@ do_index(int nargs) ret = pos - s1->wstptr + 1; /* 1-based */ goto out; } else { -#endif - /* - * Could use tolower(*p1) == tolower(*p2) here. - * See discussion in eval.c as to why not. - */ - if (casetable[(unsigned char)*p1] == casetable[(unsigned char)*p2] - && (l2 == 1 || strncasecmp(p1, p2, l2) == 0)) { - ret = 1 + s1->stlen - l1; - break; - } - l1--; - p1++; -#if MBS_SUPPORT + /* + * Could use tolower(*p1) == tolower(*p2) here. + * See discussion in eval.c as to why not. + */ + if (casetable[(unsigned char)*p1] == casetable[(unsigned char)*p2] + && (l2 == 1 || strncasecmp(p1, p2, l2) == 0)) { + ret = 1 + s1->stlen - l1; + break; + } + l1--; + p1++; } -#endif } } else { while (l1 > 0) { @@ -437,7 +420,6 @@ do_index(int nargs) ret = 1 + s1->stlen - l1; break; } -#if MBS_SUPPORT if (! 
do_single_byte && gawk_mb_cur_max > 1) { const wchar_t *pos; @@ -451,10 +433,6 @@ do_index(int nargs) l1--; p1++; } -#else - l1--; - p1++; -#endif } } out: @@ -544,7 +522,6 @@ do_length(int nargs) lintwarn(_("length: received non-string argument")); tmp = force_string(tmp); -#if MBS_SUPPORT if (gawk_mb_cur_max > 1) { tmp = force_wstring(tmp); len = tmp->wstlen; @@ -555,7 +532,6 @@ do_length(int nargs) if (len == 0 && tmp->stlen > 0) len = tmp->stlen; } else -#endif len = tmp->stlen; DEREF(tmp); @@ -1058,7 +1034,6 @@ check_pos: (void) force_number(arg); if ((arg->flags & NUMBER) != 0) { uval = get_number_uj(arg); -#if MBS_SUPPORT if (gawk_mb_cur_max > 1) { char buf[100]; wchar_t wc; @@ -1099,7 +1074,7 @@ out0: ; /* else, fall through */ -#endif + cpbuf[0] = uval; prec = 1; cp = cpbuf; @@ -1113,7 +1088,6 @@ out0: */ cp = arg->stptr; prec = 1; -#if MBS_SUPPORT /* * First character can be multiple bytes if * it's a multibyte character. Grr. @@ -1131,7 +1105,6 @@ out0: fw += count - 1; } } -#endif goto pr_tail; case 's': need_format = false; @@ -1805,13 +1778,11 @@ do_substr(int nargs) if (nargs == 2) { /* third arg. 
missing */ /* use remainder of string */ length = t1->stlen - indx; /* default to bytes */ -#if MBS_SUPPORT if (gawk_mb_cur_max > 1) { t1 = force_wstring(t1); if (t1->wstlen > 0) /* use length of wide char string if we have one */ length = t1->wstlen - indx; } -#endif d_length = length; /* set here in case used in diagnostics, below */ } @@ -1824,12 +1795,10 @@ do_substr(int nargs) } /* get total len of input string, for following checks */ -#if MBS_SUPPORT if (gawk_mb_cur_max > 1) { t1 = force_wstring(t1); src_len = t1->wstlen; } else -#endif src_len = t1->stlen; if (indx >= src_len) { @@ -1847,7 +1816,6 @@ do_substr(int nargs) length = src_len - indx; } -#if MBS_SUPPORT /* force_wstring() already called */ if (gawk_mb_cur_max == 1 || t1->wstlen == t1->stlen) /* single byte case */ @@ -1877,9 +1845,6 @@ do_substr(int nargs) *cp = '\0'; r = make_str_node(substr, cp - substr, ALREADY_MALLOCED); } -#else - r = make_string(t1->stptr + indx, length); -#endif DEREF(t1); return r; @@ -2211,7 +2176,6 @@ do_print_rec(int nargs, int redirtype) rp->output.gawk_fflush(rp->output.fp, rp->output.opaque); } -#if MBS_SUPPORT /* is_wupper --- function version of iswupper for passing function pointers */ @@ -2276,7 +2240,6 @@ wide_tolower(wchar_t *wstr, size_t wlen) { wide_change_case(wstr, wlen, is_wupper, to_wlower); } -#endif /* do_tolower --- lower case a string */ @@ -2299,14 +2262,11 @@ do_tolower(int nargs) cp < cp2; cp++) if (isupper(*cp)) *cp = tolower(*cp); - } -#if MBS_SUPPORT - else { + } else { force_wstring(t2); wide_tolower(t2->wstptr, t2->wstlen); wstr2str(t2); } -#endif DEREF(t1); return t2; @@ -2333,14 +2293,11 @@ do_toupper(int nargs) cp < cp2; cp++) if (islower(*cp)) *cp = toupper(*cp); - } -#if MBS_SUPPORT - else { + } else { force_wstring(t2); wide_toupper(t2->wstptr, t2->wstlen); wstr2str(t2); } -#endif DEREF(t1); return t2; @@ -2490,13 +2447,12 @@ do_match(int nargs) size_t *wc_indices = NULL; rlength = REEND(rp, t1->stptr) - RESTART(rp, t1->stptr); /* byte 
length */ -#if MBS_SUPPORT if (rlength > 0 && gawk_mb_cur_max > 1) { t1 = str2wstr(t1, & wc_indices); rlength = wc_indices[rstart + rlength - 1] - wc_indices[rstart] + 1; rstart = wc_indices[rstart]; } -#endif + rstart++; /* now it's 1-based indexing */ /* Build the array only if the caller wants the optional subpatterns */ @@ -2518,12 +2474,10 @@ do_match(int nargs) start = t1->stptr + s; subpat_start = s; subpat_len = len = SUBPATEND(rp, t1->stptr, ii) - s; -#if MBS_SUPPORT if (len > 0 && gawk_mb_cur_max > 1) { subpat_start = wc_indices[s]; subpat_len = wc_indices[s + len - 1] - subpat_start + 1; } -#endif it = make_string(start, len); it->flags |= MAYBE_NUM; /* user input */ @@ -3578,7 +3532,6 @@ do_bindtextdomain(int nargs) static size_t mbc_byte_count(const char *ptr, size_t numchars) { -#if MBS_SUPPORT mbstate_t cur_state; size_t sum = 0; int mb_len; @@ -3599,9 +3552,6 @@ mbc_byte_count(const char *ptr, size_t numchars) } return sum; -#else - return numchars; -#endif } /* mbc_char_count --- return number of m.b. chars in string, up to numbytes bytes */ @@ -3609,7 +3559,6 @@ mbc_byte_count(const char *ptr, size_t numchars) static size_t mbc_char_count(const char *ptr, size_t numbytes) { -#if MBS_SUPPORT mbstate_t cur_state; size_t sum = 0; int mb_len; @@ -3632,7 +3581,4 @@ mbc_char_count(const char *ptr, size_t numbytes) } return sum; -#else - return numbytes; -#endif } @@ -58,15 +58,15 @@ #include "gettext.h" #define _(str) gettext (str) -#include "mbsupport.h" /* Define MBS_SUPPORT to 1 or 0, as appropriate. */ -#if MBS_SUPPORT -/* We can handle multibyte strings. */ -# include <wchar.h> -# include <wctype.h> -#endif +#include <wchar.h> +#include <wctype.h> #include "xalloc.h" +#if defined(__DJGPP__) +#include "mbsupport.h" +#endif + #include "dfa.h" #ifdef GAWK @@ -399,12 +399,10 @@ struct dfa */ int *multibyte_prop; -#if MBS_SUPPORT /* A table indexed by byte values that contains the corresponding wide character (if any) for that byte. 
WEOF means the byte is not a valid single-byte character. */ wint_t mbrtowc_cache[NOTCHAR]; -#endif /* Array of the bracket expression in the DFA. */ struct mb_char_classes *mbcsets; @@ -489,7 +487,6 @@ static void regexp (void); static void dfambcache (struct dfa *d) { -#if MBS_SUPPORT int i; for (i = CHAR_MIN; i <= CHAR_MAX; ++i) { @@ -499,10 +496,8 @@ dfambcache (struct dfa *d) wchar_t wc; d->mbrtowc_cache[uc] = mbrtowc (&wc, &c, 1, &s) <= 1 ? wc : WEOF; } -#endif } -#if MBS_SUPPORT /* Store into *PWC the result of converting the leading bytes of the multibyte buffer S of length N bytes, using the mbrtowc_cache in *D and updating the conversion state in *D. On conversion error, @@ -541,9 +536,6 @@ mbs_to_wchar (wint_t *pwc, char const *s, size_t n, struct dfa *d) *pwc = wc; return 1; } -#else -#define mbs_to_wchar(pwc, s, n, d) (WEOF) -#endif #ifdef DEBUG @@ -738,7 +730,7 @@ static charclass newline; #ifdef __GLIBC__ # define is_valid_unibyte_character(c) 1 #else -# define is_valid_unibyte_character(c) (! (MBS_SUPPORT && btowc (c) == WEOF)) +# define is_valid_unibyte_character(c) (btowc (c) != WEOF) #endif /* C is a "word-constituent" byte. */ @@ -799,17 +791,12 @@ dfasyntax (reg_syntax_t bits, int fold, unsigned char eol) static bool setbit_wc (wint_t wc, charclass c) { -#if MBS_SUPPORT int b = wctob (wc); if (b == EOF) return false; setbit (b, c); return true; -#else - abort (); - /*NOTREACHED*/ return false; -#endif } /* Set a bit for B and its case variants in the charclass C. @@ -907,7 +894,6 @@ static wint_t wctok; /* Wide character representation of the current MB_CUR_MAX > 1. */ -#if MBS_SUPPORT /* Fetch the next lexical input character. Set C (of type int) to the next input byte, except set C to EOF if the input is a multibyte character of length greater than 1. Set WC (of type wint_t) to the @@ -936,23 +922,6 @@ static wint_t wctok; /* Wide character representation of the current } \ } while (0) -#else -/* Note that characters become unsigned here. 
*/ -# define FETCH_WC(c, unused, eoferr) \ - do { \ - if (! lexleft) \ - { \ - if ((eoferr) != 0) \ - dfaerror (eoferr); \ - else \ - return lasttok = END; \ - } \ - (c) = to_uchar (*lexptr++); \ - --lexleft; \ - } while (0) - -#endif /* MBS_SUPPORT */ - #ifndef MIN # define MIN(a,b) ((a) < (b) ? (a) : (b)) #endif @@ -1764,7 +1733,6 @@ addtok (token t) } } -#if MBS_SUPPORT /* We treat a multibyte character as a single atom, so that DFA can treat a multibyte character as a single expression. @@ -1796,17 +1764,10 @@ addtok_wc (wint_t wc) addtok (CAT); } } -#else -static void -addtok_wc (wint_t wc) -{ -} -#endif static void add_utf8_anychar (void) { -#if MBS_SUPPORT static const charclass utf8_classes[5] = { /* 80-bf: non-leading bytes. */ {0, 0, 0, 0, CHARCLASS_WORD_MASK, CHARCLASS_WORD_MASK, 0, 0}, @@ -1861,7 +1822,6 @@ add_utf8_anychar (void) addtok (CAT); addtok (OR); } -#endif } /* The grammar understood by the parser is as follows. @@ -1902,7 +1862,7 @@ add_utf8_anychar (void) static void atom (void) { - if (MBS_SUPPORT && tok == WCHAR) + if (tok == WCHAR) { if (wctok == WEOF) addtok (BACKREF); @@ -1924,7 +1884,7 @@ atom (void) tok = lex (); } - else if (MBS_SUPPORT && tok == ANYCHAR && using_utf8 ()) + else if (tok == ANYCHAR && using_utf8 ()) { /* For UTF-8 expand the period to a series of CSETs that define a valid UTF-8 character. This avoids using the slow multibyte path. 
I'm @@ -1938,9 +1898,7 @@ atom (void) } else if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD -#if MBS_SUPPORT || tok == ANYCHAR || tok == MBCSET -#endif /* MBS_SUPPORT */ || tok == ENDWORD || tok == LIMWORD || tok == NOTLIMWORD) { addtok (tok); @@ -2273,10 +2231,8 @@ epsclosure (position_set *s, struct dfa const *d, char *visited) for (i = 0; i < s->nelem; ++i) if (d->tokens[s->elems[i].index] >= NOTCHAR && d->tokens[s->elems[i].index] != BACKREF -#if MBS_SUPPORT && d->tokens[s->elems[i].index] != ANYCHAR && d->tokens[s->elems[i].index] != MBCSET -#endif && d->tokens[s->elems[i].index] < CSET) { if (!initialized) @@ -2595,9 +2551,7 @@ dfaanalyze (struct dfa *d, int searchflag) it with its epsilon closure. */ for (i = 0; i < d->tindex; ++i) if (d->tokens[i] < NOTCHAR || d->tokens[i] == BACKREF -#if MBS_SUPPORT || d->tokens[i] == ANYCHAR || d->tokens[i] == MBCSET -#endif || d->tokens[i] >= CSET) { #ifdef DEBUG @@ -2707,9 +2661,8 @@ dfastate (state_num s, struct dfa *d, state_num trans[]) copyset (d->charclasses[d->tokens[pos.index] - CSET], matches); else { - if (MBS_SUPPORT - && (d->tokens[pos.index] == MBCSET - || d->tokens[pos.index] == ANYCHAR)) + if (d->tokens[pos.index] == MBCSET + || d->tokens[pos.index] == ANYCHAR) { /* MB_CUR_MAX > 1 */ if (d->tokens[pos.index] == MBCSET) @@ -3684,7 +3637,7 @@ dfaoptimize (struct dfa *d) size_t i; bool have_backref = false; - if (!MBS_SUPPORT || !using_utf8 ()) + if (!using_utf8 ()) return; for (i = 0; i < d->tindex; ++i) @@ -530,7 +530,7 @@ posix_compare(NODE *s1, NODE *s2) * In either case, ret will be the right thing to return. */ } -#if MBS_SUPPORT +#if ! 
defined(__DJGPP__) else { /* Similar logic, using wide characters */ (void) force_wstring(s1); @@ -610,15 +610,14 @@ cmp_nodes(NODE *t1, NODE *t2) const unsigned char *cp1 = (const unsigned char *) t1->stptr; const unsigned char *cp2 = (const unsigned char *) t2->stptr; -#if MBS_SUPPORT if (gawk_mb_cur_max > 1) { ret = strncasecmpmbs((const unsigned char *) cp1, (const unsigned char *) cp2, l); - } else -#endif - /* Could use tolower() here; see discussion above. */ - for (ret = 0; l-- > 0 && ret == 0; cp1++, cp2++) - ret = casetable[*cp1] - casetable[*cp2]; + } else { + /* Could use tolower() here; see discussion above. */ + for (ret = 0; l-- > 0 && ret == 0; cp1++, cp2++) + ret = casetable[*cp1] - casetable[*cp2]; + } } else ret = memcmp(t1->stptr, t2->stptr, l); @@ -392,12 +392,10 @@ re_parse_field(long up_to, /* parse only up to this field number */ char *end = scan + len; int regex_flags = RE_NEED_START; char *sep; -#if MBS_SUPPORT size_t mbclen = 0; mbstate_t mbs; - if (gawk_mb_cur_max > 1) - memset(&mbs, 0, sizeof(mbstate_t)); -#endif + + memset(&mbs, 0, sizeof(mbstate_t)); if (in_middle) regex_flags |= RE_NO_BOL; @@ -424,7 +422,6 @@ re_parse_field(long up_to, /* parse only up to this field number */ && nf < up_to) { regex_flags |= RE_NO_BOL; if (REEND(rp, scan) == RESTART(rp, scan)) { /* null match */ -#if MBS_SUPPORT if (gawk_mb_cur_max > 1) { mbclen = mbrlen(scan, end-scan, &mbs); if ((mbclen == 1) || (mbclen == (size_t) -1) @@ -434,8 +431,7 @@ re_parse_field(long up_to, /* parse only up to this field number */ } scan += mbclen; } else -#endif - scan++; + scan++; if (scan == end) { (*set)(++nf, field, (long)(scan - field), n); up_to = nf; @@ -636,7 +632,6 @@ null_parse_field(long up_to, /* parse only up to this field number */ if (len == 0) return nf; -#if MBS_SUPPORT if (gawk_mb_cur_max > 1) { mbstate_t mbs; memset(&mbs, 0, sizeof(mbstate_t)); @@ -652,12 +647,12 @@ null_parse_field(long up_to, /* parse only up to this field number */ (*set)(++nf, scan, 
mbclen, n); scan += mbclen; } - } else -#endif - for (; nf < up_to && scan < end; scan++) { - if (sep_arr != NULL && nf > 0) - set_element(nf, scan, 0L, sep_arr); - (*set)(++nf, scan, 1L, n); + } else { + for (; nf < up_to && scan < end; scan++) { + if (sep_arr != NULL && nf > 0) + set_element(nf, scan, 0L, sep_arr); + (*set)(++nf, scan, 1L, n); + } } *buf = scan; @@ -688,12 +683,10 @@ sc_parse_field(long up_to, /* parse only up to this field number */ char *field; char *end = scan + len; char sav; -#if MBS_SUPPORT size_t mbclen = 0; mbstate_t mbs; - if (gawk_mb_cur_max > 1) - memset(&mbs, 0, sizeof(mbstate_t)); -#endif + + memset(&mbs, 0, sizeof(mbstate_t)); if (up_to == UNLIMITED) nf = 0; @@ -712,7 +705,6 @@ sc_parse_field(long up_to, /* parse only up to this field number */ for (; nf < up_to;) { field = scan; -#if MBS_SUPPORT if (gawk_mb_cur_max > 1) { while (*scan != fschar) { mbclen = mbrlen(scan, end-scan, &mbs); @@ -723,10 +715,10 @@ sc_parse_field(long up_to, /* parse only up to this field number */ } scan += mbclen; } - } else -#endif - while (*scan != fschar) - scan++; + } else { + while (*scan != fschar) + scan++; + } (*set)(++nf, field, (long)(scan - field), n); if (scan == end) break; @@ -766,7 +758,6 @@ fw_parse_field(long up_to, /* parse only up to this field number */ char *scan = *buf; long nf = parse_high_water; char *end = scan + len; -#if MBS_SUPPORT int nmbc; size_t mbclen; size_t mbslen; @@ -775,14 +766,12 @@ fw_parse_field(long up_to, /* parse only up to this field number */ mbstate_t mbs; memset(&mbs, 0, sizeof(mbstate_t)); -#endif if (up_to == UNLIMITED) nf = 0; if (len == 0) return nf; for (; nf < up_to && (len = FIELDWIDTHS[nf+1]) != -1; ) { -#if MBS_SUPPORT if (gawk_mb_cur_max > 1) { nmbc = 0; mbslen = 0; @@ -805,10 +794,7 @@ fw_parse_field(long up_to, /* parse only up to this field number */ } (*set)(++nf, scan, (long) mbslen, n); scan += mbslen; - } - else -#endif - { + } else { if (len > end - scan) len = end - scan; (*set)(++nf, 
scan, (long) len, n); @@ -1451,13 +1437,8 @@ set_fpat_function: * Implementation varies if doing MBS or not. */ -#if MBS_SUPPORT #define increment_scan(scanp, len) incr_scan(scanp, len, & mbs) -#else -#define increment_scan(scanp, len) ((*scanp)++) -#endif -#if MBS_SUPPORT /* incr_scan --- MBS version of increment_scan() */ static void @@ -1478,7 +1459,6 @@ incr_scan(char **scanp, size_t len, mbstate_t *mbs) } else (*scanp)++; } -#endif /* * fpat_parse_field --- parse fields using a regexp. @@ -1603,12 +1583,9 @@ fpat_parse_field(long up_to, /* parse only up to this field number */ bool need_to_set_sep; bool non_empty; bool eosflag; -#if MBS_SUPPORT mbstate_t mbs; - if (gawk_mb_cur_max > 1) - memset(&mbs, 0, sizeof(mbstate_t)); -#endif + memset(&mbs, 0, sizeof(mbstate_t)); if (up_to == UNLIMITED) nf = 0; diff --git a/interpret.h b/interpret.h index 593f11a6..83ccbfc5 100644 --- a/interpret.h +++ b/interpret.h @@ -711,7 +711,6 @@ mod: t1->stptr[nlen] = '\0'; t1->flags &= ~(NUMCUR|NUMBER|NUMINT); -#if MBS_SUPPORT if ((t1->flags & WSTRCUR) != 0 && (t2->flags & WSTRCUR) != 0) { size_t wlen = t1->wstlen + t2->wstlen; @@ -723,7 +722,6 @@ mod: t1->flags |= WSTRCUR; } else free_wstr(*lhs); -#endif } else { size_t nlen = t1->stlen + t2->stlen; char *p; @@ -3073,10 +3073,8 @@ rs1scan(IOBUF *iop, struct recmatch *recm, SCANSTATE *state) { char *bp; char rs; -#if MBS_SUPPORT size_t mbclen = 0; mbstate_t mbs; -#endif memset(recm, '\0', sizeof(struct recmatch)); rs = RS->stptr[0]; @@ -3087,7 +3085,6 @@ rs1scan(IOBUF *iop, struct recmatch *recm, SCANSTATE *state) if (*state == INDATA) /* skip over data we've already seen */ bp += iop->scanoff; -#if MBS_SUPPORT /* * From: Bruno Haible <bruno@clisp.org> * To: Aharon Robbins <arnold@skeeve.com>, gnits@gnits.org @@ -3184,7 +3181,7 @@ rs1scan(IOBUF *iop, struct recmatch *recm, SCANSTATE *state) return NOTERM; } } -#endif + while (*bp != rs) bp++; @@ -155,9 +155,7 @@ static const char *locale = ""; /* default value to setlocale */ int 
use_lc_numeric = false; /* obey locale for decimal point */ -#if MBS_SUPPORT int gawk_mb_cur_max; /* MB_CUR_MAX value, see comment in main() */ -#endif FILE *output_fp; /* default gawk output, can be redirected in the debugger */ bool output_is_tty = false; /* control flushing of output */ @@ -290,14 +288,12 @@ main(int argc, char **argv) set_locale_stuff(); -#if MBS_SUPPORT /* * In glibc, MB_CUR_MAX is actually a function. This value is * tested *a lot* in many speed-critical places in gawk. Caching * this value once makes a speed difference. */ gawk_mb_cur_max = MB_CUR_MAX; - /* Without MBS_SUPPORT, gawk_mb_cur_max is 1. */ #ifdef LIBC_IS_BORKED { const char *env_lc; @@ -312,7 +308,6 @@ main(int argc, char **argv) /* init the cache for checking bytes if they're characters */ init_btowc_cache(); -#endif if (do_nostalgia) @@ -346,7 +341,6 @@ main(int argc, char **argv) if (do_lint && os_is_setuid()) warning(_("running %s setuid root may be a security problem"), myname); -#if MBS_SUPPORT if (do_binary) { if (do_posix) warning(_("`--posix' overrides `--characters-as-bytes'")); @@ -356,7 +350,6 @@ main(int argc, char **argv) setlocale(LC_ALL, "C"); #endif } -#endif if (do_debug) /* Need to register the debugger pre-exec hook before any other */ init_debug(); diff --git a/mbsupport.h b/mbsupport.h index 9a62486f..f4e1a821 100644 --- a/mbsupport.h +++ b/mbsupport.h @@ -23,81 +23,25 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ -/* - * This file is needed because we test for i18n support in 3 different - * places, and we want a consistent definition in all of them. Following - * the ``Don't Repeat Yourself'' principle from "The Pragmatic Programmer", - * we centralize the tests here. - * - * This test is the union of all the current tests. 
- */ - -#ifdef HAVE_STDLIB_H -#include <stdlib.h> -#endif - -#ifndef NO_MBSUPPORT - -#if defined(HAVE_ISWCTYPE) \ - && defined(HAVE_LOCALE_H) \ - && (defined(HAVE_BTOWC) || defined(ZOS_USS)) \ - && defined(HAVE_MBRLEN) \ - && defined(HAVE_MBRTOWC) \ - && defined(HAVE_WCHAR_H) \ - && defined(HAVE_WCRTOMB) \ - && defined(HAVE_WCSCOLL) \ - && defined(HAVE_WCTYPE) \ - && defined(HAVE_WCTYPE_H) \ - && defined(HAVE_WCTYPE_T) \ - && defined(HAVE_WINT_T) \ - && defined(HAVE_ISWLOWER) \ - && defined(HAVE_ISWUPPER) \ - && defined(HAVE_TOWLOWER) \ - && defined(HAVE_TOWUPPER) \ - && (defined(HAVE_STDLIB_H) && defined(MB_CUR_MAX)) \ -/* We can handle multibyte strings. */ -# define MBS_SUPPORT 1 -#else -# define MBS_SUPPORT 0 -#endif -#else /* NO_MBSUPPORT is defined */ -# define MBS_SUPPORT 0 -#endif - -#if ! MBS_SUPPORT +#ifdef __DJGPP__ # undef MB_CUR_MAX # define MB_CUR_MAX 1 -/* All this glop is for dfa.c. Bleah. */ - -#ifndef __DJGPP__ -#define wchar_t char -#endif +/* All this glop is for DJGPP */ -#define wctype_t int -#define wint_t int -#define mbstate_t int -#define WEOF EOF #define towupper toupper #define towlower tolower -#ifndef __DJGPP__ -#define btowc(x) ((int)x) -#endif #define iswalnum isalnum #define iswalpha isalpha #define iswupper isupper -#if defined(ZOS_USS) -#undef towupper -#undef towlower -#undef btowc -#undef iswalnum -#undef iswalpha -#undef iswupper -#undef wctype -#undef iswctype -#undef wcscoll -#endif +#define iswlower islower + +#define mbrtowc(wcp, s, e, mbs) (-1) +#define mbrlen(s, e, mbs) strlen(s) +#define wcrtomb(wc, b, mbs) (-1) +#define wcslen strlen +#define wctob(wc) (EOF) extern wctype_t wctype(const char *name); extern int iswctype(wint_t wc, wctype_t desc); @@ -121,10 +121,8 @@ mpg_node(unsigned int tp) r->flags |= MALLOC|NUMBER|NUMCUR; r->stptr = NULL; r->stlen = 0; -#if MBS_SUPPORT r->wstptr = NULL; r->wstlen = 0; -#endif /* defined MBS_SUPPORT */ return r; } @@ -281,7 +281,6 @@ r_dupnode(NODE *n) r->flags &= ~FIELD; r->flags |= 
MALLOC; r->valref = 1; -#if MBS_SUPPORT /* * DON'T call free_wstr(r) here! * r->wstptr still points at n->wstptr's value, and we @@ -289,13 +288,11 @@ r_dupnode(NODE *n) */ r->wstptr = NULL; r->wstlen = 0; -#endif /* MBS_SUPPORT */ if ((n->flags & STRCUR) != 0) { emalloc(r->stptr, char *, n->stlen + 2, "r_dupnode"); memcpy(r->stptr, n->stptr, n->stlen); r->stptr[n->stlen] = '\0'; -#if MBS_SUPPORT if ((n->flags & WSTRCUR) != 0) { r->wstlen = n->wstlen; emalloc(r->wstptr, wchar_t *, sizeof(wchar_t) * (n->wstlen + 2), "r_dupnode"); @@ -303,7 +300,6 @@ r_dupnode(NODE *n) r->wstptr[n->wstlen] = L'\0'; r->flags |= WSTRCUR; } -#endif /* MBS_SUPPORT */ } return r; @@ -322,10 +318,8 @@ r_make_number(double x) r->valref = 1; r->stptr = NULL; r->stlen = 0; -#if MBS_SUPPORT r->wstptr = NULL; r->wstlen = 0; -#endif /* defined MBS_SUPPORT */ return r; } @@ -368,11 +362,8 @@ make_str_node(const char *s, size_t len, int flags) r->flags = (MALLOC|STRING|STRCUR); r->valref = 1; r->stfmt = -1; - -#if MBS_SUPPORT r->wstptr = NULL; r->wstlen = 0; -#endif /* MBS_SUPPORT */ if ((flags & ALREADY_MALLOCED) != 0) r->stptr = (char *) s; @@ -387,15 +378,12 @@ make_str_node(const char *s, size_t len, int flags) char *ptm; int c; const char *end; -#if MBS_SUPPORT mbstate_t cur_state; memset(& cur_state, 0, sizeof(cur_state)); -#endif end = &(r->stptr[len]); for (pf = ptm = r->stptr; pf < end;) { -#if MBS_SUPPORT /* * Keep multibyte characters together. 
This avoids * problems if a subsequent byte of a multibyte @@ -412,7 +400,7 @@ make_str_node(const char *s, size_t len, int flags) continue; } } -#endif + c = *pf++; if (c == '\\') { c = parse_escape(&pf); @@ -642,7 +630,6 @@ get_numbase(const char *s, bool use_locale) return 8; } -#if MBS_SUPPORT /* str2wstr --- convert a multibyte string to a wide string */ NODE * @@ -891,7 +878,6 @@ out: ; return NULL; } -#endif /* MBS_SUPPORT */ /* is_ieee_magic_val --- return true for +inf, -inf, +nan, -nan */ @@ -938,7 +924,6 @@ get_ieee_magic_val(const char *val) return v; } -#if MBS_SUPPORT wint_t btowc_cache[256]; /* init_btowc_cache --- initialize the cache */ @@ -951,7 +936,6 @@ void init_btowc_cache() btowc_cache[i] = btowc(i); } } -#endif #define BLOCKCHUNK 100 @@ -54,12 +54,9 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal) * It is 0, when the current character is a singlebyte character. */ size_t is_multibyte = 0; -#if MBS_SUPPORT mbstate_t mbs; - if (gawk_mb_cur_max > 1) - memset(&mbs, 0, sizeof(mbstate_t)); /* Initialize. */ -#endif + memset(&mbs, 0, sizeof(mbstate_t)); /* Initialize. */ if (first) { first = false; @@ -87,7 +84,6 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal) dest = buf; while (src < end) { -#if MBS_SUPPORT if (gawk_mb_cur_max > 1 && ! is_multibyte) { /* The previous byte is a singlebyte character, or last byte of a multibyte character. We check the next character. */ @@ -100,7 +96,6 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal) is_multibyte = 0; } } -#endif /* We skip multibyte character, since it must not be a special character. 
*/ diff --git a/regex_internal.h b/regex_internal.h index c8981a08..3fc2fc58 100644 --- a/regex_internal.h +++ b/regex_internal.h @@ -26,18 +26,16 @@ #include <stdlib.h> #include <string.h> -#include "mbsupport.h" /* gawk */ - #if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC # include <langinfo.h> #endif #if defined HAVE_LOCALE_H || defined _LIBC # include <locale.h> #endif -#if MBS_SUPPORT && (defined HAVE_WCHAR_H || defined _LIBC) +#if defined HAVE_WCHAR_H || defined _LIBC # include <wchar.h> #endif /* HAVE_WCHAR_H || _LIBC */ -#if MBS_SUPPORT && (defined HAVE_WCTYPE_H || defined _LIBC) +#if defined HAVE_WCTYPE_H || defined _LIBC # include <wctype.h> #endif /* HAVE_WCTYPE_H || _LIBC */ #if defined HAVE_STDBOOL_H || defined _LIBC @@ -109,7 +107,7 @@ is_blank (int c) # define SIZE_MAX ((size_t) -1) #endif -#if MBS_SUPPORT || _LIBC +#if ! defined(__DJGPP__) && (defined(GAWK) || _LIBC) # define RE_ENABLE_I18N #endif @@ -111,6 +111,6 @@ #include "missing_d/strcoll.c" #endif -#if ! MBS_SUPPORT +#if defined(__DJGPP__) #include "missing_d/wcmisc.c" #endif |