diff options
Diffstat (limited to 're.c')
-rw-r--r-- | re.c | 155 |
1 files changed, 152 insertions, 3 deletions
@@ -27,6 +27,7 @@ static reg_syntax_t syn; static void check_bracket_exp(char *s, size_t len); +static char *expand_range(char *s, size_t *len); /* make_regexp --- generate compiled regular expressions */ @@ -44,6 +45,9 @@ make_regexp(const char *s, size_t len, int ignorecase, int dfa, int canfatal) static short first = TRUE; static short no_dfa = FALSE; int has_anchor = FALSE; + int may_have_range = 0; + char *newbuf; + size_t newlen; /* The number of bytes in the current multibyte character. It is 0, when the current character is a singlebyte character. */ @@ -152,13 +156,36 @@ make_regexp(const char *s, size_t len, int ignorecase, int dfa, int canfatal) c = *src; if (c == '^' || c == '$') has_anchor = TRUE; + if (c == '[' || c == '-' || c == ']') + may_have_range++; + *dest++ = *src++; /* not '\\' */ } if (gawk_mb_cur_max > 1 && is_multibyte) is_multibyte--; } /* while */ - *dest = '\0' ; /* Only necessary if we print dest ? */ + *dest = '\0'; + len = dest - buf; + + if ( ! do_posix + && may_have_range >= 3 + && memchr(buf, '-', len) != NULL) { + newlen = len; + newbuf = expand_range(buf, & newlen); + + /* song and dance since buf & buflen are static */ + if (newlen > buflen) { + free(buf); + buf = newbuf; + buflen = newlen; + } else { + memcpy(buf, newbuf, newlen); + free(newbuf); + } + len = newlen; + } + emalloc(rp, Regexp *, sizeof(*rp), "make_regexp"); memset((char *) rp, 0, sizeof(*rp)); rp->dfareg = NULL; @@ -196,7 +223,6 @@ make_regexp(const char *s, size_t len, int ignorecase, int dfa, int canfatal) dfasyntax(syn | (ignorecase ? RE_ICASE : 0), ignorecase ? TRUE : FALSE, '\n'); re_set_syntax(syn); - len = dest - buf; if ((rerr = re_compile_pattern(buf, len, &(rp->pat))) != NULL) { refree(rp); if (! canfatal) { @@ -539,7 +565,7 @@ again: count++; else if (*sp == ']') count--; - if (*sp == '-' && ! range_warned && count == 1 + if (*sp == '-' && do_lint && ! range_warned && count == 1 && sp[-1] != '[' && sp[1] != ']' && ! isdigit(sp[-1]) && ! isdigit(sp[1])) { /* found a range, we think */ @@ -580,3 +606,126 @@ again: done: s[length] = save; } + +/* add_char --- add a character to the buffer, grow it if needed */ + +static void +add_char(char **bufp, size_t *lenp, char ch, char **ptr) +{ + size_t newlen; + size_t offset; + + if (*ptr - *bufp < *lenp) { + **ptr = ch; + (*ptr)++; + return; + } + + /* have to grow the buffer and adjust the pointers */ + offset = (*ptr - *bufp); + newlen = offset * 2; + erealloc(*bufp, char *, newlen + 2, "add_char"); + *ptr = *bufp + offset; + **ptr = ch; + (*ptr)++; +} + +/* expand_range --- turn [b-e] into [bcde] */ + +static char * +expand_range(char *s, size_t *lenp) +{ + int i; + int found = FALSE; + char *sp, *sp2, *newbuf; + size_t len; + int count = 0; + size_t newbuf_len = *lenp * 2; + + emalloc(newbuf, char *, newbuf_len, "expand_range"); + + sp = s; + sp2 = newbuf; + len = *lenp; +#define copy() (add_char(& newbuf, & newbuf_len, *sp++, & sp2), len--) +#define copych(ch) (add_char(& newbuf, & newbuf_len, ch, & sp2)) +again: + while (len > 0) { + if (*sp == '\\') { + copy(); + copy(); + } + else if (*sp == '[') { + count++; + break; + } + else + copy(); + } + if (len == 0) + goto done; + + copy(); /* copy in the [ */ + if (*sp == '-') /* it's literal, just copy it and skip over */ + copy(); + + while (count > 0 && len > 0) { + if (*sp == '\\') { + copy(); + copy(); + continue; + } + if (*sp == '[') { + count++; + copy(); + continue; + } + if (*sp == ']') { + count--; + copy(); + if (count == 0) + goto again; + else + continue; + } + + if (count == 1) { + /* inside [...] but not inside [[:...:]] */ + if (*sp == '-') { + int start, end; + char i; + + if (sp[1] == ']') { /* also literal */ + copy(); + continue; + } + + /* It's a range, expand it. */ + start = sp[-1]; + if (sp[1] == '\\') { + sp++; + len--; + } + end = sp[1]; + for (i = start + 1; i <= end; i++) + copych(i); + sp++; + len--; + continue; + } + else + copy(); + } else { + copy(); + } + } + + if (len > 0) + goto again; + +done: + *lenp = sp2 - newbuf; + return newbuf; +} +#undef copy +#undef copych |