diff options
-rw-r--r--  ChangeLog            |  4
-rw-r--r--  dfa.c                | 47
-rw-r--r--  test/ChangeLog       |  6
-rw-r--r--  test/Makefile.am     | 21
-rw-r--r--  test/Makefile.in     | 21
-rw-r--r--  test/backbigs1.awk   |  1
-rw-r--r--  test/backbigs1.in    |  1
-rw-r--r--  test/backbigs1.ok    |  0
-rw-r--r--  test/backsmalls1.awk |  1
-rw-r--r--  test/backsmalls1.in  | 36
-rw-r--r--  test/backsmalls1.ok  | 14
11 files changed, 142 insertions, 10 deletions
@@ -1,3 +1,7 @@
+2013-10-10 Arnold D. Robbins <arnold@skeeve.com>
+
+	* dfa.c (lex): Sync with GNU grep. Handle multibyte \s and \S.
+
 2013-10-09 Arnold D. Robbins <arnold@skeeve.com>
 
 	* awkgram.y (mk_assignment): Rework switch to handle Op_assign,
@@ -1488,14 +1488,45 @@ lex (void)
         case 'S':
           if (!backslash || (syntax_bits & RE_NO_GNU_OPS))
             goto normal_char;
-          zeroset (ccl);
-          for (c2 = 0; c2 < NOTCHAR; ++c2)
-            if (isspace (c2))
-              setbit (c2, ccl);
-          if (c == 'S')
-            notset (ccl);
-          laststart = 0;
-          return lasttok = CSET + charclass_index (ccl);
+          if (MB_CUR_MAX == 1)
+            {
+              zeroset (ccl);
+              for (c2 = 0; c2 < NOTCHAR; ++c2)
+                if (isspace (c2))
+                  setbit (c2, ccl);
+              if (c == 'S')
+                notset (ccl);
+              laststart = 0;
+              return lasttok = CSET + charclass_index (ccl);
+            }
+
+#define PUSH_LEX_STATE(s) \
+  do \
+    { \
+      char const *lexptr_saved = lexptr; \
+      size_t lexleft_saved = lexleft; \
+      lexptr = (s); \
+      lexleft = strlen (lexptr)
+
+#define POP_LEX_STATE() \
+      lexptr = lexptr_saved; \
+      lexleft = lexleft_saved; \
+    } \
+  while (0)
+
+          /* FIXME: see if optimizing this, as is done with ANYCHAR and
+             add_utf8_anychar, makes sense. */
+
+          /* \s and \S are documented to be equivalent to [[:space:]] and
+             [^[:space:]] respectively, so tell the lexer to process those
+             strings, each minus its "already processed" '['. */
+          PUSH_LEX_STATE (c == 's' ? "[:space:]]" : "^[:space:]]");
+
+          lasttok = parse_bracket_exp ();
+
+          POP_LEX_STATE ();
+
+          return lasttok;
 
         case 'w':
         case 'W':
diff --git a/test/ChangeLog b/test/ChangeLog
index c835eb01..4de3d14b 100644
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -1,3 +1,9 @@
+2013-10-10 Arnold D. Robbins <arnold@skeeve.com>
+
+	* Makefile.am (backbigs1, backsmalls1): New tests.
+	* backbigs1.awk, backbigs1.in, backbigs1.ok: New files.
+	* backsmalls1.awk, backsmalls1.in, backsmalls1.ok: New files.
+
 2013-10-09 Arnold D. Robbins <arnold@skeeve.com>
 
 	* Makefile.am (badassign1): New test.
diff --git a/test/Makefile.am b/test/Makefile.am index b3a9a58f..d0caeed7 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -112,6 +112,12 @@ EXTRA_DIST = \ backw.awk \ backw.in \ backw.ok \ + backbigs1.awk \ + backbigs1.in \ + backbigs1.ok \ + backsmalls1.awk \ + backsmalls1.in \ + backsmalls1.ok \ badargs.ok \ badassign1.awk \ badassign1.ok \ @@ -998,7 +1004,8 @@ MACHINE_TESTS = double1 double2 fmtspcl intformat MPFR_TESTS = mpfrnr mpfrrnd mpfrieee mpfrexprange mpfrsort mpfrbigint LOCALE_CHARSET_TESTS = \ - asort asorti fmttest fnarydel fnparydl jarebug lc_num1 mbfw1 \ + asort asorti backbigs1 backsmalls1 \ + fmttest fnarydel fnparydl jarebug lc_num1 mbfw1 \ mbprintf1 mbprintf2 mbprintf3 mbprintf4 rebt8b2 rtlenmb sort1 sprintfc SHLIB_TESTS = \ @@ -1861,6 +1868,18 @@ dfamb1: AWKPATH="$(srcdir)" $(AWK) -f $@.awk "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ +backbigs1: + @echo $@ + @[ -z "$$GAWKLOCALE" ] && GAWKLOCALE=en_US.UTF-8; \ + AWKPATH="$(srcdir)" $(AWK) -f $@.awk "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + +backsmalls1: + @echo $@ + @[ -z "$$GAWKLOCALE" ] && GAWKLOCALE=en_US.UTF-8; \ + AWKPATH="$(srcdir)" $(AWK) -f $@.awk "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? 
>>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + # Targets generated for other tests: include Maketests diff --git a/test/Makefile.in b/test/Makefile.in index 637bad0d..5ddd9e32 100644 --- a/test/Makefile.in +++ b/test/Makefile.in @@ -330,6 +330,12 @@ EXTRA_DIST = \ backw.awk \ backw.in \ backw.ok \ + backbigs1.awk \ + backbigs1.in \ + backbigs1.ok \ + backsmalls1.awk \ + backsmalls1.in \ + backsmalls1.ok \ badargs.ok \ badassign1.awk \ badassign1.ok \ @@ -1211,7 +1217,8 @@ INET_TESTS = inetdayu inetdayt inetechu inetecht MACHINE_TESTS = double1 double2 fmtspcl intformat MPFR_TESTS = mpfrnr mpfrrnd mpfrieee mpfrexprange mpfrsort mpfrbigint LOCALE_CHARSET_TESTS = \ - asort asorti fmttest fnarydel fnparydl jarebug lc_num1 mbfw1 \ + asort asorti backbigs1 backsmalls1 \ + fmttest fnarydel fnparydl jarebug lc_num1 mbfw1 \ mbprintf1 mbprintf2 mbprintf3 mbprintf4 rebt8b2 rtlenmb sort1 sprintfc SHLIB_TESTS = \ @@ -2254,6 +2261,18 @@ dfamb1: @[ -z "$$GAWKLOCALE" ] && GAWKLOCALE=en_US.UTF-8; \ AWKPATH="$(srcdir)" $(AWK) -f $@.awk "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + +backbigs1: + @echo $@ + @[ -z "$$GAWKLOCALE" ] && GAWKLOCALE=en_US.UTF-8; \ + AWKPATH="$(srcdir)" $(AWK) -f $@.awk "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + +backsmalls1: + @echo $@ + @[ -z "$$GAWKLOCALE" ] && GAWKLOCALE=en_US.UTF-8; \ + AWKPATH="$(srcdir)" $(AWK) -f $@.awk "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? 
>>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ Gt-dummy: # file Maketests, generated from Makefile.am by the Gentests program addcomma: diff --git a/test/backbigs1.awk b/test/backbigs1.awk new file mode 100644 index 00000000..fb4d811f --- /dev/null +++ b/test/backbigs1.awk @@ -0,0 +1 @@ +/\S/ diff --git a/test/backbigs1.in b/test/backbigs1.in new file mode 100644 index 00000000..16b415f4 --- /dev/null +++ b/test/backbigs1.in @@ -0,0 +1 @@ +‚ diff --git a/test/backbigs1.ok b/test/backbigs1.ok new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/test/backbigs1.ok diff --git a/test/backsmalls1.awk b/test/backsmalls1.awk new file mode 100644 index 00000000..f3e0aba4 --- /dev/null +++ b/test/backsmalls1.awk @@ -0,0 +1 @@ +/^\s$/ diff --git a/test/backsmalls1.in b/test/backsmalls1.in new file mode 100644 index 00000000..491807cd --- /dev/null +++ b/test/backsmalls1.in @@ -0,0 +1,36 @@ +# U+00A0 NO-BREAK SPACE: c2 a0 +# +# U+2007 FIGURE SPACE: e2 80 87 +#  +# U+200B ZERO WIDTH SPACE: e2 80 8b +#​ +# U+202F NARROW NO-BREAK SPACE: e2 80 af +#  +U+0020 SPACE: 20 + +U+1680 OGHAM SPACE MARK: e1 9a 80 +  +U+2000 EN QUAD: e2 80 80 +  +U+2001 EM QUAD: e2 80 81 +†+U+2002 EN SPACE: e2 80 82 +  +U+2003 EM SPACE: e2 80 83 +  +U+2004 THREE-PER-EM SPACE: e2 80 84 +  +U+2005 FOUR-PER-EM SPACE: e2 80 85 +  +U+2006 SIX-PER-EM SPACE: e2 80 86 +  +U+2008 PUNCTUATION SPACE: e2 80 88 +  +U+2009 THIN SPACE: e2 80 89 +  +U+200A HAIR SPACE: e2 80 8a +  +U+205F MEDIUM MATHEMATICAL SPACE: e2 81 9f +⟠+U+3000 IDEOGRAPHIC SPACE: e3 80 80 +  diff --git a/test/backsmalls1.ok b/test/backsmalls1.ok new file mode 100644 index 00000000..1f678123 --- /dev/null +++ b/test/backsmalls1.ok @@ -0,0 +1,14 @@ + +  +  +†+  +  +  +  +  +  +  +  +⟠+  |