about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 4
-rw-r--r-- dfa.c 47
-rw-r--r-- test/ChangeLog 6
-rw-r--r-- test/Makefile.am 21
-rw-r--r-- test/Makefile.in 21
-rw-r--r-- test/backbigs1.awk 1
-rw-r--r-- test/backbigs1.in 1
-rw-r--r-- test/backbigs1.ok 0
-rw-r--r-- test/backsmalls1.awk 1
-rw-r--r-- test/backsmalls1.in 36
-rw-r--r-- test/backsmalls1.ok 14
11 files changed, 142 insertions, 10 deletions
diff --git a/ChangeLog b/ChangeLog
index 2eaa9c70..7aa2e738 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2013-10-10 Arnold D. Robbins <arnold@skeeve.com>
+
+ * dfa.c (lex): Sync with GNU grep. Handle multibyte \s and \S.
+
2013-10-09 Arnold D. Robbins <arnold@skeeve.com>
* awkgram.y (mk_assignment): Rework switch to handle Op_assign,
diff --git a/dfa.c b/dfa.c
index 84ccbc0c..b12d2d8b 100644
--- a/dfa.c
+++ b/dfa.c
@@ -1488,14 +1488,45 @@ lex (void)
case 'S':
if (!backslash || (syntax_bits & RE_NO_GNU_OPS))
goto normal_char;
- zeroset (ccl);
- for (c2 = 0; c2 < NOTCHAR; ++c2)
- if (isspace (c2))
- setbit (c2, ccl);
- if (c == 'S')
- notset (ccl);
- laststart = 0;
- return lasttok = CSET + charclass_index (ccl);
+ if (MB_CUR_MAX == 1)
+ {
+ zeroset (ccl);
+ for (c2 = 0; c2 < NOTCHAR; ++c2)
+ if (isspace (c2))
+ setbit (c2, ccl);
+ if (c == 'S')
+ notset (ccl);
+ laststart = 0;
+ return lasttok = CSET + charclass_index (ccl);
+ }
+
+#define PUSH_LEX_STATE(s) \
+ do \
+ { \
+ char const *lexptr_saved = lexptr; \
+ size_t lexleft_saved = lexleft; \
+ lexptr = (s); \
+ lexleft = strlen (lexptr)
+
+#define POP_LEX_STATE() \
+ lexptr = lexptr_saved; \
+ lexleft = lexleft_saved; \
+ } \
+ while (0)
+
+ /* FIXME: see if optimizing this, as is done with ANYCHAR and
+ add_utf8_anychar, makes sense. */
+
+ /* \s and \S are documented to be equivalent to [[:space:]] and
+ [^[:space:]] respectively, so tell the lexer to process those
+ strings, each minus its "already processed" '['. */
+ PUSH_LEX_STATE (c == 's' ? "[:space:]]" : "^[:space:]]");
+
+ lasttok = parse_bracket_exp ();
+
+ POP_LEX_STATE ();
+
+ return lasttok;
case 'w':
case 'W':
diff --git a/test/ChangeLog b/test/ChangeLog
index c835eb01..4de3d14b 100644
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -1,3 +1,9 @@
+2013-10-10 Arnold D. Robbins <arnold@skeeve.com>
+
+ * Makefile.am (backbigs1, backsmalls1): New tests.
+ * backbigs1.awk, backbigs1.in, backbigs1.ok: New files.
+ * backsmalls1.awk, backsmalls1.in, backsmalls1.ok: New files.
+
2013-10-09 Arnold D. Robbins <arnold@skeeve.com>
* Makefile.am (badassign1): New test.
diff --git a/test/Makefile.am b/test/Makefile.am
index b3a9a58f..d0caeed7 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -112,6 +112,12 @@ EXTRA_DIST = \
backw.awk \
backw.in \
backw.ok \
+ backbigs1.awk \
+ backbigs1.in \
+ backbigs1.ok \
+ backsmalls1.awk \
+ backsmalls1.in \
+ backsmalls1.ok \
badargs.ok \
badassign1.awk \
badassign1.ok \
@@ -998,7 +1004,8 @@ MACHINE_TESTS = double1 double2 fmtspcl intformat
MPFR_TESTS = mpfrnr mpfrrnd mpfrieee mpfrexprange mpfrsort mpfrbigint
LOCALE_CHARSET_TESTS = \
- asort asorti fmttest fnarydel fnparydl jarebug lc_num1 mbfw1 \
+ asort asorti backbigs1 backsmalls1 \
+ fmttest fnarydel fnparydl jarebug lc_num1 mbfw1 \
mbprintf1 mbprintf2 mbprintf3 mbprintf4 rebt8b2 rtlenmb sort1 sprintfc
SHLIB_TESTS = \
@@ -1861,6 +1868,18 @@ dfamb1:
AWKPATH="$(srcdir)" $(AWK) -f $@.awk "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+backbigs1:
+ @echo $@
+ @[ -z "$$GAWKLOCALE" ] && GAWKLOCALE=en_US.UTF-8; \
+ AWKPATH="$(srcdir)" $(AWK) -f $@.awk "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
+backsmalls1:
+ @echo $@
+ @[ -z "$$GAWKLOCALE" ] && GAWKLOCALE=en_US.UTF-8; \
+ AWKPATH="$(srcdir)" $(AWK) -f $@.awk "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
# Targets generated for other tests:
include Maketests
diff --git a/test/Makefile.in b/test/Makefile.in
index 637bad0d..5ddd9e32 100644
--- a/test/Makefile.in
+++ b/test/Makefile.in
@@ -330,6 +330,12 @@ EXTRA_DIST = \
backw.awk \
backw.in \
backw.ok \
+ backbigs1.awk \
+ backbigs1.in \
+ backbigs1.ok \
+ backsmalls1.awk \
+ backsmalls1.in \
+ backsmalls1.ok \
badargs.ok \
badassign1.awk \
badassign1.ok \
@@ -1211,7 +1217,8 @@ INET_TESTS = inetdayu inetdayt inetechu inetecht
MACHINE_TESTS = double1 double2 fmtspcl intformat
MPFR_TESTS = mpfrnr mpfrrnd mpfrieee mpfrexprange mpfrsort mpfrbigint
LOCALE_CHARSET_TESTS = \
- asort asorti fmttest fnarydel fnparydl jarebug lc_num1 mbfw1 \
+ asort asorti backbigs1 backsmalls1 \
+ fmttest fnarydel fnparydl jarebug lc_num1 mbfw1 \
mbprintf1 mbprintf2 mbprintf3 mbprintf4 rebt8b2 rtlenmb sort1 sprintfc
SHLIB_TESTS = \
@@ -2254,6 +2261,18 @@ dfamb1:
@[ -z "$$GAWKLOCALE" ] && GAWKLOCALE=en_US.UTF-8; \
AWKPATH="$(srcdir)" $(AWK) -f $@.awk "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
+backbigs1:
+ @echo $@
+ @[ -z "$$GAWKLOCALE" ] && GAWKLOCALE=en_US.UTF-8; \
+ AWKPATH="$(srcdir)" $(AWK) -f $@.awk "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
+backsmalls1:
+ @echo $@
+ @[ -z "$$GAWKLOCALE" ] && GAWKLOCALE=en_US.UTF-8; \
+ AWKPATH="$(srcdir)" $(AWK) -f $@.awk "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
Gt-dummy:
# file Maketests, generated from Makefile.am by the Gentests program
addcomma:
diff --git a/test/backbigs1.awk b/test/backbigs1.awk
new file mode 100644
index 00000000..fb4d811f
--- /dev/null
+++ b/test/backbigs1.awk
@@ -0,0 +1 @@
+/\S/
diff --git a/test/backbigs1.in b/test/backbigs1.in
new file mode 100644
index 00000000..16b415f4
--- /dev/null
+++ b/test/backbigs1.in
@@ -0,0 +1 @@
+‚
diff --git a/test/backbigs1.ok b/test/backbigs1.ok
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/test/backbigs1.ok
diff --git a/test/backsmalls1.awk b/test/backsmalls1.awk
new file mode 100644
index 00000000..f3e0aba4
--- /dev/null
+++ b/test/backsmalls1.awk
@@ -0,0 +1 @@
+/^\s$/
diff --git a/test/backsmalls1.in b/test/backsmalls1.in
new file mode 100644
index 00000000..491807cd
--- /dev/null
+++ b/test/backsmalls1.in
@@ -0,0 +1,36 @@
+# U+00A0 NO-BREAK SPACE: c2 a0
+# 
+# U+2007 FIGURE SPACE: e2 80 87
+# 
+# U+200B ZERO WIDTH SPACE: e2 80 8b
+#​
+# U+202F NARROW NO-BREAK SPACE: e2 80 af
+# 
+U+0020 SPACE: 20
+
+U+1680 OGHAM SPACE MARK: e1 9a 80
+ 
+U+2000 EN QUAD: e2 80 80
+ 
+U+2001 EM QUAD: e2 80 81
+ 
+U+2002 EN SPACE: e2 80 82
+ 
+U+2003 EM SPACE: e2 80 83
+ 
+U+2004 THREE-PER-EM SPACE: e2 80 84
+ 
+U+2005 FOUR-PER-EM SPACE: e2 80 85
+ 
+U+2006 SIX-PER-EM SPACE: e2 80 86
+ 
+U+2008 PUNCTUATION SPACE: e2 80 88
+ 
+U+2009 THIN SPACE: e2 80 89
+ 
+U+200A HAIR SPACE: e2 80 8a
+ 
+U+205F MEDIUM MATHEMATICAL SPACE: e2 81 9f
+ 
+U+3000 IDEOGRAPHIC SPACE: e3 80 80
+ 
diff --git a/test/backsmalls1.ok b/test/backsmalls1.ok
new file mode 100644
index 00000000..1f678123
--- /dev/null
+++ b/test/backsmalls1.ok
@@ -0,0 +1,14 @@
+
+ 
+ 
+ 
+ 
+ 
+ 
+ 
+ 
+ 
+ 
+ 
+ 
+