diff options
-rw-r--r-- | test/ChangeLog | 5 | ||||
-rw-r--r-- | test/Makefile.am | 6 | ||||
-rw-r--r-- | test/Makefile.in | 16 | ||||
-rw-r--r-- | test/Maketests | 10 | ||||
-rw-r--r-- | test/regnul1.awk | 84 | ||||
-rw-r--r-- | test/regnul1.ok | 8 | ||||
-rw-r--r-- | test/regnul2.awk | 112 | ||||
-rw-r--r-- | test/regnul2.ok | 27 |
8 files changed, 266 insertions, 2 deletions
diff --git a/test/ChangeLog b/test/ChangeLog index 4af051f0..ae144a27 100644 --- a/test/ChangeLog +++ b/test/ChangeLog @@ -1,3 +1,8 @@ +2014-05-30 Arnold D. Robbins <arnold@skeeve.com> + + * Makefile.am (regnul1, regnul2): New tests. + * regnul1.awk, regnul1.ok, regnul1.awk, regnul2.ok: New files. + 2014-05-22 Andrew J. Schorr <aschorr@telemetry-investments.com> * lintwarn.ok: Updated. diff --git a/test/Makefile.am b/test/Makefile.am index c53e0d01..5675c1d6 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -729,6 +729,10 @@ EXTRA_DIST = \ regexprange.ok \ reginttrad.awk \ reginttrad.ok \ + regnul1.awk \ + regnul1.ok \ + regnul2.awk \ + regnul2.ok \ regrange.awk \ regrange.ok \ regtest.sh \ @@ -1004,7 +1008,7 @@ GAWK_EXT_TESTS = \ nastyparm next nondec nondec2 \ patsplit posix printfbad1 printfbad2 printfbad3 procinfs \ profile1 profile2 profile3 profile4 profile5 pty1 \ - rebuf regx8bit reginttrad reint reint2 rsgetline rsglstdin rsstart1 \ + rebuf regnul1 regnul2 regx8bit reginttrad reint reint2 rsgetline rsglstdin rsstart1 \ rsstart2 rsstart3 rstest6 shadow sortfor sortu split_after_fpat \ splitarg4 strftime \ strtonum switch2 symtab1 symtab2 symtab3 symtab4 symtab5 symtab6 \ diff --git a/test/Makefile.in b/test/Makefile.in index f00efc63..a7e1f193 100644 --- a/test/Makefile.in +++ b/test/Makefile.in @@ -975,6 +975,10 @@ EXTRA_DIST = \ regexprange.ok \ reginttrad.awk \ reginttrad.ok \ + regnul1.awk \ + regnul1.ok \ + regnul2.awk \ + regnul2.ok \ regrange.awk \ regrange.ok \ regtest.sh \ @@ -1249,7 +1253,7 @@ GAWK_EXT_TESTS = \ nastyparm next nondec nondec2 \ patsplit posix printfbad1 printfbad2 printfbad3 procinfs \ profile1 profile2 profile3 profile4 profile5 pty1 \ - rebuf regx8bit reginttrad reint reint2 rsgetline rsglstdin rsstart1 \ + rebuf regnul1 regnul2 regx8bit reginttrad reint reint2 rsgetline rsglstdin rsstart1 \ rsstart2 rsstart3 rstest6 shadow sortfor sortu split_after_fpat \ splitarg4 strftime \ strtonum switch2 symtab1 symtab2 symtab3 symtab4 symtab5 symtab6 \ @@ -3512,6 +3516,16 @@ pty1: @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ +regnul1: + @echo $@ + @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + +regnul2: + @echo $@ + @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + regx8bit: @echo $@ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ diff --git a/test/Maketests b/test/Maketests index e847d765..0841ae77 100644 --- a/test/Maketests +++ b/test/Maketests @@ -1152,6 +1152,16 @@ pty1: @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ +regnul1: + @echo $@ + @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + +regnul2: + @echo $@ + @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + regx8bit: @echo $@ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ diff --git a/test/regnul1.awk b/test/regnul1.awk new file mode 100644 index 00000000..2a35d176 --- /dev/null +++ b/test/regnul1.awk @@ -0,0 +1,84 @@ +# From denis@gissoft.eu Thu May 29 09:07:56 IDT 2014 +# Article: 8400 of comp.lang.awk +# X-Received: by 10.236.81.99 with SMTP id l63mr3912466yhe.3.1401224812642; +# Tue, 27 May 2014 14:06:52 -0700 (PDT) +# X-Received: by 10.140.37.148 with SMTP id r20mr578874qgr.0.1401224812310; Tue, +# 27 May 2014 14:06:52 -0700 (PDT) +# Path: eternal-september.org!news.eternal-september.org!feeder.eternal-september.org!v102.xanadu-bbs.net!xanadu-bbs.net!news.glorb.com!hl10no6493021igb.0!news-out.google.com!gi6ni15574igc.0!nntp.google.com!hl10no6493018igb.0!postnews.google.com!glegroupsg2000goo.googlegroups.com!not-for-mail +# Newsgroups: comp.lang.awk +# Date: Tue, 27 May 2014 14:06:52 -0700 (PDT) +# Complaints-To: groups-abuse@google.com +# Injection-Info: glegroupsg2000goo.googlegroups.com; posting-host=85.253.50.165; +# posting-account=zNhVLgoAAACsg-WfVe_or2VV7loUhx8H +# NNTP-Posting-Host: 85.253.50.165 +# User-Agent: G2/1.0 +# MIME-Version: 1.0 +# Message-ID: <3112e356-d2e1-45cd-ba55-2f939ee50105@googlegroups.com> +# Subject: \0 character can't be implement inside regexp in some cases? +# From: denis@gissoft.eu +# Injection-Date: Tue, 27 May 2014 21:06:52 +0000 +# Content-Type: text/plain; charset=ISO-8859-1 +# Xref: news.eternal-september.org comp.lang.awk:8400 +# +# Hello, +# +# while doing some experiments with the gawk(4.1.1) i was found problem in implementing character \x00 inside regexp for two cases: +# +# str~/\0/ +# +# and +# +# switch ( str ) { case /\0/: ... } +# +# the following code try to match given string(=="\x00") with the regexp /^\0$/ using different ways provided by gawk: +# +func _chm(t) { + _ch("match()",match(t,/^\0$/)) + _ch("split()",split(t,A,/^\0$/)>1) + _ch("patsplit()",patsplit(t,A,/^\0$/)) + _ch("gsub()",gsub(/^\0$/,"&",t)) + _ch("sub()",sub(/^\0$/,"&",t)) + _ch("gensub()",!gensub(/^\0$/,"","G",t)) + _ch("str~/rexp/",t~/^\0$/) + a=0; switch ( t ) { case /^\0$/: a=1 }; _ch("switch-case //",a) } + +func _ch(fn,bool) { + print substr(fn ": ",1,16) (bool ? "+" : "-") } + +BEGIN{ _chm("\000") } +# +# output: +# +# > gawk -f _null.gwk +# match(): + +# split(): + +# patsplit(): + +# gsub(): + +# sub(): + +# gensub(): + +# str~/rexp/: - +# switch-case //: - +# +# can someone explain me: +# +# why in case using match(), split(), patsplit(), gsub(), sub() and gensub() the given string "\x00" matches with the /^\0$/ +# +# but in cases: +# +# "\x00"~/^\0$/ +# +# and +# +# switch ( "\x00" ) { case /^\0$/: doesn't match? } +# +# +# thank You +# +# +# GNU Awk 4.1.1, API: 1.1 (GNU MPFR 3.1.0-p8, GNU MP 5.0.2) +# Copyright (C) 1989, 1991-2014 Free Software Foundation. +# downloaded from ezwinports +# +# windows 7x64; cmd +# +# diff --git a/test/regnul1.ok b/test/regnul1.ok new file mode 100644 index 00000000..2ba0e1da --- /dev/null +++ b/test/regnul1.ok @@ -0,0 +1,8 @@ +match(): + +split(): + +patsplit(): + +gsub(): + +sub(): + +gensub(): + +str~/rexp/: + +switch-case //: + diff --git a/test/regnul2.awk b/test/regnul2.awk new file mode 100644 index 00000000..3d93df41 --- /dev/null +++ b/test/regnul2.awk @@ -0,0 +1,112 @@ +# From denis@gissoft.eu Thu May 29 09:10:18 IDT 2014 +# Article: 8408 of comp.lang.awk +# X-Received: by 10.182.128.166 with SMTP id np6mr93689obb.16.1401289466734; +# Wed, 28 May 2014 08:04:26 -0700 (PDT) +# X-Received: by 10.140.36.6 with SMTP id o6mr4939qgo.26.1401289466607; Wed, 28 +# May 2014 08:04:26 -0700 (PDT) +# Path: eternal-september.org!news.eternal-september.org!feeder.eternal-september.org!news.glorb.com!c1no19185457igq.0!news-out.google.com!qf4ni13600igc.0!nntp.google.com!c1no19185454igq.0!postnews.google.com!glegroupsg2000goo.googlegroups.com!not-for-mail +# Newsgroups: comp.lang.awk +# Date: Wed, 28 May 2014 08:04:26 -0700 (PDT) +# In-Reply-To: <lm4rra$4u9$1@dont-email.me> +# Complaints-To: groups-abuse@google.com +# Injection-Info: glegroupsg2000goo.googlegroups.com; posting-host=82.131.35.51; posting-account=zNhVLgoAAACsg-WfVe_or2VV7loUhx8H +# NNTP-Posting-Host: 82.131.35.51 +# References: <3112e356-d2e1-45cd-ba55-2f939ee50105@googlegroups.com> +# <lm34d7$tb4$1@news.m-online.net> <f666871f-a94c-4505-9677-8711d656433c@googlegroups.com> +# <lm4rra$4u9$1@dont-email.me> +# User-Agent: G2/1.0 +# MIME-Version: 1.0 +# Message-ID: <79828a24-d265-4e88-8de1-e61ecbaa6701@googlegroups.com> +# Subject: Re: \0 character can't be implement inside regexp in some cases? +# From: Denis Shirokov <denis@gissoft.eu> +# Injection-Date: Wed, 28 May 2014 15:04:26 +0000 +# Content-Type: text/plain; charset=ISO-8859-1 +# Xref: news.eternal-september.org comp.lang.awk:8408 +# +# +# All of the other use-cases just cluttered up your posting. +# +# oh, really? +# +# 1. where in the Janis code the case with the `switch-case'? +# 2. how do you know about that there is only two cases? may be you know it because my code contains the other test cases? +# 3. fine. do you know what situation with the dynamic regexps? no? +# 4. do you know what situation with RS,FS and /.../ in the middle-area? how you can say that there is only two cases if you absolutely do not know it? +# +# i'm asking: WHO will perform testing other cases? You? gawk-team? the God? +# what is that point of view: that it will be enough to say: +# Oh! my match(t,/^\0$/) is matching "\x00" but t~/^\0$/ is not. why oh why? +# +# where is the test cover? or you think that other peoples will doing its instead of You? instead of Me? +# +# and the second point: guys you are screaming about two levels of stack. really, you kidding? =) +# +# however, i'm attaching some additional information about dynrexp: +# +func _chmd(t,r) { + _ch("match()",match(t,r)) + _ch("split()",split(t,A,r)>1) + _ch("patsplit()",patsplit(t,A,r)) + _ch("gsub()",gsub(r,"&",t)) + t2=t; _ch("sub()",sub(r,"&",t2)) + _ch("gensub()",!gensub(r,"","G",t)) + _ch("str~/rexp/",t~r) + # switch-case is not applicable with dynrxp + _conline() } + +func _ch(fn,bool) { + print substr(fn ": ",1,16) (bool ? "+" : "-") } + +func _conline() { + print "__________________________"; print } + +BEGIN{ _chmd("\x01","^\1$") #testing that all doings right; all match + _chmd("\x00","^\1$") #testing that all doings right; all not match + _chmd("\x00","^\0$") #tesing dynrexp +} +# +# output: +# +# match(): + +# split(): + +# patsplit(): + +# gsub(): + +# sub(): + +# gensub(): + +# str~/rexp/: + +# __________________________ +# +# match(): - +# split(): - +# patsplit(): - +# gsub(): - +# sub(): - +# gensub(): - +# str~/rexp/: - +# __________________________ +# +# match(): + +# split(): + +# patsplit(): + +# gsub(): + +# sub(): + +# gensub(): + +# str~/rexp/: - +# +# it's looks like with the dynamic regexp the same story. +# +# i found another one moment that is possible near with the reason of this issue: +# +# i was testing what characters can be present in doublestring and regexp "directly" (just as the character) and what characters must be present as escape sequence (\qqq) +# +# so, i found the following: +# +# t="abc" +# if ( match(t,/^abc[NUL]def/) ) ... - where [NUL] is the character \x00 +# +# it's seems that in that case the regular expression is processed until [NUL]character and the other part is ignored because the example above gives TRUE +# +# friendship +# Denis Shirokov +# +# diff --git a/test/regnul2.ok b/test/regnul2.ok new file mode 100644 index 00000000..6b3cecab --- /dev/null +++ b/test/regnul2.ok @@ -0,0 +1,27 @@ +match(): + +split(): + +patsplit(): + +gsub(): + +sub(): + +gensub(): + +str~/rexp/: + +__________________________ + +match(): - +split(): - +patsplit(): - +gsub(): - +sub(): - +gensub(): - +str~/rexp/: - +__________________________ + +match(): + +split(): + +patsplit(): + +gsub(): + +sub(): + +gensub(): + +str~/rexp/: + +__________________________ + |