diff options
Diffstat (limited to 'test/gsubtst4.awk')
-rw-r--r-- | test/gsubtst4.awk | 242 |
1 files changed, 242 insertions, 0 deletions
diff --git a/test/gsubtst4.awk b/test/gsubtst4.awk new file mode 100644 index 00000000..48b8413e --- /dev/null +++ b/test/gsubtst4.awk @@ -0,0 +1,242 @@ +# From arnold Thu May 9 17:27:03 2002 +# Return-Path: <arnold@skeeve.com> +# Received: (from arnold@localhost) +# by skeeve.com (8.11.6/8.11.6) id g49ER3K27925 +# for arnold; Thu, 9 May 2002 17:27:03 +0300 +# Date: Thu, 9 May 2002 17:27:03 +0300 +# From: Aharon Robbins <arnold@skeeve.com> +# Message-Id: <200205091427.g49ER3K27925@skeeve.com> +# To: arnold@skeeve.com +# Subject: fixme +# X-SpamBouncer: 1.4 (10/07/01) +# X-SBRule: Pattern Match (Other Patterns) (Score: 4850) +# X-SBRule: Pattern Match (Spam Phone #) (Score: 0) +# X-SBClass: Blocked +# Status: RO +# +# Path: ord-read.news.verio.net!dfw-artgen!iad-peer.news.verio.net!news.verio.net!fu-berlin.de!uni-berlin.de!host213-120-137-48.in-addr.btopenworld.COM!not-for-mail +# From: laura@madonnaweb.com (laura fairhead) +# Newsgroups: comp.lang.awk +# Subject: bug in gawk3.1.0 regex code +# Date: Wed, 08 May 2002 23:31:40 GMT +# Organization: that'll be the daewooo :) +# Lines: 211 +# Message-ID: <3cd9b0f7.29675926@NEWS.CIS.DFN.DE> +# Reply-To: laura@madonnaweb.com +# NNTP-Posting-Host: host213-120-137-48.in-addr.btopenworld.com (213.120.137.48) +# X-Trace: fu-berlin.de 1020900891 18168286 213.120.137.48 (16 [53286]) +# X-Newsreader: Forte Free Agent 1.21/32.243 +# Xref: dfw-artgen comp.lang.awk:13059 +# +# +# I believe I've just found a bug in gawk3.1.0 implementation of +# extended regular expressions. 
It seems to be down to the alternation +# operator; when using an end anchor '$' as a subexpression in an +# alternation and the entire matched RE is a nul-string it fails +# to match the end of string, for example; +# +# gsub(/$|2/,"x") +# print +# +# input = 12345 +# expected output = 1x345x +# actual output = 1x345 +# +# The start anchor '^' always works as expected; +# +# gsub(/^|2/,"x") +# print +# +# input = 12345 +# expected output = x1x345 +# actual output = x1x345 +# +# This was with POSIX compliance enabled although that doesn't +# affect the result. +# +# I checked on gawk3.0.6 and got exactly the same results however +# gawk2.15.6 gives the expected results. +# +# I'm about to post a bug report about this into gnu.utils.bug +# but I thought I'd post it here first in case anyone has +# any input/comments/whatever .... +# +# Complete test results were as follows; +# +# input 12345 +# output gsub(/regex/,"x",input) +# +# regex output +# (^) x12345 +# ($) 12345x +# (^)|($) x12345x +# ($)|(^) x12345x +# (2) 1x345 +# (^)|2 x1x345 +# 2|(^) x1x345 +# ($)|2 1x345 +# 2|($) 1x345 +# (2)|(^) x1x345 +# (^)|(2) x1x345 +# (2)|($) 1x345 +# ($)|(2) 1x345 +# .((2)|(^)) x345 +# .((^)|(2)) x345 +# .((2)|($)) x34x +# .(($)|(2)) x34x +# x{0}((2)|(^)) x1x345 +# x{0}((^)|(2)) x1x345 +# x{0}((2)|($)) 1x345 +# x{0}(($)|(2)) 1x345 +# x*((2)|(^)) x1x345 +# x*((^)|(2)) x1x345 +# x*((2)|($)) 1x345 +# x*(($)|(2)) 1x345 +# +# Here's the test program I used, a few of the cases use ERE {n[,[m]]} +# operators so that will have to be commented out or have a check +# added or something (should have put a conditional in I know... 
;-) +# +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# Test driver: all cases run inside the BEGIN block and print one result line each. +BEGIN{ + +# TESTSTR is the fixed subject string used by every case. +TESTSTR="12345" + +print "input "TESTSTR +print "output gsub(/regex/,\"x\",input)" +print "" + +print "regex output" +# Each case below resets $0 to TESTSTR, applies gsub() with a regex constant and the replacement "x" to $0, then prints a label for the regex followed by the modified $0. +$0=TESTSTR +gsub(/(^)/,"x") +print "(^) "$0 + +$0=TESTSTR +gsub(/($)/,"x") +print "($) "$0 + +$0=TESTSTR +gsub(/(^)|($)/,"x") +print "(^)|($) "$0 + +$0=TESTSTR +gsub(/($)|(^)/,"x") +print "($)|(^) "$0 + +# NOTE: the label printed for this case is "(2)" although the regex constant is /2/ without a group. +$0=TESTSTR +gsub(/2/,"x") +print "(2) "$0 + +$0=TESTSTR +gsub(/(^)|2/,"x") +print "(^)|2 "$0 + +$0=TESTSTR +gsub(/2|(^)/,"x") +print "2|(^) "$0 + +$0=TESTSTR +gsub(/($)|2/,"x") +print "($)|2 "$0 + +$0=TESTSTR +gsub(/2|($)/,"x") +print "2|($) "$0 + +$0=TESTSTR +gsub(/(2)|(^)/,"x") +print "(2)|(^) "$0 + +$0=TESTSTR +gsub(/(^)|(2)/,"x") +print "(^)|(2) "$0 + +$0=TESTSTR +gsub(/(2)|($)/,"x") +print "(2)|($) "$0 + +$0=TESTSTR +gsub(/($)|(2)/,"x") +print "($)|(2) "$0 + +# Cases where the anchor alternation follows a leading "." in the regex. +$0=TESTSTR +gsub(/.((2)|(^))/,"x") +print ".((2)|(^)) "$0 + +$0=TESTSTR +gsub(/.((^)|(2))/,"x") +print ".((^)|(2)) "$0 + +$0=TESTSTR +gsub(/.((2)|($))/,"x") +print ".((2)|($)) "$0 + +$0=TESTSTR +gsub(/.(($)|(2))/,"x") +print ".(($)|(2)) "$0 + +# Cases prefixed with x{0}; these use the ERE {n} interval operator that the quoted message warns about. +$0=TESTSTR +gsub(/x{0}((2)|(^))/,"x") +print "x{0}((2)|(^)) "$0 + +$0=TESTSTR +gsub(/x{0}((^)|(2))/,"x") +print "x{0}((^)|(2)) "$0 + +$0=TESTSTR +gsub(/x{0}((2)|($))/,"x") +print "x{0}((2)|($)) "$0 + +$0=TESTSTR +gsub(/x{0}(($)|(2))/,"x") +print "x{0}(($)|(2)) "$0 + +# Cases prefixed with x*. +$0=TESTSTR +gsub(/x*((2)|(^))/,"x") +print "x*((2)|(^)) "$0 + +$0=TESTSTR +gsub(/x*((^)|(2))/,"x") +print "x*((^)|(2)) "$0 + +$0=TESTSTR +gsub(/x*((2)|($))/,"x") +print "x*((2)|($)) "$0 + +$0=TESTSTR +gsub(/x*(($)|(2))/,"x") +print "x*(($)|(2)) "$0 + +# The remaining four cases (x{0} directly followed by an anchor) do not appear in the results table quoted in the message. +$0=TESTSTR +gsub(/x{0}^/,"x") +print "x{0}^ "$0 + +$0=TESTSTR +gsub(/x{0}$/,"x") +print "x{0}$ "$0 + +$0=TESTSTR +gsub(/(x{0}^)|2/,"x") +print "(x{0}^)|2 "$0 + +$0=TESTSTR +gsub(/(x{0}$)|2/,"x") +print "(x{0}$)|2 "$0 + + +} +# +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# byefrom +# +# -- +# laura fairhead # 
laura@madonnaweb.com http://lf.8k.com +# # if you are bored crack my sig. +# 1F8B0808CABB793C0000666667002D8E410E83300C04EF91F2877D00CA138A7A +# EAA98F30C494480157B623C4EF1B508FDED1CEFA9152A23DE35D661593C5318E +# 630C313CD701BE92E390563326EE17A3CA818F5266E4C2461547F1F5267659CA +# 8EE2092F76C329ED02CA430C5373CC62FF94BAC6210B36D9F9BC4AB53378D978 +# 80F2978A1A6E5D6F5133B67B6113178DC1059526698AFE5C17A5187E7D930492 +# |