diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2013-09-25 12:59:21 +0300 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2013-09-25 12:59:21 +0300 |
commit | 026a126c27f648839325c7fadb37c42d2935f467 (patch) | |
tree | 2657622bcb410d484434804bc900dde0eca53d13 | |
parent | 7234d4a6c1ef8763ab6ac25619f8a225260d60b8 (diff) | |
download | egawk-026a126c27f648839325c7fadb37c42d2935f467.tar.gz egawk-026a126c27f648839325c7fadb37c42d2935f467.tar.bz2 egawk-026a126c27f648839325c7fadb37c42d2935f467.zip |
Improvements in the random number generator.
-rw-r--r-- | ChangeLog | 7 | ||||
-rw-r--r-- | NEWS | 5 | ||||
-rw-r--r-- | builtin.c | 21 | ||||
-rw-r--r-- | test/ChangeLog | 6 | ||||
-rw-r--r-- | test/Makefile.am | 9 | ||||
-rw-r--r-- | test/Makefile.in | 9 | ||||
-rw-r--r-- | test/rand.ok | 2 | ||||
-rw-r--r-- | test/randtest.ok | 0 | ||||
-rwxr-xr-x | test/randtest.sh | 113 |
9 files changed, 168 insertions, 4 deletions
@@ -1,3 +1,10 @@ +2013-09-25 Arnold D. Robbins <arnold@skeeve.com> + + * builtin.c (do_rand): Make the result more random by calling + random() twice. See the comment in the code. Thanks to + Bob Jewett <jewett@bill.scs.agilent.com> for the report and + the fix. + 2013-09-24 Arnold D. Robbins <arnold@skeeve.com> * debug.c (find_rule): Handle case where lineno is zero. Can happen @@ -11,6 +11,11 @@ Changes from 4.1.1 to 4.2.0 gawk's environment, affecting any programs run by system() or for piped redirections. +2. The series of numbers returned by rand() should now be "more + random" than previously. Gawk's rand() remains repeatable; you will + get the same series of numbers each time you call rand() repeatedly, + but this will be a different series than previously. + Changes from 4.1.0 to 4.1.1 --------------------------- @@ -2372,6 +2372,8 @@ static char *const state = (char *const) istate; NODE * do_rand(int nargs ATTRIBUTE_UNUSED) { + double tmprand; +#define RAND_DIVISOR ((double)GAWK_RANDOM_MAX+1.0) if (firstrand) { (void) initstate((unsigned) 1, state, SIZEOF_STATE); /* don't need to srandom(1), initstate() does it for us. */ @@ -2383,7 +2385,24 @@ do_rand(int nargs ATTRIBUTE_UNUSED) * * 0 <= n < 1 */ - return make_number((AWKNUM) (random() % GAWK_RANDOM_MAX) / GAWK_RANDOM_MAX); + /* + * Date: Wed, 28 Aug 2013 17:52:46 -0700 + * From: Bob Jewett <jewett@bill.scs.agilent.com> + * + * Call random() twice to fill in more bits in the value + * of the double. Also, there is a bug in random() such + * that when the values of successive values are combined + * like (rand1*rand2)^2, (rand3*rand4)^2, ... the + * resulting time series is not white noise. The + * following also seems to fix that bug. + * + * The add/subtract 0.5 keeps small bits from filling + * below 2^-53 in the double, not that anyone should be + * looking down there. + */ + + tmprand = 0.5 + ( (random()/RAND_DIVISOR + random()) / RAND_DIVISOR); + return make_number((AWKNUM) (tmprand - 0.5)); } /* do_srand --- seed the random number generator */ diff --git a/test/ChangeLog b/test/ChangeLog index ad0366f2..b1c3e7b9 100644 --- a/test/ChangeLog +++ b/test/ChangeLog @@ -1,3 +1,9 @@ +2013-09-25 Arnold D. Robbins <arnold@skeeve.com> + + * Makfile.am (randtest): New test. + * randtest.sh, randtest.ok: New files. + * rand.ok: Updated to reflect new results based on code change. + 2013-09-13 Arnold D. Robbins <arnold@skeeve.com> * Makefile.am: Fix quoting for generation of Maketests file so diff --git a/test/Makefile.am b/test/Makefile.am index 8288cf7d..d09e6988 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -691,6 +691,8 @@ EXTRA_DIST = \ rand-mpfr.ok \ rand.awk \ rand.ok \ + randtest.sh \ + randtest.ok \ range1.awk \ range1.in \ range1.ok \ @@ -952,7 +954,7 @@ BASIC_TESTS = \ paramdup paramres paramtyp paramuninitglobal parse1 parsefld parseme \ pcntplus posix2008sub prdupval prec printf0 printf1 prmarscl prmreuse \ prt1eval prtoeval \ - rand range1 rebt8b1 redfilnm regeq regexprange regrange reindops \ + rand randtest range1 rebt8b1 redfilnm regeq regexprange regrange reindops \ reparse resplit rri1 rs rsnul1nl rsnulbig rsnulbig2 rstest1 rstest2 \ rstest3 rstest4 rstest5 rswhite \ scalar sclforin sclifin sortempty splitargv splitarr splitdef \ @@ -1858,6 +1860,11 @@ dfamb1: AWKPATH="$(srcdir)" $(AWK) -f $@.awk "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ +randtest:: + @echo $@ + @GAWK="$(AWKPROG)" "$(srcdir)"/randtest.sh >_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + # Targets generated for other tests: include Maketests diff --git a/test/Makefile.in b/test/Makefile.in index 31a22602..0b7f1f46 100644 --- a/test/Makefile.in +++ b/test/Makefile.in @@ -909,6 +909,8 @@ EXTRA_DIST = \ rand-mpfr.ok \ rand.awk \ rand.ok \ + randtest.sh \ + randtest.ok \ range1.awk \ range1.in \ range1.ok \ @@ -1169,7 +1171,7 @@ BASIC_TESTS = \ paramdup paramres paramtyp paramuninitglobal parse1 parsefld parseme \ pcntplus posix2008sub prdupval prec printf0 printf1 prmarscl prmreuse \ prt1eval prtoeval \ - rand range1 rebt8b1 redfilnm regeq regexprange regrange reindops \ + rand randtest range1 rebt8b1 redfilnm regeq regexprange regrange reindops \ reparse resplit rri1 rs rsnul1nl rsnulbig rsnulbig2 rstest1 rstest2 \ rstest3 rstest4 rstest5 rswhite \ scalar sclforin sclifin sortempty splitargv splitarr splitdef \ @@ -2251,6 +2253,11 @@ dfamb1: @[ -z "$$GAWKLOCALE" ] && GAWKLOCALE=en_US.UTF-8; \ AWKPATH="$(srcdir)" $(AWK) -f $@.awk "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + +randtest:: + @echo $@ + @GAWK="$(AWKPROG)" "$(srcdir)"/randtest.sh >_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ Gt-dummy: # file Maketests, generated from Makefile.am by the Gentests program addcomma: diff --git a/test/rand.ok b/test/rand.ok index 60432b95..1df4ba39 100644 --- a/test/rand.ok +++ b/test/rand.ok @@ -1 +1 @@ - 62 67 88 6 35 77 3 68 30 96 90 26 35 8 88 93 49 53 37 + 67 6 77 68 96 26 8 93 53 74 53 95 78 74 96 77 33 58 91 diff --git a/test/randtest.ok b/test/randtest.ok new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/test/randtest.ok diff --git a/test/randtest.sh b/test/randtest.sh new file mode 100755 index 00000000..b17fda73 --- /dev/null +++ b/test/randtest.sh @@ -0,0 +1,113 @@ +# THIS PURPOSELY DOES NOT HAVE A !# LINE !!!! +# +# Date: Mon, 9 Sep 2013 14:49:43 -0700 +# From: Bob Jewett <jewett@bill.scs.agilent.com> +# Message-Id: <201309092149.r89Lnh94010909@bill.scs.agilent.com> +# To: arnold@skeeve.com +# Subject: Re: [bug-gawk] Bug in random() in builtin.c +# +# Hi Arnold, +# +# Attached below is a script that tests gawk for this particular +# rand() problem. The pair-wise combinations show a strong +# autocorrelation for a delay of 31 pairs of rand() samples. +# +# The script prints out the measured autocorrelation for a record +# of NSAMPLES pairs. It also prints a fail message at the end if +# it fails. +# +# If you want to see the autocorrelation values, there is a print +# statement that if uncommented will save them to a file. +# +# Please let me know if the mailer screws up the transfer or +# if you have any questions about the test. +# +# Best regards, +# Bob +# +# -------------- test_pair_power_autocorrelation ----------------------- +# +#!/bin/ksh + +#GAWK=/bin/gawk + +# ADR: Get GAWK from the environment. +# Additional note: This wants ksh/bash for the use of $RANDOM below to +# seed the generator. However, shells that don't provide it won't be +# a problem since gawk will then seed the generator with the time of day, +# as srand() will be called without an argument. + +# large NSAMPLES and NRUNS will bring any correlation out of the noise better +NSAMPLES=1024; MAX_ALLOWED_SIGMA=5; NRUNS=50; + +$GAWK 'BEGIN{ + srand('$RANDOM'); + nsamples=('$NSAMPLES'); + max_allowed_sigma=('$MAX_ALLOWED_SIGMA'); + nruns=('$NRUNS'); + for(tau=0;tau<nsamples/2;tau++) corr[tau]=0; + + for(run=0;run<nruns;run++) { + sum=0; + + # Fill an array with a sequence of samples that are a + # function of pairs of rand() values. + + for(i=0;i<nsamples;i++) { + samp[i]=((rand()-0.5)*(rand()-0.5))^2; + sum=sum+samp[i]; + } + + # Subtract off the mean of the sequence: + + mean=sum/nsamples; + for(i=0;i<nsamples;i++) samp[i]=samp[i]-mean; + + # Calculate an autocorrelation function on the sequence. + # Because the values of rand() should be independent, there + # should be no peaks in the autocorrelation. + + for(tau=0;tau<nsamples/2;tau++) { + sum=0; + for(i=0;i<nsamples/2;i++) sum=sum+samp[i]*samp[i+tau]; + corr[tau]=corr[tau]+sum; + } + + } + # Normalize the autocorrelation to the tau=0 value. + + max_corr=corr[0]; + for(tau=0;tau<nsamples/2;tau++) corr[tau]=corr[tau]/max_corr; + + # OPTIONALLY Print out the autocorrelation values: + + # for(tau=0;tau<nsamples/2;tau++) print tau, corr[tau] > "pairpower_corr.data"; + + # Calculate the sigma for the non-zero tau values: + + power_sum=0; + + for(tau=1;tau<nsamples/2;tau++) power_sum=power_sum+(corr[tau])^2; + + sigma=sqrt(power_sum/(nsamples/2-1)); + + # See if any of the correlations exceed a reasonable number of sigma: + + passed=1; + for(tau=1;tau<nsamples/2;tau++) { + if ( abs(corr[tau])/sigma > max_allowed_sigma ) { + print "Tau=", tau ", Autocorr=", corr[tau]/sigma, "sigma"; + passed=0; + } + } + if(!passed) { + print "Test failed." + exit(1); + } + else exit (0); + } + +function abs(abs_input) { return(sqrt(abs_input^2)) ; } +' + +exit 0 |