aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArnold D. Robbins <arnold@skeeve.com>2013-09-25 12:59:21 +0300
committerArnold D. Robbins <arnold@skeeve.com>2013-09-25 12:59:21 +0300
commit026a126c27f648839325c7fadb37c42d2935f467 (patch)
tree2657622bcb410d484434804bc900dde0eca53d13
parent7234d4a6c1ef8763ab6ac25619f8a225260d60b8 (diff)
downloadegawk-026a126c27f648839325c7fadb37c42d2935f467.tar.gz
egawk-026a126c27f648839325c7fadb37c42d2935f467.tar.bz2
egawk-026a126c27f648839325c7fadb37c42d2935f467.zip
Improvements in the random number generator.
-rw-r--r--ChangeLog7
-rw-r--r--NEWS5
-rw-r--r--builtin.c21
-rw-r--r--test/ChangeLog6
-rw-r--r--test/Makefile.am9
-rw-r--r--test/Makefile.in9
-rw-r--r--test/rand.ok2
-rw-r--r--test/randtest.ok0
-rwxr-xr-xtest/randtest.sh113
9 files changed, 168 insertions, 4 deletions
diff --git a/ChangeLog b/ChangeLog
index 64caea35..fe593aeb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2013-09-25 Arnold D. Robbins <arnold@skeeve.com>
+
+ * builtin.c (do_rand): Make the result more random by calling
+ random() twice. See the comment in the code. Thanks to
+ Bob Jewett <jewett@bill.scs.agilent.com> for the report and
+ the fix.
+
2013-09-24 Arnold D. Robbins <arnold@skeeve.com>
* debug.c (find_rule): Handle case where lineno is zero. Can happen
diff --git a/NEWS b/NEWS
index b6f97d5e..3284ec1c 100644
--- a/NEWS
+++ b/NEWS
@@ -11,6 +11,11 @@ Changes from 4.1.1 to 4.2.0
gawk's environment, affecting any programs run by system()
or for piped redirections.
+2. The series of numbers returned by rand() should now be "more
+ random" than previously. Gawk's rand() remains repeatable; you will
+ get the same series of numbers each time you call rand() repeatedly,
+ but this will be a different series than previously.
+
Changes from 4.1.0 to 4.1.1
---------------------------
diff --git a/builtin.c b/builtin.c
index 9258ea60..925a92bd 100644
--- a/builtin.c
+++ b/builtin.c
@@ -2372,6 +2372,8 @@ static char *const state = (char *const) istate;
NODE *
do_rand(int nargs ATTRIBUTE_UNUSED)
{
+ double tmprand;
+#define RAND_DIVISOR ((double)GAWK_RANDOM_MAX+1.0)
if (firstrand) {
(void) initstate((unsigned) 1, state, SIZEOF_STATE);
/* don't need to srandom(1), initstate() does it for us. */
@@ -2383,7 +2385,24 @@ do_rand(int nargs ATTRIBUTE_UNUSED)
*
* 0 <= n < 1
*/
- return make_number((AWKNUM) (random() % GAWK_RANDOM_MAX) / GAWK_RANDOM_MAX);
+ /*
+ * Date: Wed, 28 Aug 2013 17:52:46 -0700
+ * From: Bob Jewett <jewett@bill.scs.agilent.com>
+ *
+ * Call random() twice to fill in more bits in the value
+ * of the double. Also, there is a bug in random() such
+ * that when the values of successive values are combined
+ * like (rand1*rand2)^2, (rand3*rand4)^2, ... the
+ * resulting time series is not white noise. The
+ * following also seems to fix that bug.
+ *
+ * The add/subtract 0.5 keeps small bits from filling
+ * below 2^-53 in the double, not that anyone should be
+ * looking down there.
+ */
+
+ tmprand = 0.5 + ( (random()/RAND_DIVISOR + random()) / RAND_DIVISOR);
+ return make_number((AWKNUM) (tmprand - 0.5));
}
/* do_srand --- seed the random number generator */
diff --git a/test/ChangeLog b/test/ChangeLog
index ad0366f2..b1c3e7b9 100644
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -1,3 +1,9 @@
+2013-09-25 Arnold D. Robbins <arnold@skeeve.com>
+
+ * Makfile.am (randtest): New test.
+ * randtest.sh, randtest.ok: New files.
+ * rand.ok: Updated to reflect new results based on code change.
+
2013-09-13 Arnold D. Robbins <arnold@skeeve.com>
* Makefile.am: Fix quoting for generation of Maketests file so
diff --git a/test/Makefile.am b/test/Makefile.am
index 8288cf7d..d09e6988 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -691,6 +691,8 @@ EXTRA_DIST = \
rand-mpfr.ok \
rand.awk \
rand.ok \
+ randtest.sh \
+ randtest.ok \
range1.awk \
range1.in \
range1.ok \
@@ -952,7 +954,7 @@ BASIC_TESTS = \
paramdup paramres paramtyp paramuninitglobal parse1 parsefld parseme \
pcntplus posix2008sub prdupval prec printf0 printf1 prmarscl prmreuse \
prt1eval prtoeval \
- rand range1 rebt8b1 redfilnm regeq regexprange regrange reindops \
+ rand randtest range1 rebt8b1 redfilnm regeq regexprange regrange reindops \
reparse resplit rri1 rs rsnul1nl rsnulbig rsnulbig2 rstest1 rstest2 \
rstest3 rstest4 rstest5 rswhite \
scalar sclforin sclifin sortempty splitargv splitarr splitdef \
@@ -1858,6 +1860,11 @@ dfamb1:
AWKPATH="$(srcdir)" $(AWK) -f $@.awk "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+randtest::
+ @echo $@
+ @GAWK="$(AWKPROG)" "$(srcdir)"/randtest.sh >_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
# Targets generated for other tests:
include Maketests
diff --git a/test/Makefile.in b/test/Makefile.in
index 31a22602..0b7f1f46 100644
--- a/test/Makefile.in
+++ b/test/Makefile.in
@@ -909,6 +909,8 @@ EXTRA_DIST = \
rand-mpfr.ok \
rand.awk \
rand.ok \
+ randtest.sh \
+ randtest.ok \
range1.awk \
range1.in \
range1.ok \
@@ -1169,7 +1171,7 @@ BASIC_TESTS = \
paramdup paramres paramtyp paramuninitglobal parse1 parsefld parseme \
pcntplus posix2008sub prdupval prec printf0 printf1 prmarscl prmreuse \
prt1eval prtoeval \
- rand range1 rebt8b1 redfilnm regeq regexprange regrange reindops \
+ rand randtest range1 rebt8b1 redfilnm regeq regexprange regrange reindops \
reparse resplit rri1 rs rsnul1nl rsnulbig rsnulbig2 rstest1 rstest2 \
rstest3 rstest4 rstest5 rswhite \
scalar sclforin sclifin sortempty splitargv splitarr splitdef \
@@ -2251,6 +2253,11 @@ dfamb1:
@[ -z "$$GAWKLOCALE" ] && GAWKLOCALE=en_US.UTF-8; \
AWKPATH="$(srcdir)" $(AWK) -f $@.awk "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
+randtest::
+ @echo $@
+ @GAWK="$(AWKPROG)" "$(srcdir)"/randtest.sh >_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
Gt-dummy:
# file Maketests, generated from Makefile.am by the Gentests program
addcomma:
diff --git a/test/rand.ok b/test/rand.ok
index 60432b95..1df4ba39 100644
--- a/test/rand.ok
+++ b/test/rand.ok
@@ -1 +1 @@
- 62 67 88 6 35 77 3 68 30 96 90 26 35 8 88 93 49 53 37
+ 67 6 77 68 96 26 8 93 53 74 53 95 78 74 96 77 33 58 91
diff --git a/test/randtest.ok b/test/randtest.ok
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/test/randtest.ok
diff --git a/test/randtest.sh b/test/randtest.sh
new file mode 100755
index 00000000..b17fda73
--- /dev/null
+++ b/test/randtest.sh
@@ -0,0 +1,113 @@
+# THIS PURPOSELY DOES NOT HAVE A !# LINE !!!!
+#
+# Date: Mon, 9 Sep 2013 14:49:43 -0700
+# From: Bob Jewett <jewett@bill.scs.agilent.com>
+# Message-Id: <201309092149.r89Lnh94010909@bill.scs.agilent.com>
+# To: arnold@skeeve.com
+# Subject: Re: [bug-gawk] Bug in random() in builtin.c
+#
+# Hi Arnold,
+#
+# Attached below is a script that tests gawk for this particular
+# rand() problem. The pair-wise combinations show a strong
+# autocorrelation for a delay of 31 pairs of rand() samples.
+#
+# The script prints out the measured autocorrelation for a record
+# of NSAMPLES pairs. It also prints a fail message at the end if
+# it fails.
+#
+# If you want to see the autocorrelation values, there is a print
+# statement that if uncommented will save them to a file.
+#
+# Please let me know if the mailer screws up the transfer or
+# if you have any questions about the test.
+#
+# Best regards,
+# Bob
+#
+# -------------- test_pair_power_autocorrelation -----------------------
+#
+#!/bin/ksh
+
+#GAWK=/bin/gawk
+
+# ADR: Get GAWK from the environment.
+# Additional note: This wants ksh/bash for the use of $RANDOM below to
+# seed the generator. However, shells that don't provide it won't be
+# a problem since gawk will then seed the generator with the time of day,
+# as srand() will be called without an argument.
+
+# large NSAMPLES and NRUNS will bring any correlation out of the noise better
+NSAMPLES=1024; MAX_ALLOWED_SIGMA=5; NRUNS=50;
+
+$GAWK 'BEGIN{
+ srand('$RANDOM');
+ nsamples=('$NSAMPLES');
+ max_allowed_sigma=('$MAX_ALLOWED_SIGMA');
+ nruns=('$NRUNS');
+ for(tau=0;tau<nsamples/2;tau++) corr[tau]=0;
+
+ for(run=0;run<nruns;run++) {
+ sum=0;
+
+ # Fill an array with a sequence of samples that are a
+ # function of pairs of rand() values.
+
+ for(i=0;i<nsamples;i++) {
+ samp[i]=((rand()-0.5)*(rand()-0.5))^2;
+ sum=sum+samp[i];
+ }
+
+ # Subtract off the mean of the sequence:
+
+ mean=sum/nsamples;
+ for(i=0;i<nsamples;i++) samp[i]=samp[i]-mean;
+
+ # Calculate an autocorrelation function on the sequence.
+ # Because the values of rand() should be independent, there
+ # should be no peaks in the autocorrelation.
+
+ for(tau=0;tau<nsamples/2;tau++) {
+ sum=0;
+ for(i=0;i<nsamples/2;i++) sum=sum+samp[i]*samp[i+tau];
+ corr[tau]=corr[tau]+sum;
+ }
+
+ }
+ # Normalize the autocorrelation to the tau=0 value.
+
+ max_corr=corr[0];
+ for(tau=0;tau<nsamples/2;tau++) corr[tau]=corr[tau]/max_corr;
+
+ # OPTIONALLY Print out the autocorrelation values:
+
+ # for(tau=0;tau<nsamples/2;tau++) print tau, corr[tau] > "pairpower_corr.data";
+
+ # Calculate the sigma for the non-zero tau values:
+
+ power_sum=0;
+
+ for(tau=1;tau<nsamples/2;tau++) power_sum=power_sum+(corr[tau])^2;
+
+ sigma=sqrt(power_sum/(nsamples/2-1));
+
+ # See if any of the correlations exceed a reasonable number of sigma:
+
+ passed=1;
+ for(tau=1;tau<nsamples/2;tau++) {
+ if ( abs(corr[tau])/sigma > max_allowed_sigma ) {
+ print "Tau=", tau ", Autocorr=", corr[tau]/sigma, "sigma";
+ passed=0;
+ }
+ }
+ if(!passed) {
+ print "Test failed."
+ exit(1);
+ }
+ else exit (0);
+ }
+
+function abs(abs_input) { return(sqrt(abs_input^2)) ; }
+'
+
+exit 0