diff options
Diffstat (limited to 'doc/gawk.texi')
-rw-r--r-- | doc/gawk.texi | 273 |
1 files changed, 250 insertions, 23 deletions
diff --git a/doc/gawk.texi b/doc/gawk.texi index 414d5d8c..078a0ec8 100644 --- a/doc/gawk.texi +++ b/doc/gawk.texi @@ -920,6 +920,7 @@ particular records in a file and perform operations upon them. * Inexact representation:: Numbers are not exactly represented. * Comparing FP Values:: How to compare floating point values. * Errors accumulate:: Errors get bigger as they go. +* Strange values:: A few words about infinities and NaNs. * Getting Accuracy:: Getting more accuracy takes some work. * Try To Round:: Add digits and round. * Setting precision:: How to set the precision. @@ -3136,11 +3137,12 @@ column means that the person is a friend. An @samp{R} means that the person is a relative: @example -@c system if test ! -d eg ; then mkdir eg ; fi -@c system if test ! -d eg/lib ; then mkdir eg/lib ; fi -@c system if test ! -d eg/data ; then mkdir eg/data ; fi -@c system if test ! -d eg/prog ; then mkdir eg/prog ; fi -@c system if test ! -d eg/misc ; then mkdir eg/misc ; fi +@c system if test ! -d eg ; then mkdir eg ; fi +@c system if test ! -d eg/lib ; then mkdir eg/lib ; fi +@c system if test ! -d eg/data ; then mkdir eg/data ; fi +@c system if test ! -d eg/prog ; then mkdir eg/prog ; fi +@c system if test ! -d eg/misc ; then mkdir eg/misc ; fi +@c system if test ! -d eg/test-programs ; then mkdir eg/test-programs ; fi @c file eg/data/mail-list Amelia 555-5553 amelia.zodiacusque@@gmail.com F Anthony 555-3412 anthony.asserturo@@hotmail.com A @@ -8442,7 +8444,7 @@ FPAT = "([^,]*)|(\"[^\"]+\")" @c Per email from Ed Morton <mortoneccc@comcast.net> @c @c WONTFIX: 10/2020 -@c This is too much work. FPAT and CSV files are very flakey and +@c This is too much work. FPAT and CSV files are very flaky and @c fragile. Doing something like this is merely inviting trouble. As with @code{FS}, the @code{IGNORECASE} variable (@pxref{User-modified}) @@ -10101,7 +10103,7 @@ infinity are formatted as and positive infinity as @samp{inf} or @samp{infinity}. The special ``not a number'' value formats as @samp{-nan} or @samp{nan} -(@pxref{Math Definitions}). +(@pxref{Strange values}). @item @code{%F} Like @samp{%f}, but the infinity and ``not a number'' values are spelled @@ -18337,7 +18339,7 @@ compatibility mode (@pxref{Options}). @cindexawkfunc{log} @cindex logarithm Return the natural logarithm of @var{x}, if @var{x} is positive; -otherwise, return @code{NaN} (``not a number'') on IEEE 754 systems. +otherwise, return NaN (``not a number'') on IEEE 754 systems. Additionally, @command{gawk} prints a warning message when @code{x} is negative. @@ -33684,21 +33686,9 @@ A special value representing infinity. Operations involving another number and infinity produce infinity. @item NaN -``Not a number.''@footnote{Thanks to Michael Brennan for this description, -which we have paraphrased, and for the examples.} A special value that -results from attempting a calculation that has no answer as a real number. -In such a case, programs can either receive a floating-point exception, -or get @code{NaN} back as the result. The IEEE 754 standard recommends -that systems return @code{NaN}. Some examples: - -@table @code -@item sqrt(-1) -This makes sense in the range of complex numbers, but not in the -range of real numbers, so the result is @code{NaN}. - -@item log(-8) -@minus{}8 is out of the domain of @code{log()}, so the result is @code{NaN}. -@end table +``Not a number.'' A special value that results from attempting a +calculation that has no answer as a real number. @xref{Strange values}, +for more information about infinity and not-a-number values. @item Normalized How the significand (see later in this list) is usually stored. The @@ -33867,6 +33857,7 @@ decimal places in the final result. * Inexact representation:: Numbers are not exactly represented. * Comparing FP Values:: How to compare floating point values. * Errors accumulate:: Errors get bigger as they go. +* Strange values:: A few words about infinities and NaNs. @end menu @node Inexact representation @@ -33988,6 +33979,242 @@ $ @kbd{gawk 'BEGIN @{} @print{} 4 @end example +@node Strange values +@subsubsection Floating Point Values They Didn't Talk About In School + +Both IEEE 754 floating-point hardware, and MPFR, support two kinds of +values that you probably didn't learn about in school. The first is +@dfn{infinity}, a special value, that can be either negative or positive, +and which is either smaller than any other value (negative infinity), +or larger than any other value (positive infinity). When such values +are generated, @command{gawk} prints them as either @samp{-inf} or +@samp{+inf}, respectively. It accepts those strings as data input and +converts them to the proper floating-point values internally. + +Infinity values of the same sign compare as equal to each other. +Otherwise, operations (addition, subtraction, etc.) involving another +number and infinity produce mathematically reasonable results. + +The second kind of value is ``not a number'', or NaN for +short.@footnote{Thanks to Michael Brennan for this description, which we +have paraphrased, and for the examples.} This is a special value that results +from attempting a calculation that has no answer as a real number. +In such a case, programs can either receive a floating-point exception, +or get NaN back as the result. The IEEE 754 standard recommends +that systems return NaN. Some examples: + +@table @code +@item sqrt(-1) +@iftex +The @math{\sqrt{-1}} +@end iftex +@ifnottex +This +@end ifnottex +makes sense in the range of complex numbers, but not in the +range of real numbers, so the result is NaN. + +@item log(-8) +@minus{}8 is out of the domain of @code{log()}, so the result is NaN. +@end table + +NaN values are strange. In particular, they cannot be compared with other +floating point values; any such comparison, except for ``is not equal +to'', returns false. NaN values are so much unequal to other values that +even comparing two identical NaN values with @code{!=} returns true! + +NaN values can also be signed, although it depends upon the implementation +as to which sign you get for any operation that returns a NaN. For +example, on some systems, @code{sqrt(-1)} returns a negative NaN. On +others, it returns a positive NaN. + +When such values are generated, @command{gawk} prints them as either +@samp{-nan} or @samp{+nan}, respectively. Here too, @command{gawk} +accepts those strings as data input and converts them to the proper +floating-point values internally. + +If you want to dive more deeply into this topic, you can find +test programs in C, @command{awk} and Python in the directory +@file{awklib/eg/test-programs} in the @command{gawk} distribution. +These programs enable comparison among programming languages as to how +they handle NaN and infinity values. + +@ignore +@c file eg/test-programs/gen-float-table.awk +function eq(left, right) +{ + return left == right +} + +function ne(left, right) +{ + return left != right +} + +function lt(left, right) +{ + return left < right +} + +function le(left, right) +{ + return left <= right +} + +function gt(left, right) +{ + return left > right +} + +function ge(left, right) +{ + return left >= right +} + +BEGIN { + nan = sqrt(-1) + inf = -log(0) + split("== != < <= > >=", names) + names[3] = names[3] " " + names[5] = names[5] " " + split("eq ne lt le gt ge", funcs) + + compare[1] = 2.0 + compare[2] = values[1] = -sqrt(-1.0) # nan + compare[3] = values[2] = sqrt(-1.0) # -nan + compare[4] = values[3] = -log(0.0) # inf + compare[5] = values[4] = log(0.0) # -inf + + for (i = 1; i in values; i++) { + for (j = 1; j in compare; j++) { + for (k = 1; k in names; k++) { + the_func = funcs[k] + printf("%g %s %g -> %s\n", + values[i], + names[k], + compare[j], + @the_func(values[i], compare[j]) ? + "true" : "false"); + } + printf("\n"); + } + } +} +@c endfile +@end ignore + +@ignore +@c file eg/test-programs/gen-float-table.c +#include <stdio.h> +#include <math.h> +#include <stdbool.h> + +#define def_func(name, op) \ + bool name(double left, double right) { \ + return left op right; \ + } + +def_func(eq, ==) +def_func(ne, !=) +def_func(lt, <) +def_func(le, <=) +def_func(gt, >) +def_func(ge, >=) + +struct { + const char *name; + bool (*func)(double left, double right); +} functions[] = { + { "==", eq }, + { "!=", ne }, + { "< ", lt }, + { "<=", le }, + { "> ", gt }, + { ">=", ge }, + { 0, 0 } +}; + +int main() +{ + double values[] = { + -sqrt(-1), // nan + sqrt(-1), // -nan + -log(0.0), // inf + log(0.0) // -inf + }; + double compare[] = { 2.0, + -sqrt(-1), // nan + sqrt(-1), // -nan + -log(0.0), // inf + log(0.0) // -inf + }; + + int i, j, k; + + for (i = 0; i < 4; i++) { + for (j = 0; j < 5; j++) { + for (k = 0; functions[k].name != NULL; k++) { + printf("%g %s %g -> %s\n", values[i], + functions[k].name, + compare[j], + functions[k].func(values[i], compare[j]) ? "true" : "false"); + } + printf("\n"); + } + } + + return 0; +} +@c endfile +@end ignore + +@ignore +@c file eg/test-programs/gen-float-table.py +from math import * + +nan = float('NaN') +inf = float('Inf') + +def eq(left, right): + return left == right + +def ne(left, right): + return left != right + +def lt(left, right): + return left < right + +def le(left, right): + return left <= right + +def gt(left, right): + return left > right + +def ge(left, right): + return left >= right + +func_map = { + "==": eq, + "!=": ne, + "< ": lt, + "<=": le, + "> ": gt, + ">=": ge, +} + +compare = [2.0, nan, -nan, inf, -inf] +values = [nan, -nan, inf, -inf] + +for i in range(len(values)): + for j in range(len(compare)): + for op in func_map: + print("%g %s %g -> %s" % + (values[i], op, compare[j], func_map[op](values[i], compare[j]))) + + print("") +@c endfile +@end ignore + @node Getting Accuracy @subsection Getting the Accuracy You Need |