diff options
-rw-r--r-- | ChangeLog | 32 | ||||
-rw-r--r-- | array.c | 15 | ||||
-rw-r--r-- | awk.h | 6 | ||||
-rw-r--r-- | awkgram.c | 1 | ||||
-rw-r--r-- | awkgram.y | 1 | ||||
-rw-r--r-- | bool.notes | 53 | ||||
-rw-r--r-- | builtin.c | 42 | ||||
-rw-r--r-- | eval.c | 1 | ||||
-rw-r--r-- | field.c | 2 | ||||
-rw-r--r-- | gawkapi.h | 47 | ||||
-rw-r--r-- | main.c | 1 | ||||
-rw-r--r-- | node.c | 22 | ||||
-rw-r--r-- | test/ChangeLog | 5 | ||||
-rw-r--r-- | test/dumpvars.ok | 2 | ||||
-rw-r--r-- | test/functab5.ok | 1 | ||||
-rw-r--r-- | test/id.ok | 1 | ||||
-rw-r--r-- | test/intest.awk | 4 | ||||
-rw-r--r-- | test/symtab11.ok | 1 | ||||
-rw-r--r-- | test/symtab8.ok | 2 |
19 files changed, 210 insertions, 29 deletions
@@ -1,3 +1,35 @@ +2021-03-20 Arnold D. Robbins <arnold@skeeve.com> + + * array.c (do_sort_up_value_type): Add logic for handling bools. + +2021-03-08 Arnold D. Robbins <arnold@skeeve.com> + + * awk.h (warn_bool, do_bool): Add function declarations. + * awkgram.y (tokentab): Add entry for "bool" builtin. + * builtin.c (warn_bool): New function. + (do_index, do_substr, do_toupper, do_tolower, do_match, + do_length): Call it. + (do_sub): If first arg to sub/gsub is bool, fatal error. + (do_bool): New function. + * field.c (do_split, do_patsplit): Call warn_bool. + * main.c (load_procinfo_bools): Removed function and call. + +2021-03-05 Arnold D. Robbins <arnold@skeeve.com> + + Start on a bool type for gawk. + + * awk.h (BOOL): New flag value. + (make_bool_node): Add declaration of new function. + (bool_val): Check for BOOL along with NUMBER. + * builtin.c (do_typeof): Add support for BOOL. + * eval.c (flags2str): Ditto. + * gawkapi.h (awk_val_type): Add AWK_BOOL. + (awk_value_t): Add awk_bool_t element to the union and macro for it. + (struct gawk_api): Update the table of request/return values. + * main.c (load_procinfo_bools): New function. + (load_procinfo): Call it. + * node.c (make_bool_node): New function. + 2021-02-13 Arnold D. Robbins <arnold@skeeve.com> * io.c (nextfile): Use the value of ARGC directly in the for @@ -1209,11 +1209,24 @@ do_sort_up_value_type(const void *p1, const void *p2) (void) fixtype(n1); (void) fixtype(n2); + /* 3a. Bools first */ + if ((n1->flags & BOOL) != 0 && (n2->flags & BOOL) != 0) { + return cmp_numbers(n1, n2); + } + + /* 3b. Numbers next */ if ((n1->flags & NUMBER) != 0 && (n2->flags & NUMBER) != 0) { return cmp_numbers(n1, n2); } - /* 3. All numbers are less than all strings. This is aribitrary. */ + /* 3c. Bools before everything else */ + if ((n1->flags & BOOL) != 0 && (n2->flags & BOOL) == 0) { + return -1; + } else if ((n1->flags & BOOL) == 0 && (n2->flags & BOOL) != 0) { + return 1; + } + + /* 3d. 
All numbers are less than all strings. This is arbitrary. */ if ((n1->flags & NUMBER) != 0 && (n2->flags & STRING) != 0) { return -1; } else if ((n1->flags & STRING) != 0 && (n2->flags & NUMBER) != 0) { @@ -463,6 +463,7 @@ typedef struct exp_node { XARRAY = 0x10000, NUMCONSTSTR = 0x20000, /* have string value for numeric constant */ REGEX = 0x40000, /* this is a typed regex */ + BOOL = 0x80000, /* this is a boolean value */ } flags; long valref; } NODE; @@ -1454,6 +1455,7 @@ extern bool is_identchar(int c); extern NODE *make_regnode(NODETYPE type, NODE *exp); extern bool validate_qualified_name(char *token); /* builtin.c */ +extern void warn_bool(const char *func, int argnum, NODE *n); extern double double_to_int(double d); extern NODE *do_exp(int nargs); extern NODE *do_fflush(int nargs); @@ -1503,6 +1505,7 @@ extern int strncasecmpmbs(const unsigned char *, const unsigned char *, size_t); extern int sanitize_exit_status(int status); extern void check_symtab_functab(NODE *dest, const char *fname, const char *msg); +extern NODE *do_bool(int nargs); /* debug.c */ extern void init_debug(void); extern int debug_prog(INSTRUCTION *pc); @@ -1714,6 +1717,7 @@ extern NODE *r_force_number(NODE *n); extern NODE *r_format_val(const char *format, int index, NODE *s); extern NODE *r_dupnode(NODE *n); extern NODE *make_str_node(const char *s, size_t len, int flags); +extern NODE *make_bool_node(bool value); extern NODE *make_typed_regex(const char *re, size_t len); extern void *more_blocks(int id); extern int parse_escape(const char **string_ptr); @@ -1995,7 +1999,7 @@ static inline bool boolval(NODE *t) { (void) fixtype(t); - if ((t->flags & NUMBER) != 0) + if ((t->flags & (BOOL|NUMBER)) != 0) return ! 
is_zero(t); return (t->stlen > 0); } @@ -4779,6 +4779,7 @@ static const struct token tokentab[] = { {"asorti", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3), do_asorti, 0}, {"atan2", Op_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_atan2, MPF(atan2)}, {"bindtextdomain", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_bindtextdomain, 0}, +{"bool", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_bool, 0}, {"break", Op_K_break, LEX_BREAK, 0, 0, 0}, {"case", Op_K_case, LEX_CASE, GAWKX, 0, 0}, {"close", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1)|A(2), do_close, 0}, @@ -2277,6 +2277,7 @@ static const struct token tokentab[] = { {"asorti", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3), do_asorti, 0}, {"atan2", Op_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_atan2, MPF(atan2)}, {"bindtextdomain", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_bindtextdomain, 0}, +{"bool", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_bool, 0}, {"break", Op_K_break, LEX_BREAK, 0, 0, 0}, {"case", Op_K_case, LEX_CASE, GAWKX, 0, 0}, {"close", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1)|A(2), do_close, 0}, diff --git a/bool.notes b/bool.notes new file mode 100644 index 00000000..8e24c2b5 --- /dev/null +++ b/bool.notes @@ -0,0 +1,53 @@ +Thu Mar 18 21:05:29 IST 2021 +============================ + +Design Notes for a Boolean Type in Gawk + +1. A new function bool(val) converts val to bool, returning Boolean TRUE +or FALSE. This is the generator for boolean values and is enough to have +instead of predefining new variables TRUE and FALSE. + +2. Assigning from a boolean value copies the bool type. + +3. Boolean variables have numeric values 1 and 0 respectively, and string +values "TRUE" and "FALSE". Thus they differ from other variables where a +"false" value must be zero and null. + +Given: + + true = bool(1) + false = bool(0) + +this implies all of the following: + + print(true) --> "TRUE" + print(false) --> "FALSE" + Same for %s in printf + Same for bool_var "" + printf %d gives 0/1 + +4. typeof() returns "bool". + +5. 
Numeric operators treat booleans as numbers. asort() sorts booleans before +numbers, and false before true. + +6. These string functions generate a runtime fatal error +if given an argument / target of boolean type: + + gsub sub + +These functions merely treat the value as a string +but issue a lint warning. + + substr match index gensub + length split patsplit + tolower toupper + +7. Updates to API needed for an additional type, and the table +for requested vs. returns. + +8. The following extensions need revising: + + - JSON extension + - dump / read array extensions + - what else? @@ -381,6 +381,8 @@ do_index(int nargs) s1 = force_string(s1); s2 = force_string(s2); + warn_bool("index", 1, s1); + warn_bool("index", 2, s2); p1 = s1->stptr; p2 = s2->stptr; @@ -552,6 +554,7 @@ do_length(int nargs) if (do_lint && (fixtype(tmp)->flags & STRING) == 0) lintwarn(_("%s: received non-string argument"), "length"); tmp = force_string(tmp); + warn_bool("length", 1, tmp); if (gawk_mb_cur_max > 1) { tmp = force_wstring(tmp); @@ -1779,6 +1782,16 @@ do_sqrt(int nargs) return make_number((AWKNUM) sqrt(arg)); } +/* warn_bool --- warn that bool parameter is used as a string */ + +void +warn_bool(const char *func, int argnum, NODE *n) +{ + if (do_lint && (n->flags & BOOL) != 0) + lintwarn(_("%s: argument %d of type bool used as a string"), + func, argnum); +} + /* do_substr --- do the substr function */ NODE * @@ -1802,6 +1815,7 @@ do_substr(int nargs) DEREF(t1); t1 = POP_STRING(); + warn_bool("substr", 1, t1); if (nargs == 3) { if (! 
(d_length >= 1)) { @@ -2407,6 +2421,7 @@ do_tolower(int nargs) NODE *t1, *t2; t1 = POP_SCALAR(); + warn_bool("tolower", 1, t1); if (do_lint && (fixtype(t1)->flags & STRING) == 0) lintwarn(_("%s: received non-string argument"), "tolower"); t1 = force_string(t1); @@ -2438,6 +2453,7 @@ do_toupper(int nargs) NODE *t1, *t2; t1 = POP_SCALAR(); + warn_bool("toupper", 1, t1); if (do_lint && (fixtype(t1)->flags & STRING) == 0) lintwarn(_("%s: received non-string argument"), "toupper"); t1 = force_string(t1); @@ -2662,6 +2678,7 @@ do_match(int nargs) tre = POP(); rp = re_update(tre); t1 = POP_STRING(); + warn_bool("match", 1, t1); rstart = research(rp, t1->stptr, 0, t1->stlen, RE_NEED_START); if (rstart >= 0) { /* match succeded */ @@ -2882,6 +2899,7 @@ do_sub(int nargs, unsigned int flags) rp = re_update(tmp); target = POP_STRING(); /* original string */ + warn_bool("gensub", 3, target); glob_flag = POP_SCALAR(); /* value of global flag */ if ( (glob_flag->flags & STRING) != 0 @@ -2924,6 +2942,10 @@ do_sub(int nargs, unsigned int flags) } } + if ((target->flags & BOOL) != 0) + fatal(_("%s: target cannot be of type bool"), + (flags & GSUB) != 0 ? 
"gsub" : "sub"); + global = (how_many == -1); rep_node = POP_STRING(); /* replacement text */ @@ -4112,7 +4134,10 @@ do_typeof(int nargs) } break; case Node_val: - switch (fixtype(arg)->flags & (STRING|NUMBER|USER_INPUT|REGEX)) { + switch (fixtype(arg)->flags & (STRING|NUMBER|USER_INPUT|REGEX|BOOL)) { + case BOOL: + res = "bool"; + break; case NUMBER: res = "number"; break; @@ -4321,3 +4346,18 @@ check_symtab_functab(NODE *dest, const char *fname, const char *msg) else if (dest == func_table) fatal(msg, fname, "FUNCTAB"); } + +/* do_bool --- create boolean values */ + +NODE * +do_bool(int nargs) +{ + NODE *tmp; + bool result; + + tmp = POP_SCALAR(); + result = boolval(tmp); + DEREF(tmp); + + return make_bool_node(result); +} @@ -455,6 +455,7 @@ flags2str(int flagval) { XARRAY, "XARRAY" }, { NUMCONSTSTR, "NUMCONSTSTR" }, { REGEX, "REGEX" }, + { BOOL, "BOOL" }, { 0, NULL }, }; @@ -1019,6 +1019,7 @@ do_split(int nargs) assoc_clear(arr); src = TOP_STRING(); + warn_bool("split", 1, src); if (src->stlen == 0) { /* * Skip the work if first arg is the null string. 
@@ -1096,6 +1097,7 @@ do_patsplit(int nargs) _("%s: cannot use %s as second argument")); src = TOP_STRING(); + warn_bool("patsplit", 1, src); if ((sep->flags & REGEX) != 0) sep = sep->typed_re; @@ -366,7 +366,8 @@ typedef enum { AWK_STRNUM, AWK_ARRAY, AWK_SCALAR, /* opaque access to a variable */ - AWK_VALUE_COOKIE /* for updating a previously created value */ + AWK_VALUE_COOKIE, /* for updating a previously created value */ + AWK_BOOL } awk_valtype_t; /* @@ -381,6 +382,7 @@ typedef struct awk_value { awk_array_t a; awk_scalar_t scl; awk_value_cookie_t vc; + awk_bool_t b; } u; #define str_value u.s #define strnum_value str_value @@ -391,6 +393,7 @@ typedef struct awk_value { #define array_cookie u.a #define scalar_cookie u.scl #define value_cookie u.vc +#define bool_value u.b } awk_value_t; /* @@ -567,26 +570,28 @@ typedef struct gawk_api { +-------------------------------------------------------+ | Type of Actual Value: | - +--------+--------+--------+--------+-------+-----------+ - | String | Strnum | Number | Regex | Array | Undefined | - +-----------+-----------+--------+--------+--------+--------+-------+-----------+ - | | String | String | String | String | String | false | false | - | +-----------+--------+--------+--------+--------+-------+-----------+ - | | Strnum | false | Strnum | Strnum | false | false | false | - | +-----------+--------+--------+--------+--------+-------+-----------+ - | | Number | Number | Number | Number | false | false | false | - | +-----------+--------+--------+--------+--------+-------+-----------+ - | | Regex | false | false | false | Regex | false | false | - | +-----------+--------+--------+--------+--------+-------+-----------+ - | Type | Array | false | false | false | false | Array | false | - | Requested +-----------+--------+--------+--------+--------+-------+-----------+ - | | Scalar | Scalar | Scalar | Scalar | Scalar | false | false | - | +-----------+--------+--------+--------+--------+-------+-----------+ - | | 
Undefined | String | Strnum | Number | Regex | Array | Undefined | - | +-----------+--------+--------+--------+--------+-------+-----------+ - | | Value | false | false | false | false | false | false | - | | Cookie | | | | | | | - +-----------+-----------+--------+--------+--------+--------+-------+-----------+ + +--------+--------+--------+--------+--------+-------+-----------+ + | String | Strnum | Number | Regex | Bool | Array | Undefined | + +-----------+-----------+--------+--------+--------+--------+--------+-------+-----------+ + | | String | String | String | String | String | String | false | false | + | +-----------+--------+--------+--------+--------+--------+-------+-----------+ + | | Strnum | false | Strnum | Strnum | false | false | false | false | + | +-----------+--------+--------+--------+--------+--------+-------+-----------+ + | | Number | Number | Number | Number | false | Number | false | false | + | +-----------+--------+--------+--------+--------+--------+-------+-----------+ + | | Regex | false | false | false | Regex | false | false | false | + | +-----------+--------+--------+--------+--------+--------+-------+-----------+ + | | Bool | false | false | false | false | Bool | false | false | + | +-----------+--------+--------+--------+--------+--------+-------+-----------+ + | Type | Array | false | false | false | false | false | Array | false | + | Requested +-----------+--------+--------+--------+--------+--------+-------+-----------+ + | | Scalar | Scalar | Scalar | Scalar | Scalar | Scalar | false | false | + | +-----------+--------+--------+--------+--------+--------+-------+-----------+ + | | Undefined | String | Strnum | Number | Regex | Bool | Array | Undefined | + | +-----------+--------+--------+--------+--------+--------+-------+-----------+ + | | Value | false | false | false | false | false | false | false | + | | Cookie | | | | | | | | + 
+-----------+-----------+--------+--------+--------+--------+--------+-------+-----------+ */ /* Functions to handle parameters passed to the extension. */ @@ -980,7 +980,6 @@ load_procinfo_argv() // hook it into PROCINFO sub = make_string("argv", 4); assoc_set(PROCINFO_node, sub, argv_array); - } /* load_procinfo --- populate the PROCINFO array */ @@ -1082,3 +1082,25 @@ more_blocks(int id) } #endif + +/* make_bool_node --- make a boolean-valued node */ + +extern NODE * +make_bool_node(bool value) +{ + NODE *val; + const char *sval; + AWKNUM nval; + + sval = (value ? "TRUE" : "FALSE"); + nval = (value ? 1.0 : 0.0); + + val = make_number(nval); + val->stptr = estrdup(sval, strlen(sval)); + val->stlen = strlen(sval); + val->flags &= ~NUMBER; + val->flags |= NUMCUR|STRCUR|BOOL; + val->stfmt = STFMT_UNUSED; + + return val; +} diff --git a/test/ChangeLog b/test/ChangeLog index 53fe5627..2604c270 100644 --- a/test/ChangeLog +++ b/test/ChangeLog @@ -1,3 +1,8 @@ +2021-03-08 Arnold D. Robbins <arnold@skeeve.com> + + * dumpvars.ok, functab5.ok, id.ok, intest.awk, symtab11.ok, + symtab8.ok: Updated after code changes. + 2021-02-13 Arnold D. Robbins <arnold@skeeve.com> * Makefile.am (EXTRA_DIST): argcasfile, new test. 
diff --git a/test/dumpvars.ok b/test/dumpvars.ok index 85d1c859..7caecd35 100644 --- a/test/dumpvars.ok +++ b/test/dumpvars.ok @@ -9,7 +9,7 @@ FILENAME: "-" FNR: 3 FPAT: "[^[:space:]]+" FS: " " -FUNCTAB: array, 41 elements +FUNCTAB: array, 42 elements IGNORECASE: 0 LINT: 0 NF: 1 diff --git a/test/functab5.ok b/test/functab5.ok index 9ac4295d..ef110989 100644 --- a/test/functab5.ok +++ b/test/functab5.ok @@ -3,6 +3,7 @@ asort' asorti' atan2' bindtextdomain' +bool' chdir' close' compl' @@ -34,6 +34,7 @@ asort -> builtin asorti -> builtin atan2 -> builtin bindtextdomain -> builtin +bool -> builtin close -> builtin compl -> builtin cos -> builtin diff --git a/test/intest.awk b/test/intest.awk index f030d07a..18e0cc4d 100644 --- a/test/intest.awk +++ b/test/intest.awk @@ -1,4 +1,4 @@ BEGIN { - bool = ((b = 1) in c); - print bool, b # gawk-3.0.1 prints "0 "; should print "0 1" + bool_result = ((b = 1) in c); + print bool_result, b # gawk-3.0.1 prints "0 "; should print "0 1" } diff --git a/test/symtab11.ok b/test/symtab11.ok index 7d4be46c..da2cfcba 100644 --- a/test/symtab11.ok +++ b/test/symtab11.ok @@ -37,6 +37,7 @@ BEGIN -- Functab is next [asorti] = asorti [atan2] = atan2 [bindtextdomain] = bindtextdomain +[bool] = bool [close] = close [compl] = compl [cos] = cos diff --git a/test/symtab8.ok b/test/symtab8.ok index da29b585..0cf40fe9 100644 --- a/test/symtab8.ok +++ b/test/symtab8.ok @@ -9,7 +9,7 @@ FIELDWIDTHS: "" FNR: 1 FPAT: "[^[:space:]]+" FS: " " -FUNCTAB: array, 41 elements +FUNCTAB: array, 42 elements IGNORECASE: 0 LINT: 0 NF: 1 |