diff options
-rw-r--r-- | ChangeLog | 52 | ||||
-rw-r--r-- | awk.h | 10 | ||||
-rw-r--r-- | builtin.c | 9 | ||||
-rw-r--r-- | doc/ChangeLog | 16 | ||||
-rw-r--r-- | doc/gawktexi.in | 95 | ||||
-rw-r--r-- | extension/ChangeLog | 7 | ||||
-rw-r--r-- | extension/rwarray.c | 38 | ||||
-rw-r--r-- | gawkapi.c | 195 | ||||
-rw-r--r-- | gawkapi.h | 88 | ||||
-rw-r--r-- | test/ChangeLog | 4 | ||||
-rw-r--r-- | test/rwarray.awk | 8 |
11 files changed, 408 insertions, 114 deletions
@@ -73,6 +73,57 @@ * dfa.c: Sync with GNULIB. +2016-12-05 Andrew J. Schorr <aschorr@telemetry-investments.com> + + Add API support for strnum values. + * gawkapi.c (awk_value_to_node): Add AWK_STRNUM. + (assign_string): Add a type argument so we can use this for AWK_STRING + or AWK_STRNUM. + (node_to_awk_value): When AWK_NUMBER is requested, a regex value + should return false, as per the header file documentation. + Add support for AWK_STRNUM requests. When AWK_REGEX is requested, + implement the cases properly instead of always returning true. + Fix AWK_SCALAR logic. For AWK_UNDEFINED, rewrite using a switch + and support AWK_STRNUM. + (api_sym_update): Add AWK_STRNUM. + (api_sym_update_scalar): Add optimized support for updating AWK_STRNUM. + (valid_subscript_type): Add AWK_STRNUM. + (api_create_value): Add AWK_STRNUM. + * gawkapi.h (awk_valtype_t): Add AWK_STRNUM. + (strnum_value): New macro. + (Value fetching table): Updated. + +2016-12-04 Andrew J. Schorr <aschorr@telemetry-investments.com> + + * gawkapi.c (assign_regex): Do not call assign_string, since we + know that a REGEX value is not an unterminated field string. + * gawkapi.h (make_regex): Delete macro. + (make_const_regex, make_malloced_regex): Add new macros to replace + make_regex with necessary memory management support. + +2016-12-04 Andrew J. Schorr <aschorr@telemetry-investments.com> + + * awk.h (fixtype): Remove conditional checking if the node type + is Node_val. This is already covered by the assert, and if it's not + true, we have serious bugs. + * builtin.c (do_typeof): Do not treat Node_var the same way as + Node_val, since they are different beasts. In reality, the argument + to this function will never have type Node_var. + +2016-12-04 Andrew J. Schorr <aschorr@telemetry-investments.com> + + * gawkapi.h (awk_element_t): Remove obsolete comment claiming that + the index will always be a string. + (gawk_api_t): Add new api_flatten_array_typed function and indicate + that api_flatten_array has been superseded. + (flatten_array_typed): New macro to call api_flatten_array_typed. + (flatten_array): Redefine using the new flatten_array_typed macro. + * gawkapi.c (api_flatten_array_typed): New function renamed from + api_flatten_array to flatten an array with the types requested by the + caller. Also update the comments and error messages. + (api_flatten_array): Now a wrapper around api_flatten_array_typed. + (api_impl): Add new api_flatten_array_typed hook. + 2016-12-06 Arnold D. Robbins <arnold@skeeve.com> Add minimum required and maximum expected number of arguments @@ -87,6 +138,7 @@ in instructions. Add checking code and lint checks. (Op_ext_func): Copy min_required and max_expected from function info. + 2016-12-04 Andrew J. Schorr <aschorr@telemetry-investments.com> * gawkapi.h (r_make_string_type): New inline function to create strings @@ -1887,12 +1887,10 @@ static inline NODE * fixtype(NODE *n) { assert(n->type == Node_val); - if (n->type == Node_val) { - if ((n->flags & (NUMCUR|USER_INPUT)) == USER_INPUT) - return force_number(n); - if ((n->flags & INTIND) != 0) - return force_string(n); - } + if ((n->flags & (NUMCUR|USER_INPUT)) == USER_INPUT) + return force_number(n); + if ((n->flags & INTIND) != 0) + return force_string(n); return n; } @@ -3988,7 +3988,6 @@ do_typeof(int nargs) deref = false; break; case Node_val: - case Node_var: switch (fixtype(arg)->flags & (STRING|NUMBER|USER_INPUT|REGEX)) { case STRING: res = "string"; @@ -4017,6 +4016,14 @@ do_typeof(int nargs) res = "untyped"; deref = false; break; + case Node_var: + /* + * Note: this doesn't happen because the function calling code + * in interpret.h pushes Node_var->var_value. + */ + fatal(_("typeof: invalid argument type `%s'"), + nodetype2str(arg->type)); + break; default: fatal(_("typeof: unknown argument type `%s'"), nodetype2str(arg->type)); diff --git a/doc/ChangeLog b/doc/ChangeLog index 68c139a9..d3e974c7 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -12,6 +12,22 @@ * gawktexi.in: Update description of awk_ext_func_t structure. +2016-12-05 Andrew J. Schorr <aschorr@telemetry-investments.com> + + * gawktexi.in: Document strnum changes as relates to API. + Still stuff left to do -- tables for type conversions need + to be updated to show new strnum and regex rows and columns. + +2016-12-04 Andrew J. Schorr <aschorr@telemetry-investments.com> + + * gawktexi.in: Remove make_regex and replace it with make_const_regex + and make_malloced_regex. + +2016-12-04 Andrew J. Schorr <aschorr@telemetry-investments.com> + + * gawktexi.in: Document new flatten_array_typed API function, and + indicate that the old flatten_array function has been superseded. + 2016-11-30 Arnold D. Robbins <arnold@skeeve.com> * gawktexi.in: Document typed regex changes as relates to API. diff --git a/doc/gawktexi.in b/doc/gawktexi.in index f7f232a9..c1674c11 100644 --- a/doc/gawktexi.in +++ b/doc/gawktexi.in @@ -31574,7 +31574,8 @@ multibyte encoding. @itemx @ @ @ @ AWK_ARRAY, @itemx @ @ @ @ AWK_SCALAR,@ @ @ @ @ @ @ @ @ /* opaque access to a variable */ @itemx @ @ @ @ AWK_VALUE_COOKIE,@ @ @ /* for updating a previously created value */ -@itemx @ @ @ @ AWK_REGEX +@itemx @ @ @ @ AWK_REGEX, +@itemx @ @ @ @ AWK_STRNUM @itemx @} awk_valtype_t; This @code{enum} indicates the type of a value. It is used in the following @code{struct}. @@ -31594,6 +31595,7 @@ The @code{val_type} member indicates what kind of value the @code{union} holds, and each member is of the appropriate type. @item #define str_value@ @ @ @ @ @ u.s +@itemx #define strnum_value@ @ @ @ str_value @itemx #define regex_value@ @ @ @ str_value @itemx #define num_value@ @ @ @ @ @ u.d @itemx #define array_cookie@ @ @ u.a @@ -31615,7 +31617,7 @@ and in more detail in @ref{Cached values}. @end table -Scalar values in @command{awk} are numbers, strings, or typed regexps. The +Scalar values in @command{awk} are numbers, strings, strnums, or typed regexps. The @code{awk_value_t} struct represents values. The @code{val_type} member indicates what is in the @code{union}. @@ -31624,6 +31626,12 @@ require more work. Because @command{gawk} allows embedded @sc{nul} bytes in string values, a string must be represented as a pair containing a data pointer and length. This is the @code{awk_string_t} type. +A strnum (numeric string) value is represented as a string and consists +of user input data that appears to be numeric. +When an extension attempts to create a strnum value, a string flagged +as user input is created. Subsequent parsing will determine whether it +looks like a number and should be treated as a strnum or a regular string. + Typed regexp values (@pxref{Strong Regexp Constants}) are not of much use to extension functions. Extension functions can tell that they've received them, and create them for scalar values. Otherwise, @@ -31798,9 +31806,29 @@ This function simply creates a numeric value in the @code{awk_value_t} variable pointed to by @code{result}. @item static inline awk_value_t * -@itemx make_regex(const char *string, size_t length, awk_value_t *result); +@itemx make_const_user_input(const char *string, size_t length, awk_value_t *result); +This function is identical to @code{make_const_string}, but the string is +flagged as user input that should be treated as a strnum value if the contents +of the string are numeric. + +@item static inline awk_value_t * +@itemx make_malloced_user_input(const char *string, size_t length, awk_value_t *result); +This function is identical to @code{make_malloced_string}, but the string is +flagged as user input that should be treated as a strnum value if the contents +of the string are numeric. + +@item static inline awk_value_t * +@itemx make_const_regex(const char *string, size_t length, awk_value_t *result); +This function creates a strongly typed regexp value by allocating a copy of the string. +@code{string} is the regular expression of length @code{len}. + +@item static inline awk_value_t * +@itemx make_malloced_regex(const char *string, size_t length, awk_value_t *result); This function creates a strongly typed regexp value. @code{string} is the regular expression of length @code{len}. +It expects @code{string} to be a @samp{char *} +value pointing to data previously obtained from @code{gawk_malloc()}, @code{gawk_calloc()}, or @code{gawk_realloc()}. + @end table @node Registration Functions @@ -32584,29 +32612,28 @@ value type, as appropriate. This behavior is summarized in @end ifnotplaintext @ifplaintext @example - +-------------------------------------------------------------+ - | Type of Actual Value: | - +------------+------------+-----------+-----------+-----------+ - | String | Number | Regex | Array | Undefined | -+-----------+-----------+------------+------------+-----------+-----------+-----------+ -| | String | String | String | String | false | false | -| +-----------+------------+------------+-----------+-----------+-----------+ -| | Number | Number if | Number | false | false | false | -| | | can be | | | | | -| | | converted, | | | | | -| | | else false | | | | | -| +-----------+------------+------------+-----------+-----------+-----------+ -| | Regex | false | false | Regex | false | false | -| +-----------+------------+------------+-----------+-----------+-----------+ -| Type | Array | false | false | false | Array | false | -| Requested +-----------+------------+------------+-----------+-----------+-----------+ -| | Scalar | Scalar | Scalar | Scalar | false | false | -| +-----------+------------+------------+-----------+-----------+-----------+ -| | Undefined | String | Number | Regex | Array | Undefined | -| +-----------+------------+------------+-----------+-----------+-----------+ -| | Value | false | false | false | false | false | -| | Cookie | | | | | | -+-----------+-----------+------------+------------+-----------+-----------+-----------+ + +-------------------------------------------------------+ + | Type of Actual Value: | + +--------+--------+--------+--------+-------+-----------+ + | String | Strnum | Number | Regex | Array | Undefined | ++-----------+-----------+--------+--------+--------+--------+-------+-----------+ +| | String | String | String | String | String | false | false | +| +-----------+--------+--------+--------+--------+-------+-----------+ +| | Strnum | false | Strnum | Strnum | false | false | false | +| +-----------+--------+--------+--------+--------+-------+-----------+ +| | Number | Number | Number | Number | false | false | false | +| +-----------+--------+--------+--------+--------+-------+-----------+ +| | Regex | false | false | false | Regex | false | false | +| +-----------+--------+--------+--------+--------+-------+-----------+ +| Type | Array | false | false | false | false | Array | false | +| Requested +-----------+--------+--------+--------+--------+-------+-----------+ +| | Scalar | Scalar | Scalar | Scalar | Scalar | false | false | +| +-----------+--------+--------+--------+--------+-------+-----------+ +| | Undefined | String | Strnum | Number | Regex | Array | Undefined | +| +-----------+--------+--------+--------+--------+-------+-----------+ +| | Value | false | false | false | false | false | false | +| | Cookie | | | | | | | ++-----------+-----------+--------+--------+--------+--------+-------+-----------+ @end example @end ifplaintext @end float @@ -32706,7 +32733,7 @@ Return false if the value cannot be retrieved. @item awk_bool_t sym_update_scalar(awk_scalar_t cookie, awk_value_t *value); Update the value associated with a scalar cookie. Return false if -the new value is not of type @code{AWK_STRING}, @code{AWK_REGEX}, or @code{AWK_NUMBER}. +the new value is not of type @code{AWK_STRING}, @code{AWK_STRNUM}, @code{AWK_REGEX}, or @code{AWK_NUMBER}. Here too, the predefined variables may not be updated. @end table @@ -32827,7 +32854,7 @@ is what the routines in this @value{SECTION} let you do. The functions are as f @table @code @item awk_bool_t create_value(awk_value_t *value, awk_value_cookie_t *result); Create a cached string or numeric value from @code{value} for -efficient later assignment. Only values of type @code{AWK_NUMBER}, @code{AWK_REGEX}, +efficient later assignment. Only values of type @code{AWK_NUMBER}, @code{AWK_REGEX}, @code{AWK_STRNUM}, and @code{AWK_STRING} are allowed. Any other type is rejected. @code{AWK_UNDEFINED} could be allowed, but doing so would result in inferior performance. @@ -33053,9 +33080,9 @@ The array remains an array, but after calling this function, it has no elements. This is equivalent to using the @code{delete} statement (@pxref{Delete}). -@item awk_bool_t flatten_array(awk_array_t a_cookie, awk_flat_array_t **data); +@item awk_bool_t flatten_array_typed(awk_array_t a_cookie, awk_flat_array_t **data, awk_valtype_t index_type, awk_valtype_t value_type); For the array represented by @code{a_cookie}, create an @code{awk_flat_array_t} -structure and fill it in. Set the pointer whose address is passed as @code{data} +structure and fill it in with indices and values of the requested types. Set the pointer whose address is passed as @code{data} to point to this structure. Return true upon success, or false otherwise. @ifset FOR_PRINT @@ -33067,6 +33094,12 @@ See the next @value{SECTION} for a discussion of how to flatten an array and work with it. +@item awk_bool_t flatten_array(awk_array_t a_cookie, awk_flat_array_t **data); +For the array represented by @code{a_cookie}, create an @code{awk_flat_array_t} +structure and fill it in with @code{AWK_STRING} indices and +@code{AWK_UNDEFINED} values. This is superseded by @code{flatten_array_typed} +and retained only for legacy binary compatibility. + @item awk_bool_t release_flattened_array(awk_array_t a_cookie, @itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ awk_flat_array_t *data); When done with a flattened array, release the storage using this function. @@ -33179,7 +33212,7 @@ to double-check that the count in the @code{awk_flat_array_t} is the same as the count just retrieved: @example - if (! flatten_array(value2.array_cookie, & flat_array)) @{ + if (! flatten_array_typed(value2.array_cookie, & flat_array, AWK_STRING, AWK_UNDEFINED)) @{ printf("dump_array_and_delete: could not flatten array\n"); goto out; @} diff --git a/extension/ChangeLog b/extension/ChangeLog index bf7d3fa4..9c647f05 100644 --- a/extension/ChangeLog +++ b/extension/ChangeLog @@ -41,6 +41,13 @@ * testext.c: Ditto. * time.c: Ditto. +2016-12-05 Andrew J. Schorr <aschorr@telemetry-investments.com> + + * rwarray.c: Adjust to read and write strnum values. + (write_value): When writing a string value, code should use htonl. + There are now 3 string types: string, strnum, and regex. + (read_value): Support 3 string types: string, strnum, and regex. + 2016-11-30 Arnold D. Robbins <arnold@skeeve.com> * rwarray.c: Restore read comparion of major and minor versions diff --git a/extension/rwarray.c b/extension/rwarray.c index b62b6de0..186dac0f 100644 --- a/extension/rwarray.c +++ b/extension/rwarray.c @@ -84,7 +84,7 @@ static awk_bool_t read_value(FILE *fp, awk_value_t *value); * For each element: * Length of index val: 4 bytes - network order * Index val as characters (N bytes) - * Value type 4 bytes (0 = string, 1 = number, 2 = array, 3 = regex) + * Value type 4 bytes (0 = string, 1 = number, 2 = array, 3 = regex, 4 = strnum) * IF string: * Length of value 4 bytes * Value as characters (N bytes) @@ -210,7 +210,7 @@ write_elem(FILE *fp, awk_element_t *element) return write_value(fp, & element->value); } -/* write_value --- write a number or a string or a regex or an array */ +/* write_value --- write a number or a string or a strnum or a regex or an array */ static awk_bool_t write_value(FILE *fp, awk_value_t *val) @@ -232,7 +232,22 @@ write_value(FILE *fp, awk_value_t *val) if (fwrite(& val->num_value, 1, sizeof(val->num_value), fp) != sizeof(val->num_value)) return awk_false; } else { - code = (val->val_type == AWK_STRING ? 0 : 3); + switch (val->val_type) { + case AWK_STRING: + code = htonl(0); + break; + case AWK_STRNUM: + code = htonl(4); + break; + case AWK_REGEX: + code = htonl(3); + break; + default: + /* XXX can this happen? */ + code = htonl(0); + warning(ext_id, _("array value has unknown type %d"), val->val_type); + break; + } if (fwrite(& code, 1, sizeof(code), fp) != sizeof(code)) return awk_false; @@ -449,7 +464,22 @@ read_value(FILE *fp, awk_value_t *value) return awk_false; } len = ntohl(len); - value->val_type = (code == 0 ? AWK_STRING : AWK_REGEX); + switch (code) { + case 0: + value->val_type = AWK_STRING; + break; + case 3: + value->val_type = AWK_REGEX; + break; + case 4: + value->val_type = AWK_STRNUM; + break; + default: + /* this cannot happen! */ + warning(ext_id, _("treating recovered value with unknown type code %d as a string"), code); + value->val_type = AWK_STRING; + break; + } value->str_value.len = len; value->str_value.str = gawk_malloc(len + 1); memset(value->str_value.str, '\0', len + 1); @@ -164,6 +164,11 @@ awk_value_to_node(const awk_value_t *retval) ext_ret_val = make_str_node(retval->str_value.str, retval->str_value.len, ALREADY_MALLOCED); break; + case AWK_STRNUM: + ext_ret_val = make_str_node(retval->str_value.str, + retval->str_value.len, ALREADY_MALLOCED); + ext_ret_val->flags |= USER_INPUT; + break; case AWK_REGEX: ext_ret_val = make_typed_regex(retval->str_value.str, retval->str_value.len); @@ -415,9 +420,9 @@ free_api_string_copies() /* assign_string --- return a string node with NUL termination */ static inline void -assign_string(NODE *node, awk_value_t *val) +assign_string(NODE *node, awk_value_t *val, awk_valtype_t val_type) { - val->val_type = AWK_STRING; + val->val_type = val_type; if (node->stptr[node->stlen] != '\0') { /* * This is an unterminated field string, so make a copy. @@ -449,7 +454,11 @@ assign_string(NODE *node, awk_value_t *val) static inline void assign_regex(NODE *node, awk_value_t *val) { - assign_string(node, val); + /* a REGEX node cannot be an unterminated field string */ + assert((node->flags & MALLOC) != 0); + assert(node->stptr[node->stlen] == '\0'); + val->str_value.str = node->stptr; + val->str_value.len = node->stlen; val->val_type = AWK_REGEX; } @@ -489,55 +498,139 @@ node_to_awk_value(NODE *node, awk_value_t *val, awk_valtype_t wanted) /* a scalar value */ switch (wanted) { case AWK_NUMBER: - val->val_type = AWK_NUMBER; + if (node->flags & REGEX) + val->val_type = AWK_REGEX; + else { + val->val_type = AWK_NUMBER; + (void) force_number(node); + val->num_value = get_number_d(node); + ret = awk_true; + } + break; - (void) force_number(node); - val->num_value = get_number_d(node); - ret = awk_true; + case AWK_STRNUM: + switch (fixtype(node)->flags & (STRING|NUMBER|USER_INPUT|REGEX)) { + case STRING: + val->val_type = AWK_STRING; + break; + case NUMBER: + (void) force_string(node); + assign_string(node, val, AWK_STRNUM); + ret = awk_true; + break; + case NUMBER|USER_INPUT: + assign_string(node, val, AWK_STRNUM); + ret = awk_true; + break; + case REGEX: + val->val_type = AWK_REGEX; + break; + case NUMBER|STRING: + if (node == Nnull_string) { + val->val_type = AWK_UNDEFINED; + break; + } + /* fall through */ + default: + warning(_("node_to_awk_value detected invalid flags combination `%s'; please file a bug report."), flags2str(node->flags)); + val->val_type = AWK_UNDEFINED; + break; + } break; case AWK_STRING: (void) force_string(node); - assign_string(node, val); + assign_string(node, val, AWK_STRING); ret = awk_true; break; case AWK_REGEX: - assign_regex(node, val); - ret = awk_true; + switch (fixtype(node)->flags & (STRING|NUMBER|USER_INPUT|REGEX)) { + case STRING: + val->val_type = AWK_STRING; + break; + case NUMBER: + val->val_type = AWK_NUMBER; + break; + case NUMBER|USER_INPUT: + val->val_type = AWK_STRNUM; + break; + case REGEX: + assign_regex(node, val); + ret = awk_true; + break; + case NUMBER|STRING: + if (node == Nnull_string) { + val->val_type = AWK_UNDEFINED; + break; + } + /* fall through */ + default: + warning(_("node_to_awk_value detected invalid flags combination `%s'; please file a bug report."), flags2str(node->flags)); + val->val_type = AWK_UNDEFINED; + break; + } break; case AWK_SCALAR: - fixtype(node); - if ((node->flags & NUMBER) != 0) { - val->val_type = AWK_NUMBER; - } else if ((node->flags & STRING) != 0) { + switch (fixtype(node)->flags & (STRING|NUMBER|USER_INPUT|REGEX)) { + case STRING: val->val_type = AWK_STRING; - } else if ((node->flags & REGEX) != 0) { + break; + case NUMBER: + val->val_type = AWK_NUMBER; + break; + case NUMBER|USER_INPUT: + val->val_type = AWK_STRNUM; + break; + case REGEX: val->val_type = AWK_REGEX; - } else + break; + case NUMBER|STRING: + if (node == Nnull_string) { + val->val_type = AWK_UNDEFINED; + break; + } + /* fall through */ + default: + warning(_("node_to_awk_value detected invalid flags combination `%s'; please file a bug report."), flags2str(node->flags)); val->val_type = AWK_UNDEFINED; - ret = awk_false; + break; + } break; case AWK_UNDEFINED: /* return true and actual type for request of undefined */ - fixtype(node); - if (node == Nnull_string) { - val->val_type = AWK_UNDEFINED; + switch (fixtype(node)->flags & (STRING|NUMBER|USER_INPUT|REGEX)) { + case STRING: + assign_string(node, val, AWK_STRING); ret = awk_true; - } else if ((node->flags & NUMBER) != 0) { + break; + case NUMBER: val->val_type = AWK_NUMBER; val->num_value = get_number_d(node); ret = awk_true; - } else if ((node->flags & STRING) != 0) { - assign_string(node, val); + break; + case NUMBER|USER_INPUT: + assign_string(node, val, AWK_STRNUM); ret = awk_true; - } else if ((node->flags & REGEX) != 0) { + break; + case REGEX: assign_regex(node, val); ret = awk_true; - } else + break; + case NUMBER|STRING: + if (node == Nnull_string) { + val->val_type = AWK_UNDEFINED; + ret = awk_true; + break; + } + /* fall through */ + default: + warning(_("node_to_awk_value detected invalid flags combination `%s'; please file a bug report."), flags2str(node->flags)); val->val_type = AWK_UNDEFINED; + break; + } break; case AWK_ARRAY: @@ -640,6 +733,7 @@ api_sym_update(awk_ext_id_t id, switch (value->val_type) { case AWK_NUMBER: + case AWK_STRNUM: case AWK_STRING: case AWK_REGEX: case AWK_UNDEFINED: @@ -741,6 +835,7 @@ api_sym_update_scalar(awk_ext_id_t id, break; case AWK_STRING: + case AWK_STRNUM: if (node->var_value->valref == 1) { NODE *r = node->var_value; @@ -754,6 +849,8 @@ api_sym_update_scalar(awk_ext_id_t id, /* make_str_node(s, l, ALREADY_MALLOCED): */ r->numbr = 0; r->flags = (MALLOC|STRING|STRCUR); + if (value->val_type == AWK_STRNUM) + r->flags |= USER_INPUT; r->stfmt = STFMT_UNUSED; r->stptr = value->str_value.str; r->stlen = value->str_value.len; @@ -790,6 +887,7 @@ valid_subscript_type(awk_valtype_t valtype) switch (valtype) { case AWK_UNDEFINED: case AWK_NUMBER: + case AWK_STRNUM: case AWK_STRING: case AWK_REGEX: case AWK_SCALAR: @@ -995,9 +1093,10 @@ api_clear_array(awk_ext_id_t id, awk_array_t a_cookie) /* api_flatten_array --- flatten out an array so that it can be looped over easily. */ static awk_bool_t -api_flatten_array(awk_ext_id_t id, +api_flatten_array_typed(awk_ext_id_t id, awk_array_t a_cookie, - awk_flat_array_t **data) + awk_flat_array_t **data, + awk_valtype_t index_type, awk_valtype_t value_type) { NODE **list; size_t i, j; @@ -1014,7 +1113,7 @@ api_flatten_array(awk_ext_id_t id, (array->table_size - 1) * sizeof(awk_element_t); emalloc(*data, awk_flat_array_t *, alloc_size, - "api_flatten_array"); + "api_flatten_array_typed"); memset(*data, 0, alloc_size); list = assoc_list(array, "@unsorted", ASORTI); @@ -1029,29 +1128,35 @@ api_flatten_array(awk_ext_id_t id, index = list[i]; value = list[i + 1]; /* number or string or subarray */ - /* - * Convert index and value to ext types. Force the - * index to be a string, since indices are always - * conceptually strings, regardless of internal optimizations - * to treat them as integers in some cases. - * - * Regexes are forced to string too. - */ + /* Convert index and value to ext types. */ if (! node_to_awk_value(index, - & (*data)->elements[j].index, AWK_STRING)) { - fatal(_("api_flatten_array: could not convert index %d\n"), - (int) i); + & (*data)->elements[j].index, index_type)) { + fatal(_("api_flatten_array_typed: could not convert index %d to %d\n"), + (int) i, (int) index_type); } if (! node_to_awk_value(value, - & (*data)->elements[j].value, AWK_UNDEFINED)) { - fatal(_("api_flatten_array: could not convert value %d\n"), - (int) i); + & (*data)->elements[j].value, value_type)) { + fatal(_("api_flatten_array_typed: could not convert value %d to %d\n"), + (int) i, (int) value_type); } } return awk_true; } /* + * api_flatten_array -- replaced by api_flatten_array_typed. This function + * is retained only for binary compatibility. + */ + +static awk_bool_t +api_flatten_array(awk_ext_id_t id, + awk_array_t a_cookie, + awk_flat_array_t **data) +{ + return api_flatten_array_typed(id, a_cookie, data, AWK_STRING, AWK_UNDEFINED); +} + +/* * api_release_flattened_array --- release array memory, * delete any marked elements. Count must match what * gawk thinks the size is. @@ -1103,6 +1208,7 @@ api_create_value(awk_ext_id_t id, awk_value_t *value, switch (value->val_type) { case AWK_NUMBER: + case AWK_STRNUM: case AWK_STRING: case AWK_REGEX: break; @@ -1298,7 +1404,7 @@ gawk_api_t api_impl = { api_del_array_element, api_create_array, api_clear_array, - api_flatten_array, + api_flatten_array, /* for legacy binary compatibility */ api_release_flattened_array, /* Memory allocation */ @@ -1312,6 +1418,9 @@ gawk_api_t api_impl = { /* Print nonfatal error message */ api_nonfatal, + + /* New array flattening function */ + api_flatten_array_typed, }; /* init_ext_api --- init the extension API */ @@ -309,7 +309,8 @@ typedef enum { AWK_ARRAY, AWK_SCALAR, /* opaque access to a variable */ AWK_VALUE_COOKIE, /* for updating a previously created value */ - AWK_REGEX /* last for binary compatibility */ + AWK_REGEX, + AWK_STRNUM } awk_valtype_t; /* @@ -326,6 +327,7 @@ typedef struct awk_value { awk_value_cookie_t vc; } u; #define str_value u.s +#define strnum_value str_value #define regex_value str_value #define num_value u.d #define array_cookie u.a @@ -349,7 +351,7 @@ typedef struct awk_element { AWK_ELEMENT_DELETE = 1 /* set by extension if should be deleted */ } flags; - awk_value_t index; /* guaranteed to be a string! */ + awk_value_t index; awk_value_t value; } awk_element_t; @@ -495,29 +497,28 @@ typedef struct gawk_api { Table entry is type returned: - +-------------------------------------------------------------+ - | Type of Actual Value: | - +------------+------------+-----------+-----------+-----------+ - | String | Number | Regex | Array | Undefined | - +-----------+-----------+------------+------------+-----------+-----------+-----------+ - | | String | String | String | String | false | false | - | +-----------+------------+------------+-----------+-----------+-----------+ - | | Number | Number if | Number | false | false | false | - | | | can be | | | | | - | | | converted, | | | | | - | | | else false | | | | | - | +-----------+------------+------------+-----------+-----------+-----------+ - | | Regex | false | false | Regex | false | false | - | +-----------+------------+------------+-----------+-----------+-----------+ - | Type | Array | false | false | false | Array | false | - | Requested +-----------+------------+------------+-----------+-----------+-----------+ - | | Scalar | Scalar | Scalar | Scalar | false | false | - | +-----------+------------+------------+-----------+-----------+-----------+ - | | Undefined | String | Number | Regex | Array | Undefined | - | +-----------+------------+------------+-----------+-----------+-----------+ - | | Value | false | false | false | false | false | - | | Cookie | | | | | | - +-----------+-----------+------------+------------+-----------+-----------+-----------+ + +-------------------------------------------------------+ + | Type of Actual Value: | + +--------+--------+--------+--------+-------+-----------+ + | String | Strnum | Number | Regex | Array | Undefined | + +-----------+-----------+--------+--------+--------+--------+-------+-----------+ + | | String | String | String | String | String | false | false | + | +-----------+--------+--------+--------+--------+-------+-----------+ + | | Strnum | false | Strnum | Strnum | false | false | false | + | +-----------+--------+--------+--------+--------+-------+-----------+ + | | Number | Number | Number | Number | false | false | false | + | +-----------+--------+--------+--------+--------+-------+-----------+ + | | Regex | false | false | false | Regex | false | false | + | +-----------+--------+--------+--------+--------+-------+-----------+ + | Type | Array | false | false | false | false | Array | false | + | Requested +-----------+--------+--------+--------+--------+-------+-----------+ + | | Scalar | Scalar | Scalar | Scalar | Scalar | false | false | + | +-----------+--------+--------+--------+--------+-------+-----------+ + | | Undefined | String | Strnum | Number | Regex | Array | Undefined | + | +-----------+--------+--------+--------+--------+-------+-----------+ + | | Value | false | false | false | false | false | false | + | | Cookie | | | | | | | + +-----------+-----------+--------+--------+--------+--------+-------+-----------+ */ /* Functions to handle parameters passed to the extension. */ @@ -684,7 +685,13 @@ typedef struct gawk_api { /* Clear out an array */ awk_bool_t (*api_clear_array)(awk_ext_id_t id, awk_array_t a_cookie); - /* Flatten out an array so that it can be looped over easily. */ + /* + * Flatten out an array so that it can be looped over easily. + * This function returns all indices as strings and values as + * the native type one would get from an AWK_UNDEFINED request. + * Please use api_flatten_array_typed for more control over the + * type conversions. + */ awk_bool_t (*api_flatten_array)(awk_ext_id_t id, awk_array_t a_cookie, awk_flat_array_t **data); @@ -740,6 +747,16 @@ typedef struct gawk_api { /* Print nonfatal error message */ void (*api_nonfatal)(awk_ext_id_t id, const char *format, ...); + /* + * Flatten out an array with type conversions as requested. + * This supersedes the api_flatten_array function that did not allow + * the caller to specify the requested types. + */ + awk_bool_t (*api_flatten_array_typed)(awk_ext_id_t id, + awk_array_t a_cookie, + awk_flat_array_t **data, + awk_valtype_t index_type, awk_valtype_t value_type); + } gawk_api_t; #ifndef GAWK /* these are not for the gawk code itself! */ @@ -806,8 +823,11 @@ typedef struct gawk_api { #define clear_array(array) (api->api_clear_array(ext_id, array)) +#define flatten_array_typed(array, data, index_type, value_type) \ + (api->api_flatten_array_typed(ext_id, array, data, index_type, value_type)) + #define flatten_array(array, data) \ - (api->api_flatten_array(ext_id, array, data)) + flatten_array_typed(array, data, AWK_STRING, AWK_UNDEFINED) #define release_flattened_array(array, data) \ (api->api_release_flattened_array(ext_id, array, data)) @@ -843,7 +863,7 @@ typedef struct gawk_api { /* Constructor functions */ -/* r_make_string_type --- make a string or regexp value in result from the passed-in string */ +/* r_make_string_type --- make a string or strnum or regexp value in result from the passed-in string */ static inline awk_value_t * r_make_string_type(const gawk_api_t *api, /* needed for emalloc */ @@ -888,7 +908,17 @@ r_make_string(const gawk_api_t *api, /* needed for emalloc */ #define make_const_string(str, len, result) r_make_string(api, ext_id, str, len, 1, result) #define make_malloced_string(str, len, result) r_make_string(api, ext_id, str, len, 0, result) -#define make_regex(str, len, result) r_make_string_type(api, ext_id, str, len, 1, result, AWK_REGEX) + +#define make_const_regex(str, len, result) r_make_string_type(api, ext_id, str, len, 1, result, AWK_REGEX) +#define make_malloced_regex(str, len, result) r_make_string_type(api, ext_id, str, len, 0, result, AWK_REGEX) + +/* + * Note: The caller may not create a Strnum, but it can create a string that is + * flagged as user input that MAY be a Strnum. Gawk will decide whether it's a + * Strnum or a String by checking whether the string is numeric. + */ +#define make_const_user_input(str, len, result) r_make_string_type(api, ext_id, str, len, 1, result, AWK_STRNUM) +#define make_malloced_user_input(str, len, result) r_make_string_type(api, ext_id, str, len, 0, result, AWK_STRNUM) /* make_null_string --- make a null string value */ diff --git a/test/ChangeLog b/test/ChangeLog index a2dbdc5a..d2b0cf79 100644 --- a/test/ChangeLog +++ b/test/ChangeLog @@ -1,3 +1,7 @@ +2016-12-05 Andrew J. Schorr <aschorr@telemetry-investments.com> + + * rwarray.awk: Check that strnum is recreated correctly. + 2016-11-30 Arnold D. Robbins <arnold@skeeve.com> * rwarray.awk: Use typeof() to verify that typed regex is diff --git a/test/rwarray.awk b/test/rwarray.awk index 70809b64..86a4b589 100644 --- a/test/rwarray.awk +++ b/test/rwarray.awk @@ -7,6 +7,10 @@ BEGIN { re_sub = "/typed-regex/" dict[re_sub] = @/search me/ + strnum_sub = "strnum-sub" + split("-2.4", f) + dict[strnum_sub] = f[1] + n = asorti(dict, dictindices) for (i = 1; i <= n; i++) printf("dict[%s] = %s\n", dictindices[i], dict[dictindices[i]]) > "orig.out" @@ -43,4 +47,8 @@ BEGIN { if (typeof(dict[re_sub]) != "regexp") printf("dict[\"%s\"] should be regexp, is %s\n", re_sub, typeof(dict[re_sub])); + + if (typeof(dict[strnum_sub]) != "strnum") + printf("dict[\"%s\"] should be strnum, is %s\n", + strnum_sub, typeof(dict[strnum_sub])); } |