aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog52
-rw-r--r--awk.h10
-rw-r--r--builtin.c9
-rw-r--r--doc/ChangeLog16
-rw-r--r--doc/gawktexi.in95
-rw-r--r--extension/ChangeLog7
-rw-r--r--extension/rwarray.c38
-rw-r--r--gawkapi.c195
-rw-r--r--gawkapi.h88
-rw-r--r--test/ChangeLog4
-rw-r--r--test/rwarray.awk8
11 files changed, 408 insertions, 114 deletions
diff --git a/ChangeLog b/ChangeLog
index ebe138bc..c29c85ec 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -73,6 +73,57 @@
* dfa.c: Sync with GNULIB.
+2016-12-05 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ Add API support for strnum values.
+ * gawkapi.c (awk_value_to_node): Add AWK_STRNUM.
+ (assign_string): Add a type argument so we can use this for AWK_STRING
+ or AWK_STRNUM.
+ (node_to_awk_value): When AWK_NUMBER is requested, a regex value
+ should return false, as per the header file documentation.
+ Add support for AWK_STRNUM requests. When AWK_REGEX is requested,
+ implement the cases properly instead of always returning true.
+ Fix AWK_SCALAR logic. For AWK_UNDEFINED, rewrite using a switch
+ and support AWK_STRNUM.
+ (api_sym_update): Add AWK_STRNUM.
+ (api_sym_update_scalar): Add optimized support for updating AWK_STRNUM.
+ (valid_subscript_type): Add AWK_STRNUM.
+ (api_create_value): Add AWK_STRNUM.
+ * gawkapi.h (awk_valtype_t): Add AWK_STRNUM.
+ (strnum_value): New macro.
+ (Value fetching table): Updated.
+
+2016-12-04 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * gawkapi.c (assign_regex): Do not call assign_string, since we
+ know that a REGEX value is not an unterminated field string.
+ * gawkapi.h (make_regex): Delete macro.
+ (make_const_regex, make_malloced_regex): Add new macros to replace
+ make_regex with necessary memory management support.
+
+2016-12-04 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * awk.h (fixtype): Remove conditional checking if the node type
+ is Node_val. This is already covered by the assert, and if it's not
+ true, we have serious bugs.
+ * builtin.c (do_typeof): Do not treat Node_var the same way as
+ Node_val, since they are different beasts. In reality, the argument
+ to this function will never have type Node_var.
+
+2016-12-04 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * gawkapi.h (awk_element_t): Remove obsolete comment claiming that
+ the index will always be a string.
+ (gawk_api_t): Add new api_flatten_array_typed function and indicate
+ that api_flatten_array has been superseded.
+ (flatten_array_typed): New macro to call api_flatten_array_typed.
+ (flatten_array): Redefine using the new flatten_array_typed macro.
+ * gawkapi.c (api_flatten_array_typed): New function renamed from
+ api_flatten_array to flatten an array with the types requested by the
+ caller. Also update the comments and error messages.
+ (api_flatten_array): Now a wrapper around api_flatten_array_typed.
+ (api_impl): Add new api_flatten_array_typed hook.
+
2016-12-06 Arnold D. Robbins <arnold@skeeve.com>
Add minimum required and maximum expected number of arguments
@@ -87,6 +138,7 @@
in instructions. Add checking code and lint checks.
(Op_ext_func): Copy min_required and max_expected from function info.
+
2016-12-04 Andrew J. Schorr <aschorr@telemetry-investments.com>
* gawkapi.h (r_make_string_type): New inline function to create strings
diff --git a/awk.h b/awk.h
index 537b5c68..278f54c5 100644
--- a/awk.h
+++ b/awk.h
@@ -1887,12 +1887,10 @@ static inline NODE *
fixtype(NODE *n)
{
assert(n->type == Node_val);
- if (n->type == Node_val) {
- if ((n->flags & (NUMCUR|USER_INPUT)) == USER_INPUT)
- return force_number(n);
- if ((n->flags & INTIND) != 0)
- return force_string(n);
- }
+ if ((n->flags & (NUMCUR|USER_INPUT)) == USER_INPUT)
+ return force_number(n);
+ if ((n->flags & INTIND) != 0)
+ return force_string(n);
return n;
}
diff --git a/builtin.c b/builtin.c
index 5d7c3764..f71d71dd 100644
--- a/builtin.c
+++ b/builtin.c
@@ -3988,7 +3988,6 @@ do_typeof(int nargs)
deref = false;
break;
case Node_val:
- case Node_var:
switch (fixtype(arg)->flags & (STRING|NUMBER|USER_INPUT|REGEX)) {
case STRING:
res = "string";
@@ -4017,6 +4016,14 @@ do_typeof(int nargs)
res = "untyped";
deref = false;
break;
+ case Node_var:
+ /*
+ * Note: this doesn't happen because the function calling code
+ * in interpret.h pushes Node_var->var_value.
+ */
+ fatal(_("typeof: invalid argument type `%s'"),
+ nodetype2str(arg->type));
+ break;
default:
fatal(_("typeof: unknown argument type `%s'"),
nodetype2str(arg->type));
diff --git a/doc/ChangeLog b/doc/ChangeLog
index 68c139a9..d3e974c7 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -12,6 +12,22 @@
* gawktexi.in: Update description of awk_ext_func_t structure.
+2016-12-05 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * gawktexi.in: Document strnum changes as relates to API.
+ Still stuff left to do -- tables for type conversions need
+ to be updated to show new strnum and regex rows and columns.
+
+2016-12-04 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * gawktexi.in: Remove make_regex and replace it with make_const_regex
+ and make_malloced_regex.
+
+2016-12-04 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * gawktexi.in: Document new flatten_array_typed API function, and
+ indicate that the old flatten_array function has been superseded.
+
2016-11-30 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: Document typed regex changes as relates to API.
diff --git a/doc/gawktexi.in b/doc/gawktexi.in
index f7f232a9..c1674c11 100644
--- a/doc/gawktexi.in
+++ b/doc/gawktexi.in
@@ -31574,7 +31574,8 @@ multibyte encoding.
@itemx @ @ @ @ AWK_ARRAY,
@itemx @ @ @ @ AWK_SCALAR,@ @ @ @ @ @ @ @ @ /* opaque access to a variable */
@itemx @ @ @ @ AWK_VALUE_COOKIE,@ @ @ /* for updating a previously created value */
-@itemx @ @ @ @ AWK_REGEX
+@itemx @ @ @ @ AWK_REGEX,
+@itemx @ @ @ @ AWK_STRNUM
@itemx @} awk_valtype_t;
This @code{enum} indicates the type of a value.
It is used in the following @code{struct}.
@@ -31594,6 +31595,7 @@ The @code{val_type} member indicates what kind of value the
@code{union} holds, and each member is of the appropriate type.
@item #define str_value@ @ @ @ @ @ u.s
+@itemx #define strnum_value@ @ @ @ str_value
@itemx #define regex_value@ @ @ @ str_value
@itemx #define num_value@ @ @ @ @ @ u.d
@itemx #define array_cookie@ @ @ u.a
@@ -31615,7 +31617,7 @@ and in more detail in @ref{Cached values}.
@end table
-Scalar values in @command{awk} are numbers, strings, or typed regexps. The
+Scalar values in @command{awk} are numbers, strings, strnums, or typed regexps. The
@code{awk_value_t} struct represents values. The @code{val_type} member
indicates what is in the @code{union}.
@@ -31624,6 +31626,12 @@ require more work. Because @command{gawk} allows embedded @sc{nul} bytes
in string values, a string must be represented as a pair containing a
data pointer and length. This is the @code{awk_string_t} type.
+A strnum (numeric string) value is represented as a string and consists
+of user input data that appears to be numeric.
+When an extension attempts to create a strnum value, a string flagged
+as user input is created. Subsequent parsing will determine whether it
+looks like a number and should be treated as a strnum or a regular string.
+
Typed regexp values (@pxref{Strong Regexp Constants}) are not of
much use to extension functions. Extension functions can tell that
they've received them, and create them for scalar values. Otherwise,
@@ -31798,9 +31806,29 @@ This function simply creates a numeric value in the @code{awk_value_t} variable
pointed to by @code{result}.
@item static inline awk_value_t *
-@itemx make_regex(const char *string, size_t length, awk_value_t *result);
+@itemx make_const_user_input(const char *string, size_t length, awk_value_t *result);
+This function is identical to @code{make_const_string}, but the string is
+flagged as user input that should be treated as a strnum value if the contents
+of the string are numeric.
+
+@item static inline awk_value_t *
+@itemx make_malloced_user_input(const char *string, size_t length, awk_value_t *result);
+This function is identical to @code{make_malloced_string}, but the string is
+flagged as user input that should be treated as a strnum value if the contents
+of the string are numeric.
+
+@item static inline awk_value_t *
+@itemx make_const_regex(const char *string, size_t length, awk_value_t *result);
+This function creates a strongly typed regexp value by allocating a copy of the string.
+@code{string} is the regular expression of length @code{length}.
+
+@item static inline awk_value_t *
+@itemx make_malloced_regex(const char *string, size_t length, awk_value_t *result);
This function creates a strongly typed regexp value.
@code{string} is the regular expression of length @code{length}.
+It expects @code{string} to be a @samp{char *}
+value pointing to data previously obtained from @code{gawk_malloc()}, @code{gawk_calloc()}, or @code{gawk_realloc()}.
+
@end table
@node Registration Functions
@@ -32584,29 +32612,28 @@ value type, as appropriate. This behavior is summarized in
@end ifnotplaintext
@ifplaintext
@example
- +-------------------------------------------------------------+
- | Type of Actual Value: |
- +------------+------------+-----------+-----------+-----------+
- | String | Number | Regex | Array | Undefined |
-+-----------+-----------+------------+------------+-----------+-----------+-----------+
-| | String | String | String | String | false | false |
-| +-----------+------------+------------+-----------+-----------+-----------+
-| | Number | Number if | Number | false | false | false |
-| | | can be | | | | |
-| | | converted, | | | | |
-| | | else false | | | | |
-| +-----------+------------+------------+-----------+-----------+-----------+
-| | Regex | false | false | Regex | false | false |
-| +-----------+------------+------------+-----------+-----------+-----------+
-| Type | Array | false | false | false | Array | false |
-| Requested +-----------+------------+------------+-----------+-----------+-----------+
-| | Scalar | Scalar | Scalar | Scalar | false | false |
-| +-----------+------------+------------+-----------+-----------+-----------+
-| | Undefined | String | Number | Regex | Array | Undefined |
-| +-----------+------------+------------+-----------+-----------+-----------+
-| | Value | false | false | false | false | false |
-| | Cookie | | | | | |
-+-----------+-----------+------------+------------+-----------+-----------+-----------+
+ +-------------------------------------------------------+
+ | Type of Actual Value: |
+ +--------+--------+--------+--------+-------+-----------+
+ | String | Strnum | Number | Regex | Array | Undefined |
++-----------+-----------+--------+--------+--------+--------+-------+-----------+
+| | String | String | String | String | String | false | false |
+| +-----------+--------+--------+--------+--------+-------+-----------+
+| | Strnum | false | Strnum | Strnum | false | false | false |
+| +-----------+--------+--------+--------+--------+-------+-----------+
+| | Number | Number | Number | Number | false | false | false |
+| +-----------+--------+--------+--------+--------+-------+-----------+
+| | Regex | false | false | false | Regex | false | false |
+| +-----------+--------+--------+--------+--------+-------+-----------+
+| Type | Array | false | false | false | false | Array | false |
+| Requested +-----------+--------+--------+--------+--------+-------+-----------+
+| | Scalar | Scalar | Scalar | Scalar | Scalar | false | false |
+| +-----------+--------+--------+--------+--------+-------+-----------+
+| | Undefined | String | Strnum | Number | Regex | Array | Undefined |
+| +-----------+--------+--------+--------+--------+-------+-----------+
+| | Value | false | false | false | false | false | false |
+| | Cookie | | | | | | |
++-----------+-----------+--------+--------+--------+--------+-------+-----------+
@end example
@end ifplaintext
@end float
@@ -32706,7 +32733,7 @@ Return false if the value cannot be retrieved.
@item awk_bool_t sym_update_scalar(awk_scalar_t cookie, awk_value_t *value);
Update the value associated with a scalar cookie. Return false if
-the new value is not of type @code{AWK_STRING}, @code{AWK_REGEX}, or @code{AWK_NUMBER}.
+the new value is not of type @code{AWK_STRING}, @code{AWK_STRNUM}, @code{AWK_REGEX}, or @code{AWK_NUMBER}.
Here too, the predefined variables may not be updated.
@end table
@@ -32827,7 +32854,7 @@ is what the routines in this @value{SECTION} let you do. The functions are as f
@table @code
@item awk_bool_t create_value(awk_value_t *value, awk_value_cookie_t *result);
Create a cached string or numeric value from @code{value} for
-efficient later assignment. Only values of type @code{AWK_NUMBER}, @code{AWK_REGEX},
+efficient later assignment. Only values of type @code{AWK_NUMBER}, @code{AWK_REGEX}, @code{AWK_STRNUM},
and @code{AWK_STRING} are allowed. Any other type is rejected.
@code{AWK_UNDEFINED} could be allowed, but doing so would result in
inferior performance.
@@ -33053,9 +33080,9 @@ The array remains an array, but after calling this function, it
has no elements. This is equivalent to using the @code{delete}
statement (@pxref{Delete}).
-@item awk_bool_t flatten_array(awk_array_t a_cookie, awk_flat_array_t **data);
+@item awk_bool_t flatten_array_typed(awk_array_t a_cookie, awk_flat_array_t **data, awk_valtype_t index_type, awk_valtype_t value_type);
For the array represented by @code{a_cookie}, create an @code{awk_flat_array_t}
-structure and fill it in. Set the pointer whose address is passed as @code{data}
+structure and fill it in with indices and values of the requested types. Set the pointer whose address is passed as @code{data}
to point to this structure.
Return true upon success, or false otherwise.
@ifset FOR_PRINT
@@ -33067,6 +33094,12 @@ See the next @value{SECTION}
for a discussion of how to
flatten an array and work with it.
+@item awk_bool_t flatten_array(awk_array_t a_cookie, awk_flat_array_t **data);
+For the array represented by @code{a_cookie}, create an @code{awk_flat_array_t}
+structure and fill it in with @code{AWK_STRING} indices and
+@code{AWK_UNDEFINED} values. This is superseded by @code{flatten_array_typed}
+and retained only for legacy binary compatibility.
+
@item awk_bool_t release_flattened_array(awk_array_t a_cookie,
@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ awk_flat_array_t *data);
When done with a flattened array, release the storage using this function.
@@ -33179,7 +33212,7 @@ to double-check that the count in the @code{awk_flat_array_t}
is the same as the count just retrieved:
@example
- if (! flatten_array(value2.array_cookie, & flat_array)) @{
+ if (! flatten_array_typed(value2.array_cookie, & flat_array, AWK_STRING, AWK_UNDEFINED)) @{
printf("dump_array_and_delete: could not flatten array\n");
goto out;
@}
diff --git a/extension/ChangeLog b/extension/ChangeLog
index bf7d3fa4..9c647f05 100644
--- a/extension/ChangeLog
+++ b/extension/ChangeLog
@@ -41,6 +41,13 @@
* testext.c: Ditto.
* time.c: Ditto.
+2016-12-05 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * rwarray.c: Adjust to read and write strnum values.
+ (write_value): When writing a string value, code should use htonl.
+ There are now 3 string types: string, strnum, and regex.
+ (read_value): Support 3 string types: string, strnum, and regex.
+
2016-11-30 Arnold D. Robbins <arnold@skeeve.com>
* rwarray.c: Restore read comparison of major and minor versions
diff --git a/extension/rwarray.c b/extension/rwarray.c
index b62b6de0..186dac0f 100644
--- a/extension/rwarray.c
+++ b/extension/rwarray.c
@@ -84,7 +84,7 @@ static awk_bool_t read_value(FILE *fp, awk_value_t *value);
* For each element:
* Length of index val: 4 bytes - network order
* Index val as characters (N bytes)
- * Value type 4 bytes (0 = string, 1 = number, 2 = array, 3 = regex)
+ * Value type 4 bytes (0 = string, 1 = number, 2 = array, 3 = regex, 4 = strnum)
* IF string:
* Length of value 4 bytes
* Value as characters (N bytes)
@@ -210,7 +210,7 @@ write_elem(FILE *fp, awk_element_t *element)
return write_value(fp, & element->value);
}
-/* write_value --- write a number or a string or a regex or an array */
+/* write_value --- write a number or a string or a strnum or a regex or an array */
static awk_bool_t
write_value(FILE *fp, awk_value_t *val)
@@ -232,7 +232,22 @@ write_value(FILE *fp, awk_value_t *val)
if (fwrite(& val->num_value, 1, sizeof(val->num_value), fp) != sizeof(val->num_value))
return awk_false;
} else {
- code = (val->val_type == AWK_STRING ? 0 : 3);
+ switch (val->val_type) {
+ case AWK_STRING:
+ code = htonl(0);
+ break;
+ case AWK_STRNUM:
+ code = htonl(4);
+ break;
+ case AWK_REGEX:
+ code = htonl(3);
+ break;
+ default:
+ /* XXX can this happen? */
+ code = htonl(0);
+ warning(ext_id, _("array value has unknown type %d"), val->val_type);
+ break;
+ }
if (fwrite(& code, 1, sizeof(code), fp) != sizeof(code))
return awk_false;
@@ -449,7 +464,22 @@ read_value(FILE *fp, awk_value_t *value)
return awk_false;
}
len = ntohl(len);
- value->val_type = (code == 0 ? AWK_STRING : AWK_REGEX);
+ switch (code) {
+ case 0:
+ value->val_type = AWK_STRING;
+ break;
+ case 3:
+ value->val_type = AWK_REGEX;
+ break;
+ case 4:
+ value->val_type = AWK_STRNUM;
+ break;
+ default:
+ /* this cannot happen! */
+ warning(ext_id, _("treating recovered value with unknown type code %d as a string"), code);
+ value->val_type = AWK_STRING;
+ break;
+ }
value->str_value.len = len;
value->str_value.str = gawk_malloc(len + 1);
memset(value->str_value.str, '\0', len + 1);
diff --git a/gawkapi.c b/gawkapi.c
index ac0fe3fd..72797e0c 100644
--- a/gawkapi.c
+++ b/gawkapi.c
@@ -164,6 +164,11 @@ awk_value_to_node(const awk_value_t *retval)
ext_ret_val = make_str_node(retval->str_value.str,
retval->str_value.len, ALREADY_MALLOCED);
break;
+ case AWK_STRNUM:
+ ext_ret_val = make_str_node(retval->str_value.str,
+ retval->str_value.len, ALREADY_MALLOCED);
+ ext_ret_val->flags |= USER_INPUT;
+ break;
case AWK_REGEX:
ext_ret_val = make_typed_regex(retval->str_value.str,
retval->str_value.len);
@@ -415,9 +420,9 @@ free_api_string_copies()
/* assign_string --- return a string node with NUL termination */
static inline void
-assign_string(NODE *node, awk_value_t *val)
+assign_string(NODE *node, awk_value_t *val, awk_valtype_t val_type)
{
- val->val_type = AWK_STRING;
+ val->val_type = val_type;
if (node->stptr[node->stlen] != '\0') {
/*
* This is an unterminated field string, so make a copy.
@@ -449,7 +454,11 @@ assign_string(NODE *node, awk_value_t *val)
static inline void
assign_regex(NODE *node, awk_value_t *val)
{
- assign_string(node, val);
+ /* a REGEX node cannot be an unterminated field string */
+ assert((node->flags & MALLOC) != 0);
+ assert(node->stptr[node->stlen] == '\0');
+ val->str_value.str = node->stptr;
+ val->str_value.len = node->stlen;
val->val_type = AWK_REGEX;
}
@@ -489,55 +498,139 @@ node_to_awk_value(NODE *node, awk_value_t *val, awk_valtype_t wanted)
/* a scalar value */
switch (wanted) {
case AWK_NUMBER:
- val->val_type = AWK_NUMBER;
+ if (node->flags & REGEX)
+ val->val_type = AWK_REGEX;
+ else {
+ val->val_type = AWK_NUMBER;
+ (void) force_number(node);
+ val->num_value = get_number_d(node);
+ ret = awk_true;
+ }
+ break;
- (void) force_number(node);
- val->num_value = get_number_d(node);
- ret = awk_true;
+ case AWK_STRNUM:
+ switch (fixtype(node)->flags & (STRING|NUMBER|USER_INPUT|REGEX)) {
+ case STRING:
+ val->val_type = AWK_STRING;
+ break;
+ case NUMBER:
+ (void) force_string(node);
+ assign_string(node, val, AWK_STRNUM);
+ ret = awk_true;
+ break;
+ case NUMBER|USER_INPUT:
+ assign_string(node, val, AWK_STRNUM);
+ ret = awk_true;
+ break;
+ case REGEX:
+ val->val_type = AWK_REGEX;
+ break;
+ case NUMBER|STRING:
+ if (node == Nnull_string) {
+ val->val_type = AWK_UNDEFINED;
+ break;
+ }
+ /* fall through */
+ default:
+ warning(_("node_to_awk_value detected invalid flags combination `%s'; please file a bug report."), flags2str(node->flags));
+ val->val_type = AWK_UNDEFINED;
+ break;
+ }
break;
case AWK_STRING:
(void) force_string(node);
- assign_string(node, val);
+ assign_string(node, val, AWK_STRING);
ret = awk_true;
break;
case AWK_REGEX:
- assign_regex(node, val);
- ret = awk_true;
+ switch (fixtype(node)->flags & (STRING|NUMBER|USER_INPUT|REGEX)) {
+ case STRING:
+ val->val_type = AWK_STRING;
+ break;
+ case NUMBER:
+ val->val_type = AWK_NUMBER;
+ break;
+ case NUMBER|USER_INPUT:
+ val->val_type = AWK_STRNUM;
+ break;
+ case REGEX:
+ assign_regex(node, val);
+ ret = awk_true;
+ break;
+ case NUMBER|STRING:
+ if (node == Nnull_string) {
+ val->val_type = AWK_UNDEFINED;
+ break;
+ }
+ /* fall through */
+ default:
+ warning(_("node_to_awk_value detected invalid flags combination `%s'; please file a bug report."), flags2str(node->flags));
+ val->val_type = AWK_UNDEFINED;
+ break;
+ }
break;
case AWK_SCALAR:
- fixtype(node);
- if ((node->flags & NUMBER) != 0) {
- val->val_type = AWK_NUMBER;
- } else if ((node->flags & STRING) != 0) {
+ switch (fixtype(node)->flags & (STRING|NUMBER|USER_INPUT|REGEX)) {
+ case STRING:
val->val_type = AWK_STRING;
- } else if ((node->flags & REGEX) != 0) {
+ break;
+ case NUMBER:
+ val->val_type = AWK_NUMBER;
+ break;
+ case NUMBER|USER_INPUT:
+ val->val_type = AWK_STRNUM;
+ break;
+ case REGEX:
val->val_type = AWK_REGEX;
- } else
+ break;
+ case NUMBER|STRING:
+ if (node == Nnull_string) {
+ val->val_type = AWK_UNDEFINED;
+ break;
+ }
+ /* fall through */
+ default:
+ warning(_("node_to_awk_value detected invalid flags combination `%s'; please file a bug report."), flags2str(node->flags));
val->val_type = AWK_UNDEFINED;
- ret = awk_false;
+ break;
+ }
break;
case AWK_UNDEFINED:
/* return true and actual type for request of undefined */
- fixtype(node);
- if (node == Nnull_string) {
- val->val_type = AWK_UNDEFINED;
+ switch (fixtype(node)->flags & (STRING|NUMBER|USER_INPUT|REGEX)) {
+ case STRING:
+ assign_string(node, val, AWK_STRING);
ret = awk_true;
- } else if ((node->flags & NUMBER) != 0) {
+ break;
+ case NUMBER:
val->val_type = AWK_NUMBER;
val->num_value = get_number_d(node);
ret = awk_true;
- } else if ((node->flags & STRING) != 0) {
- assign_string(node, val);
+ break;
+ case NUMBER|USER_INPUT:
+ assign_string(node, val, AWK_STRNUM);
ret = awk_true;
- } else if ((node->flags & REGEX) != 0) {
+ break;
+ case REGEX:
assign_regex(node, val);
ret = awk_true;
- } else
+ break;
+ case NUMBER|STRING:
+ if (node == Nnull_string) {
+ val->val_type = AWK_UNDEFINED;
+ ret = awk_true;
+ break;
+ }
+ /* fall through */
+ default:
+ warning(_("node_to_awk_value detected invalid flags combination `%s'; please file a bug report."), flags2str(node->flags));
val->val_type = AWK_UNDEFINED;
+ break;
+ }
break;
case AWK_ARRAY:
@@ -640,6 +733,7 @@ api_sym_update(awk_ext_id_t id,
switch (value->val_type) {
case AWK_NUMBER:
+ case AWK_STRNUM:
case AWK_STRING:
case AWK_REGEX:
case AWK_UNDEFINED:
@@ -741,6 +835,7 @@ api_sym_update_scalar(awk_ext_id_t id,
break;
case AWK_STRING:
+ case AWK_STRNUM:
if (node->var_value->valref == 1) {
NODE *r = node->var_value;
@@ -754,6 +849,8 @@ api_sym_update_scalar(awk_ext_id_t id,
/* make_str_node(s, l, ALREADY_MALLOCED): */
r->numbr = 0;
r->flags = (MALLOC|STRING|STRCUR);
+ if (value->val_type == AWK_STRNUM)
+ r->flags |= USER_INPUT;
r->stfmt = STFMT_UNUSED;
r->stptr = value->str_value.str;
r->stlen = value->str_value.len;
@@ -790,6 +887,7 @@ valid_subscript_type(awk_valtype_t valtype)
switch (valtype) {
case AWK_UNDEFINED:
case AWK_NUMBER:
+ case AWK_STRNUM:
case AWK_STRING:
case AWK_REGEX:
case AWK_SCALAR:
@@ -995,9 +1093,10 @@ api_clear_array(awk_ext_id_t id, awk_array_t a_cookie)
/* api_flatten_array_typed --- flatten out an array, converting indices and values to the requested types, so that it can be looped over easily. */
static awk_bool_t
-api_flatten_array(awk_ext_id_t id,
+api_flatten_array_typed(awk_ext_id_t id,
awk_array_t a_cookie,
- awk_flat_array_t **data)
+ awk_flat_array_t **data,
+ awk_valtype_t index_type, awk_valtype_t value_type)
{
NODE **list;
size_t i, j;
@@ -1014,7 +1113,7 @@ api_flatten_array(awk_ext_id_t id,
(array->table_size - 1) * sizeof(awk_element_t);
emalloc(*data, awk_flat_array_t *, alloc_size,
- "api_flatten_array");
+ "api_flatten_array_typed");
memset(*data, 0, alloc_size);
list = assoc_list(array, "@unsorted", ASORTI);
@@ -1029,29 +1128,35 @@ api_flatten_array(awk_ext_id_t id,
index = list[i];
value = list[i + 1]; /* number or string or subarray */
- /*
- * Convert index and value to ext types. Force the
- * index to be a string, since indices are always
- * conceptually strings, regardless of internal optimizations
- * to treat them as integers in some cases.
- *
- * Regexes are forced to string too.
- */
+ /* Convert index and value to ext types. */
if (! node_to_awk_value(index,
- & (*data)->elements[j].index, AWK_STRING)) {
- fatal(_("api_flatten_array: could not convert index %d\n"),
- (int) i);
+ & (*data)->elements[j].index, index_type)) {
+ fatal(_("api_flatten_array_typed: could not convert index %d to %d\n"),
+ (int) i, (int) index_type);
}
if (! node_to_awk_value(value,
- & (*data)->elements[j].value, AWK_UNDEFINED)) {
- fatal(_("api_flatten_array: could not convert value %d\n"),
- (int) i);
+ & (*data)->elements[j].value, value_type)) {
+ fatal(_("api_flatten_array_typed: could not convert value %d to %d\n"),
+ (int) i, (int) value_type);
}
}
return awk_true;
}
/*
+ * api_flatten_array --- replaced by api_flatten_array_typed. This function
+ * is retained only for binary compatibility.
+ */
+
+static awk_bool_t
+api_flatten_array(awk_ext_id_t id,
+ awk_array_t a_cookie,
+ awk_flat_array_t **data)
+{
+ return api_flatten_array_typed(id, a_cookie, data, AWK_STRING, AWK_UNDEFINED);
+}
+
+/*
* api_release_flattened_array --- release array memory,
* delete any marked elements. Count must match what
* gawk thinks the size is.
@@ -1103,6 +1208,7 @@ api_create_value(awk_ext_id_t id, awk_value_t *value,
switch (value->val_type) {
case AWK_NUMBER:
+ case AWK_STRNUM:
case AWK_STRING:
case AWK_REGEX:
break;
@@ -1298,7 +1404,7 @@ gawk_api_t api_impl = {
api_del_array_element,
api_create_array,
api_clear_array,
- api_flatten_array,
+ api_flatten_array, /* for legacy binary compatibility */
api_release_flattened_array,
/* Memory allocation */
@@ -1312,6 +1418,9 @@ gawk_api_t api_impl = {
/* Print nonfatal error message */
api_nonfatal,
+
+ /* New array flattening function */
+ api_flatten_array_typed,
};
/* init_ext_api --- init the extension API */
diff --git a/gawkapi.h b/gawkapi.h
index 07fb37e7..384fbe81 100644
--- a/gawkapi.h
+++ b/gawkapi.h
@@ -309,7 +309,8 @@ typedef enum {
AWK_ARRAY,
AWK_SCALAR, /* opaque access to a variable */
AWK_VALUE_COOKIE, /* for updating a previously created value */
- AWK_REGEX /* last for binary compatibility */
+ AWK_REGEX,
+ AWK_STRNUM
} awk_valtype_t;
/*
@@ -326,6 +327,7 @@ typedef struct awk_value {
awk_value_cookie_t vc;
} u;
#define str_value u.s
+#define strnum_value str_value
#define regex_value str_value
#define num_value u.d
#define array_cookie u.a
@@ -349,7 +351,7 @@ typedef struct awk_element {
AWK_ELEMENT_DELETE = 1 /* set by extension if
should be deleted */
} flags;
- awk_value_t index; /* guaranteed to be a string! */
+ awk_value_t index;
awk_value_t value;
} awk_element_t;
@@ -495,29 +497,28 @@ typedef struct gawk_api {
Table entry is type returned:
- +-------------------------------------------------------------+
- | Type of Actual Value: |
- +------------+------------+-----------+-----------+-----------+
- | String | Number | Regex | Array | Undefined |
- +-----------+-----------+------------+------------+-----------+-----------+-----------+
- | | String | String | String | String | false | false |
- | +-----------+------------+------------+-----------+-----------+-----------+
- | | Number | Number if | Number | false | false | false |
- | | | can be | | | | |
- | | | converted, | | | | |
- | | | else false | | | | |
- | +-----------+------------+------------+-----------+-----------+-----------+
- | | Regex | false | false | Regex | false | false |
- | +-----------+------------+------------+-----------+-----------+-----------+
- | Type | Array | false | false | false | Array | false |
- | Requested +-----------+------------+------------+-----------+-----------+-----------+
- | | Scalar | Scalar | Scalar | Scalar | false | false |
- | +-----------+------------+------------+-----------+-----------+-----------+
- | | Undefined | String | Number | Regex | Array | Undefined |
- | +-----------+------------+------------+-----------+-----------+-----------+
- | | Value | false | false | false | false | false |
- | | Cookie | | | | | |
- +-----------+-----------+------------+------------+-----------+-----------+-----------+
+ +-------------------------------------------------------+
+ | Type of Actual Value: |
+ +--------+--------+--------+--------+-------+-----------+
+ | String | Strnum | Number | Regex | Array | Undefined |
+ +-----------+-----------+--------+--------+--------+--------+-------+-----------+
+ | | String | String | String | String | String | false | false |
+ | +-----------+--------+--------+--------+--------+-------+-----------+
+ | | Strnum | false | Strnum | Strnum | false | false | false |
+ | +-----------+--------+--------+--------+--------+-------+-----------+
+ | | Number | Number | Number | Number | false | false | false |
+ | +-----------+--------+--------+--------+--------+-------+-----------+
+ | | Regex | false | false | false | Regex | false | false |
+ | +-----------+--------+--------+--------+--------+-------+-----------+
+ | Type | Array | false | false | false | false | Array | false |
+ | Requested +-----------+--------+--------+--------+--------+-------+-----------+
+ | | Scalar | Scalar | Scalar | Scalar | Scalar | false | false |
+ | +-----------+--------+--------+--------+--------+-------+-----------+
+ | | Undefined | String | Strnum | Number | Regex | Array | Undefined |
+ | +-----------+--------+--------+--------+--------+-------+-----------+
+ | | Value | false | false | false | false | false | false |
+ | | Cookie | | | | | | |
+ +-----------+-----------+--------+--------+--------+--------+-------+-----------+
*/
/* Functions to handle parameters passed to the extension. */
@@ -684,7 +685,13 @@ typedef struct gawk_api {
/* Clear out an array */
awk_bool_t (*api_clear_array)(awk_ext_id_t id, awk_array_t a_cookie);
- /* Flatten out an array so that it can be looped over easily. */
+ /*
+ * Flatten out an array so that it can be looped over easily.
+ * This function returns all indices as strings and values as
+ * the native type one would get from an AWK_UNDEFINED request.
+ * Please use api_flatten_array_typed for more control over the
+ * type conversions.
+ */
awk_bool_t (*api_flatten_array)(awk_ext_id_t id,
awk_array_t a_cookie,
awk_flat_array_t **data);
@@ -740,6 +747,16 @@ typedef struct gawk_api {
/* Print nonfatal error message */
void (*api_nonfatal)(awk_ext_id_t id, const char *format, ...);
+ /*
+ * Flatten out an array with type conversions as requested.
+ * This supersedes the api_flatten_array function that did not allow
+ * the caller to specify the requested types.
+ */
+ awk_bool_t (*api_flatten_array_typed)(awk_ext_id_t id,
+ awk_array_t a_cookie,
+ awk_flat_array_t **data,
+ awk_valtype_t index_type, awk_valtype_t value_type);
+
} gawk_api_t;
#ifndef GAWK /* these are not for the gawk code itself! */
@@ -806,8 +823,11 @@ typedef struct gawk_api {
#define clear_array(array) (api->api_clear_array(ext_id, array))
+#define flatten_array_typed(array, data, index_type, value_type) \
+ (api->api_flatten_array_typed(ext_id, array, data, index_type, value_type))
+
#define flatten_array(array, data) \
- (api->api_flatten_array(ext_id, array, data))
+ flatten_array_typed(array, data, AWK_STRING, AWK_UNDEFINED)
#define release_flattened_array(array, data) \
(api->api_release_flattened_array(ext_id, array, data))
@@ -843,7 +863,7 @@ typedef struct gawk_api {
/* Constructor functions */
-/* r_make_string_type --- make a string or regexp value in result from the passed-in string */
+/* r_make_string_type --- make a string or strnum or regexp value in result from the passed-in string */
static inline awk_value_t *
r_make_string_type(const gawk_api_t *api, /* needed for emalloc */
@@ -888,7 +908,17 @@ r_make_string(const gawk_api_t *api, /* needed for emalloc */
#define make_const_string(str, len, result) r_make_string(api, ext_id, str, len, 1, result)
#define make_malloced_string(str, len, result) r_make_string(api, ext_id, str, len, 0, result)
-#define make_regex(str, len, result) r_make_string_type(api, ext_id, str, len, 1, result, AWK_REGEX)
+
+#define make_const_regex(str, len, result) r_make_string_type(api, ext_id, str, len, 1, result, AWK_REGEX)
+#define make_malloced_regex(str, len, result) r_make_string_type(api, ext_id, str, len, 0, result, AWK_REGEX)
+
+/*
+ * Note: The caller cannot create a Strnum directly, but it can create a string
+ * that is flagged as user input that MAY be a Strnum. Gawk will decide whether
+ * it's a Strnum or a String by checking whether the string is numeric.
+ */
+#define make_const_user_input(str, len, result) r_make_string_type(api, ext_id, str, len, 1, result, AWK_STRNUM)
+#define make_malloced_user_input(str, len, result) r_make_string_type(api, ext_id, str, len, 0, result, AWK_STRNUM)
/* make_null_string --- make a null string value */
diff --git a/test/ChangeLog b/test/ChangeLog
index a2dbdc5a..d2b0cf79 100644
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -1,3 +1,7 @@
+2016-12-05 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * rwarray.awk: Check that strnum is recreated correctly.
+
2016-11-30 Arnold D. Robbins <arnold@skeeve.com>
* rwarray.awk: Use typeof() to verify that typed regex is
diff --git a/test/rwarray.awk b/test/rwarray.awk
index 70809b64..86a4b589 100644
--- a/test/rwarray.awk
+++ b/test/rwarray.awk
@@ -7,6 +7,10 @@ BEGIN {
re_sub = "/typed-regex/"
dict[re_sub] = @/search me/
+ strnum_sub = "strnum-sub"
+ split("-2.4", f)
+ dict[strnum_sub] = f[1]
+
n = asorti(dict, dictindices)
for (i = 1; i <= n; i++)
printf("dict[%s] = %s\n", dictindices[i], dict[dictindices[i]]) > "orig.out"
@@ -43,4 +47,8 @@ BEGIN {
if (typeof(dict[re_sub]) != "regexp")
printf("dict[\"%s\"] should be regexp, is %s\n",
re_sub, typeof(dict[re_sub]));
+
+ if (typeof(dict[strnum_sub]) != "strnum")
+ printf("dict[\"%s\"] should be strnum, is %s\n",
+ strnum_sub, typeof(dict[strnum_sub]));
}