diff options
Diffstat (limited to 'extension/rwarray.c')
-rw-r--r-- | extension/rwarray.c | 343 |
1 files changed, 269 insertions, 74 deletions
diff --git a/extension/rwarray.c b/extension/rwarray.c index e8bc2276..7422be9f 100644 --- a/extension/rwarray.c +++ b/extension/rwarray.c @@ -37,6 +37,7 @@ #include <assert.h> #include <errno.h> #include <fcntl.h> +#include <stdbool.h> #include <stdlib.h> #include <string.h> #include <unistd.h> @@ -63,11 +64,11 @@ #define MAGIC "awkrulz\n" #define MAJOR 4 -#define MINOR 0 +#define MINOR 1 static const gawk_api_t *api; /* for convenience macros to work */ static awk_ext_id_t ext_id; -static const char *ext_version = "rwarray extension: version 2.0"; +static const char *ext_version = "rwarray extension: version 2.1"; static awk_bool_t (*init_func)(void) = NULL; int plugin_is_GPL_compatible; @@ -77,10 +78,16 @@ static awk_bool_t write_elem(FILE *fp, awk_element_t *element); static awk_bool_t write_value(FILE *fp, awk_value_t *val); static awk_bool_t write_number(FILE *fp, awk_value_t *val); +typedef union { + mpz_t mpz_val; + mpfr_t mpfr_val; +} value_storage; + +typedef awk_array_t (*array_handle_t)(awk_value_t *); static awk_bool_t read_array(FILE *fp, awk_array_t array); -static awk_bool_t read_elem(FILE *fp, awk_element_t *element); -static awk_bool_t read_value(FILE *fp, awk_value_t *value); -static awk_bool_t read_number(FILE *fp, awk_value_t *value, uint32_t code); +static awk_bool_t read_elem(FILE *fp, awk_element_t *element, array_handle_t, value_storage *); +static awk_bool_t read_value(FILE *fp, awk_value_t *value, array_handle_t, awk_value_t *idx, value_storage *vs); +static awk_bool_t read_number(FILE *fp, awk_value_t *value, uint32_t code, value_storage *); /* * Format of array info: @@ -113,14 +120,15 @@ static awk_bool_t read_number(FILE *fp, awk_value_t *value, uint32_t code); #define VT_ARRAY 5 #define VT_REGEX 6 #define VT_STRNUM 7 +#define VT_BOOL 8 #define VT_UNDEFINED 20 -/* do_writea --- write an array */ +/* write_backend --- write an array */ static awk_value_t * -do_writea(int nargs, awk_value_t *result, struct awk_ext_func *unused) 
+write_backend(awk_value_t *result, awk_array_t array, const char *name) { - awk_value_t filename, array; + awk_value_t filename; FILE *fp = NULL; uint32_t major = MAJOR; uint32_t minor = MINOR; @@ -128,18 +136,9 @@ do_writea(int nargs, awk_value_t *result, struct awk_ext_func *unused) assert(result != NULL); make_number(0.0, result); - if (nargs < 2) - goto out; - - /* filename is first arg, array to dump is second */ + /* filename is first arg */ if (! get_argument(0, AWK_STRING, & filename)) { - warning(ext_id, _("do_writea: first argument is not a string")); - errno = EINVAL; - goto done1; - } - - if (! get_argument(1, AWK_ARRAY, & array)) { - warning(ext_id, _("do_writea: second argument is not an array")); + warning(ext_id, _("%s: first argument is not a string"), name); errno = EINVAL; goto done1; } @@ -160,21 +159,55 @@ do_writea(int nargs, awk_value_t *result, struct awk_ext_func *unused) if (fwrite(& minor, 1, sizeof(minor), fp) != sizeof(minor)) goto done1; - if (write_array(fp, array.array_cookie)) { + if (write_array(fp, array)) { make_number(1.0, result); - goto done0; + fclose(fp); + return result; } done1: update_ERRNO_int(errno); - unlink(filename.str_value.str); - -done0: - fclose(fp); -out: + if (fp != NULL) { + fclose(fp); + unlink(filename.str_value.str); + } return result; } +/* do_writea --- write an array */ + +static awk_value_t * +do_writea(int nargs, awk_value_t *result, struct awk_ext_func *unused) +{ + awk_value_t array; + + if (! get_argument(1, AWK_ARRAY, & array)) { + warning(ext_id, _("writea: second argument is not an array")); + errno = EINVAL; + update_ERRNO_int(errno); + make_number(0.0, result); + return result; + } + return write_backend(result, array.array_cookie, "writea"); +} + +/* do_writeall --- write out SYMTAB */ + +static awk_value_t * +do_writeall(int nargs, awk_value_t *result, struct awk_ext_func *unused) +{ + awk_value_t array; + + if (! 
sym_lookup("SYMTAB", AWK_ARRAY, & array)) { + warning(ext_id, _("writeall: unable to find SYMTAB array")); + errno = EINVAL; + update_ERRNO_int(errno); + make_number(0.0, result); + return result; + } + return write_backend(result, array.array_cookie, "writeall"); +} + /* write_array --- write out an array or a sub-array */ @@ -258,6 +291,9 @@ write_value(FILE *fp, awk_value_t *val) case AWK_REGEX: code = htonl(VT_REGEX); break; + case AWK_BOOL: + code = htonl(VT_BOOL); + break; case AWK_UNDEFINED: code = htonl(VT_UNDEFINED); break; @@ -267,17 +303,29 @@ write_value(FILE *fp, awk_value_t *val) warning(ext_id, _("array value has unknown type %d"), val->val_type); break; } + if (fwrite(& code, 1, sizeof(code), fp) != sizeof(code)) return awk_false; - len = htonl(val->str_value.len); - if (fwrite(& len, 1, sizeof(len), fp) != sizeof(len)) - return awk_false; + if (code == htonl(VT_BOOL)) { + len = (val->bool_value == awk_true ? 4 : 5); + len = htonl(len); + const char *s = (val->bool_value == awk_true ? 
"TRUE" : "FALSE"); - if (fwrite(val->str_value.str, 1, val->str_value.len, fp) - != (ssize_t) val->str_value.len) - return awk_false; + if (fwrite(& len, 1, sizeof(len), fp) != sizeof(len)) + return awk_false; + + if (fwrite(s, 1, strlen(s), fp) != (ssize_t) strlen(s)) + return awk_false; + } else { + len = htonl(val->str_value.len); + if (fwrite(& len, 1, sizeof(len), fp) != sizeof(len)) + return awk_false; + if (fwrite(val->str_value.str, 1, val->str_value.len, fp) + != (ssize_t) val->str_value.len) + return awk_false; + } return awk_true; } @@ -323,9 +371,9 @@ write_number(FILE *fp, awk_value_t *val) if (mpfr_fpif_export(fp, val->num_ptr) != 0) #else #define MPFR_STR_BASE 62 /* maximize base to minimize string len */ -#define MPFR_STR_ROUND MPFR_RNDN +#define MPFR_STR_ROUND mpfr_get_default_rounding_mode() /* - * XXX does the choice of MPFR_RNDN matter, given + * Does the choice of rounding mode matter, given * that the precision is 0, so we should be rendering * in full precision? */ @@ -350,12 +398,139 @@ write_number(FILE *fp, awk_value_t *val) return awk_true; } -/* do_reada --- read an array */ +/* free_value --- release memory for ignored global variables */ + +static void +free_value(awk_value_t *v) +{ + switch (v->val_type) { + case AWK_ARRAY: + clear_array(v->array_cookie); + break; + case AWK_STRING: + case AWK_REGEX: + case AWK_STRNUM: + case AWK_UNDEFINED: + gawk_free(v->str_value.str); + break; + case AWK_BOOL: + /* no memory allocated */ + break; + case AWK_NUMBER: + switch (v->num_type) { + case AWK_NUMBER_TYPE_DOUBLE: + /* no memory allocated */ + break; + case AWK_NUMBER_TYPE_MPZ: + mpz_clear(v->num_ptr); + break; + case AWK_NUMBER_TYPE_MPFR: + mpfr_clear(v->num_ptr); + break; + default: + warning(ext_id, _("cannot free number with unknown type %d"), v->num_type); + break; + } + break; + default: + warning(ext_id, _("cannot free value with unhandled type %d"), v->val_type); + break; + } +} + +/* do_poke --- create a global variable */ + +static 
awk_bool_t +do_poke(awk_element_t *e) +{ + awk_value_t t; + + if (e->index.val_type != AWK_STRING) + return awk_false; + /* So this is a bit tricky. If the program refers to the variable, + * then it will already exist in an undefined state after parsing. + * If the program never refers to it, then the lookup fails. + * We still need to create it in case the program accesses it via + * indirection through the SYMTAB table. */ + if (sym_lookup(e->index.str_value.str, AWK_UNDEFINED, &t) && (t.val_type != AWK_UNDEFINED)) + return awk_false; + if (! sym_update(e->index.str_value.str, & e->value)) { + warning(ext_id, _("readall: unable to set %s"), e->index.str_value.str); + return awk_false; + } + return awk_true; +} + +/* regular_array_handle --- array creation hook for normal reada */ + +static awk_array_t +regular_array_handle(awk_value_t *unused) +{ + return create_array(); +} + +/* global_array_handle --- array creation hook for readall */ + +static awk_array_t +global_array_handle(awk_value_t *n) +{ + awk_value_t t; + size_t count; + + /* The array may exist already because it was instantiated during + * program parsing, so we use the existing array if it is empty. */ + return ((n->val_type == AWK_STRING) && sym_lookup(n->str_value.str, AWK_UNDEFINED, &t) && (t.val_type == AWK_ARRAY) && get_element_count(t.array_cookie, & count) && ! count) ? t.array_cookie : create_array(); +} + +/* read_global --- read top-level variables dumped from SYMTAB */ + +static awk_bool_t +read_global(FILE *fp, awk_array_t unused) +{ + uint32_t i; + uint32_t count; + awk_element_t new_elem; + value_storage vs; + + if (fread(& count, 1, sizeof(count), fp) != sizeof(count)) + return awk_false; + + count = ntohl(count); + + for (i = 0; i < count; i++) { + if (read_elem(fp, & new_elem, global_array_handle, &vs)) { + if (! 
do_poke(& new_elem)) + free_value(& new_elem.value); + if (new_elem.index.str_value.len) + /* free string allocated by make_const_string */ + gawk_free(new_elem.index.str_value.str); + } else + return awk_false; + } + + return awk_true; +} + +/* read_one --- read one array */ + +static awk_bool_t +read_one(FILE *fp, awk_array_t array) +{ + if (! clear_array(array)) { + errno = ENOMEM; + warning(ext_id, _("reada: clear_array failed")); + return awk_false; + } + + return read_array(fp, array); +} + +/* read_backend --- common code for reada and readall */ static awk_value_t * -do_reada(int nargs, awk_value_t *result, struct awk_ext_func *unused) +read_backend(awk_value_t *result, awk_array_t array, const char *name, awk_bool_t (*func)(FILE *, awk_array_t)) { - awk_value_t filename, array; + awk_value_t filename; FILE *fp = NULL; uint32_t major; uint32_t minor; @@ -364,18 +539,9 @@ do_reada(int nargs, awk_value_t *result, struct awk_ext_func *unused) assert(result != NULL); make_number(0.0, result); - if (nargs < 2) - goto out; - - /* directory is first arg, array to read is second */ + /* filename is first arg */ if (! get_argument(0, AWK_STRING, & filename)) { - warning(ext_id, _("do_reada: first argument is not a string")); - errno = EINVAL; - goto done1; - } - - if (! get_argument(1, AWK_ARRAY, & array)) { - warning(ext_id, _("do_reada: second argument is not an array")); + warning(ext_id, _("%s: first argument is not a string"), name); errno = EINVAL; goto done1; } @@ -417,13 +583,7 @@ do_reada(int nargs, awk_value_t *result, struct awk_ext_func *unused) goto done1; } - if (! 
clear_array(array.array_cookie)) { - errno = ENOMEM; - warning(ext_id, _("do_reada: clear_array failed")); - goto done1; - } - - if (read_array(fp, array.array_cookie)) { + if ((*func)(fp, array)) { make_number(1.0, result); goto done0; } @@ -433,10 +593,34 @@ done1: done0: if (fp != NULL) fclose(fp); -out: return result; } +/* do_reada --- read an array */ + +static awk_value_t * +do_reada(int nargs, awk_value_t *result, struct awk_ext_func *unused) +{ + awk_value_t array; + + if (! get_argument(1, AWK_ARRAY, & array)) { + warning(ext_id, _("reada: second argument is not an array")); + errno = EINVAL; + update_ERRNO_int(errno); + make_number(0.0, result); + return result; + } + return read_backend(result, array.array_cookie, "reada", read_one); +} + +/* do_readall --- read top-level variables */ + +static awk_value_t * +do_readall(int nargs, awk_value_t *result, struct awk_ext_func *unused) +{ + return read_backend(result, NULL, "readall", read_global); +} + /* read_array --- read in an array or sub-array */ @@ -446,6 +630,7 @@ read_array(FILE *fp, awk_array_t array) uint32_t i; uint32_t count; awk_element_t new_elem; + value_storage vs; if (fread(& count, 1, sizeof(count), fp) != sizeof(count)) return awk_false; @@ -453,7 +638,7 @@ read_array(FILE *fp, awk_array_t array) count = ntohl(count); for (i = 0; i < count; i++) { - if (read_elem(fp, & new_elem)) { + if (read_elem(fp, & new_elem, regular_array_handle, &vs)) { /* add to array */ if (! set_array_element_by_elem(array, & new_elem)) { warning(ext_id, _("read_array: set_array_element failed")); @@ -472,7 +657,7 @@ read_array(FILE *fp, awk_array_t array) /* read_elem --- read in a single element */ static awk_bool_t -read_elem(FILE *fp, awk_element_t *element) +read_elem(FILE *fp, awk_element_t *element, array_handle_t array_handle, value_storage *vs) { uint32_t index_len; static char *buffer; @@ -510,7 +695,7 @@ read_elem(FILE *fp, awk_element_t *element) make_null_string(& element->index); } - if (! 
read_value(fp, & element->value)) + if (! read_value(fp, & element->value, array_handle, & element->index, vs)) return awk_false; return awk_true; @@ -519,7 +704,7 @@ read_elem(FILE *fp, awk_element_t *element) /* read_value --- read a number or a string */ static awk_bool_t -read_value(FILE *fp, awk_value_t *value) +read_value(FILE *fp, awk_value_t *value, array_handle_t array_handle, awk_value_t *idx, value_storage *vs) { uint32_t code, len; @@ -529,7 +714,7 @@ read_value(FILE *fp, awk_value_t *value) code = ntohl(code); if (code == VT_ARRAY) { - awk_array_t array = create_array(); + awk_array_t array = (*array_handle)(idx); if (! read_array(fp, array)) return awk_false; @@ -540,7 +725,7 @@ read_value(FILE *fp, awk_value_t *value) } else if (code == VT_NUMBER || code == VT_GMP || code == VT_MPFR) { - return read_number(fp, value, code); + return read_number(fp, value, code, vs); } else { if (fread(& len, 1, sizeof(len), fp) != sizeof(len)) { return awk_false; @@ -559,6 +744,9 @@ read_value(FILE *fp, awk_value_t *value) case VT_UNDEFINED: value->val_type = AWK_UNDEFINED; break; + case VT_BOOL: + value->val_type = AWK_BOOL; + break; default: /* this cannot happen! */ warning(ext_id, _("treating recovered value with unknown type code %d as a string"), code); @@ -573,6 +761,15 @@ read_value(FILE *fp, awk_value_t *value) return awk_false; } value->str_value.str[len] = '\0'; + value->str_value.len = len; + + if (code == VT_BOOL) { + bool val = (strcmp(value->str_value.str, "TRUE") == 0); + + gawk_free(value->str_value.str); + value->str_value.str = NULL; + value->bool_value = val ? 
awk_true : awk_false; + } } return awk_true; @@ -581,7 +778,7 @@ read_value(FILE *fp, awk_value_t *value) /* read_number --- read a double, GMP, or MPFR number */ static awk_bool_t -read_number(FILE *fp, awk_value_t *value, uint32_t code) +read_number(FILE *fp, awk_value_t *value, uint32_t code, value_storage *vs) { uint32_t len; @@ -603,28 +800,24 @@ read_number(FILE *fp, awk_value_t *value, uint32_t code) } else { #ifdef HAVE_MPFR if (code == VT_GMP) { - mpz_t mp_ptr; - - mpz_init(mp_ptr); - if (mpz_inp_raw(mp_ptr, fp) == 0) + mpz_init(vs->mpz_val); + if (mpz_inp_raw(vs->mpz_val, fp) == 0) return awk_false; - value = make_number_mpz(mp_ptr, value); + value = make_number_mpz(vs->mpz_val, value); } else { - mpfr_t mpfr_val; - mpfr_init(mpfr_val); - + mpfr_init(vs->mpfr_val); #ifdef USE_MPFR_FPIF /* preferable if widely available and stable */ - if (mpfr_fpif_import(mpfr_val, fp) != 0) + if (mpfr_fpif_import(vs->mpfr_val, fp) != 0) #else // N.B. need to consume the terminating space we wrote // after mpfr_out_str - if ((mpfr_inp_str(mpfr_val, fp, MPFR_STR_BASE, MPFR_STR_ROUND) == 0) || (getc(fp) != ' ')) + if ((mpfr_inp_str(vs->mpfr_val, fp, MPFR_STR_BASE, MPFR_STR_ROUND) == 0) || (getc(fp) != ' ')) #endif return awk_false; - value = make_number_mpfr(& mpfr_val, value); + value = make_number_mpfr(vs->mpfr_val, value); } #else fatal(ext_id, _("rwarray extension: GMP/MPFR value in file but compiled without GMP/MPFR support.")); @@ -637,6 +830,8 @@ read_number(FILE *fp, awk_value_t *value, uint32_t code) static awk_ext_func_t func_table[] = { { "writea", do_writea, 2, 2, awk_false, NULL }, { "reada", do_reada, 2, 2, awk_false, NULL }, + { "writeall", do_writeall, 1, 1, awk_false, NULL }, + { "readall", do_readall, 1, 1, awk_false, NULL }, }; |