diff options
-rw-r--r-- | ChangeLog | 101 | ||||
-rw-r--r-- | array.c | 179 | ||||
-rw-r--r-- | awk.h | 74 | ||||
-rw-r--r-- | awkgram.c | 11 | ||||
-rw-r--r-- | awkgram.y | 11 | ||||
-rw-r--r-- | builtin.c | 18 | ||||
-rw-r--r-- | cint_array.c | 96 | ||||
-rwxr-xr-x | configure | 4 | ||||
-rw-r--r-- | configure.ac | 4 | ||||
-rw-r--r-- | debug.c | 20 | ||||
-rw-r--r-- | doc/ChangeLog | 1 | ||||
-rw-r--r-- | doc/gawk.info | 674 | ||||
-rw-r--r-- | doc/gawk.texi | 38 | ||||
-rw-r--r-- | eval.c | 3 | ||||
-rw-r--r-- | ext.c | 164 | ||||
-rw-r--r-- | extension/ChangeLog | 21 | ||||
-rw-r--r-- | extension/bindarr.c | 347 | ||||
-rw-r--r-- | extension/dbarray.awk | 222 | ||||
-rw-r--r-- | extension/fileop.c | 394 | ||||
-rw-r--r-- | extension/record.awk | 252 | ||||
-rw-r--r-- | extension/sparr.c | 163 | ||||
-rw-r--r-- | extension/spec_array.c | 416 | ||||
-rw-r--r-- | extension/spec_array.h | 28 | ||||
-rwxr-xr-x | extension/steps | 10 | ||||
-rw-r--r-- | extension/testdbarray.awk | 21 | ||||
-rwxr-xr-x | extension/testrecord.sh | 19 | ||||
-rw-r--r-- | extension/testsparr.awk | 18 | ||||
-rw-r--r-- | field.c | 4 | ||||
-rw-r--r-- | gawkapi.c | 2 | ||||
-rw-r--r-- | int_array.c | 65 | ||||
-rw-r--r-- | interpret.h | 104 | ||||
-rw-r--r-- | msg.c | 3 | ||||
-rw-r--r-- | po/da.gmo | bin | 42480 -> 42587 bytes | |||
-rw-r--r-- | po/da.po | 13 | ||||
-rw-r--r-- | po/de.gmo | bin | 45571 -> 45571 bytes | |||
-rw-r--r-- | po/de.po | 26 | ||||
-rw-r--r-- | po/es.gmo | bin | 44930 -> 44930 bytes | |||
-rw-r--r-- | po/es.po | 21 | ||||
-rw-r--r-- | po/fi.gmo | bin | 45237 -> 45237 bytes | |||
-rw-r--r-- | po/fi.po | 2 | ||||
-rw-r--r-- | profile.c | 1 | ||||
-rw-r--r-- | str_array.c | 46 | ||||
-rw-r--r-- | symbol.c | 10 |
43 files changed, 3348 insertions, 258 deletions
@@ -26,6 +26,8 @@ <ctype.h> functions since those could rely on the locale. (make_builtin): Adjust test for valid name to call the new functions and return false instead of throwing a fatal error. + (make_old_builtin): Adjust test for valid name to call the new + function. * awk.h (is_identchar): Move from here, ... * awkgram.y (is_identchar): ... to here. This is safe, since the locale is C during parsing the program. @@ -81,6 +83,105 @@ * builtin.c (do_fflush): Make fflush() and fflush("") both flush everything. See the comment in the code. +2012-11-26 Arnold D. Robbins <arnold@skeeve.com> + + * awk.h (Node_old_ext_func, Op_old_ext_builtin): New enum values. + * configure.ac: Use -export-dynamic if supported for old extension + mechanism. + * eval.c (nodetypes): Add Node_old_ext_func. + (optypetab): Add Op_old_ext_builtin. + * ext.c (make_old_builtin): "New" function. + * interpret.h: Special case Op_old_ext_builtin. Add checks for + Node_old_ext_func. + * msg.c: Adjust placement of a comment. + +2012-05-02 John Haque <j.eh@mchsi.com> + + * str_array.c (str_copy): Initialize next pointer in the linked list + to avoid memory corruption. + * int_array.c (int_copy): Ditto. + +2012-04-21 John Haque <j.eh@mchsi.com> + + Shutdown routine for a dynamic extension. + + * awk.h (SRCFILE): New field fini_func. + * ext.c (load_ext): Takes an additional argument to look up and + save the clean up routine in SRCFILE struct. + (INIT_FUNC, FINI_FUNC): Defines for default init and fini routine + names. + (do_ext): Use default for the name of the init or fini routine if + one is not supplied. Adjust call to load_ext(). + (close_extensions): Execute fini routines. + * interpret.h (Op_at_exit): Call close_extensions(). + * msg.c (gawk_exit): Ditto. + * debug.c (close_all): Ditto. + * main.c (main): Adjust call to load_ext(). + * awkgram.y (tokentab): Specify 2nd and 3rd optional arguments + for the extension() built-in. 
+ + Unrelated: + + * interpret.h (Op_arrayfor_init): Use assoc_length for array size. + +2012-04-19 John Haque <j.eh@mchsi.com> + + Enhanced array interface to support transparent implementation + using external storage and ... + + * awk.h (astore): Optional post-assignment store routine for + array subscripts. + (Op_subscript_assign): New opcode to support the store routine. + (alength): New array interface routine for array length. + (assoc_length): New macro. + (assoc_empty): Renamed from array_empty. + * awkgram.y (snode): Append Op_subscript_assign opcode if + (g)sub variable is an array element. + (mk_getline): Same for getline variable. + (mk_assignment): Same if assigning to an array element. + * field.c (set_element): Call store routine if needed. + * builtin.c (do_match): Ditto. + (do_length): Use length routine for array size. + * symbol.c (print_vars): Ditto. + * array.c (null_length): Default function for array length interface. + (asort_actual): Call store routine if defined. + (asort_actual, assoc_list): Use length routine for array size. + (null_array_func): Add length and store routine entries. + * str_array.c (str_array_func): Same. + * cint_array.c (cint_array_func): Same. + * int_array.c (int_array_func): Same. + * eval.c (optypetab): Add Op_subscript_assign. + * profile.c (pprint): Add case Op_subscript_assign. + * interpret.h (set_array, set_idx): New variables to keep track + of an array element with store routine. + (Op_sub_array, Op_subscript_lhs, Op_store_sub, Op_subscript_assign): + Add code to handle array store routine. + * debug.c (print_symbol, print_array, cmp_val, watchpoint_triggered, + initialize_watch_item): Use length routine for array size. + + * awk.h (assoc_kind_t): New typedef for enum assoc_list_flags. + (sort_context_t): Renamed from SORT_CONTEXT. + * array.c (asort_actual, assoc_sort): Adjust. + * cint_array.c (cint_list, tree_list, leaf_list): Adjust. + * int_array.c (int_list): Adjust. 
+ * str_array.c (str_list): Adjust. + +2012-04-18 John Haque <j.eh@mchsi.com> + + * awk.h (atypeof, AFUNC): New macros. + (afunc_t): Renamed typedef from array_ptr. + * array.c (register_array_func, null_lookup): Use AFUNC macro + instead of hard-coded index for array functions. + (asort_actual): Unref null array elements before overwriting. + (force_array): Renamed from get_array. + (null_array): Renamed from init_array. Also initialize flags to 0. + (array_types): Renamed from atypes. + (num_array_types): Renamed from num_atypes. + * interpret.h (r_interpret): In case Op_sub_array, unref null array element. + * str_array.c (str_array_init): Reworked for (re)initialization of array. + * int_array.c (int_array_init): Ditto. + * cint_array.c (cint_array_init): Ditto. + 2012-11-24 Arnold D. Robbins <arnold@skeeve.com> Directory cleanup. @@ -27,20 +27,17 @@ extern FILE *output_fp; extern NODE **fmt_list; /* declared in eval.c */ -extern array_ptr str_array_func[]; -extern array_ptr cint_array_func[]; -extern array_ptr int_array_func[]; static size_t SUBSEPlen; static char *SUBSEP; static char indent_char[] = " "; static NODE **null_lookup(NODE *symbol, NODE *subs); -static NODE **null_afunc(NODE *symbol, NODE *subs); static NODE **null_dump(NODE *symbol, NODE *subs); -static array_ptr null_array_func[] = { - (array_ptr) 0, - (array_ptr) 0, +static afunc_t null_array_func[] = { + (afunc_t) 0, + (afunc_t) 0, + null_length, null_lookup, null_afunc, null_afunc, @@ -48,31 +45,25 @@ static array_ptr null_array_func[] = { null_afunc, null_afunc, null_dump, + (afunc_t) 0, }; #define MAX_ATYPE 10 -static array_ptr *atypes[MAX_ATYPE]; -static int num_atypes = 0; +static afunc_t *array_types[MAX_ATYPE]; +static int num_array_types = 0; -/* - * register_array_func --- add routines to handle arrays. - * - * index 0 : initialization. - * index 1 : check if index is compatible. - * index 8 : array dump, memory and other statistics (do_adump). 
- */ - +/* register_array_func --- add routines to handle arrays */ int -register_array_func(array_ptr *afunc) +register_array_func(afunc_t *afunc) { - if (afunc && num_atypes < MAX_ATYPE) { - if (afunc != str_array_func && ! afunc[1]) + if (afunc && num_array_types < MAX_ATYPE) { + if (afunc != str_array_func && ! afunc[AFUNC(atypeof)]) return false; - atypes[num_atypes++] = afunc; - if (afunc[0]) /* execute init routine if any */ - (void) (*afunc[0])(NULL, NULL); + array_types[num_array_types++] = afunc; + if (afunc[AFUNC(ainit)]) /* execute init routine if any */ + (void) (*afunc[AFUNC(ainit)])(NULL, NULL); return true; } return false; @@ -108,21 +99,19 @@ make_array() } -/* init_array --- (re)initialize an array node */ +/* null_array --- force symbol to be an empty typeless array */ void -init_array(NODE *symbol) +null_array(NODE *symbol) { symbol->type = Node_var_array; symbol->array_funcs = null_array_func; symbol->buckets = NULL; symbol->table_size = symbol->array_size = 0; symbol->array_capacity = 0; - + symbol->flags = 0; assert(symbol->xarray == NULL); - /* symbol->xarray = NULL; */ - - /* flags, vname, parent_array not (re)initialized */ + /* vname, parent_array not (re)initialized */ } @@ -132,30 +121,40 @@ static NODE ** null_lookup(NODE *symbol, NODE *subs) { int i; - array_ptr *afunc = NULL; + afunc_t *afunc = NULL; assert(symbol->table_size == 0); - /* Check which array type wants to accept this sub; traverse + /* + * Check which array type wants to accept this sub; traverse * array type list in reverse order. 
*/ - for (i = num_atypes - 1; i >= 1; i--) { - afunc = atypes[i]; - if (afunc[1](symbol, subs) != NULL) + for (i = num_array_types - 1; i >= 1; i--) { + afunc = array_types[i]; + if (afunc[AFUNC(atypeof)](symbol, subs) != NULL) break; } if (i == 0 || afunc == NULL) - afunc = atypes[0]; /* default is str_array_func */ + afunc = array_types[0]; /* default is str_array_func */ symbol->array_funcs = afunc; /* We have the right type of array; install the subscript */ return symbol->alookup(symbol, subs); } +/* null_length --- default function for array length interface */ -/* null_afunc --- dummy function for an empty array */ +NODE ** +null_length(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED) +{ + static NODE *tmp; + tmp = symbol; + return & tmp; +} -static NODE ** +/* null_afunc --- default function for array interface */ + +NODE ** null_afunc(NODE *symbol ATTRIBUTE_UNUSED, NODE *subs ATTRIBUTE_UNUSED) { return NULL; @@ -317,14 +316,14 @@ array_vname(const NODE *symbol) /* - * get_array --- proceed to the actual Node_var_array, + * force_array --- proceed to the actual Node_var_array, * change Node_var_new to an array. * If canfatal and type isn't good, die fatally, * otherwise return the final actual value. */ NODE * -get_array(NODE *symbol, bool canfatal) +force_array(NODE *symbol, bool canfatal) { NODE *save_symbol = symbol; bool isparam = false; @@ -338,7 +337,7 @@ get_array(NODE *symbol, bool canfatal) switch (symbol->type) { case Node_var_new: - init_array(symbol); + null_array(symbol); symbol->parent_array = NULL; /* main array has no parent */ /* fall through */ case Node_var_array: @@ -432,7 +431,8 @@ concat_exp(int nargs, bool do_subsep) } -/* adjust_fcall_stack: remove subarray(s) of symbol[] from +/* + * adjust_fcall_stack: remove subarray(s) of symbol[] from * function call stack. 
*/ @@ -480,7 +480,8 @@ adjust_fcall_stack(NODE *symbol, int nsubs) && symbol->parent_array != NULL && nsubs > 0 ) { - /* 'symbol' is a subarray, and 'r' is the same subarray: + /* + * 'symbol' is a subarray, and 'r' is the same subarray: * * function f(c, d) { delete c[0]; .. } * BEGIN { a[0][0] = 1; f(a, a[0]); .. } @@ -491,9 +492,8 @@ adjust_fcall_stack(NODE *symbol, int nsubs) * BEGIN { a[0][0] = 1; f(a[0], a[0]); ...} */ - init_array(r); + null_array(r); r->parent_array = NULL; - r->flags = 0; continue; } @@ -501,7 +501,8 @@ adjust_fcall_stack(NODE *symbol, int nsubs) for (n = n->parent_array; n != NULL; n = n->parent_array) { assert(n->type == Node_var_array); if (n == symbol) { - /* 'r' is a subarray of 'symbol': + /* + * 'r' is a subarray of 'symbol': * * function f(c, d) { delete c; .. use d as array .. } * BEGIN { a[0][0] = 1; f(a, a[0]); .. } @@ -509,9 +510,8 @@ adjust_fcall_stack(NODE *symbol, int nsubs) * BEGIN { a[0][0][0][0] = 1; f(a[0], a[0][0][0]); .. } * */ - init_array(r); + null_array(r); r->parent_array = NULL; - r->flags = 0; break; } } @@ -626,14 +626,13 @@ void do_delete_loop(NODE *symbol, NODE **lhs) { NODE **list; - NODE fl; + NODE akind; - if (array_empty(symbol)) - return; + akind.flags = AINDEX|ADELETE; /* need a single index */ + list = symbol->alist(symbol, & akind); - fl.flags = AINDEX|ADELETE; /* need a single index */ - list = symbol->alist(symbol, & fl); - assert(list != NULL); + if (assoc_empty(symbol)) + return; unref(*lhs); *lhs = list[0]; @@ -755,7 +754,8 @@ do_adump(int nargs) static NODE ndump; long depth = 0; - /* depth < 0, no index and value info. + /* + * depth < 0, no index and value info. * = 0, main array index and value info; does not descend into sub-arrays. * > 0, descends into 'depth' sub-arrays, and prints index and value info. 
*/ @@ -780,11 +780,11 @@ do_adump(int nargs) /* asort_actual --- do the actual work to sort the input array */ static NODE * -asort_actual(int nargs, SORT_CTXT ctxt) +asort_actual(int nargs, sort_context_t ctxt) { NODE *array, *dest = NULL, *result; NODE *r, *subs, *s; - NODE **list = NULL, **ptr; + NODE **list = NULL, **ptr, **lhs; unsigned long num_elems, i; const char *sort_str; @@ -833,11 +833,11 @@ asort_actual(int nargs, SORT_CTXT ctxt) } } - num_elems = array->table_size; - if (num_elems > 0) /* sorting happens inside assoc_list */ - list = assoc_list(array, sort_str, ctxt); + /* sorting happens inside assoc_list */ + list = assoc_list(array, sort_str, ctxt); DEREF(s); + num_elems = assoc_length(array); if (num_elems == 0 || list == NULL) { /* source array is empty */ if (dest != NULL && dest != array) @@ -866,7 +866,11 @@ asort_actual(int nargs, SORT_CTXT ctxt) for (i = 1, ptr = list; i <= num_elems; i++, ptr += 2) { subs = make_number(i); - *assoc_lookup(result, subs) = *ptr; + lhs = assoc_lookup(result, subs); + unref(*lhs); + *lhs = *ptr; + if (result->astore != NULL) + (*result->astore)(result, subs); unref(subs); } } else { @@ -882,9 +886,11 @@ asort_actual(int nargs, SORT_CTXT ctxt) /* value node */ r = *ptr++; - if (r->type == Node_val) - *assoc_lookup(result, subs) = dupnode(r); - else { + if (r->type == Node_val) { + lhs = assoc_lookup(result, subs); + unref(*lhs); + *lhs = dupnode(r); + } else { NODE *arr; arr = make_array(); subs = force_string(subs); @@ -892,8 +898,12 @@ asort_actual(int nargs, SORT_CTXT ctxt) subs->stptr = NULL; subs->flags &= ~STRCUR; arr->parent_array = array; /* actual parent, not the temporary one. 
*/ - *assoc_lookup(result, subs) = assoc_copy(r, arr); + lhs = assoc_lookup(result, subs); + unref(*lhs); + *lhs = assoc_copy(r, arr); } + if (result->astore != NULL) + (*result->astore)(result, subs); unref(subs); } } @@ -1226,14 +1236,14 @@ sort_user_func(const void *p1, const void *p2) /* assoc_list -- construct, and optionally sort, a list of array elements */ NODE ** -assoc_list(NODE *symbol, const char *sort_str, SORT_CTXT sort_ctxt) +assoc_list(NODE *symbol, const char *sort_str, sort_context_t sort_ctxt) { typedef int (*qsort_compfunc)(const void *, const void *); static const struct qsort_funcs { const char *name; qsort_compfunc comp_func; - enum assoc_list_flags flags; + assoc_kind_t kind; } sort_funcs[] = { { "@ind_str_asc", sort_up_index_string, AINDEX|AISTR|AASC }, { "@ind_num_asc", sort_up_index_number, AINDEX|AINUM|AASC }, @@ -1248,25 +1258,22 @@ assoc_list(NODE *symbol, const char *sort_str, SORT_CTXT sort_ctxt) { "@unsorted", 0, AINDEX }, }; - /* N.B.: AASC and ADESC are hints to the specific array types. + /* + * N.B.: AASC and ADESC are hints to the specific array types. * See cint_list() in cint_array.c. 
*/ NODE **list; - NODE fl; + NODE akind; unsigned long num_elems, j; int elem_size, qi; qsort_compfunc cmp_func = 0; INSTRUCTION *code = NULL; extern int currule; int save_rule = 0; + assoc_kind_t assoc_kind = 0; - num_elems = symbol->table_size; - if (num_elems == 0) - return NULL; - elem_size = 1; - fl.flags = 0; for (qi = 0, j = sizeof(sort_funcs)/sizeof(sort_funcs[0]); qi < j; qi++) { if (strcmp(sort_funcs[qi].name, sort_str) == 0) @@ -1275,15 +1282,15 @@ assoc_list(NODE *symbol, const char *sort_str, SORT_CTXT sort_ctxt) if (qi < j) { cmp_func = sort_funcs[qi].comp_func; - fl.flags = sort_funcs[qi].flags; + assoc_kind = sort_funcs[qi].kind; if (symbol->array_funcs != cint_array_func) - fl.flags &= ~(AASC|ADESC); + assoc_kind &= ~(AASC|ADESC); - if (sort_ctxt != SORTED_IN || (fl.flags & AVALUE) != 0) { + if (sort_ctxt != SORTED_IN || (assoc_kind & AVALUE) != 0) { /* need index and value pair in the list */ - fl.flags |= (AINDEX|AVALUE); + assoc_kind |= (AINDEX|AVALUE); elem_size = 2; } @@ -1291,8 +1298,7 @@ assoc_list(NODE *symbol, const char *sort_str, SORT_CTXT sort_ctxt) NODE *f; const char *sp; - for (sp = sort_str; *sp != '\0' - && ! isspace((unsigned char) *sp); sp++) + for (sp = sort_str; *sp != '\0' && ! 
isspace((unsigned char) *sp); sp++) continue; /* empty string or string with space(s) not valid as function name */ @@ -1306,7 +1312,7 @@ assoc_list(NODE *symbol, const char *sort_str, SORT_CTXT sort_ctxt) cmp_func = sort_user_func; /* need index and value pair in the list */ - fl.flags |= (AVALUE|AINDEX); + assoc_kind |= (AVALUE|AINDEX); elem_size = 2; /* make function call instructions */ @@ -1316,7 +1322,8 @@ assoc_list(NODE *symbol, const char *sort_str, SORT_CTXT sort_ctxt) (code + 1)->expr_count = 4; /* function takes 4 arguments */ code->nexti = bcalloc(Op_stop, 1, 0); - /* make non-redirected getline, exit, `next' and `nextfile' fatal in + /* + * make non-redirected getline, exit, `next' and `nextfile' fatal in * callback function by setting currule in interpret() * to undefined (0). */ @@ -1327,11 +1334,15 @@ assoc_list(NODE *symbol, const char *sort_str, SORT_CTXT sort_ctxt) PUSH_CODE(code); } - list = symbol->alist(symbol, & fl); + akind.flags = (unsigned int) assoc_kind; /* kludge */ + list = symbol->alist(symbol, & akind); + assoc_kind = (assoc_kind_t) akind.flags; /* symbol->alist can modify it */ - if (list == NULL || ! cmp_func || (fl.flags & (AASC|ADESC)) != 0) + if (list == NULL || ! cmp_func || (assoc_kind & (AASC|ADESC)) != 0) return list; /* empty list or unsorted, or list already sorted */ + num_elems = assoc_length(symbol); + qsort(list, num_elems, elem_size * sizeof(NODE *), cmp_func); /* shazzam! */ if (cmp_func == sort_user_func) { @@ -1341,7 +1352,7 @@ assoc_list(NODE *symbol, const char *sort_str, SORT_CTXT sort_ctxt) bcfree(code); /* Op_func_call */ } - if (sort_ctxt == SORTED_IN && (fl.flags & (AINDEX|AVALUE)) == (AINDEX|AVALUE)) { + if (sort_ctxt == SORTED_IN && (assoc_kind & (AINDEX|AVALUE)) == (AINDEX|AVALUE)) { /* relocate all index nodes to the first half of the list. 
*/ for (j = 1; j < num_elems; j++) list[j] = list[2 * j]; @@ -294,6 +294,7 @@ typedef enum nodevals { Node_param_list, /* lnode is a variable, rnode is more list */ Node_func, /* lnode is param. list, rnode is body */ Node_ext_func, /* extension function, code_ptr is builtin code */ + Node_old_ext_func, /* extension function, code_ptr is builtin code */ Node_array_ref, /* array passed by ref as parameter */ Node_array_tree, /* Hashed array tree (HAT) */ @@ -344,7 +345,7 @@ typedef union bucket_item { struct exp_instruction; typedef int (*Func_print)(FILE *, const char *, ...); -typedef struct exp_node **(*array_ptr)(struct exp_node *, struct exp_node *); +typedef struct exp_node **(*afunc_t)(struct exp_node *, struct exp_node *); /* * NOTE - this struct is a rather kludgey -- it is packed to minimize @@ -357,13 +358,14 @@ typedef struct exp_node { struct exp_node *lptr; struct exp_instruction *li; long ll; - array_ptr *lp; + afunc_t *lp; } l; union { struct exp_node *rptr; Regexp *preg; struct exp_node **av; BUCKET **bv; + void *aq; void (*uptr)(void); struct exp_instruction *iptr; } r; @@ -496,6 +498,7 @@ typedef struct exp_node { /* Node_var_array: */ #define buckets sub.nodep.r.bv #define nodes sub.nodep.r.av +#define a_opaque sub.nodep.r.aq #define array_funcs sub.nodep.l.lp #define array_base sub.nodep.l.ll #define table_size sub.nodep.reflags @@ -504,17 +507,21 @@ typedef struct exp_node { #define xarray sub.nodep.rn #define parent_array sub.nodep.x.extra -/* array_funcs[0] is the array initialization function and - * array_funcs[1] is the index type checking function - */ -#define alookup array_funcs[2] -#define aexists array_funcs[3] -#define aclear array_funcs[4] -#define aremove array_funcs[5] -#define alist array_funcs[6] -#define acopy array_funcs[7] -#define adump array_funcs[8] -#define NUM_AFUNCS 9 /* # of entries in array_funcs */ +#define ainit array_funcs[0] +#define atypeof array_funcs[1] +#define alength array_funcs[2] +#define alookup 
array_funcs[3] +#define aexists array_funcs[4] +#define aclear array_funcs[5] +#define aremove array_funcs[6] +#define alist array_funcs[7] +#define acopy array_funcs[8] +#define adump array_funcs[9] +#define astore array_funcs[10] +#define NUM_AFUNCS 11 /* # of entries in array_funcs */ + +/* array func to index mapping */ +#define AFUNC(F) (& ((NODE *) 0)->F - ((NODE *) 0)->array_funcs) /* Node_array_ref: */ #define orig_array lnode @@ -622,6 +629,7 @@ typedef enum opcodeval { Op_builtin, Op_sub_builtin, /* sub, gsub and gensub */ Op_ext_builtin, + Op_old_ext_builtin, /* temporary */ Op_in_array, /* boolean test of membership in array */ /* function call instruction */ @@ -651,6 +659,7 @@ typedef enum opcodeval { Op_var_update, /* update value of NR, NF or FNR */ Op_var_assign, Op_field_assign, + Op_subscript_assign, Op_after_beginfile, Op_after_endfile, @@ -956,6 +965,8 @@ typedef struct srcfile { int fd; int maxlen; /* size of the longest line */ + void (*fini_func)(); /* dynamic extension of type SRC_EXTLIB */ + char *lexptr; char *lexend; char *lexeme; @@ -994,6 +1005,8 @@ enum block_id { BLOCK_MAX /* count */ }; +typedef int (*Func_pre_exec)(INSTRUCTION **); +typedef void (*Func_post_exec)(INSTRUCTION *); #ifndef LONG_MAX #define LONG_MAX ((long)(~(1L << (sizeof (long) * 8 - 1)))) @@ -1040,8 +1053,10 @@ extern NODE *(*str2number)(NODE *); extern NODE *(*format_val)(const char *, int, NODE *); extern int (*cmp_numbers)(const NODE *, const NODE *); -typedef int (*Func_pre_exec)(INSTRUCTION **); -typedef void (*Func_post_exec)(INSTRUCTION *); +/* built-in array types */ +extern afunc_t str_array_func[]; +extern afunc_t cint_array_func[]; +extern afunc_t int_array_func[]; extern BLOCK nextfree[]; extern bool field0_valid; @@ -1291,7 +1306,8 @@ if (val++) \ if (--val) \ memcpy((char *) tag, (const char *) (stack), sizeof(jmp_buf)) -#define array_empty(a) ((a)->table_size == 0) +#define assoc_length(a) ((*((a)->alength(a, NULL)))->table_size) +#define 
assoc_empty(a) (assoc_length(a) == 0) #define assoc_lookup(a, s) ((a)->alookup(a, s)) /* assoc_clear --- flush all the values in symbol[] */ @@ -1302,8 +1318,8 @@ if (--val) \ /* ------------- Function prototypes or defs (as appropriate) ------------- */ /* array.c */ -typedef enum sort_context { SORTED_IN = 1, ASORT, ASORTI } SORT_CTXT; -enum assoc_list_flags { +typedef enum { SORTED_IN = 1, ASORT, ASORTI } sort_context_t; +typedef enum { AINDEX = 0x001, /* list of indices */ AVALUE = 0x002, /* list of values */ AINUM = 0x004, /* numeric index */ @@ -1313,20 +1329,22 @@ enum assoc_list_flags { AASC = 0x040, /* ascending order */ ADESC = 0x080, /* descending order */ ADELETE = 0x100 /* need a single index; for use in do_delete_loop */ -}; +} assoc_kind_t; extern NODE *make_array(void); -extern void init_array(NODE *symbol); -extern NODE *get_array(NODE *symbol, bool canfatal); +extern void null_array(NODE *symbol); +extern NODE *force_array(NODE *symbol, bool canfatal); extern const char *make_aname(const NODE *symbol); extern const char *array_vname(const NODE *symbol); extern void array_init(void); -extern int register_array_func(array_ptr *afunc); +extern int register_array_func(afunc_t *afunc); +extern NODE **null_length(NODE *symbol, NODE *subs); +extern NODE **null_afunc(NODE *symbol, NODE *subs); extern void set_SUBSEP(void); extern NODE *concat_exp(int nargs, bool do_subsep); extern NODE *assoc_copy(NODE *symbol, NODE *newsymb); extern void assoc_dump(NODE *symbol, NODE *p); -extern NODE **assoc_list(NODE *symbol, const char *sort_str, SORT_CTXT sort_ctxt); +extern NODE **assoc_list(NODE *symbol, const char *sort_str, sort_context_t sort_ctxt); extern void assoc_info(NODE *subs, NODE *val, NODE *p, const char *aname); extern void do_delete(NODE *symbol, int nsubs); extern void do_delete_loop(NODE *symbol, NODE **lhs); @@ -1428,8 +1446,12 @@ extern STACK_ITEM *grow_stack(void); extern void dump_fcall_stack(FILE *fp); extern int 
register_exec_hook(Func_pre_exec preh, Func_post_exec posth); /* ext.c */ -void load_ext(const char *lib_name); +extern NODE *do_ext(int nargs); +void load_ext(const char *lib_name); /* temporary */ +extern NODE *load_old_ext(SRCFILE *s, const char *init_func, const char *fini_func, NODE *obj); +extern void close_extensions(void); #ifdef DYNAMIC +extern void make_old_builtin(const char *, NODE *(*)(int), int); extern awk_bool_t make_builtin(const awk_ext_func_t *); extern NODE *get_argument(int); extern NODE *get_actual_argument(int, bool, bool); @@ -1690,7 +1712,7 @@ POP_ARRAY() { NODE *t = POP(); - return (t->type == Node_var_array) ? t : get_array(t, true); + return (t->type == Node_var_array) ? t : force_array(t, true); } /* POP_PARAM --- get the top parameter, array or scalar */ @@ -1700,7 +1722,7 @@ POP_PARAM() { NODE *t = POP(); - return (t->type == Node_var_array) ? t : get_array(t, false); + return (t->type == Node_var_array) ? t : force_array(t, false); } /* POP_SCALAR --- pop the scalar at the top of the stack */ @@ -4392,6 +4392,7 @@ static const struct token tokentab[] = { {"eval", Op_symbol, LEX_EVAL, 0, 0, 0}, {"exit", Op_K_exit, LEX_EXIT, 0, 0, 0}, {"exp", Op_builtin, LEX_BUILTIN, A(1), do_exp, MPF(exp)}, +{"extension", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3), do_ext, 0}, {"fflush", Op_builtin, LEX_BUILTIN, A(0)|A(1), do_fflush, 0}, {"for", Op_K_for, LEX_FOR, BREAK|CONTINUE, 0, 0}, {"func", Op_func, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0, 0}, @@ -6336,7 +6337,11 @@ snode(INSTRUCTION *subn, INSTRUCTION *r) subn->lasti->assign_ctxt = Op_sub_builtin; subn->lasti->field_assign = (Func_ptr) 0; ip->target_assign = subn->lasti; + } else if (ip->opcode == Op_subscript_lhs) { + (void) list_append(subn, instruction(Op_subscript_assign)); + subn->lasti->assign_ctxt = Op_sub_builtin; } + return subn; } else { @@ -7542,6 +7547,8 @@ mk_assignment(INSTRUCTION *lhs, INSTRUCTION *rhs, INSTRUCTION *op) (void) list_append(ip, instruction(Op_field_assign)); 
ip->lasti->field_assign = (Func_ptr) 0; tp->target_assign = ip->lasti; + } else if (tp->opcode == Op_subscript_lhs) { + (void) list_append(ip, instruction(Op_subscript_assign)); } return ip; @@ -7751,7 +7758,11 @@ mk_getline(INSTRUCTION *op, INSTRUCTION *var, INSTRUCTION *redir, int redirtype) asgn->assign_ctxt = op->opcode; asgn->field_assign = (Func_ptr) 0; /* determined at run time */ tp->target_assign = asgn; + } else if (tp->opcode == Op_subscript_lhs) { + asgn = instruction(Op_subscript_assign); + asgn->assign_ctxt = op->opcode; } + if (redir != NULL) { ip = list_merge(redir, var); (void) list_append(ip, op); @@ -1859,6 +1859,7 @@ static const struct token tokentab[] = { {"eval", Op_symbol, LEX_EVAL, 0, 0, 0}, {"exit", Op_K_exit, LEX_EXIT, 0, 0, 0}, {"exp", Op_builtin, LEX_BUILTIN, A(1), do_exp, MPF(exp)}, +{"extension", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3), do_ext, 0}, {"fflush", Op_builtin, LEX_BUILTIN, A(0)|A(1), do_fflush, 0}, {"for", Op_K_for, LEX_FOR, BREAK|CONTINUE, 0, 0}, {"func", Op_func, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0, 0}, @@ -3803,7 +3804,11 @@ snode(INSTRUCTION *subn, INSTRUCTION *r) subn->lasti->assign_ctxt = Op_sub_builtin; subn->lasti->field_assign = (Func_ptr) 0; ip->target_assign = subn->lasti; + } else if (ip->opcode == Op_subscript_lhs) { + (void) list_append(subn, instruction(Op_subscript_assign)); + subn->lasti->assign_ctxt = Op_sub_builtin; } + return subn; } else { @@ -5009,6 +5014,8 @@ mk_assignment(INSTRUCTION *lhs, INSTRUCTION *rhs, INSTRUCTION *op) (void) list_append(ip, instruction(Op_field_assign)); ip->lasti->field_assign = (Func_ptr) 0; tp->target_assign = ip->lasti; + } else if (tp->opcode == Op_subscript_lhs) { + (void) list_append(ip, instruction(Op_subscript_assign)); } return ip; @@ -5218,7 +5225,11 @@ mk_getline(INSTRUCTION *op, INSTRUCTION *var, INSTRUCTION *redir, int redirtype) asgn->assign_ctxt = op->opcode; asgn->field_assign = (Func_ptr) 0; /* determined at run time */ tp->target_assign = asgn; + } else 
if (tp->opcode == Op_subscript_lhs) { + asgn = instruction(Op_subscript_assign); + asgn->assign_ctxt = op->opcode; } + if (redir != NULL) { ip = list_merge(redir, var); (void) list_append(ip, op); @@ -514,6 +514,7 @@ do_length(int nargs) tmp = POP(); if (tmp->type == Node_var_array) { static bool warned = false; + unsigned long size; if (do_posix) fatal(_("length: received array argument")); @@ -521,7 +522,15 @@ do_length(int nargs) warned = true; lintwarn(_("`length(array)' is a gawk extension")); } - return make_number((AWKNUM) tmp->table_size); + + /* + * Support for deferred loading of array elements requires that + * we use the array length interface even though it isn't + * necessary for the built-in array types. + */ + + size = assoc_length(tmp); + return make_number(size); } assert(tmp->type == Node_val); @@ -2472,6 +2481,9 @@ do_match(int nargs) lhs = assoc_lookup(dest, sub); unref(*lhs); *lhs = it; + /* execute post-assignment routine if any */ + if (dest->astore != NULL) + (*dest->astore)(dest, sub); unref(sub); sprintf(buff, "%d", ii); @@ -2495,6 +2507,8 @@ do_match(int nargs) lhs = assoc_lookup(dest, sub); unref(*lhs); *lhs = it; + if (dest->astore != NULL) + (*dest->astore)(dest, sub); unref(sub); memcpy(buf, buff, ilen); @@ -2508,6 +2522,8 @@ do_match(int nargs) lhs = assoc_lookup(dest, sub); unref(*lhs); *lhs = it; + if (dest->astore != NULL) + (*dest->astore)(dest, sub); unref(sub); } } diff --git a/cint_array.c b/cint_array.c index 625730a7..29b6fdff 100644 --- a/cint_array.c +++ b/cint_array.c @@ -39,7 +39,8 @@ extern NODE **is_integer(NODE *symbol, NODE *subs); static int NHAT = 10; static long THRESHOLD; -/* What is the optimium NHAT ? timing results suggest that 10 is a good choice, +/* + * What is the optimium NHAT ? timing results suggest that 10 is a good choice, * although differences aren't that significant for > 10. 
*/ @@ -57,9 +58,10 @@ static NODE **cint_dump(NODE *symbol, NODE *ndump); static void cint_print(NODE *symbol); #endif -array_ptr cint_array_func[] = { +afunc_t cint_array_func[] = { cint_array_init, is_uinteger, + null_length, cint_lookup, cint_exists, cint_clear, @@ -67,6 +69,7 @@ array_ptr cint_array_func[] = { cint_list, cint_copy, cint_dump, + (afunc_t) 0, }; static inline int cint_hash(long k); @@ -79,7 +82,7 @@ static NODE **tree_exists(NODE *tree, long k); static void tree_clear(NODE *tree); static int tree_remove(NODE *symbol, NODE *tree, long k); static void tree_copy(NODE *newsymb, NODE *tree, NODE *newtree); -static long tree_list(NODE *tree, NODE **list, unsigned int flags); +static long tree_list(NODE *tree, NODE **list, assoc_kind_t assoc_kind); static inline NODE **tree_find(NODE *tree, long k, int i); static void tree_info(NODE *tree, NODE *ndump, const char *aname); static size_t tree_kilobytes(NODE *tree); @@ -92,7 +95,7 @@ static inline NODE **leaf_exists(NODE *array, long k); static void leaf_clear(NODE *array); static int leaf_remove(NODE *symbol, NODE *array, long k); static void leaf_copy(NODE *newsymb, NODE *array, NODE *newarray); -static long leaf_list(NODE *array, NODE **list, unsigned int flags); +static long leaf_list(NODE *array, NODE **list, assoc_kind_t assoc_kind); static void leaf_info(NODE *array, NODE *ndump, const char *aname); #ifdef ARRAYDEBUG static void leaf_print(NODE *array, size_t bi, int indent_level); @@ -140,16 +143,21 @@ static const long power_two_table[] = { * */ -/* cint_array_init --- check relevant environment variables */ +/* cint_array_init --- array initialization routine */ static NODE ** cint_array_init(NODE *symbol ATTRIBUTE_UNUSED, NODE *subs ATTRIBUTE_UNUSED) { - long newval; + if (symbol == NULL) { + long newval; + + /* check relevant environment variables */ + if ((newval = getenv_long("NHAT")) > 1 && newval < INT32_BIT) + NHAT = newval; + THRESHOLD = power_two_table[NHAT + 1]; + } else + 
null_array(symbol); - if ((newval = getenv_long("NHAT")) > 1 && newval < INT32_BIT) - NHAT = newval; - THRESHOLD = power_two_table[NHAT + 1]; return (NODE **) ! NULL; } @@ -235,13 +243,11 @@ xinstall: symbol->table_size++; if (xn == NULL) { - extern array_ptr int_array_func[]; - extern array_ptr str_array_func[]; - xn = symbol->xarray = make_array(); xn->vname = symbol->vname; /* shallow copy */ - /* Avoid using assoc_lookup(xn, subs) which may lead + /* + * Avoid using assoc_lookup(xn, subs) which may lead * to infinite recursion. */ @@ -300,7 +306,7 @@ cint_clear(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED) } efree(symbol->nodes); - init_array(symbol); /* re-initialize symbol */ + symbol->ainit(symbol, NULL); /* re-initialize symbol */ return NULL; } @@ -337,7 +343,7 @@ cint_remove(NODE *symbol, NODE *subs) if (xn == NULL && symbol->table_size == 0) { efree(symbol->nodes); - init_array(symbol); /* re-initialize array 'symbol' */ + symbol->ainit(symbol, NULL); /* re-initialize array 'symbol' */ } else if(xn != NULL && symbol->table_size == xn->table_size) { /* promote xn to symbol */ @@ -416,15 +422,16 @@ cint_list(NODE *symbol, NODE *t) unsigned long k = 0, num_elems, list_size; size_t j, ja, jd; int elem_size = 1; + assoc_kind_t assoc_kind; num_elems = symbol->table_size; if (num_elems == 0) return NULL; - - if ((t->flags & (AINDEX|AVALUE|ADELETE)) == (AINDEX|ADELETE)) + assoc_kind = (assoc_kind_t) t->flags; + if ((assoc_kind & (AINDEX|AVALUE|ADELETE)) == (AINDEX|ADELETE)) num_elems = 1; - if ((t->flags & (AINDEX|AVALUE)) == (AINDEX|AVALUE)) + if ((assoc_kind & (AINDEX|AVALUE)) == (AINDEX|AVALUE)) elem_size = 2; list_size = num_elems * elem_size; @@ -432,7 +439,8 @@ cint_list(NODE *symbol, NODE *t) xn = symbol->xarray; list = xn->alist(xn, t); assert(list != NULL); - t->flags &= ~(AASC|ADESC); + assoc_kind &= ~(AASC|ADESC); + t->flags = (unsigned int) assoc_kind; if (num_elems == 1 || num_elems == xn->table_size) return list; erealloc(list, NODE **, list_size * 
sizeof(NODE *), "cint_list"); @@ -440,18 +448,20 @@ cint_list(NODE *symbol, NODE *t) } else emalloc(list, NODE **, list_size * sizeof(NODE *), "cint_list"); - - if ((t->flags & AINUM) == 0) /* not sorting by "index num" */ - t->flags &= ~(AASC|ADESC); + if ((assoc_kind & AINUM) == 0) { + /* not sorting by "index num" */ + assoc_kind &= ~(AASC|ADESC); + t->flags = (unsigned int) assoc_kind; + } /* populate it with index in ascending or descending order */ for (ja = NHAT, jd = INT32_BIT - 1; ja < INT32_BIT && jd >= NHAT; ) { - j = (t->flags & ADESC) != 0 ? jd-- : ja++; + j = (assoc_kind & ADESC) != 0 ? jd-- : ja++; tn = symbol->nodes[j]; if (tn == NULL) continue; - k += tree_list(tn, list + k, t->flags); + k += tree_list(tn, list + k, assoc_kind); if (k >= list_size) return list; } @@ -471,7 +481,6 @@ cint_dump(NODE *symbol, NODE *ndump) AWKNUM kb = 0; extern AWKNUM int_kilobytes(NODE *symbol); extern AWKNUM str_kilobytes(NODE *symbol); - extern array_ptr int_array_func[]; indent_level = ndump->alevel; @@ -565,7 +574,8 @@ cint_hash(long k) /* Find the Floor(log base 2 of 32-bit integer) */ - /* Warren Jr., Henry S. (2002). Hacker's Delight. + /* + * Warren Jr., Henry S. (2002). Hacker's Delight. * Addison Wesley. pp. pp. 215. ISBN 978-0201914658. * * r = 0; @@ -577,7 +587,8 @@ cint_hash(long k) */ - /* Slightly different code copied from: + /* + * Slightly different code copied from: * * http://www-graphics.stanford.edu/~seander/bithacks.html * Bit Twiddling Hacks @@ -872,7 +883,7 @@ tree_find(NODE *tree, long k, int i) /* tree_list --- return a list of items in the HAT */ static long -tree_list(NODE *tree, NODE **list, unsigned int flags) +tree_list(NODE *tree, NODE **list, assoc_kind_t assoc_kind) { NODE *tn; size_t j, cj, hsize; @@ -885,15 +896,15 @@ tree_list(NODE *tree, NODE **list, unsigned int flags) hsize /= 2; for (j = 0; j < hsize; j++) { - cj = (flags & ADESC) != 0 ? (hsize - 1 - j) : j; + cj = (assoc_kind & ADESC) != 0 ? 
(hsize - 1 - j) : j; tn = tree->nodes[cj]; if (tn == NULL) continue; if (tn->type == Node_array_tree) - k += tree_list(tn, list + k, flags); + k += tree_list(tn, list + k, assoc_kind); else - k += leaf_list(tn, list + k, flags); - if ((flags & ADELETE) != 0 && k >= 1) + k += leaf_list(tn, list + k, assoc_kind); + if ((assoc_kind & ADELETE) != 0 && k >= 1) return k; } return k; @@ -1017,9 +1028,10 @@ tree_print(NODE *tree, size_t bi, int indent_level) /*--------------------- leaf (linear 1-D) array --------------------*/ -/* leaf_lookup --- find an integer subscript in the array; Install it if - it isn't there. -*/ +/* + * leaf_lookup --- find an integer subscript in the array; Install it if + * it isn't there. + */ static inline NODE ** leaf_lookup(NODE *symbol, NODE *array, long k, long size, long base) @@ -1138,7 +1150,7 @@ leaf_copy(NODE *newsymb, NODE *array, NODE *newarray) /* leaf_list --- return a list of items */ static long -leaf_list(NODE *array, NODE **list, unsigned int flags) +leaf_list(NODE *array, NODE **list, assoc_kind_t assoc_kind) { NODE *r, *subs; long num, i, ci, k = 0; @@ -1146,14 +1158,14 @@ leaf_list(NODE *array, NODE **list, unsigned int flags) static char buf[100]; for (i = 0; i < size; i++) { - ci = (flags & ADESC) != 0 ? (size - 1 - i) : i; + ci = (assoc_kind & ADESC) != 0 ? 
(size - 1 - i) : i; r = array->nodes[ci]; if (r == NULL) continue; /* index */ num = array->array_base + ci; - if ((flags & AISTR) != 0) { + if ((assoc_kind & AISTR) != 0) { sprintf(buf, "%ld", num); subs = make_string(buf, strlen(buf)); subs->numbr = num; @@ -1165,16 +1177,16 @@ leaf_list(NODE *array, NODE **list, unsigned int flags) list[k++] = subs; /* value */ - if ((flags & AVALUE) != 0) { + if ((assoc_kind & AVALUE) != 0) { if (r->type == Node_val) { - if ((flags & AVNUM) != 0) + if ((assoc_kind & AVNUM) != 0) (void) force_number(r); - else if ((flags & AVSTR) != 0) + else if ((assoc_kind & AVSTR) != 0) r = force_string(r); } list[k++] = r; } - if ((flags & ADELETE) != 0 && k >= 1) + if ((assoc_kind & ADELETE) != 0 && k >= 1) return k; } @@ -10086,6 +10086,10 @@ fi $as_echo "#define DYNAMIC 1" >>confdefs.h + if uname | $EGREP -i 'linux|freebsd' > /dev/null + then + LDFLAGS="$LDFLAGS -export-dynamic" + fi fi fi diff --git a/configure.ac b/configure.ac index b7d59f0e..bccd2351 100644 --- a/configure.ac +++ b/configure.ac @@ -295,6 +295,10 @@ AC_CHECK_HEADER(dlfcn.h, if test "$gawk_have_dlopen" = yes then AC_DEFINE([DYNAMIC], 1, [dynamic loading is possible]) + if uname | $EGREP -i 'linux|freebsd' > /dev/null + then + LDFLAGS="$LDFLAGS -export-dynamic" + fi fi ]) @@ -942,7 +942,7 @@ print_symbol(NODE *r, bool isparam) valinfo(r->var_value, fprintf, out_fp); break; case Node_var_array: - fprintf(out_fp, "array, %ld elements\n", r->table_size); + fprintf(out_fp, "array, %ld elements\n", assoc_length(r)); break; case Node_func: fprintf(out_fp, "`function'\n"); @@ -1063,12 +1063,12 @@ print_array(volatile NODE *arr, char *arr_name) volatile int ret = 0; volatile jmp_buf pager_quit_tag_stack; - if (array_empty(arr)) { + if (assoc_empty((NODE *) arr)) { gprintf(out_fp, _("array `%s' is empty\n"), arr_name); return 0; } - num_elems = arr->table_size; + num_elems = assoc_length((NODE *) arr); /* sort indices, sub_arrays are also sorted! 
*/ list = assoc_list((NODE *) arr, "@ind_str_asc", SORTED_IN); @@ -1645,7 +1645,7 @@ cmp_val(struct list_item *w, NODE *old, NODE *new) if (new->type == Node_val) /* 7 */ return true; /* new->type == Node_var_array */ /* 8 */ - size = new->table_size; + size = assoc_length(new); if (w->cur_size == size) return false; return true; @@ -1719,7 +1719,7 @@ watchpoint_triggered(struct list_item *w) w->flags &= ~CUR_IS_ARRAY; w->cur_value = dupnode(t2); } else - w->cur_size = (t2->type == Node_var_array) ? t2->table_size : 0; + w->cur_size = (t2->type == Node_var_array) ? assoc_length(t2) : 0; } else if (! t1) { /* 1, 2 */ w->old_value = 0; /* new != NULL */ @@ -1727,7 +1727,7 @@ watchpoint_triggered(struct list_item *w) w->cur_value = dupnode(t2); else { w->flags |= CUR_IS_ARRAY; - w->cur_size = (t2->type == Node_var_array) ? t2->table_size : 0; + w->cur_size = (t2->type == Node_var_array) ? assoc_length(t2) : 0; } } else /* if (t1->type == Node_val) */ { /* 4, 5, 6 */ w->old_value = w->cur_value; @@ -1735,7 +1735,7 @@ watchpoint_triggered(struct list_item *w) w->cur_value = 0; else if (t2->type == Node_var_array) { w->flags |= CUR_IS_ARRAY; - w->cur_size = t2->table_size; + w->cur_size = assoc_length(t2); } else w->cur_value = dupnode(t2); } @@ -1761,7 +1761,7 @@ initialize_watch_item(struct list_item *w) w->cur_value = (NODE *) 0; else if (r->type == Node_var_array) { /* it's a sub-array */ w->flags |= CUR_IS_ARRAY; - w->cur_size = r->table_size; + w->cur_size = assoc_length(r); } else w->cur_value = dupnode(r); } else if (IS_FIELD(w)) { @@ -1778,7 +1778,7 @@ initialize_watch_item(struct list_item *w) w->cur_value = dupnode(r); } else if (symbol->type == Node_var_array) { w->flags |= CUR_IS_ARRAY; - w->cur_size = symbol->table_size; + w->cur_size = assoc_length(symbol); } /* else can't happen */ } @@ -5344,6 +5344,8 @@ close_all() } } + close_extensions(); + set_gawk_output(NULL); /* closes output_fp if not stdout */ } diff --git a/doc/ChangeLog b/doc/ChangeLog index 
5e780c07..34457ad4 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -15,6 +15,7 @@ * gawk.texi: Fix all @tex ... @end tex tables to use a different control character than @ so that the new makeinfo won't complain about them. Thanks to Karl Berry for the guidance. + (Old Extension Mechansim): New node. 2012-12-01 Arnold D. Robbins <arnold@skeeve.com> diff --git a/doc/gawk.info b/doc/gawk.info index 6843effe..2e542fd9 100644 --- a/doc/gawk.info +++ b/doc/gawk.info @@ -637,6 +637,7 @@ texts being (a) (see below), and with the Back-Cover Texts being (b) * Future Extensions:: New features that may be implemented one day. * Implementation Limitations:: Some limitations of the implementation. +* Old Extension Mechansim:: Some compatibility for old extensions. * Basic High Level:: The high level view. * Basic Data Typing:: A very quick intro to data types. @@ -26496,6 +26497,7 @@ and maintainers of `gawk'. Everything in it applies specifically to * Additions:: Making Additions To `gawk'. * Future Extensions:: New features that may be implemented one day. * Implementation Limitations:: Some limitations of the implementation. +* Old Extension Mechansim:: Some compatibility for old extensions. File: gawk.info, Node: Compatibility Mode, Next: Additions, Up: Notes @@ -26959,7 +26961,7 @@ Additions::, if you are interested in tackling any of the projects listed there. 
-File: gawk.info, Node: Implementation Limitations, Prev: Future Extensions, Up: Notes +File: gawk.info, Node: Implementation Limitations, Next: Old Extension Mechansim, Prev: Future Extensions, Up: Notes C.4 Some Limitations of the Implementation ========================================== @@ -26990,6 +26992,45 @@ Size of a literal string `MAX_INT ' Size of a printf string `MAX_INT ' +File: gawk.info, Node: Old Extension Mechansim, Prev: Implementation Limitations, Up: Notes + +C.5 Compatibility For Old Extensions +==================================== + +*note Dynamic Extensions::, describes the supported API and mechanisms +for writing extensions for `gawk'. This API was introduced in *FIXME: +VERSION*. However, for many years `gawk' provided an extension +mechanism that required knowledge of `gawk' internals and that was not +as well designed. + + In order to provide a transition period, `gawk' version *FIXME: +VERSION* continues to support the original extension mechanism. This +will be true for the life of exactly one major release. This support +will be withdrawn, and removed from the source code, at the next major +release. + + Briefly, original-style extensions should be compiled by including +the `awk.h' header file in the extension source code. Additionally, you +must define the identifier `GAWK' when building (use `-DGAWK' with +Unix-style compilers). Otherwise, the definitions in `gawkapi.h' will +cause conflicts with those in `awk.h' and your extension will not +compile. + + Just as in previous versions, you load an old-style extension with +the `extension()' built-in function (which is not otherwise documented). +This function in turn finds and loads the shared object file containing +the extension and calls its `dl_load()' C routine. 
+ + Because original-style and new-style extensions use different +initialiation routines (`dl_load()' versus `dlload()'), they may safely +be installed in the same directory (to be found by `AWKLIBPATH') +without conflict. + + The `gawk' development team strongly recommends that you convert any +old extensions that you may have to use the new API described in *note +Dynamic Extensions::. + + File: gawk.info, Node: Basic Concepts, Next: Glossary, Prev: Notes, Up: Top Appendix D Basic Programming Concepts @@ -31779,6 +31820,520 @@ Index Tag Table: Node: Top1352 +<<<<<<< HEAD +Node: Foreword40217 +Node: Preface44562 +Ref: Preface-Footnote-147615 +Ref: Preface-Footnote-247721 +Node: History47953 +Node: Names50344 +Ref: Names-Footnote-151821 +Node: This Manual51893 +Ref: This Manual-Footnote-157799 +Node: Conventions57899 +Node: Manual History60033 +Ref: Manual History-Footnote-163303 +Ref: Manual History-Footnote-263344 +Node: How To Contribute63418 +Node: Acknowledgments64562 +Node: Getting Started69058 +Node: Running gawk71437 +Node: One-shot72623 +Node: Read Terminal73848 +Ref: Read Terminal-Footnote-175498 +Ref: Read Terminal-Footnote-275774 +Node: Long75945 +Node: Executable Scripts77321 +Ref: Executable Scripts-Footnote-179190 +Ref: Executable Scripts-Footnote-279292 +Node: Comments79839 +Node: Quoting82306 +Node: DOS Quoting86929 +Node: Sample Data Files87604 +Node: Very Simple90636 +Node: Two Rules95235 +Node: More Complex97382 +Ref: More Complex-Footnote-1100312 +Node: Statements/Lines100397 +Ref: Statements/Lines-Footnote-1104859 +Node: Other Features105124 +Node: When106052 +Node: Invoking Gawk108199 +Node: Command Line109660 +Node: Options110443 +Ref: Options-Footnote-1125841 +Node: Other Arguments125866 +Node: Naming Standard Input128524 +Node: Environment Variables129618 +Node: AWKPATH Variable130176 +Ref: AWKPATH Variable-Footnote-1132934 +Node: AWKLIBPATH Variable133194 +Node: Other Environment Variables133791 +Node: Exit Status136286 +Node: Include 
Files136961 +Node: Loading Shared Libraries140530 +Node: Obsolete141755 +Node: Undocumented142452 +Node: Regexp142695 +Node: Regexp Usage144084 +Node: Escape Sequences146110 +Node: Regexp Operators151873 +Ref: Regexp Operators-Footnote-1159253 +Ref: Regexp Operators-Footnote-2159400 +Node: Bracket Expressions159498 +Ref: table-char-classes161388 +Node: GNU Regexp Operators163911 +Node: Case-sensitivity167634 +Ref: Case-sensitivity-Footnote-1170602 +Ref: Case-sensitivity-Footnote-2170837 +Node: Leftmost Longest170945 +Node: Computed Regexps172146 +Node: Reading Files175556 +Node: Records177559 +Ref: Records-Footnote-1186483 +Node: Fields186520 +Ref: Fields-Footnote-1189553 +Node: Nonconstant Fields189639 +Node: Changing Fields191841 +Node: Field Separators197822 +Node: Default Field Splitting200451 +Node: Regexp Field Splitting201568 +Node: Single Character Fields204910 +Node: Command Line Field Separator205969 +Node: Field Splitting Summary209410 +Ref: Field Splitting Summary-Footnote-1212602 +Node: Constant Size212703 +Node: Splitting By Content217287 +Ref: Splitting By Content-Footnote-1221013 +Node: Multiple Line221053 +Ref: Multiple Line-Footnote-1226900 +Node: Getline227079 +Node: Plain Getline229295 +Node: Getline/Variable231384 +Node: Getline/File232525 +Node: Getline/Variable/File233847 +Ref: Getline/Variable/File-Footnote-1235446 +Node: Getline/Pipe235533 +Node: Getline/Variable/Pipe238093 +Node: Getline/Coprocess239200 +Node: Getline/Variable/Coprocess240443 +Node: Getline Notes241157 +Node: Getline Summary243944 +Ref: table-getline-variants244352 +Node: Read Timeout245210 +Ref: Read Timeout-Footnote-1248955 +Node: Command line directories249012 +Node: Printing249642 +Node: Print251273 +Node: Print Examples252610 +Node: Output Separators255394 +Node: OFMT257154 +Node: Printf258512 +Node: Basic Printf259418 +Node: Control Letters260957 +Node: Format Modifiers264769 +Node: Printf Examples270778 +Node: Redirection273493 +Node: Special Files280477 +Node: 
Special FD281010 +Ref: Special FD-Footnote-1284635 +Node: Special Network284709 +Node: Special Caveats285559 +Node: Close Files And Pipes286355 +Ref: Close Files And Pipes-Footnote-1293378 +Ref: Close Files And Pipes-Footnote-2293526 +Node: Expressions293676 +Node: Values294808 +Node: Constants295484 +Node: Scalar Constants296164 +Ref: Scalar Constants-Footnote-1297023 +Node: Nondecimal-numbers297205 +Node: Regexp Constants300264 +Node: Using Constant Regexps300739 +Node: Variables303794 +Node: Using Variables304449 +Node: Assignment Options306173 +Node: Conversion308045 +Ref: table-locale-affects313421 +Ref: Conversion-Footnote-1314045 +Node: All Operators314154 +Node: Arithmetic Ops314784 +Node: Concatenation317289 +Ref: Concatenation-Footnote-1320082 +Node: Assignment Ops320202 +Ref: table-assign-ops325190 +Node: Increment Ops326598 +Node: Truth Values and Conditions330068 +Node: Truth Values331151 +Node: Typing and Comparison332200 +Node: Variable Typing332989 +Ref: Variable Typing-Footnote-1336886 +Node: Comparison Operators337008 +Ref: table-relational-ops337418 +Node: POSIX String Comparison340967 +Ref: POSIX String Comparison-Footnote-1341923 +Node: Boolean Ops342061 +Ref: Boolean Ops-Footnote-1346139 +Node: Conditional Exp346230 +Node: Function Calls347962 +Node: Precedence351556 +Node: Locales355225 +Node: Patterns and Actions356314 +Node: Pattern Overview357368 +Node: Regexp Patterns359037 +Node: Expression Patterns359580 +Node: Ranges363265 +Node: BEGIN/END366231 +Node: Using BEGIN/END366993 +Ref: Using BEGIN/END-Footnote-1369724 +Node: I/O And BEGIN/END369830 +Node: BEGINFILE/ENDFILE372112 +Node: Empty375016 +Node: Using Shell Variables375332 +Node: Action Overview377617 +Node: Statements379974 +Node: If Statement381828 +Node: While Statement383327 +Node: Do Statement385371 +Node: For Statement386527 +Node: Switch Statement389679 +Node: Break Statement391776 +Node: Continue Statement393766 +Node: Next Statement395559 +Node: Nextfile Statement397949 
+Node: Exit Statement400590 +Node: Built-in Variables403006 +Node: User-modified404101 +Ref: User-modified-Footnote-1412456 +Node: Auto-set412518 +Ref: Auto-set-Footnote-1424869 +Ref: Auto-set-Footnote-2425074 +Node: ARGC and ARGV425130 +Node: Arrays428981 +Node: Array Basics430486 +Node: Array Intro431312 +Node: Reference to Elements435630 +Node: Assigning Elements437900 +Node: Array Example438391 +Node: Scanning an Array440123 +Node: Controlling Scanning442437 +Ref: Controlling Scanning-Footnote-1447370 +Node: Delete447686 +Ref: Delete-Footnote-1450451 +Node: Numeric Array Subscripts450508 +Node: Uninitialized Subscripts452691 +Node: Multi-dimensional454319 +Node: Multi-scanning457413 +Node: Arrays of Arrays459004 +Node: Functions463649 +Node: Built-in464468 +Node: Calling Built-in465546 +Node: Numeric Functions467534 +Ref: Numeric Functions-Footnote-1471366 +Ref: Numeric Functions-Footnote-2471723 +Ref: Numeric Functions-Footnote-3471771 +Node: String Functions472040 +Ref: String Functions-Footnote-1495537 +Ref: String Functions-Footnote-2495666 +Ref: String Functions-Footnote-3495914 +Node: Gory Details496001 +Ref: table-sub-escapes497680 +Ref: table-sub-posix-92499034 +Ref: table-sub-proposed500385 +Ref: table-posix-sub501739 +Ref: table-gensub-escapes503284 +Ref: Gory Details-Footnote-1504491 +Ref: Gory Details-Footnote-2504542 +Node: I/O Functions504693 +Ref: I/O Functions-Footnote-1511717 +Node: Time Functions511864 +Ref: Time Functions-Footnote-1522756 +Ref: Time Functions-Footnote-2522824 +Ref: Time Functions-Footnote-3522982 +Ref: Time Functions-Footnote-4523093 +Ref: Time Functions-Footnote-5523205 +Ref: Time Functions-Footnote-6523432 +Node: Bitwise Functions523698 +Ref: table-bitwise-ops524256 +Ref: Bitwise Functions-Footnote-1528477 +Node: Type Functions528661 +Node: I18N Functions529131 +Node: User-defined530758 +Node: Definition Syntax531562 +Ref: Definition Syntax-Footnote-1536472 +Node: Function Example536541 +Node: Function Caveats539135 +Node: 
Calling A Function539556 +Node: Variable Scope540671 +Node: Pass By Value/Reference543634 +Node: Return Statement547074 +Node: Dynamic Typing550055 +Node: Indirect Calls550790 +Node: Library Functions560475 +Ref: Library Functions-Footnote-1563474 +Node: Library Names563645 +Ref: Library Names-Footnote-1567116 +Ref: Library Names-Footnote-2567336 +Node: General Functions567422 +Node: Strtonum Function568375 +Node: Assert Function571305 +Node: Round Function574631 +Node: Cliff Random Function576174 +Node: Ordinal Functions577190 +Ref: Ordinal Functions-Footnote-1580260 +Ref: Ordinal Functions-Footnote-2580512 +Node: Join Function580721 +Ref: Join Function-Footnote-1582492 +Node: Getlocaltime Function582692 +Node: Data File Management586407 +Node: Filetrans Function587039 +Node: Rewind Function591178 +Node: File Checking592565 +Node: Empty Files593659 +Node: Ignoring Assigns595889 +Node: Getopt Function597442 +Ref: Getopt Function-Footnote-1608746 +Node: Passwd Functions608949 +Ref: Passwd Functions-Footnote-1617924 +Node: Group Functions618012 +Node: Walking Arrays626096 +Node: Sample Programs627665 +Node: Running Examples628342 +Node: Clones629070 +Node: Cut Program630294 +Node: Egrep Program640139 +Ref: Egrep Program-Footnote-1647912 +Node: Id Program648022 +Node: Split Program651638 +Ref: Split Program-Footnote-1655157 +Node: Tee Program655285 +Node: Uniq Program658088 +Node: Wc Program665517 +Ref: Wc Program-Footnote-1669783 +Ref: Wc Program-Footnote-2669983 +Node: Miscellaneous Programs670075 +Node: Dupword Program671263 +Node: Alarm Program673294 +Node: Translate Program678043 +Ref: Translate Program-Footnote-1682430 +Ref: Translate Program-Footnote-2682658 +Node: Labels Program682792 +Ref: Labels Program-Footnote-1686163 +Node: Word Sorting686247 +Node: History Sorting690131 +Node: Extract Program691970 +Ref: Extract Program-Footnote-1699453 +Node: Simple Sed699581 +Node: Igawk Program702643 +Ref: Igawk Program-Footnote-1717800 +Ref: Igawk 
Program-Footnote-2718001 +Node: Anagram Program718139 +Node: Signature Program721207 +Node: Internationalization722307 +Node: I18N and L10N723739 +Node: Explaining gettext724425 +Ref: Explaining gettext-Footnote-1729491 +Ref: Explaining gettext-Footnote-2729675 +Node: Programmer i18n729840 +Node: Translator i18n734040 +Node: String Extraction734833 +Ref: String Extraction-Footnote-1735794 +Node: Printf Ordering735880 +Ref: Printf Ordering-Footnote-1738664 +Node: I18N Portability738728 +Ref: I18N Portability-Footnote-1741177 +Node: I18N Example741240 +Ref: I18N Example-Footnote-1743875 +Node: Gawk I18N743947 +Node: Advanced Features744564 +Node: Nondecimal Data746068 +Node: Array Sorting747651 +Node: Controlling Array Traversal748348 +Node: Array Sorting Functions756586 +Ref: Array Sorting Functions-Footnote-1760260 +Ref: Array Sorting Functions-Footnote-2760353 +Node: Two-way I/O760547 +Ref: Two-way I/O-Footnote-1765979 +Node: TCP/IP Networking766049 +Node: Profiling768893 +Node: Debugger776347 +Node: Debugging777315 +Node: Debugging Concepts777748 +Node: Debugging Terms779604 +Node: Awk Debugging782201 +Node: Sample Debugging Session783093 +Node: Debugger Invocation783613 +Node: Finding The Bug784942 +Node: List of Debugger Commands791430 +Node: Breakpoint Control792764 +Node: Debugger Execution Control796428 +Node: Viewing And Changing Data799788 +Node: Execution Stack803144 +Node: Debugger Info804611 +Node: Miscellaneous Debugger Commands808592 +Node: Readline Support814037 +Node: Limitations814868 +Node: Arbitrary Precision Arithmetic817120 +Ref: Arbitrary Precision Arithmetic-Footnote-1818762 +Node: General Arithmetic818910 +Node: Floating Point Issues820630 +Node: String Conversion Precision821511 +Ref: String Conversion Precision-Footnote-1823217 +Node: Unexpected Results823326 +Node: POSIX Floating Point Problems825479 +Ref: POSIX Floating Point Problems-Footnote-1829304 +Node: Integer Programming829342 +Node: Floating-point Programming831095 +Ref: 
Floating-point Programming-Footnote-1837404 +Node: Floating-point Representation837668 +Node: Floating-point Context838833 +Ref: table-ieee-formats839675 +Node: Rounding Mode841059 +Ref: table-rounding-modes841538 +Ref: Rounding Mode-Footnote-1844542 +Node: Gawk and MPFR844723 +Node: Arbitrary Precision Floats845965 +Ref: Arbitrary Precision Floats-Footnote-1848394 +Node: Setting Precision848705 +Node: Setting Rounding Mode851438 +Ref: table-gawk-rounding-modes851842 +Node: Floating-point Constants853022 +Node: Changing Precision854446 +Ref: Changing Precision-Footnote-1855846 +Node: Exact Arithmetic856020 +Node: Arbitrary Precision Integers859128 +Ref: Arbitrary Precision Integers-Footnote-1862128 +Node: Dynamic Extensions862275 +Node: Extension Intro863661 +Node: Plugin License864869 +Node: Extension Design865543 +Node: Old Extension Problems866614 +Ref: Old Extension Problems-Footnote-1868124 +Node: Extension New Mechanism Goals868181 +Ref: Extension New Mechanism Goals-Footnote-1870893 +Node: Extension Other Design Decisions871079 +Node: Extension Mechanism Outline873191 +Ref: load-extension874216 +Ref: load-new-function875694 +Ref: call-new-function876675 +Node: Extension Future Growth878669 +Node: Extension API Description879487 +Node: Extension API Functions Introduction880815 +Node: General Data Types885515 +Ref: General Data Types-Footnote-1891117 +Node: Requesting Values891416 +Ref: table-value-types-returned892147 +Node: Constructor Functions893101 +Node: Registration Functions896097 +Node: Extension Functions896782 +Node: Exit Callback Functions898614 +Node: Extension Version String899857 +Node: Input Parsers900507 +Node: Output Wrappers909094 +Node: Two-way processors913510 +Node: Printing Messages915640 +Ref: Printing Messages-Footnote-1916717 +Node: Updating `ERRNO'916869 +Node: Accessing Parameters917608 +Node: Symbol Table Access918838 +Node: Symbol table by name919350 +Ref: Symbol table by name-Footnote-1921520 +Node: Symbol table by cookie921600 
+Ref: Symbol table by cookie-Footnote-1925729 +Node: Cached values925792 +Ref: Cached values-Footnote-1929235 +Node: Array Manipulation929326 +Ref: Array Manipulation-Footnote-1930424 +Node: Array Data Types930463 +Ref: Array Data Types-Footnote-1933166 +Node: Array Functions933258 +Node: Flattening Arrays937024 +Node: Creating Arrays943857 +Node: Extension API Variables948652 +Node: Extension Versioning949288 +Node: Extension API Informational Variables951189 +Node: Extension API Boilerplate952275 +Node: Finding Extensions956106 +Node: Extension Example956653 +Node: Internal File Description957391 +Node: Internal File Ops961079 +Ref: Internal File Ops-Footnote-1972526 +Node: Using Internal File Ops972666 +Ref: Using Internal File Ops-Footnote-1975019 +Node: Extension Samples975285 +Node: Extension Sample File Functions976728 +Node: Extension Sample Fnmatch985201 +Node: Extension Sample Fork986927 +Node: Extension Sample Ord988141 +Node: Extension Sample Readdir988917 +Node: Extension Sample Revout990421 +Node: Extension Sample Rev2way991014 +Node: Extension Sample Read write array991704 +Node: Extension Sample Readfile993587 +Node: Extension Sample API Tests994342 +Node: Extension Sample Time994867 +Node: gawkextlib996174 +Node: Language History998555 +Node: V7/SVR3.11000077 +Node: SVR41002398 +Node: POSIX1003840 +Node: BTL1004848 +Node: POSIX/GNU1005582 +Node: Common Extensions1011117 +Node: Ranges and Locales1012224 +Ref: Ranges and Locales-Footnote-11016842 +Ref: Ranges and Locales-Footnote-21016869 +Ref: Ranges and Locales-Footnote-31017129 +Node: Contributors1017350 +Node: Installation1021646 +Node: Gawk Distribution1022540 +Node: Getting1023024 +Node: Extracting1023850 +Node: Distribution contents1025542 +Node: Unix Installation1030764 +Node: Quick Installation1031381 +Node: Additional Configuration Options1033343 +Node: Configuration Philosophy1034820 +Node: Non-Unix Installation1037162 +Node: PC Installation1037620 +Node: PC Binary Installation1038919 
+Node: PC Compiling1040767 +Node: PC Testing1043711 +Node: PC Using1044887 +Node: Cygwin1049072 +Node: MSYS1050072 +Node: VMS Installation1050586 +Node: VMS Compilation1051189 +Ref: VMS Compilation-Footnote-11052196 +Node: VMS Installation Details1052254 +Node: VMS Running1053889 +Node: VMS Old Gawk1055496 +Node: Bugs1055970 +Node: Other Versions1059822 +Node: Notes1065137 +Node: Compatibility Mode1065867 +Node: Additions1066650 +Node: Accessing The Source1067577 +Node: Adding Code1069180 +Node: New Ports1075222 +Node: Derived Files1079357 +Ref: Derived Files-Footnote-11084665 +Ref: Derived Files-Footnote-21084699 +Ref: Derived Files-Footnote-31085299 +Node: Future Extensions1085397 +Node: Implementation Limitations1085978 +Node: Old Extension Mechansim1087237 +Node: Basic Concepts1089004 +Node: Basic High Level1089685 +Ref: figure-general-flow1089956 +Ref: figure-process-flow1090555 +Ref: Basic High Level-Footnote-11093784 +Node: Basic Data Typing1093969 +Node: Glossary1097324 +Node: Copying1122635 +Node: GNU Free Documentation License1160192 +Node: Index1185329 +======= Node: Foreword40138 Node: Preface44483 Ref: Preface-Footnote-147536 @@ -32177,6 +32732,122 @@ Ref: call-new-function876677 Node: Extension Future Growth878671 Node: Extension API Description879489 Node: Extension API Functions Introduction880817 +<<<<<<< HEAD +Node: General Data Types885517 +Ref: General Data Types-Footnote-1891119 +Node: Requesting Values891418 +Ref: table-value-types-returned892149 +Node: Constructor Functions893103 +Node: Registration Functions896099 +Node: Extension Functions896784 +Node: Exit Callback Functions898616 +Node: Extension Version String899859 +Node: Input Parsers900509 +Node: Output Wrappers909096 +Node: Two-way processors913512 +Node: Printing Messages915642 +Ref: Printing Messages-Footnote-1916719 +Node: Updating `ERRNO'916871 +Node: Accessing Parameters917610 +Node: Symbol Table Access918840 +Node: Symbol table by name919352 +Ref: Symbol table by 
name-Footnote-1921522 +Node: Symbol table by cookie921602 +Ref: Symbol table by cookie-Footnote-1925731 +Node: Cached values925794 +Ref: Cached values-Footnote-1929237 +Node: Array Manipulation929328 +Ref: Array Manipulation-Footnote-1930426 +Node: Array Data Types930465 +Ref: Array Data Types-Footnote-1933168 +Node: Array Functions933260 +Node: Flattening Arrays937026 +Node: Creating Arrays943859 +Node: Extension API Variables948654 +Node: Extension Versioning949290 +Node: Extension API Informational Variables951191 +Node: Extension API Boilerplate952277 +Node: Finding Extensions956108 +Node: Extension Example956655 +Node: Internal File Description957393 +Node: Internal File Ops961081 +Ref: Internal File Ops-Footnote-1972528 +Node: Using Internal File Ops972668 +Ref: Using Internal File Ops-Footnote-1975021 +Node: Extension Samples975287 +Node: Extension Sample File Functions976730 +Node: Extension Sample Fnmatch985203 +Node: Extension Sample Fork986929 +Node: Extension Sample Ord988143 +Node: Extension Sample Readdir988919 +Node: Extension Sample Revout990423 +Node: Extension Sample Rev2way991016 +Node: Extension Sample Read write array991706 +Node: Extension Sample Readfile993589 +Node: Extension Sample API Tests994344 +Node: Extension Sample Time994869 +Node: gawkextlib996176 +Node: Language History998557 +Node: V7/SVR3.11000079 +Node: SVR41002400 +Node: POSIX1003842 +Node: BTL1004850 +Node: POSIX/GNU1005655 +Node: Common Extensions1011190 +Node: Ranges and Locales1012249 +Ref: Ranges and Locales-Footnote-11016867 +Ref: Ranges and Locales-Footnote-21016894 +Ref: Ranges and Locales-Footnote-31017154 +Node: Contributors1017375 +Node: Installation1021671 +Node: Gawk Distribution1022565 +Node: Getting1023049 +Node: Extracting1023875 +Node: Distribution contents1025567 +Node: Unix Installation1030789 +Node: Quick Installation1031406 +Node: Additional Configuration Options1033368 +Node: Configuration Philosophy1034845 +Node: Non-Unix Installation1037187 +Node: PC 
Installation1037645 +Node: PC Binary Installation1038944 +Node: PC Compiling1040792 +Node: PC Testing1043736 +Node: PC Using1044912 +Node: Cygwin1049097 +Node: MSYS1050097 +Node: VMS Installation1050611 +Node: VMS Compilation1051214 +Ref: VMS Compilation-Footnote-11052221 +Node: VMS Installation Details1052279 +Node: VMS Running1053914 +Node: VMS Old Gawk1055521 +Node: Bugs1055995 +Node: Other Versions1059847 +Node: Notes1065162 +Node: Compatibility Mode1065821 +Node: Additions1066604 +Node: Accessing The Source1067531 +Node: Adding Code1069134 +Node: New Ports1075176 +Node: Derived Files1079311 +Ref: Derived Files-Footnote-11084619 +Ref: Derived Files-Footnote-21084653 +Ref: Derived Files-Footnote-31085253 +Node: Future Extensions1085351 +Node: Implementation Limitations1085932 +Node: Basic Concepts1087159 +Node: Basic High Level1087840 +Ref: figure-general-flow1088111 +Ref: figure-process-flow1088710 +Ref: Basic High Level-Footnote-11091939 +Node: Basic Data Typing1092124 +Node: Glossary1095479 +Node: Copying1120790 +Node: GNU Free Documentation License1158347 +Node: Index1183484 +>>>>>>> master +======= Node: General Data Types885595 Ref: General Data Types-Footnote-1891197 Node: Requesting Values891496 @@ -32290,5 +32961,6 @@ Node: Glossary1095938 Node: Copying1121249 Node: GNU Free Documentation License1158806 Node: Index1183943 +>>>>>>> master End Tag Table diff --git a/doc/gawk.texi b/doc/gawk.texi index 77f8b527..fca7cebb 100644 --- a/doc/gawk.texi +++ b/doc/gawk.texi @@ -845,6 +845,7 @@ particular records in a file and perform operations upon them. * Future Extensions:: New features that may be implemented one day. * Implementation Limitations:: Some limitations of the implementation. +* Old Extension Mechansim:: Some compatibility for old extensions. * Basic High Level:: The high level view. * Basic Data Typing:: A very quick intro to data types. @end detailmenu @@ -34303,6 +34304,7 @@ maintainers of @command{gawk}. 
Everything in it applies specifically to * Additions:: Making Additions To @command{gawk}. * Future Extensions:: New features that may be implemented one day. * Implementation Limitations:: Some limitations of the implementation. +* Old Extension Mechansim:: Some compatibility for old extensions. @end menu @node Compatibility Mode @@ -34933,6 +34935,42 @@ different limits. @item Size of a printf string @tab @code{MAX_INT } @end multitable +@node Old Extension Mechansim +@appendixsec Compatibility For Old Extensions + +@ref{Dynamic Extensions}, describes the supported API and mechanisms +for writing extensions for @command{gawk}. This API was introduced +in @strong{FIXME: VERSION}. However, for many years @command{gawk} +provided an extension mechanism that required knowledge of @command{gawk} +internals and that was not as well designed. + +In order to provide a transition period, @command{gawk} version +@strong{FIXME: VERSION} continues to support the original extension mechanism. +This will be true for the life of exactly one major release. This support +will be withdrawn, and removed from the source code, at the next major +release. + +Briefly, original-style extensions should be compiled by including the +@file{awk.h} header file in the extension source code. Additionally, +you must define the identifier @samp{GAWK} when building (use +@samp{-DGAWK} with Unix-style compilers). Otherwise, the definitions +in @file{gawkapi.h} will cause conflicts with those in @file{awk.h} +and your extension will not compile. + +Just as in previous versions, you load an old-style extension with the +@code{extension()} built-in function (which is not otherwise documented). +This function in turn finds and loads the shared object file containing +the extension and calls its @code{dlload()} C routine. 
+ +Because original-style and new-style extensions use different initialization +routines (@code{dlload()} versus @code{dl_load()}), they may safely +be installed in the same directory (to be found by @env{AWKLIBPATH}) +without conflict. + +The @command{gawk} development team strongly recommends that you +convert any old extensions that you may have to use the new API +described in @ref{Dynamic Extensions}. + @c ENDOFRANGE impis @c ENDOFRANGE gawii @@ -240,6 +240,7 @@ static const char *const nodetypes[] = { "Node_param_list", "Node_func", "Node_ext_func", + "Node_old_ext_func", "Node_array_ref", "Node_array_tree", "Node_array_leaf", @@ -329,6 +330,7 @@ static struct optypetab { { "Op_builtin", NULL }, { "Op_sub_builtin", NULL }, { "Op_ext_builtin", NULL }, + { "Op_old_ext_builtin", NULL }, /* temporary */ { "Op_in_array", " in " }, { "Op_func_call", NULL }, { "Op_indirect_func_call", NULL }, @@ -354,6 +356,7 @@ static struct optypetab { { "Op_var_update", NULL }, { "Op_var_assign", NULL }, { "Op_field_assign", NULL }, + { "Op_subscript_assign", NULL }, { "Op_after_beginfile", NULL }, { "Op_after_endfile", NULL }, { "Op_func", NULL }, @@ -28,9 +28,13 @@ */ #include "awk.h" +extern SRCFILE *srcfiles; #ifdef DYNAMIC +#define OLD_INIT_FUNC "dlload" +#define OLD_FINI_FUNC "dlunload" + #include <dlfcn.h> /* @@ -111,6 +115,94 @@ load_ext(const char *lib_name) lib_name, INIT_FUNC); } +/* do_ext --- load an extension at run-time: interface to load_ext */ + +NODE * +do_ext(int nargs) +{ + NODE *obj, *init = NULL, *fini = NULL, *ret = NULL; + SRCFILE *s; + char *init_func = NULL; + char *fini_func = NULL; + + if (nargs == 3) { + fini = POP_STRING(); + fini_func = fini->stptr; + } + if (nargs >= 2) { + init = POP_STRING(); + init_func = init->stptr; + } + obj = POP_STRING(); + + s = add_srcfile(SRC_EXTLIB, obj->stptr, srcfiles, NULL, NULL); + if (s != NULL) + ret = load_old_ext(s, init_func, fini_func, obj); + + DEREF(obj); + if (fini != NULL) + DEREF(fini); + if (init != 
NULL) + DEREF(init); + if (ret == NULL) + ret = dupnode(Nnull_string); + return ret; +} + +/* load_old_ext --- load an old-style extension library and call its init routine */ + +NODE * +load_old_ext(SRCFILE *s, const char *init_func, const char *fini_func, NODE *obj) +{ + NODE *(*func)(NODE *, void *); + NODE *tmp; + void *dl; + int flags = RTLD_LAZY; + int *gpl_compat; + const char *lib_name = s->fullpath; + + if (init_func == NULL || init_func[0] == '\0') + init_func = OLD_INIT_FUNC; + + if (fini_func == NULL || fini_func[0] == '\0') + fini_func = OLD_FINI_FUNC; + + if (do_sandbox) + fatal(_("extensions are not allowed in sandbox mode")); + + if (do_traditional || do_posix) + fatal(_("`extension' is a gawk extension")); + + if (lib_name == NULL) + fatal(_("load_ext: received NULL lib_name")); + + if ((dl = dlopen(s->fullpath, flags)) == NULL) + fatal(_("extension: cannot open library `%s' (%s)"), lib_name, + dlerror()); + + /* Per the GNU Coding standards */ + gpl_compat = (int *) dlsym(dl, "plugin_is_GPL_compatible"); + if (gpl_compat == NULL) + fatal(_("extension: library `%s': does not define `plugin_is_GPL_compatible' (%s)"), + lib_name, dlerror()); + func = (NODE *(*)(NODE *, void *)) dlsym(dl, init_func); + if (func == NULL) + fatal(_("extension: library `%s': cannot call function `%s' (%s)"), + lib_name, init_func, dlerror()); + + if (obj == NULL) { + obj = make_string(lib_name, strlen(lib_name)); + tmp = (*func)(obj, dl); + unref(tmp); + unref(obj); + tmp = NULL; + } else + tmp = (*func)(obj, dl); + + s->fini_func = (void (*)(void)) dlsym(dl, fini_func); + return tmp; +} + /* make_builtin --- register name to be called as func with a builtin body */ @@ -171,6 +263,57 @@ make_builtin(const awk_ext_func_t *funcinfo) return true; } +/* make_old_builtin --- register name to be called as func with a builtin body */ + +void +make_old_builtin(const char *name, NODE *(*func)(int), int count) /* temporary */ +{ + NODE *symbol, *f; + INSTRUCTION *b; + const char *sp; + char c; + + sp = name; + if (sp == 
NULL || *sp == '\0') + fatal(_("extension: missing function name")); + + while ((c = *sp++) != '\0') { + if ((sp == & name[1] && c != '_' && ! isalpha((unsigned char) c)) + || (sp > &name[1] && ! is_identifier_char((unsigned char) c))) + fatal(_("extension: illegal character `%c' in function name `%s'"), c, name); + } + + f = lookup(name); + + if (f != NULL) { + if (f->type == Node_func) { + /* user-defined function */ + fatal(_("extension: can't redefine function `%s'"), name); + } else if (f->type == Node_ext_func || f->type == Node_old_ext_func) { + /* + * multiple extension() calls etc.; functions registered by this + * routine are installed as Node_old_ext_func, so that type must + * be recognized here too. + */ + if (do_lint) + lintwarn(_("extension: function `%s' already defined"), name); + return; + } else + /* variable name etc. */ + fatal(_("extension: function name `%s' previously defined"), name); + } else if (check_special(name) >= 0) + fatal(_("extension: can't use gawk built-in `%s' as function name"), name); + + if (count < 0) + fatal(_("make_builtin: negative argument count for function `%s'"), + name); + + b = bcalloc(Op_symbol, 1, 0); + b->builtin = func; + b->expr_count = count; + + /* NB: extension sub must return something */ + + symbol = install_symbol(estrdup(name, strlen(name)), Node_old_ext_func); + symbol->code_ptr = b; +} + /* get_argument --- get the i'th argument of a dynamically linked function */ @@ -201,9 +344,10 @@ get_argument(int i) } -/* get_actual_argument --- get the i'th scalar or array argument of a - dynamically linked function, allowed to be optional. -*/ +/* + * get_actual_argument --- get the i'th scalar or array argument of a + * dynamically linked function, allowed to be optional. 
+ */ NODE * get_actual_argument(int i, bool optional, bool want_array) @@ -230,7 +374,7 @@ get_actual_argument(int i, bool optional, bool want_array) if (t->type == Node_var_new) { if (want_array) - return get_array(t, false); + return force_array(t, false); else { t->type = Node_var; t->var_value = dupnode(Nnull_string); @@ -261,3 +405,15 @@ load_ext(const char *lib_name) fatal(_("dynamic loading of library not supported")); } #endif + +/* close_extensions --- execute extension cleanup routines */ + +void +close_extensions() +{ + SRCFILE *s; + + for (s = srcfiles->next; s != srcfiles; s = s->next) + if (s->stype == SRC_EXTLIB && s->fini_func) + (*s->fini_func)(); +} diff --git a/extension/ChangeLog b/extension/ChangeLog index ac3034a0..3e46d637 100644 --- a/extension/ChangeLog +++ b/extension/ChangeLog @@ -7,6 +7,27 @@ * filefuncs.c readdir.c, revoutput.c, revtwoway.c, rwarray.c, rwarray0.c, testext.c: Use awk_true and awk_false instead of 1 and 0. +2012-11-26 Arnold D. Robbins <arnold@skeeve.com> + + * bindarr.c, fileop.c, sparr.c: Make them compile. + * steps: Reinstated and updated. + * testsparr.awk: Add call to extension(). + +2011-05-03 John Haque <j.eh@mchsi.com> + + * fileop.c, record.awk, testrecord.sh: New files. + * steps: Updated. + +2011-05-02 John Haque <j.eh@mchsi.com> + + * bindarr.c, dbarray.awk, testdbarray.awk: New files. + * steps: Updated. + +2011-04-24 John Haque <j.eh@mchsi.com> + + * spec_array.c, spec_array.h, sparr.c, testsparr.awk: New files. + * steps: Updated. + 2012-11-21 Arnold D. Robbins <arnold@skeeve.com> * filefuncs.c (do_stat): Optional third argument indicates to diff --git a/extension/bindarr.c b/extension/bindarr.c new file mode 100644 index 00000000..60959903 --- /dev/null +++ b/extension/bindarr.c @@ -0,0 +1,347 @@ +/* + * bindarr.c - routines for binding (attaching) user-defined functions + * to array and array elements. + */ + +/* + * Copyright (C) 1986, 1988, 1989, 1991-2011 the Free Software Foundation, Inc. 
+ * + * This file is part of GAWK, the GNU implementation of the + * AWK Programming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "awk.h" + +/* + * Binding an array is basically the binding of functions to the internal + * triggers for reading and writing that array or an element of that array. + * This allows the user to define the set of behaviors for gawk arrays + * using gawk functions. With arrays you can assign and read values of + * specific elements, provide list of indices and values, and tell if a + * certain index exists or not. A variable can be "tied" by including + * code which overrides any or all of the standard behaviors of awk arrays. + * + * See dbarray.awk and testdbarray.awk to learn how to bind an array + * to an external database for persistent storage. 
+ */ + +int plugin_is_GPL_compatible; + +static NODE **bind_array_lookup(NODE *, NODE *); +static NODE **bind_array_exists(NODE *, NODE *); +static NODE **bind_array_clear(NODE *, NODE *); +static NODE **bind_array_remove(NODE *, NODE *); +static NODE **bind_array_list(NODE *, NODE *); +static NODE **bind_array_store(NODE *, NODE *); +static NODE **bind_array_length(NODE *, NODE *); + +static afunc_t bind_array_func[] = { + (afunc_t) 0, + (afunc_t) 0, + bind_array_length, + bind_array_lookup, + bind_array_exists, + bind_array_clear, + bind_array_remove, + bind_array_list, + null_afunc, /* copy */ + null_afunc, /* dump */ + bind_array_store, +}; + +enum { INIT, FINI, COUNT, EXISTS, LOOKUP, + STORE, DELETE, CLEAR, FETCHALL +}; + +static const char *const bfn[] = { + "init", "fini", "count", "exists", "lookup", + "store", "delete", "clear", "fetchall", +}; + +typedef struct { + NODE *func[sizeof(bfn)/sizeof(char *)]; + NODE *arg0; +} array_t; + +static NODE *call_func(NODE *func, NODE **arg, int arg_count); +static long array_func_call(NODE *, NODE *, int); + + +/* bind_array_length -- find the number of elements in the array */ + +static NODE ** +bind_array_length(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED) +{ + static NODE *length_node; + + symbol->table_size = array_func_call(symbol, NULL, COUNT); + length_node = symbol; + return & length_node; +} + +/* bind_array_lookup --- find element in the array; return a pointer to value. */ + +static NODE ** +bind_array_lookup(NODE *symbol, NODE *subs) +{ + NODE *xn = symbol->xarray; + (void) array_func_call(symbol, subs, LOOKUP); + return xn->alookup(xn, subs); +} + +/* + * bind_array_exists --- test whether the array element symbol[subs] exists or not, + * return pointer to value if it does. 
+ */ + +static NODE ** +bind_array_exists(NODE *symbol, NODE *subs) +{ + NODE *xn = symbol->xarray; + (void) array_func_call(symbol, subs, EXISTS); + return xn->aexists(xn, subs); +} + +/* bind_array_clear --- flush all the values in symbol[] */ + +static NODE ** +bind_array_clear(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED) +{ + NODE *xn = symbol->xarray; + (void) xn->aclear(xn, NULL); + (void) array_func_call(symbol, NULL, CLEAR); + return NULL; +} + +/* bind_array_remove --- if subs is already in the table, remove it. */ + +static NODE ** +bind_array_remove(NODE *symbol, NODE *subs) +{ + NODE *xn = symbol->xarray; + (void) xn->aremove(xn, subs); + (void) array_func_call(symbol, subs, DELETE); + return NULL; +} + +/* bind_array_store --- update the value for the SUBS */ + +static NODE ** +bind_array_store(NODE *symbol, NODE *subs) +{ + (void) array_func_call(symbol, subs, STORE); + return NULL; +} + +/* bind_array_list --- return a list of array items */ + +static NODE** +bind_array_list(NODE *symbol, NODE *akind) +{ + NODE *xn = symbol->xarray; + (void) array_func_call(symbol, NULL, FETCHALL); + return xn->alist(xn, akind); +} + + +/* array_func_call --- call user-defined array routine */ + +static long +array_func_call(NODE *symbol, NODE *arg1, int fi) +{ + NODE *argp[3]; + NODE *retval; + long ret; + int i = 0; + array_t *aq; + + aq = symbol->a_opaque; + if (! 
aq) /* an array routine invoked from the same or another routine */ + fatal(_("bind_array: cannot access bound array, operation not allowed")); + symbol->a_opaque = NULL; /* avoid infinite recursion */ + + argp[i++] = symbol->xarray; + argp[i++] = aq->arg0; + if (arg1 != NULL) + argp[i++] = arg1; + + retval = call_func(aq->func[fi], argp, i); + symbol->a_opaque = aq; + force_number(retval); + ret = get_number_si(retval); + unref(retval); + if (ret < 0) { + if (ERRNO_node->var_value->stlen > 0) + fatal(_("%s"), ERRNO_node->var_value->stptr); + else + fatal(_("unknown reason")); + } + return ret; +} + +/* do_bind_array --- bind an array to user-defined functions */ + +static NODE * +do_bind_array(int nargs) +{ + NODE *symbol, *xn, *t, *td; + int i; + array_t *aq; + char *aname; + + symbol = get_array_argument(0, false); + if (symbol->array_funcs == bind_array_func) + fatal(_("bind_array: array `%s' already bound"), array_vname(symbol)); + + assoc_clear(symbol); + + emalloc(aq, array_t *, sizeof(array_t), "do_bind_array"); + memset(aq, '\0', sizeof(array_t)); + + t = get_array_argument(1, false); + + for (i = 0; i < sizeof(bfn)/sizeof(char *); i++) { + NODE *subs, *val, *f; + + subs = make_string(bfn[i], strlen(bfn[i])); + val = in_array(t, subs); + unref(subs); + if (val == NULL) { + if (i != INIT && i != FINI) + fatal(_("bind_array: array element `%s[\"%s\"]' not defined"), + t->vname, bfn[i]); + continue; + } + + force_string(val); + f = lookup(val->stptr); + if (f == NULL || f->type != Node_func) + fatal(_("bind_array: function `%s' is not defined"), val->stptr); + aq->func[i] = f; + } + + /* copy the array -- this is passed as the second argument to the functions */ + /* size the name buffer from the string actually copied: '~' + symbol->vname + NUL */ + emalloc(aname, char *, strlen(symbol->vname) + 2, "do_bind_array"); + aname[0] = '~'; /* any illegal character */ + strcpy(& aname[1], symbol->vname); + td = make_array(); + td->vname = aname; + assoc_copy(t, td); + aq->arg0 = td; + + /* internal array for the actual storage */ + xn = make_array(); +
xn->vname = symbol->vname; /* shallow copy */ + xn->flags |= XARRAY; + symbol->a_opaque = aq; + symbol->array_funcs = bind_array_func; + symbol->xarray = xn; + + if (aq->func[INIT] != NULL) + (void) array_func_call(symbol, NULL, INIT); + + return make_number(0); +} + +/* do_unbind_array --- unbind an array */ + +static NODE * +do_unbind_array(int nargs) +{ + NODE *symbol, *xn, *td; + array_t *aq; + + symbol = get_array_argument(0, false); + if (symbol->array_funcs != bind_array_func) + fatal(_("unbind_array: `%s' is not a bound array"), array_vname(symbol)); + + aq = symbol->a_opaque; + if (aq->func[FINI] != NULL) + (void) array_func_call(symbol, NULL, FINI); + + td = aq->arg0; + assoc_clear(td); + efree(td->vname); + freenode(td); + efree(aq); + + /* promote xarray to symbol */ + xn = symbol->xarray; + xn->flags &= ~XARRAY; + xn->parent_array = symbol->parent_array; + *symbol = *xn; + freenode(xn); + + return make_number(0); +} + + +/* call_func --- call a user-defined gawk function */ + +static NODE * +call_func(NODE *func, NODE **arg, int arg_count) +{ + NODE *ret; + INSTRUCTION *code; + extern int currule; + int i, save_rule = 0; + + if (arg_count > func->param_cnt) + fatal(_("function `%s' called with too many parameters"), func->vname); + + /* make function call instructions */ + code = bcalloc(Op_func_call, 2, 0); + code->func_body = func; + code->func_name = NULL; /* not needed, func_body already assigned */ + (code + 1)->expr_count = arg_count; + code->nexti = bcalloc(Op_stop, 1, 0); + + save_rule = currule; /* save current rule */ + currule = 0; + + /* push arguments onto stack */ + for (i = 0; i < arg_count; i++) { + if (arg[i]->type == Node_val) + UPREF(arg[i]); + PUSH(arg[i]); + } + + /* execute the function */ + (void) interpret(code); + + ret = POP_SCALAR(); /* the return value of the function */ + + /* restore current rule */ + currule = save_rule; + + /* free code */ + bcfree(code->nexti); + bcfree(code); + + return ret; +} + + +/* dlload --- load 
this library */ + +NODE * +dlload(NODE *obj, void *dl) +{ + make_old_builtin("bind_array", do_bind_array, 2); + make_old_builtin("unbind_array", do_unbind_array, 1); + return make_number((AWKNUM) 0); +} diff --git a/extension/dbarray.awk b/extension/dbarray.awk new file mode 100644 index 00000000..e0a3c093 --- /dev/null +++ b/extension/dbarray.awk @@ -0,0 +1,222 @@ +# dbarray.awk -- persistent array with sqlite database backend + +# @load "bindarr" + +BEGIN { + extension("bindarr") +} + +function _db_count(symbol, sq, + sth, ret, count) +{ + sth = sq["sqlc"] + printf "SELECT count(col1) FROM %s;\n", sq["table"] |& sth + close(sth, "to") + ret = (sth |& getline count) + if (close(sth) != 0 || ret <= 0) + return -1 + return count +} + +function _db_exists(symbol, sq, subs, + sth, ret, row, qsubs) +{ + if (! (subs in symbol)) { + sth = sq["sqlc"] + + # double up single quotes + qsubs = gensub(/'/, "''", "g", subs) + + printf "SELECT col2 FROM %s WHERE col1='%s';\n", sq["table"], qsubs |& sth + close(sth, "to") + ret = (sth |& getline row) + if (close(sth) != 0 || ret < 0) + return -1 + if (ret == 0) # non-existent row + return 0 + if (row == sq["null"]) + symbol[subs] # install null string as value + else + symbol[subs] = row + } + return 0 +} + +function _db_lookup(symbol, sq, subs, + sth, ret, row, qsubs) +{ + if (! 
(subs in symbol)) { + sth = sq["sqlc"] + + # double up single quotes + qsubs = gensub(/'/, "''", "g", subs) + + printf "SELECT col2 FROM %s WHERE col1='%s';\n", sq["table"], qsubs |& sth + close(sth, "to") + ret = (sth |& getline row) + if (close(sth) != 0 || ret < 0) + return -1 + + if (ret > 0) { + if (row == sq["null"]) + symbol[subs] # install null string as value + else + symbol[subs] = row + } else { + # Not there, install it with NULL as value + printf "INSERT INTO %s (col1) VALUES('%s');\n", sq["table"], qsubs |& sth + close(sth, "to") + ret = (sth |& getline) + if (close(sth) != 0 || ret < 0) + return -1 + } + } + return 0 +} + +function _db_clear(symbol, sq, + sth, ret) +{ + sth = sq["sqlc"] + printf "DELETE FROM %s;\n", sq["table"] |& sth + close(sth, "to") + ret = (sth |& getline) + if (close(sth) != 0 || ret < 0) + return -1 + return 0 +} + +function _db_delete(symbol, sq, subs, + sth, ret, qsubs) +{ + sth = sq["sqlc"] + qsubs = gensub(/'/, "''", "g", subs) + printf "DELETE FROM %s WHERE col1='%s';\n", sq["table"], qsubs |& sth + close(sth, "to") + ret = (sth |& getline) + if (close(sth) != 0 || ret < 0) + return -1 + return 0 +} + +function _db_store(symbol, sq, subs, + sth, ret, qsubs, qval) +{ + sth = sq["sqlc"] + + qval = gensub(/'/, "''", "g", symbol[subs]) + qsubs = gensub(/'/, "''", "g", subs) + printf "UPDATE %s SET col2='%s' WHERE col1='%s';\n", \ + sq["table"], qval, qsubs |& sth + close(sth, "to") + ret = (sth |& getline) + if (close(sth) != 0 || ret < 0) + return -1 + return 0 +} + +function _db_fetchall(symbol, sq, + sth, ret, save_RS, save_FS) +{ + sth = sq["sqlc2"] + + if (! 
sq["loaded"]) { + printf "SELECT col1, col2 FROM %s;\n", sq["table"] |& sth + close(sth, "to") + save_RS = RS + save_FS = FS + RS = "\n\n" + FS = "\n" + while ((ret = (sth |& getline)) > 0) { + sub(/^ *col1 = /, "", $1) + sub(/^ *col2 = /, "", $2) + if ($2 == sq["null"]) + symbol[$1] # install null string as value + else + symbol[$1] = $2 + } + RS = save_RS + FS = save_FS + if (ret < 0 || close(sth) != 0) + return -1 + sq["loaded"] = 1 + } +} + + +function _db_init(symbol, sq, + sth, table, ret) +{ + sth = sq["sqlc"] + table = sq["table"] + + # check if table exists + printf ".tables %s\n", table |& sth + close(sth, "to") + ret = (sth |& getline) + if (close(sth) != 0 || ret < 0) + return -1 + if (ret > 0 && $0 == table) { + # verify schema + printf ".schema %s\n", table |& sth + close(sth, "to") + ret = (sth |& getline) + if (close(sth) != 0 || ret <= 0) + return -1 + if ($0 !~ /\(col1 TEXT PRIMARY KEY, col2 TEXT\)/) { + printf "table %s: Invalid column name or type(s)\n", table > "/dev/stderr" + return -1 + } + } else { + # table does not exist, create it. + printf "CREATE TABLE %s (col1 TEXT PRIMARY KEY, col2 TEXT);\n", table |& sth + close(sth, "to") + ret = (sth |& getline) + if (close(sth) != 0 || ret < 0) + return -1 + } + return 0 +} + +#function _db_fini(tie, a, subs) {} + +function db_bind(arr, database, table, sq) +{ + if (! database) { + print "db_bind: must specify a database name" > "/dev/stderr" + exit(1) + } + + if (! 
table) { + print "db_bind: must specify a table name" > "/dev/stderr" + exit(1) + } + + # string used by the sqlite3 client to represent NULL + sq["null"] = "(null)" + + sq["sqlc"] = sprintf("sqlite3 -nullvalue '%s' %s", sq["null"], database) + # sqlite command used in _db_fetchall + sq["sqlc2"] = sprintf("sqlite3 -line -nullvalue '%s' %s", sq["null"], database) + + sq["table"] = table + + # register our array routines + sq["init"] = "_db_init" + sq["count"] = "_db_count" + sq["exists"] = "_db_exists" + sq["lookup"] = "_db_lookup" + sq["delete"] = "_db_delete" + sq["store"] = "_db_store" + sq["clear"] = "_db_clear" + sq["fetchall"] = "_db_fetchall" + +# sq["fini"] = "_db_fini"; + + bind_array(arr, sq) +} + +function db_unbind(arr) +{ + unbind_array(arr) +} diff --git a/extension/fileop.c b/extension/fileop.c new file mode 100644 index 00000000..86f62576 --- /dev/null +++ b/extension/fileop.c @@ -0,0 +1,394 @@ +/* + * fileop.c -- Builtin functions for binary I/O and other interfaces to + * the filesystem. + */ + +/* + * Copyright (C) 2012 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Programming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "awk.h" + +int plugin_is_GPL_compatible; + +typedef struct file_struct { + struct file_struct *next; + FILE *fp; + int flags; + char path[1]; +} file_t; + +static file_t *files; +static file_t *file_open(const char *builtin_name, int nargs, int do_open); +static int mode2flags(const char *mode); + +/* do_fread --- read from file */ + +static NODE * +do_fread(int nargs) +{ + NODE *arg; + size_t rlen, count; + file_t *f; + char *rbuf; + + f = file_open("fread", nargs, true); + + arg = get_scalar_argument(2, false); + force_number(arg); + rlen = get_number_ui(arg); + + emalloc(rbuf, char *, rlen + 2, "do_fread"); + if ((count = fread(rbuf, 1, rlen, f->fp)) < rlen) { + if (! feof(f->fp)) + update_ERRNO_int(errno); + } + return make_str_node(rbuf, count, ALREADY_MALLOCED); +} + +/* do_fwrite --- write to file */ + +static NODE * +do_fwrite(int nargs) +{ + NODE *arg; + file_t *f; + size_t count = 0; + + f = file_open("fwrite", nargs, true); + + arg = get_scalar_argument(2, false); + force_string(arg); + if (arg->stlen > 0) { + count = fwrite(arg->stptr, 1, arg->stlen, f->fp); + if (count < arg->stlen) + update_ERRNO_int(errno); + } + return make_number(count); +} + +/* do_fseek --- set the file position indicator */ + +static NODE * +do_fseek(int nargs) +{ + NODE *arg; + long offset; + file_t *f; + int whence = 0, ret = 0; + + f = file_open("fseek", nargs, true); + + arg = get_scalar_argument(2, false); + force_number(arg); + offset = get_number_si(arg); + + arg = get_scalar_argument(3, false); + force_string(arg); + if (strcasecmp(arg->stptr, "SEEK_SET") == 0) + whence = SEEK_SET; + else if (strcasecmp(arg->stptr, "SEEK_CUR") == 0) + whence = SEEK_CUR; + else if (strcasecmp(arg->stptr, "SEEK_END") == 0) + whence = SEEK_END; + else + 
fatal(_("fseek: `%.*s' is not a valid 4th argument"), + (int) arg->stlen, arg->stptr); + + if (fseek(f->fp, offset, whence) < 0) { + update_ERRNO_int(errno); + ret = -1; + } + return make_number(ret); +} + +/* do_ftruncate --- truncate the file to a specified length */ + +static NODE * +do_ftruncate(int nargs) +{ + NODE *arg; + file_t *f; + off_t len; + int ret = 0; + + f = file_open("ftruncate", nargs, true); + arg = get_scalar_argument(2, false); + force_number(arg); + len = (off_t) get_number_si(arg); + if (ftruncate(fileno(f->fp), len) < 0) { + update_ERRNO_int(errno); + ret = -1; + } + return make_number(ret); +} + +/* do_unlink --- delete the name from the filesystem */ + +static NODE * +do_unlink(int nargs) +{ + NODE *file; + int ret = 0; + + file = get_scalar_argument(0, false); + force_string(file); + if (file->stlen == 0) + fatal(_("unlink: filename has empty string value")); + if (unlink(file->stptr) < 0) { + update_ERRNO_int(errno); + ret = -1; + } + return make_number(ret); +} + +/* do_flush --- flush buffered data to file */ + +static NODE * +do_flush(int nargs) +{ + file_t *f; + int status = -1; + + f = file_open("flush", nargs, false); + if (f != NULL) { + status = fflush(f->fp); + if (status != 0) + update_ERRNO_int(errno); + } + return make_number(status); +} + +/* do_fclose --- close an open file */ + +static NODE * +do_fclose(int nargs) +{ + file_t *f; + int status = -1; + + f = file_open("fclose", nargs, false); + if (f != NULL) { + status = fclose(f->fp); + if (status != 0) + update_ERRNO_int(errno); + assert(files == f); + files = f->next; + efree(f); + } + return make_number(status); +} + +/* do_filesize --- return the size of the file */ + +static NODE * +do_filesize(int nargs) +{ + NODE *file; + struct stat sbuf; + AWKNUM d = -1.0; + + file = get_scalar_argument(0, false); + force_string(file); + if (file->stlen == 0) + fatal(_("filesize: filename has empty string value")); + + if (stat(file->stptr, & sbuf) < 0) { + 
update_ERRNO_int(errno); + goto ferror; + } + if ((sbuf.st_mode & S_IFMT) != S_IFREG) { + errno = EINVAL; + update_ERRNO_int(errno); + goto ferror; + } + d = sbuf.st_size; + +ferror: + return make_number(d); +} + +/* do_file_exists --- check if path exists in the filesystem */ + +static NODE * +do_file_exists(int nargs) +{ + NODE *file; + struct stat sbuf; + int ret = 1; + + file = get_scalar_argument(0, false); + force_string(file); + if (file->stlen == 0) + fatal(_("file_exists: filename has empty string value")); + + if (stat(file->stptr, & sbuf) < 0) { + if (errno != ENOENT) + update_ERRNO_int(errno); + ret = 0; + } + return make_number(ret); +} + + +/* file_open --- open a file or find an already opened file */ + +static file_t * +file_open(const char *builtin_name, int nargs, int do_open) +{ + NODE *file, *mode; + file_t *f, *prev; + FILE *fp; + int flags; + char *path; + + if (nargs < 2) + cant_happen(); + + file = get_scalar_argument(0, false); + force_string(file); + mode = get_scalar_argument(1, true); + force_string(mode); + + if (file->stlen == 0) + fatal(_("%s: filename has empty string value"), builtin_name); + if (mode->stlen == 0) + fatal(_("%s: mode has empty string value"), builtin_name); + + flags = mode2flags(mode->stptr); + if (flags < 0) + fatal(_("%s: invalid mode `%.*s'"), builtin_name, + (int) mode->stlen, mode->stptr); + + path = file->stptr; + for (prev = NULL, f = files; f != NULL; prev = f, f = f->next) { + if (strcmp(f->path, path) == 0 && f->flags == flags) { + /* Move to the head of the list */ + if (prev != NULL) { + prev->next = f->next; + f->next = files; + files = f; + } + return f; + } + } + + if (! 
do_open) { + if (do_lint) + lintwarn(_("%s: `%.*s' is not an open file"), + builtin_name, (int) file->stlen, file->stptr); + return NULL; + } + + fp = fopen(path, mode->stptr); + if (fp == NULL) + fatal(_("%s: cannot open file `%.*s'"), + builtin_name, (int) file->stlen, file->stptr); + + os_close_on_exec(fileno(fp), path, "", ""); + + emalloc(f, file_t *, sizeof(file_t) + file->stlen + 1, "file_open"); + memcpy(f->path, path, file->stlen + 1); + f->fp = fp; + f->flags = flags; + f->next = files; + files = f; + return f; +} + + +/* + * mode2flags --- convert a string mode to an integer flag; + * modified from str2mode in io.c. + */ + +static int +mode2flags(const char *mode) +{ + int ret = -1; + const char *second; + + if (mode == NULL || mode[0] == '\0') + return -1; + + second = & mode[1]; + + if (*second == 'b') + second++; + + switch(mode[0]) { + case 'r': + ret = O_RDONLY; + if (*second == '+' || *second == 'w') + ret = O_RDWR; + break; + + case 'w': + ret = O_WRONLY|O_CREAT|O_TRUNC; + if (*second == '+' || *second == 'r') + ret = O_RDWR|O_CREAT|O_TRUNC; + break; + + case 'a': + ret = O_WRONLY|O_APPEND|O_CREAT; + if (*second == '+') + ret = O_RDWR|O_APPEND|O_CREAT; + break; + + default: + ret = -1; + } + if (ret != -1 && strchr(mode, 'b') != NULL) + ret |= O_BINARY; + return ret; +} + + +/* dlload --- load new builtins in this library */ + +NODE * +dlload(NODE *tree, void *dl) +{ + make_old_builtin("fseek", do_fseek, 4); + make_old_builtin("fread", do_fread, 3); + make_old_builtin("fwrite", do_fwrite, 3); + make_old_builtin("flush", do_flush, 2); + make_old_builtin("filesize", do_filesize, 1); + make_old_builtin("file_exists", do_file_exists, 1); + make_old_builtin("fclose", do_fclose, 2); + make_old_builtin("ftruncate", do_ftruncate, 3); + make_old_builtin("unlink", do_unlink, 1); + return make_number((AWKNUM) 0); +} + + +/* dlunload --- routine called when exiting */ + +void +dlunload() +{ + file_t *f; + for (f = files; f != NULL; f = f->next) { + if (f->fp 
!= NULL) { + fclose(f->fp); + f->fp = NULL; + } + } +} diff --git a/extension/record.awk b/extension/record.awk new file mode 100644 index 00000000..18a3ce48 --- /dev/null +++ b/extension/record.awk @@ -0,0 +1,252 @@ +# record.awk -- represent fixed-length records in a file as an array. +# Each element in the array corresponds to a record in the file. +# The records are numbered starting from 1, and each record read in +# from the file is cached. If opened using mode "r+", +# changes to the array are reflected in the file immediately i.e. +# writing to an element writes the data into the file. +# +# Usage: +# record(r, path [, reclen [, mode]]) +# r -- array to bind +# path -- filename +# reclen -- length of each record +# mode -- "r" for reading (default), "r+" for reading and writing +# +# With reclen <= 0, entire file is treated as one record #1. +# +# record(r, "data.in", 80, "r+") +# r[10] = r[1] +# for (i = 1; i in r; i++) +# print r[i] +# delete r[1] +# +# See Also: testrecord.sh +# +# +# TODO: +# * implement deferred writing +# * limit memory usage for read cache +# * use fixed size buffer when deleting a record +# + +BEGIN { + extension("fileop.so") + extension("bindarr.so") +} + +# _record_count --- return the number of records in file + +function _record_count(symbol, rd) +{ + if (! ("rectot" in rd)) + rd["rectot"] = ("reclen" in rd) ? + int(filesize(rd["path"]) / rd["reclen"]) : 1 + return rd["rectot"] +} + +# _record_exists --- check if record exists + +function _record_exists(symbol, rd, recnum, + path, mode, reclen, rectot) +{ + path = rd["path"] + reclen = ("reclen" in rd) ? rd["reclen"] : filesize(path) + mode = rd["mode"] + rectot = _record_count(symbol, rd) + + recnum = int(recnum) + if (recnum <= 0 || recnum > rectot) + return 0 + + if (! 
(recnum in symbol)) { + fseek(path, mode, (recnum - 1) * reclen, "SEEK_SET") + symbol[recnum] = fread(path, mode, reclen) + } + return 0 +} + +# _record_lookup --- lookup a record + +function _record_lookup(symbol, rd, recnum, + path, mode, reclen, rectot) +{ + path = rd["path"] + reclen = ("reclen" in rd) ? rd["reclen"] : filesize(path) + mode = rd["mode"] + rectot = _record_count(symbol, rd) + + recnum = int(recnum) + if (recnum <= 0 || recnum > rectot) { + ERRNO = sprintf("record: %s: reference to non-existent record #%d", path, recnum) + return -1 + } + + if (! (recnum in symbol)) { + fseek(path, mode, (recnum - 1) * reclen, "SEEK_SET") + symbol[recnum] = fread(path, mode, reclen) + } + return 0 +} + +# _record_clear --- remove all records + +function _record_clear(symbol, rd, + path, mode) +{ + path = rd["path"] + mode = rd["mode"] + if (mode == "r") { + ERRNO = sprintf("record: cannot delete record from file `%s' opened only for reading", path) + return -1 + } + ftruncate(path, mode, 0) + delete rd["reclen"] + return 0 +} + +# _record_delete --- delete a record from the file + +function _record_delete(symbol, rd, recnum, + path, mode, reclen, rectot) +{ + path = rd["path"] + reclen = ("reclen" in rd) ? rd["reclen"] : filesize(path) + mode = rd["mode"] + + if (mode == "r") { + ERRNO = sprintf("record: cannot delete record from file `%s' opened only for reading", path) + return -1 + } + + recnum = int(recnum) + if (! 
("reclen" in rd)) { + # entire file is record #1 + ftruncate(path, mode, 0) + delete rd["reclen"] + return 0 + } + + sz = filesize(path) + rectot = int(sz / reclen) + + recstart = (recnum - 1) * reclen + off = sz - (recstart + reclen) + + fseek(path, mode, -off, "SEEK_END") + tmp = fread(path, mode, off) + fseek(path, mode, recstart, "SEEK_SET") + if (fwrite(path, mode, tmp) != length(tmp)) + return -1 + flush(path, mode) + ftruncate(path, mode, sz - reclen) + + rd["rectot"] = rectot - 1 + for (i = recnum + 1; i <= rectot; i++) { + if (i in symbol) { + symbol[i - 1] = symbol[i] + delete symbol[i] + } + } + return 0 +} + +# _record_store --- write a record to file + +function _record_store(symbol, rd, recnum, + path, mode, reclen, val) +{ + path = rd["path"] + reclen = ("reclen" in rd) ? rd["reclen"] : filesize(path) + mode = rd["mode"] + + if (mode == "r") { + ERRNO = sprintf("record: cannot write to file `%s' opened only for reading", path) + return -1 + } + + recnum = int(recnum) + val = symbol[recnum] + if (! ("reclen" in rd)) { + # the entire file is record #1 + if (reclen != 0) + ftruncate(path, mode, 0) + } else if (length(val) != reclen) { + ERRNO = sprintf("record: %s: invalid length for record #%d", path, recnum) + return -1 + } + + fseek(path, mode, (recnum - 1) * reclen, "SEEK_SET") + if (fwrite(path, mode, val) != length(val)) + return -1 + flush(path, mode) + return 0 +} + +# _record_fetchall --- retrieve all the records + +function _record_fetchall(symbol, rd, + path, mode, reclen, rectot, recnum) +{ + path = rd["path"] + reclen = ("reclen" in rd) ? rd["reclen"] : filesize(path) + mode = rd["mode"] + rectot = _record_count(symbol, rd) + + if (rd["loaded"]) + return 0 + for (recnum = 1; recnum <= rectot; recnum++) { + if (! 
(recnum in symbol)) { + fseek(path, mode, (recnum - 1) * reclen, "SEEK_SET") + symbol[recnum] = fread(path, mode, reclen) + } + } + rd["loaded"] = 1 + return 0 +} + +# _record_init --- initialization routine + +function _record_init(symbol, rd) +{ + if (! file_exists(rd["path"])) { + ERRNO = sprintf("record: cannot open file `%s' for reading", rd["path"]) + return -1 + } + return 0 +} + +# _record_fini --- cleanup routine + +function _record_fini(symbol, rd) +{ + fclose(rd["path"], rd["mode"]) +} + +# record --- bind an array to a file with fixed-length records + +function record(array, path, reclen, mode, rd) +{ + if (path == "") { + print "fatal: record: empty string value for filename" > "/dev/stderr" + exit(1) + } + + # register our array routines + rd["init"] = "_record_init" + rd["fini"] = "_record_fini" + rd["count"] = "_record_count" + rd["exists"] = "_record_exists" + rd["lookup"] = "_record_lookup" + rd["delete"] = "_record_delete" + rd["store"] = "_record_store" + rd["clear"] = "_record_clear" + rd["fetchall"] = "_record_fetchall" + + rd["path"] = path + if (reclen > 0) + rd["reclen"] = reclen + rd["mode"] = mode == "r+" ? "r+" : "r" + + delete array + bind_array(array, rd) +} diff --git a/extension/sparr.c b/extension/sparr.c new file mode 100644 index 00000000..a3d06e66 --- /dev/null +++ b/extension/sparr.c @@ -0,0 +1,163 @@ +/* + * sparr.c - Example of changing behavior of arrays in gawk. + * See testsparr.awk for usage. + */ + +/* + * Copyright (C) 2012 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Programming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. 
+ * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + + +#include "awk.h" +#include "spec_array.h" + +int plugin_is_GPL_compatible; + +typedef struct { + int load_file; + NODE *filename; +} sdata_t; + +/* install_array --- install an array in the symbol table */ + +static NODE * +install_array(const char *name) +{ + NODE *r; + + r = lookup(name); + if (r == NULL) + r = install_symbol(estrdup(name, strlen(name)), Node_var_array); + switch (r->type) { + case Node_var_new: + r = force_array(r, false); + /* fall through */ + case Node_var_array: + assoc_clear(r); + break; + default: + fatal(_("`%s' is not an array"), name); + } + return r; +} + +/* fetch_SYS --- fetch routine for the array `SYS' */ + +static NODE * +fetch_SYS(NODE *symbol, NODE *subs, void *data) +{ + force_string(subs); + if (strcmp(subs->stptr, "time") == 0) + return do_strftime(0); + return NULL; +} + +/* store_SYS --- store routine for the array `SYS' */ + +static void +store_SYS(NODE *symbol, NODE *subs, NODE *val, void *data) +{ + sdata_t *sd = (sdata_t *) data; + + if (subs != NULL && val != NULL && val->type == Node_val) { + force_string(subs); + if (strcmp(subs->stptr, "readline") == 0) { + sd->load_file = true; + unref(sd->filename); + sd->filename = dupnode(val); + } + } +} + +/* load_READLINE --- load routine for the array `READLINE' */ + +static void +load_READLINE(NODE *symbol, void *data) +{ + sdata_t *sd = (sdata_t *) data; + NODE *file, *tmp; + FILE *fp; + static char linebuf[BUFSIZ]; + int i; + bool long_line = false; + + if (! 
sd->load_file) /* non-existent SYS["readline"] or already loaded */ + return; + + file = sd->filename; + force_string(file); + + if (file->stlen == 0) + return; + + assoc_clear(symbol); + + if ((fp = fopen(file->stptr, "r" )) == NULL) { + warning(_("READLINE (%s): %s"), file->stptr, strerror(errno)); + return; + } + + for (i = 1; fgets(linebuf, sizeof(linebuf), fp ) != NULL; i++) { + NODE **lhs; + size_t sz; + + sz = strlen(linebuf); + if (sz > 0 && linebuf[sz - 1] == '\n') { + linebuf[sz - 1] = '\0'; + sz--; + if (long_line) { + long_line = false; + i--; + continue; + } + } else if (long_line) { + i--; + continue; + } else { + if (do_lint) + lintwarn(_("file `%s' does not end in newline or line # `%d' is too long"), + file->stptr, i); + long_line = true; + } + + tmp = make_number(i); + lhs = assoc_lookup(symbol, tmp); + unref(tmp); + unref(*lhs); + *lhs = make_string(linebuf, sz); + } + fclose(fp); + sd->load_file = false; /* don't load this file again */ +} + +/* dlload --- load this library */ + +NODE * +dlload(NODE *obj, void *dl) +{ + NODE *a1, *a2; + static sdata_t data; + + a1 = install_array("SYS"); + register_dyn_array(a1, fetch_SYS, store_SYS, & data); + a2 = install_array("READLINE"); + register_deferred_array(a2, load_READLINE, & data); + return make_number((AWKNUM) 0); +} diff --git a/extension/spec_array.c b/extension/spec_array.c new file mode 100644 index 00000000..78b24018 --- /dev/null +++ b/extension/spec_array.c @@ -0,0 +1,416 @@ +/* + * spec_array.c - Support for specialized associative arrays. + */ + +/* + * Copyright (C) 2012 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Programming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. 
+ * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "awk.h" +#include "spec_array.h" + +typedef struct spec_array { + Fetch_func_t fetch_func; + Store_func_t store_func; + Load_func_t load_func; + void *data; +} array_t; + +/* + * The array_t structure is attached to the array itself without + * the necessity to maintain a list of symbols; this works only + * because there is just enough free space in the NODE structure when + * the base array is str_array. + */ + +#define SUPER(F) (*str_array_func[AFUNC(F)]) + + +/* + * deferred_array --- Deferred loading of array at run-time. + * + * The load routine takes two arguments, the array and + * a void * data: + * + * void load_func(NODE *array, void *data) + * + * Use register_deferred_array(array, load_func, void *data) to + * bind an array to the load routine. 
+ */ + +static NODE **deferred_array_init(NODE *, NODE *); +static NODE **deferred_array_lookup(NODE *, NODE *); +static NODE **deferred_array_exists(NODE *, NODE *); +static NODE **deferred_array_remove(NODE *, NODE *); +static NODE **deferred_array_clear(NODE *, NODE *); +static NODE **deferred_array_list(NODE *, NODE *); +static NODE **deferred_array_copy(NODE *, NODE *); +static NODE **deferred_array_length(NODE *, NODE *); + +static afunc_t deferred_array_func[] = { + deferred_array_init, + (afunc_t) 0, /* typeof */ + deferred_array_length, + deferred_array_lookup, + deferred_array_exists, + deferred_array_clear, + deferred_array_remove, + deferred_array_list, + deferred_array_copy, + null_afunc, /* dump */ + (afunc_t) 0, /* store */ +}; + + +/* deferred_array_init --- called when array becomes empty, e.g: delete BOUND_ARRAY */ + +static NODE ** +deferred_array_init(NODE *symbol, NODE *subs) +{ + if (symbol != NULL) { + array_t *av = (array_t *) symbol->xarray; + symbol->xarray = NULL; /* this is to avoid an assertion failure in null_array */ + null_array(symbol); /* typeless empty array */ + if (symbol->parent_array == NULL) { + /* main array */ + symbol->array_funcs = deferred_array_func; /* restore type */ + symbol->xarray = (NODE *) av; + } else if (av) /* sub-array */ + efree(av); + } + return NULL; +} + +/* deferred_array_length --- get the length of the array */ + +static NODE ** +deferred_array_length(NODE *symbol, NODE *subs) +{ + static NODE *length_node; + array_t *av = (array_t *) symbol->xarray; + if (av) { + symbol->xarray = NULL; + (*av->load_func)(symbol, av->data); + symbol->xarray = (NODE *) av; + } + length_node = symbol; + return & length_node; +} + +#define DEF_ARR(F) static NODE ** \ +deferred_array_##F(NODE *symbol, NODE *subs) \ +{ \ + array_t *av = (array_t *) symbol->xarray; \ + if (av) { \ + symbol->xarray = NULL; \ + (*av->load_func)(symbol, av->data); \ + symbol->xarray = (NODE *) av; \ + } \ + return SUPER(a##F)(symbol, subs); \ 
+} + +/* the rest of the routines */ + +DEF_ARR(exists) +DEF_ARR(lookup) +DEF_ARR(list) +DEF_ARR(copy) + +#undef DEF_ARR + +/* deferred_array_remove --- remove the index from the array */ + +static NODE ** +deferred_array_remove(NODE *symbol, NODE *subs) +{ + array_t *av = (array_t *) symbol->xarray; + + (void) SUPER(aremove)(symbol, subs); + if (av) { + symbol->xarray = NULL; + (*av->load_func)(symbol, av->data); + symbol->xarray = (NODE *) av; + } + return NULL; +} + +/* deferred_array_clear --- flush all the values in symbol[] */ + +static NODE ** +deferred_array_clear(NODE *symbol, NODE *subs) +{ + array_t *av = (array_t *) symbol->xarray; + + (void) SUPER(aclear)(symbol, subs); + if (av) { + symbol->xarray = NULL; + (*av->load_func)(symbol, av->data); + symbol->xarray = (NODE *) av; + } + return NULL; +} + + +/* + * dyn_array --- array with triggers for reading and writing + * an element. + * + * The fetch routine should expect three arguments, the array, + * the subscript and optional void * data. It should return the value + * if it exists or NULL otherwise. + * + * NODE *fetch_func(NODE *array, NODE *subs, void *data) + * + * The store routine must take an additional argument for the + * value. The value can be NULL if the specific element is + * removed from the array. The subscript (and the value) is NULL + * when the entire array is deleted. + * + * void store_func(NODE *array, NODE *subs, NODE *value, void *data) + * + * Use register_dyn_array(array, fetch_func, store_func, void *data) to + * bind an array to the fetch/store routine. 
+ */ + + +static NODE **dyn_array_init(NODE *, NODE *); +static NODE **dyn_array_lookup(NODE *, NODE *); +static NODE **dyn_array_exists(NODE *, NODE *); +static NODE **dyn_array_remove(NODE *, NODE *); +static NODE **dyn_array_clear(NODE *, NODE *); +static NODE **dyn_array_list(NODE *, NODE *); +static NODE **dyn_array_copy(NODE *, NODE *); +static NODE **dyn_array_store(NODE *, NODE *); + +static afunc_t dyn_array_func[] = { + dyn_array_init, + (afunc_t) 0, /* typeof */ + null_length, /* length */ + dyn_array_lookup, + dyn_array_exists, + dyn_array_clear, + dyn_array_remove, + dyn_array_list, + dyn_array_copy, + null_afunc, /* dump */ + dyn_array_store, +}; + +/* dyn_array_init --- called when array becomes empty */ + +static NODE ** +dyn_array_init(NODE *symbol, NODE *subs) +{ + if (symbol != NULL) { + array_t *av = (array_t *) symbol->xarray; + symbol->xarray = NULL; + null_array(symbol); /* typeless empty array */ + if (symbol->parent_array == NULL) { + /* main array */ + symbol->array_funcs = dyn_array_func; /* restore type */ + symbol->xarray = (NODE *) av; + } else if (av) /* sub-array */ + efree(av); + } + return NULL; +} + +/* dyn_array_exists --- check if the SUBS exists */ + +static NODE ** +dyn_array_exists(NODE *symbol, NODE *subs) +{ + NODE *r; + array_t *av = (array_t *) symbol->xarray; + + if (av && av->fetch_func) { + symbol->xarray = NULL; + r = (*av->fetch_func)(symbol, subs, av->data); + symbol->xarray = (NODE *) av; + if (r != NULL) { + NODE **lhs; + lhs = SUPER(alookup)(symbol, subs); + unref(*lhs); + *lhs = r; + return lhs; + } + } + + return SUPER(aexists)(symbol, subs); +} + +/* dyn_array_lookup --- lookup SUBS and return a pointer to store its value */ + +static NODE ** +dyn_array_lookup(NODE *symbol, NODE *subs) +{ + NODE **lhs; + NODE *r; + array_t *av = (array_t *) symbol->xarray; + + lhs = SUPER(alookup)(symbol, subs); + if (av && av->fetch_func) { + symbol->xarray = NULL; + r = (*av->fetch_func)(symbol, subs, av->data); + 
symbol->xarray = (NODE *) av; + if (r != NULL) { + unref(*lhs); + *lhs = r; + } + } + return lhs; +} + +/* dyn_array_store --- call the store routine after an assignment */ + +static NODE ** +dyn_array_store(NODE *symbol, NODE *subs) +{ + array_t *av = (array_t *) symbol->xarray; + + if (av && av->store_func) { + NODE **lhs; + lhs = SUPER(aexists)(symbol, subs); + symbol->xarray = NULL; + (*av->store_func)(symbol, subs, *lhs, av->data); + symbol->xarray = (NODE *) av; + } + return NULL; +} + +/* dyn_array_remove --- remove the index from the array */ + +static NODE ** +dyn_array_remove(NODE *symbol, NODE *subs) +{ + array_t *av = (array_t *) symbol->xarray; + + (void) SUPER(aremove)(symbol, subs); + if (av && av->store_func) { + symbol->xarray = NULL; + (*av->store_func)(symbol, subs, NULL, av->data); + symbol->xarray = (NODE *) av; + } + return NULL; +} + +/* dyn_array_clear --- flush all the values in symbol[] */ + +static NODE ** +dyn_array_clear(NODE *symbol, NODE *subs) +{ + array_t *av = (array_t *) symbol->xarray; + + (void) SUPER(aclear)(symbol, subs); + if (av && av->store_func) { + symbol->xarray = NULL; + (*av->store_func)(symbol, NULL, NULL, av->data); + symbol->xarray = (NODE *) av; + } + return NULL; +} + +/* dyn_array_list --- return a list of items in symbol[] */ + +static NODE ** +dyn_array_list(NODE *symbol, NODE *subs) +{ + return SUPER(alist)(symbol, subs); +} + +/* dyn_array_copy --- duplicate the array */ + +static NODE ** +dyn_array_copy(NODE *symbol, NODE *subs) +{ + return SUPER(acopy)(symbol, subs); +} + +/* register_array_s --- attach the specified routine(s) to an array */ + +static void +register_array_s(NODE *symbol, Fetch_func_t fetch_func, + Store_func_t store_func, Load_func_t load_func, void *data) +{ + array_t *av; + + if (symbol->type != Node_var_array) + fatal(_("register_array_s: argument is not an array")); + + if (symbol->array_funcs == deferred_array_func + || symbol->array_funcs == dyn_array_func) + 
fatal(_("register_array_s: `%s' already is a deferred/dyn array"), + array_vname(symbol)); + + assoc_clear(symbol); + assert(symbol->xarray == NULL); + emalloc(av, array_t *, sizeof (array_t), "register_spec_array"); + av->fetch_func = fetch_func; + av->store_func = store_func; + av->load_func = load_func; + av->data = data; + symbol->xarray = (NODE *) av; +} + +/* register_deferred_array --- make the array to be loaded at run-time */ + +void +register_deferred_array(NODE *symbol, Load_func_t load_func, void *dq) +{ + if (! load_func) + fatal(_("register_deferred_array: null load function")); + register_array_s(symbol, 0, 0, load_func, dq); + symbol->array_funcs = deferred_array_func; +} + +/* register_dyn_array --- attach read and write triggers to an array */ + +void +register_dyn_array(NODE *symbol, Fetch_func_t fetch_func, + Store_func_t store_func, void *dq) +{ + register_array_s(symbol, fetch_func, store_func, 0, dq); + symbol->array_funcs = dyn_array_func; +} + +/* unregister_array_s --- un-special the array */ + +void * +unregister_array_s(NODE *symbol) +{ + void *data = NULL; + if (symbol->type != Node_var_array) + fatal(_("unregister_array_s: argument is not an array")); + + if (symbol->array_funcs == dyn_array_func + || symbol->array_funcs == deferred_array_func + ) { + array_t *av; + + av = (array_t *) symbol->xarray; + assert(av != NULL); + data = av->data; + efree(av); + symbol->array_funcs = str_array_func; + symbol->xarray = NULL; + /* FIXME: do we assoc_clear the array ? */ + } + return data; +} diff --git a/extension/spec_array.h b/extension/spec_array.h new file mode 100644 index 00000000..f75fc7ce --- /dev/null +++ b/extension/spec_array.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2012 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Programming Language. 
+ * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + + +typedef NODE *(*Fetch_func_t)(NODE *, NODE *, void *); +typedef void(*Store_func_t)(NODE *, NODE *, NODE *, void *); +typedef void(*Load_func_t)(NODE *, void *); +extern void register_dyn_array(NODE *, Fetch_func_t, Store_func_t, void *); +extern void register_deferred_array(NODE *, Load_func_t, void *); +extern void *unregister_array_s(NODE *); diff --git a/extension/steps b/extension/steps new file mode 100755 index 00000000..3e8070d6 --- /dev/null +++ b/extension/steps @@ -0,0 +1,10 @@ +# what to do under linux to make dl.so +# Sun Nov 25 21:40:49 IST 2012 + +gcc -fPIC -shared -Wall -DGAWK -DHAVE_CONFIG_H -c -O -g -I.. spec_array.c +gcc -fPIC -shared -Wall -DGAWK -DHAVE_CONFIG_H -c -O -g -I.. sparr.c +gcc -fPIC -shared -Wall -DGAWK -DHAVE_CONFIG_H -c -O -g -I.. bindarr.c +gcc -fPIC -shared -Wall -DGAWK -DHAVE_CONFIG_H -c -O -g -I.. fileop.c +gcc -o sparr.so -shared sparr.o spec_array.o +gcc -o bindarr.so -shared bindarr.o +gcc -o fileop.so -shared fileop.o diff --git a/extension/testdbarray.awk b/extension/testdbarray.awk new file mode 100644 index 00000000..fd7fd595 --- /dev/null +++ b/extension/testdbarray.awk @@ -0,0 +1,21 @@ +@include "dbarray.awk" + +# $ ../gawk -f testdbarray.awk +# $ ../gawk -f testdbarray.awk +# ... 
+# $ ../gawk -vINIT=1 -f testdbarray.awk + + +BEGIN { + # bind array 'A' to the table 'table_A' in sqlite3 database 'testdb' + db_bind(A, "testdb", "table_A") + + if (INIT) # delete table and start over + delete A + + lenA = length(A) + A[++lenA] = strftime() + PROCINFO["sorted_in"] = "@ind_num_asc" + for (item in A) + print item, ":", A[item] +} diff --git a/extension/testrecord.sh b/extension/testrecord.sh new file mode 100755 index 00000000..61d1ba76 --- /dev/null +++ b/extension/testrecord.sh @@ -0,0 +1,19 @@ +#!/bin/sh + +AWK=../gawk +$AWK 'BEGIN { OFS = ORS = ""; for (j = 1; j <= 4; j++) for (i = 1; i <= 16; i++) print j}' > _rec.in +for i in 1 2 3 4 5 +do +$AWK -f record.awk -vinfile='_rec.in' -e 'BEGIN { +reclen = 16 +record(r, infile, reclen, "r+") +FIELDWIDTHS="8 4 4" +for (i = 1; i in r; i++) { + $0 = r[i] + print $1 +} +delete r[1] +unbind_array(r) +print "--" }' +done +rm -f _rec.in diff --git a/extension/testsparr.awk b/extension/testsparr.awk new file mode 100644 index 00000000..648a21a2 --- /dev/null +++ b/extension/testsparr.awk @@ -0,0 +1,18 @@ +# ../gawk -lsparr -f testsparr.awk +BEGIN { + extension("sparr") + print SYS["time"] + SYS["readline"] = "sparr.c"; + printf("File %s has %d lines\n", SYS["readline"], length(READLINE)) + SYS["readline"] = "testsparr.awk"; + printf("File %s has %d lines\n", SYS["readline"], length(READLINE)) + for (i = 1; i in READLINE; i++) + print READLINE[i] + + system("sleep 1") + +# PROCINFO["/dev/stdin", "READ_TIMEOUT"] = 1000 +# getline < "/dev/stdin" + + print SYS["time"] +} @@ -939,9 +939,11 @@ set_element(long num, char *s, long len, NODE *n) it->flags |= MAYBE_NUM; sub = make_number((AWKNUM) (num)); lhs = assoc_lookup(n, sub); - unref(sub); unref(*lhs); *lhs = it; + if (n->astore != NULL) + (*n->astore)(n, sub); + unref(sub); } /* do_split --- implement split(), semantics are same as for field splitting */ @@ -900,7 +900,7 @@ api_create_array(awk_ext_id_t id) getnode(n); memset(n, 0, sizeof(NODE)); - 
init_array(n); + null_array(n); return (awk_array_t) n; } diff --git a/int_array.c b/int_array.c index 2909f6f9..769ac9bb 100644 --- a/int_array.c +++ b/int_array.c @@ -45,9 +45,10 @@ static inline NODE **int_find(NODE *symbol, long k, uint32_t hash1); static NODE **int_insert(NODE *symbol, long k, uint32_t hash1); static void grow_int_table(NODE *symbol); -array_ptr int_array_func[] = { +afunc_t int_array_func[] = { int_array_init, is_integer, + null_length, int_lookup, int_exists, int_clear, @@ -55,22 +56,27 @@ array_ptr int_array_func[] = { int_list, int_copy, int_dump, + (afunc_t) 0, }; -/* int_array_init --- check relevant environment variables */ +/* int_array_init --- array initialization routine */ static NODE ** -int_array_init(NODE *symbol ATTRIBUTE_UNUSED, NODE *subs ATTRIBUTE_UNUSED) +int_array_init(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED) { - long newval; + if (symbol == NULL) { /* first time */ + long newval; + + /* check relevant environment variables */ + if ((newval = getenv_long("INT_CHAIN_MAX")) > 0) + INT_CHAIN_MAX = newval; + } else + null_array(symbol); - if ((newval = getenv_long("INT_CHAIN_MAX")) > 0) - INT_CHAIN_MAX = newval; return (NODE **) ! NULL; } - /* is_integer --- check if subscript is an integer */ NODE ** @@ -147,7 +153,8 @@ is_integer(NODE *symbol, NODE *subs) } -/* int_lookup --- Find SYMBOL[SUBS] in the assoc array. Install it with value "" +/* + * int_lookup --- Find SYMBOL[SUBS] in the assoc array. Install it with value "" * if it isn't there. Returns a pointer ala get_lhs to where its value is stored. */ @@ -160,7 +167,8 @@ int_lookup(NODE *symbol, NODE *subs) NODE **lhs; NODE *xn; - /* N.B: symbol->table_size is the total # of non-integers (symbol->xarray) + /* + * N.B: symbol->table_size is the total # of non-integers (symbol->xarray) * and integer elements. Also, symbol->xarray must have at least one * item in it, and can not exist if there are no integer elements. 
* In that case, symbol->xarray is promoted to 'symbol' (See int_remove). @@ -207,7 +215,8 @@ int_lookup(NODE *symbol, NODE *subs) } -/* int_exists --- test whether the array element symbol[subs] exists or not, +/* + * int_exists --- test whether the array element symbol[subs] exists or not, * return pointer to value if it does. */ @@ -266,8 +275,7 @@ int_clear(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED) } if (symbol->buckets != NULL) efree(symbol->buckets); - init_array(symbol); /* re-initialize symbol */ - symbol->flags &= ~ARRAYMAXED; + symbol->ainit(symbol, NULL); /* re-initialize symbol */ return NULL; } @@ -338,9 +346,7 @@ removed: BUCKET *head = symbol->buckets[hash1]; assert(b->aicount == 1); - /* move the last element from head - * to bucket to make it full. - */ + /* move the last element from head to bucket to make it full. */ i = --head->aicount; /* head has one less element */ b->ainum[1] = head->ainum[i]; b->aivalue[1] = head->aivalue[i]; @@ -356,8 +362,7 @@ removed: symbol->table_size--; if (xn == NULL && symbol->table_size == 0) { efree(symbol->buckets); - init_array(symbol); /* re-initialize array 'symbol' */ - symbol->flags &= ~ARRAYMAXED; + symbol->ainit(symbol, NULL); /* re-initialize array 'symbol' */ } else if (xn != NULL && symbol->table_size == xn->table_size) { /* promote xn (str_array) to symbol */ xn->flags &= ~XARRAY; @@ -421,6 +426,7 @@ int_copy(NODE *symbol, NODE *newsymb) } *pnew = newchain; + newchain->ainext = NULL; pnew = & newchain->ainext; } } @@ -456,15 +462,17 @@ int_list(NODE *symbol, NODE *t) int j, elem_size = 1; long num; static char buf[100]; + assoc_kind_t assoc_kind; if (symbol->table_size == 0) return NULL; + assoc_kind = (assoc_kind_t) t->flags; num_elems = symbol->table_size; - if ((t->flags & (AINDEX|AVALUE|ADELETE)) == (AINDEX|ADELETE)) + if ((assoc_kind & (AINDEX|AVALUE|ADELETE)) == (AINDEX|ADELETE)) num_elems = 1; - if ((t->flags & (AINDEX|AVALUE)) == (AINDEX|AVALUE)) + if ((assoc_kind & (AINDEX|AVALUE)) == 
(AINDEX|AVALUE)) elem_size = 2; list_size = elem_size * num_elems; @@ -486,7 +494,7 @@ int_list(NODE *symbol, NODE *t) for (j = 0; j < b->aicount; j++) { /* index */ num = b->ainum[j]; - if ((t->flags & AISTR) != 0) { + if ((assoc_kind & AISTR) != 0) { sprintf(buf, "%ld", num); subs = make_string(buf, strlen(buf)); subs->numbr = num; @@ -498,12 +506,12 @@ int_list(NODE *symbol, NODE *t) list[k++] = subs; /* value */ - if ((t->flags & AVALUE) != 0) { + if ((assoc_kind & AVALUE) != 0) { r = b->aivalue[j]; if (r->type == Node_val) { - if ((t->flags & AVNUM) != 0) + if ((assoc_kind & AVNUM) != 0) (void) force_number(r); - else if ((t->flags & AVSTR) != 0) + else if ((assoc_kind & AVSTR) != 0) r = force_string(r); } list[k++] = r; @@ -659,14 +667,13 @@ static uint32_t int_hash(uint32_t k, uint32_t hsize) { -/* Code snippet copied from: +/* + * Code snippet copied from: * Hash functions (http://www.azillionmonkeys.com/qed/hash.html). * Copyright 2004-2008 by Paul Hsieh. Licenced under LGPL 2.1. */ - /* This is the final mixing function used by Paul Hsieh - * in SuperFastHash. - */ + /* This is the final mixing function used by Paul Hsieh in SuperFastHash. */ k ^= k << 3; k += k >> 5; @@ -709,9 +716,7 @@ int_insert(NODE *symbol, long k, uint32_t hash1) b = symbol->buckets[hash1]; - /* Only the first bucket in the chain can be partially full, - * but is never empty. - */ + /* Only the first bucket in the chain can be partially full, but is never empty. */ if (b == NULL || (i = b->aicount) == 2) { getbucket(b); diff --git a/interpret.h b/interpret.h index 21cd9a80..228a3f3e 100644 --- a/interpret.h +++ b/interpret.h @@ -37,6 +37,9 @@ r_interpret(INSTRUCTION *code) AWKNUM x, x2; int di; Regexp *rp; + NODE *set_array = NULL; /* array with a post-assignment routine */ + NODE *set_idx = NULL; /* the index of the array element */ + /* array subscript */ #define mk_sub(n) (n == 1 ? POP_SCALAR() : concat_exp(n, true)) @@ -103,6 +106,8 @@ top: */ if (stdio_problem && ! 
exiting && exit_val == 0) exit_val = 1; + + close_extensions(); } break; @@ -248,9 +253,15 @@ top: if (r == NULL) { r = make_array(); r->parent_array = t1; - *assoc_lookup(t1, t2) = r; + lhs = assoc_lookup(t1, t2); + unref(*lhs); + *lhs = r; t2 = force_string(t2); r->vname = estrdup(t2->stptr, t2->stlen); /* the subscript in parent array */ + + /* execute post-assignment routine if any */ + if (t1->astore != NULL) + (*t1->astore)(t1, t2); } else if (r->type != Node_var_array) { t2 = force_string(t2); fatal(_("attempt to use scalar `%s[\"%.*s\"]' as an array"), @@ -280,7 +291,36 @@ top: array_vname(t1), (int) t2->stlen, t2->stptr); } - DEREF(t2); + /* + * Changing something in FUNCTAB is not allowed. + * + * SYMTAB is a little more messy. Three kinds of values may + * be stored in SYMTAB: + * 1. Variables that don't yet have a value (Node_var_new) + * 2. Variables that have a value (Node_var) + * 3. Values that awk code stuck into SYMTAB not related to variables (Node_value) + * For 1, since we are giving it a value, we have to change the type to Node_var. + * For 1 and 2, we have to step through the Node_var to get to the value. + * For 3, we just use the value we got from assoc_lookup(), above. 
+ */ + if (t1 == func_table) + fatal(_("cannot assign to elements of FUNCTAB")); + else if ( t1 == symbol_table + && ( (*lhs)->type == Node_var + || (*lhs)->type == Node_var_new)) { + (*lhs)->type = Node_var; /* in case was Node_var_new */ + lhs = & ((*lhs)->var_value); /* extra level of indirection */ + } + + assert(set_idx == NULL); + + if (t1->astore) { + /* array has post-assignment routine */ + set_array = t1; + set_idx = t2; + } else + DEREF(t2); + PUSH_ADDRESS(lhs); break; @@ -544,10 +584,11 @@ mod: break; case Op_store_sub: - /* array[sub] assignment optimization, + /* + * array[sub] assignment optimization, * see awkgram.y (optimize_assignment) */ - t1 = get_array(pc->memory, true); /* array */ + t1 = force_array(pc->memory, true); /* array */ t2 = mk_sub(pc->expr_count); /* subscript */ lhs = assoc_lookup(t1, t2); if ((*lhs)->type == Node_var_array) { @@ -580,10 +621,17 @@ mod: unref(*lhs); *lhs = POP_SCALAR(); + + /* execute post-assignment routine if any */ + if (t1->astore != NULL) + (*t1->astore)(t1, t2); + + DEREF(t2); break; case Op_store_var: - /* simple variable assignment optimization, + /* + * simple variable assignment optimization, * see awkgram.y (optimize_assignment) */ @@ -671,6 +719,30 @@ mod: REPLACE(r); break; + case Op_subscript_assign: + /* conditionally execute post-assignment routine for an array element */ + + if (set_idx != NULL) { + di = true; + if (pc->assign_ctxt == Op_sub_builtin + && (r = TOP()) + && get_number_si(r) == 0 /* no substitution performed */ + ) + di = false; + else if ((pc->assign_ctxt == Op_K_getline + || pc->assign_ctxt == Op_K_getline_redir) + && (r = TOP()) + && get_number_si(r) <= 0 /* EOF or error */ + ) + di = false; + + if (di) + (*set_array->astore)(set_array, set_idx); + unref(set_idx); + set_idx = NULL; + } + break; + /* numeric assignments */ case Op_assign_plus: case Op_assign_minus: @@ -775,11 +847,10 @@ mod: array = POP_ARRAY(); /* sanity: check if empty */ - if (array_empty(array)) + num_elems = 
assoc_length(array); + if (num_elems == 0) goto arrayfor; - num_elems = array->table_size; - if (sorted_in == NULL) /* do this once */ sorted_in = make_string("sorted_in", 9); @@ -842,12 +913,16 @@ arrayfor: break; case Op_ext_builtin: + case Op_old_ext_builtin: { int arg_count = pc->expr_count; awk_value_t result; PUSH_CODE(pc); - r = awk_value_to_node(pc->extfunc(arg_count, & result)); + if (op == Op_ext_builtin) + r = awk_value_to_node(pc->extfunc(arg_count, & result)); + else + r = pc->builtin(arg_count); (void) POP_CODE(); while (arg_count-- > 0) { t1 = POP(); @@ -953,7 +1028,7 @@ match_re: } if (f == NULL || f->type != Node_func) { - if (f->type == Node_ext_func) + if (f->type == Node_ext_func || f->type == Node_old_ext_func) fatal(_("cannot (yet) call extension functions indirectly")); else fatal(_("function called indirectly through `%s' does not exist"), @@ -973,19 +1048,22 @@ match_re: f = pc->func_body; if (f == NULL) { f = lookup(pc->func_name); - if (f == NULL || (f->type != Node_func && f->type != Node_ext_func)) + if (f == NULL || (f->type != Node_func && f->type != Node_ext_func && f->type != Node_old_ext_func)) fatal(_("function `%s' not defined"), pc->func_name); pc->func_body = f; /* save for next call */ } - if (f->type == Node_ext_func) { + if (f->type == Node_ext_func || f->type == Node_old_ext_func) { INSTRUCTION *bc; char *fname = pc->func_name; int arg_count = (pc + 1)->expr_count; bc = f->code_ptr; assert(bc->opcode == Op_symbol); - pc->opcode = Op_ext_builtin; /* self modifying code */ + if (f->type == Node_ext_func) + pc->opcode = Op_ext_builtin; /* self modifying code */ + else + pc->opcode = Op_old_ext_builtin; /* self modifying code */ pc->extfunc = bc->extfunc; pc->expr_count = arg_count; /* actual argument count */ (pc + 1)->func_name = fname; /* name of the builtin */ @@ -173,5 +173,8 @@ final_exit(int status) /* run any extension exit handlers */ run_ext_exit_handlers(status); + /* we could close_io() here */ + close_extensions(); 
+ exit(status); } Binary files differ@@ -14,10 +14,10 @@ msgstr "" "PO-Revision-Date: 2012-02-06 10:37+0100\n" "Last-Translator: Keld Simonsen <keld@keldix.com>\n" "Language-Team: Danish <dansk@dansk-gruppen.dk>\n" -"Language: da\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=iso-8859-1\n" "Content-Transfer-Encoding: 8bit\n" +"Language: da\n" "X-Generator: Lokalize 1.0\n" "Plural-Forms: nplurals=2; plural=(n != 1);\n" @@ -2197,13 +2197,20 @@ msgstr "sqrt: kaldt med negativt argument %g" #: extension/rwarray.c:127 #, fuzzy, c-format + +msgid "" +"extension: library `%s': does not define `plugin_is_GPL_compatible' (%s)\n" +msgstr "" +"fatalt: extension: bibliotek '%s': definer ikke 'plugin_is_GPL_compatible' (%" +"s)\n" + msgid "do_writea: argument 0 is not a string\n" -msgstr "exp: argumentet %g er uden for det tilladte område" +msgstr "exp: argumentet %g er uden for det tilladte område\n" #: extension/rwarray.c:133 #, fuzzy, c-format msgid "do_writea: argument 1 is not an array\n" -msgstr "split: fjerde argument er ikke et array" +msgstr "split: fjerde argument er ikke et array\n" #: extension/rwarray.c:180 #, c-format Binary files differ@@ -12,10 +12,10 @@ msgstr "" "PO-Revision-Date: 2012-01-30 16:21+0100\n" "Last-Translator: Philipp Thomas <pth@suse.de>\n" "Language-Team: German <translation-team-de@lists.sourceforge.net>\n" -"Language: de\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" +"Language: de\n" #: array.c:252 #, c-format @@ -129,8 +129,8 @@ msgstr "»%s« ist eine eingebaute Funktion und kann nicht umdefiniert werden" #: awkgram.y:414 msgid "regexp constant `//' looks like a C++ comment, but is not" msgstr "" -"Die Regulärer-Ausdruck-Konstante »//« sieht wie ein C-Kommentar aus, ist " -"aber keiner" +"Die Regulärer-Ausdruck-Konstante »//« sieht wie ein C-Kommentar aus, ist aber " +"keiner" #: awkgram.y:418 #, c-format @@ -493,8 +493,8 @@ msgstr "Funktion »%s«: Funktionsnamen können 
nicht als Parameternamen benutze #, c-format msgid "function `%s': can't use special variable `%s' as a function parameter" msgstr "" -"Funktion »%s«: die spezielle Variable »%s« kann nicht als Parameter " -"verwendet werden" +"Funktion »%s«: die spezielle Variable »%s« kann nicht als Parameter verwendet " +"werden" #: awkgram.y:4248 #, c-format @@ -564,8 +564,7 @@ msgstr "" #, c-format msgid "fflush: cannot flush: file `%s' opened for reading, not writing" msgstr "" -"fflush: Leeren der Puffer nicht möglich, Datei »%s« ist nur zum Lesen " -"geöffnet" +"fflush: Leeren der Puffer nicht möglich, Datei »%s« ist nur zum Lesen geöffnet" #: builtin.c:239 #, c-format @@ -2482,8 +2481,7 @@ msgstr "%s: Die Option »-W %s« erfordert ein Argument\n" #: io.c:347 #, c-format msgid "command line argument `%s' is a directory: skipped" -msgstr "" -"das Kommandozeilen-Argument »%s« ist ein Verzeichnis: wird übersprungen" +msgstr "das Kommandozeilen-Argument »%s« ist ein Verzeichnis: wird übersprungen" #: io.c:350 io.c:463 #, c-format @@ -2582,8 +2580,8 @@ msgstr "»close« für eine Umlenkung, die nie geöffnet wurde" #, c-format msgid "close: redirection `%s' not opened with `|&', second argument ignored" msgstr "" -"close: Umlenkung »%s« wurde nicht mit »[&« geöffnet, das zweite Argument " -"wird ignoriert" +"close: Umlenkung »%s« wurde nicht mit »[&« geöffnet, das zweite Argument wird " +"ignoriert" #: io.c:1167 #, c-format @@ -3356,14 +3354,12 @@ msgstr "redir2str: unbekannter Umlenkungstyp %d" #: re.c:568 #, c-format msgid "range of the form `[%c-%c]' is locale dependent" -msgstr "" -"Ein Bereich in der Form »[%c-%c]« ist abhängig von der gesetzten Locale" +msgstr "Ein Bereich in der Form »[%c-%c]« ist abhängig von der gesetzten Locale" #: re.c:595 #, c-format msgid "regexp component `%.*s' should probably be `[%.*s]'" -msgstr "" -"Regulärer-Ausdruck-Komponente »%.*s« sollte wahrscheinlich »[%.*s]« sein" +msgstr "Regulärer-Ausdruck-Komponente »%.*s« sollte wahrscheinlich »[%.*s]« 
sein" #: regcomp.c:131 msgid "Success" Binary files differ@@ -11,10 +11,10 @@ msgstr "" "PO-Revision-Date: 2012-01-30 07:42-0600\n" "Last-Translator: Cristian Othón Martínez Vera <cfuga@cfuga.mx>\n" "Language-Team: Spanish <es@li.org>\n" -"Language: es\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" +"Language: es\n" #: array.c:252 #, c-format @@ -2204,6 +2204,21 @@ msgstr "sqrt: se llamó con el argumento negativo %g" msgid "chr: called with no arguments" msgstr "sqrt: se llamó con el argumento negativo %g" +#: ext.c:86 +#, fuzzy, c-format +msgid "" +"extension: library `%s': does not define `plugin_is_GPL_compatible' (%s)\n" +msgstr "" +"fatal: extension: la biblioteca `%s': no define `plugin_is_GPL_compatible' (%" +"s)\n" + +#: ext.c:91 +#, fuzzy, c-format +msgid "extension: library `%s': cannot call function `%s' (%s)\n" +msgstr "" +"fatal: extension: la biblioteca `%s': no puede llamar a la función `%s' (%" +"s)\n" + #: extension/ordchr.c:111 #, fuzzy msgid "chr: called with inappropriate argument(s)" @@ -2697,8 +2712,8 @@ msgstr "falló al cerrar la entrada estándar en el hijo (%s)" #, c-format msgid "moving slave pty to stdin in child failed (dup: %s)" msgstr "" -"falló el movimiento del pty esclavo a la entrada estándar en el hijo (dup: " -"%s)" +"falló el movimiento del pty esclavo a la entrada estándar en el hijo (dup: %" +"s)" #: io.c:1855 io.c:1876 #, c-format Binary files differ@@ -11,10 +11,10 @@ msgstr "" "PO-Revision-Date: 2012-03-13 18:00+0200\n" "Last-Translator: Jorma Karvonen <karvonen.jorma@gmail.com>\n" "Language-Team: Finnish <translation-team-fi@lists.sourceforge.net>\n" -"Language: fi\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" +"Language: fi\n" "Plural-Forms: nplurals=2; plural=(n != 1);\n" #: array.c:252 @@ -478,6 +478,7 @@ cleanup: case Op_var_update: case Op_var_assign: case Op_field_assign: + case Op_subscript_assign: 
case Op_arrayfor_init: case Op_arrayfor_incr: case Op_arrayfor_final: diff --git a/str_array.c b/str_array.c index db6031d4..e5b3b400 100644 --- a/str_array.c +++ b/str_array.c @@ -55,9 +55,10 @@ static NODE **str_list(NODE *symbol, NODE *subs); static NODE **str_copy(NODE *symbol, NODE *newsymb); static NODE **str_dump(NODE *symbol, NODE *ndump); -array_ptr str_array_func[] = { +afunc_t str_array_func[] = { str_array_init, - (array_ptr) 0, + (afunc_t) 0, + null_length, str_lookup, str_exists, str_clear, @@ -65,6 +66,7 @@ array_ptr str_array_func[] = { str_list, str_copy, str_dump, + (afunc_t) 0, }; static inline NODE **str_find(NODE *symbol, NODE *s1, size_t code1, unsigned long hash1); @@ -77,18 +79,23 @@ static unsigned long awk_hash(const char *s, size_t len, unsigned long hsize, si unsigned long (*hash)(const char *s, size_t len, unsigned long hsize, size_t *code) = awk_hash; -/* str_array_init --- check relevant environment variables */ +/* str_array_init --- array initialization routine */ static NODE ** str_array_init(NODE *symbol ATTRIBUTE_UNUSED, NODE *subs ATTRIBUTE_UNUSED) { - long newval; - const char *val; + if (symbol == NULL) { /* first time */ + long newval; + const char *val; + + /* check relevant environment variables */ + if ((newval = getenv_long("STR_CHAIN_MAX")) > 0) + STR_CHAIN_MAX = newval; + if ((val = getenv("AWK_HASH")) != NULL && strcmp(val, "gst") == 0) + hash = gst_hash_string; + } else + null_array(symbol); - if ((newval = getenv_long("STR_CHAIN_MAX")) > 0) - STR_CHAIN_MAX = newval; - if ((val = getenv("AWK_HASH")) != NULL && strcmp(val, "gst") == 0) - hash = gst_hash_string; return (NODE **) ! 
NULL; } @@ -217,8 +224,7 @@ str_clear(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED) if (symbol->buckets != NULL) efree(symbol->buckets); - init_array(symbol); /* re-initialize symbol */ - symbol->flags &= ~ARRAYMAXED; + symbol->ainit(symbol, NULL); /* re-initialize symbol */ return NULL; } @@ -264,8 +270,7 @@ str_remove(NODE *symbol, NODE *subs) if (--symbol->table_size == 0) { if (symbol->buckets != NULL) efree(symbol->buckets); - init_array(symbol); /* re-initialize symbol */ - symbol->flags &= ~ARRAYMAXED; + symbol->ainit(symbol, NULL); /* re-initialize symbol */ } return (NODE **) ! NULL; /* return success */ @@ -327,6 +332,7 @@ str_copy(NODE *symbol, NODE *newsymb) newchain->ahcode = chain->ahcode; *pnew = newchain; + newchain->ahnext = NULL; pnew = & newchain->ahnext; } } @@ -349,16 +355,18 @@ str_list(NODE *symbol, NODE *t) BUCKET *b; unsigned long num_elems, list_size, i, k = 0; int elem_size = 1; + assoc_kind_t assoc_kind; if (symbol->table_size == 0) return NULL; - if ((t->flags & (AINDEX|AVALUE)) == (AINDEX|AVALUE)) + assoc_kind = (assoc_kind_t) t->flags; + if ((assoc_kind & (AINDEX|AVALUE)) == (AINDEX|AVALUE)) elem_size = 2; /* allocate space for array */ num_elems = symbol->table_size; - if ((t->flags & (AINDEX|AVALUE|ADELETE)) == (AINDEX|ADELETE)) + if ((assoc_kind & (AINDEX|AVALUE|ADELETE)) == (AINDEX|ADELETE)) num_elems = 1; list_size = elem_size * num_elems; @@ -370,17 +378,17 @@ str_list(NODE *symbol, NODE *t) for (b = symbol->buckets[i]; b != NULL; b = b->ahnext) { /* index */ subs = b->ahname; - if ((t->flags & AINUM) != 0) + if ((assoc_kind & AINUM) != 0) (void) force_number(subs); list[k++] = dupnode(subs); /* value */ - if ((t->flags & AVALUE) != 0) { + if ((assoc_kind & AVALUE) != 0) { val = b->ahvalue; if (val->type == Node_val) { - if ((t->flags & AVNUM) != 0) + if ((assoc_kind & AVNUM) != 0) (void) force_number(val); - else if ((t->flags & AVSTR) != 0) + else if ((assoc_kind & AVSTR) != 0) val = force_string(val); } list[k++] = val; @@ 
-55,11 +55,11 @@ init_symbol_table() { getnode(global_table); memset(global_table, '\0', sizeof(NODE)); - init_array(global_table); + null_array(global_table); getnode(param_table); memset(param_table, '\0', sizeof(NODE)); - init_array(param_table); + null_array(param_table); installing_specials = true; func_table = install_symbol(estrdup("FUNCTAB", 7), Node_var_array); @@ -280,7 +280,7 @@ make_symbol(char *name, NODETYPE type) getnode(r); memset(r, '\0', sizeof(NODE)); if (type == Node_var_array) - init_array(r); + null_array(r); else if (type == Node_var) r->var_value = dupnode(Nnull_string); r->vname = name; @@ -449,7 +449,7 @@ print_vars(NODE **table, int (*print_func)(FILE *, const char *, ...), FILE *fp) continue; print_func(fp, "%s: ", r->vname); if (r->type == Node_var_array) - print_func(fp, "array, %ld elements\n", r->table_size); + print_func(fp, "array, %ld elements\n", assoc_length(r)); else if (r->type == Node_var_new) print_func(fp, "untyped variable\n"); else if (r->type == Node_var) @@ -549,7 +549,7 @@ load_symbols() getnode(sym_array); memset(sym_array, '\0', sizeof(NODE)); /* PPC Mac OS X wants this */ - init_array(sym_array); + null_array(sym_array); unref(*aptr); *aptr = sym_array; |