diff options
Diffstat (limited to 'awk.h')
-rw-r--r-- | awk.h | 162 |
1 file changed, 124 insertions, 38 deletions
@@ -1,23 +1,23 @@ /* - * awk.h -- Definitions for gawk. + * awk.h -- Definitions for gawk. */ -/* +/* * Copyright (C) 1986, 1988, 1989, 1991-2016 the Free Software Foundation, Inc. - * + * * This file is part of GAWK, the GNU implementation of the * AWK Programming Language. - * + * * GAWK is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. - * + * * GAWK is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA @@ -178,6 +178,10 @@ extern void *memset_ulong(void *dest, int val, unsigned long l); #define memset memset_ulong #endif +#ifdef HAVE_FWRITE_UNLOCKED +#define fwrite fwrite_unlocked +#endif /* HAVE_FWRITE_UNLOCKED */ + #if defined(__EMX__) || defined(__MINGW32__) #include "nonposix.h" #endif /* defined(__EMX__) || defined(__MINGW32__) */ @@ -206,11 +210,9 @@ typedef struct Regexp { struct re_pattern_buffer pat; struct re_registers regs; struct dfa *dfareg; - short dfa; - short has_anchor; /* speed up of avoid_dfa kludge, temporary */ - short non_empty; /* for use in fpat_parse_field */ - short has_meta; /* re has meta chars so (probably) isn't simple string */ - short maybe_long; /* re has meta chars that can match long text */ + bool non_empty; /* for use in fpat_parse_field */ + bool has_meta; /* re has meta chars so (probably) isn't simple string */ + bool maybe_long; /* re has meta chars that can match long text */ } Regexp; #define RESTART(rp,s) (rp)->regs.start[0] #define REEND(rp,s) 
(rp)->regs.end[0] @@ -219,6 +221,7 @@ typedef struct Regexp { #define NUMSUBPATS(rp,s) (rp)->regs.num_regs /* regexp matching flags: */ +#define RE_NO_FLAGS 0 /* empty flags */ #define RE_NEED_START 1 /* need to know start/end of match */ #define RE_NO_BOL 2 /* not allowed to match ^ in regexp */ @@ -272,7 +275,6 @@ typedef enum nodevals { Node_param_list, /* lnode is a variable, rnode is more list */ Node_func, /* lnode is param. list, rnode is body */ Node_ext_func, /* extension function, code_ptr is builtin code */ - Node_old_ext_func, /* extension function, code_ptr is builtin code */ Node_builtin_func, /* built-in function, main use is for FUNCTAB */ Node_array_ref, /* array passed by ref as parameter */ @@ -392,8 +394,11 @@ typedef struct exp_node { /* type = Node_val */ /* - * STRING and NUMBER are mutually exclusive. They represent the - * type of a value as assigned. + * STRING and NUMBER are mutually exclusive, except for the special + * case of an uninitialized value, represented internally by + * Nnull_string. They represent the type of a value as assigned. + * Nnull_string has both STRING and NUMBER attributes, but all other + * scalar values should have precisely one of these bits set. * * STRCUR and NUMCUR are not mutually exclusive. They represent that * the particular type of value is up to date. For example, @@ -408,7 +413,8 @@ typedef struct exp_node { * * MAYBE_NUM is the joker. It means "this is string data, but * the user may have really wanted it to be a number. If we have - * to guess, like in a comparison, turn it into a number." + * to guess, like in a comparison, turn it into a number if the string + * is indeed numeric." * For example, gawk -v a=42 .... 
* Here, `a' gets STRING|STRCUR|MAYBE_NUM and then when used where * a number is needed, it gets turned into a NUMBER and STRING @@ -453,6 +459,7 @@ typedef struct exp_node { # define HALFHAT 0x10000 /* half-capacity Hashed Array Tree; * See cint_array.c */ # define XARRAY 0x20000 +# define NUMCONSTSTR 0x40000 /* have string value for numeric constant */ } NODE; #define vname sub.nodep.name @@ -480,6 +487,13 @@ typedef struct exp_node { #define re_cnt flags /* Node_val */ +/* + * Note that the string in stptr may not be NUL-terminated, but it is + * guaranteed to have at least one extra byte that may be temporarily set + * to '\0'. This is helpful when calling functions such as strtod that require + * a NUL-terminated argument. In particular, field values $n for n > 0 and + * n < NF will not have a NUL terminator, since they point into the $0 buffer. + */ #define stptr sub.val.sp #define stlen sub.val.slen #define valref sub.val.sref @@ -494,6 +508,16 @@ typedef struct exp_node { #define numbr sub.val.fltnum #endif +/* + * If stfmt is set to STFMT_UNUSED, it means that the string representation + * stored in stptr is not a function of the value of CONVFMT or OFMT. That + * indicates that either the string value was explicitly assigned, or it + * was converted from a NUMBER that has an integer value. When stfmt is not + * set to STFMT_UNUSED, it is an offset into the fmt_list array of distinct + * CONVFMT and OFMT node pointers. 
+ */ +#define STFMT_UNUSED -1 + /* Node_arrayfor */ #define for_list sub.nodep.r.av #define for_list_size sub.nodep.reflags @@ -522,7 +546,7 @@ typedef struct exp_node { #define array_size sub.nodep.cnt #define array_capacity sub.nodep.reserved #define xarray sub.nodep.rn -#define parent_array sub.nodep.x.extra +#define parent_array sub.nodep.x.extra #define ainit array_funcs[0] #define ainit_ind 0 @@ -556,6 +580,11 @@ typedef struct exp_node { #define adepth sub.nodep.l.ll #define alevel sub.nodep.x.xl +/* Op_comment */ +#define comment_type sub.val.idx +#define EOL_COMMENT 1 +#define FULL_COMMENT 2 + /* --------------------------------lint warning types----------------------------*/ typedef enum lintvals { LINT_illegal, @@ -597,6 +626,7 @@ typedef enum opcodeval { Op_postincrement, Op_postdecrement, Op_unary_minus, + Op_unary_plus, Op_field_spec, /* unary relationals */ @@ -633,7 +663,7 @@ typedef enum opcodeval { Op_nomatch, Op_rule, - + /* keywords */ Op_K_case, Op_K_default, @@ -654,7 +684,6 @@ typedef enum opcodeval { Op_builtin, Op_sub_builtin, /* sub, gsub and gensub */ Op_ext_builtin, - Op_old_ext_builtin, /* temporary */ Op_in_array, /* boolean test of membership in array */ /* function call instruction */ @@ -663,6 +692,7 @@ typedef enum opcodeval { Op_push, /* scalar variable */ Op_push_arg, /* variable type (scalar or array) argument to built-in */ + Op_push_arg_untyped, /* like Op_push_arg, but for typeof */ Op_push_i, /* number, string */ Op_push_re, /* regex */ Op_push_array, @@ -690,6 +720,7 @@ typedef enum opcodeval { Op_func, + Op_comment, /* for pretty printing */ Op_exec_count, Op_breakpoint, Op_lint, @@ -697,13 +728,13 @@ typedef enum opcodeval { Op_stop, /* parsing (yylex and yyparse), should never appear in valid compiled code */ - Op_token, + Op_token, Op_symbol, Op_list, /* program structures -- for use in the profiler/pretty printer */ Op_K_do, - Op_K_for, + Op_K_for, Op_K_arrayfor, Op_K_while, Op_K_switch, @@ -772,7 +803,7 @@ typedef 
struct exp_instruction { /* Op_K_exit */ #define target_end d.di -#define target_atexit x.xi +#define target_atexit x.xi /* Op_newfile, Op_K_getline, Op_nextfile */ #define target_endfile x.xi @@ -861,7 +892,7 @@ typedef struct exp_instruction { #define field_assign x.aptr /* Op_field_assign, Op_var_assign */ -#define assign_ctxt d.dl +#define assign_ctxt d.dl /* Op_concat */ #define concat_flag d.dl @@ -894,7 +925,7 @@ typedef struct exp_instruction { /* Op_line_range */ #define condpair_left d.di -#define condpair_right x.xi +#define condpair_right x.xi /* Op_store_var */ #define initval x.xn @@ -984,7 +1015,7 @@ typedef struct srcfile { int fd; int maxlen; /* size of the longest line */ - void (*fini_func)(); /* dynamic extension of type SRC_EXTLIB */ + void (*fini_func)(); /* dynamic extension of type SRC_EXTLIB */ char *lexptr; char *lexend; @@ -1022,7 +1053,7 @@ enum block_id { BLOCK_NODE, BLOCK_BUCKET, BLOCK_MAX /* count */ -}; +}; typedef int (*Func_pre_exec)(INSTRUCTION **); typedef void (*Func_post_exec)(INSTRUCTION *); @@ -1036,7 +1067,7 @@ typedef void (*Func_post_exec)(INSTRUCTION *); #ifndef LONG_MIN #define LONG_MIN ((long)(-LONG_MAX - 1L)) #endif -#define UNLIMITED LONG_MAX +#define UNLIMITED LONG_MAX /* -------------------------- External variables -------------------------- */ /* gawk builtin variables */ @@ -1181,7 +1212,7 @@ extern STACK_ITEM *stack_top; #define POP_ADDRESS() (decr_sp()->lptr) #define PEEK(n) ((stack_ptr - (n))->rptr) #define TOP() (stack_ptr->rptr) /* same as PEEK(0) */ -#define TOP_ADDRESS() (stack_ptr->lptr) +#define TOP_ADDRESS() (stack_ptr->lptr) #define PUSH(r) (void) (incr_sp()->rptr = (r)) #define PUSH_ADDRESS(l) (void) (incr_sp()->lptr = (l)) #define REPLACE(r) (void) (stack_ptr->rptr = (r)) @@ -1203,6 +1234,7 @@ extern void r_unref(NODE *tmp); static inline void DEREF(NODE *r) { + assert(r->valref > 0); if (--r->valref == 0) r_unref(r); } @@ -1306,7 +1338,7 @@ if (--val) \ typedef enum { SORTED_IN = 1, ASORT, ASORTI } 
sort_context_t; typedef enum { ANONE = 0x00, /* "unused" value */ - AINDEX = 0x001, /* list of indices */ + AINDEX = 0x001, /* list of indices */ AVALUE = 0x002, /* list of values */ AINUM = 0x004, /* numeric index */ AISTR = 0x008, /* string index */ @@ -1339,6 +1371,7 @@ extern NODE *do_aoption(int nargs); extern NODE *do_asort(int nargs); extern NODE *do_asorti(int nargs); extern unsigned long (*hash)(const char *s, size_t len, unsigned long hsize, size_t *code); +extern void init_env_array(NODE *env_node); /* awkgram.c */ extern NODE *variable(int location, char *name, NODETYPE type); extern int parse_program(INSTRUCTION **pcode); @@ -1401,17 +1434,20 @@ extern NODE *do_or(int nargs); extern NODE *do_xor(int nargs); extern NODE *do_compl(int nargs); extern NODE *do_strtonum(int nargs); -extern AWKNUM nondec2awknum(char *str, size_t len); +extern AWKNUM nondec2awknum(char *str, size_t len, char **endptr); extern NODE *do_dcgettext(int nargs); extern NODE *do_dcngettext(int nargs); extern NODE *do_bindtextdomain(int nargs); +extern NODE *do_intdiv(int nargs); +extern NODE *do_typeof(int nargs); extern int strncasecmpmbs(const unsigned char *, const unsigned char *, size_t); +extern int sanitize_exit_status(int status); /* eval.c */ extern void PUSH_CODE(INSTRUCTION *cp); extern INSTRUCTION *POP_CODE(void); extern void init_interpret(void); -extern int cmp_nodes(NODE *t1, NODE *t2); +extern int cmp_nodes(NODE *t1, NODE *t2, bool use_strcmp); extern int cmp_awknums(const NODE *t1, const NODE *t2); extern void set_IGNORECASE(void); extern void set_OFS(void); @@ -1443,10 +1479,8 @@ extern NODE **r_get_field(NODE *n, Func_ptr *assign, bool reference); /* ext.c */ extern NODE *do_ext(int nargs); void load_ext(const char *lib_name); /* temporary */ -extern NODE *load_old_ext(SRCFILE *s, const char *init_func, const char *fini_func, NODE *obj); extern void close_extensions(void); #ifdef DYNAMIC -extern void make_old_builtin(const char *, NODE *(*)(int), int); extern 
awk_bool_t make_builtin(const awk_ext_func_t *); extern NODE *get_argument(int); extern NODE *get_actual_argument(NODE *, int, bool); @@ -1508,7 +1542,10 @@ extern void register_two_way_processor(awk_two_way_processor_t *processor); extern void set_FNR(void); extern void set_NR(void); -extern struct redirect *redirect(NODE *redir_exp, int redirtype, int *errflg); +extern struct redirect *redirect(NODE *redir_exp, int redirtype, int *errflg, bool failure_fatal); +extern struct redirect *redirect_string(const char *redir_exp_str, + size_t redir_exp_len, bool not_string_flag, int redirtype, + int *errflg, int extfd, bool failure_fatal); extern NODE *do_close(int nargs); extern int flush_io(void); extern int close_io(bool *stdio_problem); @@ -1523,6 +1560,8 @@ extern NODE *do_getline(int intovar, IOBUF *iop); extern struct redirect *getredirect(const char *str, int len); extern bool inrec(IOBUF *iop, int *errcode); extern int nextfile(IOBUF **curfile, bool skipping); +extern bool is_non_fatal_std(FILE *fp); +extern bool is_non_fatal_redirect(const char *str); /* main.c */ extern int arg_assign(char *arg, bool initing); extern int is_std_var(const char *var); @@ -1530,6 +1569,7 @@ extern int is_off_limits_var(const char *var); extern char *estrdup(const char *str, size_t len); extern void update_global_values(); extern long getenv_long(const char *name); +extern void after_beginfile(IOBUF **curfile); /* mpfr.c */ extern void set_PREC(void); @@ -1546,6 +1586,7 @@ extern NODE *do_mpfr_compl(int); extern NODE *do_mpfr_cos(int); extern NODE *do_mpfr_exp(int); extern NODE *do_mpfr_int(int); +extern NODE *do_mpfr_intdiv(int); extern NODE *do_mpfr_log(int); extern NODE *do_mpfr_lshift(int); extern NODE *do_mpfr_or(int); @@ -1614,9 +1655,9 @@ extern void reg_error(const char *s); extern Regexp *re_update(NODE *t); extern void resyntax(int syntax); extern void resetup(void); -extern int avoid_dfa(NODE *re, char *str, size_t len); extern int reisstring(const char *text, size_t 
len, Regexp *re, const char *buf); extern int get_numbase(const char *str, bool use_locale); +extern bool using_utf8(void); /* symbol.c */ extern void load_symbols(); @@ -1720,7 +1761,7 @@ POP_SCALAR() if (t->type == Node_var_array) fatal(_("attempt to use array `%s' in a scalar context"), array_vname(t)); - + return t; } @@ -1733,7 +1774,7 @@ TOP_SCALAR() if (t->type == Node_var_array) fatal(_("attempt to use array `%s' in a scalar context"), array_vname(t)); - + return t; } @@ -1751,7 +1792,7 @@ in_array(NODE *a, NODE *s) NODE **ret; ret = a->aexists(a, s); - + return ret ? *ret : NULL; } @@ -1777,7 +1818,7 @@ static inline NODE * force_string(NODE *s) { if ((s->flags & STRCUR) != 0 - && (s->stfmt == -1 || s->stfmt == CONVFMTidx) + && (s->stfmt == STFMT_UNUSED || s->stfmt == CONVFMTidx) ) return s; return format_val(CONVFMT, CONVFMTidx, s); @@ -1807,6 +1848,51 @@ force_number(NODE *n) #endif /* GAWKDEBUG */ + +/* fixtype --- make a node decide if it's a number or a string */ + +/* + * In certain contexts, the true type of a scalar value matters, and we + * must ascertain whether it is a NUMBER or a STRING. In such situations, + * please use this function to resolve the type. + * + * It is safe to assume that the return value will be the same NODE, + * since force_number on a MAYBE_NUM should always return the same NODE, + * and force_string on an INTIND should as well. + * + * There is no way to handle a Node_typedregex correctly, so we ignore + * that case. + */ + +static inline NODE * +fixtype(NODE *n) +{ + assert(n->type == Node_val); + if (n->type == Node_val) { + if ((n->flags & MAYBE_NUM) != 0) + return force_number(n); + if ((n->flags & INTIND) != 0) + return force_string(n); + } + return n; +} + +/* boolval --- return true/false based on awk's criteria */ + +/* + * In awk, a value is considered to be true if it is nonzero _or_ + * non-null. Otherwise, the value is false. 
+ */ + +static inline int +boolval(NODE *t) +{ + (void) fixtype(t); + if ((t->flags & NUMBER) != 0) + return ! iszero(t); + return (t->stlen > 0); +} + /* emalloc_real --- malloc with error checking */ static inline void * |