about summary refs log tree commit diff stats
path: root/awk.h
diff options
context:
space:
mode:
Diffstat (limited to 'awk.h')
-rw-r--r-- awk.h 120
1 files changed, 103 insertions, 17 deletions
diff --git a/awk.h b/awk.h
index ff622898..ad7b33ad 100644
--- a/awk.h
+++ b/awk.h
@@ -178,6 +178,10 @@ extern void *memset_ulong(void *dest, int val, unsigned long l);
#define memset memset_ulong
#endif
+#ifdef HAVE_FWRITE_UNLOCKED
+#define fwrite fwrite_unlocked /* every later use of the name fwrite in code seeing this header expands to fwrite_unlocked */
+#endif /* HAVE_FWRITE_UNLOCKED */
+
#if defined(__EMX__) || defined(__MINGW32__)
#include "nonposix.h"
#endif /* defined(__EMX__) || defined(__MINGW32__) */
@@ -206,11 +210,9 @@ typedef struct Regexp {
struct re_pattern_buffer pat;
struct re_registers regs;
struct dfa *dfareg;
- short dfa;
- short has_anchor; /* speed up of avoid_dfa kludge, temporary */
- short non_empty; /* for use in fpat_parse_field */
- short has_meta; /* re has meta chars so (probably) isn't simple string */
- short maybe_long; /* re has meta chars that can match long text */
+ bool non_empty; /* for use in fpat_parse_field */
+ bool has_meta; /* re has meta chars so (probably) isn't simple string */
+ bool maybe_long; /* re has meta chars that can match long text */
} Regexp;
#define RESTART(rp,s) (rp)->regs.start[0]
#define REEND(rp,s) (rp)->regs.end[0]
@@ -219,6 +221,7 @@ typedef struct Regexp {
#define NUMSUBPATS(rp,s) (rp)->regs.num_regs
/* regexp matching flags: */
+#define RE_NO_FLAGS 0 /* empty flags */
#define RE_NEED_START 1 /* need to know start/end of match */
#define RE_NO_BOL 2 /* not allowed to match ^ in regexp */
@@ -272,7 +275,6 @@ typedef enum nodevals {
Node_param_list, /* lnode is a variable, rnode is more list */
Node_func, /* lnode is param. list, rnode is body */
Node_ext_func, /* extension function, code_ptr is builtin code */
- Node_old_ext_func, /* extension function, code_ptr is builtin code */
Node_builtin_func, /* built-in function, main use is for FUNCTAB */
Node_array_ref, /* array passed by ref as parameter */
@@ -392,8 +394,11 @@ typedef struct exp_node {
/* type = Node_val */
/*
- * STRING and NUMBER are mutually exclusive. They represent the
- * type of a value as assigned.
+ * STRING and NUMBER are mutually exclusive, except for the special
+ * case of an uninitialized value, represented internally by
+ * Nnull_string. They represent the type of a value as assigned.
+ * Nnull_string has both STRING and NUMBER attributes, but all other
+ * scalar values should have precisely one of these bits set.
*
* STRCUR and NUMCUR are not mutually exclusive. They represent that
* the particular type of value is up to date. For example,
@@ -408,7 +413,8 @@ typedef struct exp_node {
*
* MAYBE_NUM is the joker. It means "this is string data, but
* the user may have really wanted it to be a number. If we have
- * to guess, like in a comparison, turn it into a number."
+ * to guess, like in a comparison, turn it into a number if the string
+ * is indeed numeric."
* For example, gawk -v a=42 ....
* Here, `a' gets STRING|STRCUR|MAYBE_NUM and then when used where
* a number is needed, it gets turned into a NUMBER and STRING
@@ -453,6 +459,7 @@ typedef struct exp_node {
# define HALFHAT 0x10000 /* half-capacity Hashed Array Tree;
* See cint_array.c */
# define XARRAY 0x20000
+# define NUMCONSTSTR 0x40000 /* have string value for numeric constant */
} NODE;
#define vname sub.nodep.name
@@ -480,6 +487,13 @@ typedef struct exp_node {
#define re_cnt flags
/* Node_val */
+/*
+ * Note that the string in stptr may not be NUL-terminated, but it is
+ * guaranteed to have at least one extra byte that may be temporarily set
+ * to '\0'. This is helpful when calling functions such as strtod that require
+ * a NUL-terminated argument. In particular, field values $n for n > 0 and
+ * n < NF will not have a NUL terminator, since they point into the $0 buffer.
+ */
#define stptr sub.val.sp
#define stlen sub.val.slen
#define valref sub.val.sref
@@ -494,6 +508,16 @@ typedef struct exp_node {
#define numbr sub.val.fltnum
#endif
+/*
+ * If stfmt is set to STFMT_UNUSED, it means that the string representation
+ * stored in stptr is not a function of the value of CONVFMT or OFMT. That
+ * indicates that either the string value was explicitly assigned, or it
+ * was converted from a NUMBER that has an integer value. When stfmt is not
+ * set to STFMT_UNUSED, it is an offset into the fmt_list array of distinct
+ * CONVFMT and OFMT node pointers.
+ */
+#define STFMT_UNUSED -1
+
/* Node_arrayfor */
#define for_list sub.nodep.r.av
#define for_list_size sub.nodep.reflags
@@ -556,6 +580,11 @@ typedef struct exp_node {
#define adepth sub.nodep.l.ll
#define alevel sub.nodep.x.xl
+/* Op_comment */
+#define comment_type sub.val.idx
+#define EOL_COMMENT 1
+#define FULL_COMMENT 2
+
/* --------------------------------lint warning types----------------------------*/
typedef enum lintvals {
LINT_illegal,
@@ -597,6 +626,7 @@ typedef enum opcodeval {
Op_postincrement,
Op_postdecrement,
Op_unary_minus,
+ Op_unary_plus,
Op_field_spec,
/* unary relationals */
@@ -654,7 +684,6 @@ typedef enum opcodeval {
Op_builtin,
Op_sub_builtin, /* sub, gsub and gensub */
Op_ext_builtin,
- Op_old_ext_builtin, /* temporary */
Op_in_array, /* boolean test of membership in array */
/* function call instruction */
@@ -663,6 +692,7 @@ typedef enum opcodeval {
Op_push, /* scalar variable */
Op_push_arg, /* variable type (scalar or array) argument to built-in */
+ Op_push_arg_untyped, /* like Op_push_arg, but for typeof */
Op_push_i, /* number, string */
Op_push_re, /* regex */
Op_push_array,
@@ -690,6 +720,7 @@ typedef enum opcodeval {
Op_func,
+ Op_comment, /* for pretty printing */
Op_exec_count,
Op_breakpoint,
Op_lint,
@@ -1203,6 +1234,7 @@ extern void r_unref(NODE *tmp);
static inline void
DEREF(NODE *r)
{
+ assert(r->valref > 0); /* the count must still be positive before it is decremented below */
if (--r->valref == 0)
r_unref(r);
}
@@ -1339,6 +1371,7 @@ extern NODE *do_aoption(int nargs);
extern NODE *do_asort(int nargs);
extern NODE *do_asorti(int nargs);
extern unsigned long (*hash)(const char *s, size_t len, unsigned long hsize, size_t *code);
+extern void init_env_array(NODE *env_node);
/* awkgram.c */
extern NODE *variable(int location, char *name, NODETYPE type);
extern int parse_program(INSTRUCTION **pcode);
@@ -1401,17 +1434,20 @@ extern NODE *do_or(int nargs);
extern NODE *do_xor(int nargs);
extern NODE *do_compl(int nargs);
extern NODE *do_strtonum(int nargs);
-extern AWKNUM nondec2awknum(char *str, size_t len);
+extern AWKNUM nondec2awknum(char *str, size_t len, char **endptr);
extern NODE *do_dcgettext(int nargs);
extern NODE *do_dcngettext(int nargs);
extern NODE *do_bindtextdomain(int nargs);
+extern NODE *do_intdiv(int nargs);
+extern NODE *do_typeof(int nargs);
extern int strncasecmpmbs(const unsigned char *,
const unsigned char *, size_t);
+extern int sanitize_exit_status(int status);
/* eval.c */
extern void PUSH_CODE(INSTRUCTION *cp);
extern INSTRUCTION *POP_CODE(void);
extern void init_interpret(void);
-extern int cmp_nodes(NODE *t1, NODE *t2);
+extern int cmp_nodes(NODE *t1, NODE *t2, bool use_strcmp);
extern int cmp_awknums(const NODE *t1, const NODE *t2);
extern void set_IGNORECASE(void);
extern void set_OFS(void);
@@ -1443,10 +1479,8 @@ extern NODE **r_get_field(NODE *n, Func_ptr *assign, bool reference);
/* ext.c */
extern NODE *do_ext(int nargs);
void load_ext(const char *lib_name); /* temporary */
-extern NODE *load_old_ext(SRCFILE *s, const char *init_func, const char *fini_func, NODE *obj);
extern void close_extensions(void);
#ifdef DYNAMIC
-extern void make_old_builtin(const char *, NODE *(*)(int), int);
extern awk_bool_t make_builtin(const awk_ext_func_t *);
extern NODE *get_argument(int);
extern NODE *get_actual_argument(NODE *, int, bool);
@@ -1508,7 +1542,10 @@ extern void register_two_way_processor(awk_two_way_processor_t *processor);
extern void set_FNR(void);
extern void set_NR(void);
-extern struct redirect *redirect(NODE *redir_exp, int redirtype, int *errflg);
+extern struct redirect *redirect(NODE *redir_exp, int redirtype, int *errflg, bool failure_fatal);
+extern struct redirect *redirect_string(const char *redir_exp_str,
+ size_t redir_exp_len, bool not_string_flag, int redirtype,
+ int *errflg, int extfd, bool failure_fatal);
extern NODE *do_close(int nargs);
extern int flush_io(void);
extern int close_io(bool *stdio_problem);
@@ -1523,6 +1560,8 @@ extern NODE *do_getline(int intovar, IOBUF *iop);
extern struct redirect *getredirect(const char *str, int len);
extern bool inrec(IOBUF *iop, int *errcode);
extern int nextfile(IOBUF **curfile, bool skipping);
+extern bool is_non_fatal_std(FILE *fp);
+extern bool is_non_fatal_redirect(const char *str);
/* main.c */
extern int arg_assign(char *arg, bool initing);
extern int is_std_var(const char *var);
@@ -1530,6 +1569,7 @@ extern int is_off_limits_var(const char *var);
extern char *estrdup(const char *str, size_t len);
extern void update_global_values();
extern long getenv_long(const char *name);
+extern void after_beginfile(IOBUF **curfile);
/* mpfr.c */
extern void set_PREC(void);
@@ -1546,6 +1586,7 @@ extern NODE *do_mpfr_compl(int);
extern NODE *do_mpfr_cos(int);
extern NODE *do_mpfr_exp(int);
extern NODE *do_mpfr_int(int);
+extern NODE *do_mpfr_intdiv(int);
extern NODE *do_mpfr_log(int);
extern NODE *do_mpfr_lshift(int);
extern NODE *do_mpfr_or(int);
@@ -1614,9 +1655,9 @@ extern void reg_error(const char *s);
extern Regexp *re_update(NODE *t);
extern void resyntax(int syntax);
extern void resetup(void);
-extern int avoid_dfa(NODE *re, char *str, size_t len);
extern int reisstring(const char *text, size_t len, Regexp *re, const char *buf);
extern int get_numbase(const char *str, bool use_locale);
+extern bool using_utf8(void);
/* symbol.c */
extern void load_symbols();
@@ -1777,7 +1818,7 @@ static inline NODE *
force_string(NODE *s)
{
if ((s->flags & STRCUR) != 0
- && (s->stfmt == -1 || s->stfmt == CONVFMTidx)
+ && (s->stfmt == STFMT_UNUSED || s->stfmt == CONVFMTidx)
)
return s;
return format_val(CONVFMT, CONVFMTidx, s);
@@ -1807,6 +1848,51 @@ force_number(NODE *n)
#endif /* GAWKDEBUG */
+
+/* fixtype --- make a node decide if it's a number or a string */
+
+/*
+ * In certain contexts, the true type of a scalar value matters, and we
+ * must ascertain whether it is a NUMBER or a STRING. In such situations,
+ * please use this function to resolve the type.
+ *
+ * It is safe to assume that the return value will be the same NODE,
+ * since force_number on a MAYBE_NUM should always return the same NODE,
+ * and force_string on an INTIND should as well.
+ *
+ * There is no way to handle a Node_typedregex correctly, so we ignore
+ * that case.
+ */
+
+static inline NODE *
+fixtype(NODE *n)
+{
+ assert(n->type == Node_val); /* callers are expected to pass only Node_val nodes */
+ if (n->type == Node_val) { /* same test again, so other node types are still skipped when assert() is compiled out (NDEBUG) */
+ if ((n->flags & MAYBE_NUM) != 0) /* string data the user may have meant as a number */
+ return force_number(n);
+ if ((n->flags & INTIND) != 0) /* checked only when MAYBE_NUM is not set */
+ return force_string(n);
+ }
+ return n; /* neither flag set (or not a Node_val): node handed back unchanged */
+}
+
+/* boolval --- return true/false based on awk's criteria */
+
+/*
+ * In awk, a value is considered to be true if it is nonzero _or_
+ * non-null. Otherwise, the value is false.
+ */
+
+static inline int
+boolval(NODE *t)
+{
+ (void) fixtype(t); /* resolve the type first; result dropped because t's own flags are read below */
+ if ((t->flags & NUMBER) != 0)
+ return ! iszero(t); /* NUMBER: true unless iszero() reports zero */
+ return (t->stlen > 0); /* otherwise: true when the string length is nonzero */
+}
+
/* emalloc_real --- malloc with error checking */
static inline void *