diff options
Diffstat (limited to 'gawkapi.h')
-rw-r--r-- | gawkapi.h | 240 |
1 files changed, 188 insertions, 52 deletions
@@ -2,22 +2,22 @@ * gawkapi.h -- Definitions for use by extension functions calling into gawk. */ -/* +/* * Copyright (C) 2012-2016 the Free Software Foundation, Inc. - * + * * This file is part of GAWK, the GNU implementation of the * AWK Programming Language. - * + * * GAWK is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. - * + * * GAWK is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA @@ -53,7 +53,7 @@ * This API purposely restricts itself to ISO C 90 features. In particular, no * bool, no // comments, no use of the restrict keyword, or anything else, * in order to provide maximal portability. - * + * * Exception: the "inline" keyword is used below in the "constructor" * functions. If your compiler doesn't support it, you should either * -Dinline='' on your command line, or use the autotools and include a @@ -117,6 +117,32 @@ typedef enum awk_bool { awk_true } awk_bool_t; /* we don't use <stdbool.h> on purpose */ +/* + * If the input parser would like to specify the field positions in the input + * record, it may populate an awk_fieldwidth_info_t structure to indicate + * the location of each field. The use_chars boolean controls whether the + * field lengths are specified in terms of bytes or potentially multi-byte + * characters. Performance will be better if the values are supplied in + * terms of bytes. The fields[0].skip value indicates how many bytes (or + * characters) to skip before $1, and fields[0].len is the length of $1, etc. + */ + +typedef struct { + awk_bool_t use_chars; /* false ==> use bytes */ + size_t nf; + struct awk_field_info { + size_t skip; /* amount to skip before field starts */ + size_t len; /* length of field */ + } fields[1]; /* actual dimension should be nf */ +} awk_fieldwidth_info_t; + +/* + * This macro calculates the total struct size needed. This is useful when + * calling malloc or realloc. + */ +#define awk_fieldwidth_info_size(NF) (sizeof(awk_fieldwidth_info_t) + \ + (((NF)-1) * sizeof(struct awk_field_info))) + /* The information about input files that input parsers need to know: */ typedef struct awk_input { const char *name; /* filename */ @@ -136,7 +162,7 @@ typedef struct awk_input { * parser is responsible for managing its own memory buffer. * Similarly, gawk will make its own copy of RT, so the parser * is also responsible for managing this memory. - * + * * It is guaranteed that errcode is a valid pointer, so there is * no need to test for a NULL value. Gawk sets *errcode to 0, * so there is no need to set it unless an error occurs. @@ -146,9 +172,19 @@ typedef struct awk_input { * than zero, gawk will automatically update the ERRNO variable based * on the value of *errcode (e.g., setting *errcode = errno should do * the right thing). + * + * If field_width is non-NULL, then *field_width will be initialized + * to NULL, and the function may set it to point to a structure + * supplying field width information to override the default + * gawk field parsing mechanism. Note that this structure will not + * be copied by gawk; it must persist at least until the next call + * to get_record or close_func. Note also that field_width will + * be NULL when getline is assigning the results to a variable, thus + * field parsing is not needed. */ int (*get_record)(char **out, struct awk_input *iobuf, int *errcode, - char **rt_start, size_t *rt_len); + char **rt_start, size_t *rt_len, + const awk_fieldwidth_info_t **field_width); /* * No argument prototype on read_func to allow for older systems @@ -165,7 +201,7 @@ typedef struct awk_input { /* put last, for alignment. bleah */ struct stat sbuf; /* stat buf */ - + } awk_input_buf_t; typedef struct awk_input_parser { @@ -260,8 +296,8 @@ typedef struct awk_two_way_processor { awk_const struct awk_two_way_processor *awk_const next; /* for use by gawk */ } awk_two_way_processor_t; -#define gawk_api_major_version 1 -#define gawk_api_minor_version 1 +#define gawk_api_major_version 2 +#define gawk_api_minor_version 0 /* Current version of the API. */ enum { @@ -278,6 +314,8 @@ enum { * The API deals exclusively with regular chars; these strings may * be multibyte encoded in the current locale's encoding and character * set. Gawk will convert internally to wide characters if necessary. + * + * Note that the string will always be terminated with a '\0' character. */ typedef struct awk_string { char *str; /* data */ @@ -304,6 +342,8 @@ typedef enum { AWK_UNDEFINED, AWK_NUMBER, AWK_STRING, + AWK_REGEX, + AWK_STRNUM, AWK_ARRAY, AWK_SCALAR, /* opaque access to a variable */ AWK_VALUE_COOKIE /* for updating a previously created value */ @@ -323,6 +363,8 @@ typedef struct awk_value { awk_value_cookie_t vc; } u; #define str_value u.s +#define strnum_value str_value +#define regex_value str_value #define num_value u.d #define array_cookie u.a #define scalar_cookie u.scl @@ -345,7 +387,7 @@ typedef struct awk_element { AWK_ELEMENT_DELETE = 1 /* set by extension if should be deleted */ } flags; - awk_value_t index; /* guaranteed to be a string! */ + awk_value_t index; awk_value_t value; } awk_element_t; @@ -374,11 +416,26 @@ typedef struct awk_flat_array { * Each extension function may decide what to do if the number of * arguments isn't what it expected. Following awk functions, it * is likely OK to ignore extra arguments. + * + * 'min_required_args' indicates how many arguments MUST be passed. + * The API will throw a fatal error if not enough are passed. + * + * 'max_expected_args' is more benign; if more than that are passed, + * the API prints a lint message (IFF lint is enabled, of course). + * + * In any case, the extension function itself need not compare the + * actual number of arguments passed to those two values if it does + * not want to. */ typedef struct awk_ext_func { const char *name; - awk_value_t *(*function)(int num_actual_args, awk_value_t *result); - size_t num_expected_args; + awk_value_t *(*const function)(int num_actual_args, + awk_value_t *result, + struct awk_ext_func *finfo); + const size_t max_expected_args; + const size_t min_required_args; + awk_bool_t suppress_lint; + void *data; /* opaque pointer to any extra state */ } awk_ext_func_t; typedef void *awk_ext_id_t; /* opaque type for extension id */ @@ -411,9 +468,14 @@ typedef struct gawk_api { /* Next, registration functions: */ - /* Add a function to the interpreter, returns true upon success */ + /* + * Add a function to the interpreter, returns true upon success. + * Gawk does not modify what func points to, but the extension + * function itself receives this pointer and can modify what it + * points to, thus it's not const. + */ awk_bool_t (*api_add_ext_func)(awk_ext_id_t id, const char *namespace, - const awk_ext_func_t *func); + awk_ext_func_t *func); /* Register an input parser; for opening files read-only */ void (*api_register_input_parser)(awk_ext_id_t id, @@ -447,6 +509,7 @@ typedef struct gawk_api { void (*api_fatal)(awk_ext_id_t id, const char *format, ...); void (*api_warning)(awk_ext_id_t id, const char *format, ...); void (*api_lintwarn)(awk_ext_id_t id, const char *format, ...); + void (*api_nonfatal)(awk_ext_id_t id, const char *format, ...); /* Functions to update ERRNO */ void (*api_update_ERRNO_int)(awk_ext_id_t id, int errno_val); @@ -471,27 +534,28 @@ typedef struct gawk_api { Table entry is type returned: - +-------------------------------------------------+ - | Type of Actual Value: | - +------------+------------+-----------+-----------+ - | String | Number | Array | Undefined | - +-----------+-----------+------------+------------+-----------+-----------+ - | | String | String | String | false | false | - | |-----------+------------+------------+-----------+-----------+ - | | Number | Number if | Number | false | false | - | | | can be | | | | - | | | converted, | | | | - | | | else false | | | | - | |-----------+------------+------------+-----------+-----------+ - | Type | Array | false | false | Array | false | - | Requested |-----------+------------+------------+-----------+-----------+ - | | Scalar | Scalar | Scalar | false | false | - | |-----------+------------+------------+-----------+-----------+ - | | Undefined | String | Number | Array | Undefined | - | |-----------+------------+------------+-----------+-----------+ - | | Value | false | false | false | false | - | | Cookie | | | | | - +-----------+-----------+------------+------------+-----------+-----------+ + +-------------------------------------------------------+ + | Type of Actual Value: | + +--------+--------+--------+--------+-------+-----------+ + | String | Strnum | Number | Regex | Array | Undefined | + +-----------+-----------+--------+--------+--------+--------+-------+-----------+ + | | String | String | String | String | String | false | false | + | +-----------+--------+--------+--------+--------+-------+-----------+ + | | Strnum | false | Strnum | Strnum | false | false | false | + | +-----------+--------+--------+--------+--------+-------+-----------+ + | | Number | Number | Number | Number | false | false | false | + | +-----------+--------+--------+--------+--------+-------+-----------+ + | | Regex | false | false | false | Regex | false | false | + | +-----------+--------+--------+--------+--------+-------+-----------+ + | Type | Array | false | false | false | false | Array | false | + | Requested +-----------+--------+--------+--------+--------+-------+-----------+ + | | Scalar | Scalar | Scalar | Scalar | Scalar | false | false | + | +-----------+--------+--------+--------+--------+-------+-----------+ + | | Undefined | String | Strnum | Number | Regex | Array | Undefined | + | +-----------+--------+--------+--------+--------+-------+-----------+ + | | Value | false | false | false | false | false | false | + | | Cookie | | | | | | | + +-----------+-----------+--------+--------+--------+--------+-------+-----------+ */ /* Functions to handle parameters passed to the extension. */ @@ -507,7 +571,7 @@ typedef struct gawk_api { awk_value_t *result); /* - * Convert a paramter that was undefined into an array + * Convert a parameter that was undefined into an array * (provide call-by-reference for arrays). Returns false * if count is too big, or if the argument's type is * not undefined. @@ -521,7 +585,7 @@ typedef struct gawk_api { * - Read-only access to special variables (NF, etc.) * - One special exception: PROCINFO. * - Use sym_update() to change a value, including from UNDEFINED - * to scalar or array. + * to scalar or array. */ /* * Lookup a variable, fill in value. No messing with the value @@ -658,10 +722,15 @@ typedef struct gawk_api { /* Clear out an array */ awk_bool_t (*api_clear_array)(awk_ext_id_t id, awk_array_t a_cookie); - /* Flatten out an array so that it can be looped over easily. */ - awk_bool_t (*api_flatten_array)(awk_ext_id_t id, + /* + * Flatten out an array with type conversions as requested. + * This supersedes the api_flatten_array function that did not allow + * the caller to specify the requested types. + */ + awk_bool_t (*api_flatten_array_typed)(awk_ext_id_t id, awk_array_t a_cookie, - awk_flat_array_t **data); + awk_flat_array_t **data, + awk_valtype_t index_type, awk_valtype_t value_type); /* When done, delete any marked elements, release the memory. */ awk_bool_t (*api_release_flattened_array)(awk_ext_id_t id, @@ -677,6 +746,40 @@ typedef struct gawk_api { void *(*api_calloc)(size_t nmemb, size_t size); void *(*api_realloc)(void *ptr, size_t size); void (*api_free)(void *ptr); + + /* + * Look up a file. If the name is NULL or name_len is 0, it returns + * data for the currently open input file corresponding to FILENAME + * (and it will not access the filetype argument, so that may be + * undefined). + * If the file is not already open, it tries to open it. + * The "filetype" argument should be one of: + * ">", ">>", "<", "|>", "|<", and "|&" + * If the file is not already open, and the fd argument is non-negative, + * gawk will use that file descriptor instead of opening the file + * in the usual way. If the fd is non-negative, but the file exists + * already, gawk ignores the fd and returns the existing file. It is + * the caller's responsibility to notice that the fd in the returned + * awk_input_buf_t does not match the requested value. Note that + * supplying a file descriptor is currently NOT supported for pipes. + * It should work for input, output, append, and two-way (coprocess) + * sockets. If the filetype is two-way, we assume that it is a socket! + * Note that in the two-way case, the input and output file descriptors + * may differ. To check for success, one must check that either of + * them matches. + */ + awk_bool_t (*api_get_file)(awk_ext_id_t id, + const char *name, + size_t name_len, + const char *filetype, + int fd, + /* + * Return values (on success, one or both should + * be non-NULL): + */ + const awk_input_buf_t **ibufp, + const awk_output_buf_t **obufp); + } gawk_api_t; #ifndef GAWK /* these are not for the gawk code itself! */ @@ -698,6 +801,7 @@ typedef struct gawk_api { (api->api_set_argument(ext_id, count, new_array)) #define fatal api->api_fatal +#define nonfatal api->api_nonfatal #define warning api->api_warning #define lintwarn api->api_lintwarn @@ -742,8 +846,11 @@ typedef struct gawk_api { #define clear_array(array) (api->api_clear_array(ext_id, array)) +#define flatten_array_typed(array, data, index_type, value_type) \ + (api->api_flatten_array_typed(ext_id, array, data, index_type, value_type)) + #define flatten_array(array, data) \ - (api->api_flatten_array(ext_id, array, data)) + flatten_array_typed(array, data, AWK_STRING, AWK_UNDEFINED) #define release_flattened_array(array, data) \ (api->api_release_flattened_array(ext_id, array, data)) @@ -759,6 +866,9 @@ typedef struct gawk_api { #define release_value(value) \ (api->api_release_value(ext_id, value)) +#define get_file(name, namelen, filetype, fd, ibuf, obuf) \ + (api->api_get_file(ext_id, name, namelen, filetype, fd, ibuf, obuf)) + #define register_ext_version(version) \ (api->api_register_ext_version(ext_id, version)) @@ -776,25 +886,26 @@ typedef struct gawk_api { /* Constructor functions */ -/* r_make_string --- make a string value in result from the passed-in string */ +/* r_make_string_type --- make a string or strnum or regexp value in result from the passed-in string */ static inline awk_value_t * -r_make_string(const gawk_api_t *api, /* needed for emalloc */ - awk_ext_id_t *ext_id, /* ditto */ - const char *string, - size_t length, - awk_bool_t duplicate, - awk_value_t *result) +r_make_string_type(const gawk_api_t *api, /* needed for emalloc */ + awk_ext_id_t *ext_id, /* ditto */ + const char *string, + size_t length, + awk_bool_t duplicate, + awk_value_t *result, + awk_valtype_t val_type) { char *cp = NULL; memset(result, 0, sizeof(*result)); - result->val_type = AWK_STRING; + result->val_type = val_type; result->str_value.len = length; if (duplicate) { - emalloc(cp, char *, length + 2, "r_make_string"); + emalloc(cp, char *, length + 1, "r_make_string"); memcpy(cp, string, length); cp[length] = '\0'; result->str_value.str = cp; @@ -805,9 +916,33 @@ r_make_string(const gawk_api_t *api, /* needed for emalloc */ return result; } +/* r_make_string --- make a string value in result from the passed-in string */ + +static inline awk_value_t * +r_make_string(const gawk_api_t *api, /* needed for emalloc */ + awk_ext_id_t *ext_id, /* ditto */ + const char *string, + size_t length, + awk_bool_t duplicate, + awk_value_t *result) +{ + return r_make_string_type(api, ext_id, string, length, duplicate, result, AWK_STRING); +} + #define make_const_string(str, len, result) r_make_string(api, ext_id, str, len, 1, result) #define make_malloced_string(str, len, result) r_make_string(api, ext_id, str, len, 0, result) +#define make_const_regex(str, len, result) r_make_string_type(api, ext_id, str, len, 1, result, AWK_REGEX) +#define make_malloced_regex(str, len, result) r_make_string_type(api, ext_id, str, len, 0, result, AWK_REGEX) + +/* + * Note: The caller may not create a Strnum, but it can create a string that is + * flagged as user input that MAY be a Strnum. Gawk will decide whether it's a + * Strnum or a String by checking whether the string is numeric. + */ +#define make_const_user_input(str, len, result) r_make_string_type(api, ext_id, str, len, 1, result, AWK_STRNUM) +#define make_malloced_user_input(str, len, result) r_make_string_type(api, ext_id, str, len, 0, result, AWK_STRNUM) + /* make_null_string --- make a null string value */ static inline awk_value_t * @@ -832,6 +967,7 @@ make_number(double num, awk_value_t *result) return result; } + /* * Each extension must define a function with this prototype: * |