diff options
Diffstat (limited to 'doc/gawk.texi')
-rw-r--r-- | doc/gawk.texi | 223 |
1 files changed, 144 insertions, 79 deletions
diff --git a/doc/gawk.texi b/doc/gawk.texi index 5f3d6efa..35db3479 100644 --- a/doc/gawk.texi +++ b/doc/gawk.texi @@ -933,6 +933,7 @@ particular records in a file and perform operations upon them. * Extension API Informational Variables:: Variables providing information about @command{gawk}'s invocation. * Extension API Boilerplate:: Boilerplate code for using the API. +* Changes from API V1:: Changes from V1 of the API. * Finding Extensions:: How @command{gawk} finds compiled extensions. * Extension Example:: Example C code for an extension. @@ -32323,6 +32324,7 @@ This (rather large) @value{SECTION} describes the API in detail. redirections. * Extension API Variables:: Variables provided by the API. * Extension API Boilerplate:: Boilerplate code for using the API. +* Changes from API V1:: Changes from V1 of the API. @end menu @node Extension API Functions Introduction @@ -32557,7 +32559,8 @@ multibyte encoding. @itemx @ @ @ @ AWK_STRING, @itemx @ @ @ @ AWK_ARRAY, @itemx @ @ @ @ AWK_SCALAR,@ @ @ @ @ @ @ @ @ /* opaque access to a variable */ -@itemx @ @ @ @ AWK_VALUE_COOKIE@ @ @ @ /* for updating a previously created value */ +@itemx @ @ @ @ AWK_VALUE_COOKIE,@ @ @ /* for updating a previously created value */ +@itemx @ @ @ @ AWK_REGEX @itemx @} awk_valtype_t; This @code{enum} indicates the type of a value. It is used in the following @code{struct}. @@ -32577,6 +32580,7 @@ The @code{val_type} member indicates what kind of value the @code{union} holds, and each member is of the appropriate type. @item #define str_value@ @ @ @ @ @ u.s +@itemx #define regex_value@ @ @ @ str_value @itemx #define num_value@ @ @ @ @ @ u.d @itemx #define array_cookie@ @ @ u.a @itemx #define scalar_cookie@ @ u.scl @@ -32597,7 +32601,7 @@ and in more detail in @ref{Cached values}. @end table -Scalar values in @command{awk} are either numbers or strings. The +Scalar values in @command{awk} are numbers, strings, or typed regexps. The @code{awk_value_t} struct represents values. 
The @code{val_type} member indicates what is in the @code{union}. @@ -32606,6 +32610,12 @@ require more work. Because @command{gawk} allows embedded @sc{nul} bytes in string values, a string must be represented as a pair containing a data pointer and length. This is the @code{awk_string_t} type. +Typed regexp values (@pxref{Strong Regexp Constants}) are not of +much use to extension functions. Extension functions can tell that +they've received them, and create them for scalar values. Otherwise, +they can examine the text of the regexp through @code{regex_value.str} +and @code{regex_value.len}. + Identifiers (i.e., the names of global variables) can be associated with either scalar values or with arrays. In addition, @command{gawk} provides true arrays of arrays, where any given array element can @@ -32772,6 +32782,11 @@ It returns @code{result}. @itemx make_number(double num, awk_value_t *result); This function simply creates a numeric value in the @code{awk_value_t} variable pointed to by @code{result}. + +@item static inline awk_value_t * +@itemx make_regex(const char *string, size_t length, awk_value_t *result); +This function creates a strongly typed regexp value. +@code{string} is the regular expression of length @code{length}. 
@end table @node Registration Functions @@ -32799,8 +32814,13 @@ Extension functions are described by the following record: @example typedef struct awk_ext_func @{ @ @ @ @ const char *name; -@ @ @ @ awk_value_t *(*function)(int num_actual_args, awk_value_t *result); -@ @ @ @ size_t max_expected_args; +@ @ @ @ awk_value_t *(*const function)(int num_actual_args, +@ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ awk_value_t *result, +@ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ struct awk_ext_func *finfo); +@ @ @ @ const size_t max_expected_args; +@ @ @ @ const size_t min_required_args; +@ @ @ @ awk_bool_t suppress_lint; +@ @ @ @ void *data; /* opaque pointer to any extra state */ @} awk_ext_func_t; @end example @@ -32818,42 +32838,95 @@ or an underscore, which may be followed by any number of letters, digits, and underscores. Letter case in function names is significant. -@item awk_value_t *(*function)(int num_actual_args, awk_value_t *result); +@item awk_value_t *(*const function)(int num_actual_args, +@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ awk_value_t *result, +@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ struct awk_ext_func *finfo); This is a pointer to the C function that provides the extension's functionality. The function must fill in @code{*result} with either a number -or a string. @command{gawk} takes ownership of any string memory. +or a string. +@c FIXME: Change to a scalar - number, string or regex once regex api stuff is merged. +@command{gawk} takes ownership of any string memory. As mentioned earlier, string memory @emph{must} come from one of @code{gawk_malloc()}, @code{gawk_calloc()}, or @code{gawk_realloc()}. The @code{num_actual_args} argument tells the C function how many actual parameters were passed from the calling @command{awk} code. +The @code{finfo} parameter is a pointer to the @code{awk_ext_func_t} for +this function. 
The called function may access data within it as desired, or not. + The function must return the value of @code{result}. This is for the convenience of the calling code inside @command{gawk}. -@item size_t max_expected_args; +@item const size_t max_expected_args; This is the maximum number of arguments the function expects to receive. -Each extension function may decide what to do if the number of -arguments isn't what it expected. As with real @command{awk} functions, it -is likely OK to ignore extra arguments. This value does not affect -actual program execution. - -Extension functions should compare this value to the number of actual -arguments passed and possibly issue a lint warning if there is an -undesirable mismatch. Of course, if -@samp{--lint=fatal} is used, this would cause the program to exit. +If called with more arguments than this, and if lint checking has +been enabled, then @command{gawk} prints a warning message. For more +information, see the entry for @code{suppress_lint}, later in this list. + +@item const size_t min_required_args; +This is the minimum number of arguments the function expects to receive. +If called with fewer arguments, @command{gawk} prints a fatal error +message and exits. + +@item awk_bool_t suppress_lint; +This flag tells @command{gawk} not to print a lint message if lint +checking has been enabled and if more arguments were supplied in the call +than expected. An extension function can tell if @command{gawk} already +printed at least one such message by checking if @samp{num_actual_args > +finfo->max_expected_args}. If so, and the function does not want more +lint messages to be printed, it should set @code{finfo->suppress_lint} +to @code{awk_true}. + +@item void *data; +This is an opaque pointer to any data that an extension function may +wish to have available when called. 
Passing the @code{awk_ext_func_t} +structure to the extension function, and having this pointer available +in it enable writing a single C or C++ function that implements multiple +@command{awk}-level extension functions. @end table Once you have a record representing your extension function, you register it with @command{gawk} using this API function: @table @code -@item awk_bool_t add_ext_func(const char *namespace, const awk_ext_func_t *func); +@item awk_bool_t add_ext_func(const char *namespace, awk_ext_func_t *func); This function returns true upon success, false otherwise. The @code{namespace} parameter is currently not used; you should pass in an empty string (@code{""}). The @code{func} pointer is the address of a @code{struct} representing your function, as just described. + +@command{gawk} does not modify what @code{func} points to, but the +extension function itself receives this pointer and can modify what it +points to, thus it is purposely not declared to be @code{const}. +@end table + +The combination of @code{min_required_args}, @code{max_expected_args}, +and @code{suppress_lint} may be confusing. Here is how you should +set things up. + +@table @asis +@item Any number of arguments is valid +Set @code{min_required_args} and @code{max_expected_args} to zero and +set @code{suppress_lint} to @code{awk_true}. + +@item A minimum number of arguments is required, no limit on maximum number of arguments +Set @code{min_required_args} to the minimum required. Set +@code{max_expected_args} to zero and +set @code{suppress_lint} to @code{awk_true}. + +@item A minimum number of arguments is required, a maximum number is expected +Set @code{min_required_args} to the minimum required. Set +@code{max_expected_args} to the maximum expected. +Set @code{suppress_lint} to @code{awk_false}. + +@item A minimum number of arguments is required, and no more than a maximum is allowed +Set @code{min_required_args} to the minimum required. 
Set +@code{max_expected_args} to the maximum expected. +Set @code{suppress_lint} to @code{awk_false}. +In your extension function, check that @code{num_actual_args} does not +exceed @code{finfo->max_expected_args}. If it does, issue a fatal error message. @end table @node Exit Callback Functions @@ -33398,6 +33471,7 @@ value type, as appropriate. This behavior is summarized in @float Table,table-value-types-returned @caption{API value types returned} +@c FIXME: This needs doing. @docbook <informaltable> <tgroup cols="6"> @@ -33473,6 +33547,7 @@ value type, as appropriate. This behavior is summarized in </informaltable> @end docbook +@c FIXME: This needs doing. @ifnotplaintext @ifnotdocbook @multitable @columnfractions .50 .50 @@ -33495,27 +33570,29 @@ value type, as appropriate. This behavior is summarized in @end ifnotplaintext @ifplaintext @example - +-------------------------------------------------+ - | Type of Actual Value: | - +------------+------------+-----------+-----------+ - | String | Number | Array | Undefined | -+-----------+-----------+------------+------------+-----------+-----------+ -| | String | String | String | False | False | -| |-----------+------------+------------+-----------+-----------+ -| | Number | Number if | Number | False | False | -| | | can be | | | | -| | | converted, | | | | -| | | else false | | | | -| |-----------+------------+------------+-----------+-----------+ -| Type | Array | False | False | Array | False | -| Requested |-----------+------------+------------+-----------+-----------+ -| | Scalar | Scalar | Scalar | False | False | -| |-----------+------------+------------+-----------+-----------+ -| | Undefined | String | Number | Array | Undefined | -| |-----------+------------+------------+-----------+-----------+ -| | Value | False | False | False | False | -| | cookie | | | | | -+-----------+-----------+------------+------------+-----------+-----------+ + +-------------------------------------------------------------+ + | 
Type of Actual Value: | + +------------+------------+-----------+-----------+-----------+ + | String | Number | Regex | Array | Undefined | ++-----------+-----------+------------+------------+-----------+-----------+-----------+ +| | String | String | String | String | false | false | +| +-----------+------------+------------+-----------+-----------+-----------+ +| | Number | Number if | Number | false | false | false | +| | | can be | | | | | +| | | converted, | | | | | +| | | else false | | | | | +| +-----------+------------+------------+-----------+-----------+-----------+ +| | Regex | false | false | Regex | false | false | +| +-----------+------------+------------+-----------+-----------+-----------+ +| Type | Array | false | false | false | Array | false | +| Requested +-----------+------------+------------+-----------+-----------+-----------+ +| | Scalar | Scalar | Scalar | Scalar | false | false | +| +-----------+------------+------------+-----------+-----------+-----------+ +| | Undefined | String | Number | Regex | Array | Undefined | +| +-----------+------------+------------+-----------+-----------+-----------+ +| | Value | false | false | false | false | false | +| | Cookie | | | | | | ++-----------+-----------+------------+------------+-----------+-----------+-----------+ @end example @end ifplaintext @end float @@ -33594,13 +33671,6 @@ An extension can look up the value of @command{gawk}'s special variables. However, with the exception of the @code{PROCINFO} array, an extension cannot change any of those variables. -@quotation CAUTION -It is possible for the lookup of @code{PROCINFO} to fail. This happens if -the @command{awk} program being run does not reference @code{PROCINFO}; -in this case, @command{gawk} doesn't bother to create the array and -populate it. -@end quotation - @node Symbol table by cookie @subsubsection Variable Access and Update by Cookie @@ -33622,7 +33692,7 @@ Return false if the value cannot be retrieved. 
@item awk_bool_t sym_update_scalar(awk_scalar_t cookie, awk_value_t *value); Update the value associated with a scalar cookie. Return false if -the new value is not of type @code{AWK_STRING} or @code{AWK_NUMBER}. +the new value is not of type @code{AWK_STRING}, @code{AWK_REGEX}, or @code{AWK_NUMBER}. Here too, the predefined variables may not be updated. @end table @@ -33743,7 +33813,7 @@ is what the routines in this @value{SECTION} let you do. The functions are as f @table @code @item awk_bool_t create_value(awk_value_t *value, awk_value_cookie_t *result); Create a cached string or numeric value from @code{value} for -efficient later assignment. Only values of type @code{AWK_NUMBER} +efficient later assignment. Only values of type @code{AWK_NUMBER}, @code{AWK_REGEX}, and @code{AWK_STRING} are allowed. Any other type is rejected. @code{AWK_UNDEFINED} could be allowed, but doing so would result in inferior performance. @@ -34486,10 +34556,10 @@ debugging: @float Table,gawk-api-version @caption{gawk API version constants} -@multitable @columnfractions .33 .33 .33 -@headitem API Version @tab C preprocessor define @tab enum constant -@item Major @tab gawk_api_major_version @tab GAWK_API_MAJOR_VERSION -@item Minor @tab gawk_api_minor_version @tab GAWK_API_MINOR_VERSION +@multitable {@b{API Version}} {@code{gawk_api_major_version}} {@code{GAWK_API_MAJOR_VERSION}} +@headitem API Version @tab C Preprocessor Define @tab enum constant +@item Major @tab @code{gawk_api_major_version} @tab @code{GAWK_API_MAJOR_VERSION} +@item Minor @tab @code{gawk_api_minor_version} @tab @code{GAWK_API_MINOR_VERSION} @end multitable @end float @@ -34508,10 +34578,10 @@ constant integers: @table @code @item api->major_version -The major version of the running @command{gawk} +The major version of the running @command{gawk}. @item api->minor_version -The minor version of the running @command{gawk} +The minor version of the running @command{gawk}. 
@end table It is up to the extension to decide if there are API incompatibilities. @@ -34584,7 +34654,7 @@ static awk_ext_id_t ext_id; static const char *ext_version = NULL; /* or @dots{} = "some string" */ static awk_ext_func_t func_table[] = @{ - @{ "name", do_name, 1 @}, + @{ "name", do_name, 1, 0, awk_false, NULL @}, /* @dots{} */ @}; @@ -34685,6 +34755,19 @@ If @code{ext_version} is not @code{NULL}, register the version string with @command{gawk}. @end enumerate + +@node Changes from API V1 +@subsection Changes From Version 1 of the API + +The current API is @emph{not} binary compatible with version 1 of the API. +You will have to recompile your extensions in order to use them with +the current version of @command{gawk}. + +Fortunately, at the possible expense of some compile-time warnings, the API remains +source-code--compatible with the previous API. The major differences are +the additional members in the @code{awk_ext_func_t} structure, and the +addition of the third argument to the C implementation function. + @node Finding Extensions @section How @command{gawk} Finds Extensions @cindex extension search path @@ -34925,17 +35008,12 @@ The second is a pointer to an @code{awk_value_t} structure, usually named /* do_chdir --- provide dynamically loaded chdir() function for gawk */ static awk_value_t * -do_chdir(int nargs, awk_value_t *result) +do_chdir(int nargs, awk_value_t *result, struct awk_ext_func *unused) @{ awk_value_t newdir; int ret = -1; assert(result != NULL); - - if (do_lint && nargs != 1) - lintwarn(ext_id, - _("chdir: called with incorrect number of arguments, " - "expecting 1")); @end example The @code{newdir} @@ -34944,8 +35022,8 @@ with @code{get_argument()}. Note that the first argument is numbered zero. If the argument is retrieved successfully, the function calls the -@code{chdir()} system call. If the @code{chdir()} fails, @code{ERRNO} -is updated: +@code{chdir()} system call. 
If the @code{chdir()} fails, +it updates @code{ERRNO}: @example if (get_argument(0, AWK_STRING, & newdir)) @{ @@ -35149,15 +35227,11 @@ is set to point to @code{stat()}, instead. Here is the @code{do_stat()} function, which starts with variable declarations and argument checking: -@ignore -Changed message for page breaking. Used to be: - "stat: called with incorrect number of arguments (%d), should be 2", -@end ignore @example /* do_stat --- provide a stat() function for gawk */ static awk_value_t * -do_stat(int nargs, awk_value_t *result) +do_stat(int nargs, awk_value_t *result, struct awk_ext_func *unused) @{ awk_value_t file_param, array_param; char *name; @@ -35168,13 +35242,6 @@ do_stat(int nargs, awk_value_t *result) int (*statfunc)(const char *path, struct stat *sbuf) = lstat; assert(result != NULL); - - if (nargs != 2 && nargs != 3) @{ - if (do_lint) - lintwarn(ext_id, - _("stat: called with wrong number of arguments")); - return make_number(-1, result); - @} @end example Then comes the actual work. First, the function gets the arguments. @@ -35242,11 +35309,9 @@ structures for loading each function into @command{gawk}: @example static awk_ext_func_t func_table[] = @{ - @{ "chdir", do_chdir, 1 @}, - @{ "stat", do_stat, 2 @}, -#ifndef __MINGW32__ - @{ "fts", do_fts, 3 @}, -#endif + @{ "chdir", do_chdir, 1, 1, awk_false, NULL @}, + @{ "stat", do_stat, 3, 2, awk_false, NULL @}, + @dots{} @}; @end example |