diff options
Diffstat (limited to 'doc/gawk.texi')
-rw-r--r-- | doc/gawk.texi | 214 |
1 files changed, 160 insertions, 54 deletions
diff --git a/doc/gawk.texi b/doc/gawk.texi index 1a41f880..d646175f 100644 --- a/doc/gawk.texi +++ b/doc/gawk.texi @@ -23443,16 +23443,25 @@ main(int argc, char *argv[]) @} @end example +The GNU project's version of the original Unix utilities popularized +the use of long command line options. For example, @option{--help} +in addition to @option{-h}. Arguments to long options are either provided +as separate command line arguments (@samp{--source '@var{program-text}'}) +or separated from the option with an @samp{=} sign +(@samp{--source='@var{program-text}'}). + As a side point, @command{gawk} actually uses the GNU @code{getopt_long()} function to process both normal and GNU-style long options (@pxref{Options}). The abstraction provided by @code{getopt()} is very useful and is quite handy in @command{awk} programs as well. Following is an @command{awk} -version of @code{getopt()}. This function highlights one of the +version of @code{getopt()} that accepts both short and long options. + +This function highlights one of the greatest weaknesses in @command{awk}, which is that it is very poor at -manipulating single characters. Repeated calls to @code{substr()} are -necessary for accessing individual characters +manipulating single characters. The function needs repeated calls to +@code{substr()} in order to access individual characters (@pxref{String Functions}).@footnote{This function was written before @command{gawk} acquired the ability to split strings into single characters using @code{""} as the separator. @@ -23465,6 +23474,7 @@ The discussion that follows walks through the code a bit at a time: @example @c file eg/lib/getopt.awk # getopt.awk --- Do C library getopt(3) function in awk +# Also supports long options. 
@c endfile @ignore @c file eg/lib/getopt.awk @@ -23473,6 +23483,7 @@ The discussion that follows walks through the code a bit at a time: # # Initial version: March, 1991 # Revised: May, 1993 +# Long options added by Greg Minshall, January 2020 @c endfile @end ignore @c file eg/lib/getopt.awk @@ -23486,7 +23497,7 @@ The discussion that follows walks through the code a bit at a time: # Returns: # -1 at end of options # "?" for unrecognized option -# <c> a character representing the current option +# <s> a string representing the current option # Private Data: # _opti -- index in multiflag option, e.g., -abc @@ -23500,17 +23511,18 @@ are ``private'' to this library function. Such documentation is essential for any program, and particularly for library functions. The @code{getopt()} function first checks that it was indeed called with -a string of options (the @code{options} parameter). If @code{options} -has a zero length, @code{getopt()} immediately returns @minus{}1: +a string of options (the @code{options} parameter). If both +@code{options} and @code{longoptions} have a zero length, +@code{getopt()} immediately returns @minus{}1: @cindex @code{getopt()} user-defined function @cindex user-defined @subentry function @subentry @code{getopt()} @example @c file eg/lib/getopt.awk -function getopt(argc, argv, options, thisopt, i) +function getopt(argc, argv, options, longopts, thisopt, i, j) @{ - if (length(options) == 0) # no options given - return -1 + if (length(options) == 0 && length(longopts) == 0) + return -1 # no options given @group if (argv[Optind] == "--") @{ # all done @@ -23527,33 +23539,39 @@ function getopt(argc, argv, options, thisopt, i) The next thing to check for is the end of the options. A @option{--} ends the command-line options, as does any command-line argument that -does not begin with a @samp{-}. @code{Optind} is used to step through +does not begin with a @samp{-} (unless it is an argument to a preceding +option). 
@code{Optind} steps through the array of command-line arguments; it retains its value across calls to @code{getopt()}, because it is a global variable. -The regular expression that is used, @code{@w{/^-[^:[:space:]/}}, +The regular expression @code{@w{/^-[^:[:space:]]/}} checks for a @samp{-} followed by anything that is not whitespace and not a colon. If the current command-line argument does not match this pattern, -it is not an option, and it ends option processing. Continuing on: +it is not an option, and it ends option processing. +Now, we +check to see if we are processing a short (single letter) option, or a +long option (indicated by two dashes, e.g., @samp{--filename}). If it +is a short option, we continue on: @example @c file eg/lib/getopt.awk - if (_opti == 0) - _opti = 2 - thisopt = substr(argv[Optind], _opti, 1) - Optopt = thisopt - i = index(options, thisopt) - if (i == 0) @{ - if (Opterr) - printf("%c -- invalid option\n", thisopt) > "/dev/stderr" - if (_opti >= length(argv[Optind])) @{ - Optind++ - _opti = 0 - @} else - _opti++ - return "?" - @} + if (argv[Optind] !~ /^--/) @{ # if this is a short option + if (_opti == 0) + _opti = 2 + thisopt = substr(argv[Optind], _opti, 1) + Optopt = thisopt + i = index(options, thisopt) + if (i == 0) @{ + if (Opterr) + printf("%c -- invalid option\n", thisopt) > "/dev/stderr" + if (_opti >= length(argv[Optind])) @{ + Optind++ + _opti = 0 + @} else + _opti++ + return "?" + @} @c endfile @end example @@ -23586,15 +23604,15 @@ invalid option letter actually is. 
Continuing on: @example @c file eg/lib/getopt.awk - if (substr(options, i + 1, 1) == ":") @{ - # get option argument - if (length(substr(argv[Optind], _opti + 1)) > 0) - Optarg = substr(argv[Optind], _opti + 1) - else - Optarg = argv[++Optind] - _opti = 0 - @} else - Optarg = "" + if (substr(options, i + 1, 1) == ":") @{ + # get option argument + if (length(substr(argv[Optind], _opti + 1)) > 0) + Optarg = substr(argv[Optind], _opti + 1) + else + Optarg = argv[++Optind] + _opti = 0 + @} else + Optarg = "" @c endfile @end example @@ -23608,22 +23626,97 @@ examine in the current command-line argument. Continuing: @example @c file eg/lib/getopt.awk - if (_opti == 0 || _opti >= length(argv[Optind])) @{ + if (_opti == 0 || _opti >= length(argv[Optind])) @{ + Optind++ + _opti = 0 + @} else + _opti++ + return thisopt +@c endfile +@end example + +Finally, for a short option, if @code{_opti} is either zero or greater +than the length of the current command-line argument, it means this +element in @code{argv} is through being processed, so @code{Optind} is +incremented to point to the next element in @code{argv}. If neither +condition is true, then only @code{_opti} is incremented, so that the +next option letter can be processed on the next call to @code{getopt()}. + +On the other hand, if the earlier test found that this was a long +option, we take a different branch: + +@example +@c file eg/lib/getopt.awk + @} else @{ + j = index(argv[Optind], "=") + if (j > 0) + thisopt = substr(argv[Optind], 3, j - 3) + else + thisopt = substr(argv[Optind], 3) + Optopt = thisopt +@c endfile +@end example + +First, we search this option for a possible embedded equal sign, as the +specification of long options allows an argument to an option +@samp{--someopt:} to be specified as @samp{--someopt=answer} as well as +@samp{@w{--someopt answer}}. 
+ +@example +@c file eg/lib/getopt.awk + i = match(longopts, "(^|,)" thisopt "($|[,:])") + if (i == 0) @{ + if (Opterr) + printf("%s -- invalid option\n", thisopt) > "/dev/stderr" + Optind++ + return "?" + @} +@c endfile +@end example + +Next, we try to find the current option in @code{longopts}. The regular +expression given to @code{match()}, @code{@w{"(^|,)" thisopt "($|[,:])"}}, +matches this option at the beginning of @code{longopts}, or at the +beginning of a subsequent long option (the previous long option would +have been terminated by a comma), and, in any case, either at the end of +the @code{longopts} string (@samp{$}), or followed by a comma +(separating this option from a subsequent option) or a colon (indicating +this long option takes an argument) (@samp{@w{[,:]}}). + +Using this regular expression, we check to see if the current option +might possibly be in @code{longopts} (if @code{longopts} is not +specified, this test will also fail). In case of an error, we possibly +print an error message and then return @code{"?"}. Continuing on: + +@example +@c file eg/lib/getopt.awk + if (substr(longopts, i+1+length(thisopt), 1) == ":") @{ + if (j > 0) + Optarg = substr(argv[Optind], j + 1) + else + Optarg = argv[++Optind] + @} else + Optarg = "" +@c endfile +@end example + +We now check to see if this option takes an argument and, if so, we set +@code{Optarg} to the value of that argument (either a value after an +equal sign specified on the command line, immediately adjoining the long +option string, or as the next argument on the command line). + +@example +@c file eg/lib/getopt.awk Optind++ - _opti = 0 - @} else - _opti++ - return thisopt + return thisopt + @} @} @c endfile @end example -Finally, if @code{_opti} is either zero or greater than the length of the -current command-line argument, it means this element in @code{argv} is -through being processed, so @code{Optind} is incremented to point to the -next element in @code{argv}. 
If neither condition is true, then only -@code{_opti} is incremented, so that the next option letter can be processed -on the next call to @code{getopt()}. +We increase @code{Optind} (which we already increased once if a required +argument was given as a separate command-line argument rather than +joined to its option by an equal sign), and return the +long option (minus its leading dashes). The @code{BEGIN} rule initializes both @code{Opterr} and @code{Optind} to one. @code{Opterr} is set to one, because the default behavior is for @code{getopt()} @@ -23639,20 +23732,21 @@ BEGIN @{ # test program if (_getopt_test) @{ - while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1) - printf("c = <%c>, Optarg = <%s>\n", - _go_c, Optarg) + _myshortopts = "ab:cd" + _mylongopts = "longa,longb:,otherc,otherd" + + while ((_go_c = getopt(ARGC, ARGV, _myshortopts, _mylongopts)) != -1) + printf("c = <%s>, Optarg = <%s>\n", _go_c, Optarg) printf("non-option arguments:\n") for (; Optind < ARGC; Optind++) - printf("\tARGV[%d] = <%s>\n", - Optind, ARGV[Optind]) + printf("\tARGV[%d] = <%s>\n", Optind, ARGV[Optind]) @} @} @c endfile @end example The rest of the @code{BEGIN} rule is a simple test program. 
Here are the -results of two sample runs of the test program: +results of some sample runs of the test program: @example $ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -cbARG bax -x} @@ -23670,9 +23764,21 @@ $ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -x -- xyz abc} @print{} non-option arguments: @print{} ARGV[4] = <xyz> @print{} ARGV[5] = <abc> + +$ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a \} +> @kbd{--longa -b xx --longb=foo=bar --otherd --otherc arg1 arg2} +@print{} c = <a>, Optarg = <> +@print{} c = <longa>, Optarg = <> +@print{} c = <b>, Optarg = <xx> +@print{} c = <longb>, Optarg = <foo=bar> +@print{} c = <otherd>, Optarg = <> +@print{} c = <otherc>, Optarg = <> +@print{} non-option arguments: +@print{} ARGV[8] = <arg1> +@print{} ARGV[9] = <arg2> @end example -In both runs, the first @option{--} terminates the arguments to +In all the runs, the first @option{--} terminates the arguments to @command{awk}, so that it does not try to interpret the @option{-a}, etc., as its own options. |