aboutsummaryrefslogtreecommitdiffstats
path: root/doc/gawk.texi
diff options
context:
space:
mode:
Diffstat (limited to 'doc/gawk.texi')
-rw-r--r--doc/gawk.texi214
1 files changed, 160 insertions, 54 deletions
diff --git a/doc/gawk.texi b/doc/gawk.texi
index 1a41f880..d646175f 100644
--- a/doc/gawk.texi
+++ b/doc/gawk.texi
@@ -23443,16 +23443,25 @@ main(int argc, char *argv[])
@}
@end example
+The GNU project's versions of the original Unix utilities popularized
+the use of long command line options; for example, @option{--help}
+in addition to @option{-h}. Arguments to long options are either provided
+as separate command line arguments (@samp{--source '@var{program-text}'})
+or separated from the option with an @samp{=} sign
+(@samp{--source='@var{program-text}'}).
+
As a side point, @command{gawk} actually uses the GNU @code{getopt_long()}
function to process both normal and GNU-style long options
(@pxref{Options}).
The abstraction provided by @code{getopt()} is very useful and is quite
handy in @command{awk} programs as well. Following is an @command{awk}
-version of @code{getopt()}. This function highlights one of the
+version of @code{getopt()} that accepts both short and long options.
+
+This function highlights one of the
greatest weaknesses in @command{awk}, which is that it is very poor at
-manipulating single characters. Repeated calls to @code{substr()} are
-necessary for accessing individual characters
+manipulating single characters. The function needs repeated calls to
+@code{substr()} in order to access individual characters
(@pxref{String Functions}).@footnote{This
function was written before @command{gawk} acquired the ability to
split strings into single characters using @code{""} as the separator.
@@ -23465,6 +23474,7 @@ The discussion that follows walks through the code a bit at a time:
@example
@c file eg/lib/getopt.awk
# getopt.awk --- Do C library getopt(3) function in awk
+# Also supports long options.
@c endfile
@ignore
@c file eg/lib/getopt.awk
@@ -23473,6 +23483,7 @@ The discussion that follows walks through the code a bit at a time:
#
# Initial version: March, 1991
# Revised: May, 1993
+# Long options added by Greg Minshall, January 2020
@c endfile
@end ignore
@c file eg/lib/getopt.awk
@@ -23486,7 +23497,7 @@ The discussion that follows walks through the code a bit at a time:
# Returns:
# -1 at end of options
# "?" for unrecognized option
-# <c> a character representing the current option
+# <s> a string representing the current option
# Private Data:
# _opti -- index in multiflag option, e.g., -abc
@@ -23500,17 +23511,18 @@ are ``private'' to this library function. Such documentation is essential
for any program, and particularly for library functions.
The @code{getopt()} function first checks that it was indeed called with
-a string of options (the @code{options} parameter). If @code{options}
-has a zero length, @code{getopt()} immediately returns @minus{}1:
+a string of options (the @code{options} parameter). If both
+@code{options} and @code{longopts} have a zero length,
+@code{getopt()} immediately returns @minus{}1:
@cindex @code{getopt()} user-defined function
@cindex user-defined @subentry function @subentry @code{getopt()}
@example
@c file eg/lib/getopt.awk
-function getopt(argc, argv, options, thisopt, i)
+function getopt(argc, argv, options, longopts, thisopt, i, j)
@{
- if (length(options) == 0) # no options given
- return -1
+ if (length(options) == 0 && length(longopts) == 0)
+ return -1 # no options given
@group
if (argv[Optind] == "--") @{ # all done
@@ -23527,33 +23539,39 @@ function getopt(argc, argv, options, thisopt, i)
The next thing to check for is the end of the options. A @option{--}
ends the command-line options, as does any command-line argument that
-does not begin with a @samp{-}. @code{Optind} is used to step through
+does not begin with a @samp{-} (unless it is an argument to a preceding
+option). @code{Optind} steps through
the array of command-line arguments; it retains its value across calls
to @code{getopt()}, because it is a global variable.
-The regular expression that is used, @code{@w{/^-[^:[:space:]/}},
+The regular expression @code{@w{/^-[^:[:space:]]/}}
checks for a @samp{-} followed by anything
that is not whitespace and not a colon.
If the current command-line argument does not match this pattern,
-it is not an option, and it ends option processing. Continuing on:
+it is not an option, and it ends option processing.
+Now, we
+check to see if we are processing a short (single letter) option, or a
+long option (indicated by two dashes, e.g., @samp{--filename}). If it
+is a short option, we continue on:
@example
@c file eg/lib/getopt.awk
- if (_opti == 0)
- _opti = 2
- thisopt = substr(argv[Optind], _opti, 1)
- Optopt = thisopt
- i = index(options, thisopt)
- if (i == 0) @{
- if (Opterr)
- printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
- if (_opti >= length(argv[Optind])) @{
- Optind++
- _opti = 0
- @} else
- _opti++
- return "?"
- @}
+ if (argv[Optind] !~ /^--/) @{ # if this is a short option
+ if (_opti == 0)
+ _opti = 2
+ thisopt = substr(argv[Optind], _opti, 1)
+ Optopt = thisopt
+ i = index(options, thisopt)
+ if (i == 0) @{
+ if (Opterr)
+ printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
+ if (_opti >= length(argv[Optind])) @{
+ Optind++
+ _opti = 0
+ @} else
+ _opti++
+ return "?"
+ @}
@c endfile
@end example
@@ -23586,15 +23604,15 @@ invalid option letter actually is. Continuing on:
@example
@c file eg/lib/getopt.awk
- if (substr(options, i + 1, 1) == ":") @{
- # get option argument
- if (length(substr(argv[Optind], _opti + 1)) > 0)
- Optarg = substr(argv[Optind], _opti + 1)
- else
- Optarg = argv[++Optind]
- _opti = 0
- @} else
- Optarg = ""
+ if (substr(options, i + 1, 1) == ":") @{
+ # get option argument
+ if (length(substr(argv[Optind], _opti + 1)) > 0)
+ Optarg = substr(argv[Optind], _opti + 1)
+ else
+ Optarg = argv[++Optind]
+ _opti = 0
+ @} else
+ Optarg = ""
@c endfile
@end example
@@ -23608,22 +23626,97 @@ examine in the current command-line argument. Continuing:
@example
@c file eg/lib/getopt.awk
- if (_opti == 0 || _opti >= length(argv[Optind])) @{
+ if (_opti == 0 || _opti >= length(argv[Optind])) @{
+ Optind++
+ _opti = 0
+ @} else
+ _opti++
+ return thisopt
+@c endfile
+@end example
+
+Finally, for a short option, if @code{_opti} is either zero or greater than
+or equal to the length of the current command-line argument, it means this
+element in @code{argv} is through being processed, so @code{Optind} is
+incremented to point to the next element in @code{argv}. If neither
+condition is true, then only @code{_opti} is incremented, so that the
+next option letter can be processed on the next call to @code{getopt()}.
+
+On the other hand, if the earlier test found that this was a long
+option, we take a different branch:
+
+@example
+@c file eg/lib/getopt.awk
+ @} else @{
+ j = index(argv[Optind], "=")
+ if (j > 0)
+ thisopt = substr(argv[Optind], 3, j - 3)
+ else
+ thisopt = substr(argv[Optind], 3)
+ Optopt = thisopt
+@c endfile
+@end example
+
+First, we search this option for a possible embedded equal sign, as the
+specification of long options allows an argument to an option
+@samp{--someopt:} to be specified as @samp{--someopt=answer} as well as
+@samp{@w{--someopt answer}}.
+
+@example
+@c file eg/lib/getopt.awk
+ i = match(longopts, "(^|,)" thisopt "($|[,:])")
+ if (i == 0) @{
+ if (Opterr)
+ printf("%s -- invalid option\n", thisopt) > "/dev/stderr"
+ Optind++
+ return "?"
+ @}
+@c endfile
+@end example
+
+Next, we try to find the current option in @code{longopts}. The regular
+expression given to @code{match()}, @code{@w{"(^|,)" thisopt "($|[,:])"}},
+matches this option at the beginning of @code{longopts}, or at the
+beginning of a subsequent long option (the previous long option would
+have been terminated by a comma), and, in any case, either at the end of
+the @code{longopts} string (@samp{$}), or followed by a comma
+(separating this option from a subsequent option) or a colon (indicating
+that this long option takes an argument) (@samp{@w{[,:]}}).
+
+Using this regular expression, we check to see if the current option
+might possibly be in @code{longopts} (if @code{longopts} is not
+specified, this test will also fail). In case of an error, we possibly
+print an error message and then return @code{"?"}. Continuing on:
+
+@example
+@c file eg/lib/getopt.awk
+ if (substr(longopts, i+1+length(thisopt), 1) == ":") @{
+ if (j > 0)
+ Optarg = substr(argv[Optind], j + 1)
+ else
+ Optarg = argv[++Optind]
+ @} else
+ Optarg = ""
+@c endfile
+@end example
+
+We now check to see if this option takes an argument and, if so, we set
+@code{Optarg} to the value of that argument (either a value after an
+equal sign specified on the command line, immediately adjoining the long
+option string, or as the next argument on the command line).
+
+@example
+@c file eg/lib/getopt.awk
Optind++
- _opti = 0
- @} else
- _opti++
- return thisopt
+ return thisopt
+ @}
@}
@c endfile
@end example
-Finally, if @code{_opti} is either zero or greater than the length of the
-current command-line argument, it means this element in @code{argv} is
-through being processed, so @code{Optind} is incremented to point to the
-next element in @code{argv}. If neither condition is true, then only
-@code{_opti} is incremented, so that the next option letter can be processed
-on the next call to @code{getopt()}.
+We increase @code{Optind} (which we already increased once if a required
+argument was supplied as the next command-line argument rather than after
+an equal sign), and return the
+long option (minus its leading dashes).
The @code{BEGIN} rule initializes both @code{Opterr} and @code{Optind} to one.
@code{Opterr} is set to one, because the default behavior is for @code{getopt()}
@@ -23639,20 +23732,21 @@ BEGIN @{
# test program
if (_getopt_test) @{
- while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1)
- printf("c = <%c>, Optarg = <%s>\n",
- _go_c, Optarg)
+ _myshortopts = "ab:cd"
+ _mylongopts = "longa,longb:,otherc,otherd"
+
+ while ((_go_c = getopt(ARGC, ARGV, _myshortopts, _mylongopts)) != -1)
+ printf("c = <%s>, Optarg = <%s>\n", _go_c, Optarg)
printf("non-option arguments:\n")
for (; Optind < ARGC; Optind++)
- printf("\tARGV[%d] = <%s>\n",
- Optind, ARGV[Optind])
+ printf("\tARGV[%d] = <%s>\n", Optind, ARGV[Optind])
@}
@}
@c endfile
@end example
The rest of the @code{BEGIN} rule is a simple test program. Here are the
-results of two sample runs of the test program:
+results of some sample runs of the test program:
@example
$ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -cbARG bax -x}
@@ -23670,9 +23764,21 @@ $ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -x -- xyz abc}
@print{} non-option arguments:
@print{} ARGV[4] = <xyz>
@print{} ARGV[5] = <abc>
+
+$ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a \}
+> @kbd{--longa -b xx --longb=foo=bar --otherd --otherc arg1 arg2}
+@print{} c = <a>, Optarg = <>
+@print{} c = <longa>, Optarg = <>
+@print{} c = <b>, Optarg = <xx>
+@print{} c = <longb>, Optarg = <foo=bar>
+@print{} c = <otherd>, Optarg = <>
+@print{} c = <otherc>, Optarg = <>
+@print{} non-option arguments:
+@print{} ARGV[8] = <arg1>
+@print{} ARGV[9] = <arg2>
@end example
-In both runs, the first @option{--} terminates the arguments to
+In all the runs, the first @option{--} terminates the arguments to
@command{awk}, so that it does not try to interpret the @option{-a},
etc., as its own options.