aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArnold D. Robbins <arnold@skeeve.com>2020-02-02 20:31:21 +0200
committerArnold D. Robbins <arnold@skeeve.com>2020-02-21 13:12:07 +0200
commita28cbc85b64d5f8a3d318cea5c30bdb57338256c (patch)
tree6603853d1bb53b47f80545375ff8e5a2d51546da
parent49a3b8595db2c6d265f3e6635e4deb7accff8ced (diff)
downloadegawk-a28cbc85b64d5f8a3d318cea5c30bdb57338256c.tar.gz
egawk-a28cbc85b64d5f8a3d318cea5c30bdb57338256c.tar.bz2
egawk-a28cbc85b64d5f8a3d318cea5c30bdb57338256c.zip
Add long option support to getopt function.
-rw-r--r--awklib/eg/lib/getopt.awk94
-rw-r--r--doc/ChangeLog5
-rw-r--r--doc/gawk.info773
-rw-r--r--doc/gawk.texi214
-rw-r--r--doc/gawktexi.in214
5 files changed, 815 insertions, 485 deletions
diff --git a/awklib/eg/lib/getopt.awk b/awklib/eg/lib/getopt.awk
index 6b1f4c50..69944f3f 100644
--- a/awklib/eg/lib/getopt.awk
+++ b/awklib/eg/lib/getopt.awk
@@ -1,9 +1,11 @@
# getopt.awk --- Do C library getopt(3) function in awk
+# Also supports long options.
#
# Arnold Robbins, arnold@skeeve.com, Public Domain
#
# Initial version: March, 1991
# Revised: May, 1993
+# Long options added by Greg Minshall, January 2020
# External variables:
# Optind -- index in ARGV of first nonoption argument
@@ -14,14 +16,14 @@
# Returns:
# -1 at end of options
# "?" for unrecognized option
-# <c> a character representing the current option
+# <s> a string representing the current option
# Private Data:
# _opti -- index in multiflag option, e.g., -abc
-function getopt(argc, argv, options, thisopt, i)
+function getopt(argc, argv, options, longopts, thisopt, i, j)
{
- if (length(options) == 0) # no options given
- return -1
+ if (length(options) == 0 && length(longopts) == 0)
+ return -1 # no options given
if (argv[Optind] == "--") { # all done
Optind++
@@ -31,36 +33,61 @@ function getopt(argc, argv, options, thisopt, i)
_opti = 0
return -1
}
- if (_opti == 0)
- _opti = 2
- thisopt = substr(argv[Optind], _opti, 1)
- Optopt = thisopt
- i = index(options, thisopt)
- if (i == 0) {
- if (Opterr)
- printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
- if (_opti >= length(argv[Optind])) {
+ if (argv[Optind] !~ /^--/) { # if this is a short option
+ if (_opti == 0)
+ _opti = 2
+ thisopt = substr(argv[Optind], _opti, 1)
+ Optopt = thisopt
+ i = index(options, thisopt)
+ if (i == 0) {
+ if (Opterr)
+ printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
+ if (_opti >= length(argv[Optind])) {
+ Optind++
+ _opti = 0
+ } else
+ _opti++
+ return "?"
+ }
+ if (substr(options, i + 1, 1) == ":") {
+ # get option argument
+ if (length(substr(argv[Optind], _opti + 1)) > 0)
+ Optarg = substr(argv[Optind], _opti + 1)
+ else
+ Optarg = argv[++Optind]
+ _opti = 0
+ } else
+ Optarg = ""
+ if (_opti == 0 || _opti >= length(argv[Optind])) {
Optind++
_opti = 0
} else
_opti++
- return "?"
- }
- if (substr(options, i + 1, 1) == ":") {
- # get option argument
- if (length(substr(argv[Optind], _opti + 1)) > 0)
- Optarg = substr(argv[Optind], _opti + 1)
+ return thisopt
+ } else {
+ j = index(argv[Optind], "=")
+ if (j > 0)
+ thisopt = substr(argv[Optind], 3, j - 3)
else
- Optarg = argv[++Optind]
- _opti = 0
- } else
- Optarg = ""
- if (_opti == 0 || _opti >= length(argv[Optind])) {
+ thisopt = substr(argv[Optind], 3)
+ Optopt = thisopt
+ i = match(longopts, "(^|,)" thisopt "($|[,:])")
+ if (i == 0) {
+ if (Opterr)
+ printf("%s -- invalid option\n", thisopt) > "/dev/stderr"
+ Optind++
+ return "?"
+ }
+ if (substr(longopts, i+1+length(thisopt), 1) == ":") {
+ if (j > 0)
+ Optarg = substr(argv[Optind], j + 1)
+ else
+ Optarg = argv[++Optind]
+ } else
+ Optarg = ""
Optind++
- _opti = 0
- } else
- _opti++
- return thisopt
+ return thisopt
+ }
}
BEGIN {
Opterr = 1 # default is to diagnose
@@ -68,12 +95,13 @@ BEGIN {
# test program
if (_getopt_test) {
- while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1)
- printf("c = <%c>, Optarg = <%s>\n",
- _go_c, Optarg)
+ _myshortopts = "ab:cd"
+ _mylongopts = "longa,longb:,otherc,otherd"
+
+ while ((_go_c = getopt(ARGC, ARGV, _myshortopts, _mylongopts)) != -1)
+ printf("c = <%s>, Optarg = <%s>\n", _go_c, Optarg)
printf("non-option arguments:\n")
for (; Optind < ARGC; Optind++)
- printf("\tARGV[%d] = <%s>\n",
- Optind, ARGV[Optind])
+ printf("\tARGV[%d] = <%s>\n", Optind, ARGV[Optind])
}
}
diff --git a/doc/ChangeLog b/doc/ChangeLog
index c8265436..01ee0923 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,8 @@
+2020-02-02 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in (Getopt Function): Add support for long options,
+ contributed by Greg Minshall <minshall@acm.org>.
+
2020-01-23 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: Document arry sorting by value for FUNCTAB.
diff --git a/doc/gawk.info b/doc/gawk.info
index 999ee49f..ed117478 100644
--- a/doc/gawk.info
+++ b/doc/gawk.info
@@ -16514,20 +16514,29 @@ command-line arguments for 'awk':
...
}
+ The GNU project's version of the original Unix utilities popularized
+the use of long command line options; for example, '--help' in addition
+to '-h'. Arguments to long options are either provided as separate
+command line arguments ('--source 'PROGRAM-TEXT'') or separated from the
+option with an '=' sign ('--source='PROGRAM-TEXT'').
+
As a side point, 'gawk' actually uses the GNU 'getopt_long()'
function to process both normal and GNU-style long options (*note
Options::).
The abstraction provided by 'getopt()' is very useful and is quite
handy in 'awk' programs as well. Following is an 'awk' version of
-'getopt()'. This function highlights one of the greatest weaknesses in
-'awk', which is that it is very poor at manipulating single characters.
-Repeated calls to 'substr()' are necessary for accessing individual
-characters (*note String Functions::).(1)
+'getopt()' that accepts both short and long options.
+
+ This function highlights one of the greatest weaknesses in 'awk',
+which is that it is very poor at manipulating single characters. The
+function needs repeated calls to 'substr()' in order to access
+individual characters (*note String Functions::).(1)
The discussion that follows walks through the code a bit at a time:
# getopt.awk --- Do C library getopt(3) function in awk
+ # Also supports long options.
# External variables:
# Optind -- index in ARGV of first nonoption argument
@@ -16538,7 +16547,7 @@ characters (*note String Functions::).(1)
# Returns:
# -1 at end of options
# "?" for unrecognized option
- # <c> a character representing the current option
+ # <s> a string representing the current option
# Private Data:
# _opti -- index in multiflag option, e.g., -abc
@@ -16550,13 +16559,13 @@ documentation is essential for any program, and particularly for library
functions.
The 'getopt()' function first checks that it was indeed called with a
-string of options (the 'options' parameter). If 'options' has a zero
-length, 'getopt()' immediately returns -1:
+string of options (the 'options' parameter). If both 'options' and
+'longopts' have a zero length, 'getopt()' immediately returns -1:
- function getopt(argc, argv, options, thisopt, i)
+ function getopt(argc, argv, options, longopts, thisopt, i, j)
{
- if (length(options) == 0) # no options given
- return -1
+ if (length(options) == 0 && length(longopts) == 0)
+ return -1 # no options given
if (argv[Optind] == "--") { # all done
Optind++
@@ -16569,30 +16578,34 @@ length, 'getopt()' immediately returns -1:
The next thing to check for is the end of the options. A '--' ends
the command-line options, as does any command-line argument that does
-not begin with a '-'. 'Optind' is used to step through the array of
-command-line arguments; it retains its value across calls to 'getopt()',
-because it is a global variable.
-
- The regular expression that is used, '/^-[^:[:space:]/', checks for a
-'-' followed by anything that is not whitespace and not a colon. If the
-current command-line argument does not match this pattern, it is not an
-option, and it ends option processing. Continuing on:
-
- if (_opti == 0)
- _opti = 2
- thisopt = substr(argv[Optind], _opti, 1)
- Optopt = thisopt
- i = index(options, thisopt)
- if (i == 0) {
- if (Opterr)
- printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
- if (_opti >= length(argv[Optind])) {
- Optind++
- _opti = 0
- } else
- _opti++
- return "?"
- }
+not begin with a '-' (unless it is an argument to a preceding option).
+'Optind' steps through the array of command-line arguments; it retains
+its value across calls to 'getopt()', because it is a global variable.
+
+ The regular expression '/^-[^:[:space:]]/' checks for a '-' followed
+by anything that is not whitespace and not a colon. If the current
+command-line argument does not match this pattern, it is not an option,
+and it ends option processing. Now, we check to see if we are
+processing a short (single letter) option, or a long option (indicated
+by two dashes, e.g., '--filename'). If it is a short option, we
+continue on:
+
+ if (argv[Optind] !~ /^--/) { # if this is a short option
+ if (_opti == 0)
+ _opti = 2
+ thisopt = substr(argv[Optind], _opti, 1)
+ Optopt = thisopt
+ i = index(options, thisopt)
+ if (i == 0) {
+ if (Opterr)
+ printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
+ if (_opti >= length(argv[Optind])) {
+ Optind++
+ _opti = 0
+ } else
+ _opti++
+ return "?"
+ }
The '_opti' variable tracks the position in the current command-line
argument ('argv[Optind]'). If multiple options are grouped together
@@ -16620,15 +16633,15 @@ incremented.
The main program can examine 'Optopt' if it needs to know what the
invalid option letter actually is. Continuing on:
- if (substr(options, i + 1, 1) == ":") {
- # get option argument
- if (length(substr(argv[Optind], _opti + 1)) > 0)
- Optarg = substr(argv[Optind], _opti + 1)
- else
- Optarg = argv[++Optind]
- _opti = 0
- } else
- Optarg = ""
+ if (substr(options, i + 1, 1) == ":") {
+ # get option argument
+ if (length(substr(argv[Optind], _opti + 1)) > 0)
+ Optarg = substr(argv[Optind], _opti + 1)
+ else
+ Optarg = argv[++Optind]
+ _opti = 0
+ } else
+ Optarg = ""
If the option requires an argument, the option letter is followed by
a colon in the 'options' string. If there are remaining characters in
@@ -16638,20 +16651,79 @@ argument is used ('-xFOO' versus '-x FOO'). In either case, '_opti' is
reset to zero, because there are no more characters left to examine in
the current command-line argument. Continuing:
- if (_opti == 0 || _opti >= length(argv[Optind])) {
+ if (_opti == 0 || _opti >= length(argv[Optind])) {
+ Optind++
+ _opti = 0
+ } else
+ _opti++
+ return thisopt
+
+ Finally, for a short option, if '_opti' is either zero or not less
+than the length of the current command-line argument, it means this
+element in 'argv' is through being processed, so 'Optind' is incremented
+to point to the next element in 'argv'. If neither condition is true,
+then only '_opti' is incremented, so that the next option letter can be
+processed on the next call to 'getopt()'.
+
+ On the other hand, if the earlier test found that this was a long
+option, we take a different branch:
+
+ } else {
+ j = index(argv[Optind], "=")
+ if (j > 0)
+ thisopt = substr(argv[Optind], 3, j - 3)
+ else
+ thisopt = substr(argv[Optind], 3)
+ Optopt = thisopt
+
+ First, we search this option for a possible embedded equal sign, as
+the specification of long options allows an argument to an option
+'--someopt:' to be specified as '--someopt=answer' as well as
+'--someopt answer'.
+
+ i = match(longopts, "(^|,)" thisopt "($|[,:])")
+ if (i == 0) {
+ if (Opterr)
+ printf("%s -- invalid option\n", thisopt) > "/dev/stderr"
+ Optind++
+ return "?"
+ }
+
+ Next, we try to find the current option in 'longopts'. The regular
+expression given to 'match()', '"(^|,)" thisopt "($|[,:])"', matches
+this option at the beginning of 'longopts', or at the beginning of a
+subsequent long option (the previous long option would have been
+terminated by a comma), and, in any case, either at the end of the
+'longopts' string ('$'), or followed by a comma (separating this option
+from a subsequent option) or a colon (indicating this long option takes
+an argument) ('[,:]').
+
+ Using this regular expression, we check to see if the current option
+might possibly be in 'longopts' (if 'longopts' is not specified, this
+test will also fail). In case of an error, we possibly print an error
+message and then return '"?"'. Continuing on:
+
+ if (substr(longopts, i+1+length(thisopt), 1) == ":") {
+ if (j > 0)
+ Optarg = substr(argv[Optind], j + 1)
+ else
+ Optarg = argv[++Optind]
+ } else
+ Optarg = ""
+
+ We now check to see if this option takes an argument and, if so, we
+set 'Optarg' to the value of that argument (either a value after an
+equal sign specified on the command line, immediately adjoining the long
+option string, or as the next argument on the command line).
+
Optind++
- _opti = 0
- } else
- _opti++
- return thisopt
+ return thisopt
+ }
}
- Finally, if '_opti' is either zero or greater than the length of the
-current command-line argument, it means this element in 'argv' is
-through being processed, so 'Optind' is incremented to point to the next
-element in 'argv'. If neither condition is true, then only '_opti' is
-incremented, so that the next option letter can be processed on the next
-call to 'getopt()'.
+ We increase 'Optind' (which we already increased once if a required
+argument was given as a separate command-line argument), and return the
+long option (minus its leading dashes).
The 'BEGIN' rule initializes both 'Opterr' and 'Optind' to one.
'Opterr' is set to one, because the default behavior is for 'getopt()'
@@ -16665,18 +16737,19 @@ which is in 'ARGV[0]':
# test program
if (_getopt_test) {
- while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1)
- printf("c = <%c>, Optarg = <%s>\n",
- _go_c, Optarg)
+ _myshortopts = "ab:cd"
+ _mylongopts = "longa,longb:,otherc,otherd"
+
+ while ((_go_c = getopt(ARGC, ARGV, _myshortopts, _mylongopts)) != -1)
+ printf("c = <%s>, Optarg = <%s>\n", _go_c, Optarg)
printf("non-option arguments:\n")
for (; Optind < ARGC; Optind++)
- printf("\tARGV[%d] = <%s>\n",
- Optind, ARGV[Optind])
+ printf("\tARGV[%d] = <%s>\n", Optind, ARGV[Optind])
}
}
The rest of the 'BEGIN' rule is a simple test program. Here are the
-results of two sample runs of the test program:
+results of some sample runs of the test program:
$ awk -f getopt.awk -v _getopt_test=1 -- -a -cbARG bax -x
-| c = <a>, Optarg = <>
@@ -16694,7 +16767,19 @@ results of two sample runs of the test program:
-| ARGV[4] = <xyz>
-| ARGV[5] = <abc>
- In both runs, the first '--' terminates the arguments to 'awk', so
+ $ awk -f getopt.awk -v _getopt_test=1 -- -a \
+ > --longa -b xx --longb=foo=bar --otherd --otherc arg1 arg2
+ -| c = <a>, Optarg = <>
+ -| c = <longa>, Optarg = <>
+ -| c = <b>, Optarg = <xx>
+ -| c = <longb>, Optarg = <foo=bar>
+ -| c = <otherd>, Optarg = <>
+ -| c = <otherc>, Optarg = <>
+ -| non-option arguments:
+ -| ARGV[8] = <arg1>
+ -| ARGV[9] = <arg2>
+
+ In all the runs, the first '--' terminates the arguments to 'awk', so
that it does not try to interpret the '-a', etc., as its own options.
NOTE: After 'getopt()' is through, user-level code must clear out
@@ -35560,8 +35645,8 @@ Index
* getlocaltime() user-defined function: Getlocaltime Function.
(line 16)
* getopt() function (C library): Getopt Function. (line 15)
-* getopt() user-defined function: Getopt Function. (line 108)
-* getopt() user-defined function <1>: Getopt Function. (line 134)
+* getopt() user-defined function: Getopt Function. (line 116)
+* getopt() user-defined function <1>: Getopt Function. (line 143)
* getpwent() function (C library): Passwd Functions. (line 16)
* getpwent() function (C library) <1>: Passwd Functions. (line 196)
* getpwent() user-defined function: Passwd Functions. (line 16)
@@ -37007,8 +37092,8 @@ Index
* user-defined, function, beginfile(): Filetrans Function. (line 62)
* user-defined, function, endfile(): Filetrans Function. (line 62)
* user-defined, function, rewind(): Rewind Function. (line 15)
-* user-defined, function, getopt(): Getopt Function. (line 108)
-* user-defined, function, getopt() <1>: Getopt Function. (line 134)
+* user-defined, function, getopt(): Getopt Function. (line 116)
+* user-defined, function, getopt() <1>: Getopt Function. (line 143)
* user-defined, function, getpwent(): Passwd Functions. (line 16)
* user-defined, function, _pw_init(): Passwd Functions. (line 105)
* user-defined, function, getpwnam(): Passwd Functions. (line 180)
@@ -37446,285 +37531,285 @@ Ref: File Checking-Footnote-1680647
Node: Empty Files680848
Node: Ignoring Assigns682827
Node: Getopt Function684377
-Ref: Getopt Function-Footnote-1695846
-Node: Passwd Functions696046
-Ref: Passwd Functions-Footnote-1704885
-Node: Group Functions704973
-Ref: Group Functions-Footnote-1712871
-Node: Walking Arrays713078
-Node: Library Functions Summary716086
-Node: Library Exercises717492
-Node: Sample Programs717957
-Node: Running Examples718727
-Node: Clones719455
-Node: Cut Program720679
-Node: Egrep Program730608
-Ref: Egrep Program-Footnote-1738120
-Node: Id Program738230
-Node: Split Program741910
-Ref: Split Program-Footnote-1745368
-Node: Tee Program745497
-Node: Uniq Program748287
-Node: Wc Program755908
-Ref: Wc Program-Footnote-1760163
-Node: Miscellaneous Programs760257
-Node: Dupword Program761470
-Node: Alarm Program763500
-Node: Translate Program768355
-Ref: Translate Program-Footnote-1772920
-Node: Labels Program773190
-Ref: Labels Program-Footnote-1776541
-Node: Word Sorting776625
-Node: History Sorting780697
-Node: Extract Program782922
-Node: Simple Sed790976
-Node: Igawk Program794050
-Ref: Igawk Program-Footnote-1808381
-Ref: Igawk Program-Footnote-2808583
-Ref: Igawk Program-Footnote-3808705
-Node: Anagram Program808820
-Node: Signature Program811882
-Node: Programs Summary813129
-Node: Programs Exercises814343
-Ref: Programs Exercises-Footnote-1818472
-Node: Advanced Features818563
-Node: Nondecimal Data820553
-Node: Array Sorting822144
-Node: Controlling Array Traversal822844
-Ref: Controlling Array Traversal-Footnote-1831212
-Node: Array Sorting Functions831330
-Ref: Array Sorting Functions-Footnote-1836421
-Node: Two-way I/O836617
-Ref: Two-way I/O-Footnote-1844338
-Ref: Two-way I/O-Footnote-2844525
-Node: TCP/IP Networking844607
-Node: Profiling847725
-Node: Advanced Features Summary856740
-Node: Internationalization858584
-Node: I18N and L10N860064
-Node: Explaining gettext860751
-Ref: Explaining gettext-Footnote-1866643
-Ref: Explaining gettext-Footnote-2866828
-Node: Programmer i18n866993
-Ref: Programmer i18n-Footnote-1871942
-Node: Translator i18n871991
-Node: String Extraction872785
-Ref: String Extraction-Footnote-1873917
-Node: Printf Ordering874003
-Ref: Printf Ordering-Footnote-1876789
-Node: I18N Portability876853
-Ref: I18N Portability-Footnote-1879309
-Node: I18N Example879372
-Ref: I18N Example-Footnote-1882647
-Ref: I18N Example-Footnote-2882720
-Node: Gawk I18N882829
-Node: I18N Summary883478
-Node: Debugger884819
-Node: Debugging885819
-Node: Debugging Concepts886260
-Node: Debugging Terms888069
-Node: Awk Debugging890644
-Ref: Awk Debugging-Footnote-1891589
-Node: Sample Debugging Session891721
-Node: Debugger Invocation892255
-Node: Finding The Bug893641
-Node: List of Debugger Commands900115
-Node: Breakpoint Control901448
-Node: Debugger Execution Control905142
-Node: Viewing And Changing Data908504
-Node: Execution Stack912045
-Node: Debugger Info913682
-Node: Miscellaneous Debugger Commands917753
-Node: Readline Support922815
-Node: Limitations923711
-Node: Debugging Summary926265
-Node: Namespaces927544
-Node: Global Namespace928655
-Node: Qualified Names930053
-Node: Default Namespace931052
-Node: Changing The Namespace931793
-Node: Naming Rules933407
-Node: Internal Name Management935255
-Node: Namespace Example936297
-Node: Namespace And Features938859
-Node: Namespace Summary940294
-Node: Arbitrary Precision Arithmetic941771
-Node: Computer Arithmetic943258
-Ref: table-numeric-ranges947024
-Ref: table-floating-point-ranges947517
-Ref: Computer Arithmetic-Footnote-1948175
-Node: Math Definitions948232
-Ref: table-ieee-formats951548
-Ref: Math Definitions-Footnote-1952151
-Node: MPFR features952256
-Node: FP Math Caution953974
-Ref: FP Math Caution-Footnote-1955046
-Node: Inexactness of computations955415
-Node: Inexact representation956375
-Node: Comparing FP Values957735
-Node: Errors accumulate958976
-Node: Getting Accuracy960409
-Node: Try To Round963119
-Node: Setting precision964018
-Ref: table-predefined-precision-strings964715
-Node: Setting the rounding mode966545
-Ref: table-gawk-rounding-modes966919
-Ref: Setting the rounding mode-Footnote-1970850
-Node: Arbitrary Precision Integers971029
-Ref: Arbitrary Precision Integers-Footnote-1974204
-Node: Checking for MPFR974353
-Node: POSIX Floating Point Problems975827
-Ref: POSIX Floating Point Problems-Footnote-1980112
-Node: Floating point summary980150
-Node: Dynamic Extensions982340
-Node: Extension Intro983893
-Node: Plugin License985159
-Node: Extension Mechanism Outline985956
-Ref: figure-load-extension986395
-Ref: figure-register-new-function987960
-Ref: figure-call-new-function989052
-Node: Extension API Description991114
-Node: Extension API Functions Introduction992756
-Ref: table-api-std-headers994592
-Node: General Data Types998457
-Ref: General Data Types-Footnote-11006818
-Node: Memory Allocation Functions1007117
-Ref: Memory Allocation Functions-Footnote-11011327
-Node: Constructor Functions1011426
-Node: Registration Functions1015012
-Node: Extension Functions1015697
-Node: Exit Callback Functions1021019
-Node: Extension Version String1022269
-Node: Input Parsers1022932
-Node: Output Wrappers1035653
-Node: Two-way processors1040165
-Node: Printing Messages1042430
-Ref: Printing Messages-Footnote-11043601
-Node: Updating ERRNO1043754
-Node: Requesting Values1044493
-Ref: table-value-types-returned1045230
-Node: Accessing Parameters1046166
-Node: Symbol Table Access1047401
-Node: Symbol table by name1047913
-Ref: Symbol table by name-Footnote-11050937
-Node: Symbol table by cookie1051065
-Ref: Symbol table by cookie-Footnote-11055250
-Node: Cached values1055314
-Ref: Cached values-Footnote-11058850
-Node: Array Manipulation1059003
-Ref: Array Manipulation-Footnote-11060094
-Node: Array Data Types1060131
-Ref: Array Data Types-Footnote-11062789
-Node: Array Functions1062881
-Node: Flattening Arrays1067379
-Node: Creating Arrays1074355
-Node: Redirection API1079122
-Node: Extension API Variables1081955
-Node: Extension Versioning1082666
-Ref: gawk-api-version1083095
-Node: Extension GMP/MPFR Versioning1084826
-Node: Extension API Informational Variables1086454
-Node: Extension API Boilerplate1087527
-Node: Changes from API V11091501
-Node: Finding Extensions1093073
-Node: Extension Example1093632
-Node: Internal File Description1094430
-Node: Internal File Ops1098510
-Ref: Internal File Ops-Footnote-11109860
-Node: Using Internal File Ops1110000
-Ref: Using Internal File Ops-Footnote-11112383
-Node: Extension Samples1112657
-Node: Extension Sample File Functions1114186
-Node: Extension Sample Fnmatch1121835
-Node: Extension Sample Fork1123322
-Node: Extension Sample Inplace1124540
-Node: Extension Sample Ord1128165
-Node: Extension Sample Readdir1129001
-Ref: table-readdir-file-types1129890
-Node: Extension Sample Revout1130957
-Node: Extension Sample Rev2way1131546
-Node: Extension Sample Read write array1132286
-Node: Extension Sample Readfile1134228
-Node: Extension Sample Time1135323
-Node: Extension Sample API Tests1137075
-Node: gawkextlib1137567
-Node: Extension summary1140485
-Node: Extension Exercises1144187
-Node: Language History1145429
-Node: V7/SVR3.11147085
-Node: SVR41149237
-Node: POSIX1150671
-Node: BTL1152051
-Node: POSIX/GNU1152780
-Node: Feature History1158558
-Node: Common Extensions1174751
-Node: Ranges and Locales1176034
-Ref: Ranges and Locales-Footnote-11180650
-Ref: Ranges and Locales-Footnote-21180677
-Ref: Ranges and Locales-Footnote-31180912
-Node: Contributors1181133
-Node: History summary1187086
-Node: Installation1188466
-Node: Gawk Distribution1189410
-Node: Getting1189894
-Node: Extracting1190857
-Node: Distribution contents1192495
-Node: Unix Installation1198975
-Node: Quick Installation1199657
-Node: Shell Startup Files1202071
-Node: Additional Configuration Options1203160
-Node: Configuration Philosophy1205475
-Node: Non-Unix Installation1207844
-Node: PC Installation1208304
-Node: PC Binary Installation1209142
-Node: PC Compiling1209577
-Node: PC Using1210694
-Node: Cygwin1214247
-Node: MSYS1215471
-Node: VMS Installation1215972
-Node: VMS Compilation1216763
-Ref: VMS Compilation-Footnote-11217992
-Node: VMS Dynamic Extensions1218050
-Node: VMS Installation Details1219735
-Node: VMS Running1221988
-Node: VMS GNV1226267
-Node: VMS Old Gawk1227002
-Node: Bugs1227473
-Node: Bug address1228136
-Node: Usenet1231118
-Node: Maintainers1232122
-Node: Other Versions1233383
-Node: Installation summary1240471
-Node: Notes1241673
-Node: Compatibility Mode1242467
-Node: Additions1243249
-Node: Accessing The Source1244174
-Node: Adding Code1245611
-Node: New Ports1251830
-Node: Derived Files1256205
-Ref: Derived Files-Footnote-11261865
-Ref: Derived Files-Footnote-21261900
-Ref: Derived Files-Footnote-31262498
-Node: Future Extensions1262612
-Node: Implementation Limitations1263270
-Node: Extension Design1264453
-Node: Old Extension Problems1265597
-Ref: Old Extension Problems-Footnote-11267115
-Node: Extension New Mechanism Goals1267172
-Ref: Extension New Mechanism Goals-Footnote-11270536
-Node: Extension Other Design Decisions1270725
-Node: Extension Future Growth1272838
-Node: Notes summary1273674
-Node: Basic Concepts1274832
-Node: Basic High Level1275513
-Ref: figure-general-flow1275795
-Ref: figure-process-flow1276480
-Ref: Basic High Level-Footnote-11279781
-Node: Basic Data Typing1279966
-Node: Glossary1283294
-Node: Copying1315132
-Node: GNU Free Documentation License1352675
-Node: Index1377795
+Ref: Getopt Function-Footnote-1699591
+Node: Passwd Functions699791
+Ref: Passwd Functions-Footnote-1708630
+Node: Group Functions708718
+Ref: Group Functions-Footnote-1716616
+Node: Walking Arrays716823
+Node: Library Functions Summary719831
+Node: Library Exercises721237
+Node: Sample Programs721702
+Node: Running Examples722472
+Node: Clones723200
+Node: Cut Program724424
+Node: Egrep Program734353
+Ref: Egrep Program-Footnote-1741865
+Node: Id Program741975
+Node: Split Program745655
+Ref: Split Program-Footnote-1749113
+Node: Tee Program749242
+Node: Uniq Program752032
+Node: Wc Program759653
+Ref: Wc Program-Footnote-1763908
+Node: Miscellaneous Programs764002
+Node: Dupword Program765215
+Node: Alarm Program767245
+Node: Translate Program772100
+Ref: Translate Program-Footnote-1776665
+Node: Labels Program776935
+Ref: Labels Program-Footnote-1780286
+Node: Word Sorting780370
+Node: History Sorting784442
+Node: Extract Program786667
+Node: Simple Sed794721
+Node: Igawk Program797795
+Ref: Igawk Program-Footnote-1812126
+Ref: Igawk Program-Footnote-2812328
+Ref: Igawk Program-Footnote-3812450
+Node: Anagram Program812565
+Node: Signature Program815627
+Node: Programs Summary816874
+Node: Programs Exercises818088
+Ref: Programs Exercises-Footnote-1822217
+Node: Advanced Features822308
+Node: Nondecimal Data824298
+Node: Array Sorting825889
+Node: Controlling Array Traversal826589
+Ref: Controlling Array Traversal-Footnote-1834957
+Node: Array Sorting Functions835075
+Ref: Array Sorting Functions-Footnote-1840166
+Node: Two-way I/O840362
+Ref: Two-way I/O-Footnote-1848083
+Ref: Two-way I/O-Footnote-2848270
+Node: TCP/IP Networking848352
+Node: Profiling851470
+Node: Advanced Features Summary860485
+Node: Internationalization862329
+Node: I18N and L10N863809
+Node: Explaining gettext864496
+Ref: Explaining gettext-Footnote-1870388
+Ref: Explaining gettext-Footnote-2870573
+Node: Programmer i18n870738
+Ref: Programmer i18n-Footnote-1875687
+Node: Translator i18n875736
+Node: String Extraction876530
+Ref: String Extraction-Footnote-1877662
+Node: Printf Ordering877748
+Ref: Printf Ordering-Footnote-1880534
+Node: I18N Portability880598
+Ref: I18N Portability-Footnote-1883054
+Node: I18N Example883117
+Ref: I18N Example-Footnote-1886392
+Ref: I18N Example-Footnote-2886465
+Node: Gawk I18N886574
+Node: I18N Summary887223
+Node: Debugger888564
+Node: Debugging889564
+Node: Debugging Concepts890005
+Node: Debugging Terms891814
+Node: Awk Debugging894389
+Ref: Awk Debugging-Footnote-1895334
+Node: Sample Debugging Session895466
+Node: Debugger Invocation896000
+Node: Finding The Bug897386
+Node: List of Debugger Commands903860
+Node: Breakpoint Control905193
+Node: Debugger Execution Control908887
+Node: Viewing And Changing Data912249
+Node: Execution Stack915790
+Node: Debugger Info917427
+Node: Miscellaneous Debugger Commands921498
+Node: Readline Support926560
+Node: Limitations927456
+Node: Debugging Summary930010
+Node: Namespaces931289
+Node: Global Namespace932400
+Node: Qualified Names933798
+Node: Default Namespace934797
+Node: Changing The Namespace935538
+Node: Naming Rules937152
+Node: Internal Name Management939000
+Node: Namespace Example940042
+Node: Namespace And Features942604
+Node: Namespace Summary944039
+Node: Arbitrary Precision Arithmetic945516
+Node: Computer Arithmetic947003
+Ref: table-numeric-ranges950769
+Ref: table-floating-point-ranges951262
+Ref: Computer Arithmetic-Footnote-1951920
+Node: Math Definitions951977
+Ref: table-ieee-formats955293
+Ref: Math Definitions-Footnote-1955896
+Node: MPFR features956001
+Node: FP Math Caution957719
+Ref: FP Math Caution-Footnote-1958791
+Node: Inexactness of computations959160
+Node: Inexact representation960120
+Node: Comparing FP Values961480
+Node: Errors accumulate962721
+Node: Getting Accuracy964154
+Node: Try To Round966864
+Node: Setting precision967763
+Ref: table-predefined-precision-strings968460
+Node: Setting the rounding mode970290
+Ref: table-gawk-rounding-modes970664
+Ref: Setting the rounding mode-Footnote-1974595
+Node: Arbitrary Precision Integers974774
+Ref: Arbitrary Precision Integers-Footnote-1977949
+Node: Checking for MPFR978098
+Node: POSIX Floating Point Problems979572
+Ref: POSIX Floating Point Problems-Footnote-1983857
+Node: Floating point summary983895
+Node: Dynamic Extensions986085
+Node: Extension Intro987638
+Node: Plugin License988904
+Node: Extension Mechanism Outline989701
+Ref: figure-load-extension990140
+Ref: figure-register-new-function991705
+Ref: figure-call-new-function992797
+Node: Extension API Description994859
+Node: Extension API Functions Introduction996501
+Ref: table-api-std-headers998337
+Node: General Data Types1002202
+Ref: General Data Types-Footnote-11010563
+Node: Memory Allocation Functions1010862
+Ref: Memory Allocation Functions-Footnote-11015072
+Node: Constructor Functions1015171
+Node: Registration Functions1018757
+Node: Extension Functions1019442
+Node: Exit Callback Functions1024764
+Node: Extension Version String1026014
+Node: Input Parsers1026677
+Node: Output Wrappers1039398
+Node: Two-way processors1043910
+Node: Printing Messages1046175
+Ref: Printing Messages-Footnote-11047346
+Node: Updating ERRNO1047499
+Node: Requesting Values1048238
+Ref: table-value-types-returned1048975
+Node: Accessing Parameters1049911
+Node: Symbol Table Access1051146
+Node: Symbol table by name1051658
+Ref: Symbol table by name-Footnote-11054682
+Node: Symbol table by cookie1054810
+Ref: Symbol table by cookie-Footnote-11058995
+Node: Cached values1059059
+Ref: Cached values-Footnote-11062595
+Node: Array Manipulation1062748
+Ref: Array Manipulation-Footnote-11063839
+Node: Array Data Types1063876
+Ref: Array Data Types-Footnote-11066534
+Node: Array Functions1066626
+Node: Flattening Arrays1071124
+Node: Creating Arrays1078100
+Node: Redirection API1082867
+Node: Extension API Variables1085700
+Node: Extension Versioning1086411
+Ref: gawk-api-version1086840
+Node: Extension GMP/MPFR Versioning1088571
+Node: Extension API Informational Variables1090199
+Node: Extension API Boilerplate1091272
+Node: Changes from API V11095246
+Node: Finding Extensions1096818
+Node: Extension Example1097377
+Node: Internal File Description1098175
+Node: Internal File Ops1102255
+Ref: Internal File Ops-Footnote-11113605
+Node: Using Internal File Ops1113745
+Ref: Using Internal File Ops-Footnote-11116128
+Node: Extension Samples1116402
+Node: Extension Sample File Functions1117931
+Node: Extension Sample Fnmatch1125580
+Node: Extension Sample Fork1127067
+Node: Extension Sample Inplace1128285
+Node: Extension Sample Ord1131910
+Node: Extension Sample Readdir1132746
+Ref: table-readdir-file-types1133635
+Node: Extension Sample Revout1134702
+Node: Extension Sample Rev2way1135291
+Node: Extension Sample Read write array1136031
+Node: Extension Sample Readfile1137973
+Node: Extension Sample Time1139068
+Node: Extension Sample API Tests1140820
+Node: gawkextlib1141312
+Node: Extension summary1144230
+Node: Extension Exercises1147932
+Node: Language History1149174
+Node: V7/SVR3.11150830
+Node: SVR41152982
+Node: POSIX1154416
+Node: BTL1155796
+Node: POSIX/GNU1156525
+Node: Feature History1162303
+Node: Common Extensions1178496
+Node: Ranges and Locales1179779
+Ref: Ranges and Locales-Footnote-11184395
+Ref: Ranges and Locales-Footnote-21184422
+Ref: Ranges and Locales-Footnote-31184657
+Node: Contributors1184878
+Node: History summary1190831
+Node: Installation1192211
+Node: Gawk Distribution1193155
+Node: Getting1193639
+Node: Extracting1194602
+Node: Distribution contents1196240
+Node: Unix Installation1202720
+Node: Quick Installation1203402
+Node: Shell Startup Files1205816
+Node: Additional Configuration Options1206905
+Node: Configuration Philosophy1209220
+Node: Non-Unix Installation1211589
+Node: PC Installation1212049
+Node: PC Binary Installation1212887
+Node: PC Compiling1213322
+Node: PC Using1214439
+Node: Cygwin1217992
+Node: MSYS1219216
+Node: VMS Installation1219717
+Node: VMS Compilation1220508
+Ref: VMS Compilation-Footnote-11221737
+Node: VMS Dynamic Extensions1221795
+Node: VMS Installation Details1223480
+Node: VMS Running1225733
+Node: VMS GNV1230012
+Node: VMS Old Gawk1230747
+Node: Bugs1231218
+Node: Bug address1231881
+Node: Usenet1234863
+Node: Maintainers1235867
+Node: Other Versions1237128
+Node: Installation summary1244216
+Node: Notes1245418
+Node: Compatibility Mode1246212
+Node: Additions1246994
+Node: Accessing The Source1247919
+Node: Adding Code1249356
+Node: New Ports1255575
+Node: Derived Files1259950
+Ref: Derived Files-Footnote-11265610
+Ref: Derived Files-Footnote-21265645
+Ref: Derived Files-Footnote-31266243
+Node: Future Extensions1266357
+Node: Implementation Limitations1267015
+Node: Extension Design1268198
+Node: Old Extension Problems1269342
+Ref: Old Extension Problems-Footnote-11270860
+Node: Extension New Mechanism Goals1270917
+Ref: Extension New Mechanism Goals-Footnote-11274281
+Node: Extension Other Design Decisions1274470
+Node: Extension Future Growth1276583
+Node: Notes summary1277419
+Node: Basic Concepts1278577
+Node: Basic High Level1279258
+Ref: figure-general-flow1279540
+Ref: figure-process-flow1280225
+Ref: Basic High Level-Footnote-11283526
+Node: Basic Data Typing1283711
+Node: Glossary1287039
+Node: Copying1318877
+Node: GNU Free Documentation License1356420
+Node: Index1381540

End Tag Table
diff --git a/doc/gawk.texi b/doc/gawk.texi
index 1a41f880..d646175f 100644
--- a/doc/gawk.texi
+++ b/doc/gawk.texi
@@ -23443,16 +23443,25 @@ main(int argc, char *argv[])
@}
@end example
+The GNU project's version of the original Unix utilities popularized
+the use of long command line options. For example, @option{--help}
+in addition to @option{-h}. Arguments to long options are either provided
+as separate command line arguments (@samp{--source '@var{program-text}'})
+or separated from the option with an @samp{=} sign
+(@samp{--source='@var{program-text}'}).
+
As a side point, @command{gawk} actually uses the GNU @code{getopt_long()}
function to process both normal and GNU-style long options
(@pxref{Options}).
The abstraction provided by @code{getopt()} is very useful and is quite
handy in @command{awk} programs as well. Following is an @command{awk}
-version of @code{getopt()}. This function highlights one of the
+version of @code{getopt()} that accepts both short and long options.
+
+This function highlights one of the
greatest weaknesses in @command{awk}, which is that it is very poor at
-manipulating single characters. Repeated calls to @code{substr()} are
-necessary for accessing individual characters
+manipulating single characters. The function needs repeated calls to
+@code{substr()} in order to access individual characters
(@pxref{String Functions}).@footnote{This
function was written before @command{gawk} acquired the ability to
split strings into single characters using @code{""} as the separator.
@@ -23465,6 +23474,7 @@ The discussion that follows walks through the code a bit at a time:
@example
@c file eg/lib/getopt.awk
# getopt.awk --- Do C library getopt(3) function in awk
+# Also supports long options.
@c endfile
@ignore
@c file eg/lib/getopt.awk
@@ -23473,6 +23483,7 @@ The discussion that follows walks through the code a bit at a time:
#
# Initial version: March, 1991
# Revised: May, 1993
+# Long options added by Greg Minshall, January 2020
@c endfile
@end ignore
@c file eg/lib/getopt.awk
@@ -23486,7 +23497,7 @@ The discussion that follows walks through the code a bit at a time:
# Returns:
# -1 at end of options
# "?" for unrecognized option
-# <c> a character representing the current option
+# <s> a string representing the current option
# Private Data:
# _opti -- index in multiflag option, e.g., -abc
@@ -23500,17 +23511,18 @@ are ``private'' to this library function. Such documentation is essential
for any program, and particularly for library functions.
The @code{getopt()} function first checks that it was indeed called with
-a string of options (the @code{options} parameter). If @code{options}
-has a zero length, @code{getopt()} immediately returns @minus{}1:
+a string of options (the @code{options} parameter). If both
+@code{options} and @code{longopts} have a zero length,
+@code{getopt()} immediately returns @minus{}1:
@cindex @code{getopt()} user-defined function
@cindex user-defined @subentry function @subentry @code{getopt()}
@example
@c file eg/lib/getopt.awk
-function getopt(argc, argv, options, thisopt, i)
+function getopt(argc, argv, options, longopts, thisopt, i, j)
@{
- if (length(options) == 0) # no options given
- return -1
+ if (length(options) == 0 && length(longopts) == 0)
+ return -1 # no options given
@group
if (argv[Optind] == "--") @{ # all done
@@ -23527,33 +23539,39 @@ function getopt(argc, argv, options, thisopt, i)
The next thing to check for is the end of the options. A @option{--}
ends the command-line options, as does any command-line argument that
-does not begin with a @samp{-}. @code{Optind} is used to step through
+does not begin with a @samp{-} (unless it is an argument to a preceding
+option). @code{Optind} steps through
the array of command-line arguments; it retains its value across calls
to @code{getopt()}, because it is a global variable.
-The regular expression that is used, @code{@w{/^-[^:[:space:]/}},
+The regular expression @code{@w{/^-[^:[:space:]]/}}
checks for a @samp{-} followed by anything
that is not whitespace and not a colon.
If the current command-line argument does not match this pattern,
-it is not an option, and it ends option processing. Continuing on:
+it is not an option, and it ends option processing.
+Now, we
+check to see if we are processing a short (single letter) option, or a
+long option (indicated by two dashes, e.g., @samp{--filename}). If it
+is a short option, we continue on:
@example
@c file eg/lib/getopt.awk
- if (_opti == 0)
- _opti = 2
- thisopt = substr(argv[Optind], _opti, 1)
- Optopt = thisopt
- i = index(options, thisopt)
- if (i == 0) @{
- if (Opterr)
- printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
- if (_opti >= length(argv[Optind])) @{
- Optind++
- _opti = 0
- @} else
- _opti++
- return "?"
- @}
+ if (argv[Optind] !~ /^--/) @{ # if this is a short option
+ if (_opti == 0)
+ _opti = 2
+ thisopt = substr(argv[Optind], _opti, 1)
+ Optopt = thisopt
+ i = index(options, thisopt)
+ if (i == 0) @{
+ if (Opterr)
+ printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
+ if (_opti >= length(argv[Optind])) @{
+ Optind++
+ _opti = 0
+ @} else
+ _opti++
+ return "?"
+ @}
@c endfile
@end example
@@ -23586,15 +23604,15 @@ invalid option letter actually is. Continuing on:
@example
@c file eg/lib/getopt.awk
- if (substr(options, i + 1, 1) == ":") @{
- # get option argument
- if (length(substr(argv[Optind], _opti + 1)) > 0)
- Optarg = substr(argv[Optind], _opti + 1)
- else
- Optarg = argv[++Optind]
- _opti = 0
- @} else
- Optarg = ""
+ if (substr(options, i + 1, 1) == ":") @{
+ # get option argument
+ if (length(substr(argv[Optind], _opti + 1)) > 0)
+ Optarg = substr(argv[Optind], _opti + 1)
+ else
+ Optarg = argv[++Optind]
+ _opti = 0
+ @} else
+ Optarg = ""
@c endfile
@end example
@@ -23608,22 +23626,97 @@ examine in the current command-line argument. Continuing:
@example
@c file eg/lib/getopt.awk
- if (_opti == 0 || _opti >= length(argv[Optind])) @{
+ if (_opti == 0 || _opti >= length(argv[Optind])) @{
+ Optind++
+ _opti = 0
+ @} else
+ _opti++
+ return thisopt
+@c endfile
+@end example
+
+Finally, for a short option, if @code{_opti} is either zero or at least
+the length of the current command-line argument, it means this
+element in @code{argv} is through being processed, so @code{Optind} is
+incremented to point to the next element in @code{argv}. If neither
+condition is true, then only @code{_opti} is incremented, so that the
+next option letter can be processed on the next call to @code{getopt()}.
+
+On the other hand, if the earlier test found that this was a long
+option, we take a different branch:
+
+@example
+@c file eg/lib/getopt.awk
+ @} else @{
+ j = index(argv[Optind], "=")
+ if (j > 0)
+ thisopt = substr(argv[Optind], 3, j - 3)
+ else
+ thisopt = substr(argv[Optind], 3)
+ Optopt = thisopt
+@c endfile
+@end example
+
+First, we search this option for a possible embedded equal sign, as the
+specification of long options allows an argument to an option
+@samp{--someopt:} to be specified as @samp{--someopt=answer} as well as
+@samp{@w{--someopt answer}}.
+
+@example
+@c file eg/lib/getopt.awk
+ i = match(longopts, "(^|,)" thisopt "($|[,:])")
+ if (i == 0) @{
+ if (Opterr)
+ printf("%s -- invalid option\n", thisopt) > "/dev/stderr"
+ Optind++
+ return "?"
+ @}
+@c endfile
+@end example
+
+Next, we try to find the current option in @code{longopts}. The regular
+expression given to @code{match()}, @code{@w{"(^|,)" thisopt "($|[,:])"}},
+matches this option at the beginning of @code{longopts}, or at the
+beginning of a subsequent long option (the previous long option would
+have been terminated by a comma), and, in any case, either at the end of
+the @code{longopts} string (@samp{$}), or followed by a comma
+(separating this option from a subsequent option) or a colon (indicating
+this long option takes an argument) (@samp{@w{[,:]}}).
+
+Using this regular expression, we check to see if the current option
+might possibly be in @code{longopts} (if @code{longopts} is not
+specified, this test will also fail). In case of an error, we possibly
+print an error message and then return @code{"?"}. Continuing on:
+
+@example
+@c file eg/lib/getopt.awk
+ if (substr(longopts, i+1+length(thisopt), 1) == ":") @{
+ if (j > 0)
+ Optarg = substr(argv[Optind], j + 1)
+ else
+ Optarg = argv[++Optind]
+ @} else
+ Optarg = ""
+@c endfile
+@end example
+
+We now check to see if this option takes an argument and, if so, we set
+@code{Optarg} to the value of that argument (either a value after an
+equal sign specified on the command line, immediately adjoining the long
+option string, or as the next argument on the command line).
+
+@example
+@c file eg/lib/getopt.awk
Optind++
- _opti = 0
- @} else
- _opti++
- return thisopt
+ return thisopt
+ @}
@}
@c endfile
@end example
-Finally, if @code{_opti} is either zero or greater than the length of the
-current command-line argument, it means this element in @code{argv} is
-through being processed, so @code{Optind} is incremented to point to the
-next element in @code{argv}. If neither condition is true, then only
-@code{_opti} is incremented, so that the next option letter can be processed
-on the next call to @code{getopt()}.
+We increase @code{Optind} (which we already increased once if a required
+argument was supplied as a separate command-line argument instead of
+following an equal sign), and return the long option (minus its leading dashes).
The @code{BEGIN} rule initializes both @code{Opterr} and @code{Optind} to one.
@code{Opterr} is set to one, because the default behavior is for @code{getopt()}
@@ -23639,20 +23732,21 @@ BEGIN @{
# test program
if (_getopt_test) @{
- while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1)
- printf("c = <%c>, Optarg = <%s>\n",
- _go_c, Optarg)
+ _myshortopts = "ab:cd"
+ _mylongopts = "longa,longb:,otherc,otherd"
+
+ while ((_go_c = getopt(ARGC, ARGV, _myshortopts, _mylongopts)) != -1)
+ printf("c = <%s>, Optarg = <%s>\n", _go_c, Optarg)
printf("non-option arguments:\n")
for (; Optind < ARGC; Optind++)
- printf("\tARGV[%d] = <%s>\n",
- Optind, ARGV[Optind])
+ printf("\tARGV[%d] = <%s>\n", Optind, ARGV[Optind])
@}
@}
@c endfile
@end example
The rest of the @code{BEGIN} rule is a simple test program. Here are the
-results of two sample runs of the test program:
+results of some sample runs of the test program:
@example
$ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -cbARG bax -x}
@@ -23670,9 +23764,21 @@ $ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -x -- xyz abc}
@print{} non-option arguments:
@print{} ARGV[4] = <xyz>
@print{} ARGV[5] = <abc>
+
+$ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a \}
+> @kbd{--longa -b xx --longb=foo=bar --otherd --otherc arg1 arg2}
+@print{} c = <a>, Optarg = <>
+@print{} c = <longa>, Optarg = <>
+@print{} c = <b>, Optarg = <xx>
+@print{} c = <longb>, Optarg = <foo=bar>
+@print{} c = <otherd>, Optarg = <>
+@print{} c = <otherc>, Optarg = <>
+@print{} non-option arguments:
+@print{} ARGV[8] = <arg1>
+@print{} ARGV[9] = <arg2>
@end example
-In both runs, the first @option{--} terminates the arguments to
+In all the runs, the first @option{--} terminates the arguments to
@command{awk}, so that it does not try to interpret the @option{-a},
etc., as its own options.
diff --git a/doc/gawktexi.in b/doc/gawktexi.in
index fe2cc17c..2d4409ba 100644
--- a/doc/gawktexi.in
+++ b/doc/gawktexi.in
@@ -22453,16 +22453,25 @@ main(int argc, char *argv[])
@}
@end example
+The GNU project's version of the original Unix utilities popularized
+the use of long command line options. For example, @option{--help}
+in addition to @option{-h}. Arguments to long options are either provided
+as separate command line arguments (@samp{--source '@var{program-text}'})
+or separated from the option with an @samp{=} sign
+(@samp{--source='@var{program-text}'}).
+
As a side point, @command{gawk} actually uses the GNU @code{getopt_long()}
function to process both normal and GNU-style long options
(@pxref{Options}).
The abstraction provided by @code{getopt()} is very useful and is quite
handy in @command{awk} programs as well. Following is an @command{awk}
-version of @code{getopt()}. This function highlights one of the
+version of @code{getopt()} that accepts both short and long options.
+
+This function highlights one of the
greatest weaknesses in @command{awk}, which is that it is very poor at
-manipulating single characters. Repeated calls to @code{substr()} are
-necessary for accessing individual characters
+manipulating single characters. The function needs repeated calls to
+@code{substr()} in order to access individual characters
(@pxref{String Functions}).@footnote{This
function was written before @command{gawk} acquired the ability to
split strings into single characters using @code{""} as the separator.
@@ -22475,6 +22484,7 @@ The discussion that follows walks through the code a bit at a time:
@example
@c file eg/lib/getopt.awk
# getopt.awk --- Do C library getopt(3) function in awk
+# Also supports long options.
@c endfile
@ignore
@c file eg/lib/getopt.awk
@@ -22483,6 +22493,7 @@ The discussion that follows walks through the code a bit at a time:
#
# Initial version: March, 1991
# Revised: May, 1993
+# Long options added by Greg Minshall, January 2020
@c endfile
@end ignore
@c file eg/lib/getopt.awk
@@ -22496,7 +22507,7 @@ The discussion that follows walks through the code a bit at a time:
# Returns:
# -1 at end of options
# "?" for unrecognized option
-# <c> a character representing the current option
+# <s> a string representing the current option
# Private Data:
# _opti -- index in multiflag option, e.g., -abc
@@ -22510,17 +22521,18 @@ are ``private'' to this library function. Such documentation is essential
for any program, and particularly for library functions.
The @code{getopt()} function first checks that it was indeed called with
-a string of options (the @code{options} parameter). If @code{options}
-has a zero length, @code{getopt()} immediately returns @minus{}1:
+a string of options (the @code{options} parameter). If both
+@code{options} and @code{longopts} have a zero length,
+@code{getopt()} immediately returns @minus{}1:
@cindex @code{getopt()} user-defined function
@cindex user-defined @subentry function @subentry @code{getopt()}
@example
@c file eg/lib/getopt.awk
-function getopt(argc, argv, options, thisopt, i)
+function getopt(argc, argv, options, longopts, thisopt, i, j)
@{
- if (length(options) == 0) # no options given
- return -1
+ if (length(options) == 0 && length(longopts) == 0)
+ return -1 # no options given
@group
if (argv[Optind] == "--") @{ # all done
@@ -22537,33 +22549,39 @@ function getopt(argc, argv, options, thisopt, i)
The next thing to check for is the end of the options. A @option{--}
ends the command-line options, as does any command-line argument that
-does not begin with a @samp{-}. @code{Optind} is used to step through
+does not begin with a @samp{-} (unless it is an argument to a preceding
+option). @code{Optind} steps through
the array of command-line arguments; it retains its value across calls
to @code{getopt()}, because it is a global variable.
-The regular expression that is used, @code{@w{/^-[^:[:space:]/}},
+The regular expression @code{@w{/^-[^:[:space:]]/}}
checks for a @samp{-} followed by anything
that is not whitespace and not a colon.
If the current command-line argument does not match this pattern,
-it is not an option, and it ends option processing. Continuing on:
+it is not an option, and it ends option processing.
+Now, we
+check to see if we are processing a short (single letter) option, or a
+long option (indicated by two dashes, e.g., @samp{--filename}). If it
+is a short option, we continue on:
@example
@c file eg/lib/getopt.awk
- if (_opti == 0)
- _opti = 2
- thisopt = substr(argv[Optind], _opti, 1)
- Optopt = thisopt
- i = index(options, thisopt)
- if (i == 0) @{
- if (Opterr)
- printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
- if (_opti >= length(argv[Optind])) @{
- Optind++
- _opti = 0
- @} else
- _opti++
- return "?"
- @}
+ if (argv[Optind] !~ /^--/) @{ # if this is a short option
+ if (_opti == 0)
+ _opti = 2
+ thisopt = substr(argv[Optind], _opti, 1)
+ Optopt = thisopt
+ i = index(options, thisopt)
+ if (i == 0) @{
+ if (Opterr)
+ printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
+ if (_opti >= length(argv[Optind])) @{
+ Optind++
+ _opti = 0
+ @} else
+ _opti++
+ return "?"
+ @}
@c endfile
@end example
@@ -22596,15 +22614,15 @@ invalid option letter actually is. Continuing on:
@example
@c file eg/lib/getopt.awk
- if (substr(options, i + 1, 1) == ":") @{
- # get option argument
- if (length(substr(argv[Optind], _opti + 1)) > 0)
- Optarg = substr(argv[Optind], _opti + 1)
- else
- Optarg = argv[++Optind]
- _opti = 0
- @} else
- Optarg = ""
+ if (substr(options, i + 1, 1) == ":") @{
+ # get option argument
+ if (length(substr(argv[Optind], _opti + 1)) > 0)
+ Optarg = substr(argv[Optind], _opti + 1)
+ else
+ Optarg = argv[++Optind]
+ _opti = 0
+ @} else
+ Optarg = ""
@c endfile
@end example
@@ -22618,22 +22636,97 @@ examine in the current command-line argument. Continuing:
@example
@c file eg/lib/getopt.awk
- if (_opti == 0 || _opti >= length(argv[Optind])) @{
+ if (_opti == 0 || _opti >= length(argv[Optind])) @{
+ Optind++
+ _opti = 0
+ @} else
+ _opti++
+ return thisopt
+@c endfile
+@end example
+
+Finally, for a short option, if @code{_opti} is either zero or at least
+the length of the current command-line argument, it means this
+element in @code{argv} is through being processed, so @code{Optind} is
+incremented to point to the next element in @code{argv}. If neither
+condition is true, then only @code{_opti} is incremented, so that the
+next option letter can be processed on the next call to @code{getopt()}.
+
+On the other hand, if the earlier test found that this was a long
+option, we take a different branch:
+
+@example
+@c file eg/lib/getopt.awk
+ @} else @{
+ j = index(argv[Optind], "=")
+ if (j > 0)
+ thisopt = substr(argv[Optind], 3, j - 3)
+ else
+ thisopt = substr(argv[Optind], 3)
+ Optopt = thisopt
+@c endfile
+@end example
+
+First, we search this option for a possible embedded equal sign, as the
+specification of long options allows an argument to an option
+@samp{--someopt:} to be specified as @samp{--someopt=answer} as well as
+@samp{@w{--someopt answer}}.
+
+@example
+@c file eg/lib/getopt.awk
+ i = match(longopts, "(^|,)" thisopt "($|[,:])")
+ if (i == 0) @{
+ if (Opterr)
+ printf("%s -- invalid option\n", thisopt) > "/dev/stderr"
+ Optind++
+ return "?"
+ @}
+@c endfile
+@end example
+
+Next, we try to find the current option in @code{longopts}. The regular
+expression given to @code{match()}, @code{@w{"(^|,)" thisopt "($|[,:])"}},
+matches this option at the beginning of @code{longopts}, or at the
+beginning of a subsequent long option (the previous long option would
+have been terminated by a comma), and, in any case, either at the end of
+the @code{longopts} string (@samp{$}), or followed by a comma
+(separating this option from a subsequent option) or a colon (indicating
+this long option takes an argument) (@samp{@w{[,:]}}).
+
+Using this regular expression, we check to see if the current option
+might possibly be in @code{longopts} (if @code{longopts} is not
+specified, this test will also fail). In case of an error, we possibly
+print an error message and then return @code{"?"}. Continuing on:
+
+@example
+@c file eg/lib/getopt.awk
+ if (substr(longopts, i+1+length(thisopt), 1) == ":") @{
+ if (j > 0)
+ Optarg = substr(argv[Optind], j + 1)
+ else
+ Optarg = argv[++Optind]
+ @} else
+ Optarg = ""
+@c endfile
+@end example
+
+We now check to see if this option takes an argument and, if so, we set
+@code{Optarg} to the value of that argument (either a value after an
+equal sign specified on the command line, immediately adjoining the long
+option string, or as the next argument on the command line).
+
+@example
+@c file eg/lib/getopt.awk
Optind++
- _opti = 0
- @} else
- _opti++
- return thisopt
+ return thisopt
+ @}
@}
@c endfile
@end example
-Finally, if @code{_opti} is either zero or greater than the length of the
-current command-line argument, it means this element in @code{argv} is
-through being processed, so @code{Optind} is incremented to point to the
-next element in @code{argv}. If neither condition is true, then only
-@code{_opti} is incremented, so that the next option letter can be processed
-on the next call to @code{getopt()}.
+We increase @code{Optind} (which we already increased once if a required
+argument was supplied as a separate command-line argument instead of
+following an equal sign), and return the long option (minus its leading dashes).
The @code{BEGIN} rule initializes both @code{Opterr} and @code{Optind} to one.
@code{Opterr} is set to one, because the default behavior is for @code{getopt()}
@@ -22649,20 +22742,21 @@ BEGIN @{
# test program
if (_getopt_test) @{
- while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1)
- printf("c = <%c>, Optarg = <%s>\n",
- _go_c, Optarg)
+ _myshortopts = "ab:cd"
+ _mylongopts = "longa,longb:,otherc,otherd"
+
+ while ((_go_c = getopt(ARGC, ARGV, _myshortopts, _mylongopts)) != -1)
+ printf("c = <%s>, Optarg = <%s>\n", _go_c, Optarg)
printf("non-option arguments:\n")
for (; Optind < ARGC; Optind++)
- printf("\tARGV[%d] = <%s>\n",
- Optind, ARGV[Optind])
+ printf("\tARGV[%d] = <%s>\n", Optind, ARGV[Optind])
@}
@}
@c endfile
@end example
The rest of the @code{BEGIN} rule is a simple test program. Here are the
-results of two sample runs of the test program:
+results of some sample runs of the test program:
@example
$ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -cbARG bax -x}
@@ -22680,9 +22774,21 @@ $ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -x -- xyz abc}
@print{} non-option arguments:
@print{} ARGV[4] = <xyz>
@print{} ARGV[5] = <abc>
+
+$ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a \}
+> @kbd{--longa -b xx --longb=foo=bar --otherd --otherc arg1 arg2}
+@print{} c = <a>, Optarg = <>
+@print{} c = <longa>, Optarg = <>
+@print{} c = <b>, Optarg = <xx>
+@print{} c = <longb>, Optarg = <foo=bar>
+@print{} c = <otherd>, Optarg = <>
+@print{} c = <otherc>, Optarg = <>
+@print{} non-option arguments:
+@print{} ARGV[8] = <arg1>
+@print{} ARGV[9] = <arg2>
@end example
-In both runs, the first @option{--} terminates the arguments to
+In all the runs, the first @option{--} terminates the arguments to
@command{awk}, so that it does not try to interpret the @option{-a},
etc., as its own options.