diff options
Diffstat (limited to 'vms')
-rw-r--r-- | vms/descrip.mms | 200 | ||||
-rw-r--r-- | vms/fcntl.h | 10 | ||||
-rw-r--r-- | vms/gawk.cld | 46 | ||||
-rw-r--r-- | vms/gawk.hlp | 1156 | ||||
-rw-r--r-- | vms/unixlib.h | 24 | ||||
-rw-r--r-- | vms/varargs.h | 38 | ||||
-rw-r--r-- | vms/vms.h | 69 | ||||
-rw-r--r-- | vms/vms_args.c | 398 | ||||
-rw-r--r-- | vms/vms_cli.c | 88 | ||||
-rw-r--r-- | vms/vms_fwrite.c | 209 | ||||
-rw-r--r-- | vms/vms_gawk.c | 245 | ||||
-rw-r--r-- | vms/vms_misc.c | 159 | ||||
-rw-r--r-- | vms/vms_popen.c | 168 | ||||
-rw-r--r-- | vms/vmsbuild.com | 70 |
14 files changed, 2880 insertions, 0 deletions
diff --git a/vms/descrip.mms b/vms/descrip.mms new file mode 100644 index 00000000..040d458a --- /dev/null +++ b/vms/descrip.mms @@ -0,0 +1,200 @@ +# Descrip.MMS -- Makefile for building GNU Awk on VMS with VAXC and MMS. +# +# usage: +# $ MMS /Description=[.vms]Descrip.MMS gawk +# +# gawk.exe : +# You'll need to modify this Makefile to use gcc or vaxc v2.x rather +# than vaxc v3.x. Change the CFLAGS macro definition (move '#' from +# beginning of 2nd alternative to beginning of 1st), and also perhaps +# enable the following ".first" rule and its associated action. For +# GNU C, change the LIBS macro definition. +# +# awk_tab.c : +# If you have DEC/Shell, change the PARSER and PARSERINIT macros to use +# yacc rather than bison. If you have neither yacc nor bison, you'll +# have to make sure that the distributed version of "awk.tab.c" is +# named "awk_tab.c" and that its modification date is later than the +# date of "awk.y", so that MMS won't try to build that target. If you +# use bison and it is already defined system-wide, comment out the +# PARSERINIT definition. +# +# install.help : +# You can make the target 'install.help' to load the VMS help text +# into a help library. Modify the HELPLIB macro if you don't want +# to put the entry into the regular VMS library. (If you use an alternate +# help library, it must already exist; this target won't create it.) +# +# gawk.dvi : +# If you have TeX, you can make the target 'gawk.dvi' to process +# _The_GAWK_Manual_ from gawk.texinfo. You'll need to use a device +# specific post-processor on gawk.dvi in order to get printable data. 
+# + +# location of the VMS-specific files, relative to the 'main' directory +VMSDIR = [.vms] +MAKEFILE = $(VMSDIR)Descrip.MMS + +# debugging &c !'ccflags' is an escape to allow external compile flags +#CCFLAGS = /noOpt/Debug + +# work within the main directory, even when handling files in [.vms] +# note: use 2nd variant for either VAX C V2.x or for GNU C +CFLAGS = /Include=[]/Object=[]/Opt=noInline $(CCFLAGS) +#CFLAGS = /Include=([],$(VMSDIR))/Object=[] $(CCFLAGS) + +# uncomment this for GNU C +#CC = gcc + +# uncomment these two lines for GNU C _if_ it's not installed system-wide +#.first !compiler init, needed if there's no system-wide setup +# set command gnu_cc:[000000]gcc + +# uncomment these three lines for VAX C V2.x +#.first !compiler init, find all #include files +# define/nolog vaxc$library sys$library:,sys$disk:$(VMSDIR) +# define/nolog c$library [],$(VMSDIR) +#!(it appears that if vaxc$library is defined, then the /Include +#! qualifier is ignored, making a c$library definition essential) + +# run-time libraries; use the 2nd one for GNU C +LIBS = sys$share:vaxcrtl.exe/Shareable +#LIBS = gnu_cc:[000000]gcclib.olb/Library,sys$library:vaxcrtl.olb/Library + +PARSER = bison +PARSERINIT = set command gnu_bison:[000000]bison +#PARSER = yacc +#PARSERINIT = yacc := $shell$exe:yacc + +# this is used for optional target 'install.help' +HELPLIB = sys$help:helplib.hlb +#HELPLIB = sys$help:local.hlb + +# +######## nothing below this line should need to be changed ######## +# + +# ALLOCA +ALLOCA = alloca.obj + +# object files +AWKOBJS = main.obj,eval.obj,builtin.obj,msg.obj,iop.obj,io.obj,\ + field.obj,array.obj,node.obj,version.obj,missing.obj,re.obj + +ALLOBJS = $(AWKOBJS),awk_tab.obj + +# GNUOBJS +# GNU stuff that gawk uses as library routines. 
+GNUOBJS = regex.obj,dfa.obj,$(ALLOCA) + +# VMSOBJS +# VMS specific stuff +VMSCODE = vms_misc.obj,vms_popen.obj,vms_fwrite.obj,vms_args.obj,\ + vms_gawk.obj,vms_cli.obj +VMSCMD = gawk_cmd.obj # built from .cld file +VMSOBJS = $(VMSCODE),$(VMSCMD) + +VMSSRCS = $(VMSDIR)vms_misc.c,$(VMSDIR)vms_popen.c,$(VMSDIR)vms_fwrite.c,\ + $(VMSDIR)vms_args.c,$(VMSDIR)vms_gawk.c,$(VMSDIR)vms_cli.c +VMSHDRS = $(VMSDIR)vms.h,$(VMSDIR)fcntl.h,$(VMSDIR)varargs.h,$(VMSDIR)unixlib.h +VMSOTHR = $(VMSDIR)Descrip.MMS,$(VMSDIR)vmsbuild.com,$(VMSDIR)version.com,\ + $(VMSDIR)gawk.hlp + +# Release of gawk +REL=2.13 +PATCHLVL=2 + +# dummy target to allow building "gawk" in addition to explicit "gawk.exe" +gawk : gawk.exe + write sys$output " GAWK " + +# rules to build gawk +gawk.exe : $(ALLOBJS) $(GNUOBJS) $(VMSOBJS) gawk.opt + $(LINK) $(LINKFLAGS) gawk.opt/options + +gawk.opt : $(MAKEFILE) # create linker options file + open/write opt gawk.opt ! ~ 'cat <<close >gawk.opt' + write opt "! GAWK -- Gnu AWK" + @ write opt "$(ALLOBJS)" + @ write opt "$(GNUOBJS)" + @ write opt "$(VMSOBJS)" + @ write opt "$(LIBS)" + @ write opt "psect_attr=environ,noshr !extern [noshare] char **" + @ write opt "stack=50 !preallocate more pages (default is 20)" + write opt "identification=""V$(REL).$(PATCHLVL)""" + close opt + +$(AWKOBJS) : awk.h config.h +$(VMSCODE) : awk.h config.h $(VMSDIR)vms.h +vms_misc.obj : $(VMSDIR)vms_misc.c +vms_popen.obj : $(VMSDIR)vms_popen.c +vms_fwrite.obj : $(VMSDIR)vms_fwrite.c +vms_args.obj : $(VMSDIR)vms_args.c +vms_gawk.obj : $(VMSDIR)vms_gawk.c +vms_cli.obj : $(VMSDIR)vms_cli.c +dfa.obj : awk.h config.h dfa.h +regex.obj : awk.h config.h regex.h +main.obj : patchlevel.h +awk_tab.obj : awk.h awk_tab.c + +# bison or yacc required +awk_tab.c : awk.y # foo.y :: yacc => y_tab.c, bison => foo_tab.c + @- if f$search("y_tab.c").nes."" then delete y_tab.c;* + - $(PARSERINIT) + $(PARSER) $(YFLAGS) $< + @- if f$search("y_tab.c").nes."" then rename/new_vers y_tab.c $@ !yacc + +##version.c : 
version.sh $(MAKEFILE) +## @$(VMSDIR)version.com "$(REL)" + +config.h : [.config]vms-conf.h + copy $< $@ + +# Alloca - C simulation +alloca.obj : alloca.c + $(CC) $(CFLAGS) /define=("STACK_DIRECTION=(-1)","exit=vms_exit") $< + +$(VMSCMD) : $(VMSDIR)gawk.cld + set command/object=$@ $(CLDFLAGS) $< + +# special target for loading the help text into a VMS help library +# (the help source, gawk.hlp, lives in $(VMSDIR) with the other VMS files) +install.help : $(VMSDIR)gawk.hlp + library/help $(HELPLIB) $< /log + +# miscellaneous other targets +tidy : + - if f$search("*.*;-1").nes."" then purge + - if f$search("[.*]*.*;-1").nes."" then purge [.*] + +clean : + - delete *.obj;*,gawk.opt;* + +spotless : clean tidy + - delete gawk.dvi;*,gawk.exe;*,[.support]texindex.exe;* + +# +# build gawk.dvi from within the 'support' subdirectory +# +gawk.dvi : [.support]texindex.exe gawk.texinfo + @ set default [.support] + @ write sys$output " Warnings from TeX are expected during the first pass" + TeX [-]gawk.texinfo + mcr []texindex gawk.cp gawk.fn gawk.ky gawk.pg gawk.tp gawk.vr + @ write sys$output " Second pass" + TeX [-]gawk.texinfo + mcr []texindex gawk.cp gawk.fn gawk.ky gawk.pg gawk.tp gawk.vr + @ write sys$output " Third (final) pass" + TeX [-]gawk.texinfo + -@ purge + -@ delete gawk.lis;,.aux;,gawk.%%;,.cps;,.fns;,.kys;,.pgs;,.toc;,.tps;,.vrs; + @ rename/new_vers gawk.dvi [-]*.* + @ set default [-] + +[.support]texindex.exe : [.support]texindex.c + @ set default [.support] + $(CC) /noOpt/noList/Define=("lines=tlines") texindex.c + $(LINK) /noMap texindex.obj,sys$library:vaxcrtl.olb/Lib + -@ delete texindex.obj;* + @ set default [-] + +#eof diff --git a/vms/fcntl.h b/vms/fcntl.h new file mode 100644 index 00000000..d975db7a --- /dev/null +++ b/vms/fcntl.h @@ -0,0 +1,10 @@ +/* "fcntl.h" -- constants for BSD-style I/O routines (ala VAX C's <file.h>) */ +#define O_RDONLY 0 +#define O_WRONLY 1 +#define O_RDWR 2 +#define O_NDELAY 4 +#define O_NOWAIT 4 +#define O_APPEND 8 +#define O_CREAT 0x0200 +#define O_TRUNC 0x0400 +#define O_EXCL 0x0800 diff --git 
a/vms/gawk.cld b/vms/gawk.cld new file mode 100644 index 00000000..23d4c28a --- /dev/null +++ b/vms/gawk.cld @@ -0,0 +1,46 @@ +! Gawk.Cld -- command definition for GAWK +! Pat Rankin, Nov'89 +! [ revised for 2.12, May'91 ] + module Gawk_Cmd +define verb GAWK + synonym AWK +! image gawk !usage $ DEFINE GAWK disk:[directory]GAWK + parameter p1, value(required,list), label=gawk_p1, prompt="data file(s)" + qualifier input, value(required,list,type=$infile), label=progfile + qualifier commands, value(required), label=program + qualifier field_separator, value(required), label=field_sep + qualifier reg_expr, value(type=reg_expr_keywords) + qualifier variables, value(required,list) + qualifier copyright + qualifier version + qualifier lint + qualifier posix + qualifier usage + qualifier strict, negatable + qualifier debug, negatable + qualifier output, value(type=$outfile,default="SYS$OUTPUT") + disallow progfile and program !or not progfile and not program +define type reg_expr_keywords + keyword awk + keyword egrep, default !synonym for 'posix' + keyword posix !equivalent to 'egrep' +! +! p1 = data file list (possibly including 'var=value' constructs) +!note: parameter required; use 'sys$input:' to read data from 'stdin' +! /input = program source file ('-f progfile') +! /commands = program source text ('program') +!note: either input or commands, but not both; if neither, usage message given +! /field_separator = character(s) delimiting record fields; default is "[ \t]" +! /reg_expr = type of regular expressions: awk or posix (posix == egrep) +!note: by default, use awk style; /reg_expr (w/o value), use egrep style +! /variables = list of 'var=value' items for assignment prior to BEGIN +! /posix = force POSIX compatibility mode operation +! /strict = force compatibility mode operation (UN*X SYS V, Release 4) +! /output = destination for print,printf (default is sys$output: ie, 'stdout') +! /lint = scan the awk program for possible problems and warn about them +! 
/debug = debugging mode +!note: compilation options determine whether debug mode is valid +! /usage = display 'usage' reminder [describing this VMS command syntax] +! /version = show program version +! /copyright = show abbreviated edition of FSF's copyright notice +! diff --git a/vms/gawk.hlp b/vms/gawk.hlp new file mode 100644 index 00000000..68892393 --- /dev/null +++ b/vms/gawk.hlp @@ -0,0 +1,1156 @@ +! Gawk.Hlp +! Pat Rankin, Jun'90 +! revised, Jun'91 +! Online help for GAWK. +! +1 GAWK + GAWK is GNU awk, the Free Software Foundation's implementation of + the awk programming language. awk is an interpretive language which + can handle many data-reformatting jobs with just a few lines of code. + It has powerful string manipulation and pattern matching capabilities + built in. This version should be compatible with POSIX 1003.2 awk. + + The VMS version of GAWK supports both the original UN*X-style command + interface and a DCL interface. The only setup requirement for GAWK + is to define it as a 'foreign' command: a DCL symbol with a value + which begins with '$'. + $ GAWK :== $disk:[directory]GAWK +2 GNU_syntax + GAWK's UN*X-style interface uses the 'dash' convention for specifying + options and uses spaces to separate multiple arguments. + + There are two main alternatives, depending on how the awk program is + to be passed to GAWK. Both alternatives share most options. + + Usage: $ gawk [-W opts] [-F fs] [-v var=val] -f progfile [--] file ... + or $ gawk [-W opts] [-F fs] [-v var=val] [--] "program" file ... + + The options are case-sensitive. On VMS, the DCL command interpreter + converts unquoted text into uppercase before passing it to the running + program. However, GAWK is written in 'C' and the C Run-Time Library + (VAXCRTL) converts unquoted text into *lowercase*. Therefore, the + -Fval and -W options must be enclosed in quotes. 
+3 options + -f file use the specified file as the awk program source; if more + than one instance of -f is used, each file will be read + in succession + -Fstring define a value for the FS variable (field separator) + -v var=val assign a value of 'val' to the variable 'var' + -W 'options' additional gawk-specific options; multiple values may + be separated by commas, or by spaces if they're quoted, + or multiple occurrences of -W may be used. + -W compat use awk "compatibility mode" to disable GAWK extensions + and get the behavior of UN*X awk. + -W copyright [or -W copyleft] display an abbreviated version of + the GNU copyright information + -W lint warn about suspect or non-portable awk program code + -W posix compatibility mode with additional restrictions + -W version display program version number + -- don't check further arguments for leading dash +3 program_text + If the '-f file' option is not used on the command line, then the + first "non-dash" argument is assumed to be a string of text containing + the awk source program. Here is a complete sample program: + $ gawk -- "BEGIN {print ""\nHello, World!\n""}" + This program would print a blank line (based on first "\n"), followed + by a line reading "Hello, World!", followed by another blank line + (since awk's 'print' statement includes trailing 'newline'). + + On VMS, to include a quote character inside of a quoted string, two + successive quotes ("") must be used. +3 data_files + After all dash-options are examined, and after the program text if + there were no occurrences of the -f option, remaining (space separated) + command line arguments are considered to be data files for the awk + program to process. If any of these actually contains an equals sign + (=), then it is interpreted as a variable assignment instead of a data + file. The syntax is 'variable_name=value'. 
For example, the command + $ gawk -f myprog.awk infile.one flag=2 start=0 infile.two + would read file 'infile.one' for the program in 'myprog.awk', then it + would set 'flag' to 2 and 'start' to 0, and finally it would read file + 'infile.two' for the program. Note that in a case like this, the two + assignments actually occur after the first file has been processed, + not at program startup when the command line is first scanned. +3 IO_redirection + The command parsing in the VMS implementation of GAWK does some + emulation of a UN*X-style shell, where certain characters on the + command line have special meaning. In particular, the symbols '<', + '>', '|', '*', and '?' receive special handling before the main part + of the program has a chance to see them. The symbols '<' and '>' + perform some file manipulation from the command line: + + <ifile open file 'ifile' (readonly) as 'stdin' [SYS$INPUT] + >nfile create 'nfile' at 'stdout' [SYS$OUTPUT], in stream-lf format + >>ofile append to 'ofile' for 'stdout'; create it if necessary + >&efile point 'stderr' [SYS$ERROR] at 'efile', but don't open it yet + >$vfile create 'vfile' as 'stdout', using RMS attributes appropriate + for a standard text file (variable length records with + implied carriage control) + 2>&1 route error messages into the regular output stream + 1>&2 send output data to the error destination + <<sentinel error; reading stdin until 'sentinel' not supported + <-, >- error; closure of stdin or stdout from cmd line not supported + >>$vfile incorrect; would be interpreted as file "$vfile" in stream-lf + format rather than as file "vfile" in RMS 'text' format + | error; command line pipes not supported +3 wildcard_expansion + The command parsing in the VMS implementation of GAWK does some + emulation of a UN*X-style shell, where certain characters on the + command line have special meaning. In particular, the symbols '<', + '>', '*', '%', and '?' 
receive special handling before the main part + of the program has a chance to see them. The symbols '*', '%' and '?' + are used as wildcards in filenames. '*' and '%' have their usual VMS + meanings of multiple character and single character wildcards, + respectively, and '?' is also treated as a single character wildcard. + + When a command line argument that should be a filename contains any + of the wildcard characters, a directory lookup is attempted for files + which match the specified pattern. If one or more matching files are + found, those filenames are put into the command line in place of the + original pattern. If no matching files are found, the original + pattern is left in place. +2 DCL_syntax + GAWK's DCL-style interface is more or less a standard DCL command, with + one required parameter. Multiple values--when present--are separated + by commas. + + There are two main alternatives, depending on how the awk program is + to be passed to GAWK. Both alternatives share most options. + + Usage: GAWK /COMMANDS="awk program text" data_file[,data_file,...] + or GAWK /INPUT=awk_file data_file[,"Var=value",data_file,...] + ( or GAWK /INPUT=(awk_file1,awk_file2,...) data_file[,...] ) +3 Parameter + data_file[,datafile,...] (data_file data_file ...) + data_file[,"Var=value",...,data_file,...] (data_file Var=value &c) + + Data file(s) for the awk program to process. If any of these + actually contains an equals sign (=), then it is interpreted as + a variable assignment instead of a data file. The syntax is + "variable_name=value". Quotes are required for non-file parameters. + + For example, the command + $ gawk/input=myprog.awk infile.one,"flag=2","start=0",infile.two + would read file 'infile.one' for the program in 'myprog.awk', then it + would set 'flag' to 2 and 'start' to 0, and finally it would read file + 'infile.two' for the program. 
Note that in a case like this, the two + assignments actually occur after the first file has been processed, + not at program startup when the command line is first scanned. + + Wildcard file lookups are attempted on data file specifications. See + subtopic 'GAWK GNU_syntax wildcard_expansion' for details. + + At least one data_file parameter value is required. An exception is + made if /usage, /version, or /copyright is specified *and* if GAWK is + defined as a 'foreign' command rather than a 'native' DCL command. +3 Qualifiers +/COMMANDS + /COMMANDS="awk program text" (-- "awk program text") + + For short programs, it is possible to include the complete program + on the command line. The quotes are required. Here is a complete + sample program: + $ gawk/commands="BEGIN {print ""\nHello, World!\n""}" NL: + This program would print a blank line (based on first "\n"), followed + by a line reading "Hello, World!", followed by another blank line + (since awk's 'print' statement includes trailing 'newline'). + + To include a quote character inside of a quoted string, two + successive quotes ("") must be used. + + Either /COMMANDS or /INPUT (but not both) must be supplied. +/INPUT + /INPUT=(awk_file1,awk_file2) (-f awk_file1 -f awk_file2) + + Used to specify one or more files containing the source code of + the awk program. If more than one file is used, separate them + with commas and enclose the list in parentheses. + + Multiple source files are processed in order as if they had been + concatenated together. + + Either /INPUT or /COMMANDS (but not both) must be supplied. +/FIELD_SEPARATOR + /FIELD_SEPARATOR="FS_value" (-F"FS_value") + + Assign a value to the built in variable FS (field separator). +/VARIABLES + /VARIABLES=("Var1=val1","Var2=val2",...) (-v Var1=val1 -v Var2=val2) + + Assign value(s) to the specified variable(s). +/REG_EXPR + /REG_EXPR={AWK | EGREP | POSIX} (-a vs -e options [obsolete]) + + Specify regular expression syntax. 
+ + /REG_EXPR=AWK use the original awk syntax for regular expressions + /REG_EXPR=EGREP use the egrep syntax for regular expressions + /REG_EXPR=POSIX equivalent to /REG_EXPR=EGREP + + If /REG_EXPR is omitted, then /REG_EXPR=AWK is the default. However, + if /REG_EXPR is included but its value is omitted, EGREP is used. + + This qualifier is obsolete and has no effect. +/STRICT + /[NO]STRICT (-"W compat" option) + + Use strict awk compatibility mode (/strict) and suppress GAWK + extensions. The default is /NOSTRICT. +/POSIX + /[NO]POSIX (-"W posix" option) + + Use POSIX compatibility mode (/posix) and suppress GAWK extensions. + The default is /NOPOSIX. Slightly more restrictive than /strict. +/LINT + /[NO]LINT (-"W lint" option) + + Check the awk program carefully for potential problems that might + be encountered if it were to be used with other awk implementations, + and print warnings for anything found. The default is /NOLINT. +/VERSION + /VERSION (-"W version" option) + + Print GAWK's version number. +/COPYRIGHT + /COPYRIGHT (-"W copyright" or -"W copyleft" option) + + Print a brief version of GAWK's copyright notice. +/USAGE + /USAGE (no corresponding GNU_syntax option) + + Print a compact summary of the command line options. + + After the 'usage' message is printed, GAWK terminates regardless + of any other command line options. +/OUTPUT + /OUTPUT=out_file (>$out_file) + + Write program output into 'out_file'. The default is SYS$OUTPUT. +2 awk_language + An awk program consists of one or more pattern-action pairs, sometimes + referred to as "rules". For each record of an input (data) file, the + rules are checked sequentially. Any pattern which matches the input + record triggers that rule's action. Actions are instructions which + resemble statements in the 'C' programming language. Patterns come + in several varieties, including field comparisons, regular expression + matching, and special cases defined by reserved keywords. 
+ + All awk keywords and variables are case-sensitive. Text matching is + also sensitive to character case unless the builtin variable IGNORECASE + is set to a non-zero value. +3 rules + The syntax for a pattern-action 'rule' is simply + PATTERN { ACTION } + where the braces ({}) are required punctuation for the action. + Semicolons (;) or 'newlines' (ie, having the text on a separate line) + delimit multiple rules and also multiple actions within a given rule. + Either the pattern or the action may be omitted; an empty pattern + matches every record of the input file; a missing action (not an empty + action inside of braces), is an implicit request to print the current + record; an empty action (ie, {}) is legal but not very useful. +3 patterns + There are several types of patterns available for awk rules. + + expression an 'expression' is something to be evaluated (perhaps + a comparison or function call) which will + be considered true if non-zero (for numeric + results) or if non-null (for strings) + /regular_expression/ slashes (/) delimit a regular expression + which is used as a pattern + pattern1, pattern2 a pair of patterns separated by a comma (,), + which causes a range of records to trigger + the associated action; the records which + match the patterns are included in the range + <null> an omitted pattern (in this text, the string '<null>' + is displayed, but in an awk program, it + would really be blank) matches every record + BEGIN keyword for specifying a rule to be executed prior to + reading the 1st record of the 1st input file + END keyword for specifying a rule to be executed after + handling the last input record of last file +4 examples + Some example patterns (mostly with the corresponding actions omitted) + + NF > 0 # comparison expression: matches non-null records + $0 # implied comparison: also matches non-null records + $2 > 1000 && sum <= 999999 # slightly more elaborate expression + /x/ # regular expression matching any record with an 
'x' in it + /^ / # reg-expr matching records beginning with a space + $1 == "start", $NF == "stop" # range pattern for input in which + some data lines begin with 'start' and/or end with + 'stop' in order to collect groups of records + { sum += $1 } # null pattern: its action (add field #1 to + variable 'sum') would be executed for every record + BEGIN { sum = 0 } # keyword 'BEGIN': perform this action before + reading the input file (note: initialization to 0 is + unnecessary in awk) + END { print "total =", sum } # keyword 'END': perform this + action after the last input record has been processed +3 actions + An 'action' is something to do when a given record has matched the + corresponding pattern in a rule. In general, actions resemble 'C' + statements and expressions. The action in a rule must be enclosed + in braces ({}). + + Each action can contain more than one statement or expression to be + executed, provided that they're separated by semicolons (;) and/or + on separate lines. + + An omitted action is equivalent to + { print $0 } + which prints the current record. 
+3 operators + Relational operators + == compare for equality + != compare for inequality + <, <=, >, >= numerical or lexical comparison (less than, less or + equal, greater than, greater or equal, respectively) + ~ match against a regular expression + !~ match against a regular expression, but accept failed matches + instead of successful ones + Arithmetic operators + + addition + - subtraction + * multiplication + / division + % remainder + ^, ** exponentiation ('**' is a synonym for '^', unless POSIX + compatibility is specified, in which case it's invalid) + Boolean operators (aka Logical operators) + a value is considered false if it's 0 or a null string, + it is true otherwise; the result of a boolean operation + (and also of a comparison operation) will be 0 when false + or 1 when true + || or [expression (a || b) is true if either a is true or b + is true or both a and b are true; it is false otherwise] + && and [expression (a && b) is true if both a and b are true; + it is false otherwise] + ! not [expression (!a) is true if a is false, false otherwise] + in array membership; the keyword 'in' tests whether the value + on the left represents a current subscript in the array + named on the right + Conditional operator + ? : the conditional operator takes three operands; the first is + an expression to evaluate, the second is the expression to + use if the first was true, the third is the expression to + use if it was false [simple example (a < b ? 
b : a) gives + the maximum of a and b] + Assignment operators + = store the value on the right into the variable or array slot + on the left [expression (a = b) stores the value of b in a] + +=, -=, *=, /=, %=, ^=, **= perform the indicated arithmetic + operation using the current value of the variable or array + element on the left side and the expression on the right + side, then store the result in the left side + ++ increment by 1 [expression (++a) gets the current value of + a and adds 1 to it, stores that back in a, and returns the + new value; expression (a++) gets the current value of a, + adds 1 to it, stores that back in a, but returns the + original value of a] + -- decrement by 1 (analogous to increment) + String operators + there is no explicit operator for string concatenation; + two values and/or variables side-by-side are implicitly + concatenated into a string (numeric values are first + converted into their string equivalents) + Conversion between numeric and string values + there is no explicit operator for conversion; adding 0 + to a string will force it to be converted to a number + (the numeric value will be 0 if the string does not + represent a decimal or floating point number); the + reverse, converting a number into a string, is done by + concatenating a null string ("") to it [the expression + (5.75 "") evaluates to "5.75"] + Field 'operator' + $ prefixing a number or variable with a dollar sign ($) + causes the appropriate record field to be returned [($2) + gives the second field of the record, ($NF) gives the + last field (since the builtin variable NF is set to the + number of fields in the current record)] + Array subscript operator + , multi-dimensional arrays are simulated by using comma (,) + separated array indices; the actual index is generated + by replacing commas with the value of builtin SUBSEP, + then concatenating the expression into a string index + [comma is also used to separate arguments in function + calls and 
user-defined function definitions] + [comma is *also* used to indicate a range pattern in an + awk rule] + Escape 'operator' + \ In quoted character strings, the backslash (\) character + causes the following character to be interpreted in a + special manner [string "one\ntwo" has an embedded newline + character (linefeed on VMS, but treated as if it were both + carriage-return and linefeed); string "\033[" has an ASCII + 'escape' character (which has octal value 033) followed by + a 'left-bracket' character] + Backslash is also used in regular expressions + Redirection operators + < Read-from -- valid with 'getline' + > Write-to (create new file) -- valid with 'print' and 'printf' + >> Append-to (create file if it doesn't already exist) + | Pipe-from/to -- valid with 'getline', 'print', and 'printf' +4 precedence + Operator precedence, listed from highest to lowest. Assignment, + conditional, and exponentiation operators group from right to left; + all others group from left to right. Parentheses may be used to + override the normal order. + + field ($) + increment (++), decrement (--) + exponentiation (^, **) + unary plus (+), unary minus (-), boolean not (!) + multiplication (*), division (/), remainder (%) + addition (+), subtraction (-) + concatenation (no special symbol; implied by context) + relational (==, !=, <, >=, etc), and redirection (<, >, >>, |) + Relational and redirection operators have the same precedence + and use similar symbols; context distinguishes between them + matching (~, !~) + array membership ('in') + boolean and (&&) + boolean or (||) + conditional (? :) + assignment (=, +=, etc) +4 escaped_characters + Inside of a quoted string, the backslash (\) character gives special + meaning to the character(s) after it. Special character letters + are case sensitive. + \\ results in one backslash in the string + \a is an 'alert' (<ctrl/G>, 
the ASCII <bell> character) + \b is a backspace (BS, <ctrl/H>) + \f is a form feed (FF, <ctrl/L>) + \n 'newline' (<ctrl/J>) [line feed treated as CR+LF] + \r carriage return (CR, <ctrl/M>) [re-positions at the + beginning of the current line] + \t tab (HT, <ctrl/I>) + \v vertical tab (VT, <ctrl/K>) + \### is an arbitrary character, where '###' represents 1 to 3 + octal (ie, 0 thru 7) digits + \x## is an alternate arbitrary character, where '##' represents + 1 or more hexadecimal (ie, 0 thru 9 and/or A thru F and/or + a thru f) digits; if more than two digits follow, the + result is undefined; not recognized if POSIX compatibility + mode is specified. +3 statements + A statement refers to a unit of instruction found in the action + part of an awk rule, and also found in the definition of a function. + The distinction between action, statement, and expression usually + won't matter to an awk programmer. + + Compound statements consist of multiple statements separated by + semicolons or newlines and enclosed within braces ({}). They are + sometimes referred to as 'blocks'. +4 expressions + An expression such as 'a = 10' or 'n += i++' is a valid statement. + + Function invocations such as 'reformat_field($3)' are also valid + statements. +4 if-then-else + A conditional statement in awk uses the same syntax as for the 'C' + programming language: the 'if' keyword, followed by an expression + in parentheses, followed by a statement--or block of statements + enclosed within braces ({})--which will be executed if the expression + is true but skipped if it's false. This can optionally be followed + by the 'else' keyword and another statement--or block of statements-- + which will be executed if (and only if) the expression was false. +5 examples + Simple example showing a statement used to control how many numbers + are printed on a given line. 
+ if ( ++i <= 10 ) #check whether this would be the 11th + printf(" %5d", k) #print on current line if not + else { + printf("\n %5d", k) #print on next line if so + i = 1 #and reset the counter + } + Another example ('next' is described under 'action-controls') + if ($1 > $2) { print "rejected"; next } else diff = $2 - $1 +4 loops + Three types of loop statements are available in awk. Each uses + the same syntax as 'C'. The simplest of the three is the 'while' + statement. It consists of the 'while' keyword, followed by an + expression enclosed within parentheses, followed by a statement--or + block of statements in braces ({})--which will be executed if the + expression evaluates to true. The expression is evaluated before + attempting to execute the statement; if it's true, the statement is + executed (the entire block of statements if there is a block) and + then the expression is re-evaluated. + + The second type of loop is the do-while loop. It consists of the + 'do' keyword, followed by a statement (usually a block of statements + enclosed within braces), followed by the 'while' keyword, followed + by a test expression enclosed within parentheses. The statement--or + block--is always executed at least once. Then the test expression + is evaluated, and the statement(s) re-executed if the result was + true (followed by re-evaluation of the test, and so on). + + The most complex of the three loops is the 'for' statement, and it + has a second variant that is not found in 'C'. The ordinary for-loop + consists of the 'for' keyword, followed by three semicolon-separated + expressions enclosed within parentheses, followed by a statement or + brace-enclosed block of statements. The first of the three + expressions is an initialization clause; it is done before starting + the loop. The second expression is used as a test, just like the + expression in a while-loop. 
It is checked before attempting to + execute the statement block, and then re-checked after each execution + (if any) of the block. The third expression is an 'increment' clause; + it is evaluated after an execution of the statement block and before + re-evaluation of the test (2nd) expression. Normally, the increment + clause will change a variable used in the test clause, in such a + fashion that the test clause will eventually evaluate to false and + cause the loop to finish. + + Note to 'C' programmers: the comma (,) operator commonly used in + 'C' for-loop expressions is not valid in awk. + + The awk-specific variant of the for-loop is used for processing + arrays. Its syntax is 'for' keyword, followed by variable_name 'in' + array_name (where 'var in array' is enclosed in parentheses), + followed by a statement (or block). Each valid subscript value for + the array in question is successively placed--in no particular + order--into the specified 'index' variable. +5 while_example + # strip fields from the input record until there's nothing left + while (NF > 0) { + $1 = "" #this causes $0 to be reconstructed + print + } +5 do_while_example + # This is a variation of the while_example; it gives a slightly + # different display due to the order of operation. + # echo input record until all fields have been stripped + do { + print #output $0 + $1 = "" #this causes $0 to be reconstructed + } while (NF > 0) +5 for_example + # print the ASCII alphabet (in lowercase) + for ( letter = 'a'; letter <= 'z'; letter++ ) print letter + + # display contents of builtin environment array + for (itm in ENVIRON) + print itm, ENVIRON[itm] +4 loop-controls + There are two special statements--both from 'C'--for changing the + behavior of loop execution. 
The 'continue' statement is useful in + a compound (block) statement; when executed, it effectively skips + the rest of the block so that the increment-expression (only for + for-loops) and loop-termination expression can be re-evaluated. + + The 'break' statement, when executed, effectively skips the rest + of the block and also treats the test expression as if it were + false (instead of actually re-evaluating it). In this case, the + increment-expression of a for-loop is also skipped. + + Both 'break' and 'continue' are only allowed within a loop ('for', + 'while', or 'do-while'), and in nested loops they only apply to the + innermost loop. +4 action-controls + There are two special statements for controlling statement execution. + The 'next' statement, when executed, causes the rest of the current + action and all further pattern-action rules to be skipped, so that + the next input record will be immediately processed. This is useful + if any early action knows that the current record will fail all the + remaining patterns; skipping those rules will reduce processing time. + + The 'exit' statement causes GAWK execution to terminate. All open + files are closed, and no further processing is done. The END rule, + if any, is executed. 'exit' takes an optional numeric value as an + argument which is used as an exit status value, so that some sort + of indication of why execution has stopped can be passed on to the + user's environment. +4 other_statements + The delete statement is used to remove an element from an array. + The syntax is 'delete' keyword followed by array name, followed + by index value enclosed in square brackets ([]). + + The return statement is used in user-defined functions. The syntax + is the keyword 'return' optionally followed by a string or numeric + expression. + + See also subtopic 'functions IO_functions' for a description of + 'print', 'printf', and 'getline'.
+3 fields + When an input record is read, it is automatically split into fields + based on the current values of FS (builtin variable defining field + separator expression) and RS (builtin variable defining record + separator character). The default value of FS is an expression + which matches one or more spaces and tabs; the default for RS is + newline. If the FIELDWIDTHS variable is set to a space separated + list of numbers (as in ``FIELDWIDTHS = "2 3 2"'') then the input + is treated as if it had fixed-width fields of the indicated sizes + and the FS value will be ignored. + + The field prefix operator ($), is used to reference a particular + field. For example, $3 designates the third field of the current + record. The entire record can be referenced via $0 (and it holds + the actual input record, not the values of $1, $2, ... concatenated + together, so multiple spaces--when present--remain intact, unless + a new value gets assigned). + + The builtin variable NF holds the number of fields in the current + record. $NF is therefore the value of the last field. Attempts to + access fields beyond NF result in null values (if a record contained + 3 fields, the value of $5 would be ""). + + Assigning a new value to $0 causes all the other field values (and NF) + to be re-evaluated. Changing a specific field, causes $0 to receive + a new value, but the other existing fields remain unchanged. + + For efficiency, gawk only performs field splitting at the first time + a specific field (or NF) is actually needed. +3 variables + Variables in awk can hold both numeric and string values and do not + have to be pre-declared. In fact, there is no way to explicitly + declare them at all. Variable names consist of a leading letter + (either upper or lower case, which are distinct from each other) + or underscore (_) character followed by any number of letters, + digits, or underscores. 
+ + When a variable that didn't previously exist is referenced, it is + created and given a null value. A null value is treated as 0 when + used as a number, and is a string of zero characters in length if + used as a string. +4 builtin_variables + GAWK maintains several 'built-in' variables. All have default values; + some are updated automatically. All the builtins have uppercase-only + names. + + These builtin variables control how awk behaves + FS input field separator; default is a single space, which is + treated as if it were a regular expression for matching + one or more spaces and/or tabs; a value of " " also has a + second special-case side-effect of causing leading blanks + to be ignored instead of producing a null first field; + initial value can be specified on the command line with + the -F option (or /field_separator); the value can be a + regular expression + RS input record separator; default value is a newline ("\n"); + only a single character is allowed [no regular expressions + or multi-character strings; expected to be remedied in a + future release of gawk] + OFS output field separator; value to place between variables in + a 'print' statement; default is one space; can be arbitrary + string + ORS output record separator; value to implicitly terminate 'print' + statement with; default is newline ("\n"); can be arbitrary + string + OFMT default output format used for printing numbers; default + value is "%.6g" + CONVFMT conversion format used for number-to-string conversions; + default value is also "%.6g", like OFMT + SUBSEP subscript separator for array indices; used when an array + subscript is specified as a comma separated list of values: + the comma is replaced by SUBSEP and the resulting index + is a concatenation of the values and SUBSEP(s); default + value is "\034"; value may be arbitrary string + IGNORECASE regular expression matching flag; if true (non-zero) + matching ignores differences between upper and lower case + letters;
affects the '~' and '!~' operators, the 'index', + 'match', 'split', 'sub', and 'gsub' functions, and the + field splitting based on FS; default value is false (0); + has no effect if GAWK is in strict compatibility mode (via + the -"W compat" option or /strict) + FIELDWIDTHS space or tab separated list of width sizes; takes + precedence over FS when set, but is cleared if FS has a + value assigned to it; [note: the current implementation + of fixed-field input is considered experimental and is + expected to evolve over time] + + These builtin variables provide useful information + NF number of fields in the current record + NR record number (accumulated over all files when more than one + input file is processed by the same program) + FNR current record number of the current input file; reset to 0 + each time an input file is completed + RSTART starting position of substring matched by last invocation + of the 'match' function; set to 0 if a match fails and at + the start of each input record + RLENGTH length of substring matched by the last invocation of the + 'match' function; set to -1 if a match fails + FILENAME name of the input file currently being processed; the + special name "-" is used to represent the standard input + ENVIRON array of miscellaneous user environment values; the VMS + implementation of GAWK provides values for ["USER"] (the + username), ["PATH"] (current default directory), ["HOME"] + (the user's login directory), and ["TERM"] (terminal type + if available) [all info provided by VAXCRTL's environ] + ARGC number of elements in the ARGV array, counting [0] which is + the program name (ie, "gawk") + ARGV array of command-line arguments (in [0] to [ARGC-1]); the + program name (ie, "gawk") is held in ARGV[0]; command line + parameters (data files and "var=value" expressions, but not + program options or the awk program text string if present) + are stored in ARGV[1] through ARGV[ARGC-1]; the awk program + can change values of ARGC and ARGV[]
during execution in + order to alter which files are processed or which between- + file assignments are made +4 arrays + awk supports associative arrays to collect data into tables. Array + elements can be either numeric or string, as can the indices used to + access them. Each array must have a unique name, but a given array + can hold both string and numeric elements at the same time. Arrays + are one-dimensional only, but multi-dimensional arrays can be + simulated using comma (,) separated indices, whereby a single index + value gets created by replacing commas with SUBSEP and concatenating + the resulting expression into a single string. + + Referencing an array element is done with the expression + Array[Index] + where 'Array' represents the array's name and 'Index' represents a + value or expression used for a subscript. If the requested array + element did not exist, it will be created and assigned an initial + null value. To check whether an element exists without creating it, + use the 'in' boolean operator. + Index in Array + would check 'Array' for element 'Index' and return 1 if it existed + or 0 otherwise. To remove an element from an array, use the 'delete' + statement + delete Array[Index] + Note: there is no way to delete an ordinary variable or an entire + array; 'delete' only works on a specific array element. + + To process all elements of an array (in succession) when their + subscripts might be unknown, use the 'in' variant of the for-loop + for (Index in Array) { ... } +3 functions + awk supports both built-in and user-defined functions. A function + may be considered a 'black-box' which accepts zero or more input + parameters, performs some calculations or other manipulations based + on them, and returns a single result. 
+ + The syntax for calling a function consists of the function name + immediately followed by an open paren (left parenthesis '('), + optionally followed by white space (spaces and/or tabs), followed + by an appropriate argument value (number, string, variable, array + reference, or expression involving the above and/or nested function + call), optionally followed by more white space. That is followed by + either a closing paren (right parenthesis, ')'), or by a comma (,) + and another argument and so on until finally a closing paren. + + The parentheses are required punctuation, except for the 'print' and + 'printf' builtin IO functions, where they're optional, and for the + builtin IO function 'getline', where they're not allowed. Some + functions support optional [trailing] arguments which can be simply + omitted (along with the corresponding comma if applicable). +4 numeric_functions + Builtin numeric functions + int(n) returns the value of 'n' with any fraction truncated + [truncation of negative values is towards 0] + sqrt(n) the square root of n + exp(n) the exponential of n ('e' raised to the 'n'th power) + log(n) natural logarithm of n + sin(n) sine of n (in radians) + cos(n) cosine of n + atan2(m,n) arctangent of m/n (radians) + rand() random number in the range 0 to 1 (exclusive) + srand(s) sets the random number 'seed' to s, so that a sequence + of 'random' numbers can be repeated; returns the + previous seed value; srand() [argument omitted] sets + the seed to an 'unpredictable' value (based on date + and time, for instance, so should be unrepeatable) +4 string_functions + Builtin string functions + index(s,t) search string s for substring t; result is 1-based + offset of t within s, or 0 if not found + length(s) returns the length of string s; 'length' without + parenthesized argument returns length of $0 + match(s,r) search string s for regular expression r; the offset + of the longest, left-most substring which matches + is returned, or 0 if no 
match was found; the builtin + variables RSTART and RLENGTH are also set [RSTART to + the return value and RLENGTH to the size of the + matching substring, or to -1 if no match was found] + split(s,a,f) break string s into components based on field + separator f and store them in array a (into elements + [1], [2], and so on); the last argument is optional, + if omitted, the value of FS is used; the return value + is the number of components found + sprintf(f,e,...) format expression(s) e using format string f and + return the result as a string; formatting is similar + to the printf function + sub(r,t,s) search string target s for regular expression r, and + if a match is found, replace the matching text with + substring t, then store the result back in s; if s + is omitted, use $0 for the string; the result is + either 1 if a match+substitution was made, or 0 + otherwise; if substring t contains the character + '&', the text which matched the regular expression + is used instead of '&' [to suppress this feature + of '&', 'quote' it with a backslash (\); since this + will be inside a quoted string which will receive + 'backslash' processing before being passed to sub(), + *two* consecutive backslashes will be needed "\\&"] + gsub(r,t,s) similar to sub(), but gsub() replaces all nonoverlapping + substrings instead of just the first, and the return + value is the number of substitutions made + substr(s,p,l) extract a substring l characters long starting at + offset p in string s; l is optional, if omitted then + the remainder of the string (p thru end) is returned + tolower(s) return a copy of string s in which every uppercase + letter has been converted into lowercase + toupper(s) analogous to tolower(); convert lowercase to uppercase +4 time_functions + Builtin time functions + systime() return the current time of day as the number of seconds + since some reference point; on VMS the reference point + is January 1, 1970, at 12 AM local time (not UTC) + strftime(f,t) 
format time value t using format f; if t is omitted, + the default is systime() +5 time_formats + Formatting directives similar to the 'printf' & 'sprintf' functions + (each is introduced in the format string by preceding it with a + percent sign (%)); the directive is substituted by the corresponding + value + a abbreviated weekday name (Sun,Mon,Tue,Wed,Thu,Fri,Sat) + A full weekday name + b abbreviated month name (Jan,Feb,...) + B full month name + c date and time (Unix-style "aaa bbb dd HH:MM:SS YYYY" format) + C century prefix (19 or 20) [not century number, ie 20th] + d day of month as two digit decimal number (01-31) + D date in mm/dd/yy format + e day of month with leading space instead of leading 0 ( 1-31) + E ignored; following format character used + H hour (24 hour clock) as two digit number (00-23) + I hour (12 hour clock) as two digit number (01-12) + j day of year as three digit number (001-366) + m month as two digit number (01-12) + M minute as two digit number (00-59) + n 'newline' (ie, treat %n as \n) + O ignored; following format character used + p AM/PM designation for 12 hour clock + r time in AM/PM format ("II:MM:SS p") + R time without seconds ("HH:MM") + S second as two digit number (00-59) + t tab (ie, treat %t as \t) + T time ("HH:MM:SS") + U week of year (00-53) [first Sunday is first day of week 1] + V date (VMS-style "dd-bbb-YYYY" with 'bbb' forced to uppercase) + w weekday as decimal digit (0 [Sunday] through 6 [Saturday]) + W week of year (00-53) [first _Monday_ is first day of week 1] + x date ("aaa bbb dd YYYY") + X time ("HH:MM:SS") + y year without century (00-99) + Y year with century (19yy-20yy) + Z time zone name (always "local" for VMS) + % literal percent sign (%) +4 IO_functions + Builtin I/O functions + print x,... 
print the values of one or more expressions; if none + are listed, $0 is used; parentheses are optional; + when multiple values are printed, the current value + of builtin OFS (default is 1 space) is used to + separate them; the print line is implicitly + terminated with the current value of ORS (default + is newline); print does not have a return value + printf(f,x,...) print the values of one or more expressions, using + the specified format string; null strings are used + to supply missing values (if any); no between field + or trailing newline characters are printed, they + should be specified within the format string; the + argument-enclosing parentheses are optional; + printf does not have a return value + getline v read a record into variable v; if v is omitted, $0 is + used (and NF, NR, and FNR are updated); if v is + specified, then field-splitting won't be performed; + note: parentheses around the argument are *not* + allowed; return value is 1 for successful read, 0 + if end of file is encountered, or -1 if some sort + of error occurred; [see 'redirection' for several + variants] + close(s) close a file or pipe specified by the string s; the + string used should have the same value as the one + used in a getline or print/printf redirection + system(s) pass string s to be executed by the operating system; + the command string is executed in a subprocess +5 redirection + Both getline and print/printf support variant forms which use + redirection and pipes. + + To read from a file (instead of from the primary input file), use + getline var < "file" + or getline < "file" (read into $0) + where the string "file" represents either an actual file name (in + quotes) or a variable which contains a file name string value or an + expression which evaluates to a string filename.
+ + To create a pipe executing some command and read the result into + a variable (or into $0), use + "command" | getline var + or "command" | getline (read into $0) + where "command" is a literal string containing an operating system + command or a variable with a string value representing such a + command. + + To output into a file other than the primary output, use + print x,... > "file" (or >> "file") + or printf(f,x,...) > "file" (or >> "file") + similar to the 'getline' example above. '>>' causes output to be + appended to an existing file if it exists, or create the file if + it doesn't already exist. '>' always creates a new file. The + alternate redirection method of '>$' (for RMS text file attributes) + is *only* available on the command line, not with 'print' or + 'printf' in the current release. + + To output an error message, use 'print' or 'printf' and redirect + the output to file "/dev/stderr" (or equivalently to "SYS$ERROR:" + on VMS). 'stderr' will normally be the user's terminal, even if + ordinary output is being redirected into a file. + + To feed awk output into another command, use + print x,... | "command" (similarly for 'printf') + similar to the second 'getline' example. In this case, output + from awk will be passed as input to the specified operating system + command. The command must be capable of reading input from 'stdin' + ("SYS$INPUT:" on VMS) in order to receive data in this manner. + + The 'close' function operates on the "file" or "command" argument + specified here (either a literal string or a variable or expression + resulting in a string value). It completely closes the file or + pipe so that further references to the same file or command string + would re-open that file or command at the beginning. Closing a + pipe or redirection also releases some file-oriented resources.
+ + Note: the VMS implementation of GAWK uses temporary files to + simulate pipes, so a command must finish before 'getline' can get + any input from it, and 'close' must be called for an output pipe + before any data can be passed to the specified command. +5 formats + Formatting characters used by the 'printf' and 'sprintf' functions + (each is introduced in the format string by preceding it with a + percent sign (%)) + % include a literal percent sign (%) in the result + c format the next argument as a single ASCII character + (argument should be numeric in the range 0 to 255) + s format the next argument as a string (numeric arguments are + converted into strings on demand) + d decimal number (ie, integer value in base 10) + i integer (equivalent to decimal) + o octal number (integer in base 8) + x hexadecimal number (integer in base 16) [lowercase] + X hexadecimal number [digits 'A' thru 'F' in uppercase] + f floating point number (digits, decimal point, fraction digits) + e exponential (scientific notation) number (digit, decimal + point, fraction digits, letter 'e', sign '+' or '-', + exponent digits) + g 'fractional' number in either 'e' or 'f' format, whichever + produces shorter result + + Three optional modifiers can be placed between the initiating + percent sign and the format character (doesn't apply to %%).
+ - left justify (only matters when width specifier is present) + NN width ['NN' represents 1 or more decimal digits]; actually + minimum width to use, longer items will not be truncated; a + leading 0 will cause right-justified numbers to be padded on + the left with zeroes instead of spaces when they're aligned + .MM precision [decimal point followed by 1 or more digits]; used + as maximum width for strings (causing truncation if they're + actually longer) or as number of fraction digits for 'f' or + 'e' numeric formats, or number of significant digits for 'g' + numeric format +4 user_defined_functions + User-defined functions may be created as needed to simplify awk + programs or to collect commonly used code into one place. The + general syntax of a user-defined function is the 'function' keyword + followed by unique function name, followed by a comma-separated + parameter list enclosed in parentheses, followed by statement(s) + enclosed within braces ({}). A 'return' statement is customary + but is not required. + function FuncName(arg1,arg2) { + # arbitrary statements + return (arg1 + arg2) / 2 + } + If a function does not use 'return' to specify an output value, the + result received by the caller will be unpredictable. + + Functions may be placed in an awk program before, between, or after + the pattern-action rules. The abbreviation 'func' may be used in + place of 'function', unless POSIX compatibility mode is in effect. +3 regular_expressions + A regular expression is a shorthand way of specifying a 'wildcard' + type of string comparison. Regular expression matching is very + fundamental to awk's operation. + + Meta symbols + ^ matches beginning of line or beginning of string; note that + embedded newlines ('\n') create multi-line strings, so + beginning of line is not necessarily beginning of string + $ matches end of line or end of string + . 
any single character (except newline) + [ ] set of characters; [ABC] matches either 'A' or 'B' or 'C'; a + dash (other than first or last of the set) denotes a range + of characters: [A-Z] matches any upper case letter; if the + first character of the set is '^', then the sense of match + is reversed: [^0-9] matches any non-digit; several + characters need to be quoted with backslash (\) if they + occur in a set: '\', ']', '-', and '^' + | alternation (similar to boolean 'or'); match either of two + patterns [for example "^start|stop$" matches leading 'start' + or trailing 'stop'] + ( ) grouping, alter normal precedence [for example, "^(start|stop)$" + matches lines reading either 'start' or 'stop'] + * repeated matching; when placed after a pattern, indicates that + the pattern should match any number of times [for example, + "[a-z][0-9]*" matches a lower case letter followed by zero or + more digits] + + repeated matching; when placed after a pattern, indicates that + the pattern should match one or more times ["[0-9]+" matches + any non-empty sequence of digits] + ? optional matching; indicates that the pattern can match zero or + one times ["[a-z][0-9]?" matches lower case letter alone or + followed by a single digit] + \ quote; prevent the character which follows from having special + meaning + + A regular expression which matches a string or line will match against + the first (left-most) substring which meets the pattern and include + the longest sequence of characters which still meets that pattern. +3 comments + Comments in awk programs are introduced with '#'. Anything after + '#' on a line is ignored by GAWK. It's a good idea to include an + explanation of what an awk program is doing and also who wrote it + and when. +3 further_information + For complete documentation on GAWK, see "The_GAWK_Manual" from FSF. + Source text for it is present in the file GAWK.TEXINFO. 
A postscript + version is available via anonymous FTP from host prep.ai.mit.edu in + directory pub/gnu/. + + For additional documentation on awk--above and beyond that provided in + The_GAWK_Manual--see "The_AWK_Programming_Language" by Aho, Weinberger, + and Kernighan (2nd edition, 1988), published by Addison-Wesley. It is + both a reference on the awk language and a tutorial on awk's use, with + many sample programs. +3 authors + The awk programming language was originally created by Alfred V. Aho, + Peter J. Weinberger, and Brian W. Kernighan in 1977. The language + was revised and enhanced in a new version which was released in 1985. + + GAWK, the GNU implementation of awk, was written in 1986 by Paul Rubin + and Jay Fenlason, with advice from Richard Stallman, and with + contributions from John Woods. In 1988 and 1989, David Trueman and + Arnold Robbins revised GAWK for compatibility with the newer awk. + + GAWK version 2.11.1 was ported to VMS by Pat Rankin in November, 1989, + with further revisions in the Spring of 1990. The VMS port was + incorporated into the official GNU distribution of version 2.13 in + Spring 1991. (Version 2.12 was never publicly released.) +2 release_notes + GAWK 2.13 tested under VMS V5.3 and V5.4-2, May, 1991; compatible with + VMS versions V4.6 and later. Current source code compatible with DEC's + VAXC v3.x and v2.4 or v2.3; also compiles successfully with GNUC (GNU's + gcc). +3 AWK_LIBRARY + GAWK uses a built in search path when looking for a program file + specified by the -f option (or the /input qualifier) when that file + name does not include a device and/or directory. GAWK will first + look in the current default directory, then if the file wasn't found + it will look in the directory specified by the translation of logical + name "AWK_LIBRARY". +3 known_problems + There are several known problems with GAWK running on VMS. Some can + be ignored, others require work-arounds.
+4 command_line_parsing + The command + gawk "program text" + will pass the first phase of DCL parsing (the single required + parameter is present), then it will give an error that a required + element (either /input=awk_file or /commands="program text") is + missing. If what was intended (as is most likely) is to pass the + program text to the UN*X-style command interface, the following + variation is required + gawk -- "program text" + The presence of "--", which is normally optional, will inhibit the + attempt to use DCL parsing (as will any '-' option or redirection). +4 file_formats + If a file having the RMS attribute "Fortran carriage control" is + read as input, it will generate an empty first record if the first + actual record begins with a space (leading space becomes a newline). + Also, the last record of the file will give a "record not terminated" + warning. Both of these minor problems are due to the way that the + C Run-Time Library (VAXCRTL) converts record attributes. + + Another poor feature without a work-around is that there's no way to + specify "append if possible, create with RMS text attributes if not" + with the current command line I/O redirection. '>>$' isn't supported. +4 RS_peculiarities + Changing the record separator to something other than newline ('\n') + will produce anomalous results for ordinary files. For example, + using RS = "\f" and FS = "\n" with the following input + |rec 1, line 1 + |rec 1, line 2 + |^L (form feed) + |rec 2, line 1 + |rec 2, line 2 + |^L (form feed) + |rec 3, line 1 + |rec 3, line 2 + |(end of file) + will produce two fields for record 1, but three fields each for + records 2 and 3. This is because the form-feed record delimiter is + on its own line, so awk sees a newline after it. Since newline is + now a field separator, records 2 and 3 will have null first fields.
+ The following awk code will work-around this problem by inserting + a null first field in the first record, so that all records can be + handled the same by subsequent processing. + # fixup for first record (RS != "\n") + FNR == 1 { if ( $0 == "" ) #leading separator + next #skip its null record + else #otherwise, + $0 = FS $0 #realign fields + } + There is a second problem with this same example. It will always + trigger a "record not terminated" warning when it reaches the end of + file. In the sample shown, there is no final separator; however, if + a trailing form-feed were present, it would produce a spurious final + record with two null fields. This occurs because the I/O system + sees an implicit newline at the end of the last record, so awk sees + a pair of null fields separated by that newline. The following code + fragment will fix that provided there are no null records (in this + case, that would be two consecutive lines containing just form-feeds). + # fixup for last record (RS != "\n") + $0 == FS { next } #drop spurious final record + Note that the "record not terminated" warning will persist. +4 cmd_inconsistency + The DCL qualifier /OUTPUT is internally equivalent to '>$' output + redirection, but the qualifier /INPUT corresponds to the -f option + rather than to '<' input redirection. +4 exit + The exit statement can optionally pass a final status value to the + operating system. GAWK expects a UN*X-style value instead of a + VMS status value, so 0 indicates success and non-zero indicates + failure. The final exit status will be 1 (VMS success) if 0 is + used, or even (VMS non-success) if non-zero is used. 
+3 changes + Changes between version 2.13 and 2.11.1: (2.12 was not released) + + General + CONVFMT and FIELDWIDTHS builtin control variables added + systime() and strftime() date/time functions added + 'lint' and 'posix' run-time options added + '-W' command line option syntax supersedes '-c', '-C', and '-V' + '-a' and '-e' regular expression options made obsolete + Various bug fixes and efficiency improvements + More platforms supported ('officially' including VMS) + + VMS-specific + %g printf format fixed + Handling of '\' on command line modified; no longer necessary to + double it up + Problem redirecting stderr (>&efile) at same time as stdin (<ifile) + or stdout (>ofile) has been fixed + ``2>&1'' and ``1>&2'' redirection constructs added +3 license + GAWK is covered by the "GNU General Public License", the gist of which + is that if you supply this software to a third party, you are expressly + forbidden to prevent them from supplying it to a fourth party, and if + you supply binaries you must make the source code available to them + at no additional cost. Any revisions or modified versions are also + covered by the same license. There is no warranty, express or implied, + for this software. It is provided "as is." + + [Disclaimer: This is just an informal summary with no legal basis; + refer to the actual GNU General Public License for specific details.] +!2 examples +! diff --git a/vms/unixlib.h b/vms/unixlib.h new file mode 100644 index 00000000..17d99706 --- /dev/null +++ b/vms/unixlib.h @@ -0,0 +1,24 @@ +/* "unixlib.h" -- limited substitute for VAX C V3.x's <unixlib.h>, + * for use with VAX C V2.x and/or GNU C when building gawk.
+ */ + + +/* declare the global environ[] array */ +#ifdef VAXC +extern char noshare **environ; +#else +# ifdef __GNUC__ +# define environ $$PsectAttributes_NOSHR$$environ +# endif +extern char **environ; +#endif + +/* miscellaneous Unix emulation routines available in VAXCRTL */ +char *getenv(), *getcwd(); + +char *ecvt(), *fcvt(), *gcvt(); + +int getpid(), getppid(); + +unsigned getgid(), getuid(), getegid(), geteuid(); +int setgid(), setuid(); /* no-ops */ diff --git a/vms/varargs.h b/vms/varargs.h new file mode 100644 index 00000000..ce66e7d5 --- /dev/null +++ b/vms/varargs.h @@ -0,0 +1,38 @@ +/* "varargs.h" -- old style variable argument list manipulation (for VAX) */ +#ifndef __GNUC__ + + /* Use the system's macros with the system's compiler. */ +#include <varargs.h> + +#else /*__GNUC__*/ + +# if defined(__VAX__) || defined(__vax__) || defined(VAX) || defined(vax) + /* These macros implement traditional (non-ANSI) varargs for GNU C on VAX */ + +# if !defined(_VA_LIST) && !defined(_VA_LIST_) +# define _VA_LIST +# define _VA_LIST_ +typedef char *va_list; +# endif + +# define va_alist _varargs +# define va_dcl int va_alist; +# define va_start(AP) AP = (va_list) &va_alist +# define va_end(AP) + +# define _va_rounded_size(TYPE) \ + (((sizeof (TYPE) + sizeof (int) - 1) / sizeof (int)) * sizeof (int)) + +# define va_arg(AP,TYPE) \ + (AP += _va_rounded_size(TYPE), \ + *((TYPE *) (AP - _va_rounded_size(TYPE)))) + +# if defined(__VMS__) || defined(__vms__) || defined(VMS) || defined(vms) + /* VAX C compatability macros */ +# define va_count(CNT) vaxc$va_count(&CNT) /* rtl routine */ +# define va_start_1(AP,OFFSET) AP = (va_list) (&va_alist + (OFFSET)) +# endif /* VMS */ + +# endif /* VAX */ + +#endif /*__GNUC__*/ diff --git a/vms/vms.h b/vms/vms.h new file mode 100644 index 00000000..6491a1f5 --- /dev/null +++ b/vms/vms.h @@ -0,0 +1,69 @@ +/* + * vms.h - miscellaneous definitions for use with VMS system services. 
+ * Pat Rankin, Nov'89 + */ + +#if 0 +#include <iodef.h> +#else +#define IO$_WRITEVBLK 48 /* write virtual block */ +#define IO$V_CANCTRLO 6 /* cancel <ctrl/O> (ie, resume tty output) */ +#define IO$M_CANCTRLO (1 << IO$V_CANCTRLO) +#endif + +#if 0 +#include <clidef.h> +#include <cliverbdef.h> +#include <fscndef.h> +#else +#define CLI$K_GETCMD 1 +#define CLI$K_VERB_MCR 33 +#define CLI$K_VERB_RUN 36 +#define FSCN$_FILESPEC 1 +#endif + +#if 0 +#include <climsgdef.h> +#else +#define CLI$_RUNUSED 0x00030000 /* value returned by $CLI for "RUN" */ +#define CLI$_SYNTAX 0x000310FC /* error signalled by CLI$DCL_PARSE */ +#define CLI$_INSFPRM 0x00038048 /* insufficient parameters */ +#define CLI$_VALREQ 0x00038150 /* missing required value */ +#define CLI$_CONFLICT 0x00038258 /* conflicting qualifiers */ +#define CLI$_NOOPTPRS 0x00038840 /* no option present */ +#endif + +#if !defined(_TYPES_) || !defined(__GNUC__) +typedef unsigned long u_long; +typedef unsigned short u_short; +#endif +typedef struct _dsc { int len; char *adr; } Dsc; /* limited string descriptor */ + +#define vmswork(sts) ((sts)&1) +#define vmsfail(sts) (!vmswork(sts)) +#define CondVal(sts) ((sts)&0x0FFFFFF8) /* strip severity & msg inhibit */ +#define Descrip(strdsc,strbuf) Dsc strdsc = {sizeof strbuf - 1, strbuf} + +extern int shell$is_shell P((void)); +extern u_long LIB$FIND_FILE P((const Dsc *, Dsc *, void *, ...)); +extern u_long LIB$FIND_FILE_END P((void *)); +#ifndef NO_TTY_FWRITE +extern u_long LIB$GET_EF P((long *)); +extern u_long SYS$ASSIGN P((const Dsc *, short *, long, const Dsc *)); +extern u_long SYS$DASSGN P((short)); +extern u_long SYS$QIO P((long, short, long, void *, const void *, long, + const char *, int, int, u_long, int, int)); +extern u_long SYS$SYNCH P((long, void *)); +#endif !NO_TTY_FWRITE + +extern void v_add_arg P((int, const char *)); +extern void vms_exit P((int)); +extern char *vms_strerror P((int)); +extern char *vms_strdup P((const char *)); +extern int vms_devopen P((const 
char *)); +extern int vms_execute P((const char *, const char *, const char *)); +extern int vms_gawk P((void)); +extern u_long Cli_Present P((const char *)); +extern u_long Cli_Get_Value P((const char *, char *, int)); +extern u_long Cli_Parse_Command P((const void *, const char *)); + diff --git a/vms/vms_args.c b/vms/vms_args.c new file mode 100644 index 00000000..b6736ff3 --- /dev/null +++ b/vms/vms_args.c @@ -0,0 +1,398 @@ +/* + * vms_args.c -- command line parsing, to emulate shell i/o redirection. + * [ Escape sequence parsing now suppressed. ] + */ + +/* + * Copyright (C) 1991 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Progamming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 1, or (at your option) + * any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GAWK; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* + * [.vms]vms_arg_fixup - emulate shell's command line processing: handle + * stdio redirection, backslash escape sequences, and file wildcard + * expansion. Should be called immediately upon image startup. 
+ * + * Pat Rankin, Nov'89 + * rankin@eql.Caltech.EDU + * + * <ifile - open 'ifile' (readonly) as 'stdin' + * >nfile - create 'nfile' as 'stdout' (stream-lf format) + * >>ofile - append to 'ofile' for 'stdout'; create it if necessary + * >&efile - point 'stderr' (SYS$ERROR) at 'efile', but don't open + * >$vfile - create 'vfile' as 'stdout', using rms attributes + * appropriate for a standard text file (variable length + * records with implied carriage control) + * 2>&1 - special case: direct error messages into output file + * 1>&2 - special case: direct output data to error destination + * <<sentinel - error; reading stdin until 'sentinel' not supported + * <-, >- - error: stdin/stdout closure not implemented + * | anything - error; pipes not implemented + * & <end-of-line> - error; background execution not implemented + * + * any\Xany - convert 'X' as appropriate; \000 will not work as + * intended since subsequent processing will misinterpret + * + * any*any - perform wildcard directory lookup to find file(s) + * any%any - " " ('%' is vms wildcard for '?' [ie, /./]) + * any?any - treat like 'any%any' unless no files match + * *, %, ? - if no file(s) match, leave original value in arg list + * + * + * Notes: a redirection operator can have optional white space between it + * and its filename; the operator itself *must* be preceded by white + * space so that it starts a separate argument. '<' is ambiguous + * since "<dir>file" is a valid VMS file specification; leading '<' is + * assumed to be stdin--use "\<dir>file" to override. '>$' is a local + * kludge to force stdout to be created with text file RMS attributes + * instead of stream format; file sharing is disabled for stdout + * regardless. Multiple instances of stdin or stdout or stderr are + * treated as fatal errors rather than using the first or last. 
If a + * wildcard file specification is detected, it is expanded into a list + * of filenames which match; if there are no matches, the original + * file-spec is left in the argument list rather than having it expand + * into thin air. No attempt is made to identify and make $(var) + * environment substitutions--must draw the line somewhere! + */ + +#include "awk.h" /* really "../awk.h" */ +#include "vms.h" + + void v_add_arg(int, const char *); +static char *skipblanks(const char *); +static void vms_expand_wildcards(const char *); +static u_long vms_define(const char *, const char *); +static char *t_strstr(const char *, const char *); +#define strstr t_strstr /* strstr() missing from vaxcrtl for V4.x */ + +static int v_argc, v_argz = 0; +static char **v_argv; + +/* vms_arg_fixup() - scan argv[] for i/o redirection and wildcards and also */ +/* rebuild it with those removed or expanded, respectively */ +void +vms_arg_fixup( int *pargc, char ***pargv ) +{ + char *f_in, *f_out, *f_err, + *out_mode, *rms_opt1, *rms_opt2; + char **argv = *pargv; + int i, argc = *pargc; + int err_to_out_redirect = 0, out_to_err_redirect = 0; + +#ifndef NO_CHECK_SHELL + if (shell$is_shell()) + return; /* don't do anything if we're running DECshell */ +#endif +#ifndef NO_DCL_CMD + for (i = 1; i < argc ; i++) /* check for dash or other non-VMS args */ + if (strchr("->\\|", *argv[i])) break; /* found => (i < argc) */ + if (i >= argc && (v_argc = vms_gawk()) > 0) { /* vms_gawk => dcl_parse */ + /* if we successfully parsed the command, replace original argv[] */ + argc = v_argc, argv = v_argv; + v_argz = v_argc = 0, v_argv = NULL; + } +#endif + v_add_arg(v_argc = 0, basename(argv[0])); /* store arg #0 (image name) */ + + f_in = f_out = f_err = NULL; /* stdio setup (no filenames yet) */ + out_mode = "w"; /* default access for stdout */ + rms_opt1 = rms_opt2 = "ctx=stm"; /* ("context = stream") == no-opt */ + + for (i = 1; i < argc; i++) { + char *p, *fn; + int is_arg; + + is_arg = 0; /* 
current arg does not begin with dash */ + p = argv[i]; /* current arg */ + switch (*p) { + case '<': /* stdin */ + /*[should try to determine whether this is really a directory + spec using <>; for now, force user to quote them with '\<']*/ + if ( f_in ) { + fatal("multiple specification of '<' for stdin"); + } else if (*++p == '<') { /* '<<' is not supported */ + fatal("'<<' not available for stdin"); + } else { + p = skipblanks(p); + fn = (*p ? p : argv[++i]); /* use next arg if necessary */ + if (i >= argc || *fn == '-') + fatal("invalid i/o redirection, null filespec after '<'"); + else + f_in = fn; /* save filename for stdin */ + } + break; + case '>': { /* stdout or stderr */ + /*[vms-specific kludge '>$' added to force stdout to be created + as record-oriented text file instead of in stream-lf format]*/ + int is_out = 1; /* assume stdout */ + if (*++p == '>') /* '>>' => append */ + out_mode = "a", p++; + else if (*p == '&') /* '>&' => stderr */ + is_out = 0, p++; + else if (*p == '$') /* '>$' => kludge for record format */ + rms_opt1 = "rfm=var", rms_opt2 = "rat=cr", p++; + else /* '>' => create */ + ; /* use default values initialized prior to loop */ + p = skipblanks(p); + fn = (*p ? 
p : argv[++i]); /* use next arg if necessary */ + if (i >= argc || *fn == '-') { + fatal("invalid i/o redirection, null filespec after '>'"); + } else if (is_out) { + if (out_to_err_redirect) + fatal("conflicting specifications for stdout"); + else if (f_out) + fatal("multiple specification of '>' for stdout"); + else + f_out = fn; /* save filename for stdout */ + } else { + if (err_to_out_redirect) + fatal("conflicting specifications for stderr"); + else if (f_err) + fatal("multiple specification of '>&' for stderr"); + else + f_err = fn; /* save filename for stderr */ + } + } break; + case '2': /* check for ``2>&1'' special case'' */ + if (strcmp(p, "2>&1") != 0) + goto ordinary_arg; + else if (f_err || out_to_err_redirect) + fatal("conflicting specifications for stderr"); + else { + err_to_out_redirect = 1; + f_err = "SYS$OUTPUT:"; + } break; + case '1': /* check for ``1>&2'' special case'' */ + if (strcmp(p, "1>&2") != 0) + goto ordinary_arg; + else if (f_out || err_to_out_redirect) + fatal("conflicting specifications for stdout"); + else { + out_to_err_redirect = 1; + f_out = "SYS$ERROR:"; + } break; + case '|': /* pipe */ + /* command pipelines are not supported */ + fatal("command pipes not available ('|' encountered)"); + break; + case '&': /* background */ + /*[we could probably spawn or fork ourself--maybe someday]*/ + if (*(p+1) == '\0' && i == argc - 1) { + fatal("background tasks not available ('&' encountered)"); + break; + } else /* fall through */ + ; /*NOBREAK*/ + case '-': /* argument */ + is_arg = 1; /*(=> skip wildcard check)*/ + default: /* other (filespec assumed) */ +ordinary_arg: + /* process escape sequences or expand wildcards */ + v_add_arg(++v_argc, p); /* include this arg */ + p = strchr(p, '\\'); /* look for backslash */ + if (p != NULL) { /* does it have escape sequence(s)? 
*/ +#if 0 /* disable escape parsing; it's now done elsewhere within gawk */ + register int c; + char *q = v_argv[v_argc] + (p - argv[i]); + do { + c = *p++; + if (c == '\\') + c = parse_escape(&p); + *q++ = (c >= 0 ? (char)c : '\\'); + } while (*p != '\0'); + *q = '\0'; +#endif /*0*/ + } else if (!is_arg && strchr(v_argv[v_argc], '=') == NULL) { + vms_expand_wildcards(v_argv[v_argc]); + } + break; + } /* end switch */ + } /* loop */ + + /* + * Now process any/all I/O options encountered above. + */ + + /* must do stderr first, or vaxcrtl init might not see it */ + /*[ catch 22: we'll also redirect errors encountered doing <in or >out ]*/ + if (f_err) { /* define logical name but don't open file */ + int len = strlen(f_err); + if (strncasecmp(f_err, "SYS$OUTPUT", len) == 0 + && (f_err[len] == ':' || f_err[len] == '\0')) + err_to_out_redirect = 1; + else + vms_define("SYS$ERROR", f_err); + } + /* do stdin before stdout, so we bomb we won't create empty output file */ + if (f_in) { /* [re]open file and define logical name */ + stdin = freopen(f_in, "r", stdin, "mbf=2"); + if (stdin != NULL) + vms_define("SYS$INPUT", f_in); + else + fatal("<%s (%s)", f_in, strerror(errno)); + } + if (f_out) { /* disallow file sharing to reduce overhead */ + stdout = freopen(f_out, out_mode, stdout, + rms_opt1, rms_opt2, "shr=nil", "mbf=2"); /*VAXCRTL*/ + if (stdout != NULL) { +#ifdef crtl_bug /* eof sometimes doesn't get set properly for stm_lf file */ +# define BIGBUF 8*BUFSIZ /* maximum record size: 4096 instead of 512 */ + setvbuf(stdout, malloc(BIGBUF), _IOFBF, BIGBUF); +#endif + vms_define("SYS$OUTPUT", f_out); + } else + fatal(">%s%s (%s)", (*out_mode == 'a' ? 
">" : ""), + f_out, strerror(errno)); + } + if (err_to_out_redirect) { /* special case for ``2>&1'' construct */ + fclose(stderr); + dup(1, 2); /* make file 2 (stderr) share file 1 (stdout) */ + stderr = stdout; + vms_define("SYS$ERROR", "SYS$OUTPUT:"); + } else if (out_to_err_redirect) { /* ``1>&2'' */ + fclose(stdout); + dup(2, 1); /* make file 1 (stdout) share file 2 (stderr) */ + stdout = stderr; + vms_define("SYS$OUTPUT", "SYS$ERROR:"); + } + +#ifndef NO_DCL_CMD + /* if we replaced argv[] with our own, we can release it now */ + if (argv != *pargv) + free((void *)argv), argv = NULL; +#endif + *pargc = ++v_argc; /* increment to account for argv[0] */ + *pargv = v_argv; + return; +} + +/* vms_expand_wildcards() - check a string for wildcard punctuation; */ +/* if it has any, attempt a directory lookup */ +/* and store resulting name(s) in argv array */ +static void +vms_expand_wildcards( const char *prospective_filespec ) +{ + char *p, spec_buf[255+1], res_buf[255+1], *strstr(); + Dsc spec, result; + void *context; + register int len = strlen(prospective_filespec); + + if (len >= sizeof spec_buf) + return; /* can't be valid--or at least we can't handle it */ + strcpy(spec_buf, prospective_filespec); /* copy the arg */ + p = strchr(spec_buf, '?'); + if (p != NULL) /* change '?' single-char wildcard to '%' */ + do *p++ = '%', p = strchr(p, '?'); + while (p != NULL); + else if (strchr(spec_buf, '*') == strchr(spec_buf, '%') /* => both NULL */ + && strstr(spec_buf, "...") == NULL) + return; /* no wildcards present; don't attempt file lookup */ + spec.len = len, spec.adr = spec_buf; + result.len = sizeof res_buf - 1, result.adr = res_buf; + + /* The filespec is already in v_argv[v_argc]; if we fail to match anything, + we'll just leave it there (unlike most shells, where it would evaporate). 
+ */ + len = -1; /* overload 'len' with flag value */ + context = NULL; /* init */ + while (vmswork(LIB$FIND_FILE(&spec, &result, &context))) { + for (len = sizeof(res_buf)-1; len > 0 && res_buf[len-1] == ' '; len--) ; + res_buf[len] = '\0'; /* terminate after discarding trailing blanks */ + v_add_arg(v_argc++, strdup(res_buf)); /* store result */ + } + (void)LIB$FIND_FILE_END(&context); + if (len >= 0) /* (still -1 => never entered loop) */ + --v_argc; /* undo final post-increment */ + return; +} + +/* v_add_arg() - store string pointer in v_argv[]; expand array if necessary */ +void +v_add_arg( int idx, const char *val ) +{ +#ifdef DEBUG_VMS + fprintf(stderr, "v_add_arg: v_argv[%d] ", idx); +#endif + if (idx + 1 >= v_argz) { /* 'v_argz' is the current size of v_argv[] */ + int old_size = v_argz; + + v_argz = idx + 10; /* increment by arbitrary amount */ + if (old_size == 0) + v_argv = (char **)malloc((unsigned)(v_argz * sizeof(char **))); + else + v_argv = (char **)realloc((char *)v_argv, + (unsigned)(v_argz * sizeof(char **))); + if (v_argv == NULL) { /* error */ + fatal("%s: %s: can't allocate memory (%s)", "vms_args", + "v_argv", strerror(errno)); + } else { + memmsg((oldsize == 0 ? 
"v_argv" : "re: v_argv"), v_argz, + "vms_args", v_argv); + while (old_size < v_argz) v_argv[old_size++] = NULL; + } + } + v_argv[idx] = (char *)val; +#ifdef DEBUG_VMS + fprintf(stderr, "= \"%s\"\n", val); +#endif +} + +/* skipblanks() - return a pointer to the first non-blank in the string */ +static char * +skipblanks( const char *ptr ) +{ + if (ptr) + while (*ptr == ' ' || *ptr == '\t') + ptr++; + return (char *)ptr; +} + +/* vms_define() - assign a value to a logical name [define/process/user_mode] */ +static u_long +vms_define( const char *log_name, const char *trans_val ) +{ + Dsc log_dsc, trn_dsc; +# define LOG_PROCESS_TABLE 2 /* <obsolete> */ +# define LOG_USERMODE 3 /* PSL$C_USER */ + extern u_long SYS$CRELOG(); /* <superceded by $CRELNM> */ + + /* avoid "define SYS$OUTPUT sys$output:" for redundant ">sys$output:" */ + if (strncasecmp(log_name, trans_val, strlen(log_name)) == 0) + return 0; + + log_dsc.len = strlen(log_dsc.adr = (char *)log_name); + trn_dsc.len = strlen(trn_dsc.adr = (char *)trans_val); + return SYS$CRELOG(LOG_PROCESS_TABLE, &log_dsc, &trn_dsc, LOG_USERMODE); +} + +/* t_strstr -- strstr() substitute; search 'str' for 'sub' */ +static char *t_strstr ( const char *str, const char *sub ) +{ + register const char *s0, *s1, *s2; + + /* special case: empty substring */ + if (!*sub) return (char *)str; + + /* brute force method */ + for (s0 = s1 = str; *s1; s1 = ++s0) { + s2 = sub; + while (*s1++ == *s2++) + if (!*s2) return (char *)s0; /* full match */ + } + return (char *)0; /* not found */ +} diff --git a/vms/vms_cli.c b/vms/vms_cli.c new file mode 100644 index 00000000..e4e33404 --- /dev/null +++ b/vms/vms_cli.c @@ -0,0 +1,88 @@ +/* + * vms_cli.c - command line interface routines. + * Pat Rankin, Nov'89 + * Routines called from vms_gawk.c for DCL parsing. 
+ */ + +#define P(foo) () +#include "config.h" /* in case we want to suppress 'const' &c */ +#include "vms.h" + +extern u_long CLI$PRESENT(const Dsc *); +extern u_long CLI$GET_VALUE(const Dsc *, Dsc *, short *); +extern u_long CLI$DCL_PARSE(const Dsc *, const void *, ...); +extern u_long SYS$CLI(void *, ...); +extern u_long SYS$FILESCAN(const Dsc *, void *, long *); +extern void *LIB$ESTABLISH(u_long (*handler)(void *, void *)); +extern u_long LIB$SIG_TO_RET(void *, void *); /* condition handler */ + +/* Cli_Present() - call CLI$PRESENT to determine whether a parameter or */ +/* qualifier is present on the [already parsed] command line */ +u_long +Cli_Present( const char *item ) +{ + Dsc item_dsc; + (void)LIB$ESTABLISH(LIB$SIG_TO_RET); + + item_dsc.len = strlen(item_dsc.adr = (char *)item); + return CLI$PRESENT(&item_dsc); +} + +/* Cli_Get_Value() - call CLI$GET_VALUE to retreive the value of a */ +/* parameter or qualifier from the command line */ +u_long +Cli_Get_Value( const char *item, char *result, int size ) +{ + Dsc item_dsc, res_dsc; + u_long sts; + short len = 0; + (void)LIB$ESTABLISH(LIB$SIG_TO_RET); + + item_dsc.len = strlen(item_dsc.adr = (char *)item); + res_dsc.len = size, res_dsc.adr = result; + sts = CLI$GET_VALUE(&item_dsc, &res_dsc, &len); + result[len] = '\0'; + return sts; +} + +/* Cli_Parse_Command() - use the $CLI system service (undocumented) to */ +/* retreive the actual command line (which might be */ +/* "run prog" or "mcr prog [params]") and then call */ +/* CLI$DCL_PARSE to parse it using specified tables */ +u_long +Cli_Parse_Command( const void *cmd_tables, const char *cmd_verb ) +{ + struct { short len, code; void *adr; } fscn[2]; + struct { char rqtype, rqindx, rqflags, rqstat; unsigned :32; + Dsc rdesc; unsigned :32; unsigned :32; unsigned :32; } cmd; + u_long sts; + int ltmp; + char longbuf[2600]; + (void)LIB$ESTABLISH(LIB$SIG_TO_RET); + + memset(&cmd, 0, sizeof cmd); + cmd.rqtype = CLI$K_GETCMD; /* command line minus the verb */ + 
sts = SYS$CLI( &cmd, (void *)0, (void *)0); /* get actual command line */ + + if (vmswork(sts)) { /* ok => cli available & verb wasn't "RUN" */ + /* invoked via symbol => have command line (which might be empty) */ + /* [might also be invoked via mcr or dcl; that's ok] */ + if (cmd.rqstat == CLI$K_VERB_MCR) { + /* need to strip image name from MCR invocation */ + memset(fscn, 0, sizeof fscn); + fscn[0].code = FSCN$_FILESPEC; /* full file specification */ + (void)SYS$FILESCAN( &cmd.rdesc, fscn, (long *)0); + cmd.rdesc.len -= fscn[0].len; /* shrink size */ + cmd.rdesc.adr += fscn[0].len; /* advance ptr */ + } + /* prepend verb and then parse the command line */ + strcat(strcpy(longbuf, cmd_verb), " "), ltmp = strlen(longbuf); + if (cmd.rdesc.len + ltmp > sizeof longbuf) + cmd.rdesc.len = sizeof longbuf - ltmp; + strncpy(&longbuf[ltmp], cmd.rdesc.adr, cmd.rdesc.len); + cmd.rdesc.len += ltmp, cmd.rdesc.adr = longbuf; + sts = CLI$DCL_PARSE( &cmd.rdesc, cmd_tables); + } + + return sts; +} diff --git a/vms/vms_fwrite.c b/vms/vms_fwrite.c new file mode 100644 index 00000000..c0282c14 --- /dev/null +++ b/vms/vms_fwrite.c @@ -0,0 +1,209 @@ +/* + * vms_fwrite.c - augmentation for the fwrite() function. + */ + +/* + * Copyright (C) 1991 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Progamming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 1, or (at your option) + * any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GAWK; see the file COPYING. 
If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "awk.h" /* really "../awk.h" */ + +#ifndef NO_TTY_FWRITE +#include "vms.h" +#include <stdio.h> +#include <errno.h> + +#ifdef VAXC_BUILTINS +#pragma builtins /* VAXC V3.0 & up */ +# define find_c(s,n,c) ((n) - _LOCC((c),(n),(s))) +#else /*VAXC_BUILTINS*/ +static int find_c( const char *s, int n, char c ) { + register const char *t = (const char *)memchr(s, c, n); + return (t == 0 ? n : t - s); /* 0..n-1, or n if not found */ +} +#endif /*VAXC_BUILTINS*/ +#define is_stdout(file_no) ((file_no) == 1) /* fileno(stdout) */ +#define is_stderr(file_no) ((file_no) == 2) /* fileno(stderr) */ + +#define PREFIX_CR (0x8D << 16) /* leading carriage return */ +#define POSTFIX_CR (0x8D << 24) /* trailing carriage return (=> lf/cr) */ + +static short channel[_NFILE] = {0}; +static FILE *prev_file = 0; +static int prev_file_num; + + /* + * VAXCRTL's fwrite() seems to flush after every character when + * writing to a terminal. This routine is a limited functionality + * substitute that is *much* faster. However, calls to fwrite() + * should not be mixed with other stdio calls to the same file + * unless fflush() is always called first. Also, this routine + * will not detect that a freopen() call has finished with the + * original terminal; tty_fclose() should be used to close a file. + */ +#ifdef fwrite +# undef fwrite +#endif +/* tty_fwrite() - performance hack for fwrite() to a terminal */ +size_t +tty_fwrite( const void *buf, size_t size, size_t number, FILE *file ) +{ + static long evfn = -1; + short chan; + int file_num, result; + + if (!file || !*file) + return 0 * (errno = EBADF); /* kludge alert! */ + else if (file == prev_file) + file_num = prev_file_num; + else /* note: VAXCRTL's fileno() is a function, not just a macro */ + prev_file_num = file_num = fileno(file), prev_file = file; + + chan = file_num < _NFILE ? 
channel[file_num] : -1; + if (chan == 0) { /* if not initialized, need to assign a channel */ + if (isatty(file_num) > 0) { /* isatty: 1=yes, 0=no, -1=problem */ + Dsc device; + char devnam[255+1]; + fgetname(file, devnam); /* get 'file's name */ + device.len = strlen(device.adr = devnam); /* create descriptor */ + if (vmswork(SYS$ASSIGN(&device, &chan, 0, (Dsc *)0))) { + /* get an event flag; use #0 if problem */ + if (evfn == -1 && vmsfail(LIB$GET_EF(&evfn))) evfn = 0; + } else chan = 0; /* $ASSIGN failed */ + } + /* store channel for later use; -1 => don't repeat failed init attempt */ + channel[file_num] = (chan > 0 ? chan : -1); + } + if (chan > 0) { /* chan > 0 iff 'file' is a terminal */ + struct _iosbw { u_short status, count; u_long rt_kludge; } iosb; + register u_long sts = 1; + register char *pt = (char *)buf; + register int offset, pos, count = size * number; + u_long cc_fmt, io_func = IO$_WRITEVBLK; + int extra = 0; + result = 0; + if (is_stderr(file_num)) /* if it's SYS$ERROR (stderr)... 
*/ + io_func |= IO$M_CANCTRLO; /* cancel ^O (resume tty output) */ + while (count > 0) { + /* special handling for line-feeds to make them be 'newlines' */ + offset = 0; + if (*pt == '\n') { /* got at least one leading line-feed */ + cc_fmt = PREFIX_CR, extra++; /* precede 1st LF with a CR */ + do offset++; + while (offset < count && *(pt + offset) == '\n'); + } else + cc_fmt = 0; + /* look for another line-feed; if found, break line there */ + pos = offset + find_c(pt + offset, count - offset, '\n'); + if (pos >= BUFSIZ) pos = BUFSIZ - 1; /* throttle quota usage */ + else if (pos < count) pos++, cc_fmt |= POSTFIX_CR, extra++; + /* wait for previous write, if any, to complete */ + if (pt > (char *)buf) { + sts = SYS$SYNCH(evfn, &iosb); + if (vmswork(sts)) sts = iosb.status, result += iosb.count; + if (vmsfail(sts)) break; + } + /* queue an asynchronous write */ + sts = SYS$QIO(evfn, chan, io_func, &iosb, (u_long (*)())0, 0, + pt, pos, 0, cc_fmt, 0, 0); + if (vmsfail(sts)) break; /*(should never happen)*/ + pt += pos, count -= pos; + } + /* wait for last write to complete */ + if (pt > (char *)buf && vmswork(sts)) { + sts = SYS$SYNCH(evfn, &iosb); + if (vmswork(sts)) sts = iosb.status, result += iosb.count; + } + if (vmsfail(sts)) errno = EVMSERR, vaxc$errno = sts; + else if (iosb.rt_kludge == 0) result = number + extra; + result -= extra; /* subtract the additional carriage-returns */ + } else { /* use stdio */ + /* Note: we assume that we're writing text, not binary data. + For stream format files, 'size' and 'number' are effectively + interchangable, and fwrite works fine. However, for record + format files, 'size' governs the maximum record length, so + fwrite(string, size(char), strlen(string), file) + will produce a sequence of 1-byte records, which is hardly + what we want in this (assumed) situation. 
Line-feeds ('\n') + are converted into newlines (ie, record separators) by the + run-time library, but strings that don't end with a newline + still become separate records. The simplest work around + is just to use fputs() instead of fwrite(); unfortunately, + we have to provide special treatment for NULs ('\0's). + At present, only stdout might be in record format (via + >$'filename' redirection on the command line). + */ + if (size > 1) { /* not used by GAWK */ + result = fwrite((void *)buf, size, number, file); + } else if (*((char *)buf + number - 1) == '\n' || !is_stdout(file_num)) { + result = fwrite((void *)buf, number, size, file); + result = result * number / size; /*(same as 'result = number')*/ + } else { +#ifdef NO_ALLOCA +# define alloca(n) ((n) <= abuf_siz ? abuf : \ + (abuf_siz > 0 ? (void *)free(abuf) : (void *)0), \ + (abuf = malloc(abuf_siz = (n)+20))) + static void *abuf = 0; + static size_t abuf_siz = 0; +#endif /*NO_ALLOCA*/ + register char *pt = (char *)buf; + register int pos, count = number; + if (pt[count] != '\0') { /*(out of bounds, but relatively safe)*/ + pt = (char *)alloca(count + 1); + memcpy(pt, buf, count), pt[count] = '\0'; + /* if exiting this block undoes the alloca(), we're hosed :-( */ + } + result = 0; + while (count > 0) { + pos = find_c(pt, count, '\0'); + if (fputs(pt, file) < 0) break; + if (pos < count) { + if (fputc('\0', file) < 0) break; + pos++; /* 0..n-1 -> 1..n */ + } + result += pos, pt += pos, count -= pos; + } + } + } + return result; +} +#define fwrite(b,s,n,f) tty_fwrite((b),(s),(n),(f)) + +#ifdef fclose +# undef fclose +#endif +/* tty_fclose() - keep tty_fwrite() up to date when closing a file */ +int +tty_fclose( FILE *file ) +{ + if (file && *file) { /* note: VAXCRTL stdio has extra level of indirection */ + int file_num = fileno(file); + short chan = file_num < _NFILE ? 
channel[file_num] : -1; + if (chan > 0) + (void)SYS$DASSGN(chan); /* deassign the channel (ie, close) */ + if (file_num < _NFILE) + channel[file_num] = 0; /* clear stale info */ + } + prev_file = 0; /* force tty_fwrite() to reset */ + return fclose(file); +} +#define fclose(f) tty_fclose(f) + +#endif /*!NO_TTY_FWRITE*/ diff --git a/vms/vms_gawk.c b/vms/vms_gawk.c new file mode 100644 index 00000000..57abff7e --- /dev/null +++ b/vms/vms_gawk.c @@ -0,0 +1,245 @@ +/* + * vms_gawk.c -- parse GAWK command line using DCL syntax ] + */ + +/* + * Copyright (C) 1991 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Progamming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 1, or (at your option) + * any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GAWK; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* + * vms_gawk.c - routines to parse the command line as a native DCL command + * rather than as a foreign command string. 
+ *     Pat Rankin, Nov'89
+ *     [ revised for 2.12, May'91 ]
+ */
+
+#include "awk.h"
+#include "vms.h"
+#define COMMAND_NAME "GAWK" /* verb name & for 'usage' message(s) */
+#define USAGE_PROG_RQRD 1
+#define USAGE_FILE_RQRD 2
+#define USAGE_BAD_COMBO 3
+#define USAGE_RUN_CMD 4
+#define STS$M_INHIB_MSG 0x10000000
+
+#define Present(arg) vmswork(Cli_Present(arg))
+#define Get_Value(arg,buf,siz) vmswork(Cli_Get_Value(arg,buf,siz))
+
+extern void gawk_cmd(); /* created with $ SET COMMAND/OBJECT */
+static int vms_usage(int);
+
+#define ARG_SIZ 250
+union arg_w_prefix { /* structure used to simplify prepending of "-" */
+    char value[3+ARG_SIZ+1];
+    struct {
+        char prefix[3]; /* for "-? " */
+        char buf[ARG_SIZ];
+        char suffix[1]; /* room for '\0' */
+    } arg;
+};
+
+/* chk_option() - when `qualifier' is present, append `optname' to the */
+/*      comma separated list being built in buf.arg.buf.  Expects `buf' */
+/*      and `W_cnt' to be in scope at the point of use.  The do/while(0) */
+/*      wrapper makes the expansion a single statement, so it can't */
+/*      capture an `else' that happens to follow it. */
+#define chk_option(qualifier,optname) \
+    do { \
+        if (Present(qualifier)) \
+            strcat(strcat(buf.arg.buf, W_cnt++ ? "," : ""), optname); \
+    } while (0)
+
+
+/* vms_gawk() - parse GAWK command line using DCL and convert it into the */
+/*      appropriate "-arg" values for compatibility with GNU code */
+/*      Returns the number of arguments stored via v_add_arg() (counting */
+/*      argv[0]), or 0 when the caller must fall back on the original */
+/*      (foreign command) parsing.  Error cases go through vms_usage(). */
+int
+vms_gawk()
+{
+    u_long sts;
+    union arg_w_prefix buf;
+    char misc_args[10], *misc_argp;
+    int argc, W_cnt;
+
+    /* check "GAWK_P1"--it's required; its presence will tip us off */
+    sts = Cli_Present("GAWK_P1");
+    if (CondVal(sts) == CondVal(CLI$_SYNTAX)) {
+        /* syntax error indicates that we weren't invoked as a native DCL
+           command, so we'll now attempt to generate a command from the
+           foreign command string and parse that.
+         */
+        sts = Cli_Parse_Command(gawk_cmd, COMMAND_NAME);
+        if (vmswork(sts))
+            sts = Cli_Present("GAWK_P1");
+    }
+    if (vmswork(sts)) /* command parsed successfully */
+        v_add_arg(argc = 0, COMMAND_NAME); /* save "GAWK" as argv[0] */
+    else if (CondVal(sts) == CondVal(CLI$_INSFPRM))
+        return vms_usage(USAGE_FILE_RQRD); /* insufficient parameters */
+    else if (CondVal(sts) == CondVal(CLI$_CONFLICT))
+        return vms_usage(USAGE_BAD_COMBO); /* conflicting qualifiers (/input+/command) */
+    else if (CondVal(sts) == CondVal(CLI$_RUNUSED))
+        return vms_usage(USAGE_RUN_CMD); /* RUN GAWK won't work (no command line) */
+    else
+        return 0; /* forced to rely on original parsing */
+
+    if (Present("USAGE")) /* give usage message and quit */
+        return vms_usage(0);
+    else if (! (Present("PROGRAM") || Present("PROGFILE")) )
+        return vms_usage(USAGE_PROG_RQRD); /* missing required option */
+
+    misc_argp = misc_args;
+    *misc_argp++ = '-'; /* now points at &misc_args[1] */
+    if (Present("REG_EXPR")) {
+        if (Present("REG_EXPR.AWK")) /* /reg_exp=awk -> -a */
+            *misc_argp++ = 'a';
+        else if (Present("REG_EXPR.EGREP") /* /reg_exp=egrep -> -e */
+              || Present("REG_EXPR.POSIX")) /* /reg_exp=posix -> -e */
+            *misc_argp++ = 'e';
+    }
+#if 0 /* gawk 2.11.1 */
+    if (Present("STRICT")) /* /strict -> -c */
+        *misc_argp++ = 'c';
+    if (Present("COPYRIGHT")) /* /copyright -> -C */
+        *misc_argp++ = 'C';
+    if (Present("VERSION")) /* /version -> -V */
+        *misc_argp++ = 'V';
+#else /* gawk 2.12 */
+    /* collect the long options into a single "-W opt1,opt2,..." argument */
+    W_cnt = 0, buf.arg.buf[0] = '\0';
+    strncpy(buf.arg.prefix, "-W ", 3);
+    chk_option("LINT","lint");
+    chk_option("POSIX","posix");
+    chk_option("STRICT","compat");
+    chk_option("COPYRIGHT","copyright");
+    chk_option("VERSION","version");
+    if (W_cnt > 0) /* got something */
+        v_add_arg(++argc, strdup(buf.value));
+#endif /*0*/
+#ifdef DEBUG
+    if (Present("DEBUG")) {
+#if 0
+        int both = Present("DEBUG.ALL");
+        if (both || Present("DEBUG.EXECUTION"))
+            *misc_argp++ = 'd';
+        if (both || Present("DEBUG.PARSE"))
+#endif
+            *misc_argp++ = 'D';
+    }
+#endif
+    *misc_argp = '\0'; /* terminate misc_args[] */
+    if (misc_argp > &misc_args[1]) /* got something */
+        /* misc_args[] is an automatic array which is gone once we return,
+           so store a heap copy like every other argument built here.
+           [v_add_arg() lives elsewhere; presumably it just saves the
+           pointer it's given--the strdup() calls below suggest so.] */
+        v_add_arg(++argc, strdup(misc_args)); /* store it/them */
+
+    if (Present("FIELD_SEP")) { /* field separator */
+        strncpy(buf.arg.prefix, "-F ", 3);
+        if (Get_Value("FIELD_SEP", buf.arg.buf, sizeof buf.arg.buf))
+            v_add_arg(++argc, strdup(buf.value));
+    }
+    if (Present("VARIABLES")) { /* variables to init prior to BEGIN */
+        strncpy(buf.arg.prefix, "-v ", 3);
+        while (Get_Value("VARIABLES", buf.arg.buf, sizeof buf.arg.buf))
+            v_add_arg(++argc, strdup(buf.value));
+    }
+    if (Present("PROGFILE")) { /* program files, /input=file -> -f file */
+        strncpy(buf.arg.prefix, "-f ", 3);
+        while (Get_Value("PROGFILE", buf.arg.buf, sizeof buf.arg.buf))
+            v_add_arg(++argc, strdup(buf.value));
+        v_add_arg(++argc, "--");
+    } else if (Present("PROGRAM")) { /* program text, /program -> 'text' */
+        v_add_arg(++argc, "--");
+        if (Get_Value("PROGRAM", buf.value, sizeof buf.value))
+            v_add_arg(++argc, strdup(buf.value));
+    }
+
+    /* we know that "GAWK_P1" is present [data files and/or 'var=value'] */
+    while (Get_Value("GAWK_P1", buf.value, sizeof buf.value))
+        v_add_arg(++argc, strdup(buf.value));
+
+    if (Present("OUTPUT")) { /* let other parser treat this as 'stdout' */
+        strncpy(buf.arg.prefix, ">$ ", 3);
+        if (Get_Value("OUTPUT", buf.arg.buf, sizeof buf.arg.buf))
+            v_add_arg(++argc, strdup(buf.value));
+    }
+
+    return ++argc; /*(increment to account for arg[0])*/
+}
+
+/* vms_usage() - display one or more messages and then terminate */
+/*      `complaint' is one of the USAGE_xxx codes, or 0 for a plain usage */
+/*      request.  The one path which does return is USAGE_FILE_RQRD with */
+/*      /COPYRIGHT or /VERSION present: a substitute argument list is */
+/*      stored and its count is returned. */
+static int /* note: doesn't return anything; allows 'return vms_usage()' */
+vms_usage( int complaint )
+{
+static char
+    *usage_txt = "\n\
+usage: %s /COMMANDS=\"awk program text\" data_file[,data_file,...] \n\
+ or %s /INPUT=awk_file data_file[,\"Var=value\",data_file,...] \n\
+ or %s /INPUT=(awk_file1,awk_file2,...) data_file[,...] \n\
+", *options_txt = "\n\
+options: /FIELD_SEPARATOR=\"FS_value\" \n\
+ - /VARIABLES=(\"Var1=value1\",\"Var2=value2\",...) \n\
+ - /REG_EXPR= AWK or EGREP or POSIX \n\
+ - /LINT /POSIX /[NO]STRICT /VERSION /COPYRIGHT /USAGE \n\
+ - /OUTPUT=out_file \n\
+", *no_prog = "missing required element: /COMMANDS or /INPUT",
+    *no_file = "missing required element: data_file \n\
+ (use \"SYS$INPUT:\" to read data lines from the terminal)",
+    *bad_combo = "invalid combination of qualifiers \n\
+ (/INPUT=awk_file and /COMMANDS=\"awk program\" are mutually exclusive)",
+    *run_used = "\"RUN\" was used; required command components missing";
+int status, argc;
+
+    fflush(stdout);
+    switch (complaint) {
+      case USAGE_PROG_RQRD:
+        fprintf(stderr, "\n%%%s-W-%s, %s \n", COMMAND_NAME, "PROG_RQRD", no_prog);
+        status = CLI$_VALREQ | STS$M_INHIB_MSG;
+        break;
+      case USAGE_FILE_RQRD:
+        if (Present("USAGE")) {
+            status = 1; /* clean exit */
+        } else if (Present("COPYRIGHT") || Present("VERSION")) {
+            v_add_arg(argc=0, COMMAND_NAME); /* save "GAWK" as argv[0] */
+#if 0
+            v_add_arg(++argc, Present("COPYRIGHT") ? "-C" : "-V");
+#else
+            v_add_arg(++argc, "-W");
+            v_add_arg(++argc, Present("COPYRIGHT") ? "copyright" : "version");
+#endif
+            v_add_arg(++argc, "{}"); /* kludge to suppress 'usage' */
+            v_add_arg(++argc, "NL:"); /* dummy input for kludge */
+            return ++argc; /* count argv[0] too */
+        } else {
+            fprintf(stderr, "\n%%%s-W-%s, %s \n", COMMAND_NAME, "FILE_RQRD", no_file);
+            status = CLI$_INSFPRM | STS$M_INHIB_MSG;
+        }
+        break;
+      case USAGE_BAD_COMBO:
+        fprintf(stderr, "\n%%%s-W-%s, %s \n", COMMAND_NAME, "BAD_COMBO", bad_combo);
+        status = CLI$_CONFLICT | STS$M_INHIB_MSG;
+        break;
+      case USAGE_RUN_CMD:
+        fprintf(stderr, "\n%%%s-W-%s, %s \n", COMMAND_NAME, "RUN_CMD", run_used);
+        status = CLI$_NOOPTPRS | STS$M_INHIB_MSG;
+        break;
+      default:
+        status = 1;
+        break;
+    }
+    fprintf(stderr, usage_txt, COMMAND_NAME, COMMAND_NAME, COMMAND_NAME);
+    fputs(options_txt, stderr); /* fixed text, not a format string */
+    fflush(stderr);
+
+    errno = EVMSERR;
+    vaxc$errno = status;
+    _exit(status);
+    /* NOTREACHED */
+    return 0;
+}
diff --git a/vms/vms_misc.c b/vms/vms_misc.c
new file mode 100644
index 00000000..8c7aee6a
--- /dev/null
+++ b/vms/vms_misc.c
@@ -0,0 +1,159 @@
+/*
+ * vms_misc.c -- substitute code for missing/different run-time library routines.
+ */
+
+/*
+ * Copyright (C) 1991 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 1, or (at your option)
+ * any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "awk.h" /* really "../awk.h" */
+#include <ssdef.h>
+#include <stsdef.h>
+
+    /*
+     * VMS uses a completely different status scheme (odd => success,
+     * even => failure), so we'll trap calls to exit() and alter the
+     * exit status code. [VAXC can't handle this as a macro.]
+     */
+#ifdef exit
+# undef exit
+#endif
+/* vms_exit() -- map a Unix style exit code onto a VMS condition value; */
+/*      0 becomes SS$_NORMAL, anything else SS$_ABORT (message inhibited) */
+void
+vms_exit( int status ) /* [not called 'errno'; that may be a macro] */
+{
+    exit(status == 0 ? SS$_NORMAL : (SS$_ABORT | STS$M_INHIB_MSG));
+}
+#define exit(v) vms_exit(v)
+
+    /*
+     * In VMS's VAXCRTL, strerror() takes an optional second argument.
+     * #define strerror(errnum) strerror(errnum,vaxc$errno)
+     * is all that's needed, but VAXC can't handle that (gcc can).
+     * [The 2nd arg is used iff errnum == EVMSERR.]
+     */
+#ifdef strerror
+# undef strerror
+#endif
+/* vms_strerror() -- convert numeric error code into text string */
+char *
+vms_strerror( int errnum )
+{
+    extern char *strerror( /* int, ... */ );
+    return ( errnum != EVMSERR ? strerror(errnum)
+                               : strerror(EVMSERR, vaxc$errno) );
+}
+# define strerror(v) vms_strerror(v)
+
+    /*
+     * Miscellaneous utility routine, not part of the run-time library.
+     */
+/* vms_strdup() - allocate some new memory and copy a string into it */
+char *
+vms_strdup( const char *str )
+{
+    char *result;
+    int len = strlen(str);
+
+    emalloc(result, char *, len+1, "strdup");
+    return strcpy(result, str);
+}
+
+    /*
+     * VAXCRTL does not contain unlink(). This replacement has limited
+     * functionality which is sufficient for GAWK's needs. It works as
+     * desired even when we have the file open.
+     */
+/* unlink(file) -- delete a file (ignore soft links) */
+/*      Returns whatever delete() returns, or -1 with errno set to EINVAL */
+/*      when the file specification won't fit in the local buffer. */
+int
+unlink( const char *file_spec ) {
+    char tmp[255+1]; /*(should use alloca(len+2+1)) */
+    extern int delete(const char *);
+    int add_version = (strchr(file_spec, ';') == NULL);
+
+    /* make sure the name, the optional ";0", and the '\0' all fit */
+    if (strlen(file_spec) + (add_version ? 2 : 0) >= sizeof tmp) {
+        errno = EINVAL;
+        return -1;
+    }
+    strcpy(tmp, file_spec); /* copy file name */
+    if (add_version)
+        strcat(tmp, ";0"); /* append version number */
+    return delete(tmp);
+}
+
+    /*
+     * Check for attempt to (re-)open known file.
+     */
+/* vms_devopen() - check for "SYS$INPUT" or "SYS$OUTPUT" or "SYS$ERROR" */
+/*      (any case, trailing colon optional); yields the file number of */
+/*      the matching stdio stream, or -1 for any other name. */
+int
+vms_devopen( const char *name )
+{
+    FILE *stream;
+    const char *rest;
+
+    if (strncasecmp(name, "SYS$", 4) != 0)
+        return -1; /* not one of ours */
+    rest = name + 4; /* skip "SYS$" */
+
+    if (strncasecmp(rest, "INPUT", 5) == 0) {
+        stream = stdin;
+        rest += 5;
+    } else if (strncasecmp(rest, "OUTPUT", 6) == 0) {
+        stream = stdout;
+        rest += 6;
+    } else if (strncasecmp(rest, "ERROR", 5) == 0) {
+        stream = stderr;
+        rest += 5;
+    } else {
+        return -1; /* some other "SYS$xxx" name */
+    }
+
+    if (*rest == ':')
+        rest++; /* treat trailing colon as optional */
+    if (*rest != '\0')
+        return -1; /* extra text after the device name */
+
+    /* note: VAXCRTL stdio has extra level of indirection (*file) */
+    if (stream && *stream)
+        return fileno(stream);
+    return -1;
+}
+
+    /*
+     * VMS has no timezone support.
+     */
+/* these are global for use by missing/strftime.c */
+char *tzname[2] = { "local", "" };
+int daylight = 0;
+
+/* tzset() - does nothing; present only to satisfy the linker */
+void tzset()
+{
+}
+
+#ifndef __GNUC__
+# ifdef bcopy
+# undef bcopy
+# endif
+/* bcopy() - copy `len' bytes from `src' to `dst' by way of OTS$MOVE3 */
+void bcopy( char *src, char *dst, int len )
+{
+    (void) OTS$MOVE3(len, src, dst);
+}
+#endif /*!__GNUC__*/
+
+/*----------------------------------------------------------------------*/
+#ifdef NO_VMS_ARGS /* real code is in "vms/vms_args.c" */
+/* dummy: leaves argc and argv untouched */
+void vms_arg_fixup( int *argc, char ***argv )
+{
+}
+#endif
+
+#ifdef NO_VMS_PIPES /* real code is in "vms/vms_popen.c" */
+/* each of these dummies reports the missing feature via fatal() */
+FILE *popen( const char *command, const char *mode )
+{
+    fatal(" Cannot open pipe `%s' (not implemented)", command);
+    return NULL;
+}
+
+int pclose( FILE *current )
+{
+    fatal(" Cannot close pipe #%d (not implemented)", fileno(current));
+    return -1;
+}
+
+int fork( void )
+{
+    fatal(" Cannot fork process (not implemented)");
+    return -1;
+}
+#endif /*NO_VMS_PIPES*/
diff --git a/vms/vms_popen.c b/vms/vms_popen.c
new file mode 100644
index 00000000..f0eaa037
--- /dev/null
+++ b/vms/vms_popen.c
@@ -0,0 +1,168 @@
+/*
+ * [.vms]vms_popen.c -- substitute routines for missing pipe calls.
+ */
+
+/*
+ * Copyright (C) 1991 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 1, or (at your option)
+ * any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef NO_VMS_PIPES
+
+#include "awk.h" /* really "../awk.h" */
+#include <stdio.h>
+
+#ifndef PIPES_SIMULATED
+
+/* popen() - pipes aren't available; report that via fatal() */
+FILE *
+popen( const char *command, const char *mode )
+{
+    fatal(" Cannot open pipe `%s' (not implemented)", command);
+    /* NOT REACHED */
+    return 0;
+}
+
+/* pclose() - counterpart of the popen() stub above */
+int
+pclose( FILE *current )
+{
+    fatal(" Internal error ('pclose' not implemented)");
+    /* NOT REACHED */
+    return -1;
+}
+
+/* fork() - no process creation either; report that via fatal() */
+int
+fork()
+{
+    fatal(" Internal error ('fork' not implemented)");
+    /* NOT REACHED */
+    return -1;
+}
+
+#else /* PIPES_SIMULATED */
+    /*
+     * Simulate pipes using temporary files; hope that the user
+     * doesn't expect pipe i/o to be interleaved with other i/o ;-}.
+     *
+     * This is essentially the same as the MSDOS version. The
+     * difference is that redirection is handled using LIB$SPAWN
+     * rather than constructing a command for system() which uses
+     * '<' or '>'.
+     */
+#include "vms.h"
+#include <errno.h>
+
+typedef enum { unopened = 0, reading, writing } pipemode;
+/* one slot per possible file number; records how to finish each pipe */
+static
+struct {
+    char *command; /* copy of the command given to popen() */
+    char *name; /* name of the temporary file standing in for the pipe */
+    pipemode pmode;
+} pipes[_NFILE];
+
+/* popen() - simulate a pipe with a temporary file.  For "r" the command */
+/*      is run first with its output sent to the file, which is then */
+/*      opened for reading; for "w" the file is opened for writing and */
+/*      the command is not run until pclose().  Returns NULL for any */
+/*      other mode or when the file can't be set up. */
+FILE *
+popen( const char *command, const char *mode )
+{
+    FILE *current;
+    char *name, *mktemp();
+    int cur, strcmp();
+    pipemode curmode;
+
+    if (strcmp(mode, "r") == 0)
+        curmode = reading;
+    else if (strcmp(mode, "w") == 0)
+        curmode = writing;
+    else
+        return NULL;
+
+    /* make a name for the temporary file */
+    name = strdup("sys$scratch:pipe_XXXX.tmp");
+    if (mktemp(name) == 0) {
+        free(name); /* don't leak the template */
+        return NULL;
+    }
+
+    if (curmode == reading) {
+        /* an input pipe reads a temporary file created by the command */
+        vms_execute(command, (char *)0, name); /* 'command >tempfile' */
+    }
+    if ((current = fopen(name, mode)) == NULL) {
+        free(name);
+        return NULL;
+    }
+    cur = fileno(current);
+    if (cur < 0 || cur >= _NFILE) { /* no slot in pipes[] for this file */
+        fclose(current);
+        unlink(name);
+        free(name);
+        return NULL;
+    }
+    pipes[cur].name = name;
+    pipes[cur].pmode = curmode;
+    pipes[cur].command = strdup(command);
+    return current;
+}
+
+/* pclose() - finish a simulated pipe: close the temporary file, run the */
+/*      saved command with the file as its input if this was an output */
+/*      pipe, then delete the file and release the slot.  Returns -1 if */
+/*      `current' isn't a pipe, else the fclose() result (input pipe) or */
+/*      the vms_execute() result (output pipe). */
+int
+pclose( FILE *current )
+{
+    int rval, cur = fileno(current);
+
+    if (cur < 0 || cur >= _NFILE || pipes[cur].pmode == unopened)
+        return -1; /* should never happen */
+
+    rval = fclose(current); /* close temp file; if reading, we're done */
+    if (pipes[cur].pmode == writing) {
+        /* an output pipe feeds the temporary file to the other program */
+        rval = vms_execute(pipes[cur].command, pipes[cur].name, (char *)0);
+    }
+    /* clean up */
+    unlink(pipes[cur].name); /* get rid of the temporary file */
+    pipes[cur].pmode = unopened;
+    free(pipes[cur].name), pipes[cur].name = 0;
+    free(pipes[cur].command), pipes[cur].command = 0;
+    return rval;
+}
+
+    /*
+     * Create a process and execute a command in it. This is essentially
+     * the same as system() but allows us to specify SYS$INPUT (stdin)
+     * and/or SYS$OUTPUT (stdout) for the process.
+     * [With more work it could truly simulate a pipe using mailboxes.]
+     */
+/* vms_execute() - run `command' via LIB$SPAWN, with `input' and/or */
+/*      `output' (either may be null) passed as the subprocess's input */
+/*      and output file names.  Returns 0 on success; on failure of the */
+/*      spawn or of the command itself, returns -1 with errno set to */
+/*      EVMSERR and the VMS status left in vaxc$errno. */
+int
+vms_execute( const char *command, const char *input, const char *output )
+{
+    Dsc cmd, in, out, *in_p, *out_p;
+    u_long sts, cmpltn_sts, LIB$SPAWN();
+
+    cmd.len = strlen(cmd.adr = (char *)command);
+    if (input)
+        in.len = strlen(in.adr = (char *)input), in_p = &in;
+    else
+        in_p = 0;
+    if (output)
+        out.len = strlen(out.adr = (char *)output), out_p = &out;
+    else
+        out_p = 0;
+
+    sts = LIB$SPAWN(&cmd, in_p, out_p, (long *)0,
+                    (Dsc *)0, (u_long *)0, &cmpltn_sts);
+
+    /* spawn worked but the command failed: report the command's status */
+    if (vmswork(sts) && vmsfail(cmpltn_sts)) sts = cmpltn_sts;
+    if (vmsfail(sts)) {
+        errno = EVMSERR, vaxc$errno = sts;
+        return -1;
+    } else
+        return 0;
+}
+
+#endif /* PIPES_SIMULATED */
+
+#endif /*!NO_VMS_PIPES*/
diff --git a/vms/vmsbuild.com b/vms/vmsbuild.com
new file mode 100644
index 00000000..e823eb1b
--- /dev/null
+++ b/vms/vmsbuild.com
@@ -0,0 +1,70 @@
+$! vmsbuild.com -- Commands to build GAWK Pat Rankin, Dec'89
+$! revised, Mar'90
+$! gawk 2.13 revised, Jun'91
+$!
+$ REL = "2.13" !release version number
+$ PATCHLVL = "2"
+$!
+$! [ remove "/optimize=noinline" for VAX C V2.x ]
+$ if f$type(cc) .nes."STRING" then cc := cc/nolist/optimize=noinline
+$ if f$type(link).nes."STRING" then link := link/nomap
+$!
+$ cc := 'cc'/include=[]
+$ libs = "sys$share:vaxcrtl.exe/Shareable"
+$
+$! uncomment the next two lines for VAX C V2.x
+$ ! define vaxc$library sys$library:,sys$disk:[.vms]
+$ ! define c$library [],[.vms]
+$!
+$! uncomment next two lines for GNU C
+$ ! cc := gcc/include=([],[.vms]) !use GNU C rather than VAX C
+$ ! libs = "gnu_cc:[000000]gcclib.olb/Library,sys$library:vaxcrtl.olb/Library"
+$!
+$ if f$search("config.h") .eqs."" then copy [.config]vms-conf.h []config.h
+$ if f$search("awk_tab.c").nes."" then goto awk_tab_ok
+$ write sys$output " You must process `awk.y' with ""yacc"" or ""bison"""
+$ if f$search("awk.tab_c").nes."" then - !unpacked with poor 'tar' reader
+ write sys$output " or else rename `awk.tab_c' to `awk_tab.c'."
+$ if f$search("y_tab.c").nes."" then - !yacc was run manually
+ write sys$output " or else rename `y_tab.c' to `awk_tab.c'."
+$! no parser source available, so give up here
+$ exit
+$awk_tab_ok:
+$! compile the portable gawk sources
+$ cc main.c
+$ cc eval.c
+$ cc builtin.c
+$ cc msg.c
+$ cc iop.c
+$ cc io.c
+$ cc field.c
+$ cc array.c
+$ cc node.c
+$ cc version.c
+$ cc missing.c
+$ cc awk_tab.c
+$ cc regex.c
+$ cc re.c
+$ cc dfa.c
+$ cc/define=("STACK_DIRECTION=(-1)","exit=vms_exit") alloca
+$! compile the VMS-specific sources from [.vms]
+$ cc [.vms]vms_misc.c
+$ cc [.vms]vms_popen.c
+$ cc [.vms]vms_fwrite.c
+$ cc [.vms]vms_args.c
+$ cc [.vms]vms_gawk.c
+$ cc [.vms]vms_cli.c
+$! turn the command definition into an object module for linking
+$ set command/object=[]gawk_cmd.obj [.vms]gawk.cld
+$!
+$! write the linker options file; the lines up to the next "$" are its text
+$ create gawk.opt
+! GAWK -- Gnu AWK
+main.obj,eval.obj,builtin.obj,msg.obj,iop.obj,io.obj
+field.obj,array.obj,node.obj,version.obj,missing.obj,awk_tab.obj
+regex.obj,re.obj,dfa.obj,[]alloca.obj
+[]vms_misc.obj,vms_popen.obj,vms_fwrite.obj
+[]vms_args.obj,vms_gawk.obj,vms_cli.obj,gawk_cmd.obj
+psect_attr=environ,noshr !extern [noshare] char **
+stack=50 !preallocate more pages (default is 20)
+$! append the library list and an identification built from REL and PATCHLVL
+$ open/append Fopt gawk.opt
+$ write Fopt libs
+$ write Fopt "identification=""V''REL'.''PATCHLVL'"""
+$ close Fopt
+$!
+$ link/exe=gawk.exe gawk.opt/options |