aboutsummaryrefslogtreecommitdiffstats
path: root/awk1.c
diff options
context:
space:
mode:
authorArnold D. Robbins <arnold@skeeve.com>2010-07-02 15:44:01 +0300
committerArnold D. Robbins <arnold@skeeve.com>2010-07-02 15:44:01 +0300
commit3ffce190be7a1d11725c224fa76876d2b621d00d (patch)
treeb4a0e665b7b106f448c7cf9ddebda8b26ceb0a64 /awk1.c
downloadegawk-3ffce190be7a1d11725c224fa76876d2b621d00d.tar.gz
egawk-3ffce190be7a1d11725c224fa76876d2b621d00d.tar.bz2
egawk-3ffce190be7a1d11725c224fa76876d2b621d00d.zip
Initial commit, gawk 1.01 sources.
Diffstat (limited to 'awk1.c')
-rw-r--r--awk1.c723
1 files changed, 723 insertions, 0 deletions
diff --git a/awk1.c b/awk1.c
new file mode 100644
index 00000000..79963bac
--- /dev/null
+++ b/awk1.c
@@ -0,0 +1,723 @@
+/*
+ * awk1 -- Expression tree constructors and main program for gawk.
+ *
+ * Copyright (C) 1986 Free Software Foundation
+ * Written by Paul Rubin, August 1986
+ *
+ */
+
+/*
+GAWK is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY. No author or distributor accepts responsibility to anyone
+for the consequences of using it or for whether it serves any
+particular purpose or works at all, unless he says so in writing.
+Refer to the GAWK General Public License for full details.
+
+Everyone is granted permission to copy, modify and redistribute GAWK,
+but only under the conditions described in the GAWK General Public
+License. A copy of this license is supposed to have been given to you
+along with GAWK so you can know your rights and responsibilities. It
+should be in a file named COPYING. Among other things, the copyright
+notice and this notice must be preserved on all copies.
+
+In other words, go ahead and share GAWK, but don't try to stop
+anyone else from sharing it farther. Help stamp out software hoarding!
+*/
+
+#include <stdio.h>
+#include "regex.h"
+#include "awk.h"
+
+/* Temporary nodes are stored here. ob_dummy is a dummy object used to
+ keep the obstack library from free()ing up the entire stack. */
+struct obstack temp_strings;
+char *ob_dummy;
+
+/* The parse tree and field nodes are stored here. Parse_end is a dummy
+ item used to free up unneeded fields without freeing the program being run
+ */
+struct obstack other_stack;
+char *parse_end;
+
+/* The global null string */
+NODE *Nnull_string;
+
+/* The special variable that contains the name of the current input file */
+extern NODE *FILENAME_node;
+
+/* The name the program was invoked under, for error messages */
+char *myname;
+
+/* A block of gAWK code to be run before running the program */
+NODE *begin_block = 0;
+
+/* A block of gAWK code to be run after the last input file */
+NODE *end_block = 0;
+
+FILE *input_file; /* Where to read from */
+
+#ifndef FAST
+/* non-zero means in debugging is enabled. Probably not very useful */
+int debugging;
+#endif
+
+char *index();
+
+
+main(argc, argv)
+ int argc;
+ char **argv;
+{
+ register int i;
+ register NODE *tmp;
+ char **do_vars;
+#ifndef FAST
+ /* Print out the parse tree. For debugging */
+ register int dotree = 0;
+ extern int yydebug;
+#endif
+ extern char *lexptr;
+ extern char *lexptr_begin;
+ FILE *fp,*fopen();
+
+ --argc;
+ myname= *argv++;
+ if(!argc)
+ usage();
+
+ /* Tell the regex routines how they should work. . . */
+ re_set_syntax(RE_NO_BK_PARENS|RE_NO_BK_VBAR);
+
+ /* Set up the stack for temporary strings */
+ obstack_init (&temp_strings);
+ ob_dummy=obstack_alloc(&temp_strings,0);
+
+ /* Set up the other stack for other things */
+ obstack_init(&other_stack);
+ /* initialize the null string */
+ Nnull_string = make_string("",0);
+ /* This was to keep Nnull_string from ever being free()d It didn't work */
+ /* Nnull_string->stref=32000; */
+ /* Set up the special variables */
+ /* Note that this must be done BEFORE arg parsing else -R and -F
+ break horribly */
+ init_vars();
+
+
+ for(;*argv && **argv=='-';argc--,argv++) {
+ switch(argv[0][1]) {
+#ifndef FAST
+ case 'd':
+ debugging++;
+ dotree++;
+ break;
+
+ case 'D':
+ debugging++;
+ yydebug=2;
+ break;
+#endif
+ /* This feature isn't in un*x awk, but might be useful */
+ case 'R':
+ set_rs(&argv[0][2]);
+ break;
+
+ case 'F':
+ set_fs(&argv[0][2]);
+ break;
+
+
+ /* It would be better to read the input file in as we parse
+ it. Its done this way for hysterical reasons. Feel
+ free to fix it. */
+ case 'f':
+ if(lexptr)
+ panic("Can only use one -f option");
+ if((fp=fopen(argv[1],"r"))==NULL)
+ er_panic(argv[1]);
+ else {
+ char *curptr;
+ int siz,nread;
+
+ curptr=lexptr=malloc(2000);
+ if(curptr==NULL)
+ panic("Memory exhausted"); /* jfw: instead of abort() */
+ siz=2000;
+ i=siz-1;
+ while((nread=fread(curptr,sizeof(char),i,fp)) > 0) {
+ curptr+=nread;
+ i-=nread;
+ if(i==0) {
+ lexptr=realloc(lexptr,siz*2);
+ if(lexptr==NULL)
+ panic("Memory exhausted"); /* jfw: instead of abort() */
+ curptr=lexptr+siz-1;
+ i=siz;
+ siz*=2;
+ }
+ }
+ *curptr='\0';
+ fclose(fp);
+ }
+ argc--;
+ argv++;
+ break;
+
+ case '\0': /* A file */
+ break;
+
+ default:
+ panic("Unknown option %s",argv[0]);
+ }
+ }
+ if (debugging) setbuf(stdout, 0); /* jfw: make debugging easier */
+ /* No -f option, use next arg */
+ if(!lexptr) {
+ if(!argc) usage();
+ lexptr= *argv++;
+ --argc;
+ }
+
+ /* Read in the program */
+ lexptr_begin=lexptr;
+ (void)yyparse ();
+
+ /* Anything allocated on the other_stack after here will be freed
+ when the next input line is read.
+ */
+ parse_end=obstack_alloc(&other_stack,0);
+
+#ifndef FAST
+ if(dotree)
+ print_parse_tree(expression_value);
+#endif
+ /* Set up the field variables */
+ init_fields();
+
+ /* Look for BEGIN and END blocks. Only one of each allowed */
+ for(tmp=expression_value;tmp;tmp=tmp->rnode) {
+ if(!tmp->lnode || !tmp->lnode->lnode)
+ continue;
+ if(tmp->lnode->lnode->type==Node_K_BEGIN)
+ begin_block=tmp->lnode->rnode;
+ else if(tmp->lnode->lnode->type==Node_K_END)
+ end_block=tmp->lnode->rnode;
+ }
+ if(begin_block && interpret(begin_block) == 0) exit(0); /* jfw */
+ do_vars=argv;
+ while(argc>0 && index(*argv,'=')) {
+ argv++;
+ --argc;
+ }
+ if(do_vars==argv) do_vars=0;
+ if(argc==0) {
+ static char *dumb[2]= { "-", 0};
+
+ argc=1;
+ argv= &dumb[0];
+ }
+ while(argc--) {
+ if(!strcmp(*argv,"-")) {
+ input_file=stdin;
+ FILENAME_node->var_value=Nnull_string;
+ ADD_ONE_REFERENCE(Nnull_string);
+ } else {
+ extern NODE *deref;
+
+ input_file=fopen(*argv,"r");
+ /* This should print the error message from errno */
+ if(!input_file)
+ er_panic(*argv);
+ /* This is a kludge. */
+ deref=FILENAME_node->var_value;
+ do_deref();
+ FILENAME_node->var_value=make_string(*argv,strlen(*argv));
+ }
+ /* This is where it spends all its time. The infamous MAIN LOOP */
+ if(inrec()==0) {
+ if(do_vars) {
+ while(do_vars!=argv && *do_vars) {
+ char *cp;
+
+ cp=index(*do_vars,'=');
+ *cp++='\0';
+ variable(*do_vars)->var_value=make_string(cp,strlen(cp));
+ do_vars++;
+ }
+ do_vars=0;
+ }
+ do
+ obstack_free(&temp_strings, ob_dummy);
+ while (interpret(expression_value) && inrec() == 0);
+ }
+ if(input_file!=stdin) fclose(input_file);
+ argv++;
+ }
+ if(end_block) (void)interpret(end_block);
+ exit(0);
+}
+
+/* These exit values are arbitrary */
+/*VARARGS1*/
+panic(str,arg)
+char *str;
+{
+ fprintf(stderr,"%s: ",myname);
+ fprintf(stderr,str,arg);
+ fprintf(stderr,"\n");
+ exit(12);
+}
+
+er_panic(str)
+char *str;
+{
+ fprintf(stderr,"%s: ",myname);
+ perror(str);
+ exit(15);
+}
+
+usage()
+{
+ fprintf(stderr,"%s: usage: %s {-f progfile | program } [-F{c} -R{c}] file . . .\n",myname,myname);
+ exit(11);
+}
+
+
+/* This allocates a new node of type ty. Note that this node will not go
+ away unless freed, so don't use it for tmp storage */
+NODE *
+newnode(ty)
+NODETYPE ty;
+{
+ register NODE *r;
+
+ r=(NODE *)malloc(sizeof(NODE));
+ if(r==NULL)
+ abort();
+ r->type=ty;
+ return r;
+}
+
+
+/* Duplicate a node. (For global strings, "duplicate" means crank up
+ the reference count.) This creates global nodes. . .*/
+NODE *
+dupnode(n)
+NODE *n;
+{
+ register NODE *r;
+
+ if(n->type==Node_string) {
+ n->stref++;
+ return n;
+ } else if(n->type==Node_temp_string) {
+ r=newnode(Node_string);
+ r->stlen=n->stlen;
+ r->stref=1;
+ r->stptr=malloc(n->stlen+1);
+ if(r->stptr==NULL)
+ abort();
+ bcopy (n->stptr, r->stptr, n->stlen);
+ r->stptr[r->stlen]='\0'; /* JF for hackval */
+ return r;
+ } else {
+ r=newnode(Node_illegal);
+ *r= *n;
+ return r;
+ }
+}
+
+/* This allocates a node with defined lnode and rnode. */
+/* This should only be used by yyparse+co while
+ reading in the program */
+NODE *
+node (left, op, right)
+ NODE *left, *right;
+ NODETYPE op;
+{
+ register NODE *r;
+
+ r = (NODE *)obstack_alloc(&other_stack,sizeof(NODE));
+ r->type=op;
+ r->lnode = left;
+ r->rnode = right;
+ return r;
+}
+
+/* This allocates a node with defined subnode and proc */
+/* Otherwise like node() */
+NODE *
+snode(subn, op, procp)
+NODETYPE op;
+NODE *(*procp)();
+NODE *subn;
+{
+ register NODE *r;
+
+ r=(NODE *)obstack_alloc(&other_stack,sizeof(NODE));
+ r->type=op;
+ r->subnode=subn;
+ r->proc=procp;
+ return r;
+}
+
+/* (jfw) This allocates a Node_line_range node
+ * with defined condpair and zeroes the trigger word
+ * to avoid the temptation of assuming that calling
+ * 'node( foo, Node_line_range, 0)' will properly initialize 'triggered'.
+ */
+/* Otherwise like node() */
+NODE *
+mkrangenode(cpair)
+NODE *cpair;
+{
+ register NODE *r;
+
+ r=(NODE *)obstack_alloc(&other_stack,sizeof(NODE));
+ r->type=Node_line_range;
+ r->condpair=cpair;
+ r->triggered = 0;
+ return r;
+}
+
+/* this allocates a node with defined numbr */
+/* This creates global nodes! */
+NODE *
+make_number (x)
+ AWKNUM x;
+{
+ register NODE *r;
+
+ r=newnode(Node_number);
+ r->numbr = x;
+ return r;
+}
+
+/* This creates temporary nodes. They go away quite quicly, so
+ don't use them for anything important */
+#ifndef FAST
+NODE *
+tmp_number(x)
+AWKNUM x;
+{
+#ifdef DONTDEF
+ return make_number(x);
+#endif
+ NODE *r;
+
+ r=(NODE *)obstack_alloc(&temp_strings,sizeof(NODE));
+ r->type=Node_number;
+ r->numbr=x;
+ return r;
+}
+#endif
+
+/* Make a string node. If len==0, the string passed in S is supposed to end
+ with a double quote, but have had the beginning double quote
+ already stripped off by yylex.
+ If LEN!=0, we don't care what s ends with. This creates a global node */
+
+NODE *
+make_string (s,len)
+ char *s;
+{
+ register NODE *r;
+ register char *pf,*pt;
+ register int c;
+
+ /* the aborts are impossible because yylex is supposed to have
+ already checked for unterminated strings */
+ if(len==-1) { /* Called from yyparse, find our own len */
+#ifndef FAST
+ if (s[-1] != '\"') /* Didn't start with " */
+ abort ();
+#endif
+
+ for(pf = pt = s; *pf != '\0' && *pf!='\"';) {
+ c= *pf++;
+ switch(c) {
+#ifndef FAST
+ case '\0':
+ abort();
+#endif
+
+ case '\\':
+#ifndef FAST
+ if(*pf=='\0')
+ abort();
+#endif
+
+ c= *pf++;
+ switch(c) {
+ case '\\': /* no massagary needed */
+ case '\'':
+ case '\"':
+ break;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ c-='0';
+ while(*pf && *pf>='0' && *pf<='7') {
+ c=c*8+ *pf++ - '0';
+ }
+ break;
+ case 'b':
+ c='\b';
+ break;
+ case 'f':
+ c='\f';
+ break;
+ case 'n':
+ c='\n';
+ break;
+ case 'r':
+ c='\r';
+ break;
+ case 't':
+ c='\t';
+ break;
+ case 'v':
+ c='\v';
+ break;
+ default:
+ *pt++='\\';
+ break;
+ }
+ /* FALL THROUGH */
+ default:
+ *pt++=c;
+ break;
+ }
+ }
+#ifndef FAST
+ if(*pf=='\0')
+ abort(); /* JF hit the end of the buf */
+#endif
+ len = pt - s; /* JF was p - s - 1 */
+ }
+
+ r=newnode(Node_string);
+ r->stptr=(char *)malloc(len+1);
+ if(r->stptr==0)
+ abort();
+ r->type=Node_string;
+ r->stlen=len;
+ r->stref=1;
+ bcopy (s, r->stptr, len);
+ r->stptr[len]='\0'; /* JF a hack */
+
+ return r;
+}
+
+/* #ifndef FAST */
+/* This should be a macro for speed, but the C compiler chokes. */
+/* Read the warning under tmp_number */
+NODE *
+tmp_string(s,len)
+char *s;
+{
+ register NODE *r;
+
+#ifdef DONTDEF
+ return make_string(s,len);
+#endif
+ r=(NODE *)obstack_alloc(&temp_strings,sizeof(NODE));
+ r->stptr=(char *)obstack_alloc(&temp_strings,len+1);
+ r->type=Node_temp_string;
+ r->stlen=len;
+ r->stref=1;
+ bcopy (s, r->stptr, len);
+ r->stptr[len]='\0'; /* JF a hack */
+
+ return r;
+}
+/* #endif */
+
+/* Generate compiled regular expressions */
+struct re_pattern_buffer *
+make_regexp (s)
+ char *s;
+{
+ typedef struct re_pattern_buffer RPAT;
+ RPAT *rp;
+ char *p, *err;
+
+ rp = (RPAT *) obstack_alloc(&other_stack, sizeof (RPAT));
+ bzero((char *)rp,sizeof(RPAT));
+ rp->buffer = (char *)malloc(8); /* JF I'd obstack allocate it,
+ except the regex routines
+ try to realloc() it, which fails. */
+ /* Note that this means it may never be freed. Someone fix, please? */
+
+ rp->allocated = 8;
+ rp->fastmap = (char *)obstack_alloc(&other_stack, 256);
+
+ for (p = s; *p != '\0'; p++) {
+ if (*p == '\\')
+ p++;
+ else if (*p == '/')
+ break;
+ }
+#ifndef FAST
+ if (*p != '/')
+ abort (); /* impossible */
+#endif
+
+ /* JF was re_compile_pattern, but that mishandles ( ) and |,
+ so I had to write my own front end. Sigh. */
+
+ if ((err = re_compile_pattern (s, p - s, rp)) != NULL) {
+ fprintf (stderr, "illegal regexp: ");
+ yyerror (err); /* fatal */
+ }
+
+ return rp;
+}
+
+/* Build a for loop */
+FOR_LOOP_HEADER *
+make_for_loop (init, cond, incr)
+ NODE *init, *cond, *incr;
+{
+ register FOR_LOOP_HEADER *r;
+
+ r = (FOR_LOOP_HEADER *)obstack_alloc(&other_stack,sizeof (FOR_LOOP_HEADER));
+ r->init = init;
+ r->cond = cond;
+ r->incr = incr;
+ return r;
+}
+
+/* Name points to a variable name. Make sure its in the symbol table */
+NODE *
+variable (name)
+ char *name;
+{
+ register NODE *r;
+ NODE *lookup(), *install();
+
+ if ((r = lookup (variables, name)) == NULL) {
+ r = install (variables, name, node(Nnull_string, Node_var, (NODE *)NULL));
+ /* JF make_number (0.0) is WRONG */
+ }
+ return r;
+}
+
+/* Create a special variable */
+NODE *
+spc_var (name,value)
+char *name;
+NODE *value;
+{
+ register NODE *r;
+ NODE *lookup(), *install();
+
+ if ((r = lookup(variables, name)) == NULL)
+ r = install (variables, name, node(value, Node_var, (NODE *)NULL));
+ return r;
+}
+
+
+/*
+ * Install a name in the hash table specified, even if it is already there.
+ * Name stops with first non alphanumeric.
+ * Caller must check against redefinition if that is desired.
+ */
+NODE *
+install (table, name, value)
+ HASHNODE **table;
+ char *name;
+ NODE *value;
+{
+ register HASHNODE *hp;
+ register int i, len, bucket;
+ register char *p;
+
+ len = 0;
+ p = name;
+ while (is_identchar(*p))
+ p++;
+ len = p - name;
+
+ i = sizeof (HASHNODE) + len + 1;
+ hp = (HASHNODE *)obstack_alloc(&other_stack,i);
+ bucket = hashf(name, len, HASHSIZE);
+ hp->next = table[bucket];
+ table[bucket] = hp;
+ hp->length = len;
+ hp->value = value;
+ hp->name = ((char *) hp) + sizeof (HASHNODE);
+ hp->length = len;
+ bcopy (name, hp->name, len);
+ return hp->value;
+}
+
+/*
+ * find the most recent hash node for name name (ending with first
+ * non-identifier char) installed by install
+ */
+NODE *
+lookup (table, name)
+ HASHNODE **table;
+ char *name;
+{
+ register char *bp;
+ register HASHNODE *bucket;
+ register int len;
+
+ for (bp = name; is_identchar(*bp); bp++)
+ ;
+ len = bp - name;
+ bucket = table[hashf(name, len, HASHSIZE)];
+ while (bucket) {
+ if (bucket->length == len && strncmp(bucket->name, name, len) == 0)
+ return bucket->value;
+ bucket = bucket->next;
+ }
+ return NULL;
+}
+
+#define HASHSTEP(old, c) ((old << 1) + c)
+#define MAKE_POS(v) (v & ~0x80000000) /* make number positive */
+
+/*
+ * return hash function on name. must be compatible with the one
+ * computed a step at a time, elsewhere (JF: Where? I can't find it!)
+ */
+int
+hashf(name, len, hashsize)
+ register char *name;
+ register int len;
+ int hashsize;
+{
+ register int r = 0;
+
+ while (len--)
+ r = HASHSTEP(r, *name++);
+
+ return MAKE_POS(r) % hashsize;
+}
+
+/* Add new to the rightmost branch of LIST. This uses n^2 time, but
+ doesn't get used enough to make optimizing worth it. . . */
+/* You don't believe me? Profile it yourself! */
+
+NODE *
+append_right(list,new)
+NODE *list,*new;
+{
+ register NODE *oldlist;
+
+ oldlist = list;
+ while(list->rnode!=NULL)
+ list=list->rnode;
+ list->rnode = new;
+ return oldlist;
+}