Resolving parser conflicts.

author: Kaz Kylheku <kaz@kylheku.com> 2010-01-19 14:43:40 -0800
committer: Kaz Kylheku <kaz@kylheku.com> 2010-01-19 14:43:40 -0800
commit: da3ffa806c910ba2d03dcca145eea52b90ebdbdf (patch)
tree: 39ea11c9d746df8307a4f3353bb9ce610269ace8
parent: 667a0b7d777cfdd479f260f32e5344c28ebb30a1 (diff)
download: txr-da3ffa806c910ba2d03dcca145eea52b90ebdbdf.tar.gz
txr-da3ffa806c910ba2d03dcca145eea52b90ebdbdf.tar.bz2
txr-da3ffa806c910ba2d03dcca145eea52b90ebdbdf.zip
3 files changed, 37 insertions, 18 deletions
diff --git a/ChangeLog b/ChangeLog
index fa22c205..93a450de 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,19 @@
+2010-01-19  Kaz Kylheku  <kkylheku@gmail.com>
+
+	* parser.l (grammar): The ^ character is no longer considered
+	a special regex token, just a regular character.
+
+	* parser.y (LOW): New phony terminal symbol, used as a placeholder
+	for the lowest precedence.
+	(grammar): Fixed numerous conflicts in regex section by refactoring.
+	The regex nonterminal no longer has an empty derivation.
+	A regex character class no longer has an empty derivation; this is
+	handled by special rules. Ambiguity around ^ is resolved: it is parsed
+	as a regular character and recognized specially at the start of a class.
+	Ambiguity between catenation of terms and postfix operators
+	resolved in favor of shift by giving catenation low
+	precedence using %prec LOW.
+
 2010-01-18  Kaz Kylheku  <kkylheku@gmail.com>
 
 	Version 029
diff --git a/parser.l b/parser.l
index f869892f..7bde063d 100644
--- a/parser.l
+++ b/parser.l
@@ -440,7 +440,7 @@ UONLY   {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
                   yyerror("newline in regex");
                 }
 
-<REGEX>[.*?+^~&%]       {
+<REGEX>[.*?+~&%]        {
                           yylval.chr = yytext[0];
                           return yytext[0];
                         }
diff --git a/parser.y b/parser.y
index db3ff2c7..7841e5d4 100644
--- a/parser.y
+++ b/parser.y
@@ -74,11 +74,12 @@ static val parsed_spec;
 %type <obj> regterm regclass regclassterm regrange
 %type <obj> strlit chrlit quasilit quasi_items quasi_item litchars
 %type <chr> regchar
+%nonassoc LOW /* used for precedence assertion */
 %nonassoc ALL SOME NONE MAYBE CASES AND OR END COLLECT UNTIL COLL
 %nonassoc OUTPUT REPEAT REP FIRST LAST EMPTY DEFINE
 %nonassoc '{' '}' '[' ']' '(' ')'
 %right IDENT TEXT NUMBER
-%left '^'
+%left '-'
 %left '|' '/'
 %left '&' 
 %right '~' '*' '?' '+' '%'
@@ -450,6 +451,7 @@ expr : IDENT                    { $$ = intern(string_own($1), nil); }
      ;
 
 regex : '/' regexpr '/'         { $$ = $2; end_of_regex(); }
+      | '/' '/'                 { $$ = nil; end_of_regex(); }
       | '/' error               { $$ = nil;
                                   yybadtoken(yychar, lit("regex"));
                                   end_of_regex(); }
@@ -461,37 +463,39 @@ regexpr : regbranch                     { $$ = if3(cdr($1),
         | regexpr '|' regexpr           { $$ = list(or_s, $1, $3, nao); }
         | regexpr '&' regexpr           { $$ = list(and_s, $1, $3, nao); }
         | '~' regexpr                   { $$ = list(compl_s, $2, nao); }
-        | /* empty */                   { $$ = nil; }
         ;
 
-regbranch : regterm             { $$ = cons($1, nil); }
+regbranch : regterm %prec LOW   { $$ = cons($1, nil); }
           | regterm regbranch   { $$ = cons($1, $2); }
           ;
 
-regterm : '[' regclass ']'      { $$ = cons(set_s, $2); }
-        | '[' '^' regclass ']'  { $$ = if3(nullp($3), wild_s,
-                                                      cons(cset_s, $3)); }
-        | '.'                   { $$ = wild_s; }
-        | '^'                   { $$ = chr('^'); }
-        | ']'                   { $$ = chr(']'); }
-        | '-'                   { $$ = chr('-'); }
-        | regterm '*'           { $$ = list(zeroplus_s, $1, nao); }
+regterm : regterm '*'           { $$ = list(zeroplus_s, $1, nao); }
         | regterm '+'           { $$ = list(oneplus_s, $1, nao); }
         | regterm '?'           { $$ = list(optional_s, $1, nao); }
         | regterm '%' regexpr   { $$ = list(nongreedy_s, $1, $3, nao); }
+        | '[' regclass ']'      { if (first($2) == chr('^'))
+                                  { if (rest($2))
+                                      $$ = cons(cset_s, rest($2));
+                                    else
+                                      $$ = wild_s; }
+                                  else
+                                    $$ = cons(set_s, $2); }
+        | '[' ']'               { $$ = cons(set_s, nil); }
+        | '[' error             { $$ = nil;
+                                  yybadtoken(yychar,
+                                            lit("regex character class")); }
+        | '.'                   { $$ = wild_s; }
+        | ']'                   { $$ = chr(']'); }
+        | '-'                   { $$ = chr('-'); }
         | REGCHAR               { $$ = chr($1); }
         | '(' regexpr ')'       { $$ = $2; }
         | '(' error             { $$ = nil;
-                                  yybadtoken(yychar,
+                                     yybadtoken(yychar,
                                              lit("regex subexpression")); }
-        | '[' error             { $$ = nil;
-                                  yybadtoken(yychar,
-                                            lit("regex character class")); }
         ;
 
 regclass : regclassterm                 { $$ = cons($1, nil); }
          | regclassterm regclass        { $$ = cons($1, $2); }
-         | /* empty */                  { $$ = nil; }
          ;
 
 regclassterm : regrange         { $$ = $1; }
@@ -506,7 +510,6 @@ regchar : '?'                   { $$ = '?'; }
         | '+'                   { $$ = '+'; }
         | '('                   { $$ = '('; }
         | ')'                   { $$ = ')'; }
-        | '^'                   { $$ = '^'; }
         | '|'                   { $$ = '|'; }
         | '~'                   { $$ = '~'; }
         | '&'                   { $$ = '&'; }
author	Kaz Kylheku <kaz@kylheku.com>	2010-01-19 14:43:40 -0800
committer	Kaz Kylheku <kaz@kylheku.com>	2010-01-19 14:43:40 -0800
commit	da3ffa806c910ba2d03dcca145eea52b90ebdbdf (patch)
tree	39ea11c9d746df8307a4f3353bb9ce610269ace8
parent	667a0b7d777cfdd479f260f32e5344c28ebb30a1 (diff)
download	txr-da3ffa806c910ba2d03dcca145eea52b90ebdbdf.tar.gz txr-da3ffa806c910ba2d03dcca145eea52b90ebdbdf.tar.bz2 txr-da3ffa806c910ba2d03dcca145eea52b90ebdbdf.zip