summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Kaz Kylheku <kaz@kylheku.com> 2010-01-19 14:43:40 -0800
committer: Kaz Kylheku <kaz@kylheku.com> 2010-01-19 14:43:40 -0800
commit: da3ffa806c910ba2d03dcca145eea52b90ebdbdf (patch)
tree: 39ea11c9d746df8307a4f3353bb9ce610269ace8
parent: 667a0b7d777cfdd479f260f32e5344c28ebb30a1 (diff)
download: txr-da3ffa806c910ba2d03dcca145eea52b90ebdbdf.tar.gz
txr-da3ffa806c910ba2d03dcca145eea52b90ebdbdf.tar.bz2
txr-da3ffa806c910ba2d03dcca145eea52b90ebdbdf.zip
Resolving parser conflicts.
-rw-r--r-- ChangeLog 16
-rw-r--r-- parser.l 2
-rw-r--r-- parser.y 37
3 files changed, 37 insertions, 18 deletions
diff --git a/ChangeLog b/ChangeLog
index fa22c205..93a450de 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,19 @@
+2010-01-19 Kaz Kylheku <kkylheku@gmail.com>
+
+ * parser.l (grammar): The ^ character is no longer considered
+ a special regex token, just a regular character.
+
+ * parser.y (LOW): New phony terminal symbol, used as place holder
+ for lowest precedence.
+ (grammar): Fixed numerous conflicts in regex section by refactoring.
+ The regex nonterminal no longer has an empty derivation.
+ A regex character class no longer has an empty derivation; this is
+ handled by special rules. Ambiguity around ^ is resolved; this is
+ parsed as a regular character and specially recognized.
+ Ambiguity between catenation of terms and postfix operators
+ resolved in favor of shift by giving catenation low
+ precedence using %prec LOW.
+
2010-01-18 Kaz Kylheku <kkylheku@gmail.com>
Version 029
diff --git a/parser.l b/parser.l
index f869892f..7bde063d 100644
--- a/parser.l
+++ b/parser.l
@@ -440,7 +440,7 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
yyerror("newline in regex");
}
-<REGEX>[.*?+^~&%] {
+<REGEX>[.*?+~&%] {
yylval.chr = yytext[0];
return yytext[0];
}
diff --git a/parser.y b/parser.y
index db3ff2c7..7841e5d4 100644
--- a/parser.y
+++ b/parser.y
@@ -74,11 +74,12 @@ static val parsed_spec;
%type <obj> regterm regclass regclassterm regrange
%type <obj> strlit chrlit quasilit quasi_items quasi_item litchars
%type <chr> regchar
+%nonassoc LOW /* used for precedence assertion */
%nonassoc ALL SOME NONE MAYBE CASES AND OR END COLLECT UNTIL COLL
%nonassoc OUTPUT REPEAT REP FIRST LAST EMPTY DEFINE
%nonassoc '{' '}' '[' ']' '(' ')'
%right IDENT TEXT NUMBER
-%left '^'
+%left '-'
%left '|' '/'
%left '&'
%right '~' '*' '?' '+' '%'
@@ -450,6 +451,7 @@ expr : IDENT { $$ = intern(string_own($1), nil); }
;
regex : '/' regexpr '/' { $$ = $2; end_of_regex(); }
+ | '/' '/' { $$ = nil; end_of_regex(); }
| '/' error { $$ = nil;
yybadtoken(yychar, lit("regex"));
end_of_regex(); }
@@ -461,37 +463,39 @@ regexpr : regbranch { $$ = if3(cdr($1),
| regexpr '|' regexpr { $$ = list(or_s, $1, $3, nao); }
| regexpr '&' regexpr { $$ = list(and_s, $1, $3, nao); }
| '~' regexpr { $$ = list(compl_s, $2, nao); }
- | /* empty */ { $$ = nil; }
;
-regbranch : regterm { $$ = cons($1, nil); }
+regbranch : regterm %prec LOW { $$ = cons($1, nil); }
| regterm regbranch { $$ = cons($1, $2); }
;
-regterm : '[' regclass ']' { $$ = cons(set_s, $2); }
- | '[' '^' regclass ']' { $$ = if3(nullp($3), wild_s,
- cons(cset_s, $3)); }
- | '.' { $$ = wild_s; }
- | '^' { $$ = chr('^'); }
- | ']' { $$ = chr(']'); }
- | '-' { $$ = chr('-'); }
- | regterm '*' { $$ = list(zeroplus_s, $1, nao); }
+regterm : regterm '*' { $$ = list(zeroplus_s, $1, nao); }
| regterm '+' { $$ = list(oneplus_s, $1, nao); }
| regterm '?' { $$ = list(optional_s, $1, nao); }
| regterm '%' regexpr { $$ = list(nongreedy_s, $1, $3, nao); }
+ | '[' regclass ']' { if (first($2) == chr('^'))
+ { if (rest($2))
+ $$ = cons(cset_s, rest($2));
+ else
+ $$ = wild_s; }
+ else
+ $$ = cons(set_s, $2); }
+ | '[' ']' { $$ = cons(set_s, nil); }
+ | '[' error { $$ = nil;
+ yybadtoken(yychar,
+ lit("regex character class")); }
+ | '.' { $$ = wild_s; }
+ | ']' { $$ = chr(']'); }
+ | '-' { $$ = chr('-'); }
| REGCHAR { $$ = chr($1); }
| '(' regexpr ')' { $$ = $2; }
| '(' error { $$ = nil;
- yybadtoken(yychar,
+ yybadtoken(yychar,
lit("regex subexpression")); }
- | '[' error { $$ = nil;
- yybadtoken(yychar,
- lit("regex character class")); }
;
regclass : regclassterm { $$ = cons($1, nil); }
| regclassterm regclass { $$ = cons($1, $2); }
- | /* empty */ { $$ = nil; }
;
regclassterm : regrange { $$ = $1; }
@@ -506,7 +510,6 @@ regchar : '?' { $$ = '?'; }
| '+' { $$ = '+'; }
| '(' { $$ = '('; }
| ')' { $$ = ')'; }
- | '^' { $$ = '^'; }
| '|' { $$ = '|'; }
| '~' { $$ = '~'; }
| '&' { $$ = '&'; }