Bug #35625

* parser.l (BSCHR, BSYM, BTOK): New lexical definitions. (BRACED): New state. (grammar): Refactored so that braced variables are now handled in the BRACED state, allowing for lexical differences between braced variables and Lisp. This allows us to have the /regex/ syntax in braces, but /regex/ is just a symbol in the Lisp. The new #/ token is recognized and returned as HASH_SLASH. All rules reformatted to a more easily maintainable convention. * parser.y (HASH_SLASH): New token. (modifiers, lisp_regex): New nonterminals. (var): Grammar changed to use modifiers nonterminal instead of exprs. (var_op): Rule moved closer to var. (expr): Produces lisp_regex rather than regex. (yybadtoken): Handle HASH_SLASH in the switch statement. Bugfix: HASH_BACKSLASH was not handled. * txr.1: Documented #/regex/ syntax.
author: Kaz Kylheku <kaz@kylheku.com> 2012-02-26 03:01:10 -0800
committer: Kaz Kylheku <kaz@kylheku.com> 2012-02-26 03:01:10 -0800
commit: 7649d799041454321809da89a5716afc19c34f3d (patch)
tree: a3d1e0c24558c9c5e7842bc14327a1fe88afc5a6
parent: cdd51a57490deb19a0bd3d1b77d2e2aac4d6316b (diff)
download: txr-7649d799041454321809da89a5716afc19c34f3d.tar.gz
txr-7649d799041454321809da89a5716afc19c34f3d.tar.bz2
txr-7649d799041454321809da89a5716afc19c34f3d.zip
4 files changed, 547 insertions, 473 deletions
diff --git a/ChangeLog b/ChangeLog
index d131ea71..dff5c576 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,27 @@
+2012-02-26  Kaz Kylheku  <kaz@kylheku.com>
+
+	Bug #35625
+
+	* parser.l (BSCHR, BSYM, BTOK): New lexical definitions.
+	(BRACED): New state.
+	(grammar): Refactored so that braced variables are now handled
+	in the BRACED state, allowing for lexical differences between
+	braced variables and Lisp. This allows us to have
+	the /regex/ syntax in braces, but /regex/ is just a symbol
+	in the Lisp. The new #/ token is recognized and returned
+	as HASH_SLASH. All rules reformatted to a more easily
+	maintainable convention.
+
+	* parser.y (HASH_SLASH): New token.
+	(modifiers, lisp_regex): New nonterminals.
+	(var): Grammar changed to use modifiers nonterminal instead of exprs.
+	(var_op): Rule moved closer to var.
+	(expr): Produces lisp_regex rather than regex.
+	(yybadtoken): Handle HASH_SLASH in the switch statement.
+	Bugfix: HASH_BACKSLASH was not handled.
+
+	* txr.1: Documented #/regex/ syntax.
+
 2012-02-25  Kaz Kylheku  <kaz@kylheku.com>
 
 	* arith.c: Updated copyright year.
diff --git a/parser.l b/parser.l
index a53f8514..732e2ce0 100644
--- a/parser.l
+++ b/parser.l
@@ -150,10 +150,13 @@ static wchar_t num_esc(char *num)
 
 SYM     [a-zA-Z0-9_]+
 NUM     [+\-]?[0-9]+
-NSCHR   [a-zA-Z0-9!$%&*+\-<=>?\\^_~]
+BSCHR   [a-zA-Z0-9!$%&*+\-<=>?\\^_~]
+BSYM    {BSCHR}({BSCHR}|#)*
+NSCHR   [a-zA-Z0-9!$%&*+\-<=>?\\^_~/]
 NSYM    {NSCHR}({NSCHR}|#)*
 TOK     :?{SYM}
 ATNUM   @{NUM}
+BTOK    [:@]?{BSYM}
 NTOK    [:@]?{NSYM}
 ID_END  [^a-zA-Z0-9_]
 WS      [\t ]*
@@ -171,497 +174,520 @@ UANY    {ASC}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
 UANYN   {ASCN}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} 
 UONLY   {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
 
-%x      SPECIAL NESTED REGEX STRLIT CHRLIT QSILIT
+%x      SPECIAL BRACED NESTED REGEX STRLIT CHRLIT QSILIT
 
 %%
 
-<SPECIAL,NESTED>{NUM}   {
-                          val str = string_own(utf8_dup_from(yytext));
-
-                          if (yy_top_state() == INITIAL
-                              || yy_top_state() == QSILIT)
-                            yy_pop_state();
-
-                          yylval.num = int_str(str, num(10));
-                          return NUMBER;
-                        }
-
-<NESTED,QSILIT>{ATNUM}  {
-                          val str = string_own(utf8_dup_from(yytext + 1));
-
-                          if (yy_top_state() == INITIAL
-                              || yy_top_state() == QSILIT)
-                            yy_pop_state();
-                          yylval.num = int_str(str, num(10));
-                          return METANUM;
-                        }
-
-<SPECIAL>{TOK}          |
-<NESTED>{NTOK}          {
-                          if (yy_top_state() == INITIAL
-                              || yy_top_state() == QSILIT)
-                            yy_pop_state();
-
-                          switch (yytext[0]) {
-                          case ':':
-                            yylval.lexeme = utf8_dup_from(yytext + 1);
-                            return KEYWORD;
-                          case '@':
-                            yylval.lexeme = utf8_dup_from(yytext + 1);
-                            return METAVAR;
-                          default:
-                            yylval.lexeme = utf8_dup_from(yytext);
-                            return IDENT;
-                          }
-                        }
-<NESTED>:               {
-                          if (yy_top_state() == INITIAL
-                              || yy_top_state() == QSILIT)
-                            yy_pop_state();
-                          yylval.lexeme = utf8_dup_from("");
-                          return KEYWORD;
-                        }
-
-<SPECIAL>\({WS}all{WS}\)        {
-                                  yy_pop_state();
-                                  yylval.lineno = lineno;
-                                  return ALL;
-                                }
-
-<SPECIAL>\({WS}some/{ID_END}    {
-                                  yy_push_state(NESTED);
-                                  yylval.lineno = lineno;
-                                  return SOME;
-                                }
-
-<SPECIAL>\({WS}none{WS}\)       {
-                                  yy_pop_state();
-                                  yylval.lineno = lineno;
-                                  return NONE;
-                                }
-
-<SPECIAL>\({WS}maybe{WS}\)      {
-                                  yy_pop_state();
-                                  yylval.lineno = lineno;
-                                  return MAYBE;
-                                }
-
-<SPECIAL>\({WS}cases{WS}\)      {
-                                  yy_pop_state();
-                                  yylval.lineno = lineno;
-                                  return CASES;
-                                }
-
-<SPECIAL>\({WS}choose/{ID_END}  {
-                                  yy_push_state(NESTED);
-                                  yylval.lineno = lineno;
-                                  return CHOOSE;
-                                }
-
-<SPECIAL>\({WS}gather/{ID_END}  {
-                                  yy_push_state(NESTED);
-                                  yylval.lineno = lineno;
-                                  return GATHER;
-                                }
-
-<SPECIAL>\({WS}and{WS}\)        {
-                                  yy_pop_state();
-                                  yylval.lineno = lineno;
-                                  return AND;
-                                }
-
-<SPECIAL>\({WS}or{WS}\)         {
-                                  yy_pop_state();
-                                  yylval.lineno = lineno;
-                                  return OR;
-                                }
-
-<SPECIAL>\({WS}end{WS}\)        {
-                                  yy_pop_state();
-                                  yylval.lineno = lineno;
-                                  return END;
-                                }
+<SPECIAL,NESTED,BRACED>{NUM} {
+  val str = string_own(utf8_dup_from(yytext));
+
+  if (yy_top_state() == INITIAL
+      || yy_top_state() == QSILIT)
+    yy_pop_state();
+
+  yylval.num = int_str(str, num(10));
+  return NUMBER;
+}
+
+<NESTED,QSILIT>{ATNUM} {
+  val str = string_own(utf8_dup_from(yytext + 1));
+
+  if (yy_top_state() == INITIAL
+      || yy_top_state() == QSILIT)
+    yy_pop_state();
+  yylval.num = int_str(str, num(10));
+  return METANUM;
+}
+
+<SPECIAL>{TOK} |
+<BRACED>{BTOK} |
+<NESTED>{NTOK} {
+  if (yy_top_state() == INITIAL
+      || yy_top_state() == QSILIT)
+    yy_pop_state();
+
+  switch (yytext[0]) {
+  case ':':
+    yylval.lexeme = utf8_dup_from(yytext + 1);
+    return KEYWORD;
+  case '@':
+    yylval.lexeme = utf8_dup_from(yytext + 1);
+    return METAVAR;
+  default:
+    yylval.lexeme = utf8_dup_from(yytext);
+    return IDENT;
+  }
+}
+
+<BRACED,NESTED>: {
+  if (yy_top_state() == INITIAL
+      || yy_top_state() == QSILIT)
+    yy_pop_state();
+  yylval.lexeme = utf8_dup_from("");
+  return KEYWORD;
+}
+
+<SPECIAL>\({WS}all{WS}\) {
+  yy_pop_state();
+  yylval.lineno = lineno;
+  return ALL;
+}
+
+<SPECIAL>\({WS}some/{ID_END} {
+  yy_push_state(NESTED);
+  yylval.lineno = lineno;
+  return SOME;
+}
+
+<SPECIAL>\({WS}none{WS}\) {
+  yy_pop_state();
+  yylval.lineno = lineno;
+  return NONE;
+}
+
+<SPECIAL>\({WS}maybe{WS}\) {
+  yy_pop_state();
+  yylval.lineno = lineno;
+  return MAYBE;
+}
+
+<SPECIAL>\({WS}cases{WS}\) {
+  yy_pop_state();
+  yylval.lineno = lineno;
+  return CASES;
+}
+
+<SPECIAL>\({WS}choose/{ID_END} {
+  yy_push_state(NESTED);
+  yylval.lineno = lineno;
+  return CHOOSE;
+}
+
+<SPECIAL>\({WS}gather/{ID_END} {
+  yy_push_state(NESTED);
+  yylval.lineno = lineno;
+  return GATHER;
+}
+
+<SPECIAL>\({WS}and{WS}\) {
+  yy_pop_state();
+  yylval.lineno = lineno;
+  return AND;
+}
+
+<SPECIAL>\({WS}or{WS}\) {
+  yy_pop_state();
+  yylval.lineno = lineno;
+  return OR;
+}
+
+<SPECIAL>\({WS}end{WS}\) {
+  yy_pop_state();
+  yylval.lineno = lineno;
+  return END;
+}
 
 <SPECIAL>\({WS}collect/{ID_END} {
-                                  yy_push_state(NESTED);
-                                  yylval.lineno = lineno;
-                                  return COLLECT;
-                                }
-
-<SPECIAL>\({WS}coll/{ID_END}    {
-                                  yy_push_state(NESTED);
-                                  yylval.lineno = lineno;
-                                  return COLL;
-                                }
-
-<SPECIAL>\({WS}until{WS}\)      {
-                                  yy_pop_state();
-                                  yylval.lineno = lineno;
-                                  return UNTIL;
-                                }
+  yy_push_state(NESTED);
+  yylval.lineno = lineno;
+  return COLLECT;
+}
+
+<SPECIAL>\({WS}coll/{ID_END} {
+  yy_push_state(NESTED);
+  yylval.lineno = lineno;
+  return COLL;
+}
+
+<SPECIAL>\({WS}until{WS}\) {
+  yy_pop_state();
+  yylval.lineno = lineno;
+  return UNTIL;
+}
 
 <SPECIAL>\({WS}output/{ID_END}  {
-                                  yy_push_state(NESTED);
-                                  yylval.lineno = lineno;
-                                  return OUTPUT;
-                                }
+  yy_push_state(NESTED);
+  yylval.lineno = lineno;
+  return OUTPUT;
+}
 
 <SPECIAL>\({WS}repeat/{ID_END}  {
-                                  yy_push_state(NESTED);
-                                  yylval.lineno = lineno;
-                                  return REPEAT;
-                                }
-
-
-<SPECIAL>\({WS}rep/{ID_END}     {
-                                  yy_push_state(NESTED);
-                                  yylval.lineno = lineno;
-                                  return REP;
-                                }
-
-<SPECIAL>\({WS}single{WS}\)     {
-                                  yy_pop_state();
-                                  yylval.lineno = lineno;
-                                  return SINGLE;
-                                }
-
-<SPECIAL>\({WS}first{WS}\)      {
-                                  yy_pop_state();
-                                  yylval.lineno = lineno;
-                                  return FIRST;
-                                }
-
-<SPECIAL>\({WS}last{WS}\)       {
-                                  yy_pop_state();
-                                  yylval.lineno = lineno;
-                                  return LAST;
-                                }
-
-<SPECIAL>\({WS}empty{WS}\)      {
-                                  yy_pop_state();
-                                  yylval.lineno = lineno;
-                                  return EMPTY;
-                                }
-
-<SPECIAL>\({WS}mod/{ID_END}     {
-                                  yy_push_state(NESTED);
-                                  yylval.lineno = lineno;
-                                  return MOD;
-                                }
+  yy_push_state(NESTED);
+  yylval.lineno = lineno;
+  return REPEAT;
+}
+
+
+<SPECIAL>\({WS}rep/{ID_END} {
+  yy_push_state(NESTED);
+  yylval.lineno = lineno;
+  return REP;
+}
+
+<SPECIAL>\({WS}single{WS}\) {
+  yy_pop_state();
+  yylval.lineno = lineno;
+  return SINGLE;
+}
+
+<SPECIAL>\({WS}first{WS}\) {
+  yy_pop_state();
+  yylval.lineno = lineno;
+  return FIRST;
+}
+
+<SPECIAL>\({WS}last{WS}\) {
+  yy_pop_state();
+  yylval.lineno = lineno;
+  return LAST;
+}
+
+<SPECIAL>\({WS}empty{WS}\) {
+  yy_pop_state();
+  yylval.lineno = lineno;
+  return EMPTY;
+}
+
+<SPECIAL>\({WS}mod/{ID_END} {
+  yy_push_state(NESTED);
+  yylval.lineno = lineno;
+  return MOD;
+}
 
 <SPECIAL>\({WS}modlast/{ID_END} {
-                                  yy_push_state(NESTED);
-                                  yylval.lineno = lineno;
-                                  return MODLAST;
-                                }
-
-<SPECIAL>\({WS}define/{ID_END}  {
-                                  yy_push_state(NESTED);
-                                  yylval.lineno = lineno;
-                                  return DEFINE;
-                                }
-
-<SPECIAL>\({WS}try{WS}\)        {
-                                  yy_pop_state();
-                                  yylval.lineno = lineno;
-                                  return TRY;
-                                }
-
-<SPECIAL>\({WS}catch/{ID_END}   {
-                                  yy_push_state(NESTED);
-                                  yylval.lineno = lineno;
-                                  return CATCH;
-                                }
-
-<SPECIAL>\({WS}finally{WS}\)    {
-                                  yy_pop_state();
-                                  yylval.lineno = lineno;
-                                  return FINALLY;
-                                }
-
-<NESTED>@[\(\[]         |
-<SPECIAL,NESTED>[{(\[]  {
-                          yy_push_state(NESTED);
-                          if (yytext[0] == '@') {
-                            yylval.chr = yytext[1];
-                            return yytext[1] == '(' ? METAPAR : METABKT;
-                          }
-                          yylval.lineno = lineno;
-                          return yytext[0];
-                        }
-
-<SPECIAL,NESTED>,[*]    {
-                          yylval.chr = '*';
-                          return SPLICE;
-                        }
-
-<SPECIAL,NESTED>[,']    {
-                          yylval.chr = yytext[0];
-                          return yytext[0];
-                        }
-
-<SPECIAL,NESTED>[})\]]  {
-                          yy_pop_state();
-                          if (yy_top_state() == INITIAL
-                              || yy_top_state() == QSILIT)
-                            yy_pop_state();
-                          return yytext[0];
-                        }
-
-<SPECIAL,NESTED>{WS}    { /* Eat whitespace in directive */ }
-
-<SPECIAL,NESTED>\"      {
-                          yy_push_state(STRLIT);
-                          return '"';
-                        }
-
-<SPECIAL,NESTED>#\\     {
-                          yy_push_state(CHRLIT);
-                          return HASH_BACKSLASH;
-                        }
-
-<SPECIAL,NESTED>`       {
-                          yy_push_state(QSILIT);
-                          return '`';
-                        }
-
-<NESTED>#               {
-                          return '#';
-                        }
-
-<NESTED>\.\.            {
-                          yylval.lineno = lineno;
-                          return DOTDOT;
-                        }
-
-<SPECIAL>@              {
-                          yy_pop_state();
-                          yylval.lexeme = chk_strdup(L"@");
-                          return TEXT;
-                        }
-
-<SPECIAL,NESTED>\n      {
-                          lineno++;
-                        }
-
-<SPECIAL,NESTED>[/]     {
-                          yy_push_state(REGEX);
-                          return '/';
-                        }
-
-<SPECIAL,NESTED>\.      {
-                          yylval.chr = '.';
-                          return '.';
-                        }
-
-<SPECIAL,NESTED>[\\]\n{WS}      {
-                                  yy_pop_state();
-                                  lineno++;
-                                }
+  yy_push_state(NESTED);
+  yylval.lineno = lineno;
+  return MODLAST;
+}
+
+<SPECIAL>\({WS}define/{ID_END} {
+  yy_push_state(NESTED);
+  yylval.lineno = lineno;
+  return DEFINE;
+}
+
+<SPECIAL>\({WS}try{WS}\) {
+  yy_pop_state();
+  yylval.lineno = lineno;
+  return TRY;
+}
+
+<SPECIAL>\({WS}catch/{ID_END} {
+  yy_push_state(NESTED);
+  yylval.lineno = lineno;
+  return CATCH;
+}
+
+<SPECIAL>\({WS}finally{WS}\) {
+  yy_pop_state();
+  yylval.lineno = lineno;
+  return FINALLY;
+}
+
+<SPECIAL>[{] {
+  yy_push_state(BRACED);
+  yylval.lineno = lineno;
+  return yytext[0];
+}
+
+<SPECIAL>[(\[] |
+<NESTED,BRACED>@?[(\[] {
+ yy_push_state(NESTED);
+ if (yytext[0] == '@') {
+   yylval.chr = yytext[1];
+   return yytext[1] == '(' ? METAPAR : METABKT;
+ }
+ yylval.lineno = lineno;
+ return yytext[0];
+}
+
+<NESTED>,[*] {
+  yylval.chr = '*';
+  return SPLICE;
+}
+
+<NESTED>[,'] {
+  yylval.chr = yytext[0];
+  return yytext[0];
+}
+
+<BRACED>[}] {
+  yy_pop_state();
+  if (yy_top_state() == INITIAL
+      || yy_top_state() == QSILIT)
+    yy_pop_state();
+  return yytext[0];
+}
+
+<SPECIAL,NESTED>[)\]] {
+  yy_pop_state();
+  if (yy_top_state() == INITIAL
+      || yy_top_state() == QSILIT)
+    yy_pop_state();
+  return yytext[0];
+}
+
+<SPECIAL,NESTED,BRACED>{WS} {
+  /* Eat whitespace in directive */
+}
+
+<SPECIAL,NESTED,BRACED>\" {
+  yy_push_state(STRLIT);
+  return '"';
+}
+
+<SPECIAL,NESTED,BRACED>#\\ {
+  yy_push_state(CHRLIT);
+  return HASH_BACKSLASH;
+}
+
+<SPECIAL,NESTED,BRACED>#[/] {
+  yy_push_state(REGEX);
+  return HASH_SLASH;
+}
+
+<SPECIAL,NESTED,BRACED>` {
+  yy_push_state(QSILIT);
+  return '`';
+}
+
+<NESTED,BRACED># {
+  return '#';
+}
+
+<NESTED>\.\. {
+  yylval.lineno = lineno;
+  return DOTDOT;
+}
+
+<SPECIAL>@ {
+  yy_pop_state();
+  yylval.lexeme = chk_strdup(L"@");
+  return TEXT;
+}
+
+<SPECIAL,NESTED,BRACED>\n {
+  lineno++;
+}
+
+<SPECIAL,BRACED>[/] {
+  yy_push_state(REGEX);
+  return '/';
+}
+
+<SPECIAL,NESTED>\. {
+  yylval.chr = '.';
+  return '.';
+}
+
+<SPECIAL,NESTED,BRACED>[\\]\n{WS} {
+  yy_pop_state();
+  lineno++;
+}
 
 <SPECIAL>[\\][abtnvfre ] {
-                           wchar_t lexeme[2];
-                           lexeme[0] = char_esc(yytext[1]);
-                           lexeme[1] = 0;
-                           yylval.lexeme = chk_strdup(lexeme);
-                           yy_pop_state();
-                           return TEXT;
-                         }
-
-<SPECIAL>[\\](x{HEX}+|{OCT}+)   {
-                                  wchar_t lexeme[2];
-                                  lexeme[0] = num_esc(yytext + 1);
-                                  lexeme[1] = 0;
-                                  yylval.lexeme = chk_strdup(lexeme);
-                                  yy_pop_state();
-                                  return TEXT;
-                                }
-
-<SPECIAL,NESTED>[;].*   {
-                          /* comment */
-                        }
-<SPECIAL,NESTED>{UANYN} {
-                          yyerrprepf(lit("bad character in directive: '~a'"),
-                                     string_utf8(yytext), nao);
-                          return ERRTOK;
-                        }
-
-<SPECIAL,NESTED>.       {
-                          yyerrprepf(lit("non-UTF-8 byte in directive: "
-                                         "'\\x~02x'"),
-                                   num((unsigned char) yytext[0]), nao);
-                          return ERRTOK;
-                        }
-
-<REGEX>[/]      {
-                  yylval.chr = '/';
-                  return '/';
-                }
-
-
-<REGEX>[\\][abtnvfre\\ ]        {
-                                  yylval.chr = char_esc(yytext[1]);
-                                  return REGCHAR;
-                                }
-
-<REGEX>[\\](x{HEX}+|{OCT}+);?   {
-                                  yylval.chr = num_esc(yytext + 1);
-                                  return REGCHAR;
-                                }
-
-<REGEX>{WS}[\\]\n{WS}   {
-                          lineno++;
-                        }
-
-<REGEX>\n       {
-                  lineno++;
-                  yyerrprepf(lit("newline in regex"), nao);
-                  return ERRTOK;
-                }
-
-<REGEX>[.*?+~&%]        {
-                          yylval.chr = yytext[0];
-                          return yytext[0];
-                        }
+  wchar_t lexeme[2];
+  lexeme[0] = char_esc(yytext[1]);
+  lexeme[1] = 0;
+  yylval.lexeme = chk_strdup(lexeme);
+  yy_pop_state();
+  return TEXT;
+}
+
+<SPECIAL>[\\](x{HEX}+|{OCT}+) {
+  wchar_t lexeme[2];
+  lexeme[0] = num_esc(yytext + 1);
+  lexeme[1] = 0;
+  yylval.lexeme = chk_strdup(lexeme);
+  yy_pop_state();
+  return TEXT;
+}
+
+<SPECIAL,NESTED,BRACED>[;].* {
+  /* comment */
+}
 
+<SPECIAL,NESTED,BRACED>{UANYN} {
+  yyerrprepf(lit("bad character in directive: '~a'"),
+             string_utf8(yytext), nao);
+  return ERRTOK;
+}
+
+<SPECIAL,NESTED,BRACED>. {
+  yyerrprepf(lit("non-UTF-8 byte in directive: "
+                 "'\\x~02x'"),
+             num((unsigned char) yytext[0]), nao);
+  return ERRTOK;
+}
+
+<REGEX>[/] {
+  yylval.chr = '/';
+  return '/';
+}
+
+
+<REGEX>[\\][abtnvfre\\ ] {
+  yylval.chr = char_esc(yytext[1]);
+  return REGCHAR;
+}
+
+<REGEX>[\\](x{HEX}+|{OCT}+);? {
+  yylval.chr = num_esc(yytext + 1);
+  return REGCHAR;
+}
+
+<REGEX>{WS}[\\]\n{WS} {
+  lineno++;
+}
+
+<REGEX>\n {
+  lineno++;
+  yyerrprepf(lit("newline in regex"), nao);
+  return ERRTOK;
+}
+
+<REGEX>[.*?+~&%] {
+  yylval.chr = yytext[0];
+  return yytext[0];
+}
 
 <REGEX>[\[\]\-] {
-                  yylval.chr = yytext[0];
-                  return yytext[0];
-                }
+  yylval.chr = yytext[0];
+  return yytext[0];
+}
 
-<REGEX>[()|]    {
-                  yylval.chr = yytext[0];
-                  return yytext[0];
-                }
+<REGEX>[()|] {
+  yylval.chr = yytext[0];
+  return yytext[0];
+}
 
-<REGEX>[\\].    {
-                  yylval.chr = yytext[1];
-                  return REGCHAR;
-                }
+<REGEX>[\\]. {
+  yylval.chr = yytext[1];
+  return REGCHAR;
+}
 
 <REGEX>{UANYN}  {
-                  wchar_t buf[8];
-                  utf8_from(buf, yytext);
-                  yylval.chr = buf[0];
-                  return REGCHAR;
-                }
-
-<REGEX>.        {
-                   yyerrprepf(lit("non-UTF-8 byte in regex: '\\x~02x'"),
-                              num((unsigned char) yytext[0]), nao);
-                   return ERRTOK;
-                }
-
-<INITIAL>[ ]+   {
-                  yylval.lexeme = utf8_dup_from(yytext);
-                  return SPACE;
-                }
-
-<INITIAL>({UONLY}|[^@\n ])+       {
-                                    yylval.lexeme = utf8_dup_from(yytext);
-                                    return TEXT;
-                                  }
-
-<INITIAL>\n     {
-                  lineno++;
-                  return '\n';
-                }
-
-<INITIAL>@{WS}\*        {
-                          yy_push_state(SPECIAL);
-                          return '*';
-                        }
-
-<INITIAL>@      {
-                  yy_push_state(SPECIAL);
-                }
-
-<INITIAL>^@[#;].*\n     {
-                          /* eat whole line comment */
-                          lineno++;
-                        }
-
-<INITIAL>@[#;].*        {
-                          /* comment to end of line */
-                        }
-
-<STRLIT>\"      {
-                  yy_pop_state();
-                  return yytext[0];
-                }
-
-<QSILIT>`       {
-                  yy_pop_state();
-                  return yytext[0];
-                }
-
-<STRLIT,QSILIT>[\\][abtnvfre"`'\\]      {
-                                          yylval.chr = char_esc(yytext[1]);
-                                          return LITCHAR;
-                                        }
-
-<STRLIT,QSILIT>{WS}[\\]\n{WS}   {
-                                  lineno++;
-                                }
+  wchar_t buf[8];
+  utf8_from(buf, yytext);
+  yylval.chr = buf[0];
+  return REGCHAR;
+}
+
+<REGEX>. {
+  yyerrprepf(lit("non-UTF-8 byte in regex: '\\x~02x'"),
+             num((unsigned char) yytext[0]), nao);
+  return ERRTOK;
+}
+
+<INITIAL>[ ]+ {
+  yylval.lexeme = utf8_dup_from(yytext);
+  return SPACE;
+}
+
+<INITIAL>({UONLY}|[^@\n ])+ {
+  yylval.lexeme = utf8_dup_from(yytext);
+  return TEXT;
+}
+
+<INITIAL>\n {
+  lineno++;
+  return '\n';
+}
+
+<INITIAL>@{WS}\* {
+  yy_push_state(SPECIAL);
+  return '*';
+}
+
+<INITIAL>@ {
+  yy_push_state(SPECIAL);
+}
+
+<INITIAL>^@[#;].*\n {
+  /* eat whole line comment */
+  lineno++;
+}
+
+<INITIAL>@[#;].* {
+  /* comment to end of line */
+}
+
+<STRLIT>\" {
+  yy_pop_state();
+  return yytext[0];
+}
+
+<QSILIT>` {
+  yy_pop_state();
+  return yytext[0];
+}
+
+<STRLIT,QSILIT>[\\][abtnvfre"`'\\] {
+  yylval.chr = char_esc(yytext[1]);
+  return LITCHAR;
+}
+
+<STRLIT,QSILIT>{WS}[\\]\n{WS} {
+  lineno++;
+}
                                 
 <STRLIT,QSILIT>[\\](x{HEX}+|{OCT}+);?  {
-                                          yylval.chr = num_esc(yytext+1);
-                                          return LITCHAR;
-                                        }
+  yylval.chr = num_esc(yytext+1);
+  return LITCHAR;
+}
 
 <CHRLIT>(x{HEX}+|o{OCT}+) {
-                            yylval.chr = num_esc(yytext);
-                            return LITCHAR;
-                          }
-
-<CHRLIT>{SYM}           {
-                          yylval.lexeme = utf8_dup_from(yytext);
-                          return IDENT;
-                        }
-
-<CHRLIT>[^ \t\n]        {
-                          yylval.lexeme = utf8_dup_from(yytext);
-                          return IDENT; /* hack */
-                        }
-
-<STRLIT>\n              {
-                          yyerrprepf(lit("newline in string literal"), nao);
-                          lineno++;
-                          yylval.chr = yytext[0];
-                          return ERRTOK;
-                        }
-
-<CHRLIT>\n              {
-                          yyerrprepf(lit("newline in character literal"), nao);
-                          lineno++;
-                          yylval.chr = yytext[0];
-                          return ERRTOK;
-                        }
-
-<QSILIT>\n              {
-                          yyerrprepf(lit("newline in string quasiliteral"), nao);
-                          lineno++;
-                          yylval.chr = yytext[0];
-                          return ERRTOK;
-                        }
-
-<QSILIT>@               {
-                          yy_push_state(SPECIAL);
-                        }
+  yylval.chr = num_esc(yytext);
+  return LITCHAR;
+}
+
+<CHRLIT>{SYM} {
+  yylval.lexeme = utf8_dup_from(yytext);
+  return IDENT;
+}
+
+<CHRLIT>[^ \t\n] {
+  yylval.lexeme = utf8_dup_from(yytext);
+  return IDENT; /* hack */
+}
+
+<STRLIT>\n {
+  yyerrprepf(lit("newline in string literal"), nao);
+  lineno++;
+  yylval.chr = yytext[0];
+  return ERRTOK;
+}
+
+<CHRLIT>\n {
+  yyerrprepf(lit("newline in character literal"), nao);
+  lineno++;
+  yylval.chr = yytext[0];
+  return ERRTOK;
+}
+
+<QSILIT>\n {
+  yyerrprepf(lit("newline in string quasiliteral"), nao);
+  lineno++;
+  yylval.chr = yytext[0];
+  return ERRTOK;
+}
+
+<QSILIT>@ {
+  yy_push_state(SPECIAL);
+}
 
 <STRLIT,CHRLIT,QSILIT>{UANYN} {
-                                wchar_t buf[8];
-                                utf8_from(buf, yytext);
-                                yylval.chr = buf[0];
-                                return LITCHAR;
-                              }
+  wchar_t buf[8];
+  utf8_from(buf, yytext);
+  yylval.chr = buf[0];
+  return LITCHAR;
+}
 
 <STRLIT,CHRLIT,QSILIT>. {
-                           yyerrprepf(lit("non-UTF-8 byte in literal: '\\x~02x'"),
-                                      num((unsigned char) yytext[0]), nao);
-                           return ERRTOK;
-                        }
+  yyerrprepf(lit("non-UTF-8 byte in literal: '\\x~02x'"),
+             num((unsigned char) yytext[0]), nao);
+  return ERRTOK;
+}
 
 %%
 
diff --git a/parser.y b/parser.y
index 98f408a5..e5f06b54 100644
--- a/parser.y
+++ b/parser.y
@@ -72,7 +72,7 @@ static val parsed_spec;
 %token <lineno> UNTIL COLL OUTPUT REPEAT REP SINGLE FIRST LAST EMPTY 
 %token <lineno> MOD MODLAST DEFINE TRY CATCH FINALLY
 %token <lineno> ERRTOK /* deliberately not used in grammar */
-%token <lineno> HASH_BACKSLASH DOTDOT
+%token <lineno> HASH_BACKSLASH HASH_SLASH DOTDOT
 
 %token <val> NUMBER METANUM
 
@@ -85,11 +85,11 @@ static val parsed_spec;
 %type <val> clause_parts additional_parts gather_parts additional_gather_parts
 %type <val> output_clause define_clause try_clause catch_clauses_opt
 %type <val> line elems_opt elems clause_parts_h additional_parts_h
-%type <val> text texts elem var var_op meta_expr vector
+%type <val> text texts elem var var_op modifiers meta_expr vector
 %type <val> list exprs exprs_opt expr out_clauses out_clauses_opt out_clause
 %type <val> repeat_clause repeat_parts_opt o_line
 %type <val> o_elems_opt o_elems_opt2 o_elems o_elem o_var rep_elem rep_parts_opt
-%type <val> regex regexpr regbranch
+%type <val> regex lisp_regex regexpr regbranch
 %type <val> regterm regclass regclassterm regrange
 %type <val> strlit chrlit quasilit quasi_items quasi_item litchars
 %type <chr> regchar
@@ -607,9 +607,10 @@ var : IDENT                     { $$ = list(var_s, intern(string_own($1), nil),
                                             nao); }
     | '{' IDENT '}' elem        { $$ = list(var_s, intern(string_own($2), nil),
                                             $4, nao); }
-    | '{' IDENT exprs '}'       { $$ = list(var_s, intern(string_own($2), nil),
+    | '{' IDENT modifiers '}'   { $$ = list(var_s, intern(string_own($2), nil),
                                             nil, $3, nao); }
-    | '{' IDENT exprs '}' elem  { $$ = list(var_s, intern(string_own($2), nil),
+    | '{' IDENT modifiers '}' elem  
+                                { $$ = list(var_s, intern(string_own($2), nil),
                                             $5, $3, nao); }
     | var_op IDENT              { $$ = list(var_s, intern(string_own($2), nil),
                                             nil, $1, nao); }
@@ -632,6 +633,16 @@ var : IDENT                     { $$ = list(var_s, intern(string_own($1), nil),
                                   yybadtoken(yychar, lit("variable spec")); }
     ;
 
+var_op : '*'                    { $$ = list(t, nao); }
+       ;
+
+modifiers : NUMBER              { $$ = cons($1, nil); }
+          | regex               { $$ = cons(cons(regex_compile(rest($1)), 
+                                                 rest($1)), nil);
+                                  rlcp($$, $1); }
+          | list                { $$ = cons($1, nil); }
+          ;
+
 o_var : IDENT                   { $$ = list(var_s, intern(string_own($1), nil),
                                             nao); }
       | IDENT o_elem            { $$ = list(var_s, intern(string_own($1), nil),
@@ -644,9 +655,6 @@ o_var : IDENT                   { $$ = list(var_s, intern(string_own($1), nil),
                                     yybadtoken(yychar, lit("variable spec")); }
       ;
 
-var_op : '*'                    { $$ = list(t, nao); }
-       ;
-
 vector : '#' list               { $$ = rlcp(vector_list($2), $2); }
        ;
 
@@ -705,7 +713,7 @@ expr : IDENT                    { $$ = rl(intern(string_own($1), nil),
      | list                     { $$ = $1; }
      | vector                   { $$ = $1; }
      | meta_expr                { $$ = $1; }
-     | regex                    { $$ = cons(regex_compile(rest($1)),
+     | lisp_regex               { $$ = cons(regex_compile(rest($1)),
                                             rest($1));
                                   rlcp($$, $1); }
      | chrlit                   { $$ = rl($1, num(lineno)); }
@@ -721,6 +729,14 @@ regex : '/' regexpr '/'         { $$ = cons(regex_s, $2); end_of_regex();
                                   end_of_regex(); }
       ;
 
+lisp_regex : HASH_SLASH regexpr '/'    
+                                { $$ = cons(regex_s, $2); end_of_regex();
+                                  rl($$, num(lineno)); }
+      | HASH_SLASH error        { $$ = nil;
+                                  yybadtoken(yychar, lit("regex"));
+                                  end_of_regex(); }
+      ;
+
 regexpr : regbranch                     { $$ = if3(cdr($1), 
                                                    cons(compound_s, $1),
                                                    car($1)); }
@@ -1071,6 +1087,8 @@ void yybadtoken(int tok, val context)
   case METAPAR: problem = lit("@("); break;
   case METABKT: problem = lit("@["); break;
   case DOTDOT: problem = lit(".."); break;
+  case HASH_BACKSLASH: problem = lit("#\\"); break;
+  case HASH_SLASH:     problem = lit("#/"); break;
   }
 
   if (problem != 0)
diff --git a/txr.1 b/txr.1
index 275a6e8a..34f34bcd 100644
--- a/txr.1
+++ b/txr.1
@@ -4499,6 +4499,12 @@ according to a modified namespace lookup rule.
 
 More details are given in the documentation for the dwim operator.
 
+.SS Regular Expressions
+
+In TXR Lisp, the / character can occur in symbol names, and the / token
+is a symbol. Therefore the /regex/ syntax is absent, replaced with the
+#/regex/ syntax.
+
 .SS Lisp Operators
 
 When the first element of a compound expression is an operator symbol,
author	Kaz Kylheku <kaz@kylheku.com>	2012-02-26 03:01:10 -0800
committer	Kaz Kylheku <kaz@kylheku.com>	2012-02-26 03:01:10 -0800
commit	7649d799041454321809da89a5716afc19c34f3d (patch)
tree	a3d1e0c24558c9c5e7842bc14327a1fe88afc5a6
parent	cdd51a57490deb19a0bd3d1b77d2e2aac4d6316b (diff)
download	txr-7649d799041454321809da89a5716afc19c34f3d.tar.gz txr-7649d799041454321809da89a5716afc19c34f3d.tar.bz2 txr-7649d799041454321809da89a5716afc19c34f3d.zip