diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2011-10-07 21:53:20 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2011-10-07 21:53:20 -0700 |
commit | d73ddba9be79debbc96769de34d80710f08ae0c9 (patch) | |
tree | 3f8d6d45b56a2b4b625f8942d9f032d64ba5c656 | |
parent | 81c5eee132546c90d878065722f52e70b27c359f (diff) | |
download | txr-d73ddba9be79debbc96769de34d80710f08ae0c9.tar.gz txr-d73ddba9be79debbc96769de34d80710f08ae0c9.tar.bz2 txr-d73ddba9be79debbc96769de34d80710f08ae0c9.zip |
* match.c (greedy_k): New keyword symbol variable.
(match_line): Greedy skip implemented.
(match_files): Likewise.
(match_init): New keyword symbol variable initialized.
* txr.1: Updated.
-rw-r--r-- | ChangeLog | 9 | ||||
-rw-r--r-- | match.c | 48 | ||||
-rw-r--r-- | txr.1 | 35 |
3 files changed, 78 insertions, 14 deletions
@@ -1,5 +1,14 @@ 2011-10-07 Kaz Kylheku <kaz@kylheku.com> + * match.c (greedy_k): New keyword symbol variable. + (match_line): Greedy skip implemented. + (match_files): Likewise. + (match_init): New keyword symbol variable initialized. + + * txr.1: Updated. + +2011-10-07 Kaz Kylheku <kaz@kylheku.com> + * lib.c (eol_s): New symbol variable. (obj_init): New variable initialized. @@ -48,7 +48,7 @@ int output_produced; val mingap_k, maxgap_k, gap_k, times_k, lines_k, chars_k; -val choose_s, longest_k, shortest_k; +val choose_s, longest_k, shortest_k, greedy_k; static void debugf(val fmt, ...) { @@ -491,6 +491,8 @@ static val match_line(val bindings, val specline, val dataline, val min = third(elem); cnum cmax = nump(max) ? c_num(max) : 0; cnum cmin = nump(min) ? c_num(min) : 0; + val greedy = eq(max, greedy_k); + val last_good_result = nil, last_good_pos = nil; if (!rest(specline)) break; @@ -516,17 +518,27 @@ static val match_line(val bindings, val specline, val dataline, num(reps_min), file, data_lineno, pos, nao); } - while (!max || reps_max++ < cmax) { + while (greedy || !max || reps_max++ < cmax) { val result = match_line(bindings, rest(specline), dataline, pos, spec_lineno, data_lineno, file); if (result) { - LOG_MATCH("skip", pos); - return result; + if (greedy) { + last_good_result = result; + last_good_pos = pos; + } else { + LOG_MATCH("skip", pos); + return result; + } } - if (length_str_le(dataline, pos)) + if (length_str_le(dataline, pos)) { + if (last_good_result) { + LOG_MATCH("greedy skip", last_good_pos); + return last_good_result; + } break; + } pos = plus(pos, one); } @@ -1249,6 +1261,9 @@ repeat_spec_same_data: val min = second(args); cnum cmax = nump(max) ? c_num(max) : 0; cnum cmin = nump(min) ? 
c_num(min) : 0; + val greedy = eq(max, greedy_k); + val last_good_result = nil; + cnum last_good_line = 0; if ((spec = rest(spec)) == nil) break; @@ -1276,14 +1291,22 @@ repeat_spec_same_data: num(data_lineno), nao); } - while (!max || reps_max++ < cmax) { + while (greedy || !max || reps_max++ < cmax) { result = match_files(spec, files, bindings, data, num(data_lineno)); if (result) { - debuglf(spec_linenum, lit("skip matched ~a:~a"), first(files), - num(data_lineno), nao); - break; + if (greedy) { + last_good_result = result; + last_good_line = data_lineno; + } else { + debuglf(spec_linenum, lit("skip matched ~a:~a"), first(files), + num(data_lineno), nao); + break; + } + } else { + debuglf(spec_linenum, lit("skip didn't match ~a:~a"), + first(files), num(data_lineno), nao); } if (!data) @@ -1291,6 +1314,7 @@ repeat_spec_same_data: debuglf(spec_linenum, lit("skip didn't match ~a:~a"), first(files), num(data_lineno), nao); + data = rest(data); data_lineno++; } @@ -1299,6 +1323,11 @@ repeat_spec_same_data: if (result) return result; + if (last_good_result) { + debuglf(spec_linenum, lit("greedy skip matched ~a:~a"), + first(files), num(last_good_line), nao); + return last_good_result; + } } debuglf(spec_linenum, lit("skip failed"), nao); @@ -2248,4 +2277,5 @@ void match_init(void) choose_s = intern(lit("choose"), user_package); longest_k = intern(lit("longest"), keyword_package); shortest_k = intern(lit("shortest"), keyword_package); + greedy_k = intern(lit("greedy"), keyword_package); } @@ -1152,8 +1152,8 @@ Skip and match the last character of the line: @(skip)@{last 1}@(eol) -The skip directive has an optional numeric argument. The value of this -argument limits the range of lines scanned for a match. Judicious use +The skip directive has two optional arguments. If the first argument is a +number, its value limits the range of lines scanned for a match. Judicious use of this feature can improve the performance of queries. 
Example: scan until "size: @SIZE" matches, which must happen within @@ -1190,6 +1190,24 @@ be written instead: If the symbol nil is used in place of a number, it means to scan an unlimited range of lines; thus, @(skip nil) is equivalent to @(skip). +If the symbol :greedy is used, it changes the semantics of the skip +to longest match semantics, like the regular expression * operator. +For instance, match the last three space-separated tokens of the line: + + @(skip :greedy) @a @b @c + +Without :greedy, the variable @c can match multiple tokens, +and end up with spaces in it, because nothing follows @c and +so it matches from any position which follows a space to the +end of the line. Also note the space in front of @a. Without this +space, @a will get an empty string. + +A line-oriented example of greedy skip: match the last line without +using @(eof): + + @(skip :greedy) + @last_line + There may be a second numeric argument. This specifies a minimum number of lines to skip before looking for a match. For instance, skip 15 lines and then search indefinitely for "begin ...": @@ -1209,16 +1227,23 @@ is a noop, because it means: "the remainder of the query must match starting on the very next line", or, more briefly, "skip exactly zero lines", which is the behavior if the skip directive is omitted altogether. -Here is a trick for grabbing the fourth line from the bottom of the input: +Here is one trick for grabbing the fourth line from the bottom of the input: @(skip) @fourth_from_bottom @(skip 1 3) @(eof) -Last three space-separated tokens of the line: +Or using greedy skip: + + @(skip :greedy) + @fourth_from_bottom + @(skip 1 3) - @(skip)@a @b @c@(eol) +Nongreedy skip with the @(eof) has a slight advantage because the greedy skip +will keep scanning even though it has found the correct match, then backtrack +to the last good match once it runs out of data. The regular skip with explicit +@(eof) will stop when the @(eof) matches. .SS The Trailer Directive |