summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2011-10-07 21:53:20 -0700
committerKaz Kylheku <kaz@kylheku.com>2011-10-07 21:53:20 -0700
commitd73ddba9be79debbc96769de34d80710f08ae0c9 (patch)
tree3f8d6d45b56a2b4b625f8942d9f032d64ba5c656
parent81c5eee132546c90d878065722f52e70b27c359f (diff)
downloadtxr-d73ddba9be79debbc96769de34d80710f08ae0c9.tar.gz
txr-d73ddba9be79debbc96769de34d80710f08ae0c9.tar.bz2
txr-d73ddba9be79debbc96769de34d80710f08ae0c9.zip
* match.c (greedy_k): New keyword symbol variable.
(match_line): Greedy skip implemented. (match_files): Likewise. (match_init): New keyword symbol variable initialized. * txr.1: Updated.
-rw-r--r--ChangeLog9
-rw-r--r--match.c48
-rw-r--r--txr.135
3 files changed, 78 insertions, 14 deletions
diff --git a/ChangeLog b/ChangeLog
index 021132ca..322a5752 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,14 @@
2011-10-07 Kaz Kylheku <kaz@kylheku.com>
+ * match.c (greedy_k): New keyword symbol variable.
+ (match_line): Greedy skip implemented.
+ (match_files): Likewise.
+ (match_init): New keyword symbol variable initialized.
+
+ * txr.1: Updated.
+
+2011-10-07 Kaz Kylheku <kaz@kylheku.com>
+
* lib.c (eol_s): New symbol variable.
(obj_init): New variable initialized.
diff --git a/match.c b/match.c
index b310afcb..3d5f3531 100644
--- a/match.c
+++ b/match.c
@@ -48,7 +48,7 @@
int output_produced;
val mingap_k, maxgap_k, gap_k, times_k, lines_k, chars_k;
-val choose_s, longest_k, shortest_k;
+val choose_s, longest_k, shortest_k, greedy_k;
static void debugf(val fmt, ...)
{
@@ -491,6 +491,8 @@ static val match_line(val bindings, val specline, val dataline,
val min = third(elem);
cnum cmax = nump(max) ? c_num(max) : 0;
cnum cmin = nump(min) ? c_num(min) : 0;
+ val greedy = eq(max, greedy_k);
+ val last_good_result = nil, last_good_pos = nil;
if (!rest(specline))
break;
@@ -516,17 +518,27 @@ static val match_line(val bindings, val specline, val dataline,
num(reps_min), file, data_lineno, pos, nao);
}
- while (!max || reps_max++ < cmax) {
+ while (greedy || !max || reps_max++ < cmax) {
val result = match_line(bindings, rest(specline), dataline, pos,
spec_lineno, data_lineno, file);
if (result) {
- LOG_MATCH("skip", pos);
- return result;
+ if (greedy) {
+ last_good_result = result;
+ last_good_pos = pos;
+ } else {
+ LOG_MATCH("skip", pos);
+ return result;
+ }
}
- if (length_str_le(dataline, pos))
+ if (length_str_le(dataline, pos)) {
+ if (last_good_result) {
+ LOG_MATCH("greedy skip", last_good_pos);
+ return last_good_result;
+ }
break;
+ }
pos = plus(pos, one);
}
@@ -1249,6 +1261,9 @@ repeat_spec_same_data:
val min = second(args);
cnum cmax = nump(max) ? c_num(max) : 0;
cnum cmin = nump(min) ? c_num(min) : 0;
+ val greedy = eq(max, greedy_k);
+ val last_good_result = nil;
+ cnum last_good_line = 0;
if ((spec = rest(spec)) == nil)
break;
@@ -1276,14 +1291,22 @@ repeat_spec_same_data:
num(data_lineno), nao);
}
- while (!max || reps_max++ < cmax) {
+ while (greedy || !max || reps_max++ < cmax) {
result = match_files(spec, files, bindings,
data, num(data_lineno));
if (result) {
- debuglf(spec_linenum, lit("skip matched ~a:~a"), first(files),
- num(data_lineno), nao);
- break;
+ if (greedy) {
+ last_good_result = result;
+ last_good_line = data_lineno;
+ } else {
+ debuglf(spec_linenum, lit("skip matched ~a:~a"), first(files),
+ num(data_lineno), nao);
+ break;
+ }
+ } else {
+ debuglf(spec_linenum, lit("skip didn't match ~a:~a"),
+ first(files), num(data_lineno), nao);
}
if (!data)
@@ -1291,6 +1314,7 @@ repeat_spec_same_data:
debuglf(spec_linenum, lit("skip didn't match ~a:~a"), first(files),
num(data_lineno), nao);
+
data = rest(data);
data_lineno++;
}
@@ -1299,6 +1323,11 @@ repeat_spec_same_data:
if (result)
return result;
+ if (last_good_result) {
+ debuglf(spec_linenum, lit("greedy skip matched ~a:~a"),
+ first(files), num(last_good_line), nao);
+ return last_good_result;
+ }
}
debuglf(spec_linenum, lit("skip failed"), nao);
@@ -2248,4 +2277,5 @@ void match_init(void)
choose_s = intern(lit("choose"), user_package);
longest_k = intern(lit("longest"), keyword_package);
shortest_k = intern(lit("shortest"), keyword_package);
+ greedy_k = intern(lit("greedy"), keyword_package);
}
diff --git a/txr.1 b/txr.1
index deda5f7f..62bbcd8b 100644
--- a/txr.1
+++ b/txr.1
@@ -1152,8 +1152,8 @@ Skip and match the last character of the line:
@(skip)@{last 1}@(eol)
-The skip directive has an optional numeric argument. The value of this
-argument limits the range of lines scanned for a match. Judicious use
+The skip directive has two optional arguments. If the first argument is a
+number, its value limits the range of lines scanned for a match. Judicious use
of this feature can improve the performance of queries.
Example: scan until "size: @SIZE" matches, which must happen within
@@ -1190,6 +1190,24 @@ be written instead:
If the symbol nil is used in place of a number, it means to scan
an unlimited range of lines; thus, @(skip nil) is equivalent to @(skip).
+If the symbol :greedy is used, it changes the semantics of the skip
+to longest match semantics, like the regular expression * operator.
+For instance, match the last three space-separated tokens of the line:
+
+ @(skip :greedy) @a @b @c
+
+Without :greedy, the variable @c can match multiple tokens,
+and end up with spaces in it, because nothing follows @c and
+so it matches from any position which follows a space to the
+end of the line. Also note the space in front of @a. Without this
+space, @a will get an empty string.
+
+A line oriented example of greedy skip: match the last line without
+using @(eof):
+
+ @(skip :greedy)
+ @last_line
+
There may be a second numeric argument. This specifies a minimum
number of lines to skip before looking for a match. For instance,
skip 15 lines and then search indefinitely for "begin ...":
@@ -1209,16 +1227,23 @@ is a noop, because it means: "the remainder of the query must match starting on
the very next line", or, more briefly, "skip exactly zero lines", which is the
behavior if the skip directive is omitted altogether.
-Here is a trick for grabbing the fourth line from the bottom of the input:
+Here is one trick for grabbing the fourth line from the bottom of the input:
@(skip)
@fourth_from_bottom
@(skip 1 3)
@(eof)
-Last three space-separated tokens of the line:
+Or using greedy skip:
+
+ @(skip :greedy)
+ @fourth_from_bottom
+ @(skip 1 3)
- @(skip)@a @b @c@(eol)
+Nongreedy skip with @(eof) has a slight advantage because the greedy skip
+will keep scanning even though it has found the correct match, then backtrack
+to the last good match once it runs out of data. The regular skip with explicit
+@(eof) will stop when the @(eof) matches.
.SS The Trailer Directive