summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Kaz Kylheku <kaz@kylheku.com> 2016-09-16 20:14:29 -0700
committer Kaz Kylheku <kaz@kylheku.com> 2016-09-16 20:14:29 -0700
commit fd22fbc29fc0ee1a726880fa3bab64e187433854 (patch)
tree a6d9dd45cc928333d34645cfa8fe6c2cea0adb26
parent 3db9d4cb24375fc8fb84d0024f4e4b44be25c188 (diff)
download txr-fd22fbc29fc0ee1a726880fa3bab64e187433854.tar.gz
txr-fd22fbc29fc0ee1a726880fa3bab64e187433854.tar.bz2
txr-fd22fbc29fc0ee1a726880fa3bab64e187433854.zip
Bugfix in tok-str: empty-match regexes.
* lib.c (tok_str): Only continue the loop if the new position isn't past the end of the string. This fixes the problem of recognizing an empty token past the last character in the string. Also, advance new_pos by one if there is a zero-length match. Then don't advance pos by one later in that case. This fixes the bug that we collect empty separator pieces *and* empty tokens, and also prevents empty matches before the first character of the string. Logic in tok_str is now very similar to that in split_str_keep.
-rw-r--r-- lib.c 17
1 files changed, 7 insertions, 10 deletions
diff --git a/lib.c b/lib.c
index 56d4cf88..20ffd069 100644
--- a/lib.c
+++ b/lib.c
@@ -3926,30 +3926,27 @@ val tok_str(val str, val tok_regex, val keep_sep)
{
list_collect_decl (out, iter);
val pos = zero;
+ val slen = length(str);
keep_sep = default_bool_arg(keep_sep);
for (;;) {
cons_bind (new_pos, len, search_regex(str, tok_regex, pos, nil));
- val end;
- if (!len) {
+ if (len == zero && new_pos != slen)
+ new_pos = plus(new_pos, one);
+
+ if (new_pos == slen || !len) {
if (keep_sep)
iter = list_collect(iter, sub_str(str, pos, t));
break;
}
- end = plus(new_pos, len);
-
if (keep_sep)
iter = list_collect(iter, sub_str(str, pos, new_pos));
- iter = list_collect(iter, sub_str(str, new_pos, end));
-
- pos = end;
-
- if (len == zero)
- pos = plus(pos, one);
+ pos = plus(new_pos, len);
+ iter = list_collect(iter, sub_str(str, new_pos, pos));
}
return out;