diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2017-07-31 17:31:20 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2017-07-31 17:37:10 -0700 |
commit | c7edf3a752bc2522589246ff64f5a00fb96315d6 (patch) | |
tree | 9c4b043f52ea673054ccf38bc51a8310360d220c | |
parent | bf1cc0077168d7c1efa903afb5aa782d071533b6 (diff) | |
download | txr-c7edf3a752bc2522589246ff64f5a00fb96315d6.tar.gz txr-c7edf3a752bc2522589246ff64f5a00fb96315d6.tar.bz2 txr-c7edf3a752bc2522589246ff64f5a00fb96315d6.zip |
txr-013 2009-09-30txr-013
-rw-r--r-- | ChangeLog | 79 | ||||
-rw-r--r-- | Makefile | 3 | ||||
-rw-r--r-- | extract.l | 12 | ||||
-rw-r--r-- | extract.y | 21 | ||||
-rw-r--r-- | gc.c | 23 | ||||
-rw-r--r-- | gc.h | 2 | ||||
-rw-r--r-- | lib.c | 23 | ||||
-rw-r--r-- | lib.h | 5 | ||||
-rw-r--r-- | txr.1 | 38 |
9 files changed, 184 insertions, 22 deletions
@@ -1,3 +1,82 @@ +2009-09-29 Kaz Kylheku <kkylheku@gmail.com> + + Version 013 + + Some minor garbage collection issues fixed. + + Infinite looping bug fixed. + + New @(trailer) directive. + + * extract.y (match_files): Implemented trailer directive. + * extract.l (version): Bump to 013. + * lib.h (trailer): Declaration added. + * lib.c (trailer): External definition added. + (obj_init): Initialize trailer with interned symbol. + * txr.1: Documented @(trailer) and bumped version to 013. + +2009-09-29 Kaz Kylheku <kkylheku@gmail.com> + + Looping bug fixed. Certain directives could cause an infinite + loop if the query has run out of data. + + * extract.y (match_files): The semantics of the first_file_parsed + argument changes a little bit. Previously, if nil was passed, + a new lazy stream would be opened for the first file. + But this is ambiguous because nil also means empty list; + sometimes when we recurse into match_files, the data has run + out and this argument is thus nil. Now, that argument must be + the symbol t in order to mean ``open the first file''. + If the argument is nil, it unambiguously means ``we are at the end of the + current file; don't open anything''. + (extract): The initial call to match_files now passes + the symbol t for the first_file_parsed argument. + +2009-09-29 Kaz Kylheku <kkylheku@gmail.com> + + Fixing some gc issues. The test cases were found to bomb + with an assertion when run with --gc-debug enabled, + due to a garbage-collected object still being used. + This was due to the way the main function was structured. + + Also, the stack ``top'' terminology in the gc was stupidly wrong. Leaf + function frames are at the stack top, and main is near the bottom. + I was thinking of the ``top caller''. + + * Makefile (TXR_DBG_OPTS): New variable. + Tests are now run with --gc-debug, which makes them slower, + but has much greater chance of trapping gc problems. 
+ * extract.l (main): Two variables are now used for determining the + stack bottom. We don't know in which order the compiler + places local variables into a stack frame. (This is a separate + question from that of the direction of stack growth). + The call to the init function is now done right away. + The argument processing section of main does some processing + with GC objects, but the init function was being called afterward, + before the list of interned symbols is protected from garbage + collection! So with --gc-debug turned on, parts of the interned + symbol list were being garbage collected (since the variable + has not yet been added to the set of root pointers, which is + done in the init function). Also, the use of an unknown --long-option + is diagnosed properly now. + * gc.c (gc_stack_top): Renamed to gc_stack_bottom, and converted from + extern to static. + (mark): Follows rename of gc_stack_top to gc_stack_bottom. + (sweep): Eliminated the freed variable for counting freed objects, + and the associated debug message, which was not useful. + Commented why the free list is managed differently when dbg + is turned on. + (gc_init): New function. + * gc.h (gc_stack_top): Declaration removed. + (gc_init): Declaration added. + * lib.c (min): New macro. + (init): Takes two additional arguments which are used to + determine the stack bottom. The function first determines + whether the stack grows up or down. Then it takes the + greater or smaller of the two potential stack bottom pointers, + based on that. The result is passed to gc_init. + * lib.h (init): Declaration updated. 
+ 2009-09-28 Kaz Kylheku <kkylheku@gmail.com> Version 012 @@ -29,6 +29,7 @@ OPT_FLAGS := -O2 LANG_FLAGS := -ansi -D_GNU_SOURCE DIAG_FLAGS := -Wall DBG_FLAGS := -g +TXR_DBG_OPTS := --gc-debug LEXLIB := fl CFLAGS := $(LANG_FLAGS) $(DIAG_FLAGS) $(OPT_FLAGS) $(DBG_FLAGS) @@ -67,7 +68,7 @@ tests/001/%: TXR_ARGS := tests/001/data tests/002/%: TXR_OPTS := -DTESTDIR=tests/002 %.ok: %.txr - ./txr $(TXR_OPTS) $^ $(TXR_ARGS) > $(@:.ok=.out) + ./txr $(TXR_DBG_OPTS) $(TXR_OPTS) $^ $(TXR_ARGS) > $(@:.ok=.out) diff $(@:.ok=.expected) $(@:.ok=.out) %.expected: %.txr @@ -40,7 +40,7 @@ #define YY_NO_UNPUT -const char *version = "012"; +const char *version = "013"; const char *progname = "txr"; const char *spec_file = "stdin"; long lineno = 1; @@ -490,13 +490,14 @@ void hint(void) int main(int argc, char **argv) { - obj_t *stack_top = nil; + obj_t *stack_bottom_0 = nil; obj_t *spec = nil; obj_t *bindings = nil; int match_loglevel = opt_loglevel; progname = argv[0] ? argv[0] : progname; + obj_t *stack_bottom_1 = nil; - gc_stack_top = &stack_top; + init(progname, oom_realloc_handler, &stack_bottom_0, &stack_bottom_1); if (argc <= 1) { hint(); @@ -614,6 +615,10 @@ int main(int argc, char **argv) case 'b': opt_nobindings = 1; break; + case '-': + fprintf(stderr, "%s: unrecognized long option: --%s\n", + progname, popt + 1); + return EXIT_FAILURE; default: fprintf(stderr, "%s: unrecognized option: %c\n", progname, *popt); return EXIT_FAILURE; @@ -642,7 +647,6 @@ int main(int argc, char **argv) { int gc; - init(progname, oom_realloc_handler); gc = gc_state(0); yyparse(); @@ -1152,7 +1152,7 @@ obj_t *match_files(obj_t *spec, obj_t *files, obj_t *data = nil; long data_lineno = 0; - if (first_file_parsed) { + if (listp(first_file_parsed)) { data = first_file_parsed; data_lineno = c_num(data_linenum); first_file_parsed = nil; @@ -1227,6 +1227,21 @@ repeat_spec_same_data: yyerrorlf(2, spec_lineno, "skip failed"); return nil; + } else if (sym == trailer) { + if (rest(specline)) + yyerrorlf(1, 
spec_lineno, "material after trailer directive ignored"); + if ((spec = rest(spec)) == nil) + break; + + { + cons_bind (new_bindings, success, + match_files(spec, files, bindings, + data, num(data_lineno))); + + if (success) + return cons(new_bindings, cons(data, num(data_lineno))); + return nil; + } } else if (sym == block) { obj_t *name = first(rest(first_spec)); if (rest(specline)) @@ -1281,7 +1296,7 @@ repeat_spec_same_data: make a straight tail call here. */ { cons_bind (new_bindings, success, - match_files(spec, files, bindings, nil, nil)); + match_files(spec, files, bindings, t, nil)); if (success) return cons(new_bindings, if3(data, cons(data, num(data_lineno)), t)); @@ -1596,7 +1611,7 @@ repeat_spec_same_data: int extract(obj_t *spec, obj_t *files, obj_t *predefined_bindings) { cons_bind (bindings, success, match_files(spec, files, predefined_bindings, - nil, nil)); + t, nil)); if (!output_produced) { if (!opt_nobindings) { @@ -44,7 +44,7 @@ typedef struct heap { } heap_t; int opt_gc_debug; -obj_t **gc_stack_top; +static obj_t **gc_stack_bottom; static obj_t **prot_stack[PROT_STACK_SIZE]; static obj_t ***prot_stack_limit = prot_stack + PROT_STACK_SIZE; @@ -274,7 +274,7 @@ static void mark_mem_region(obj_t **bottom, obj_t **top) static void mark(void) { - obj_t *gc_stack_bottom; + obj_t *gc_stack_top; obj_t ***rootloc; /* @@ -286,14 +286,13 @@ static void mark(void) mark_obj(**rootloc); } - mark_mem_region(&gc_stack_bottom, gc_stack_top); + mark_mem_region(&gc_stack_top, gc_stack_bottom); } static void sweep(void) { heap_t *heap; int dbg = opt_gc_debug; - long freed = 0; for (heap = heap_list; heap != 0; heap = heap->next) { obj_t *block, *end; @@ -316,6 +315,13 @@ static void sweep(void) } finalize(block); block->t.type |= FREE; + /* If debugging is turned on, we want to catch instances + where a reachable object is wrongly freed. This is difficult + to do if the object is recycled soon after. 
+ So when debugging is on, the free list is FIFO + rather than LIFO, which increases our chances that the + code which is still using the object will trip on + the freed object before it is recycled. */ if (dbg) { *free_tail = block; block->t.next = nil; @@ -324,12 +330,8 @@ static void sweep(void) block->t.next = free_list; free_list = block; } - freed++; } } - - if (dbg) - fprintf(stderr, "%s: gc freed %ld blocks\n", progname, freed); } void gc(void) @@ -349,6 +351,11 @@ int gc_state(int enabled) return old; } +void gc_init(obj_t **stack_bottom) +{ + gc_stack_bottom = stack_bottom; +} + /* * Useful functions for gdb'ing. */ @@ -25,8 +25,8 @@ */ extern int opt_gc_debug; -extern obj_t **gc_stack_top; +void gc_init(obj_t **stack_bottom); obj_t *prot1(obj_t **loc); void rel1(obj_t **loc); void protect(obj_t **, ...); @@ -36,13 +36,14 @@ #include "gc.h" #define max(a, b) ((a) > (b) ? (a) : (b)) +#define min(a, b) ((a) < (b) ? (a) : (b)) obj_t *interned_syms; obj_t *null, *t, *cons_t, *str_t, *chr_t, *num_t, *sym_t, *fun_t, *vec_t; obj_t *stream_t, *lcons_t, *var, *regex, *set, *cset, *wild, *oneplus; obj_t *zeroplus, *optional, *compound, *or; -obj_t *skip, *block, *next, *fail, *accept; +obj_t *skip, *trailer, *block, *next, *fail, *accept; obj_t *all, *some, *none, *maybe, *collect, *until, *coll; obj_t *output, *single, *frst, *lst, *empty, *repeat, *rep; obj_t *flattn, *forget, *mrge, *bind, *cat, *dir; @@ -1502,6 +1503,7 @@ static void obj_init(void) compound = intern(string(strdup("compound"))); or = intern(string(strdup("or"))); skip = intern(string(strdup("skip"))); + trailer = intern(string(strdup("trailer"))); block = intern(string(strdup("block"))); next = intern(string(strdup("next"))); fail = intern(string(strdup("fail"))); @@ -1661,9 +1663,26 @@ void obj_print(obj_t *obj, FILE *out) } } -void init(const char *pn, void *(*oom)(void *, size_t)) +void init(const char *pn, void *(*oom)(void *, size_t), + obj_t **maybe_bottom_0, obj_t **maybe_bottom_1) { + 
int growsdown; + obj_t *local_bottom = nil; progname = pn; + + /* If the local_bottom variable has a smaller address than + either of the two possible bottom variables from + the initializing function, then the stack grows + downward in memory. In that case, we take the + greater of the two values to be the bottom. + Otherwise we take the smaller of the two values. */ + + growsdown = &local_bottom < maybe_bottom_0; + + gc_init(growsdown + ? max(maybe_bottom_0, maybe_bottom_1) + : min(maybe_bottom_0, maybe_bottom_1)); + obj_init(); } @@ -158,7 +158,7 @@ extern obj_t *interned_syms; extern obj_t *t, *cons_t, *str_t, *chr_t, *num_t, *sym_t, *fun_t, *vec_t; extern obj_t *stream_t, *lcons_t, *var, *regex, *set, *cset, *wild, *oneplus; extern obj_t *zeroplus, *optional, *compound, *or; -extern obj_t *skip, *block, *next, *fail, *accept; +extern obj_t *skip, *trailer, *block, *next, *fail, *accept; extern obj_t *all, *some, *none, *maybe, *collect, *until, *coll; extern obj_t *output, *single, *frst, *lst, *empty, *repeat, *rep; extern obj_t *flattn, *forget, *mrge, *bind, *cat, *dir; @@ -287,7 +287,8 @@ obj_t *mappend(obj_t *fun, obj_t *list); obj_t *sort(obj_t *list, obj_t *lessfun, obj_t *keyfun); void obj_print(obj_t *obj, FILE *); -void init(const char *progname, void *(*oom_realloc)(void *, size_t)); +void init(const char *progname, void *(*oom_realloc)(void *, size_t), + obj_t **maybe_bottom_0, obj_t **maybe_bottom_1); void dump(obj_t *obj, FILE *); char *snarf_line(FILE *in); obj_t *snarf(FILE *in); @@ -21,7 +21,7 @@ .\"IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED .\"WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. -.TH txr 1 2009-09-09 "txr v. 012" "Text Extraction Utility" +.TH txr 1 2009-09-09 "txr v. 013" "Text Extraction Utility" .SH NAME txr \- text extractor .SH SYNOPSIS @@ -637,6 +637,10 @@ Treat the remaining query as a subquery unit, and search the lines of the input file until that subquery matches somewhere. 
A skip is also an anonymous block. +.IP @(trailer) +Treat the remaining query or subquery as a match for a trailing context. That +is to say, if the remainder matches, the data position is not advanced. + .IP @(some) Match some clauses in parallel. At least one has to match. @@ -804,6 +808,38 @@ be written instead: end @BEG_SYMBOL @(end) +.SS The Trailer directive + +The trailer directive introduces a trailing portion of a query or subquery +which matches input material normally, but in the event of a successful match, +does not advance the current position. This can be used, for instance, to +cause @(collect) to match partially overlapping regions. + +Example: + + @(collect) + @line + @(trailer) + @(skip) + @line + @(end) + +This script collects each line which has a duplicate somewhere later +in the input. Without the @(trailer) directive, this does not work properly +for inputs like: + + 111 + 222 + 111 + 222 + +Without @(trailer), the first duplicate pair constitutes a match which +spans over the 222. After that pair is found, the matching continues +after the second 111. + +With the @(trailer) directive in place, the collect body, on each +iteration, only consumes the lines matched prior to @(trailer). + .SS The Some, All, None and Maybe directives These directives combine multiple subqueries, which are applied at the same position in parallel. The syntax of all three follows this example: |