summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Kaz Kylheku <kaz@kylheku.com> 2014-10-02 07:45:20 -0700
committer: Kaz Kylheku <kaz@kylheku.com> 2014-10-02 09:02:35 -0700
commit: 24ea2b8c11bcaa99f1e72df7fee8feb1ad80e5b1 (patch)
tree: d6dfb786b757737c759810aab17c938927de328a
parent: badc1562267ee35f8a8fa6f7dcc0751c5c897af6 (diff)
download: txr-24ea2b8c11bcaa99f1e72df7fee8feb1ad80e5b1.tar.gz
txr-24ea2b8c11bcaa99f1e72df7fee8feb1ad80e5b1.tar.bz2
txr-24ea2b8c11bcaa99f1e72df7fee8feb1ad80e5b1.zip
Using unified COBJ representation for both regex kinds,
rather than the list-based notation for derivative-based regexes, and an encapsulated COBJ for NFA-based regexes. * lib.c (compiled_regex_s): Variable removed. (obj_init): Initialization of compiled_regex_s removed. * lib.h (compiled_regex_s): Declaration removed. * regex.c (struct regex, regex_t): New type. (regex_destroy): Object is now a regex_t, not nfa_t. (regex_mark): New function. (regex_obj_ops): Register regex_mark operation. (reg_nullable, reg_derivative): Remove cases that handle compiled_regex_s. (regex_compile): Output of dv_compile_regex becomes a cobj now. Output of nfa_compile_regex must be embedded in regex_t structure. (regexp): Drop the check for compiled_regex_s. (regex_nfa): Function removed. (regex_run, regex_machine_init): Use cobj_handle to retrieve regex_t * pointer and dispatch appropriate code based on regex->kind.
-rw-r--r-- ChangeLog 25
-rw-r--r-- lib.c 3
-rw-r--r-- lib.h 2
-rw-r--r-- regex.c 71
4 files changed, 69 insertions, 32 deletions
diff --git a/ChangeLog b/ChangeLog
index fd5d7dc3..01d897be 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,28 @@
+2014-10-02 Kaz Kylheku <kaz@kylheku.com>
+
+ Using unified COBJ representation for both regex kinds,
+ rather than the list-based notation for derivative-based
+ regexes, and an encapsulated COBJ for NFA-based regexes.
+
+ * lib.c (compiled_regex_s): Variable removed.
+ (obj_init): Initialization of compiled_regex_s removed.
+
+ * lib.h (compiled_regex_s): Declaration removed.
+
+ * regex.c (struct regex, regex_t): New type.
+ (regex_destroy): Object is now a regex_t, not nfa_t.
+ (regex_mark): New function.
+ (regex_obj_ops): Register regex_mark operation.
+ (reg_nullable, reg_derivative): Remove cases that handle
+ compiled_regex_s.
+ (regex_compile): Output of dv_compile_regex becomes
+ a cobj now. Output of nfa_compile_regex must be
+ embedded in regex_t structure.
+ (regexp): Drop the check for compiled_regex_s.
+ (regex_nfa): Function removed.
+ (regex_run, regex_machine_init): Use cobj_handle to retrieve regex_t *
+ pointer and dispatch appropriate code based on regex->kind.
+
2014-09-30 Kaz Kylheku <kaz@kylheku.com>
* genman.txr: Add PayPal donation button.
diff --git a/lib.c b/lib.c
index fbe6743b..0b107c93 100644
--- a/lib.c
+++ b/lib.c
@@ -73,7 +73,7 @@ val null_s, t, cons_s, str_s, chr_s, fixnum_s, sym_s, pkg_s, fun_s, vec_s;
val lit_s, stream_s, hash_s, hash_iter_s, lcons_s, lstr_s, cobj_s, cptr_s;
val env_s, bignum_s, float_s;
val var_s, expr_s, regex_s, chset_s, set_s, cset_s, wild_s, oneplus_s;
-val nongreedy_s, compiled_regex_s;
+val nongreedy_s;
val quote_s, qquote_s, unquote_s, splice_s;
val sys_qquote_s, sys_unquote_s, sys_splice_s;
val zeroplus_s, optional_s, compl_s, compound_s;
@@ -6120,7 +6120,6 @@ static void obj_init(void)
expr_s = intern(lit("expr"), system_package);
regex_s = intern(lit("regex"), system_package);
nongreedy_s = intern(lit("ng0+"), user_package);
- compiled_regex_s = intern(lit("compiled-regex"), system_package);
quote_s = intern(lit("quote"), user_package);
qquote_s = intern(lit("qquote"), user_package);
unquote_s = intern(lit("unquote"), user_package);
diff --git a/lib.h b/lib.h
index 5198feac..e0afffac 100644
--- a/lib.h
+++ b/lib.h
@@ -353,7 +353,7 @@ extern val sym_s, pkg_s, fun_s, vec_s;
extern val stream_s, hash_s, hash_iter_s, lcons_s, lstr_s, cobj_s, cptr_s;
extern val env_s, bignum_s, float_s;
extern val var_s, expr_s, regex_s, chset_s, set_s, cset_s, wild_s, oneplus_s;
-extern val nongreedy_s, compiled_regex_s;
+extern val nongreedy_s;
extern val quote_s, qquote_s, unquote_s, splice_s;
extern val sys_qquote_s, sys_unquote_s, sys_splice_s;
extern val zeroplus_s, optional_s, compl_s, compound_s;
diff --git a/regex.c b/regex.c
index 313a1a76..8c93c964 100644
--- a/regex.c
+++ b/regex.c
@@ -54,6 +54,14 @@ typedef struct nfa {
nfa_state_t *accept;
} nfa_t;
+typedef struct regex {
+ enum { REGEX_NFA, REGEX_DV } kind;
+ union {
+ struct nfa nfa;
+ val dv;
+ } r;
+} regex_t;
+
/*
* Result from regex_machine_feed.
* These values have two meanings, based on whether
@@ -1279,19 +1287,27 @@ static cnum regex_machine_match_span(regex_machine_t *regm)
return regm->n.last_accept_pos;
}
-static void regex_destroy(val regex)
+static void regex_destroy(val obj)
+{
+ regex_t *regex = (regex_t *) obj->co.handle;
+ if (regex->kind == REGEX_NFA)
+ nfa_free(regex->r.nfa);
+ free(regex);
+ obj->co.handle = 0;
+}
+
+static void regex_mark(val obj)
{
- nfa_t *pnfa = (nfa_t *) regex->co.handle;
- nfa_free(*pnfa);
- free(pnfa);
- regex->co.handle = 0;
+ regex_t *regex = (regex_t *) obj->co.handle;
+ if (regex->kind == REGEX_DV)
+ gc_mark(regex->r.dv);
}
static struct cobj_ops regex_obj_ops = {
eq,
cobj_print_op,
regex_destroy,
- cobj_mark_op,
+ regex_mark,
cobj_hash_op
};
@@ -1406,7 +1422,7 @@ static val reg_nullable(val exp)
return nil;
} else if (sym == compound_s) {
return reg_nullable_list(args);
- } else if (sym == oneplus_s || sym == compiled_regex_s) {
+ } else if (sym == oneplus_s) {
return reg_nullable(first(args));
} else if (sym == zeroplus_s || sym == optional_s) {
return t;
@@ -1537,8 +1553,6 @@ static val reg_derivative(val exp, val ch)
if (sym == set_s || sym == cset_s) {
internal_error("uncompiled regex passed to reg_derivative");
- } else if (sym == compiled_regex_s) {
- return reg_derivative(first(args), ch);
} else if (sym == compound_s) {
return reg_derivative_list(args, ch);
} else if (sym == optional_s) {
@@ -1648,33 +1662,30 @@ val regex_compile(val regex_sexp, val error_stream)
regex_sexp = regex_parse(regex_sexp, default_bool_arg(error_stream));
return if2(regex_sexp, regex_compile(regex_sexp, error_stream));
} else if (opt_derivative_regex || regex_requires_dv(regex_sexp)) {
- return cons(compiled_regex_s, cons(dv_compile_regex(regex_sexp), nil));
+ regex_t *regex = (regex_t *) chk_malloc(sizeof *regex);
+ regex->kind = REGEX_DV;
+ regex->r.dv = dv_compile_regex(regex_sexp);
+ return cobj((mem_t *) regex, regex_s, &regex_obj_ops);
} else {
- nfa_t *pnfa = (nfa_t *) chk_malloc(sizeof *pnfa);
- *pnfa = nfa_compile_regex(regex_sexp);
- return cobj((mem_t *) pnfa, regex_s, &regex_obj_ops);
+ regex_t *regex = (regex_t *) chk_malloc(sizeof *regex);
+ regex->kind = REGEX_NFA;
+ regex->r.nfa = nfa_compile_regex(regex_sexp);
+ return cobj((mem_t *) regex, regex_s, &regex_obj_ops);
}
}
val regexp(val obj)
{
- if (consp(obj))
- return eq(car(obj), compiled_regex_s);
-
return typeof(obj) == regex_s ? t : nil;
}
-static nfa_t *regex_nfa(val reg)
-{
- assert (typeof(reg) == regex_s);
- return (nfa_t *) reg->co.handle;
-}
-
static cnum regex_run(val compiled_regex, const wchar_t *str)
{
- if (consp(compiled_regex))
- return dv_run(compiled_regex, str);
- return nfa_run(*regex_nfa(compiled_regex), str);
+ regex_t *regex = (regex_t *) cobj_handle(compiled_regex, regex_s);
+
+ return if3(regex->kind == REGEX_DV,
+ dv_run(regex->r.dv, str),
+ nfa_run(regex->r.nfa, str));
}
/*
@@ -1706,14 +1717,16 @@ static void regex_machine_reset(regex_machine_t *regm)
regm->n.last_accept_pos = regm->n.count;
}
-static void regex_machine_init(regex_machine_t *regm, val regex)
+static void regex_machine_init(regex_machine_t *regm, val reg)
{
- if (consp(regex)) {
+ regex_t *regex = (regex_t *) cobj_handle(reg, regex_s);
+
+ if (regex->kind == REGEX_DV) {
regm->n.is_nfa = 0;
- regm->d.regex = regex;
+ regm->d.regex = regex->r.dv;
} else {
regm->n.is_nfa = 1;
- regm->n.nfa = *regex_nfa(regex);
+ regm->n.nfa = regex->r.nfa;
regm->n.move = (nfa_state_t **)
chk_malloc(NFA_SET_SIZE * sizeof *regm->n.move);
regm->n.clos = (nfa_state_t **)