summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Kaz Kylheku <kaz@kylheku.com> 2009-11-15 12:20:27 -0800
committer Kaz Kylheku <kaz@kylheku.com> 2009-11-15 12:20:27 -0800
commit 24130a3641ae179816ca5672bde12cc04be17813 (patch)
tree 81916c7061137fbe4209e5aa5827bd8fb1630a4c
parent 19cdd145d345c4560ccc9d841025415e9696b64e (diff)
downloadtxr-24130a3641ae179816ca5672bde12cc04be17813.tar.gz
txr-24130a3641ae179816ca5672bde12cc04be17813.tar.bz2
txr-24130a3641ae179816ca5672bde12cc04be17813.zip
Version 021 preparation.txr-021
Bumped version numbers, and cleaned up trailing whitespace from some files.
-rw-r--r-- ChangeLog 45
-rwxr-xr-x configure 16
-rw-r--r-- hash.c 4
-rw-r--r-- lib.c 2
-rw-r--r-- stream.c 4
-rw-r--r-- txr.1 2
-rw-r--r-- txr.c 4
-rw-r--r-- utf8.c 4
8 files changed, 63 insertions, 18 deletions
diff --git a/ChangeLog b/ChangeLog
index 8b1a7ea7..1d436115 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,48 @@
+2009-11-15 Kaz Kylheku <kkylheku@gmail.com>
+
+ Version 021.
+
+ Text is represented using wide characters now. Queries and data
+ are parsed as UTF-8, so extended characters can be directly used.
+ Numeric character escapes can go up to \x10FFFF. (More limited on
+ platforms where wchar_t is 16 bit). Regular expressions support
+ extended characters, directly or through escapes. Regex character set
+ matches can use full Unicode range. New test case 005 exercises
+ some of these features over Japanese text.
+
+ Failed exit status of pipes, and file close errors are exceptions now.
+
+ Bug fixed in regex character classes.
+
+ Fixed off-by-one error in lazy string implementation, which broke
+ some uses of the @(freeform) directive.
+
+ Fixed all instances of gc bug 28086: objects being prematurely
+ reclaimed. This showed up when compiling for profiling (gcc -pg).
+
+ The --cc argument of the configure script works properly now.
+
+ Numbers and characters are unboxed types now, encoded directly in
+ the (obj_t *) value. Lowest two bits of (obj_t *) are a tag
+ distinguishing characters, integers and pointers. The program
+ performs better from not having to cons memory when operating
+ on numbers and characters.
+
+ Discovered bug in glibc: getwc function segfaults when applied to
+ stream returned by popen. Worked around this bug. Bug is filed
+ as 10958 in glibc bugzilla.
+
+ Internals:
+
+ Hash tables implemented. Hash tables support weak keys and values.
+
+ * configure, hash.c, lib.c, stream.c, utf8.c: Removed trailing
+ whitespace from some lines.
+
+ * txr.c (version): Bumped to 021. Removed trailing whitespace.
+
+ * txr.1: Bumped version to 021.
+
2009-11-14 Kaz Kylheku <kkylheku@gmail.com>
Provide both char * and unsigned char * interfaces in UTF-8 module.
diff --git a/configure b/configure
index 6e26ea9e..3ca925dd 100755
--- a/configure
+++ b/configure
@@ -127,7 +127,7 @@ usage: $0 { variable=value }*
The configure script prepares txr program for compilation and installation.
To configure a program means to establish the values of Makefile variables
-which influence how the software is built, where it is installed.
+which influence how the software is built, where it is installed.
These variables can also influence what features are present in the
software, and can determine various defaults for those behaviors which are
dynamically configurable when the software is run.
@@ -154,7 +154,7 @@ Long-option style:
No variables are required. The configure script establishes default values
for any variables which are needed by the build, but which are not specified
-on the command line.
+on the command line.
After running $0, check that the config.make contents are sane.
@@ -165,7 +165,7 @@ prefix [$prefix]
Specifies root directory where the software will ultimately be installed and
run from.
-
+
install_prefix [$install_prefix]
Specifies an extra path prefix that will be prepended to all paths during
@@ -175,7 +175,7 @@ install_prefix [$install_prefix]
bindir [$bindir]
Specifies where the program executable will be installed.
-
+
datadir [$datadir]
Specifies where read-only program data is to be stored.
@@ -234,7 +234,7 @@ lang_flags [$lang_flags]
Specifies compiler flags which control the C language dialect and standard
conformance in the language and header files. The txr program is written
- in C90, and requires POSIX and possibly other extensions.
+ in C90, and requires POSIX and possibly other extensions.
diag_flags [$diag_flags]
@@ -256,7 +256,7 @@ txr_dbg_opts [$txr_dbg_opts]
of "make tests".
!
exit 1
-fi
+fi
#
# Variables are read, --help wasn't given, so let's configure!
@@ -266,7 +266,7 @@ fi
txr_ver=019
#
-# The all important banner.
+# The all important banner.
#
if [ $txr_ver ] ; then
@@ -280,7 +280,7 @@ printf "+%s+\n|%s|\n+%s+\n" $banner_box "$banner_text" $banner_box
#
# From here on in, we bail if any command fails.
-#
+#
set -e
diff --git a/hash.c b/hash.c
index 0b7c099d..f7b43f7f 100644
--- a/hash.c
+++ b/hash.c
@@ -60,7 +60,7 @@ static struct hash *reachable_weak_hashes;
/*
* This is an adaptation of hashpjw, from Compilers: Principles, Techniques
* and Tools, Aho, Sethi, Ullman, 1988. P. 436. The register is wider by
- * a few bits, and we bring down five overflow bits instead of four.
+ * a few bits, and we bring down five overflow bits instead of four.
* We don't reduce the final result modulo a small prime, but leave it
* as it is; let the hashing routines do their own reduction.
*/
@@ -100,7 +100,7 @@ static long ll_hash(obj_t *obj)
long i, h = ll_hash(obj->v.vec[vec_fill]);
long len = c_num(fill);
- for (i = 0; i < len; i++)
+ for (i = 0; i < len; i++)
h = (h + ll_hash(obj->v.vec[i])) & NUM_MAX;
return h;
diff --git a/lib.c b/lib.c
index 25e8198e..05927cb9 100644
--- a/lib.c
+++ b/lib.c
@@ -141,7 +141,7 @@ obj_t *type_check2(obj_t *obj, int t1, int t2)
obj_t *type_check3(obj_t *obj, int t1, int t2, int t3)
{
- if (!is_ptr(obj) || (obj->t.type != t1 && obj->t.type != t2
+ if (!is_ptr(obj) || (obj->t.type != t1 && obj->t.type != t2
&& obj->t.type != t3))
type_mismatch(L"~s is not of type ~s, ~s nor ~s", obj,
code2type(t1), code2type(t2), code2type(t3), nao);
diff --git a/stream.c b/stream.c
index 2ec402bb..7d31c89d 100644
--- a/stream.c
+++ b/stream.c
@@ -169,7 +169,7 @@ static obj_t *stdio_put_string(obj_t *stream, const wchar_t *s)
static obj_t *stdio_put_char(obj_t *stream, wchar_t ch)
{
struct stdio_handle *h = (struct stdio_handle *) stream->co.handle;
- return (h->f && putwc(ch, h->f) != WEOF)
+ return (h->f && putwc(ch, h->f) != WEOF)
? t : stdio_maybe_write_error(stream);
}
@@ -396,7 +396,7 @@ struct byte_input {
static obj_t *byte_in_get_byte(obj_t *stream)
{
struct byte_input *bi = (struct byte_input *) stream->co.handle;
-
+
if (bi->index < bi->size)
return num(bi->buf[bi->index++]);
return nil;
diff --git a/txr.1 b/txr.1
index 5b2b4fbc..4a54c357 100644
--- a/txr.1
+++ b/txr.1
@@ -21,7 +21,7 @@
.\"IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
.\"WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
-.TH txr 1 2009-10-17 "txr v. 020" "Text Extraction Utility"
+.TH txr 1 2009-11-15 "txr v. 021" "Text Extraction Utility"
.SH NAME
txr \- text extractor
.SH SYNOPSIS
diff --git a/txr.c b/txr.c
index dcf316ad..bb2814a5 100644
--- a/txr.c
+++ b/txr.c
@@ -42,7 +42,7 @@
#include "utf8.h"
#include "txr.h"
-const wchar_t *version = L"020";
+const wchar_t *version = L"021";
const wchar_t *progname = L"txr";
const wchar_t *spec_file = L"stdin";
obj_t *spec_file_str;
@@ -307,7 +307,7 @@ static int txr_main(int argc, char **argv)
if (specstring) {
spec_file = L"cmdline";
spec_file_str = string(spec_file);
- if (gt(length_str(specstring), zero) &&
+ if (gt(length_str(specstring), zero) &&
chr_str(specstring, minus(length_str(specstring), one)) != chr('\n'))
specstring = cat_str(list(specstring, string(L"\n"), nao), nil);
yyin_stream = make_string_byte_input_stream(specstring);
diff --git a/utf8.c b/utf8.c
index a5a1fc7e..5936e43a 100644
--- a/utf8.c
+++ b/utf8.c
@@ -231,7 +231,7 @@ wint_t utf8_decode(utf8_decoder_t *ud, int (*get)(void *ctx), void *ctx)
wchar_t wch = 0xdc00 | ud->buf[ud->back];
ud->tail = ud->back = (ud->back + 1) % 8;
ud->state = utf8_init;
- return wch;
+ return wch;
}
}
@@ -268,7 +268,7 @@ wint_t utf8_decode(utf8_decoder_t *ud, int (*get)(void *ctx), void *ctx)
wchar_t wch = 0xdc00 | ud->buf[ud->back];
ud->tail = ud->back = (ud->back + 1) % 8;
ud->state = utf8_init;
- return wch;
+ return wch;
}
break;
}