diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2009-11-15 12:20:27 -0800 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2009-11-15 12:20:27 -0800 |
commit | 24130a3641ae179816ca5672bde12cc04be17813 (patch) | |
tree | 81916c7061137fbe4209e5aa5827bd8fb1630a4c | |
parent | 19cdd145d345c4560ccc9d841025415e9696b64e (diff) | |
download | txr-24130a3641ae179816ca5672bde12cc04be17813.tar.gz txr-24130a3641ae179816ca5672bde12cc04be17813.tar.bz2 txr-24130a3641ae179816ca5672bde12cc04be17813.zip |
Version 021 preparation. (tag: txr-021)
Bumped version numbers, and cleaned up trailing whitespace from some files.
-rw-r--r-- | ChangeLog | 45 | ||||
-rwxr-xr-x | configure | 16 | ||||
-rw-r--r-- | hash.c | 4 | ||||
-rw-r--r-- | lib.c | 2 | ||||
-rw-r--r-- | stream.c | 4 | ||||
-rw-r--r-- | txr.1 | 2 | ||||
-rw-r--r-- | txr.c | 4 | ||||
-rw-r--r-- | utf8.c | 4 |
8 files changed, 63 insertions, 18 deletions
@@ -1,3 +1,48 @@ +2009-11-15 Kaz Kylheku <kkylheku@gmail.com> + + Version 021. + + Text is represented using wide characters now. Queries and data + are parsed as UTF-8, so extended characters can be directly used. + Numeric character escapes can go up to \x10FFF. (More limited on + platforms where wchar_t is 16 bit). Regular expressions support + extended characters, directly or through escapes. Regex character set + matches can use full Unicode range. New test case 005 exercises + some of these features over Japanese text. + + Failed exit status of pipes, and file close errors are exceptions now. + + Bug fixed in regex character classes. + + Fixed off-by-one error in lazy string implementation, which broke + some uses of the @(freeform) directive. + + Fixed all instances of gc bug 28086: objects being prematurely + reclaimed. This showed up when compiling for profiling (gcc -pg). + + The --cc argument of the configure script works properly now. + + Numbers and characters are unboxed types now, encoded directly in + the (obj_t *) value. Lowest two bits of (obj_t *) are a tag + distinguishing characters, integers and pointers. The program + performs better from not having to cons memory when operating + on numbers and characters. + + Discovered bug in glibc: getwc function segfaults when applied to + stream returned by popen. Worked around this bug. Bug is filed + as 10958 in glibc bugzilla. + + Internals: + + Hash tables implemented. Hash tables support weak keys and values. + + * configure, hash.c, lib.c, stream.c, utf8.c: Removed trailing + from some lines. + + * txr.c (version): Bumped to 021. Removed trailing whitespace. + + * txr.1: Bumped version to 021. + 2009-11-14 Kaz Kylheku <kkylheku@gmail.com> Provide both char * and unsigned char * interfaces in UTF-8 module. @@ -127,7 +127,7 @@ usage: $0 { variable=value }* The configure script prepares txr program for compilation and installation. 
To configure a program means to establish the values of Makefile variables -which influence how the software is built, where it is installed. +which influence how the software is built, where it is installed. These variables can also influence what features are present in the software, and can determine various defaults for those behaviors which are dynamically configurable when the software is run. @@ -154,7 +154,7 @@ Long-option style: No variables are required. The configure script establishes default values for any variables which are needed by the build, but which are not specified -on the command line. +on the command line. After running $0, check that the config.make contents are sane. @@ -165,7 +165,7 @@ prefix [$prefix] Specifies root directory where the software will ultimately be installed and run from. - + install_prefix [$install_prefix] Specifies an extra path prefix that will be prepended to all paths during @@ -175,7 +175,7 @@ install_prefix [$install_prefix] bindir [$bindir] Specifies where the program executable will be installed. - + datadir [$datadir] Specifies where read-only program data is to be stored. @@ -234,7 +234,7 @@ lang_flags [$lang_flags] Specifies compiler flags which control the C language dialect and standard conformance in the language and header files. The txr program is written - in C90, and requires POSIX and possibly other extensions. + in C90, and requires POSIX and possibly other extensions. diag_flags [$diag_flags] @@ -256,7 +256,7 @@ txr_dbg_opts [$txr_dbg_opts] of "make tests". ! exit 1 -fi +fi # # Variables are read, --help wasn't given, so let's configure! @@ -266,7 +266,7 @@ fi txr_ver=019 # -# The all important banner. +# The all important banner. # if [ $txr_ver ] ; then @@ -280,7 +280,7 @@ printf "+%s+\n|%s|\n+%s+\n" $banner_box "$banner_text" $banner_box # # From here on in, we bail if any command fails. 
-# +# set -e @@ -60,7 +60,7 @@ static struct hash *reachable_weak_hashes; /* * This is is an adaptation of hashpjw, from Compilers: Principles, Techniques * and Tools, Aho, Sethi, Ulman, 1988. P. 436. The register is wider by - * a few bits, and we bring down five overflow bits instead of four. + * a few bits, and we bring down five overflow bits instead of four. * We don't reduce the final result modulo a small prime, but leave it * as it is; let the hashing routines do their own reduction. */ @@ -100,7 +100,7 @@ static long ll_hash(obj_t *obj) long i, h = ll_hash(obj->v.vec[vec_fill]); long len = c_num(fill); - for (i = 0; i < len; i++) + for (i = 0; i < len; i++) h = (h + ll_hash(obj->v.vec[i])) & NUM_MAX; return h; @@ -141,7 +141,7 @@ obj_t *type_check2(obj_t *obj, int t1, int t2) obj_t *type_check3(obj_t *obj, int t1, int t2, int t3) { - if (!is_ptr(obj) || (obj->t.type != t1 && obj->t.type != t2 + if (!is_ptr(obj) || (obj->t.type != t1 && obj->t.type != t2 && obj->t.type != t3)) type_mismatch(L"~s is not of type ~s, ~s nor ~s", obj, code2type(t1), code2type(t2), code2type(t3), nao); @@ -169,7 +169,7 @@ static obj_t *stdio_put_string(obj_t *stream, const wchar_t *s) static obj_t *stdio_put_char(obj_t *stream, wchar_t ch) { struct stdio_handle *h = (struct stdio_handle *) stream->co.handle; - return (h->f && putwc(ch, h->f) != WEOF) + return (h->f && putwc(ch, h->f) != WEOF) ? t : stdio_maybe_write_error(stream); } @@ -396,7 +396,7 @@ struct byte_input { static obj_t *byte_in_get_byte(obj_t *stream) { struct byte_input *bi = (struct byte_input *) stream->co.handle; - + if (bi->index < bi->size) return num(bi->buf[bi->index++]); return nil; @@ -21,7 +21,7 @@ .\"IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED .\"WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. -.TH txr 1 2009-10-17 "txr v. 020" "Text Extraction Utility" +.TH txr 1 2009-11-15 "txr v. 
021" "Text Extraction Utility" .SH NAME txr \- text extractor .SH SYNOPSIS @@ -42,7 +42,7 @@ #include "utf8.h" #include "txr.h" -const wchar_t *version = L"020"; +const wchar_t *version = L"021"; const wchar_t *progname = L"txr"; const wchar_t *spec_file = L"stdin"; obj_t *spec_file_str; @@ -307,7 +307,7 @@ static int txr_main(int argc, char **argv) if (specstring) { spec_file = L"cmdline"; spec_file_str = string(spec_file); - if (gt(length_str(specstring), zero) && + if (gt(length_str(specstring), zero) && chr_str(specstring, minus(length_str(specstring), one)) != chr('\n')) specstring = cat_str(list(specstring, string(L"\n"), nao), nil); yyin_stream = make_string_byte_input_stream(specstring); @@ -231,7 +231,7 @@ wint_t utf8_decode(utf8_decoder_t *ud, int (*get)(void *ctx), void *ctx) wchar_t wch = 0xdc00 | ud->buf[ud->back]; ud->tail = ud->back = (ud->back + 1) % 8; ud->state = utf8_init; - return wch; + return wch; } } @@ -268,7 +268,7 @@ wint_t utf8_decode(utf8_decoder_t *ud, int (*get)(void *ctx), void *ctx) wchar_t wch = 0xdc00 | ud->buf[ud->back]; ud->tail = ud->back = (ud->back + 1) % 8; ud->state = utf8_init; - return wch; + return wch; } break; } |