diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2010-12-04 21:50:25 +0200 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2010-12-04 21:50:25 +0200 |
commit | 16458663c3bdf640e3352653ea94a89fb2949ad4 (patch) | |
tree | a5f4d91a8264affc34a2b9f91028419fb70535c2 | |
parent | 3c4a8232caabe74517277ec31adaca838251a256 (diff) | |
download | egawk-16458663c3bdf640e3352653ea94a89fb2949ad4.tar.gz egawk-16458663c3bdf640e3352653ea94a89fb2949ad4.tar.bz2 egawk-16458663c3bdf640e3352653ea94a89fb2949ad4.zip |
Speed up single byte cases. See ChangeLog.
-rw-r--r-- | ChangeLog | 11 | ||||
-rw-r--r-- | TODO | 4 | ||||
-rw-r--r-- | awk.h | 4 | ||||
-rw-r--r-- | io.c | 12 | ||||
-rw-r--r-- | main.c | 3 | ||||
-rw-r--r-- | node.c | 21 |
6 files changed, 45 insertions, 10 deletions
@@ -1,3 +1,14 @@
+Sat Dec 4 21:44:38 2010 Arnold D. Robbins <arnold@skeeve.com>
+
+	* node.c (init_btowc_cache): New function.
+	(btowc_cache): New array.
+	(str2wstr): Use is_valid_character in test instead of several isXXX
+	calls.
+	* awk.h [is_valid_character]: Macro to use btowc_cache.
+	* main.c (main): Call init_btowc_cache().
+	* io.c (rs1scan): Add call to is_valid_character when processing
+	characters byte by byte.
+
 Wed Dec 1 08:10:21 2010 Arnold D. Robbins <arnold@skeeve.com>
 
 	* awk.h, awkgram.y, debug.c: Change CONTEXT to AWK_CONTEXT
@@ -1,9 +1,7 @@
 FIX regular field splitting to use FPAT algorithm.
 
 #Revise plug-in code to check for special symbol per GNU coding standards.
-
-Paolo's code for checking for single char values in str2wstr().
-
+#Paolo's code for checking for single char values in str2wstr().
 #Fix os_close_on_exec to do read/modify/write of the fd flags.
 
 #Consider forcing [a-z] int abc...wxyz in regexes, no matter what the locale.
@@ -1323,8 +1323,12 @@ extern const wchar_t *wstrstr(const wchar_t *haystack, size_t hs_len,
 extern const wchar_t *wcasestrstr(const wchar_t *haystack, size_t hs_len,
 		const wchar_t *needle, size_t needle_len);
 extern void free_wstr(NODE *n);
+extern wint_t btowc_cache[];
+extern void init_btowc_cache();
+#define is_valid_character(b) (btowc_cache[b] != WEOF)
 #else
 #define free_wstr(NODE) /* empty */
+#define is_valid_character(c) (TRUE)
 #endif
 /* re.c */
 extern Regexp *make_regexp(const char *s, size_t len, int ignorecase, int dfa, int canfatal);
@@ -2684,13 +2684,19 @@ rs1scan(IOBUF *iop, struct recmatch *recm, SCANSTATE *state)
 	if (rs != '\n' && gawk_mb_cur_max > 1) {
 		int len = iop->dataend - bp;
 		int found = 0;
+
 		memset(&mbs, 0, sizeof(mbstate_t));
 		do {
 			if (*bp == rs)
 				found = 1;
-			mbclen = mbrlen(bp, len, &mbs);
-			if ((mbclen == 1) || (mbclen == (size_t) -1)
-				|| (mbclen == (size_t) -2) || (mbclen == 0)) {
+			if (is_valid_character(*bp))
+				mbclen = 1;
+			else
+				mbclen = mbrlen(bp, len, &mbs);
+			if ( (mbclen == 1)
+			    || (mbclen == (size_t) -1)
+			    || (mbclen == (size_t) -2)
+			    || (mbclen == 0)) {
 				/* We treat it as a singlebyte character. */
 				mbclen = 1;
 			}
@@ -293,6 +293,9 @@ main(int argc, char **argv)
 	 */
 	gawk_mb_cur_max = MB_CUR_MAX;
 	/* Without MBS_SUPPORT, gawk_mb_cur_max is 1. */
+
+	/* init the cache for checking bytes if they're characters */
+	init_btowc_cache();
 #endif
 
 	(void) bindtextdomain(PACKAGE, LOCALEDIR);
@@ -706,11 +706,9 @@ str2wstr(NODE *n, size_t **ptr)
 		 * 9/2010: Check the current byte; if it's a valid character,
 		 * then it doesn't start a multibyte sequence. This brings a
 		 * big speed up. Thanks to Ulrich Drepper for the tip.
+		 * 11/2010: Thanks to Paolo Bonzini for some even faster code.
 		 */
-		if ( isprint(*sp)
-		    || isgraph(*sp)
-		    || iscntrl(*sp)
-		    || *sp == '\0' ) {
+		if (is_valid_character(*sp)) {
 			count = 1;
 			wc = *sp;
 		} else
@@ -894,3 +892,18 @@ get_ieee_magic_val(const char *val)
 
 	return v;
 }
+
+#ifdef MBS_SUPPORT
+wint_t btowc_cache[256];
+
+/* init_btowc_cache --- initialize the cache */
+
+void init_btowc_cache()
+{
+	int i;
+
+	for (i = 0; i < 255; i++) {
+		btowc_cache[i] = btowc(i);
+	}
+}
+#endif