diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2018-07-26 21:41:49 +0300 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2018-07-26 21:41:49 +0300 |
commit | b98257919b20bdfc14f363761cc6215c1ad8bcee (patch) | |
tree | 0709d74988db880ef510289bf6fb576c0f221300 /support/regex_internal.h | |
parent | 1bc2871bbe4ae6b99fd1862a412440672846bc05 (diff) | |
download | egawk-b98257919b20bdfc14f363761cc6215c1ad8bcee.tar.gz egawk-b98257919b20bdfc14f363761cc6215c1ad8bcee.tar.bz2 egawk-b98257919b20bdfc14f363761cc6215c1ad8bcee.zip |
Switch to GNULIB regex.
Diffstat (limited to 'support/regex_internal.h')
-rw-r--r-- | support/regex_internal.h | 507 |
1 files changed, 302 insertions, 205 deletions
diff --git a/support/regex_internal.h b/support/regex_internal.h index cdd6e3a1..3b836ed2 100644 --- a/support/regex_internal.h +++ b/support/regex_internal.h @@ -1,5 +1,5 @@ /* Extended regular expression matching and search library. - Copyright (C) 2002-2017 Free Software Foundation, Inc. + Copyright (C) 2002-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. @@ -15,7 +15,7 @@ You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ + <https://www.gnu.org/licenses/>. */ #ifndef _REGEX_INTERNAL_H #define _REGEX_INTERNAL_H 1 @@ -26,51 +26,80 @@ #include <stdlib.h> #include <string.h> -#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC -# include <langinfo.h> -#endif -#if defined HAVE_LOCALE_H || defined _LIBC -# include <locale.h> +#include <langinfo.h> +#include <locale.h> +#include <wchar.h> +#include <wctype.h> +#include <stdbool.h> +#include <stdint.h> + +/* Properties of integers. Although Gnulib has intprops.h, glibc does + without for now. */ +#ifndef _LIBC +# include "intprops.h" +#else +/* True if the real type T is signed. */ +# define TYPE_SIGNED(t) (! ((t) 0 < (t) -1)) + +/* True if adding the nonnegative Idx values A and B would overflow. + If false, set *R to A + B. A, B, and R may be evaluated more than + once, or zero times. Although this is not a full implementation of + Gnulib INT_ADD_WRAPV, it is good enough for glibc regex code. + FIXME: This implementation is a fragile stopgap, and this file would + be simpler and more robust if intprops.h were migrated into glibc. */ +# define INT_ADD_WRAPV(a, b, r) \ + (IDX_MAX - (a) < (b) ? true : (*(r) = (a) + (b), false)) #endif -#if defined HAVE_WCHAR_H || defined _LIBC -# include <wchar.h> -#endif /* HAVE_WCHAR_H || _LIBC */ -#if defined HAVE_WCTYPE_H || defined _LIBC -# include <wctype.h> -#endif /* HAVE_WCTYPE_H || _LIBC */ -#if defined HAVE_STDBOOL_H || defined _LIBC -# include <stdbool.h> -#endif /* HAVE_STDBOOL_H || _LIBC */ -#if defined HAVE_STDINT_H || defined _LIBC -# include <stdint.h> -#endif /* HAVE_STDINT_H || _LIBC */ -#if defined _LIBC + +#ifdef _LIBC # include <libc-lock.h> +# define lock_define(name) __libc_lock_define (, name) +# define lock_init(lock) (__libc_lock_init (lock), 0) +# define lock_fini(lock) ((void) 0) +# define lock_lock(lock) __libc_lock_lock (lock) +# define lock_unlock(lock) __libc_lock_unlock (lock) +#elif defined GNULIB_LOCK && !defined USE_UNLOCKED_IO +# include "glthread/lock.h" + /* Use gl_lock_define if empty macro arguments are known to work. + Otherwise, fall back on less-portable substitutes. */ +# if ((defined __GNUC__ && !defined __STRICT_ANSI__) \ + || (defined __STDC_VERSION__ && 199901L <= __STDC_VERSION__)) +# define lock_define(name) gl_lock_define (, name) +# elif USE_POSIX_THREADS +# define lock_define(name) pthread_mutex_t name; +# elif USE_PTH_THREADS +# define lock_define(name) pth_mutex_t name; +# elif USE_SOLARIS_THREADS +# define lock_define(name) mutex_t name; +# elif USE_WINDOWS_THREADS +# define lock_define(name) gl_lock_t name; +# else +# define lock_define(name) +# endif +# define lock_init(lock) glthread_lock_init (&(lock)) +# define lock_fini(lock) glthread_lock_destroy (&(lock)) +# define lock_lock(lock) glthread_lock_lock (&(lock)) +# define lock_unlock(lock) glthread_lock_unlock (&(lock)) +#elif defined GNULIB_PTHREAD && !defined USE_UNLOCKED_IO +# include <pthread.h> +# define lock_define(name) pthread_mutex_t name; +# define lock_init(lock) pthread_mutex_init (&(lock), 0) +# define lock_fini(lock) pthread_mutex_destroy (&(lock)) +# define lock_lock(lock) pthread_mutex_lock (&(lock)) +# define lock_unlock(lock) pthread_mutex_unlock (&(lock)) #else -# define __libc_lock_init(NAME) do { } while (0) -# define __libc_lock_lock(NAME) do { } while (0) -# define __libc_lock_unlock(NAME) do { } while (0) +# define lock_define(name) +# define lock_init(lock) 0 +# define lock_fini(lock) ((void) 0) + /* The 'dfa' avoids an "unused variable 'dfa'" warning from GCC. */ +# define lock_lock(lock) ((void) dfa) +# define lock_unlock(lock) ((void) 0) #endif -#ifndef GAWK /* In case that the system doesn't have isblank(). */ -#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank +#if !defined _LIBC && ! (defined isblank || (HAVE_ISBLANK && HAVE_DECL_ISBLANK)) # define isblank(ch) ((ch) == ' ' || (ch) == '\t') #endif -#else /* GAWK */ -/* - * This is a freaking mess. On glibc systems you have to define - * a magic constant to get isblank() out of <ctype.h>, since it's - * a C99 function. To heck with all that and borrow a page from - * dfa.c's book. - */ - -static int -is_blank (int c) -{ - return (c == ' ' || c == '\t'); -} -#endif /* GAWK */ #ifdef _LIBC # ifndef _RE_DEFINE_LOCALE_FUNCTIONS @@ -89,6 +118,7 @@ is_blank (int c) __dcgettext (_libc_intl_domainname, msgid, LC_MESSAGES) # endif #else +# undef gettext # define gettext(msgid) (msgid) #endif @@ -98,25 +128,17 @@ is_blank (int c) # define gettext_noop(String) String #endif -/* For loser systems without the definition. */ -#ifndef SIZE_MAX -# define SIZE_MAX ((size_t) -1) -#endif - -#if ! defined(__DJGPP__) && (defined(GAWK) || _LIBC) +#if (defined MB_CUR_MAX && HAVE_WCTYPE_H && HAVE_ISWCTYPE) || _LIBC # define RE_ENABLE_I18N #endif -#if __GNUC__ >= 3 -# define BE(expr, val) __builtin_expect (expr, val) -#else -# define BE(expr, val) (expr) -# undef inline -# define inline -#endif +#define BE(expr, val) __builtin_expect (expr, val) + +/* Number of ASCII characters. */ +#define ASCII_CHARS 0x80 -/* Number of single byte character. */ -#define SBC_MAX 256 +/* Number of single byte characters. */ +#define SBC_MAX (UCHAR_MAX + 1) #define COLL_ELEM_LEN_MAX 8 @@ -126,58 +148,88 @@ is_blank (int c) /* Rename to standard API for using out of glibc. */ #ifndef _LIBC -# ifdef __wctype # undef __wctype -# endif -# define __wctype wctype -# ifdef __iswctype # undef __iswctype -# endif +# define __wctype wctype +# define __iswalnum iswalnum # define __iswctype iswctype +# define __towlower towlower +# define __towupper towupper # define __btowc btowc # define __mbrtowc mbrtowc -#undef __mempcpy /* GAWK */ -# define __mempcpy mempcpy # define __wcrtomb wcrtomb # define __regfree regfree +# define attribute_hidden #endif /* not _LIBC */ #if __GNUC__ < 3 + (__GNUC_MINOR__ < 1) # define __attribute__(arg) #endif -#ifdef GAWK -/* - * Instead of trying to figure out which GCC version introduced - * this symbol, just define it out and be done. - */ -# undef __attribute_warn_unused_result__ -# define __attribute_warn_unused_result__ +#ifndef SSIZE_MAX +# define SSIZE_MAX ((ssize_t) (SIZE_MAX / 2)) #endif +/* The type of indexes into strings. This is signed, not size_t, + since the API requires indexes to fit in regoff_t anyway, and using + signed integers makes the code a bit smaller and presumably faster. + The traditional GNU regex implementation uses int for indexes. + The POSIX-compatible implementation uses a possibly-wider type. + The name 'Idx' is three letters to minimize the hassle of + reindenting a lot of regex code that formerly used 'int'. */ +typedef regoff_t Idx; +#ifdef _REGEX_LARGE_OFFSETS +# define IDX_MAX SSIZE_MAX +#else +# define IDX_MAX INT_MAX +#endif + +/* A hash value, suitable for computing hash tables. */ +typedef __re_size_t re_hashval_t; + /* An integer used to represent a set of bits. It must be unsigned, and must be at least as wide as unsigned int. */ typedef unsigned long int bitset_word_t; /* All bits set in a bitset_word_t. */ #define BITSET_WORD_MAX ULONG_MAX -/* Number of bits in a bitset_word_t. */ -#define BITSET_WORD_BITS (sizeof (bitset_word_t) * CHAR_BIT) -/* Number of bitset_word_t in a bit_set. */ -#define BITSET_WORDS (SBC_MAX / BITSET_WORD_BITS) + +/* Number of bits in a bitset_word_t. For portability to hosts with + padding bits, do not use '(sizeof (bitset_word_t) * CHAR_BIT)'; + instead, deduce it directly from BITSET_WORD_MAX. Avoid + greater-than-32-bit integers and unconditional shifts by more than + 31 bits, as they're not portable. */ +#if BITSET_WORD_MAX == 0xffffffffUL +# define BITSET_WORD_BITS 32 +#elif BITSET_WORD_MAX >> 31 >> 4 == 1 +# define BITSET_WORD_BITS 36 +#elif BITSET_WORD_MAX >> 31 >> 16 == 1 +# define BITSET_WORD_BITS 48 +#elif BITSET_WORD_MAX >> 31 >> 28 == 1 +# define BITSET_WORD_BITS 60 +#elif BITSET_WORD_MAX >> 31 >> 31 >> 1 == 1 +# define BITSET_WORD_BITS 64 +#elif BITSET_WORD_MAX >> 31 >> 31 >> 9 == 1 +# define BITSET_WORD_BITS 72 +#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 3 == 1 +# define BITSET_WORD_BITS 128 +#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 7 == 1 +# define BITSET_WORD_BITS 256 +#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 7 > 1 +# define BITSET_WORD_BITS 257 /* any value > SBC_MAX will do here */ +# if BITSET_WORD_BITS <= SBC_MAX +# error "Invalid SBC_MAX" +# endif +#else +# error "Add case for new bitset_word_t size" +#endif + +/* Number of bitset_word_t values in a bitset_t. */ +#define BITSET_WORDS ((SBC_MAX + BITSET_WORD_BITS - 1) / BITSET_WORD_BITS) + typedef bitset_word_t bitset_t[BITSET_WORDS]; typedef bitset_word_t *re_bitset_ptr_t; typedef const bitset_word_t *re_const_bitset_ptr_t; -#define bitset_set(set,i) \ - (set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS) -#define bitset_clear(set,i) \ - (set[i / BITSET_WORD_BITS] &= ~((bitset_word_t) 1 << i % BITSET_WORD_BITS)) -#define bitset_contain(set,i) \ - (set[i / BITSET_WORD_BITS] & ((bitset_word_t) 1 << i % BITSET_WORD_BITS)) -#define bitset_empty(set) memset (set, '\0', sizeof (bitset_t)) -#define bitset_set_all(set) memset (set, '\xff', sizeof (bitset_t)) -#define bitset_copy(dest,src) memcpy (dest, src, sizeof (bitset_t)) - #define PREV_WORD_CONSTRAINT 0x0001 #define PREV_NOTWORD_CONSTRAINT 0x0002 #define NEXT_WORD_CONSTRAINT 0x0004 @@ -205,9 +257,9 @@ typedef enum typedef struct { - int alloc; - int nelem; - int *elems; + Idx alloc; + Idx nelem; + Idx *elems; } re_node_set; typedef enum @@ -293,19 +345,19 @@ typedef struct unsigned int non_match : 1; /* # of multibyte characters. */ - int nmbchars; + Idx nmbchars; /* # of collating symbols. */ - int ncoll_syms; + Idx ncoll_syms; /* # of equivalence classes. */ - int nequiv_classes; + Idx nequiv_classes; /* # of range expressions. */ - int nranges; + Idx nranges; /* # of character classes. */ - int nchar_classes; + Idx nchar_classes; } re_charset_t; #endif /* RE_ENABLE_I18N */ @@ -318,10 +370,10 @@ typedef struct #ifdef RE_ENABLE_I18N re_charset_t *mbcset; /* for COMPLEX_BRACKET */ #endif /* RE_ENABLE_I18N */ - int idx; /* for BACK_REF */ + Idx idx; /* for BACK_REF */ re_context_type ctx_type; /* for ANCHOR */ } opr; -#if __GNUC__ >= 2 +#if __GNUC__ >= 2 && !defined __STRICT_ANSI__ re_token_type_t type : 8; #else re_token_type_t type; @@ -352,30 +404,30 @@ struct re_string_t #ifdef RE_ENABLE_I18N /* Store the wide character string which is corresponding to MBS. */ wint_t *wcs; - int *offsets; + Idx *offsets; mbstate_t cur_state; #endif /* Index in RAW_MBS. Each character mbs[i] corresponds to raw_mbs[raw_mbs_idx + i]. */ - int raw_mbs_idx; + Idx raw_mbs_idx; /* The length of the valid characters in the buffers. */ - int valid_len; + Idx valid_len; /* The corresponding number of bytes in raw_mbs array. */ - int valid_raw_len; + Idx valid_raw_len; /* The length of the buffers MBS and WCS. */ - int bufs_len; + Idx bufs_len; /* The index in MBS, which is updated by re_string_fetch_byte. */ - int cur_idx; + Idx cur_idx; /* length of RAW_MBS array. */ - int raw_len; + Idx raw_len; /* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN. */ - int len; + Idx len; /* End of the buffer may be shorter than its length in the cases such as re_match_2, re_search_2. Then, we use STOP for end of the buffer instead of LEN. */ - int raw_stop; + Idx raw_stop; /* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS. */ - int stop; + Idx stop; /* The context of mbs[0]. We store the context independently, since the context of mbs[0] may be different from raw_mbs[0], which is @@ -385,7 +437,7 @@ struct re_string_t RE_TRANSLATE_TYPE trans; /* Copy of re_dfa_t's word_char. */ re_const_bitset_ptr_t word_char; - /* 1 if REG_ICASE. */ + /* true if REG_ICASE. */ unsigned char icase; unsigned char is_utf8; unsigned char map_notascii; @@ -401,18 +453,10 @@ typedef struct re_string_t re_string_t; struct re_dfa_t; typedef struct re_dfa_t re_dfa_t; -#ifndef NOT_IN_libc -static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr, - int new_buf_len); -# ifdef RE_ENABLE_I18N -static void build_wcs_buffer (re_string_t *pstr); -static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr); -# endif /* RE_ENABLE_I18N */ -static void build_upper_buffer (re_string_t *pstr); -static void re_string_translate_buffer (re_string_t *pstr); -static unsigned int re_string_context_at (const re_string_t *input, int idx, - int eflags) __attribute__ ((pure)); +#ifndef _LIBC +# define IS_IN(libc) false #endif + #define re_string_peek_byte(pstr, offset) \ ((pstr)->mbs[(pstr)->cur_idx + offset]) #define re_string_fetch_byte(pstr) \ @@ -430,9 +474,12 @@ static unsigned int re_string_context_at (const re_string_t *input, int idx, #define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx)) #define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx)) +#if defined _LIBC || HAVE_ALLOCA +# include <alloca.h> +#endif + #ifndef _LIBC # if HAVE_ALLOCA -# include <alloca.h> /* The OS usually guarantees only one guard page at the bottom of the stack, and a page size can be as small as 4096 bytes. So we cannot safely allocate anything larger than 4096 bytes. Also care for the possibility @@ -441,41 +488,26 @@ static unsigned int re_string_context_at (const re_string_t *input, int idx, # else /* alloca is implemented with malloc, so just use malloc. */ # define __libc_use_alloca(n) 0 +# undef alloca +# define alloca(n) malloc (n) # endif #endif -/* - * GAWK checks for zero-size allocations everywhere else, - * do it here too. - */ -#ifndef GAWK -#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t))) -#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t))) -#else -static void * -test_malloc(size_t count, const char *file, size_t line) -{ - if (count == 0) { - fprintf(stderr, "%s:%lu: allocation of zero bytes\n", - file, (unsigned long) line); - exit(1); - } - return malloc(count); -} +#ifdef _LIBC +# define MALLOC_0_IS_NONNULL 1 +#elif !defined MALLOC_0_IS_NONNULL +# define MALLOC_0_IS_NONNULL 0 +#endif -static void * -test_realloc(void *p, size_t count, const char *file, size_t line) -{ - if (count == 0) { - fprintf(stderr, "%s:%lu: reallocation of zero bytes\n", - file, (unsigned long) line); - exit(1); - } - return realloc(p, count); -} -#define re_malloc(t,n) ((t *) test_malloc (((n) * sizeof (t)), __FILE__, __LINE__)) -#define re_realloc(p,t,n) ((t *) test_realloc (p, (n) * sizeof (t), __FILE__, __LINE__)) +#ifndef MAX +# define MAX(a,b) ((a) < (b) ? (b) : (a)) +#endif +#ifndef MIN +# define MIN(a,b) ((a) < (b) ? (a) : (b)) #endif + +#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t))) +#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t))) #define re_free(p) free (p) struct bin_tree_t @@ -488,9 +520,9 @@ struct bin_tree_t re_token_t token; - /* `node_idx' is the index in dfa->nodes, if `type' == 0. - Otherwise `type' indicate the type of this node. */ - int node_idx; + /* 'node_idx' is the index in dfa->nodes, if 'type' == 0. + Otherwise 'type' indicate the type of this node. */ + Idx node_idx; }; typedef struct bin_tree_t bin_tree_t; @@ -517,7 +549,7 @@ typedef struct bin_tree_storage_t bin_tree_storage_t; #define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_') #define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR) -#define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_') +#define IS_WIDE_WORD_CHAR(ch) (__iswalnum (ch) || (ch) == L'_') #define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR) #define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \ @@ -534,7 +566,7 @@ typedef struct bin_tree_storage_t bin_tree_storage_t; struct re_dfastate_t { - unsigned int hash; + re_hashval_t hash; re_node_set nodes; re_node_set non_eps_nodes; re_node_set inveclosure; @@ -542,9 +574,9 @@ struct re_dfastate_t struct re_dfastate_t **trtable, **word_trtable; unsigned int context : 4; unsigned int halt : 1; - /* If this state can accept `multi byte'. + /* If this state can accept "multi byte". Note that we refer to multibyte characters, and multi character - collating elements as `multi byte'. */ + collating elements as "multi byte". */ unsigned int accept_mb : 1; /* If this state has backreference node(s). */ unsigned int has_backref : 1; @@ -554,8 +586,8 @@ typedef struct re_dfastate_t re_dfastate_t; struct re_state_table_entry { - int num; - int alloc; + Idx num; + Idx alloc; re_dfastate_t **array; }; @@ -563,8 +595,8 @@ struct re_state_table_entry typedef struct { - int next_idx; - int alloc; + Idx next_idx; + Idx alloc; re_dfastate_t **array; } state_array_t; @@ -572,8 +604,8 @@ typedef struct typedef struct { - int node; - int str_idx; /* The position NODE match at. */ + Idx node; + Idx str_idx; /* The position NODE match at. */ state_array_t path; } re_sub_match_last_t; @@ -583,20 +615,20 @@ typedef struct typedef struct { - int str_idx; - int node; + Idx str_idx; + Idx node; state_array_t *path; - int alasts; /* Allocation size of LASTS. */ - int nlasts; /* The number of LASTS. */ + Idx alasts; /* Allocation size of LASTS. */ + Idx nlasts; /* The number of LASTS. */ re_sub_match_last_t **lasts; } re_sub_match_top_t; struct re_backref_cache_entry { - int node; - int str_idx; - int subexp_from; - int subexp_to; + Idx node; + Idx str_idx; + Idx subexp_from; + Idx subexp_to; char more; char unused; unsigned short int eps_reachable_subexps_map; @@ -614,18 +646,18 @@ typedef struct /* EFLAGS of the argument of regexec. */ int eflags; /* Where the matching ends. */ - int match_last; - int last_node; + Idx match_last; + Idx last_node; /* The state log used by the matcher. */ re_dfastate_t **state_log; - int state_log_top; + Idx state_log_top; /* Back reference cache. */ - int nbkref_ents; - int abkref_ents; + Idx nbkref_ents; + Idx abkref_ents; struct re_backref_cache_entry *bkref_ents; int max_mb_elem_len; - int nsub_tops; - int asub_tops; + Idx nsub_tops; + Idx asub_tops; re_sub_match_top_t **sub_tops; } re_match_context_t; @@ -633,23 +665,23 @@ typedef struct { re_dfastate_t **sifted_states; re_dfastate_t **limited_states; - int last_node; - int last_str_idx; + Idx last_node; + Idx last_str_idx; re_node_set limits; } re_sift_context_t; struct re_fail_stack_ent_t { - int idx; - int node; + Idx idx; + Idx node; regmatch_t *regs; re_node_set eps_via_nodes; }; struct re_fail_stack_t { - int num; - int alloc; + Idx num; + Idx alloc; struct re_fail_stack_ent_t *stack; }; @@ -658,8 +690,8 @@ struct re_dfa_t re_token_t *nodes; size_t nodes_alloc; size_t nodes_len; - int *nexts; - int *org_indices; + Idx *nexts; + Idx *org_indices; re_node_set *edests; re_node_set *eclosures; re_node_set *inveclosures; @@ -673,10 +705,10 @@ struct re_dfa_t re_bitset_ptr_t sb_char; int str_tree_storage_idx; - /* number of subexpressions `re_nsub' is in regex_t. */ - unsigned int state_hash_mask; - int init_node; - int nbackref; /* The number of backreference in this dfa. */ + /* number of subexpressions 're_nsub' is in regex_t. */ + re_hashval_t state_hash_mask; + Idx init_node; + Idx nbackref; /* The number of backreference in this dfa. */ /* Bitmap expressing which backreference is used. */ bitset_word_t used_bkref_map; @@ -693,13 +725,11 @@ struct re_dfa_t int mb_cur_max; bitset_t word_char; reg_syntax_t syntax; - int *subexp_map; + Idx *subexp_map; #ifdef DEBUG char* re_str; #endif -#ifdef _LIBC - __libc_lock_define (, lock) -#endif + lock_define (lock) }; #define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set)) @@ -730,16 +760,60 @@ typedef struct } bracket_elem_t; -/* Inline functions for bitset operation. */ -static void __attribute__ ((unused)) +/* Functions for bitset_t operation. */ + +static inline void +bitset_set (bitset_t set, Idx i) +{ + set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS; +} + +static inline void +bitset_clear (bitset_t set, Idx i) +{ + set[i / BITSET_WORD_BITS] &= ~ ((bitset_word_t) 1 << i % BITSET_WORD_BITS); +} + +static inline bool +bitset_contain (const bitset_t set, Idx i) +{ + return (set[i / BITSET_WORD_BITS] >> i % BITSET_WORD_BITS) & 1; +} + +static inline void +bitset_empty (bitset_t set) +{ + memset (set, '\0', sizeof (bitset_t)); +} + +static inline void +bitset_set_all (bitset_t set) +{ + memset (set, -1, sizeof (bitset_word_t) * (SBC_MAX / BITSET_WORD_BITS)); + if (SBC_MAX % BITSET_WORD_BITS != 0) + set[BITSET_WORDS - 1] = + ((bitset_word_t) 1 << SBC_MAX % BITSET_WORD_BITS) - 1; +} + +static inline void +bitset_copy (bitset_t dest, const bitset_t src) +{ + memcpy (dest, src, sizeof (bitset_t)); +} + +static inline void bitset_not (bitset_t set) { int bitset_i; - for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i) + for (bitset_i = 0; bitset_i < SBC_MAX / BITSET_WORD_BITS; ++bitset_i) set[bitset_i] = ~set[bitset_i]; + if (SBC_MAX % BITSET_WORD_BITS != 0) + set[BITSET_WORDS - 1] = + ((((bitset_word_t) 1 << SBC_MAX % BITSET_WORD_BITS) - 1) + & ~set[BITSET_WORDS - 1]); } -static void __attribute__ ((unused)) +static inline void bitset_merge (bitset_t dest, const bitset_t src) { int bitset_i; @@ -747,7 +821,7 @@ bitset_merge (bitset_t dest, const bitset_t src) dest[bitset_i] |= src[bitset_i]; } -static void __attribute__ ((unused)) +static inline void bitset_mask (bitset_t dest, const bitset_t src) { int bitset_i; @@ -756,10 +830,10 @@ bitset_mask (bitset_t dest, const bitset_t src) } #ifdef RE_ENABLE_I18N -/* Inline functions for re_string. */ +/* Functions for re_string. */ static int __attribute__ ((pure, unused)) -re_string_char_size_at (const re_string_t *pstr, int idx) +re_string_char_size_at (const re_string_t *pstr, Idx idx) { int byte_idx; if (pstr->mb_cur_max == 1) @@ -772,23 +846,22 @@ re_string_char_size_at (const re_string_t *pstr, int idx) static wint_t __attribute__ ((pure, unused)) -re_string_wchar_at (const re_string_t *pstr, int idx) +re_string_wchar_at (const re_string_t *pstr, Idx idx) { if (pstr->mb_cur_max == 1) return (wint_t) pstr->mbs[idx]; return (wint_t) pstr->wcs[idx]; } -# ifndef NOT_IN_libc -# ifdef _LIBC -# include <locale/weight.h> -# endif +# ifdef _LIBC +# include <locale/weight.h> +# endif static int __attribute__ ((pure, unused)) -re_string_elem_size_at (const re_string_t *pstr, int idx) +re_string_elem_size_at (const re_string_t *pstr, Idx idx) { -# ifdef _LIBC +# ifdef _LIBC const unsigned char *p, *extra; const int32_t *table, *indirect; uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); @@ -805,10 +878,34 @@ re_string_elem_size_at (const re_string_t *pstr, int idx) return p - pstr->mbs - idx; } else -# endif /* _LIBC */ +# endif /* _LIBC */ return 1; } -# endif #endif /* RE_ENABLE_I18N */ +#ifndef __GNUC_PREREQ +# if defined __GNUC__ && defined __GNUC_MINOR__ +# define __GNUC_PREREQ(maj, min) \ + ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) +# else +# define __GNUC_PREREQ(maj, min) 0 +# endif +#endif + +#if __GNUC_PREREQ (3,4) +# undef __attribute_warn_unused_result__ +# define __attribute_warn_unused_result__ \ + __attribute__ ((__warn_unused_result__)) +#else +# define __attribute_warn_unused_result__ /* empty */ +#endif + +#ifndef FALLTHROUGH +# if __GNUC__ < 7 +# define FALLTHROUGH ((void) 0) +# else +# define FALLTHROUGH __attribute__ ((__fallthrough__)) +# endif +#endif + #endif /* _REGEX_INTERNAL_H */ |