From e1e595a649d55a304416c1e8e23f99e0df58a452 Mon Sep 17 00:00:00 2001 From: Corinna Vinschen Date: Thu, 4 Feb 2010 12:35:49 +0000 Subject: Replace regex files with multibyte-aware version from FreeBSD. * Makefile.in (install-headers): Remove extra command to install regex.h. (uninstall-headers): Remove extra command to uninstall regex.h. * nlsfuncs.cc (collate_lcid): Make externally available to allow access to collation internals from regex functions. (collate_charset): Ditto. * wchar.h: Add __cplusplus guards to make C-clean. * include/regex.h: New file, replacing regex/regex.h. Remove UCB advertising clause. * regex/COPYRIGHT: Accommodate BSD license. Remove UCB advertising clause. * regex/cclass.h: Remove. * regex/cname.h: New file from FreeBSD. * regex/engine.c: Ditto. (NONCHAR): Tweak for Cygwin. * regex/engine.ih: Remove. * regex/mkh: Remove. * regex/regcomp.c: New file from FreeBSD. Tweak slightly for Cygwin. Import required collate internals from nlsfunc.cc. (p_ere_exp): Add GNU-specific \< and \> handling for word boundaries. (p_simp_re): Ditto. (__collate_range_cmp): Define. (p_b_term): Use Cygwin-specific collate internals. (findmust): Ditto. * regex/regcomp.ih: Remove. * regex/regerror.c: New file from FreeBSD. Fix a few compiler warnings. * regex/regerror.ih: Remove. * regex/regex.7: New file from FreeBSD. Remove UCB advertising clause. * regex/regex.h: Remove. Replaced by include/regex.h. * regex/regexec.c: New file from FreeBSD. Fix a few compiler warnings. * regex/regfree.c: New file from FreeBSD. * regex/tests: Remove. * regex/utils.h: New file from FreeBSD. 
--- winsup/cygwin/regex/regexec.c | 153 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 126 insertions(+), 27 deletions(-) (limited to 'winsup/cygwin/regex/regexec.c') diff --git a/winsup/cygwin/regex/regexec.c b/winsup/cygwin/regex/regexec.c index 35b99c272..6195e508c 100644 --- a/winsup/cygwin/regex/regexec.c +++ b/winsup/cygwin/regex/regexec.c @@ -1,48 +1,131 @@ +/*- + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)regexec.c 8.3 (Berkeley) 3/20/94 + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)regexec.c 8.3 (Berkeley) 3/20/94"; +#endif /* LIBC_SCCS and not lint */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/lib/libc/regex/regexec.c,v 1.8 2007/06/11 03:05:54 delphij Exp $"); + /* * the outer shell of regexec() * - * This file includes engine.c *twice*, after muchos fiddling with the + * This file includes engine.c three times, after muchos fiddling with the * macros that code uses. This lets the same code operate on two different - * representations for state sets. + * representations for state sets and characters. 
*/ +#ifdef __CYGWIN__ #include "winsup.h" +#endif #include <sys/types.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <limits.h> #include <ctype.h> -#include "regex.h" +#include <regex.h> +#include <wchar.h> +#include <wctype.h> #include "utils.h" #include "regex2.h" -#ifdef lint -static int nope = 0; /* for use in asserts; shuts lint up */ +#ifdef __CYGWIN__ +#define __unused __attribute__ ((unused)) #endif +static int nope __unused = 0; /* for use in asserts; shuts lint up */ + +static __inline size_t +xmbrtowc(wint_t *wi, const char *s, size_t n, mbstate_t *mbs, wint_t dummy) +{ + size_t nr; + wchar_t wc; + + nr = mbrtowc(&wc, s, n, mbs); + if (wi != NULL) + *wi = wc; + if (nr == 0) + return (1); + else if (nr == (size_t)-1 || nr == (size_t)-2) { + memset(mbs, 0, sizeof(*mbs)); + if (wi != NULL) + *wi = dummy; + return (1); + } else + return (nr); +} + +static __inline size_t +xmbrtowc_dummy(wint_t *wi, + const char *s, + size_t n __unused, + mbstate_t *mbs __unused, + wint_t dummy __unused) +{ + + if (wi != NULL) + *wi = (unsigned char)*s; + return (1); +} + /* macros for manipulating states, small version */ -#define states unsigned -#define states1 unsigned /* for later use in regexec() decision */ +#define states long +#define states1 states /* for later use in regexec() decision */ #define CLEAR(v) ((v) = 0) -#define SET0(v, n) ((v) &= ~((unsigned)1 << (n))) -#define SET1(v, n) ((v) |= (unsigned)1 << (n)) -#define ISSET(v, n) ((v) & ((unsigned)1 << (n))) +#define SET0(v, n) ((v) &= ~((unsigned long)1 << (n))) +#define SET1(v, n) ((v) |= (unsigned long)1 << (n)) +#define ISSET(v, n) (((v) & ((unsigned long)1 << (n))) != 0) #define ASSIGN(d, s) ((d) = (s)) #define EQ(a, b) ((a) == (b)) -#define STATEVARS int dummy /* dummy version */ +#define STATEVARS long dummy /* dummy version */ #define STATESETUP(m, n) /* nothing */ #define STATETEARDOWN(m) /* nothing */ #define SETUP(v) ((v) = 0) -#define onestate unsigned -#define INIT(o, n) ((o) = (unsigned)1 << (n)) +#define onestate long +#define INIT(o, n) ((o) = (unsigned long)1 << (n)) 
#define INC(o) ((o) <<= 1) -#define ISSTATEIN(v, o) ((v) & (o)) +#define ISSTATEIN(v, o) (((v) & (o)) != 0) /* some abbreviations; note that some of these know variable names! */ /* do "if I'm here, I can also be there" etc without branches */ -#define FWD(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) << (n)) -#define BACK(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) >> (n)) -#define ISSETBACK(v, n) ((v) & ((unsigned)here >> (n))) +#define FWD(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) << (n)) +#define BACK(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) >> (n)) +#define ISSETBACK(v, n) (((v) & ((unsigned long)here >> (n))) != 0) +/* no multibyte support */ +#define XMBRTOWC xmbrtowc_dummy +#define ZAPSTATE(mbs) ((void)(mbs)) /* function names */ #define SNAMES /* engine.c looks after details */ @@ -68,6 +151,8 @@ static int nope = 0; /* for use in asserts; shuts lint up */ #undef BACK #undef ISSETBACK #undef SNAMES +#undef XMBRTOWC +#undef ZAPSTATE /* macros for manipulating states, large version */ #define states char * @@ -77,13 +162,13 @@ static int nope = 0; /* for use in asserts; shuts lint up */ #define ISSET(v, n) ((v)[n]) #define ASSIGN(d, s) memcpy(d, s, m->g->nstates) #define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0) -#define STATEVARS int vn; char *space +#define STATEVARS long vn; char *space #define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \ if ((m)->space == NULL) return(REG_ESPACE); \ (m)->vn = 0; } #define STATETEARDOWN(m) { free((m)->space); } #define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates]) -#define onestate int +#define onestate long #define INIT(o, n) ((o) = (n)) #define INC(o) ((o)++) #define ISSTATEIN(v, o) ((v)[o]) @@ -92,11 +177,24 @@ static int nope = 0; /* for use in asserts; shuts lint up */ #define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here]) #define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here]) #define ISSETBACK(v, n) ((v)[here - (n)]) +/* no multibyte support */ +#define 
XMBRTOWC xmbrtowc_dummy +#define ZAPSTATE(mbs) ((void)(mbs)) /* function names */ #define LNAMES /* flag */ #include "engine.c" +/* multibyte character & large states version */ +#undef LNAMES +#undef XMBRTOWC +#undef ZAPSTATE +#define XMBRTOWC xmbrtowc +#define ZAPSTATE(mbs) memset((mbs), 0, sizeof(*(mbs))) +#define MNAMES + +#include "engine.c" + /* - regexec - interface for matching = extern int regexec(const regex_t *, const char *, size_t, \ @@ -113,14 +211,13 @@ static int nope = 0; /* for use in asserts; shuts lint up */ * have been prototyped. */ int /* 0 success, REG_NOMATCH failure */ -regexec(preg, string, nmatch, pmatch, eflags) -const regex_t *preg; -const char *string; -size_t nmatch; -regmatch_t pmatch[]; -int eflags; +regexec(const regex_t * __restrict preg, + const char * __restrict string, + size_t nmatch, + regmatch_t pmatch[__restrict], + int eflags) { - register struct re_guts *g = preg->re_g; + struct re_guts *g = preg->re_g; #ifdef REDEBUG # define GOODFLAGS(f) (f) #else @@ -134,7 +231,9 @@ int eflags; return(REG_BADPAT); eflags = GOODFLAGS(eflags); - if ((unsigned) g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE)) + if (MB_CUR_MAX > 1) + return(mmatcher(g, (char *)string, nmatch, pmatch, eflags)); + else if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE)) return(smatcher(g, (char *)string, nmatch, pmatch, eflags)); else return(lmatcher(g, (char *)string, nmatch, pmatch, eflags)); -- cgit v1.2.3