summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Corinna Vinschen <corinna@vinschen.de> 2010-02-11 21:19:19 +0000
committer: Corinna Vinschen <corinna@vinschen.de> 2010-02-11 21:19:19 +0000
commit: 44caccfca243364ea0282a8711ad788e3bc703dc (patch)
tree: e9e5c3805e68f99154841cf500473f67b2e56dbc
parent: 45c8c6469a00491b9f908d0d24a1affe88e0f0e3 (diff)
download: cygnal-44caccfca243364ea0282a8711ad788e3bc703dc.tar.gz
cygnal-44caccfca243364ea0282a8711ad788e3bc703dc.tar.bz2
cygnal-44caccfca243364ea0282a8711ad788e3bc703dc.zip
* regex/engine.c (step): Drop Cygwin-specific definition.
(NONCHAR): Better cast here to make the test work. Move comment from step here.
(matcher): Disable skipping initial string in multibyte case.
* regex/regcomp.c (p_bracket): Don't simplify singleton in the invert case.
(p_b_term): Handle early end of pattern after dash in bracket expression.
(singleton): Don't ignore the wides just because there's already a singleton in the single byte chars. Fix condition for a singleton wide accordingly.
(findmust): Check for LC_CTYPE charset, rather than LC_COLLATE charset.
* regex2.h (CHIN): Fix condition in the icase & invert case.
(ISWORD): Fix wrong cast to unsigned char.
-rw-r--r-- winsup/cygwin/ChangeLog 17
-rw-r--r-- winsup/cygwin/regex/engine.c 21
-rw-r--r-- winsup/cygwin/regex/regcomp.c 12
-rw-r--r-- winsup/cygwin/regex/regex2.h 14
4 files changed, 42 insertions, 22 deletions
diff --git a/winsup/cygwin/ChangeLog b/winsup/cygwin/ChangeLog
index 39e72d4a7..7f67f4181 100644
--- a/winsup/cygwin/ChangeLog
+++ b/winsup/cygwin/ChangeLog
@@ -1,3 +1,20 @@
+2010-02-11 Corinna Vinschen <corinna@vinschen.de>
+
+ * regex/engine.c (step): Drop Cygwin-specific definition.
+ (NONCHAR): Better cast here to make the test work. Move comment
+ from step here.
+ (matcher): Disable skipping initial string in multibyte case.
+ * regex/regcomp.c (p_bracket): Don't simplify singleton in the invert
+ case.
+ (p_b_term): Handle early end of pattern after dash in bracket
+ expression.
+ (singleton): Don't ignore the wides just because there's already a
+ singleton in the single byte chars. Fix condition for a singleton
+ wide accordingly.
+ (findmust): Check for LC_CTYPE charset, rather than LC_COLLATE charset.
+ * regex2.h (CHIN): Fix condition in the icase & invert case.
+ (ISWORD): Fix wrong cast to unsigned char.
+
2010-02-11 Andy Koppe <andy.koppe@gmail.com>
* nlsfuncs.cc (initial_setlocale): Move check whether charset has
diff --git a/winsup/cygwin/regex/engine.c b/winsup/cygwin/regex/engine.c
index a517a67ee..4afaf8d9a 100644
--- a/winsup/cygwin/regex/engine.c
+++ b/winsup/cygwin/regex/engine.c
@@ -106,11 +106,7 @@ static const char *dissect(struct match *m, const char *start, const char *stop,
static const char *backref(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, sopno lev, int);
static const char *fast(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
static const char *slow(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
-#ifdef __CYGWIN__
-static states step(struct re_guts *g, sopno start, sopno stop, states bef, int ch, states aft);
-#else
static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft);
-#endif
#define MAX_RECURSION 100
#define BOL (OUT-1)
#define EOL (BOL-1)
@@ -119,7 +115,10 @@ static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_
#define BOW (BOL-4)
#define EOW (BOL-5)
#define BADCHAR (BOL-6)
-#define NONCHAR(c) ((c) <= OUT)
+/* When using wint_t, which is defined as unsigned int on BSD,
+ as well as on Cygwin or Linux, the NONCHAR test is broken without
+ the below cast. I'm wondering how this is supposed to work at all... */
+#define NONCHAR(c) ((int)(c) <= OUT)
#ifdef REDEBUG
static void print(struct match *m, const char *caption, states st, int ch, FILE *d);
#endif
@@ -248,9 +247,12 @@ matcher(struct re_guts *g,
ZAPSTATE(&m->mbs);
/* Adjust start according to moffset, to speed things up */
+#ifndef MNAMES
+ /* The code evaluating moffset doesn't seem to work right
+ in the multibyte case. */
if (g->moffset > -1)
start = ((dp - g->moffset) < start) ? start : dp - g->moffset;
-
+#endif
SP("mloop", m->st, *start);
/* this loop does only one repetition except for backrefs */
@@ -993,14 +995,7 @@ step(struct re_guts *g,
sopno start, /* start state within strip */
sopno stop, /* state after stop state within strip */
states bef, /* states reachable before */
-#ifdef __CYGWIN__
- /* When using wint_t, which is defined as unsigned int on BSD,
- as well as on Cygwin or Linux, the NONCHAR test is broken.
- I'm wondering how this is supposed to work at all... */
- int ch, /* character or NONCHAR code */
-#else
wint_t ch, /* character or NONCHAR code */
-#endif
states aft) /* states already known reachable after */
{
cset *cs;
diff --git a/winsup/cygwin/regex/regcomp.c b/winsup/cygwin/regex/regcomp.c
index 721982ab1..a7a48e023 100644
--- a/winsup/cygwin/regex/regcomp.c
+++ b/winsup/cygwin/regex/regcomp.c
@@ -762,7 +762,8 @@ p_bracket(struct parse *p)
if (cs->invert && p->g->cflags&REG_NEWLINE)
cs->bmp['\n' >> 3] |= 1 << ('\n' & 7);
- if ((ch = singleton(cs)) != OUT) { /* optimize singleton sets */
+ if ((ch = singleton(cs)) != OUT /* optimize singleton sets */
+ && cs->invert == 0) { /* But not in invert case. */
ordinary(p, ch);
freeset(p, cs);
} else
@@ -833,6 +834,9 @@ p_b_term(struct parse *p, cset *cs)
finish = '-';
else
finish = p_b_symbol(p);
+ } else if (SEE('-') && !MORE2()) {
+ SETERROR(REG_EBRACK);
+ return;
} else
finish = start;
if (start == finish)
@@ -1212,9 +1216,9 @@ singleton(cset *cs)
n++;
s = i;
}
- if (n == 1)
+ if (n == 1 && cs->nwides == 0)
return (s);
- if (cs->nwides == 1 && cs->nranges == 0 && cs->ntypes == 0 &&
+ if (n == 0 && cs->nwides == 1 && cs->nranges == 0 && cs->ntypes == 0 &&
cs->icase == 0)
return (cs->wides[0]);
/* Don't bother handling the other cases. */
@@ -1467,7 +1471,7 @@ findmust(struct parse *p, struct re_guts *g)
*/
if (MB_CUR_MAX > 1 &&
#ifdef __CYGWIN__
- strcmp(collate_charset, "UTF-8") != 0)
+ strcmp(__locale_charset (), "UTF-8") != 0)
#else
strcmp(_CurrentRuneLocale->__encoding, "UTF-8") != 0)
#endif
diff --git a/winsup/cygwin/regex/regex2.h b/winsup/cygwin/regex/regex2.h
index 13bbf64a7..53f687bf6 100644
--- a/winsup/cygwin/regex/regex2.h
+++ b/winsup/cygwin/regex/regex2.h
@@ -151,10 +151,14 @@ CHIN(cset *cs, wint_t ch)
if (ch < NC)
return (((cs->bmp[ch >> 3] & (1 << (ch & 7))) != 0) ^
cs->invert);
- else if (cs->icase)
- return (CHIN1(cs, ch) || CHIN1(cs, towlower(ch)) ||
- CHIN1(cs, towupper(ch)));
- else
+ else if (cs->icase) {
+ if (cs->invert)
+ return (CHIN1(cs, ch) && CHIN1(cs, towlower(ch)) &&
+ CHIN1(cs, towupper(ch)));
+ else
+ return (CHIN1(cs, ch) || CHIN1(cs, towlower(ch)) ||
+ CHIN1(cs, towupper(ch)));
+ } else
return (CHIN1(cs, ch));
}
@@ -189,4 +193,4 @@ struct re_guts {
/* misc utilities */
#define OUT (CHAR_MIN - 1) /* a non-character value */
-#define ISWORD(c) (iswalnum((uch)(c)) || (c) == '_')
+#define ISWORD(c) (iswalnum((wint_t)(c)) || (c) == '_')