about | summary | refs | log | tree | commit | diff | stats
path: root/dfa.c
diff options
context:
space:
mode:
author: Arnold D. Robbins <arnold@skeeve.com> 2011-12-06 21:30:15 +0200
committer: Arnold D. Robbins <arnold@skeeve.com> 2011-12-06 21:30:15 +0200
commit: 958b6cb452c88e7ba064253b6f461ebacfc1b736 (patch)
tree: a9cfdbc979d446fa7ae5d5184ce16a7df5825567 /dfa.c
parent: fd3e8f9c332bd76e7f8fa0c61c1451f52f3b1998 (diff)
download: egawk-958b6cb452c88e7ba064253b6f461ebacfc1b736.tar.gz
          egawk-958b6cb452c88e7ba064253b6f461ebacfc1b736.tar.bz2
          egawk-958b6cb452c88e7ba064253b6f461ebacfc1b736.zip
Sync dfa.c with GNU grep.
Diffstat (limited to 'dfa.c')
-rw-r--r-- dfa.c 37
1 files changed, 23 insertions, 14 deletions
diff --git a/dfa.c b/dfa.c
index cba66886..37ddc1bc 100644
--- a/dfa.c
+++ b/dfa.c
@@ -608,7 +608,8 @@ setbit_c (int b, charclass c)
}
#else
# define setbit_c setbit
-static inline bool setbit_wc (wint_t wc, charclass c)
+static inline bool
+setbit_wc (wint_t wc, charclass c)
{
abort ();
/*NOTREACHED*/
@@ -782,7 +783,7 @@ static const struct dfa_ctype prednames[] = {
{ NULL, NULL, false }
};
-static const struct dfa_ctype *
+static const struct dfa_ctype * _GL_ATTRIBUTE_PURE
find_pred (const char *str)
{
unsigned int i;
@@ -802,7 +803,6 @@ parse_bracket_exp (void)
int invert;
int c, c1, c2;
charclass ccl;
- wint_t wc1 = 0;
/* Used to warn about [:space:].
Bit 0 = first character is a colon.
@@ -813,6 +813,7 @@ parse_bracket_exp (void)
wint_t wc;
wint_t wc2;
+ wint_t wc1 = 0;
/* Work area to build a mb_char_classes. */
struct mb_char_classes *work_mbc;
@@ -1080,8 +1081,18 @@ parse_bracket_exp (void)
return CSET + charclass_index(ccl);
}
+/* Add this to the test for whether a byte is word-constituent, since on
+ BSD-based systems, many values in the 128..255 range are classified as
+ alphabetic, while on glibc-based systems, they are not. */
+#ifdef __GLIBC__
+# define is_valid_unibyte_character(c) 1
+#else
+# define is_valid_unibyte_character(c) (MBS_SUPPORT && btowc (c) != WEOF)
+#endif
+
/* Return non-zero if C is a `word-constituent' byte; zero otherwise. */
-#define IS_WORD_CONSTITUENT(C) (isalnum(C) || (C) == '_')
+#define IS_WORD_CONSTITUENT(C) \
+ (is_valid_unibyte_character(C) && (isalnum(C) || (C) == '_'))
static token
lex (void)
@@ -1678,7 +1689,7 @@ atom (void)
}
/* Return the number of tokens in the given subexpression. */
-static int
+static int _GL_ATTRIBUTE_PURE
nsubtoks (int tindex)
{
int ntoks1;
@@ -2128,9 +2139,7 @@ dfaanalyze (struct dfa *d, int searchflag)
CALLOC(d->follows, d->tindex);
for (i = 0; i < d->tindex; ++i)
-#ifdef DEBUG
- { /* Nonsyntactic #ifdef goo... */
-#endif
+ {
switch (d->tokens[i])
{
case EMPTY:
@@ -2255,8 +2264,8 @@ dfaanalyze (struct dfa *d, int searchflag)
prtok(d->tokens[lastpos[j].index]);
}
putc('\n', stderr);
- }
#endif
+ }
/* For each follow set that is the follow set of a real position, replace
it with its epsilon closure. */
@@ -3256,7 +3265,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
for (;;)
{
if (d->mb_cur_max > 1)
- while ((t = trans[s]))
+ while ((t = trans[s]) != NULL)
{
if (p > buf_end)
break;
@@ -3289,10 +3298,10 @@ dfaexec (struct dfa *d, char const *begin, char *end,
}
else
{
- while ((t = trans[s]) != 0)
+ while ((t = trans[s]) != NULL)
{
s1 = t[*p++];
- if ((t = trans[s1]) == 0)
+ if ((t = trans[s1]) == NULL)
{
int tmp = s; s = s1; s1 = tmp; /* swap */
break;
@@ -3603,7 +3612,7 @@ icpyalloc (char const *string)
return icatalloc (NULL, string);
}
-static char *
+static char * _GL_ATTRIBUTE_PURE
istrstr (char const *lookin, char const *lookfor)
{
char const *cp;
@@ -4015,7 +4024,7 @@ dfaalloc (void)
return xmalloc (sizeof (struct dfa));
}
-struct dfamust *
+struct dfamust * _GL_ATTRIBUTE_PURE
dfamusts (struct dfa const *d)
{
return d->musts;