From a19298ed002eb20d91c37fd5067de18466211f83 Mon Sep 17 00:00:00 2001 From: "Arnold D. Robbins" Date: Fri, 16 Mar 2012 15:25:40 +0200 Subject: Sync from GLIBC. --- regex_internal.c | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) (limited to 'regex_internal.c') diff --git a/regex_internal.c b/regex_internal.c index 45516709..80991613 100644 --- a/regex_internal.c +++ b/regex_internal.c @@ -1,5 +1,5 @@ /* Extended regular expression matching and search library. - Copyright (C) 2002-2006, 2010 Free Software Foundation, Inc. + Copyright (C) 2002-2006, 2010, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Isamu Hasegawa . @@ -14,9 +14,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301 USA. */ + License along with the GNU C Library; if not, see + . */ static void re_string_construct_common (const char *str, int len, re_string_t *pstr, @@ -246,13 +245,8 @@ build_wcs_buffer (re_string_t *pstr) else p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx; mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state); - if (BE (mbclen == (size_t) -2, 0)) - { - /* The buffer doesn't have enough space, finish to build. */ - pstr->cur_state = prev_st; - break; - } - else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0)) + if (BE (mbclen == (size_t) -1 || mbclen == 0 + || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len), 0)) { /* We treat these cases as a singlebyte character. */ mbclen = 1; @@ -261,6 +255,12 @@ build_wcs_buffer (re_string_t *pstr) wc = pstr->trans[wc]; pstr->cur_state = prev_st; } + else if (BE (mbclen == (size_t) -2, 0)) + { + /* The buffer doesn't have enough space, finish to build. */ + pstr->cur_state = prev_st; + break; + } /* Write wide character and padding. */ pstr->wcs[byte_idx++] = wc; @@ -343,9 +343,11 @@ build_wcs_upper_buffer (re_string_t *pstr) for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) pstr->wcs[byte_idx++] = WEOF; } - else if (mbclen == (size_t) -1 || mbclen == 0) + else if (mbclen == (size_t) -1 || mbclen == 0 + || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len)) { - /* It is an invalid character or '\0'. Just use the byte. */ + /* It is an invalid character, an incomplete character + at the end of the string, or '\0'. Just use the byte. */ int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; pstr->mbs[byte_idx] = ch; /* And also cast it to wide char. */ @@ -458,7 +460,8 @@ build_wcs_upper_buffer (re_string_t *pstr) for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) pstr->wcs[byte_idx++] = WEOF; } - else if (mbclen == (size_t) -1 || mbclen == 0) + else if (mbclen == (size_t) -1 || mbclen == 0 + || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len)) { /* It is an invalid character or '\0'. Just use the byte. */ int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx]; @@ -738,12 +741,21 @@ re_string_reconstruct (re_string_t *pstr, int idx, int eflags) mbstate_t cur_state; wchar_t wc2; int mlen = raw + pstr->len - p; + unsigned char buf[6]; size_t mbclen; + const unsigned char *pp = p; + if (BE (pstr->trans != NULL, 0)) + { + int i = mlen < 6 ? mlen : 6; + while (--i >= 0) + buf[i] = pstr->trans[p[i]]; + pp = buf; + } /* XXX Don't use mbrtowc, we know which conversion to use (UTF-8 -> UCS4). */ memset (&cur_state, 0, sizeof (cur_state)); - mbclen = __mbrtowc (&wc2, (const char *) p, mlen, + mbclen = __mbrtowc (&wc2, (const char *) pp, mlen, &cur_state); if (raw + offset - p <= mbclen && mbclen < (size_t) -2) -- cgit v1.2.3