diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2016-12-22 17:03:40 +0200 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2016-12-22 17:03:40 +0200 |
commit | cd35e365b6f8d356645093bab1c67c1867a63aef (patch) | |
tree | 2ea764f6930a6ac8c6cba3689988e7bb66fd8f66 | |
parent | 9d891b85c4bd0baf1a2eacd78f96b25db02a9525 (diff) | |
download | egawk-cd35e365b6f8d356645093bab1c67c1867a63aef.tar.gz egawk-cd35e365b6f8d356645093bab1c67c1867a63aef.tar.bz2 egawk-cd35e365b6f8d356645093bab1c67c1867a63aef.zip |
Sync dfa with GNULIB.
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | Makefile.am | 1 | ||||
-rw-r--r-- | Makefile.in | 1 | ||||
-rw-r--r-- | dfa.c | 345 | ||||
-rw-r--r-- | intprops.h | 464 |
5 files changed, 689 insertions, 128 deletions
@@ -1,3 +1,9 @@ +2016-12-22 Arnold D. Robbins <arnold@skeeve.com> + + * dfa.c: Sync with GNULIB. + * intprops.h: New file. + * Makefile.am (base_sources): Add intprops.h. + 2016-12-11 Arnold D. Robbins <arnold@skeeve.com> * dfa.c: Sync with GNULIB. diff --git a/Makefile.am b/Makefile.am index 9acae0bc..733b41ba 100644 --- a/Makefile.am +++ b/Makefile.am @@ -109,6 +109,7 @@ base_sources = \ gettext.h \ int_array.c \ interpret.h \ + intprops.h \ localeinfo.c \ localeinfo.h \ io.c \ diff --git a/Makefile.in b/Makefile.in index f103a420..664f284a 100644 --- a/Makefile.in +++ b/Makefile.in @@ -519,6 +519,7 @@ base_sources = \ gettext.h \ int_array.c \ interpret.h \ + intprops.h \ localeinfo.c \ localeinfo.h \ io.c \ @@ -24,6 +24,7 @@ #include <assert.h> #include <ctype.h> +#include <stdint.h> #include <stdio.h> #ifndef VMS @@ -60,7 +61,13 @@ #include <wchar.h> +#include "intprops.h" #include "xalloc.h" +#include "localeinfo.h" + +#ifndef MIN +# define MIN(a,b) ((a) < (b) ? (a) : (b)) +#endif #if defined(__DJGPP__) #include "mbsupport.h" @@ -68,8 +75,6 @@ #include "dfa.h" -#include "localeinfo.h" - #ifdef GAWK static int is_blank (int c) @@ -203,6 +208,7 @@ to_uchar (char ch) codes are returned by the lexical analyzer. */ typedef ptrdiff_t token; +#define TOKEN_MAX PTRDIFF_MAX /* States are indexed by state_num values. These are normally nonnegative but -1 is used as a special value. */ @@ -311,8 +317,8 @@ typedef struct typedef struct { position *elems; /* Elements of this position set. */ - size_t nelem; /* Number of elements in this set. */ - size_t alloc; /* Number of elements allocated in ELEMS. */ + ptrdiff_t nelem; /* Number of elements in this set. */ + ptrdiff_t alloc; /* Number of elements allocated in ELEMS. */ } position_set; /* Sets of leaves are also stored as arrays. */ @@ -351,7 +357,7 @@ struct mb_char_classes ptrdiff_t cset; bool invert; wchar_t *chars; /* Normal characters. 
*/ - size_t nchars; + ptrdiff_t nchars; }; struct regex_syntax @@ -428,8 +434,8 @@ struct dfa /* Fields filled by the scanner. */ charclass *charclasses; /* Array of character sets for CSET tokens. */ - size_t cindex; /* Index for adding new charclasses. */ - size_t calloc; /* Number of charclasses allocated. */ + ptrdiff_t cindex; /* Index for adding new charclasses. */ + ptrdiff_t calloc; /* Number of charclasses allocated. */ size_t canychar; /* Index of anychar class, or (size_t) -1. */ /* Scanner state */ @@ -475,8 +481,8 @@ struct dfa /* Array of the bracket expression in the DFA. */ struct mb_char_classes *mbcsets; - size_t nmbcsets; - size_t mbcsets_alloc; + ptrdiff_t nmbcsets; + ptrdiff_t mbcsets_alloc; /* Fields filled by the superset. */ struct dfa *superset; /* Hint of the dfa. */ @@ -484,7 +490,7 @@ struct dfa /* Fields filled by the state builder. */ dfa_state *states; /* States of the dfa. */ state_num sindex; /* Index for adding new states. */ - size_t salloc; /* Number of states currently allocated. */ + ptrdiff_t salloc; /* Number of states currently allocated. */ /* Fields filled by the parse tree->NFA conversion. */ position_set *follows; /* Array of follow sets, indexed by position @@ -516,7 +522,7 @@ struct dfa never accept. If the transitions for a state have not yet been computed, or the state could possibly accept, its entry in - this table is NULL. This points to one + this table is NULL. This points to two past the start of the allocated array, and trans[-1] and trans[-2] are always NULL. */ @@ -756,34 +762,95 @@ emptyset (charclass const s) return w == 0; } -/* Ensure that the array addressed by PTR holds at least NITEMS + - (PTR || !NITEMS) items. Either return PTR, or reallocate the array - and return its new address. Although PTR may be null, the returned - value is never null. +/* Grow PA, which points to an array of *NITEMS items, and return the + location of the reallocated array, updating *NITEMS to reflect its + new size. 
The new array will contain at least NITEMS_INCR_MIN more + items, but will not contain more than NITEMS_MAX items total. + ITEM_SIZE is the size of each item, in bytes. + + ITEM_SIZE and NITEMS_INCR_MIN must be positive. *NITEMS must be + nonnegative. If NITEMS_MAX is -1, it is treated as if it were + infinity. + + If PA is null, then allocate a new array instead of reallocating + the old one. + + Thus, to grow an array A without saving its old contents, do + { free (A); A = xpalloc (NULL, &AITEMS, ...); }. */ + +static void * +xpalloc (void *pa, ptrdiff_t *nitems, ptrdiff_t nitems_incr_min, + ptrdiff_t nitems_max, ptrdiff_t item_size) +{ + ptrdiff_t n0 = *nitems; + + /* The approximate size to use for initial small allocation + requests. This is the largest "small" request for the GNU C + library malloc. */ + enum { DEFAULT_MXFAST = 64 * sizeof (size_t) / 4 }; + + /* If the array is tiny, grow it to about (but no greater than) + DEFAULT_MXFAST bytes. Otherwise, grow it by about 50%. + Adjust the growth according to three constraints: NITEMS_INCR_MIN, + NITEMS_MAX, and what the C language can represent safely. */ + + ptrdiff_t n, nbytes; + if (INT_ADD_WRAPV (n0, n0 >> 1, &n)) + n = PTRDIFF_MAX; + if (0 <= nitems_max && nitems_max < n) + n = nitems_max; + + ptrdiff_t adjusted_nbytes + = ((INT_MULTIPLY_WRAPV (n, item_size, &nbytes) || SIZE_MAX < nbytes) + ? MIN (PTRDIFF_MAX, SIZE_MAX) + : nbytes < DEFAULT_MXFAST ? DEFAULT_MXFAST : 0); + if (adjusted_nbytes) + { + n = adjusted_nbytes / item_size; + nbytes = adjusted_nbytes - adjusted_nbytes % item_size; + } + + if (! pa) + *nitems = 0; + if (n - n0 < nitems_incr_min + && (INT_ADD_WRAPV (n0, nitems_incr_min, &n) + || (0 <= nitems_max && nitems_max < n) + || INT_MULTIPLY_WRAPV (n, item_size, &nbytes))) + xalloc_die (); + pa = xrealloc (pa, nbytes); + *nitems = n; + return pa; +} + +/* Ensure that the array addressed by PA holds at least I + 1 items. + Either return PA, or reallocate the array and return its new address. 
+ Although PA may be null, the returned value is never null. - The array holds *NALLOC items; *NALLOC is updated on reallocation. - ITEMSIZE is the size of one item. Avoid O(N**2) behavior on arrays - growing linearly. */ + The array holds *NITEMS items, where 0 <= I <= *NITEMS; *NITEMS + is updated on reallocation. If PA is null, *NITEMS must be zero. + Do not allocate more than NITEMS_MAX items total; -1 means no limit. + ITEM_SIZE is the size of one item; it must be positive. + Avoid O(N**2) behavior on arrays growing linearly. */ static void * -maybe_realloc (void *ptr, size_t nitems, size_t *nalloc, size_t itemsize) +maybe_realloc (void *pa, ptrdiff_t i, ptrdiff_t *nitems, + ptrdiff_t nitems_max, ptrdiff_t item_size) { - if (nitems < *nalloc) - return ptr; - *nalloc = nitems; - return x2nrealloc (ptr, nalloc, itemsize); + if (i < *nitems) + return pa; + return xpalloc (pa, nitems, 1, nitems_max, item_size); } /* In DFA D, find the index of charclass S, or allocate a new one. */ -static size_t +static ptrdiff_t charclass_index (struct dfa *d, charclass const s) { - size_t i; + ptrdiff_t i; for (i = 0; i < d->cindex; ++i) if (equal (s, d->charclasses[i])) return i; d->charclasses = maybe_realloc (d->charclasses, d->cindex, &d->calloc, - sizeof *d->charclasses); + TOKEN_MAX - CSET, sizeof *d->charclasses); ++d->cindex; copyset (s, d->charclasses[i]); return i; @@ -911,10 +978,6 @@ using_simple_locale (bool multibyte) } \ } while (false) -#ifndef MIN -# define MIN(a,b) ((a) < (b) ? (a) : (b)) -#endif - typedef int predicate (int); /* The following list maps the names of the Posix named character classes @@ -980,13 +1043,13 @@ parse_bracket_exp (struct dfa *dfa) /* Work area to build a mb_char_classes. 
*/ struct mb_char_classes *work_mbc; - size_t chars_al; + ptrdiff_t chars_al; chars_al = 0; if (dfa->localeinfo.multibyte) { dfa->mbcsets = maybe_realloc (dfa->mbcsets, dfa->nmbcsets, - &dfa->mbcsets_alloc, + &dfa->mbcsets_alloc, -1, sizeof *dfa->mbcsets); /* dfa->multibyte_prop[] hold the index of dfa->mbcsets. @@ -1174,7 +1237,7 @@ parse_bracket_exp (struct dfa *dfa) { work_mbc->chars = maybe_realloc (work_mbc->chars, work_mbc->nchars, - &chars_al, sizeof *work_mbc->chars); + &chars_al, -1, sizeof *work_mbc->chars); work_mbc->chars[work_mbc->nchars++] = folded[i]; } } @@ -1623,7 +1686,7 @@ addtok (struct dfa *dfa, token t) { bool need_or = false; struct mb_char_classes *work_mbc = &dfa->mbcsets[dfa->nmbcsets - 1]; - size_t i; + ptrdiff_t i; /* Extract wide characters into alternations for better performance. This does not require UTF-8. */ @@ -1984,8 +2047,8 @@ copy (position_set const *src, position_set *dst) if (dst->alloc < src->nelem) { free (dst->elems); - dst->alloc = src->nelem; - dst->elems = x2nrealloc (NULL, &dst->alloc, sizeof *dst->elems); + dst->elems = xpalloc (NULL, &dst->alloc, src->nelem - dst->alloc, -1, + sizeof *dst->elems); } memcpy (dst->elems, src->elems, src->nelem * sizeof *dst->elems); dst->nelem = src->nelem; @@ -1995,6 +2058,8 @@ static void alloc_position_set (position_set *s, size_t size) { s->elems = xnmalloc (size, sizeof *s->elems); + if (PTRDIFF_MAX < SIZE_MAX / sizeof *s->elems && PTRDIFF_MAX < size) + xalloc_die (); s->alloc = size; s->nelem = 0; } @@ -2006,73 +2071,115 @@ alloc_position_set (position_set *s, size_t size) static void insert (position p, position_set *s) { - size_t count = s->nelem; - size_t lo = 0, hi = count; - size_t i; + ptrdiff_t count = s->nelem; + ptrdiff_t lo = 0, hi = count; + ptrdiff_t i; while (lo < hi) { - size_t mid = (lo + hi) >> 1; + ptrdiff_t mid = (lo + hi) >> 1; if (s->elems[mid].index > p.index) lo = mid + 1; + else if (s->elems[mid].index == p.index) + { + s->elems[mid].constraint |= 
p.constraint; + return; + } else hi = mid; } - if (lo < count && p.index == s->elems[lo].index) - { - s->elems[lo].constraint |= p.constraint; - return; - } - - s->elems = maybe_realloc (s->elems, count, &s->alloc, sizeof *s->elems); + s->elems = maybe_realloc (s->elems, count, &s->alloc, -1, sizeof *s->elems); for (i = count; i > lo; i--) s->elems[i] = s->elems[i - 1]; s->elems[lo] = p; ++s->nelem; } -/* Merge two sets of positions into a third. The result is exactly as if - the positions of both sets were inserted into an initially empty set. */ +/* Merge S1 and S2 (with the additional constraint C2) into M. The + result is as if the positions of S1, and of S2 with the additional + constraint C2, were inserted into an initially empty set. */ static void -merge (position_set const *s1, position_set const *s2, position_set *m) +merge_constrained (position_set const *s1, position_set const *s2, + unsigned int c2, position_set *m) { - size_t i = 0, j = 0; + ptrdiff_t i = 0, j = 0; - if (m->alloc < s1->nelem + s2->nelem) + if (m->alloc - s1->nelem < s2->nelem) { free (m->elems); - m->elems = maybe_realloc (NULL, s1->nelem + s2->nelem, &m->alloc, - sizeof *m->elems); + m->alloc = s1->nelem; + m->elems = xpalloc (NULL, &m->alloc, s2->nelem, -1, sizeof *m->elems); } m->nelem = 0; - while (i < s1->nelem && j < s2->nelem) - if (s1->elems[i].index > s2->elems[j].index) - m->elems[m->nelem++] = s1->elems[i++]; - else if (s1->elems[i].index < s2->elems[j].index) - m->elems[m->nelem++] = s2->elems[j++]; + while (i < s1->nelem || j < s2->nelem) + if (! (j < s2->nelem) + || (i < s1->nelem && s1->elems[i].index >= s2->elems[j].index)) + { + unsigned int c = ((i < s1->nelem && j < s2->nelem + && s1->elems[i].index == s2->elems[j].index) + ? 
s2->elems[j++].constraint & c2 + : 0); + m->elems[m->nelem].index = s1->elems[i].index; + m->elems[m->nelem++].constraint = s1->elems[i++].constraint | c; + } else { - m->elems[m->nelem] = s1->elems[i++]; - m->elems[m->nelem++].constraint |= s2->elems[j++].constraint; + if (s2->elems[j].constraint & c2) + { + m->elems[m->nelem].index = s2->elems[j].index; + m->elems[m->nelem++].constraint = s2->elems[j].constraint & c2; + } + j++; } - while (i < s1->nelem) - m->elems[m->nelem++] = s1->elems[i++]; - while (j < s2->nelem) - m->elems[m->nelem++] = s2->elems[j++]; } -/* Delete a position from a set. */ +/* Merge two sets of positions into a third. The result is exactly as if + the positions of both sets were inserted into an initially empty set. */ static void -delete (position p, position_set *s) +merge (position_set const *s1, position_set const *s2, position_set *m) { - size_t i; + return merge_constrained (s1, s2, -1, m); +} - for (i = 0; i < s->nelem; ++i) - if (p.index == s->elems[i].index) - break; - if (i < s->nelem) - for (--s->nelem; i < s->nelem; ++i) - s->elems[i] = s->elems[i + 1]; +/* Delete a position from a set. Return the nonzero constraint of the + deleted position, or zero if there was no such position. */ +static unsigned int +delete (size_t del, position_set *s) +{ + size_t count = s->nelem; + size_t lo = 0, hi = count; + while (lo < hi) + { + size_t mid = (lo + hi) >> 1; + if (s->elems[mid].index > del) + lo = mid + 1; + else if (s->elems[mid].index == del) + { + unsigned int c = s->elems[mid].constraint; + size_t i; + for (i = mid; i + 1 < count; i++) + s->elems[i] = s->elems[i + 1]; + s->nelem = i; + return c; + } + else + hi = mid; + } + return 0; +} + +/* Replace a position with the followed set. 
*/ +static void +replace (position_set *dst, size_t del, position_set *add, + unsigned int constraint, position_set *tmp) +{ + unsigned int c = delete (del, dst) & constraint; + + if (c) + { + copy (dst, tmp); + merge_constrained (tmp, add, c, dst); + } } /* Find the index of the state corresponding to the given position set with @@ -2141,7 +2248,7 @@ state_index (struct dfa *d, position_set const *s, int context) /* Create a new state. */ - d->states = maybe_realloc (d->states, d->sindex, &d->salloc, + d->states = maybe_realloc (d->states, d->sindex, &d->salloc, -1, sizeof *d->states); d->states[i].hash = hash; alloc_position_set (&d->states[i].elems, s->nelem); @@ -2164,63 +2271,48 @@ state_index (struct dfa *d, position_set const *s, int context) constraint. Repeat exhaustively until no funny positions are left. S->elems must be large enough to hold the result. */ static void -epsclosure (position_set *s, struct dfa const *d, char *visited) +epsclosure (position_set *initial, struct dfa const *d) { - size_t i, j; - position p, old; - bool initialized = false; - - for (i = 0; i < s->nelem; ++i) - if (d->tokens[s->elems[i].index] >= NOTCHAR - && d->tokens[s->elems[i].index] != BACKREF - && d->tokens[s->elems[i].index] != ANYCHAR - && d->tokens[s->elems[i].index] != MBCSET - && d->tokens[s->elems[i].index] < CSET) + position_set tmp; + alloc_position_set (&tmp, d->nleaves); + for (size_t i = 0; i < d->tindex; ++i) + if (d->follows[i].nelem > 0 && d->tokens[i] >= NOTCHAR + && d->tokens[i] != BACKREF && d->tokens[i] != ANYCHAR + && d->tokens[i] != MBCSET && d->tokens[i] < CSET) { - if (!initialized) - { - memset (visited, 0, d->tindex * sizeof (*visited)); - initialized = true; - } - old = s->elems[i]; - p.constraint = old.constraint; - delete (s->elems[i], s); - if (visited[old.index]) - { - --i; - continue; - } - visited[old.index] = 1; - switch (d->tokens[old.index]) + unsigned int constraint; + switch (d->tokens[i]) { case BEGLINE: - p.constraint &= 
BEGLINE_CONSTRAINT; + constraint = BEGLINE_CONSTRAINT; break; case ENDLINE: - p.constraint &= ENDLINE_CONSTRAINT; + constraint = ENDLINE_CONSTRAINT; break; case BEGWORD: - p.constraint &= BEGWORD_CONSTRAINT; + constraint = BEGWORD_CONSTRAINT; break; case ENDWORD: - p.constraint &= ENDWORD_CONSTRAINT; + constraint = ENDWORD_CONSTRAINT; break; case LIMWORD: - p.constraint &= LIMWORD_CONSTRAINT; + constraint = LIMWORD_CONSTRAINT; break; case NOTLIMWORD: - p.constraint &= NOTLIMWORD_CONSTRAINT; + constraint = NOTLIMWORD_CONSTRAINT; break; default: + constraint = NO_CONSTRAINT; break; } - for (j = 0; j < d->follows[old.index].nelem; ++j) - { - p.index = d->follows[old.index].elems[j].index; - insert (p, s); - } - /* Force rescan to start at the beginning. */ - i = -1; + + delete (i, &d->follows[i]); + + for (size_t j = 0; j < d->tindex; j++) + if (i != j && d->follows[j].nelem > 0) + replace (&d->follows[j], i, &d->follows[i], constraint, &tmp); + + replace (initial, i, &d->follows[i], constraint, &tmp); } } @@ -2347,7 +2439,6 @@ dfaanalyze (struct dfa *d, bool searchflag) int separate_contexts; /* Context wanted by some position. */ size_t i, j; position *pos; - char *visited = xnmalloc (d->tindex, sizeof *visited); #ifdef DEBUG fprintf (stderr, "dfaanalyze:\n"); @@ -2488,14 +2579,12 @@ dfaanalyze (struct dfa *d, bool searchflag) #endif } - /* For each follow set that is the follow set of a real position, replace - it with its epsilon closure. 
*/ +#ifdef DEBUG for (i = 0; i < d->tindex; ++i) if (d->tokens[i] < NOTCHAR || d->tokens[i] == BACKREF || d->tokens[i] == ANYCHAR || d->tokens[i] == MBCSET || d->tokens[i] >= CSET) { -#ifdef DEBUG fprintf (stderr, "follows(%zu:", i); prtok (d->tokens[i]); fprintf (stderr, "):"); @@ -2505,18 +2594,18 @@ dfaanalyze (struct dfa *d, bool searchflag) prtok (d->tokens[d->follows[i].elems[j].index]); } putc ('\n', stderr); -#endif - copy (&d->follows[i], &merged); - epsclosure (&merged, d, visited); - copy (&merged, &d->follows[i]); } +#endif /* Get the epsilon closure of the firstpos of the regexp. The result will be the set of positions of state 0. */ merged.nelem = 0; for (i = 0; i < stk[-1].nfirstpos; ++i) insert (firstpos[i], &merged); - epsclosure (&merged, d, visited); + + /* For each follow set that is the follow set of a real position, replace + it with its epsilon closure. */ + epsclosure (&merged, d); /* Build the initial state. */ separate_contexts = state_separate_contexts (&merged); @@ -2532,7 +2621,6 @@ dfaanalyze (struct dfa *d, bool searchflag) free (posalloc); free (stkalloc); free (merged.elems); - free (visited); } @@ -2816,9 +2904,10 @@ realloc_trans_if_necessary (struct dfa *d, state_num new_state) if (oldalloc <= new_state) { state_num **realtrans = d->trans ? d->trans - 2 : NULL; - size_t newalloc, newalloc1; - newalloc1 = realtrans ? new_state + 2 : 0; - realtrans = x2nrealloc (realtrans, &newalloc1, sizeof *realtrans); + ptrdiff_t newalloc, newalloc1; + newalloc1 = realtrans ? 
d->tralloc + 2 : 0; + realtrans = xpalloc (realtrans, &newalloc1, new_state - oldalloc + 1, + -1, sizeof *realtrans); realtrans[0] = realtrans[1] = NULL; d->trans = realtrans + 2; d->tralloc = newalloc = newalloc1 - 2; @@ -3292,7 +3381,7 @@ dfaisfast (struct dfa const *d) static void free_mbdata (struct dfa *d) { - size_t i; + ptrdiff_t i; free (d->multibyte_prop); diff --git a/intprops.h b/intprops.h new file mode 100644 index 00000000..716741ad --- /dev/null +++ b/intprops.h @@ -0,0 +1,464 @@ +/* intprops.h -- properties of integer types + + Copyright (C) 2001-2016 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Paul Eggert. */ + +#ifndef _GL_INTPROPS_H +#define _GL_INTPROPS_H + +#include <limits.h> +#include <verify.h> + +#ifndef __has_builtin +# define __has_builtin(x) 0 +#endif + +/* Return a value with the common real type of E and V and the value of V. */ +#define _GL_INT_CONVERT(e, v) (0 * (e) + (v)) + +/* Act like _GL_INT_CONVERT (E, -V) but work around a bug in IRIX 6.5 cc; see + <http://lists.gnu.org/archive/html/bug-gnulib/2011-05/msg00406.html>. */ +#define _GL_INT_NEGATE_CONVERT(e, v) (0 * (e) - (v)) + +/* The extra casts in the following macros work around compiler bugs, + e.g., in Cray C 5.0.3.0. */ + +/* True if the arithmetic type T is an integer type. bool counts as + an integer. 
*/ +#define TYPE_IS_INTEGER(t) ((t) 1.5 == 1) + +/* True if the real type T is signed. */ +#define TYPE_SIGNED(t) (! ((t) 0 < (t) -1)) + +/* Return 1 if the real expression E, after promotion, has a + signed or floating type. */ +#define EXPR_SIGNED(e) (_GL_INT_NEGATE_CONVERT (e, 1) < 0) + + +/* Minimum and maximum values for integer types and expressions. */ + +/* The width in bits of the integer type or expression T. + Padding bits are not supported; this is checked at compile-time below. */ +#define TYPE_WIDTH(t) (sizeof (t) * CHAR_BIT) + +/* The maximum and minimum values for the integer type T. */ +#define TYPE_MINIMUM(t) ((t) ~ TYPE_MAXIMUM (t)) +#define TYPE_MAXIMUM(t) \ + ((t) (! TYPE_SIGNED (t) \ + ? (t) -1 \ + : ((((t) 1 << (TYPE_WIDTH (t) - 2)) - 1) * 2 + 1))) + +/* The maximum and minimum values for the type of the expression E, + after integer promotion. E should not have side effects. */ +#define _GL_INT_MINIMUM(e) \ + (EXPR_SIGNED (e) \ + ? ~ _GL_SIGNED_INT_MAXIMUM (e) \ + : _GL_INT_CONVERT (e, 0)) +#define _GL_INT_MAXIMUM(e) \ + (EXPR_SIGNED (e) \ + ? _GL_SIGNED_INT_MAXIMUM (e) \ + : _GL_INT_NEGATE_CONVERT (e, 1)) +#define _GL_SIGNED_INT_MAXIMUM(e) \ + (((_GL_INT_CONVERT (e, 1) << (TYPE_WIDTH ((e) + 0) - 2)) - 1) * 2 + 1) + +/* Work around OpenVMS incompatibility with C99. */ +#if !defined LLONG_MAX && defined __INT64_MAX +# define LLONG_MAX __INT64_MAX +# define LLONG_MIN __INT64_MIN +#endif + +/* This include file assumes that signed types are two's complement without + padding bits; the above macros have undefined behavior otherwise. + If this is a problem for you, please let us know how to fix it for your host. + As a sanity check, test the assumption for some signed types that + <limits.h> bounds. 
*/ +verify (TYPE_MINIMUM (signed char) == SCHAR_MIN); +verify (TYPE_MAXIMUM (signed char) == SCHAR_MAX); +verify (TYPE_MINIMUM (short int) == SHRT_MIN); +verify (TYPE_MAXIMUM (short int) == SHRT_MAX); +verify (TYPE_MINIMUM (int) == INT_MIN); +verify (TYPE_MAXIMUM (int) == INT_MAX); +verify (TYPE_MINIMUM (long int) == LONG_MIN); +verify (TYPE_MAXIMUM (long int) == LONG_MAX); +#ifdef LLONG_MAX +verify (TYPE_MINIMUM (long long int) == LLONG_MIN); +verify (TYPE_MAXIMUM (long long int) == LLONG_MAX); +#endif +/* Similarly, sanity-check one ISO/IEC TS 18661-1:2014 macro if defined. */ +#ifdef UINT_WIDTH +verify (TYPE_WIDTH (unsigned int) == UINT_WIDTH); +#endif + +/* Does the __typeof__ keyword work? This could be done by + 'configure', but for now it's easier to do it by hand. */ +#if (2 <= __GNUC__ \ + || (1210 <= __IBMC__ && defined __IBM__TYPEOF__) \ + || (0x5110 <= __SUNPRO_C && !__STDC__)) +# define _GL_HAVE___TYPEOF__ 1 +#else +# define _GL_HAVE___TYPEOF__ 0 +#endif + +/* Return 1 if the integer type or expression T might be signed. Return 0 + if it is definitely unsigned. This macro does not evaluate its argument, + and expands to an integer constant expression. */ +#if _GL_HAVE___TYPEOF__ +# define _GL_SIGNED_TYPE_OR_EXPR(t) TYPE_SIGNED (__typeof__ (t)) +#else +# define _GL_SIGNED_TYPE_OR_EXPR(t) 1 +#endif + +/* Bound on length of the string representing an unsigned integer + value representable in B bits. log10 (2.0) < 146/485. The + smallest value of B where this bound is not tight is 2621. */ +#define INT_BITS_STRLEN_BOUND(b) (((b) * 146 + 484) / 485) + +/* Bound on length of the string representing an integer type or expression T. + Subtract 1 for the sign bit if T is signed, and then add 1 more for + a minus sign if needed. + + Because _GL_SIGNED_TYPE_OR_EXPR sometimes returns 0 when its argument is + signed, this macro may overestimate the true bound by one byte when + applied to unsigned types of size 2, 4, 16, ... bytes. 
*/ +#define INT_STRLEN_BOUND(t) \ + (INT_BITS_STRLEN_BOUND (TYPE_WIDTH (t) - _GL_SIGNED_TYPE_OR_EXPR (t)) \ + + _GL_SIGNED_TYPE_OR_EXPR (t)) + +/* Bound on buffer size needed to represent an integer type or expression T, + including the terminating null. */ +#define INT_BUFSIZE_BOUND(t) (INT_STRLEN_BOUND (t) + 1) + + +/* Range overflow checks. + + The INT_<op>_RANGE_OVERFLOW macros return 1 if the corresponding C + operators might not yield numerically correct answers due to + arithmetic overflow. They do not rely on undefined or + implementation-defined behavior. Their implementations are simple + and straightforward, but they are a bit harder to use than the + INT_<op>_OVERFLOW macros described below. + + Example usage: + + long int i = ...; + long int j = ...; + if (INT_MULTIPLY_RANGE_OVERFLOW (i, j, LONG_MIN, LONG_MAX)) + printf ("multiply would overflow"); + else + printf ("product is %ld", i * j); + + Restrictions on *_RANGE_OVERFLOW macros: + + These macros do not check for all possible numerical problems or + undefined or unspecified behavior: they do not check for division + by zero, for bad shift counts, or for shifting negative numbers. + + These macros may evaluate their arguments zero or multiple times, + so the arguments should not have side effects. The arithmetic + arguments (including the MIN and MAX arguments) must be of the same + integer type after the usual arithmetic conversions, and the type + must have minimum value MIN and maximum MAX. Unsigned types should + use a zero MIN of the proper type. + + These macros are tuned for constant MIN and MAX. For commutative + operations such as A + B, they are also tuned for constant B. */ + +/* Return 1 if A + B would overflow in [MIN,MAX] arithmetic. + See above for restrictions. */ +#define INT_ADD_RANGE_OVERFLOW(a, b, min, max) \ + ((b) < 0 \ + ? (a) < (min) - (b) \ + : (max) - (b) < (a)) + +/* Return 1 if A - B would overflow in [MIN,MAX] arithmetic. + See above for restrictions. 
*/ +#define INT_SUBTRACT_RANGE_OVERFLOW(a, b, min, max) \ + ((b) < 0 \ + ? (max) + (b) < (a) \ + : (a) < (min) + (b)) + +/* Return 1 if - A would overflow in [MIN,MAX] arithmetic. + See above for restrictions. */ +#define INT_NEGATE_RANGE_OVERFLOW(a, min, max) \ + ((min) < 0 \ + ? (a) < - (max) \ + : 0 < (a)) + +/* Return 1 if A * B would overflow in [MIN,MAX] arithmetic. + See above for restrictions. Avoid && and || as they tickle + bugs in Sun C 5.11 2010/08/13 and other compilers; see + <http://lists.gnu.org/archive/html/bug-gnulib/2011-05/msg00401.html>. */ +#define INT_MULTIPLY_RANGE_OVERFLOW(a, b, min, max) \ + ((b) < 0 \ + ? ((a) < 0 \ + ? (a) < (max) / (b) \ + : (b) == -1 \ + ? 0 \ + : (min) / (b) < (a)) \ + : (b) == 0 \ + ? 0 \ + : ((a) < 0 \ + ? (a) < (min) / (b) \ + : (max) / (b) < (a))) + +/* Return 1 if A / B would overflow in [MIN,MAX] arithmetic. + See above for restrictions. Do not check for division by zero. */ +#define INT_DIVIDE_RANGE_OVERFLOW(a, b, min, max) \ + ((min) < 0 && (b) == -1 && (a) < - (max)) + +/* Return 1 if A % B would overflow in [MIN,MAX] arithmetic. + See above for restrictions. Do not check for division by zero. + Mathematically, % should never overflow, but on x86-like hosts + INT_MIN % -1 traps, and the C standard permits this, so treat this + as an overflow too. */ +#define INT_REMAINDER_RANGE_OVERFLOW(a, b, min, max) \ + INT_DIVIDE_RANGE_OVERFLOW (a, b, min, max) + +/* Return 1 if A << B would overflow in [MIN,MAX] arithmetic. + See above for restrictions. Here, MIN and MAX are for A only, and B need + not be of the same type as the other arguments. The C standard says that + behavior is undefined for shifts unless 0 <= B < wordwidth, and that when + A is negative then A << B has undefined behavior and A >> B has + implementation-defined behavior, but do not check these other + restrictions. */ +#define INT_LEFT_SHIFT_RANGE_OVERFLOW(a, b, min, max) \ + ((a) < 0 \ + ? 
(a) < (min) >> (b) \ + : (max) >> (b) < (a)) + +/* True if __builtin_add_overflow (A, B, P) works when P is non-null. */ +#define _GL_HAS_BUILTIN_OVERFLOW \ + (5 <= __GNUC__ || __has_builtin (__builtin_add_overflow)) + +/* True if __builtin_add_overflow_p (A, B, C) works. */ +#define _GL_HAS_BUILTIN_OVERFLOW_P \ + (7 <= __GNUC__ || __has_builtin (__builtin_add_overflow_p)) + +/* The _GL*_OVERFLOW macros have the same restrictions as the + *_RANGE_OVERFLOW macros, except that they do not assume that operands + (e.g., A and B) have the same type as MIN and MAX. Instead, they assume + that the result (e.g., A + B) has that type. */ +#if _GL_HAS_BUILTIN_OVERFLOW_P +# define _GL_ADD_OVERFLOW(a, b, min, max) \ + __builtin_add_overflow_p (a, b, (__typeof__ ((a) + (b))) 0) +# define _GL_SUBTRACT_OVERFLOW(a, b, min, max) \ + __builtin_sub_overflow_p (a, b, (__typeof__ ((a) - (b))) 0) +# define _GL_MULTIPLY_OVERFLOW(a, b, min, max) \ + __builtin_mul_overflow_p (a, b, (__typeof__ ((a) * (b))) 0) +#else +# define _GL_ADD_OVERFLOW(a, b, min, max) \ + ((min) < 0 ? INT_ADD_RANGE_OVERFLOW (a, b, min, max) \ + : (a) < 0 ? (b) <= (a) + (b) \ + : (b) < 0 ? (a) <= (a) + (b) \ + : (a) + (b) < (b)) +# define _GL_SUBTRACT_OVERFLOW(a, b, min, max) \ + ((min) < 0 ? INT_SUBTRACT_RANGE_OVERFLOW (a, b, min, max) \ + : (a) < 0 ? 1 \ + : (b) < 0 ? (a) - (b) <= (a) \ + : (a) < (b)) +# define _GL_MULTIPLY_OVERFLOW(a, b, min, max) \ + (((min) == 0 && (((a) < 0 && 0 < (b)) || ((b) < 0 && 0 < (a)))) \ + || INT_MULTIPLY_RANGE_OVERFLOW (a, b, min, max)) +#endif +#define _GL_DIVIDE_OVERFLOW(a, b, min, max) \ + ((min) < 0 ? (b) == _GL_INT_NEGATE_CONVERT (min, 1) && (a) < - (max) \ + : (a) < 0 ? (b) <= (a) + (b) - 1 \ + : (b) < 0 && (a) + (b) <= (a)) +#define _GL_REMAINDER_OVERFLOW(a, b, min, max) \ + ((min) < 0 ? (b) == _GL_INT_NEGATE_CONVERT (min, 1) && (a) < - (max) \ + : (a) < 0 ? (a) % (b) != ((max) - (b) + 1) % (b) \ + : (b) < 0 && ! 
_GL_UNSIGNED_NEG_MULTIPLE (a, b, max)) + +/* Return a nonzero value if A is a mathematical multiple of B, where + A is unsigned, B is negative, and MAX is the maximum value of A's + type. A's type must be the same as (A % B)'s type. Normally (A % + -B == 0) suffices, but things get tricky if -B would overflow. */ +#define _GL_UNSIGNED_NEG_MULTIPLE(a, b, max) \ + (((b) < -_GL_SIGNED_INT_MAXIMUM (b) \ + ? (_GL_SIGNED_INT_MAXIMUM (b) == (max) \ + ? (a) \ + : (a) % (_GL_INT_CONVERT (a, _GL_SIGNED_INT_MAXIMUM (b)) + 1)) \ + : (a) % - (b)) \ + == 0) + +/* Check for integer overflow, and report low order bits of answer. + + The INT_<op>_OVERFLOW macros return 1 if the corresponding C operators + might not yield numerically correct answers due to arithmetic overflow. + The INT_<op>_WRAPV macros also store the low-order bits of the answer. + These macros work correctly on all known practical hosts, and do not rely + on undefined behavior due to signed arithmetic overflow. + + Example usage, assuming A and B are long int: + + if (INT_MULTIPLY_OVERFLOW (a, b)) + printf ("result would overflow\n"); + else + printf ("result is %ld (no overflow)\n", a * b); + + Example usage with WRAPV flavor: + + long int result; + bool overflow = INT_MULTIPLY_WRAPV (a, b, &result); + printf ("result is %ld (%s)\n", result, + overflow ? "after overflow" : "no overflow"); + + Restrictions on these macros: + + These macros do not check for all possible numerical problems or + undefined or unspecified behavior: they do not check for division + by zero, for bad shift counts, or for shifting negative numbers. + + These macros may evaluate their arguments zero or multiple times, so the + arguments should not have side effects. + + The WRAPV macros are not constant expressions. They support only + +, binary -, and *. The result type must be signed. + + These macros are tuned for their last argument being a constant. 
+ + Return 1 if the integer expressions A + B, A - B, -A, A * B, A / B, + A % B, and A << B would overflow, respectively. */ + +#define INT_ADD_OVERFLOW(a, b) \ + _GL_BINARY_OP_OVERFLOW (a, b, _GL_ADD_OVERFLOW) +#define INT_SUBTRACT_OVERFLOW(a, b) \ + _GL_BINARY_OP_OVERFLOW (a, b, _GL_SUBTRACT_OVERFLOW) +#if _GL_HAS_BUILTIN_OVERFLOW_P +# define INT_NEGATE_OVERFLOW(a) INT_SUBTRACT_OVERFLOW (0, a) +#else +# define INT_NEGATE_OVERFLOW(a) \ + INT_NEGATE_RANGE_OVERFLOW (a, _GL_INT_MINIMUM (a), _GL_INT_MAXIMUM (a)) +#endif +#define INT_MULTIPLY_OVERFLOW(a, b) \ + _GL_BINARY_OP_OVERFLOW (a, b, _GL_MULTIPLY_OVERFLOW) +#define INT_DIVIDE_OVERFLOW(a, b) \ + _GL_BINARY_OP_OVERFLOW (a, b, _GL_DIVIDE_OVERFLOW) +#define INT_REMAINDER_OVERFLOW(a, b) \ + _GL_BINARY_OP_OVERFLOW (a, b, _GL_REMAINDER_OVERFLOW) +#define INT_LEFT_SHIFT_OVERFLOW(a, b) \ + INT_LEFT_SHIFT_RANGE_OVERFLOW (a, b, \ + _GL_INT_MINIMUM (a), _GL_INT_MAXIMUM (a)) + +/* Return 1 if the expression A <op> B would overflow, + where OP_RESULT_OVERFLOW (A, B, MIN, MAX) does the actual test, + assuming MIN and MAX are the minimum and maximum for the result type. + Arguments should be free of side effects. */ +#define _GL_BINARY_OP_OVERFLOW(a, b, op_result_overflow) \ + op_result_overflow (a, b, \ + _GL_INT_MINIMUM (0 * (b) + (a)), \ + _GL_INT_MAXIMUM (0 * (b) + (a))) + +/* Store the low-order bits of A + B, A - B, A * B, respectively, into *R. + Return 1 if the result overflows. See above for restrictions. */ +#define INT_ADD_WRAPV(a, b, r) \ + _GL_INT_OP_WRAPV (a, b, r, +, __builtin_add_overflow, INT_ADD_OVERFLOW) +#define INT_SUBTRACT_WRAPV(a, b, r) \ + _GL_INT_OP_WRAPV (a, b, r, -, __builtin_sub_overflow, INT_SUBTRACT_OVERFLOW) +#define INT_MULTIPLY_WRAPV(a, b, r) \ + _GL_INT_OP_WRAPV (a, b, r, *, __builtin_mul_overflow, INT_MULTIPLY_OVERFLOW) + +/* Nonzero if this compiler has GCC bug 68193 or Clang bug 25390. 
See: + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68193 + https://llvm.org/bugs/show_bug.cgi?id=25390 + For now, assume all versions of GCC-like compilers generate bogus + warnings for _Generic. This matters only for older compilers that + lack __builtin_add_overflow. */ +#if __GNUC__ +# define _GL__GENERIC_BOGUS 1 +#else +# define _GL__GENERIC_BOGUS 0 +#endif + +/* Store the low-order bits of A <op> B into *R, where OP specifies + the operation. BUILTIN is the builtin operation, and OVERFLOW the + overflow predicate. Return 1 if the result overflows. See above + for restrictions. */ +#if _GL_HAS_BUILTIN_OVERFLOW +# define _GL_INT_OP_WRAPV(a, b, r, op, builtin, overflow) builtin (a, b, r) +#elif 201112 <= __STDC_VERSION__ && !_GL__GENERIC_BOGUS +# define _GL_INT_OP_WRAPV(a, b, r, op, builtin, overflow) \ + (_Generic \ + (*(r), \ + signed char: \ + _GL_INT_OP_CALC (a, b, r, op, overflow, unsigned char, \ + signed char, SCHAR_MIN, SCHAR_MAX), \ + short int: \ + _GL_INT_OP_CALC (a, b, r, op, overflow, unsigned short int, \ + short int, SHRT_MIN, SHRT_MAX), \ + int: \ + _GL_INT_OP_CALC (a, b, r, op, overflow, unsigned int, \ + int, INT_MIN, INT_MAX), \ + long int: \ + _GL_INT_OP_CALC (a, b, r, op, overflow, unsigned long int, \ + long int, LONG_MIN, LONG_MAX), \ + long long int: \ + _GL_INT_OP_CALC (a, b, r, op, overflow, unsigned long long int, \ + long long int, LLONG_MIN, LLONG_MAX))) +#else +# define _GL_INT_OP_WRAPV(a, b, r, op, builtin, overflow) \ + (sizeof *(r) == sizeof (signed char) \ + ? _GL_INT_OP_CALC (a, b, r, op, overflow, unsigned char, \ + signed char, SCHAR_MIN, SCHAR_MAX) \ + : sizeof *(r) == sizeof (short int) \ + ? _GL_INT_OP_CALC (a, b, r, op, overflow, unsigned short int, \ + short int, SHRT_MIN, SHRT_MAX) \ + : sizeof *(r) == sizeof (int) \ + ? 
_GL_INT_OP_CALC (a, b, r, op, overflow, unsigned int, \ + int, INT_MIN, INT_MAX) \ + : _GL_INT_OP_WRAPV_LONGISH(a, b, r, op, overflow)) +# ifdef LLONG_MAX +# define _GL_INT_OP_WRAPV_LONGISH(a, b, r, op, overflow) \ + (sizeof *(r) == sizeof (long int) \ + ? _GL_INT_OP_CALC (a, b, r, op, overflow, unsigned long int, \ + long int, LONG_MIN, LONG_MAX) \ + : _GL_INT_OP_CALC (a, b, r, op, overflow, unsigned long long int, \ + long long int, LLONG_MIN, LLONG_MAX)) +# else +# define _GL_INT_OP_WRAPV_LONGISH(a, b, r, op, overflow) \ + _GL_INT_OP_CALC (a, b, r, op, overflow, unsigned long int, \ + long int, LONG_MIN, LONG_MAX) +# endif +#endif + +/* Store the low-order bits of A <op> B into *R, where the operation + is given by OP. Use the unsigned type UT for calculation to avoid + overflow problems. *R's type is T, with extremal values TMIN and + TMAX. T must be a signed integer type. Return 1 if the result + overflows. */ +#define _GL_INT_OP_CALC(a, b, r, op, overflow, ut, t, tmin, tmax) \ + (sizeof ((a) op (b)) < sizeof (t) \ + ? _GL_INT_OP_CALC1 ((t) (a), (t) (b), r, op, overflow, ut, t, tmin, tmax) \ + : _GL_INT_OP_CALC1 (a, b, r, op, overflow, ut, t, tmin, tmax)) +#define _GL_INT_OP_CALC1(a, b, r, op, overflow, ut, t, tmin, tmax) \ + ((overflow (a, b) \ + || (EXPR_SIGNED ((a) op (b)) && ((a) op (b)) < (tmin)) \ + || (tmax) < ((a) op (b))) \ + ? (*(r) = _GL_INT_OP_WRAPV_VIA_UNSIGNED (a, b, op, ut, t, tmin, tmax), 1) \ + : (*(r) = _GL_INT_OP_WRAPV_VIA_UNSIGNED (a, b, op, ut, t, tmin, tmax), 0)) + +/* Return A <op> B, where the operation is given by OP. Use the + unsigned type UT for calculation to avoid overflow problems. + Convert the result to type T without overflow by subtracting TMIN + from large values before converting, and adding it afterwards. + Compilers can optimize all the operations except OP. */ +#define _GL_INT_OP_WRAPV_VIA_UNSIGNED(a, b, op, ut, t, tmin, tmax) \ + (((ut) (a) op (ut) (b)) <= (tmax) \ + ? 
(t) ((ut) (a) op (ut) (b)) \ + : ((t) (((ut) (a) op (ut) (b)) - (tmin)) + (tmin))) + +#endif /* _GL_INTPROPS_H */ |