diff options
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | awk.h | 2 | ||||
-rw-r--r-- | configh.in | 3 | ||||
-rwxr-xr-x | configure | 2 | ||||
-rw-r--r-- | configure.ac | 4 | ||||
-rw-r--r-- | custom.h | 2 | ||||
-rw-r--r-- | support/ChangeLog | 5 | ||||
-rw-r--r-- | support/dfa.c | 73 | ||||
-rw-r--r-- | support/flexmember.h | 60 |
9 files changed, 124 insertions, 33 deletions
@@ -1,3 +1,9 @@ +2020-02-01 Arnold D. Robbins <arnold@skeeve.com> + + * awk.h, dfa.c: Move include of mbsupport.h to ... + * custom.h: ... here. + * configure.ac: Add check for isblank. + 2020-01-27 Arnold D. Robbins <arnold@skeeve.com> * custom.h: Fix non-VMS compilation. @@ -87,8 +87,6 @@ extern int errno; #include <wchar.h> #include <wctype.h> -#include "mbsupport.h" /* defines stuff for DJGPP to fake MBS */ - #ifdef STDC_HEADERS #include <float.h> #endif @@ -84,6 +84,9 @@ /* Define to 1 if you have the `isascii' function. */ #undef HAVE_ISASCII +/* Define to 1 if you have the `isblank' function. */ +#undef HAVE_ISBLANK + /* Define to 1 if you have the `iswctype' function. */ #undef HAVE_ISWCTYPE @@ -9953,7 +9953,7 @@ esac for ac_func in __etoa_l atexit btowc fmod gai_strerror \ getgrent getgroups grantpt \ fwrite_unlocked \ - isascii iswctype iswlower iswupper mbrlen \ + isascii isblank iswctype iswlower iswupper mbrlen \ memcmp memcpy memcpy_ulong memmove memset \ memset_ulong mkstemp posix_openpt setenv setlocale setsid sigprocmask \ snprintf strchr \ diff --git a/configure.ac b/configure.ac index af3432cb..db535963 100644 --- a/configure.ac +++ b/configure.ac @@ -1,7 +1,7 @@ dnl dnl configure.ac --- autoconf input file for gawk dnl -dnl Copyright (C) 1995-2019 the Free Software Foundation, Inc. +dnl Copyright (C) 1995-2020 the Free Software Foundation, Inc. dnl dnl This file is part of GAWK, the GNU implementation of the dnl AWK Programming Language. @@ -296,7 +296,7 @@ esac AC_CHECK_FUNCS(__etoa_l atexit btowc fmod gai_strerror \ getgrent getgroups grantpt \ fwrite_unlocked \ - isascii iswctype iswlower iswupper mbrlen \ + isascii isblank iswctype iswlower iswupper mbrlen \ memcmp memcpy memcpy_ulong memmove memset \ memset_ulong mkstemp posix_openpt setenv setlocale setsid sigprocmask \ snprintf strchr \ @@ -95,3 +95,5 @@ typedef unsigned long long uint_fast64_t; #else # define _GL_ATTRIBUTE_PURE /* empty */ #endif + +#include "mbsupport.h" /* defines stuff for DJGPP to fake MBS */ diff --git a/support/ChangeLog b/support/ChangeLog index bb275db6..202f9f02 100644 --- a/support/ChangeLog +++ b/support/ChangeLog @@ -1,3 +1,8 @@ +2020-02-01 Arnold D. Robbins <arnold@skeeve.com> + + * dfa.c: Update from GNULIB. Should help Vax/VMS. + * flexmember.h: New file, for dfa.c. + 2020-01-28 Arnold D. Robbins <arnold@skeeve.com> * regex_internal.h: Update from GNULIB. Hopefully fixes diff --git a/support/dfa.c b/support/dfa.c index 65b0972c..d112443f 100644 --- a/support/dfa.c +++ b/support/dfa.c @@ -24,6 +24,8 @@ #include "dfa.h" +#include "flexmember.h" + #include <assert.h> #include <ctype.h> #ifndef VMS @@ -76,18 +78,6 @@ isasciidigit (char c) # define MIN(a,b) ((a) < (b) ? (a) : (b)) #endif -#if defined(__DJGPP__) -#include "mbsupport.h" -#endif - -#ifdef GAWK -static int -is_blank (int c) -{ - return (c == ' ' || c == '\t'); -} -#endif /* GAWK */ - /* HPUX defines these as macros in sys/param.h. */ #ifdef setbit # undef setbit @@ -96,18 +86,46 @@ is_blank (int c) # undef clrbit #endif +/* For code that does not use Gnulib’s isblank module. */ +#if !defined isblank && !defined HAVE_ISBLANK && !defined GNULIB_ISBLANK +# define isblank dfa_isblank +static int +isblank (int c) +{ + return c == ' ' || c == '\t'; +} +#endif + /* First integer value that is greater than any character code. */ enum { NOTCHAR = 1 << CHAR_BIT }; +#ifdef UINT_LEAST64_MAX + /* Number of bits used in a charclass word. */ enum { CHARCLASS_WORD_BITS = 64 }; /* This represents part of a character class. It must be unsigned and at least CHARCLASS_WORD_BITS wide. Any excess bits are zero. */ -typedef uint_fast64_t charclass_word; +typedef uint_least64_t charclass_word; + +/* Part of a charclass initializer that represents 64 bits' worth of a + charclass, where LO and HI are the low and high-order 32 bits of + the 64-bit quantity. */ +# define CHARCLASS_PAIR(lo, hi) (((charclass_word) (hi) << 32) + (lo)) + +#else +/* Fallbacks for pre-C99 hosts that lack 64-bit integers. */ +enum { CHARCLASS_WORD_BITS = 32 }; +typedef unsigned long charclass_word; +# define CHARCLASS_PAIR(lo, hi) lo, hi +#endif -/* An initializer for a charclass whose 64-bit words are A through D. */ -#define CHARCLASS_INIT(a, b, c, d) {{a, b, c, d}} +/* An initializer for a charclass whose 32-bit words are A through H. */ +#define CHARCLASS_INIT(a, b, c, d, e, f, g, h) \ + {{ \ + CHARCLASS_PAIR (a, b), CHARCLASS_PAIR (c, d), \ + CHARCLASS_PAIR (e, f), CHARCLASS_PAIR (g, h) \ + }} /* The maximum useful value of a charclass_word; all used bits are 1. */ static charclass_word const CHARCLASS_WORD_MASK @@ -964,7 +982,7 @@ static const struct dfa_ctype prednames[] = { {"print", isprint, false}, {"graph", isgraph, false}, {"cntrl", iscntrl, false}, - {"blank", is_blank, false}, + {"blank", isblank, false}, {NULL, NULL, false} }; @@ -1714,39 +1732,39 @@ add_utf8_anychar (struct dfa *dfa) static charclass const utf8_classes[] = { /* A. 00-7f: 1-byte sequence. */ - CHARCLASS_INIT (0xffffffffffffffff, 0xffffffffffffffff, 0, 0), + CHARCLASS_INIT (0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0, 0, 0, 0), /* B. c2-df: 1st byte of a 2-byte sequence. */ - CHARCLASS_INIT (0, 0, 0, 0x00000000fffffffc), + CHARCLASS_INIT (0, 0, 0, 0, 0, 0, 0xfffffffc, 0), /* C. 80-bf: non-leading bytes. */ - CHARCLASS_INIT (0, 0, 0xffffffffffffffff, 0), + CHARCLASS_INIT (0, 0, 0, 0, 0xffffffff, 0xffffffff, 0, 0), /* D. e0 (just a token). */ /* E. a0-bf: 2nd byte of a "DEC" sequence. */ - CHARCLASS_INIT (0, 0, 0xffffffff00000000, 0), + CHARCLASS_INIT (0, 0, 0, 0, 0, 0xffffffff, 0, 0), /* F. e1-ec + ee-ef: 1st byte of an "FCC" sequence. */ - CHARCLASS_INIT (0, 0, 0, 0x0000dffe00000000), + CHARCLASS_INIT (0, 0, 0, 0, 0, 0, 0, 0xdffe), /* G. ed (just a token). */ /* H. 80-9f: 2nd byte of a "GHC" sequence. */ - CHARCLASS_INIT (0, 0, 0x000000000000ffff, 0), + CHARCLASS_INIT (0, 0, 0, 0, 0xffff, 0, 0, 0), /* I. f0 (just a token). */ /* J. 90-bf: 2nd byte of an "IJCC" sequence. */ - CHARCLASS_INIT (0, 0, 0xffffffffffff0000, 0), + CHARCLASS_INIT (0, 0, 0, 0, 0xffff0000, 0xffffffff, 0, 0), /* K. f1-f3: 1st byte of a "KCCC" sequence. */ - CHARCLASS_INIT (0, 0, 0, 0x000e000000000000), + CHARCLASS_INIT (0, 0, 0, 0, 0, 0, 0, 0xe0000), /* L. f4 (just a token). */ /* M. 80-8f: 2nd byte of a "LMCC" sequence. */ - CHARCLASS_INIT (0, 0, 0x00000000000000ff, 0), + CHARCLASS_INIT (0, 0, 0, 0, 0xff, 0, 0, 0), }; /* Define the character classes that are needed below. */ @@ -4289,11 +4307,11 @@ dfamust (struct dfa const *d) struct dfamust *dm = NULL; if (*result) { - dm = xmalloc (sizeof *dm); + dm = xmalloc (FLEXSIZEOF (struct dfamust, must, strlen (result) + 1)); dm->exact = exact; dm->begline = begline; dm->endline = endline; - dm->must = xstrdup (result); + strcpy (dm->must, result); } while (mp) @@ -4309,7 +4327,6 @@ dfamust (struct dfa const *d) void dfamustfree (struct dfamust *dm) { - free (dm->must); free (dm); } diff --git a/support/flexmember.h b/support/flexmember.h new file mode 100644 index 00000000..1e839f08 --- /dev/null +++ b/support/flexmember.h @@ -0,0 +1,60 @@ +/* Sizes of structs with flexible array members. + + Copyright 2016-2020 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. + + Written by Paul Eggert. */ + +#include <stddef.h> + +/* Nonzero multiple of alignment of TYPE, suitable for FLEXSIZEOF below. + On older platforms without _Alignof, use a pessimistic bound that is + safe in practice even if FLEXIBLE_ARRAY_MEMBER is 1. + On newer platforms, use _Alignof to get a tighter bound. */ + +#if !defined __STDC_VERSION__ || __STDC_VERSION__ < 201112 +# define FLEXALIGNOF(type) (sizeof (type) & ~ (sizeof (type) - 1)) +#else +# define FLEXALIGNOF(type) _Alignof (type) +#endif + +/* Yield a properly aligned upper bound on the size of a struct of + type TYPE with a flexible array member named MEMBER that is + followed by N bytes of other data. The result is suitable as an + argument to malloc. For example: + + struct s { int n; char d[FLEXIBLE_ARRAY_MEMBER]; }; + struct s *p = malloc (FLEXSIZEOF (struct s, d, n * sizeof (char))); + + FLEXSIZEOF (TYPE, MEMBER, N) is not simply (sizeof (TYPE) + N), + since FLEXIBLE_ARRAY_MEMBER may be 1 on pre-C11 platforms. Nor is + it simply (offsetof (TYPE, MEMBER) + N), as that might yield a size + that causes malloc to yield a pointer that is not properly aligned + for TYPE; for example, if sizeof (int) == alignof (int) == 4, + malloc (offsetof (struct s, d) + 3 * sizeof (char)) is equivalent + to malloc (7) and might yield a pointer that is not a multiple of 4 + (which means the pointer is not properly aligned for struct s), + whereas malloc (FLEXSIZEOF (struct s, d, 3 * sizeof (char))) is + equivalent to malloc (8) and must yield a pointer that is a + multiple of 4. + + Yield a value less than N if and only if arithmetic overflow occurs. */ + +#define FLEXSIZEOF(type, member, n) \ + ((offsetof (type, member) + FLEXALIGNOF (type) - 1 + (n)) \ + & ~ (FLEXALIGNOF (type) - 1)) |