diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2010-07-16 12:09:58 +0300 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2010-07-16 12:09:58 +0300 |
commit | cae8bc6ced84c12590e3554a06a952283735363a (patch) | |
tree | ca4f38bfcb1312bfb62fc693564d68f3e9b3e973 /regex.h | |
parent | dbd583bd2b8a6dd40c622875a4e197360cb5aba7 (diff) | |
download | egawk-cae8bc6ced84c12590e3554a06a952283735363a.tar.gz egawk-cae8bc6ced84c12590e3554a06a952283735363a.tar.bz2 egawk-cae8bc6ced84c12590e3554a06a952283735363a.zip |
Move to 2.14.
Diffstat (limited to 'regex.h')
-rw-r--r-- | regex.h | 702 |
1 files changed, 185 insertions, 517 deletions
@@ -1,11 +1,10 @@ /* Definitions for data structures callers pass the regex library. - Requires sys/types.h for size_t. - Version 0.1. - Copyright (C) 1985, 89, 90, 91 Free Software Foundation, Inc. + + Copyright (C) 1985, 1989-90 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) + the Free Software Foundation; either version 1, or (at your option) any later version. This program is distributed in the hope that it will be useful, @@ -21,554 +20,233 @@ #ifndef __REGEXP_LIBRARY #define __REGEXP_LIBRARY +/* Define number of parens for which we record the beginnings and ends. + This affects how much space the `struct re_registers' type takes up. */ +#ifndef RE_NREGS +#define RE_NREGS 10 +#endif + +#define BYTEWIDTH 8 + + +/* Maximum number of duplicates an interval can allow. */ +#define RE_DUP_MAX ((1 << 15) - 1) -/* This defines the particular regexp syntax to use. */ -extern int obscure_syntax; +/* This defines the various regexp syntaxes. */ +extern long obscure_syntax; /* The following bits are used in the obscure_syntax variable to choose among alternative regexp syntaxes. */ -/* If this bit is set, (...) defines a group, and \( and \) are literals. - If not set, \(...\) defines a group, and ( and ) are literals. */ -#define RE_NO_BK_PARENS 1 +/* If this bit is set, plain parentheses serve as grouping, and backslash + parentheses are needed for literal searching. + If not set, backslash-parentheses are grouping, and plain parentheses + are for literal searching. */ +#define RE_NO_BK_PARENS 1L -/* If this bit is set, then | is an alternation operator, and \| is literal. - If not set, then \| is an alternation operator, and | is literal. */ -#define RE_NO_BK_VBAR (1 << 1) +/* If this bit is set, plain | serves as the `or'-operator, and \| is a + literal. + If not set, \| serves as the `or'-operator, and | is a literal. */ +#define RE_NO_BK_VBAR (1L << 1) -/* If this bit is not set, then + and ? are operators, and \+ and \? are - literals. - If set, then \+ and \? are operators and + and ? are literals. */ -#define RE_BK_PLUS_QM (1 << 2) - -/* If this bit is set, then | binds tighter than ^ or $. +/* If this bit is not set, plain + or ? serves as an operator, and \+, \? are + literals. + If set, \+, \? are operators and plain +, ? are literals. */ +#define RE_BK_PLUS_QM (1L << 2) + +/* If this bit is set, | binds tighter than ^ or $. If not set, the contrary. */ -#define RE_TIGHT_ALT (1 << 3) - -/* If this bit is set, newline is an alternation operator. - If not set, then newline is literal. */ -#define RE_NEWLINE_ALT (1 << 4) - -/* If this bit is set, then special characters are always special - regardless of where they are in the pattern. - If this bit is not set, then special characters are special only in - some contexts; otherwise they are ordinary. Specifically, - - * + ? and intervals are only special when not after the beginning, - open-group, or alternation operator. */ -#define RE_CONTEXT_INDEP_OPS (1 << 5) - -/* If this bit is not set, then \ inside a bracket expression is literal. - If set, then such a \ quotes the following character. */ -#define RE_AWK_CLASS_HACK (1 << 6) - -/* If this bit is set, then either \{...\} or {...} defines an - interval, depending on RE_NO_BK_BRACES. - If not set, then \{, \}, {, and } are literals. */ -#define RE_INTERVALS (1 << 7) - -/* If this bit is not set, then \{ and \} defines an interval, - and { and } are literals. - If set, then { and } defines an interval, and \{ and \} are literals. */ -#define RE_NO_BK_BRACES (1 << 8) - -/* If this bit is set, then character classes are supported. They are: +#define RE_TIGHT_VBAR (1L << 3) + +/* If this bit is set, then treat newline as an OR operator. + If not set, treat it as a normal character. */ +#define RE_NEWLINE_OR (1L << 4) + +/* If this bit is set, then special characters may act as normal + characters in some contexts. Specifically, this applies to: + ^ -- only special at the beginning, or after ( or |; + $ -- only special at the end, or before ) or |; + *, +, ? -- only special when not after the beginning, (, or |. + If this bit is not set, special characters (such as *, ^, and $) + always have their special meaning regardless of the surrounding + context. */ +#define RE_CONTEXT_INDEP_OPS (1L << 5) + +/* If this bit is not set, then \ before anything inside [ and ] is taken as + a real \. + If set, then such a \ escapes the following character. This is a + special case for awk. */ +#define RE_AWK_CLASS_HACK (1L << 6) + +/* If this bit is set, then \{ and \} or { and } serve as interval operators. + If not set, then \{ and \} and { and } are treated as literals. */ +#define RE_INTERVALS (1L << 7) + +/* If this bit is not set, then \{ and \} serve as interval operators and + { and } are literals. + If set, then { and } serve as interval operators and \{ and \} are + literals. */ +#define RE_NO_BK_CURLY_BRACES (1L << 8) + +/* If this bit is set, then character classes are supported; they are: [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. If not set, then character classes are not supported. */ -#define RE_CHAR_CLASSES (1 << 9) +#define RE_CHAR_CLASSES (1L << 9) -/* If this bit is set, then period doesn't match a null. - If not set, then it does. */ -#define RE_DOT_NOT_NULL (1 << 10) +/* If this bit is set, then the dot re doesn't match a null byte. + If not set, it does. */ +#define RE_DOT_NOT_NULL (1L << 10) /* If this bit is set, then [^...] doesn't match a newline. - If not set, then it does. */ -#define RE_HAT_LISTS_NOT_NEWLINE (1 << 11) - -/* If this bit is set, then back references are not recognized. - If not set, then they are. */ -#define RE_NO_BK_REFS (1 << 12) - -/* If this bit is set, then all back references must refer to a preceding - subexpression. - If not set, then a back reference to a nonexistent subexpression is - treated as literal characters. */ -#define RE_NO_MISSING_BK_REF (1 << 13) - -/* If this bit is set, then *, +, ?, and { cannot be first in an re or - immediately after OR or BEGINGROUP. Furthermore, OR cannot be - first or last in an re, or immediately follow another OR or - BEGINGROUP. Also, ^ cannot appear in a nonleading position and $ - cannot appear in a nontrailing position (outside of bracket - expressions, that is). */ -#define RE_CONTEXT_INVALID_OPS (1 << 14) - -/* If this bit is set, then +, ? and | aren't recognized as operators. - If not set, then they are. */ -#define RE_LIMITED_OPS (1 << 15) - -/* If this bit is set, then an ending range point has to collate higher - than or equal to the starting range point. - If not set, then when the ending range point collates higher than the - starting range point, the range is considered to be empty. */ -#define RE_NO_EMPTY_RANGES (1 << 16) - -/* If this bit is set, then neither the match-beginning-of-line nor - the match-end-of-line operator match a newline. - If not set, then these operators can match a newline. */ -#define RE_NO_ANCHOR_AT_NEWLINE (1 << 17) - -/* If this bit is set, then you can't have empty groups. - If not set, then you can. */ -#define RE_NO_EMPTY_GROUPS (1 << 18) - -/* If this bit is set, then you can't have empty alternatives. - If not set, then you can. */ -#define RE_NO_EMPTY_ALTS (1 << 19) - -/* If this bit is set, then you can't have more than one non-interval - repetition operators (i.e., `*', `+' and `?') in a row, e.g., as in - `a*+?*'. - If not set, then you can. */ -#define RE_NO_CONSECUTIVE_REPEATS (1 << 20) - - -/* If this bit is set, then ignore anchors inside groups which in turn - are operated on by repetion operators. - If not set, then don't. */ -#define RE_REPEATED_ANCHORS_AWAY (1 << 21) - -/* If this bit is set, then the match-any-character operator (.) matches - a newline. - If not set, then it doesn't. */ -#define RE_DOT_NEWLINE (1 << 22) - -/* If this bit is set, then '^' and '$' can be anchors only at the - beginning or the end of the pattern. - If not set, then they don't have to be at the beginning or end of the - pattern to be anchors. */ -#define RE_ANCHORS_ONLY_AT_ENDS (1 << 23) - -/* If this bit is set, then Regex considers an unmatched close-group - operator to be the ordinary character parenthesis. - If not set, then an unmatched close-group operator is invalid. */ -#define RE_UNMATCHED_RIGHT_PAREN_ORD (1 << 24) - -/* If this bit is set, then ^ cannot appear in a nonleading position and - $ cannot appear in a nontrailing position (outside of bracket - expressions, that is). */ -#define RE_CONTEXT_INVALID_ANCHORS (1 << 25) - -/* If this bit is set, then ^ and $ are always anchors, regardless of - their positions in a regular expression. - If this bit is not set, then ^ is an anchor only if in a leading - position and $ is one only if in a trailing position. Specifically, - - ^ is in a leading position if at the beginning of a regular - expression , or after an open-group or an alternation operator; - - $ is in a trailing position if at the end of a regular - expression, or before close-group or an alternation operator. -*/ -#define RE_CONTEXT_INDEP_ANCHORS (1 << 26) - -/* If this bit is set, then the searching and matching routines will - allocate enough register space to accommodate the number of groups - in the regular expression. - If this bit is not set, then the user must allocate the space. */ -#define RE_ALLOCATE_REGISTERS (1 << 27) - - -/* Define combinations of the above bits for the standard possibilities. */ -#define RE_SYNTAX_EMACS 0 - -#define RE_SYNTAX_AWK \ - (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_ALLOCATE_REGISTERS \ - | RE_AWK_CLASS_HACK) - -#define RE_SYNTAX_POSIX_AWK RE_SYNTAX_AWK - -#define RE_SYNTAX_GREP \ - (RE_BK_PLUS_QM | RE_NEWLINE_ALT | RE_ALLOCATE_REGISTERS) - -#define RE_SYNTAX_EGREP \ - (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_CONTEXT_INDEP_OPS \ - | RE_CONTEXT_INDEP_ANCHORS | RE_NEWLINE_ALT | RE_ALLOCATE_REGISTERS) - -#define RE_SYNTAX_POSIX_BASIC \ - (RE_INTERVALS | RE_CHAR_CLASSES | RE_DOT_NOT_NULL \ - | RE_NO_MISSING_BK_REF | RE_LIMITED_OPS | RE_NO_EMPTY_RANGES \ - | RE_NO_ANCHOR_AT_NEWLINE | RE_DOT_NEWLINE | RE_ALLOCATE_REGISTERS) - -#define RE_SYNTAX_POSIX_EXTENDED \ - (RE_INTERVALS | RE_NO_BK_BRACES | RE_NO_BK_VBAR \ - | RE_NO_BK_PARENS | RE_CHAR_CLASSES | RE_CONTEXT_INVALID_OPS \ - | RE_NO_BK_REFS | RE_NO_EMPTY_RANGES | RE_UNMATCHED_RIGHT_PAREN_ORD \ - | RE_DOT_NOT_NULL | RE_NO_EMPTY_GROUPS | RE_NO_EMPTY_ALTS \ - | RE_NO_ANCHOR_AT_NEWLINE | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS\ - | RE_ALLOCATE_REGISTERS) - - - -/* Maximum number of duplicates an interval can allow. */ -#define RE_DUP_MAX ((1 << 15) - 1) - - -/* POSIX cflags bits (i.e., information for regcomp). */ - -/* If this bit is set, then use extended regular expression syntax. - If not set, then use basic regular expression syntax. */ -#define REG_EXTENDED 1 - -/* If this bit is set, then (line 526, p.687 of POSIX 1003.2/D10) - newline loses its special significance; i.e., anchors do not match at - newlines in the string. - If not set, then anchors do match at newlines. */ -#define REG_NEWLINE (1 << 1) + If not set, it does. */ +#define RE_HAT_NOT_NEWLINE (1L << 11) -/* If this bit is set, then ignore case when matching. - If not set, then case is significant. */ -#define REG_ICASE (1 << 2) - -/* If this bit is set, then report only success or fail in regexec (). - If not set, then return nonzero indicating either not match or an error. */ -#define REG_NOSUB (1 << 3) +/* If this bit is set, back references are recognized. + If not set, they aren't. */ +#define RE_NO_BK_REFS (1L << 12) +/* If this bit is set, back references must refer to a preceding + subexpression. If not set, a back reference to a nonexistent + subexpression is treated as literal characters. */ +#define RE_NO_EMPTY_BK_REF (1L << 13) -/* POSIX eflags bits (i.e., information for regexec). */ +/* If this bit is set, bracket expressions can't be empty. + If it is set, they can be empty. */ +#define RE_NO_EMPTY_BRACKETS (1L << 14) -/* If this bit is set, then the string's first character is not the - beginning of a line, so the beginning-of-line anchor shouldn't - match it. - If not set, then the string's first character can match the - beginning-of-line anchor. */ -#define REG_NOTBOL 1 +/* If this bit is set, then *, +, ? and { cannot be first in an re or + immediately after a |, or a (. Furthermore, a | cannot be first or + last in an re, or immediately follow another | or a (. Also, a ^ + cannot appear in a nonleading position and a $ cannot appear in a + nontrailing position (outside of bracket expressions, that is). */ +#define RE_CONTEXTUAL_INVALID_OPS (1L << 15) -/* If this bit is set, then the string's last character is not the - end of a line, so the end-of-line anchor shouldn't match it. - If not set, then the string's last character can match the - end-of-line anchor. */ -#define REG_NOTEOL (1 << 1) - - -/* POSIX regexec return error value. */ - -#define REG_NOMATCH 1 /* Didn't find a match. */ - -/* POSIX regcomp return error codes. */ - -#define REG_BADPAT 2 /* Found an invalid pattern. */ -#define REG_ECOLLATE 3 /* Not implemented. */ -#define REG_ECTYPE 4 /* Found an invalid character class name. */ -#define REG_EESCAPE 5 /* Found a trailing backslash. */ -#define REG_ESUBREG 6 /* Found an invalid back reference. */ -#define REG_EBRACK 7 /* Found an unmatched left bracket. */ -#define REG_EPAREN 8 /* Found a parentheses imbalance. */ -#define REG_EBRACE 9 /* Found an unmatched \{. */ -#define REG_BADBR 10 /* Found invalid contents of \{\}. */ -#define REG_ERANGE 11 /* Found invalid range end. */ -#define REG_ESPACE 12 /* Ran out of memory. */ -#define REG_BADRPT 13 /* No preceding re for repetition op. */ -#define REG_ENEWLINE 14 /* Not implemented. */ - -/* Some regcomp codes we've added. */ -#define REG_NOERROR 0 /* No error. */ -#define REG_EEND 15 -#define REG_ESIZE 16 - - - - -/* This data structure represents a compiled pattern. Before calling - the pattern compiler, the fields `buffer', `allocated', `fastmap', - `translate', and `no_sub' can be set. After the pattern has been - compiled, the `re_nsub' field is available. All other fields are - private to the regex routines. */ +/* If this bit is set, then +, ? and | aren't recognized as operators. + If it's not, they are. */ +#define RE_LIMITED_OPS (1L << 16) -/* If this changes, change documentation in regex.texinfo. */ +/* If this bit is set, then an ending range point has to collate higher + or equal to the starting range point. + If it's not set, then when the ending range point collates higher + than the starting range point, the range is just considered empty. */ +#define RE_NO_EMPTY_RANGES (1L << 17) + +/* If this bit is set, then a hyphen (-) can't be an ending range point. + If it isn't, then it can. */ +#define RE_NO_HYPHEN_RANGE_END (1L << 18) + + +/* Define combinations of bits for the standard possibilities. */ +#define RE_SYNTAX_POSIX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR \ + | RE_CONTEXT_INDEP_OPS) +#define RE_SYNTAX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_AWK_CLASS_HACK) +#define RE_SYNTAX_EGREP (RE_NO_BK_PARENS | RE_NO_BK_VBAR \ + | RE_CONTEXT_INDEP_OPS | RE_NEWLINE_OR) +#define RE_SYNTAX_GREP (RE_BK_PLUS_QM | RE_NEWLINE_OR) +#define RE_SYNTAX_EMACS 0 +#define RE_SYNTAX_POSIX_BASIC (RE_INTERVALS | RE_BK_PLUS_QM \ + | RE_CHAR_CLASSES | RE_DOT_NOT_NULL \ + | RE_HAT_NOT_NEWLINE | RE_NO_EMPTY_BK_REF \ + | RE_NO_EMPTY_BRACKETS | RE_LIMITED_OPS \ + | RE_NO_EMPTY_RANGES | RE_NO_HYPHEN_RANGE_END) + +#define RE_SYNTAX_POSIX_EXTENDED (RE_INTERVALS | RE_NO_BK_CURLY_BRACES \ + | RE_NO_BK_VBAR | RE_NO_BK_PARENS \ + | RE_HAT_NOT_NEWLINE | RE_CHAR_CLASSES \ + | RE_NO_EMPTY_BRACKETS | RE_CONTEXTUAL_INVALID_OPS \ + | RE_NO_BK_REFS | RE_NO_EMPTY_RANGES \ + | RE_NO_HYPHEN_RANGE_END) + + +/* This data structure is used to represent a compiled pattern. */ struct re_pattern_buffer -{ - /* Space that holds the compiled pattern. */ - char *buffer; - - /* Number of bytes to which `buffer' points. */ - long allocated; - - /* Number of bytes actually used in `buffer'. */ - long used; - - /* Syntax setting with which the pattern was compiled. */ - int syntax; - - /* Pointer to a fastmap, if any, otherwise zero. re_search uses - the fastmap, if there is one, to skip over impossible - starting points for matches. */ - char *fastmap; - - /* Either a translate table to apply to all characters before - comparing them, or zero for no translation. The translation - is applied to a pattern when it is compiled and to a string - when it is matched. */ - char *translate; - - /* Number of subexpressions found by the compiler. */ - size_t re_nsub; - - /* Set to 1 by re_compile_fastmap if this pattern can match the - null string; 0 prevents the searcher from matching it with - the null string. Set to 2 if it might match the null string - either at the end of a search range or just before a - character listed in the fastmap. */ - char can_be_null; - - - /* The remaining fields are all one-bit booleans. */ - - /* Set to zero when regex_compile compiles a pattern; set to one - by re_compile_fastmap when it updates the fastmap, if any. */ - unsigned fastmap_accurate : 1; - - /* If set, regexec reports only success or failure and does not - return anything in pmatch[]. */ - unsigned no_sub : 1; - - /* If set, a beginning-of-line anchor never matches. */ - unsigned not_bol : 1; - - /* Similarly for an end-of-line anchor. */ - unsigned not_eol : 1; - - /* If set, and the regs argument is nonzero, the GNU - matching and searching functions return information - for as many registers as needed to report about the - whole pattern and all its subexpressions. If not set, - and the regs argument is nonzero, then the functions - return information for regs->num_regs registers. */ - unsigned return_default_num_regs : 1; -}; - -typedef struct re_pattern_buffer regex_t; - - -/* search.c (search_buffer) in Emacs needs this one value. It is - defined both in `regex.c' and here. */ + { + char *buffer; /* Space holding the compiled pattern commands. */ + long allocated; /* Size of space that `buffer' points to. */ + long used; /* Length of portion of buffer actually occupied */ + char *fastmap; /* Pointer to fastmap, if any, or zero if none. */ + /* re_search uses the fastmap, if there is one, + to skip over totally implausible characters. */ + char *translate; /* Translate table to apply to all characters before + comparing, or zero for no translation. + The translation is applied to a pattern when it is + compiled and to data when it is matched. */ + char fastmap_accurate; + /* Set to zero when a new pattern is stored, + set to one when the fastmap is updated from it. */ + char can_be_null; /* Set to one by compiling fastmap + if this pattern might match the null string. + It does not necessarily match the null string + in that case, but if this is zero, it cannot. + 2 as value means can match null string + but at end of range or before a character + listed in the fastmap. */ + }; + + +/* search.c (search_buffer) needs this one value. It is defined both in + regex.c and here. */ #define RE_EXACTN_VALUE 1 - -/* struct re_registers: Structure to store register contents data in. - - (If change comments here, change in regex.texinfo also.) - - Some groups in a regular expression match (possibly empty) substrings - of the string that regular expression matched. The matcher remembers - the beginning and ending point of the substring matched by each - group. To get what they matched, pass the address of a structure of - this type to a GNU matching or searching function. - - When you call a GNU matching and searching function, it stores - information into this structure according to the following (in all - examples below, `(' represents the open-group and `)' the - close-group operator): - - If the regular expression has an i-th group that matches a substring - of string, then the function sets REGS->start[i] to the index in - string where the substring matched by the i-th group begins, and - REGS->end[i] to the index just beyond that substring's end. The - function sets REGS->start[0] and REGS->end[0] to analogous - information about the entire pattern. - - For example, when you match the pattern `((a)(b))' with the string - `ab', you get: - - 0 in REGS->start[0] and 2 in REGS->end[0] - 0 in REGS->start[1] and 2 in REGS->end[1] - 0 in REGS->start[2] and 1 in REGS->end[2] - 1 in REGS->start[3] and 2 in REGS->end[3] - - If a group matches more than once (as it might if followed by, e.g., - a repetition operator), then the function reports the information - about what the group @emph{last matched. - - For example, when you match the string `aa' with the pattern `(a)*', - you get: - - 0 in REGS->start[0] and 2 in REGS->end[0] - 1 in REGS->start[1] and 2 in REGS->end[1] - - - If the i-th group does not participate in a successful match, e.g., - it is an alternative not taken or a repetition operator allows zero - repetitions of it, then the function sets REGS->start[i] and - REGS->end[i] to -1. - - For example, when you match the string `b' with the pattern `(a)*b', - you get: - - 0 in REGS->start[0] and 1 in REGS->end[0] - -1 in REGS->start[1] and -1 in REGS->end[1] +/* Structure to store register contents data in. + Pass the address of such a structure as an argument to re_match, etc., + if you want this information back. - If the i-th group matches a zero-length string, then the function - sets REGS->start[i] and REGS->end[i] to the index just beyond that - zero-length string. - - For example, when you match the string `b' with the pattern `(a*)b', - you get: - - 0 in REGS->start[0] and 1 in REGS->end[0] - 0 in REGS->start[1] and 0 in REGS->end[1] - - The function sets REGS->start[0] and REGS->end[0] to analogous - information about the entire pattern. - - For example, when you match the empty string with the pattern `(a*)', - you get: - - 0 in REGS->start[0] and 0 in REGS->end[0] - 0 in REGS->start[1] and 0 in REGS->end[1] - - If an i-th group contains a j-th group and the function reports a - match of the i-th group, then it records in REGS->start[j] and - REGS->end[j] the last match (if it matched) of the j-th group. - - For example, when you match the string `abb' with the pattern - `((a*)b)*, group 2' last matches the empty string, so you get: - - 0 in REGS->start[0] and 3 in REGS->end[0] - 2 in REGS->start[1] and 3 in REGS->end[1] - 2 in REGS->start[2] and 2 in REGS->end[2] - - When you match the string `abb' with the pattern `((a)*b)*', group 2 - doesn't participate in the last match, so you get: - - 0 in REGS->start[0] and 3 in REGS->end[0] - 2 in REGS->start[1] and 3 in REGS->end[1] - 0 in REGS->start[2] and 1 in REGS->end[2] - - If an i-th group contains a j-th group and the function sets - REGS->start[i] and REGS->end[i] to -1, then it also sets REGS->start[j] - and REGS->end[j] to -1. - - For example, when you match the string `c' with the pattern - `((a)*b)*c', you get: - - 0 in REGS->start[0] and 1 in REGS->end[0] - -1 in REGS->start[1] and -1 in REGS->end[1] - -1 in REGS->start[2] and -1 in REGS->end[2] -*/ + For i from 1 to RE_NREGS - 1, start[i] records the starting index in + the string of where the ith subexpression matched, and end[i] records + one after the ending index. start[0] and end[0] are analogous, for + the entire pattern. */ struct re_registers -{ - unsigned num_regs; - int *start; - int *end; -}; - - -/* POSIX specification for registers. See comments for struct - re_registers for how this is used and read `POSIX' for `GNU', - `PMATCH' for `REGS', `PMATCH[i].rm_so' for `REGS->start' and - `PMATCH[i].rm_eo' for `REGS->end'. */ - -typedef off_t regoff_t; - -typedef struct -{ - regoff_t rm_so; /* Byte offset from string's start to substring' start. */ - regoff_t rm_eo; /* Byte offset from string's end to substring' end. */ -} regmatch_t; + { + int start[RE_NREGS]; + int end[RE_NREGS]; + }; #ifdef __STDC__ -/* Compile the regular expression PATTERN, with length LENGTH - and syntax given by the global `obscure_syntax', into the buffer - BUFFER. Return NULL if successful, and an error string if not. */ - -extern char *re_compile_pattern (const char *pattern, const int length, - struct re_pattern_buffer *buffer); - - -/* Compile a fastmap for the compiled pattern in BUFFER; used to - accelerate searches. Return 0 if successful and -2 if was an - internal error. */ - -extern int re_compile_fastmap (struct re_pattern_buffer *buffer); - - -/* Search in the string STRING (with length LENGTH) for the pattern - compiled into BUFFER. Start searching at position START, for RANGE - characters. Return the starting position of the match or -1 for no - match, or -2 for an internal error. Also return register - information in REGS (if REGS is non-null). */ - -extern int re_search (struct re_pattern_buffer *buffer, - const char *string, const int length, - const int start, const int range, - struct re_registers *regs); - -/* Like `re_search', but search in the concatenation of STRING1 and - STRING2. Also, stop searching at index START + STOP. */ - -extern int re_search_2 (struct re_pattern_buffer *buffer, - const char *string1, const int length1, - const char *string2, const int length2, - const int start, const int range, - struct re_registers *regs, - const int stop); - -/* Like `re_search', but return how many characters in STRING the regexp - in BUFFER matched, starting at position START. */ - -extern int re_match (const struct re_pattern_buffer *buffer, - const char *string, const int length, - const int start, struct re_registers *regs); - - -/* Relates to `re_match' as `re_search_2' relates to `re_search'. */ - -extern int re_match_2 (const struct re_pattern_buffer *buffer, - const char *string1, const int length1, - const char *string2, const int length2, - const int start, - struct re_registers *regs, - const int stop); - - +extern char *re_compile_pattern (char *, size_t, struct re_pattern_buffer *); +/* Is this really advertised? */ +extern void re_compile_fastmap (struct re_pattern_buffer *); +extern int re_search (struct re_pattern_buffer *, char*, int, int, int, + struct re_registers *); +extern int re_search_2 (struct re_pattern_buffer *, char *, int, + char *, int, int, int, + struct re_registers *, int); +extern int re_match (struct re_pattern_buffer *, char *, int, int, + struct re_registers *); +extern int re_match_2 (struct re_pattern_buffer *, char *, int, + char *, int, int, struct re_registers *, int); +extern long re_set_syntax (long syntax); + +#ifndef GAWK /* 4.2 bsd compatibility. */ -extern char *re_comp (const char *); -extern int re_exec (const char *); - -extern int regcomp (regex_t *preg, const char *pattern, int cflags); -extern int regexec (const regex_t *preg, const char *string, size_t nmatch, - regmatch_t pmatch[], int eflags); -extern size_t re_gerror (int errcode, const regex_t *preg, char* errbuf, - size_t errbuf_size); -extern void re_gfree (regex_t *preg); - -#else /* not __STDC__ */ +extern char *re_comp (char *); +extern int re_exec (char *); +#endif -/* Support old C compilers. */ -#define const +#else /* !__STDC__ */ extern char *re_compile_pattern (); +/* Is this really advertised? */ +extern void re_compile_fastmap (); extern int re_search (), re_search_2 (); extern int re_match (), re_match_2 (); +extern long re_set_syntax(); +#ifndef GAWK /* 4.2 bsd compatibility. */ extern char *re_comp (); extern int re_exec (); - -extern int regcomp (); -extern int regexec (); -extern size_t re_gerror (); -extern void re_gfree (); +#endif #endif /* __STDC__ */ @@ -577,14 +255,4 @@ extern void re_gfree (); extern char *re_syntax_table; #endif -#endif /* not __REGEXP_LIBRARY */ - - - -/* -Local variables: -make-backup-files: t -version-control: t -trim-versions-without-asking: nil -End: -*/ +#endif /* !__REGEXP_LIBRARY */ |