From dbd583bd2b8a6dd40c622875a4e197360cb5aba7 Mon Sep 17 00:00:00 2001 From: "Arnold D. Robbins" Date: Fri, 16 Jul 2010 12:04:45 +0300 Subject: Move to 2.13.3 (from 2.13.tar.gz - sigh). --- regex.h | 666 ++++++++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 500 insertions(+), 166 deletions(-) (limited to 'regex.h') diff --git a/regex.h b/regex.h index 145b6d13..6f735156 100644 --- a/regex.h +++ b/regex.h @@ -1,10 +1,11 @@ /* Definitions for data structures callers pass the regex library. - - Copyright (C) 1985, 1989-90 Free Software Foundation, Inc. + Requires sys/types.h for size_t. + Version 0.1. + Copyright (C) 1985, 89, 90, 91 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 1, or (at your option) + the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, @@ -17,228 +18,546 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#ifdef __GNUC__ - #pragma once -#endif - #ifndef __REGEXP_LIBRARY #define __REGEXP_LIBRARY -/* Define number of parens for which we record the beginnings and ends. - This affects how much space the `struct re_registers' type takes up. */ -#ifndef RE_NREGS -#define RE_NREGS 10 -#endif - -#define BYTEWIDTH 8 - - -/* Maximum number of duplicates an interval can allow. */ -#define RE_DUP_MAX ((1 << 15) - 1) -/* This defines the various regexp syntaxes. */ +/* This defines the particular regexp syntax to use. */ extern int obscure_syntax; /* The following bits are used in the obscure_syntax variable to choose among alternative regexp syntaxes. */ -/* If this bit is set, plain parentheses serve as grouping, and backslash - parentheses are needed for literal searching. - If not set, backslash-parentheses are grouping, and plain parentheses - are for literal searching. */ +/* If this bit is set, (...) defines a group, and \( and \) are literals. + If not set, \(...\) defines a group, and ( and ) are literals. */ #define RE_NO_BK_PARENS 1 -/* If this bit is set, plain | serves as the `or'-operator, and \| is a - literal. - If not set, \| serves as the `or'-operator, and | is a literal. */ +/* If this bit is set, then | is an alternation operator, and \| is literal. + If not set, then \| is an alternation operator, and | is literal. */ #define RE_NO_BK_VBAR (1 << 1) -/* If this bit is not set, plain + or ? serves as an operator, and \+, \? are - literals. - If set, \+, \? are operators and plain +, ? are literals. */ +/* If this bit is not set, then + and ? are operators, and \+ and \? are + literals. + If set, then \+ and \? are operators and + and ? are literals. */ #define RE_BK_PLUS_QM (1 << 2) - -/* If this bit is set, | binds tighter than ^ or $. + +/* If this bit is set, then | binds tighter than ^ or $. If not set, the contrary. */ -#define RE_TIGHT_VBAR (1 << 3) - -/* If this bit is set, then treat newline as an OR operator. - If not set, treat it as a normal character. */ -#define RE_NEWLINE_OR (1 << 4) - -/* If this bit is set, then special characters may act as normal - characters in some contexts. Specifically, this applies to: - ^ -- only special at the beginning, or after ( or |; - $ -- only special at the end, or before ) or |; - *, +, ? -- only special when not after the beginning, (, or |. - If this bit is not set, special characters (such as *, ^, and $) - always have their special meaning regardless of the surrounding - context. */ +#define RE_TIGHT_ALT (1 << 3) + +/* If this bit is set, newline is an alternation operator. + If not set, then newline is literal. */ +#define RE_NEWLINE_ALT (1 << 4) + +/* If this bit is set, then special characters are always special + regardless of where they are in the pattern. + If this bit is not set, then special characters are special only in + some contexts; otherwise they are ordinary. Specifically, + + * + ? and intervals are only special when not after the beginning, + open-group, or alternation operator. */ #define RE_CONTEXT_INDEP_OPS (1 << 5) -/* If this bit is not set, then \ before anything inside [ and ] is taken as - a real \. - If set, then such a \ escapes the following character. This is a - special case for awk. */ +/* If this bit is not set, then \ inside a bracket expression is literal. + If set, then such a \ quotes the following character. */ #define RE_AWK_CLASS_HACK (1 << 6) -/* If this bit is set, then \{ and \} or { and } serve as interval operators. - If not set, then \{ and \} and { and } are treated as literals. */ +/* If this bit is set, then either \{...\} or {...} defines an + interval, depending on RE_NO_BK_BRACES. + If not set, then \{, \}, {, and } are literals. */ #define RE_INTERVALS (1 << 7) -/* If this bit is not set, then \{ and \} serve as interval operators and - { and } are literals. - If set, then { and } serve as interval operators and \{ and \} are - literals. */ -#define RE_NO_BK_CURLY_BRACES (1 << 8) +/* If this bit is not set, then \{ and \} defines an interval, + and { and } are literals. + If set, then { and } defines an interval, and \{ and \} are literals. */ +#define RE_NO_BK_BRACES (1 << 8) -/* If this bit is set, then character classes are supported; they are: +/* If this bit is set, then character classes are supported. They are: [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. If not set, then character classes are not supported. */ #define RE_CHAR_CLASSES (1 << 9) -/* If this bit is set, then the dot re doesn't match a null byte. - If not set, it does. */ +/* If this bit is set, then period doesn't match a null. + If not set, then it does. */ #define RE_DOT_NOT_NULL (1 << 10) /* If this bit is set, then [^...] doesn't match a newline. - If not set, it does. */ -#define RE_HAT_NOT_NEWLINE (1 << 11) + If not set, then it does. */ +#define RE_HAT_LISTS_NOT_NEWLINE (1 << 11) -/* If this bit is set, back references are recognized. - If not set, they aren't. */ +/* If this bit is set, then back references are not recognized. + If not set, then they are. */ #define RE_NO_BK_REFS (1 << 12) -/* If this bit is set, back references must refer to a preceding - subexpression. If not set, a back reference to a nonexistent - subexpression is treated as literal characters. */ -#define RE_NO_EMPTY_BK_REF (1 << 13) - -/* If this bit is set, bracket expressions can't be empty. - If it is set, they can be empty. */ -#define RE_NO_EMPTY_BRACKETS (1 << 14) +/* If this bit is set, then all back references must refer to a preceding + subexpression. + If not set, then a back reference to a nonexistent subexpression is + treated as literal characters. */ +#define RE_NO_MISSING_BK_REF (1 << 13) -/* If this bit is set, then *, +, ? and { cannot be first in an re or - immediately after a |, or a (. Furthermore, a | cannot be first or - last in an re, or immediately follow another | or a (. Also, a ^ - cannot appear in a nonleading position and a $ cannot appear in a - nontrailing position (outside of bracket expressions, that is). */ -#define RE_CONTEXTUAL_INVALID_OPS (1 << 15) +/* If this bit is set, then *, +, ?, and { cannot be first in an re or + immediately after OR or BEGINGROUP. Furthermore, OR cannot be + first or last in an re, or immediately follow another OR or + BEGINGROUP. Also, ^ cannot appear in a nonleading position and $ + cannot appear in a nontrailing position (outside of bracket + expressions, that is). */ +#define RE_CONTEXT_INVALID_OPS (1 << 14) /* If this bit is set, then +, ? and | aren't recognized as operators. - If it's not, they are. */ -#define RE_LIMITED_OPS (1 << 16) + If not set, then they are. */ +#define RE_LIMITED_OPS (1 << 15) /* If this bit is set, then an ending range point has to collate higher - or equal to the starting range point. - If it's not set, then when the ending range point collates higher - than the starting range point, the range is just considered empty. */ -#define RE_NO_EMPTY_RANGES (1 << 17) - -/* If this bit is set, then a hyphen (-) can't be an ending range point. - If it isn't, then it can. */ -#define RE_NO_HYPHEN_RANGE_END (1 << 18) - - -/* Define combinations of bits for the standard possibilities. */ -#define RE_SYNTAX_POSIX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR \ - | RE_CONTEXT_INDEP_OPS) -#define RE_SYNTAX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_AWK_CLASS_HACK) -#define RE_SYNTAX_EGREP (RE_NO_BK_PARENS | RE_NO_BK_VBAR \ - | RE_CONTEXT_INDEP_OPS | RE_NEWLINE_OR) -#define RE_SYNTAX_GREP (RE_BK_PLUS_QM | RE_NEWLINE_OR) + than or equal to the starting range point. + If not set, then when the ending range point collates higher than the + starting range point, the range is considered to be empty. */ +#define RE_NO_EMPTY_RANGES (1 << 16) + +/* If this bit is set, then neither the match-beginning-of-line nor + the match-end-of-line operator match a newline. + If not set, then these operators can match a newline. */ +#define RE_NO_ANCHOR_AT_NEWLINE (1 << 17) + +/* If this bit is set, then you can't have empty groups. + If not set, then you can. */ +#define RE_NO_EMPTY_GROUPS (1 << 18) + +/* If this bit is set, then you can't have empty alternatives. + If not set, then you can. */ +#define RE_NO_EMPTY_ALTS (1 << 19) + +/* If this bit is set, then you can't have more than one non-interval + repetition operators (i.e., `*', `+' and `?') in a row, e.g., as in + `a*+?*'. + If not set, then you can. */ +#define RE_NO_CONSECUTIVE_REPEATS (1 << 20) + + +/* If this bit is set, then ignore anchors inside groups which in turn + are operated on by repetion operators. + If not set, then don't. */ +#define RE_REPEATED_ANCHORS_AWAY (1 << 21) + +/* If this bit is set, then the match-any-character operator (.) matches + a newline. + If not set, then it doesn't. */ +#define RE_DOT_NEWLINE (1 << 22) + +/* If this bit is set, then '^' and '$' can be anchors only at the + beginning or the end of the pattern. + If not set, then they don't have to be at the beginning or end of the + pattern to be anchors. */ +#define RE_ANCHORS_ONLY_AT_ENDS (1 << 23) + +/* If this bit is set, then Regex considers an unmatched close-group + operator to be the ordinary character parenthesis. + If not set, then an unmatched close-group operator is invalid. */ +#define RE_UNMATCHED_RIGHT_PAREN_ORD (1 << 24) + +/* If this bit is set, then ^ cannot appear in a nonleading position and + $ cannot appear in a nontrailing position (outside of bracket + expressions, that is). */ +#define RE_CONTEXT_INVALID_ANCHORS (1 << 25) + +/* If this bit is set, then ^ and $ are always anchors, regardless of + their positions in a regular expression. + If this bit is not set, then ^ is an anchor only if in a leading + position and $ is one only if in a trailing position. Specifically, + + ^ is in a leading position if at the beginning of a regular + expression , or after an open-group or an alternation operator; + + $ is in a trailing position if at the end of a regular + expression, or before close-group or an alternation operator. +*/ +#define RE_CONTEXT_INDEP_ANCHORS (1 << 26) + +/* If this bit is set, then the searching and matching routines will + allocate enough register space to accommodate the number of groups + in the regular expression. + If this bit is not set, then the user must allocate the space. */ +#define RE_ALLOCATE_REGISTERS (1 << 27) + + +/* Define combinations of the above bits for the standard possibilities. */ #define RE_SYNTAX_EMACS 0 -#define RE_SYNTAX_POSIX_BASIC (RE_INTERVALS | RE_BK_PLUS_QM \ - | RE_CHAR_CLASSES | RE_DOT_NOT_NULL \ - | RE_HAT_NOT_NEWLINE | RE_NO_EMPTY_BK_REF \ - | RE_NO_EMPTY_BRACKETS | RE_LIMITED_OPS \ - | RE_NO_EMPTY_RANGES | RE_NO_HYPHEN_RANGE_END) - -#define RE_SYNTAX_POSIX_EXTENDED (RE_INTERVALS | RE_NO_BK_CURLY_BRACES \ - | RE_NO_BK_VBAR | RE_NO_BK_PARENS \ - | RE_HAT_NOT_NEWLINE | RE_CHAR_CLASSES \ - | RE_NO_EMPTY_BRACKETS | RE_CONTEXTUAL_INVALID_OPS \ - | RE_NO_BK_REFS | RE_NO_EMPTY_RANGES \ - | RE_NO_HYPHEN_RANGE_END) - - -/* This data structure is used to represent a compiled pattern. */ + +#define RE_SYNTAX_AWK \ + (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_ALLOCATE_REGISTERS \ + | RE_AWK_CLASS_HACK) + +#define RE_SYNTAX_POSIX_AWK RE_SYNTAX_AWK + +#define RE_SYNTAX_GREP \ + (RE_BK_PLUS_QM | RE_NEWLINE_ALT | RE_ALLOCATE_REGISTERS) + +#define RE_SYNTAX_EGREP \ + (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_CONTEXT_INDEP_OPS \ + | RE_CONTEXT_INDEP_ANCHORS | RE_NEWLINE_ALT | RE_ALLOCATE_REGISTERS) + +#define RE_SYNTAX_POSIX_BASIC \ + (RE_INTERVALS | RE_CHAR_CLASSES | RE_DOT_NOT_NULL \ + | RE_NO_MISSING_BK_REF | RE_LIMITED_OPS | RE_NO_EMPTY_RANGES \ + | RE_NO_ANCHOR_AT_NEWLINE | RE_DOT_NEWLINE | RE_ALLOCATE_REGISTERS) + +#define RE_SYNTAX_POSIX_EXTENDED \ + (RE_INTERVALS | RE_NO_BK_BRACES | RE_NO_BK_VBAR \ + | RE_NO_BK_PARENS | RE_CHAR_CLASSES | RE_CONTEXT_INVALID_OPS \ + | RE_NO_BK_REFS | RE_NO_EMPTY_RANGES | RE_UNMATCHED_RIGHT_PAREN_ORD \ + | RE_DOT_NOT_NULL | RE_NO_EMPTY_GROUPS | RE_NO_EMPTY_ALTS \ + | RE_NO_ANCHOR_AT_NEWLINE | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS\ + | RE_ALLOCATE_REGISTERS) + + + +/* Maximum number of duplicates an interval can allow. */ +#define RE_DUP_MAX ((1 << 15) - 1) + + +/* POSIX cflags bits (i.e., information for regcomp). */ + +/* If this bit is set, then use extended regular expression syntax. + If not set, then use basic regular expression syntax. */ +#define REG_EXTENDED 1 + +/* If this bit is set, then (line 526, p.687 of POSIX 1003.2/D10) + newline loses its special significance; i.e., anchors do not match at + newlines in the string. + If not set, then anchors do match at newlines. */ +#define REG_NEWLINE (1 << 1) + +/* If this bit is set, then ignore case when matching. + If not set, then case is significant. */ +#define REG_ICASE (1 << 2) + +/* If this bit is set, then report only success or fail in regexec (). + If not set, then return nonzero indicating either not match or an error. */ +#define REG_NOSUB (1 << 3) + + +/* POSIX eflags bits (i.e., information for regexec). */ + +/* If this bit is set, then the string's first character is not the + beginning of a line, so the beginning-of-line anchor shouldn't + match it. + If not set, then the string's first character can match the + beginning-of-line anchor. */ +#define REG_NOTBOL 1 + +/* If this bit is set, then the string's last character is not the + end of a line, so the end-of-line anchor shouldn't match it. + If not set, then the string's last character can match the + end-of-line anchor. */ +#define REG_NOTEOL (1 << 1) + + +/* POSIX regexec return error value. */ + +#define REG_NOMATCH 1 /* Didn't find a match. */ + +/* POSIX regcomp return error codes. */ + +#define REG_BADPAT 2 /* Found an invalid pattern. */ +#define REG_ECOLLATE 3 /* Not implemented. */ +#define REG_ECTYPE 4 /* Found an invalid character class name. */ +#define REG_EESCAPE 5 /* Found a trailing backslash. */ +#define REG_ESUBREG 6 /* Found an invalid back reference. */ +#define REG_EBRACK 7 /* Found an unmatched left bracket. */ +#define REG_EPAREN 8 /* Found a parentheses imbalance. */ +#define REG_EBRACE 9 /* Found an unmatched \{. */ +#define REG_BADBR 10 /* Found invalid contents of \{\}. */ +#define REG_ERANGE 11 /* Found invalid range end. */ +#define REG_ESPACE 12 /* Ran out of memory. */ +#define REG_BADRPT 13 /* No preceding re for repetition op. */ +#define REG_ENEWLINE 14 /* Not implemented. */ + +/* Some regcomp codes we've added. */ +#define REG_NOERROR 0 /* No error. */ +#define REG_EEND 15 +#define REG_ESIZE 16 + + + + +/* This data structure represents a compiled pattern. Before calling + the pattern compiler, the fields `buffer', `allocated', `fastmap', + `translate', and `no_sub' can be set. After the pattern has been + compiled, the `re_nsub' field is available. All other fields are + private to the regex routines. */ + +/* If this changes, change documentation in regex.texinfo. */ struct re_pattern_buffer - { - char *buffer; /* Space holding the compiled pattern commands. */ - long allocated; /* Size of space that `buffer' points to. */ - long used; /* Length of portion of buffer actually occupied */ - char *fastmap; /* Pointer to fastmap, if any, or zero if none. */ - /* re_search uses the fastmap, if there is one, - to skip over totally implausible characters. */ - char *translate; /* Translate table to apply to all characters before - comparing, or zero for no translation. - The translation is applied to a pattern when it is - compiled and to data when it is matched. */ - char fastmap_accurate; - /* Set to zero when a new pattern is stored, - set to one when the fastmap is updated from it. */ - char can_be_null; /* Set to one by compiling fastmap - if this pattern might match the null string. - It does not necessarily match the null string - in that case, but if this is zero, it cannot. - 2 as value means can match null string - but at end of range or before a character - listed in the fastmap. */ - }; - - -/* search.c (search_buffer) needs this one value. It is defined both in - regex.c and here. */ +{ + /* Space that holds the compiled pattern. */ + char *buffer; + + /* Number of bytes to which `buffer' points. */ + long allocated; + + /* Number of bytes actually used in `buffer'. */ + long used; + + /* Syntax setting with which the pattern was compiled. */ + int syntax; + + /* Pointer to a fastmap, if any, otherwise zero. re_search uses + the fastmap, if there is one, to skip over impossible + starting points for matches. */ + char *fastmap; + + /* Either a translate table to apply to all characters before + comparing them, or zero for no translation. The translation + is applied to a pattern when it is compiled and to a string + when it is matched. */ + char *translate; + + /* Number of subexpressions found by the compiler. */ + size_t re_nsub; + + /* Set to 1 by re_compile_fastmap if this pattern can match the + null string; 0 prevents the searcher from matching it with + the null string. Set to 2 if it might match the null string + either at the end of a search range or just before a + character listed in the fastmap. */ + char can_be_null; + + + /* The remaining fields are all one-bit booleans. */ + + /* Set to zero when regex_compile compiles a pattern; set to one + by re_compile_fastmap when it updates the fastmap, if any. */ + unsigned fastmap_accurate : 1; + + /* If set, regexec reports only success or failure and does not + return anything in pmatch[]. */ + unsigned no_sub : 1; + + /* If set, a beginning-of-line anchor never matches. */ + unsigned not_bol : 1; + + /* Similarly for an end-of-line anchor. */ + unsigned not_eol : 1; + + /* If set, and the regs argument is nonzero, the GNU + matching and searching functions return information + for as many registers as needed to report about the + whole pattern and all its subexpressions. If not set, + and the regs argument is nonzero, then the functions + return information for regs->num_regs registers. */ + unsigned return_default_num_regs : 1; +}; + +typedef struct re_pattern_buffer regex_t; + + +/* search.c (search_buffer) in Emacs needs this one value. It is + defined both in `regex.c' and here. */ #define RE_EXACTN_VALUE 1 -/* Structure to store register contents data in. + +/* struct re_registers: Structure to store register contents data in. + + (If change comments here, change in regex.texinfo also.) + + Some groups in a regular expression match (possibly empty) substrings + of the string that regular expression matched. The matcher remembers + the beginning and ending point of the substring matched by each + group. To get what they matched, pass the address of a structure of + this type to a GNU matching or searching function. + + When you call a GNU matching and searching function, it stores + information into this structure according to the following (in all + examples below, `(' represents the open-group and `)' the + close-group operator): + + If the regular expression has an i-th group that matches a substring + of string, then the function sets REGS->start[i] to the index in + string where the substring matched by the i-th group begins, and + REGS->end[i] to the index just beyond that substring's end. The + function sets REGS->start[0] and REGS->end[0] to analogous + information about the entire pattern. + + For example, when you match the pattern `((a)(b))' with the string + `ab', you get: + + 0 in REGS->start[0] and 2 in REGS->end[0] + 0 in REGS->start[1] and 2 in REGS->end[1] + 0 in REGS->start[2] and 1 in REGS->end[2] + 1 in REGS->start[3] and 2 in REGS->end[3] + + If a group matches more than once (as it might if followed by, e.g., + a repetition operator), then the function reports the information + about what the group @emph{last matched. + + For example, when you match the string `aa' with the pattern `(a)*', + you get: + + 0 in REGS->start[0] and 2 in REGS->end[0] + 1 in REGS->start[1] and 2 in REGS->end[1] + + + If the i-th group does not participate in a successful match, e.g., + it is an alternative not taken or a repetition operator allows zero + repetitions of it, then the function sets REGS->start[i] and + REGS->end[i] to -1. + + For example, when you match the string `b' with the pattern `(a)*b', + you get: + + 0 in REGS->start[0] and 1 in REGS->end[0] + -1 in REGS->start[1] and -1 in REGS->end[1] + + + If the i-th group matches a zero-length string, then the function + sets REGS->start[i] and REGS->end[i] to the index just beyond that + zero-length string. + + For example, when you match the string `b' with the pattern `(a*)b', + you get: + + 0 in REGS->start[0] and 1 in REGS->end[0] + 0 in REGS->start[1] and 0 in REGS->end[1] + + The function sets REGS->start[0] and REGS->end[0] to analogous + information about the entire pattern. + + For example, when you match the empty string with the pattern `(a*)', + you get: + + 0 in REGS->start[0] and 0 in REGS->end[0] + 0 in REGS->start[1] and 0 in REGS->end[1] + + If an i-th group contains a j-th group and the function reports a + match of the i-th group, then it records in REGS->start[j] and + REGS->end[j] the last match (if it matched) of the j-th group. + + For example, when you match the string `abb' with the pattern + `((a*)b)*, group 2' last matches the empty string, so you get: + + 0 in REGS->start[0] and 3 in REGS->end[0] + 2 in REGS->start[1] and 3 in REGS->end[1] + 2 in REGS->start[2] and 2 in REGS->end[2] + + When you match the string `abb' with the pattern `((a)*b)*', group 2 + doesn't participate in the last match, so you get: - Pass the address of such a structure as an argument to re_match, etc., - if you want this information back. + 0 in REGS->start[0] and 3 in REGS->end[0] + 2 in REGS->start[1] and 3 in REGS->end[1] + 0 in REGS->start[2] and 1 in REGS->end[2] - For i from 1 to RE_NREGS - 1, start[i] records the starting index in - the string of where the ith subexpression matched, and end[i] records - one after the ending index. start[0] and end[0] are analogous, for - the entire pattern. */ + If an i-th group contains a j-th group and the function sets + REGS->start[i] and REGS->end[i] to -1, then it also sets REGS->start[j] + and REGS->end[j] to -1. + + For example, when you match the string `c' with the pattern + `((a)*b)*c', you get: + + 0 in REGS->start[0] and 1 in REGS->end[0] + -1 in REGS->start[1] and -1 in REGS->end[1] + -1 in REGS->start[2] and -1 in REGS->end[2] +*/ struct re_registers - { - int start[RE_NREGS]; - int end[RE_NREGS]; - }; +{ + unsigned num_regs; + int *start; + int *end; +}; + + +/* POSIX specification for registers. See comments for struct + re_registers for how this is used and read `POSIX' for `GNU', + `PMATCH' for `REGS', `PMATCH[i].rm_so' for `REGS->start' and + `PMATCH[i].rm_eo' for `REGS->end'. */ + +typedef off_t regoff_t; + +typedef struct +{ + regoff_t rm_so; /* Byte offset from string's start to substring' start. */ + regoff_t rm_eo; /* Byte offset from string's end to substring' end. */ +} regmatch_t; #ifdef __STDC__ -extern char *re_compile_pattern (char *, size_t, struct re_pattern_buffer *); -/* Is this really advertised? */ -extern void re_compile_fastmap (struct re_pattern_buffer *); -extern int re_search (struct re_pattern_buffer *, char*, int, int, int, - struct re_registers *); -extern int re_search_2 (struct re_pattern_buffer *, char *, int, - char *, int, int, int, - struct re_registers *, int); -extern int re_match (struct re_pattern_buffer *, char *, int, int, - struct re_registers *); -extern int re_match_2 (struct re_pattern_buffer *, char *, int, - char *, int, int, struct re_registers *, int); +/* Compile the regular expression PATTERN, with length LENGTH + and syntax given by the global `obscure_syntax', into the buffer + BUFFER. Return NULL if successful, and an error string if not. */ + +extern char *re_compile_pattern (const char *pattern, const int length, + struct re_pattern_buffer *buffer); + + +/* Compile a fastmap for the compiled pattern in BUFFER; used to + accelerate searches. Return 0 if successful and -2 if was an + internal error. */ + +extern int re_compile_fastmap (struct re_pattern_buffer *buffer); + + +/* Search in the string STRING (with length LENGTH) for the pattern + compiled into BUFFER. Start searching at position START, for RANGE + characters. Return the starting position of the match or -1 for no + match, or -2 for an internal error. Also return register + information in REGS (if REGS is non-null). */ + +extern int re_search (struct re_pattern_buffer *buffer, + const char *string, const int length, + const int start, const int range, + struct re_registers *regs); + +/* Like `re_search', but search in the concatenation of STRING1 and + STRING2. Also, stop searching at index START + STOP. */ + +extern int re_search_2 (struct re_pattern_buffer *buffer, + const char *string1, const int length1, + const char *string2, const int length2, + const int start, const int range, + struct re_registers *regs, + const int stop); + +/* Like `re_search', but return how many characters in STRING the regexp + in BUFFER matched, starting at position START. */ + +extern int re_match (const struct re_pattern_buffer *buffer, + const char *string, const int length, + const int start, struct re_registers *regs); + + +/* Relates to `re_match' as `re_search_2' relates to `re_search'. */ + +extern int re_match_2 (const struct re_pattern_buffer *buffer, + const char *string1, const int length1, + const char *string2, const int length2, + const int start, + struct re_registers *regs, + const int stop); + /* 4.2 bsd compatibility. */ -extern char *re_comp (char *); -extern int re_exec (char *); +extern char *re_comp (const char *); +extern int re_exec (const char *); -#else /* !__STDC__ */ +extern int regcomp (regex_t *preg, const char *pattern, int cflags); +extern int regexec (const regex_t *preg, const char *string, size_t nmatch, + regmatch_t pmatch[], int eflags); +extern size_t re_gerror (int errcode, const regex_t *preg, char* errbuf, + size_t errbuf_size); +extern void re_gfree (regex_t *preg); + +#else /* not __STDC__ */ + +/* Support old C compilers. */ +#define const extern char *re_compile_pattern (); -/* Is this really advertised? */ -extern void re_compile_fastmap (); extern int re_search (), re_search_2 (); extern int re_match (), re_match_2 (); @@ -246,6 +565,11 @@ extern int re_match (), re_match_2 (); extern char *re_comp (); extern int re_exec (); +extern int regcomp (); +extern int regexec (); +extern size_t re_gerror (); +extern void re_gfree (); + #endif /* __STDC__ */ @@ -253,4 +577,14 @@ extern int re_exec (); extern char *re_syntax_table; #endif -#endif /* !__REGEXP_LIBRARY */ +#endif /* not __REGEXP_LIBRARY */ + + + +/* +Local variables: +make-backup-files: t +version-control: t +trim-versions-without-asking: nil +End: +*/ -- cgit v1.2.3