SF.net SVN: geany: [2053] trunk

eht16 at users.sourceforge.net eht16 at xxxxx
Sat Nov 17 19:27:50 UTC 2007


Revision: 2053
          http://geany.svn.sourceforge.net/geany/?rev=2053&view=rev
Author:   eht16
Date:     2007-11-17 11:27:50 -0800 (Sat, 17 Nov 2007)

Log Message:
-----------
Fix two more compiler warnings.
Use php.c and lregex.c from CTags SVN (closes #1795810).
Add regex.c and regex.h (GNU regex) for regex support on Windows.	 

Modified Paths:
--------------
    trunk/ChangeLog
    trunk/tagmanager/Makefile.am
    trunk/tagmanager/include/Makefile.am
    trunk/tagmanager/makefile.win32
    trunk/tagmanager/parse.c
    trunk/tagmanager/parse.h
    trunk/tagmanager/php.c
    trunk/tagmanager/regex.c
    trunk/tagmanager/tm_tag.c

Added Paths:
-----------
    trunk/tagmanager/include/regex.h
    trunk/tagmanager/lregex.c

Modified: trunk/ChangeLog
===================================================================
--- trunk/ChangeLog	2007-11-17 17:03:25 UTC (rev 2052)
+++ trunk/ChangeLog	2007-11-17 19:27:50 UTC (rev 2053)
@@ -11,6 +11,13 @@
    Add native GTK printing support.
  * src/printing.c: Set line width for page header, handle empty filename
                    in page header correctly.
+ * tagmanager/Makefile.am, tagmanager/lregex.c,
+   tagmanager/makefile.win32, tagmanager/parse.c, tagmanager/parse.h,
+   tagmanager/php.c, tagmanager/regex.c, tagmanager/tm_tag.c,
+   tagmanager/include/Makefile.am, tagmanager/include/regex.h:
+   Fix two more compiler warnings.
+   Use php.c and lregex.c from CTags SVN (closes #1795810).
+   Add regex.c and regex.h (GNU regex) for regex support on Windows.
 
 
 2007-11-14  Nick Treleaven  <nick(dot)treleaven(at)btinternet(dot)com>

Modified: trunk/tagmanager/Makefile.am
===================================================================
--- trunk/tagmanager/Makefile.am	2007-11-17 17:03:25 UTC (rev 2052)
+++ trunk/tagmanager/Makefile.am	2007-11-17 19:27:50 UTC (rev 2053)
@@ -7,6 +7,8 @@
 #	-DGDK_PIXBUF_DEPRECATED \
 #	-DGTK_DISABLE_DEPRECATED -DGNOME_DISABLE_DEPRECATED
 
+# regex.c is the GNU regex implementation needed for Windows
+EXTRA_DIST = regex.c
 
 noinst_LIBRARIES = libtagmanager.a
 libtagmanager_a_SOURCES =\
@@ -39,6 +41,7 @@
 	make.c\
 	asm.c\
 	latex.c\
+	lregex.c\
 	pascal.c\
 	perl.c\
 	rest.c\
@@ -46,7 +49,6 @@
 	sql.c\
 	php.c\
 	python.c\
-	regex.c\
 	tcl.c\
 	sh.c\
 	vhdl.c\

Modified: trunk/tagmanager/include/Makefile.am
===================================================================
--- trunk/tagmanager/include/Makefile.am	2007-11-17 17:03:25 UTC (rev 2052)
+++ trunk/tagmanager/include/Makefile.am	2007-11-17 19:27:50 UTC (rev 2053)
@@ -1,4 +1,5 @@
 noinst_HEADERS = \
+	regex.h \
 	tm_project.h\
 	tm_source_file.h\
 	tm_tag.h\

Added: trunk/tagmanager/include/regex.h
===================================================================
--- trunk/tagmanager/include/regex.h	                        (rev 0)
+++ trunk/tagmanager/include/regex.h	2007-11-17 19:27:50 UTC (rev 2053)
@@ -0,0 +1,490 @@
+/* Definitions for data structures and routines for the regular
+   expression library, version 0.12.
+
+   Copyright (C) 1985, 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+#ifndef __REGEXP_LIBRARY_H__
+#define __REGEXP_LIBRARY_H__
+
+/* POSIX says that <sys/types.h> must be included (by the caller) before
+   <regex.h>.  */
+
+#ifdef VMS
+/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
+   should be there.  */
+#include <stddef.h>
+#endif
+
+
+/* The following bits are used to determine the regexp syntax we
+   recognize.  The set/not-set meanings are chosen so that Emacs syntax
+   remains the value 0.  The bits are given in alphabetical order, and
+   the definitions shifted by one from the previous bit; thus, when we
+   add or remove a bit, only one other definition need change.  */
+typedef unsigned reg_syntax_t;
+
+/* If this bit is not set, then \ inside a bracket expression is literal.
+   If set, then such a \ quotes the following character.  */
+#define RE_BACKSLASH_ESCAPE_IN_LISTS (1)
+
+/* If this bit is not set, then + and ? are operators, and \+ and \? are
+     literals. 
+   If set, then \+ and \? are operators and + and ? are literals.  */
+#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
+
+/* If this bit is set, then character classes are supported.  They are:
+     [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
+     [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
+   If not set, then character classes are not supported.  */
+#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
+
+/* If this bit is set, then ^ and $ are always anchors (outside bracket
+     expressions, of course).
+   If this bit is not set, then it depends:
+        ^  is an anchor if it is at the beginning of a regular
+           expression or after an open-group or an alternation operator;
+        $  is an anchor if it is at the end of a regular expression, or
+           before a close-group or an alternation operator.  
+
+   This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
+   POSIX draft 11.2 says that * etc. in leading positions is undefined.
+   We already implemented a previous draft which made those constructs
+   invalid, though, so we haven't changed the code back.  */
+#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
+
+/* If this bit is set, then special characters are always special
+     regardless of where they are in the pattern.
+   If this bit is not set, then special characters are special only in
+     some contexts; otherwise they are ordinary.  Specifically, 
+     * + ? and intervals are only special when not after the beginning,
+     open-group, or alternation operator.  */
+#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
+
+/* If this bit is set, then *, +, ?, and { cannot be first in an re or
+     immediately after an alternation or begin-group operator.  */
+#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
+
+/* If this bit is set, then . matches newline.
+   If not set, then it doesn't.  */
+#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
+
+/* If this bit is set, then . doesn't match NUL.
+   If not set, then it does.  */
+#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
+
+/* If this bit is set, nonmatching lists [^...] do not match newline.
+   If not set, they do.  */
+#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
+
+/* If this bit is set, either \{...\} or {...} defines an
+     interval, depending on RE_NO_BK_BRACES. 
+   If not set, \{, \}, {, and } are literals.  */
+#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+
+/* If this bit is set, +, ? and | aren't recognized as operators.
+   If not set, they are.  */
+#define RE_LIMITED_OPS (RE_INTERVALS << 1)
+
+/* If this bit is set, newline is an alternation operator.
+   If not set, newline is literal.  */
+#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
+
+/* If this bit is set, then `{...}' defines an interval, and \{ and \}
+     are literals.
+  If not set, then `\{...\}' defines an interval.  */
+#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
+
+/* If this bit is set, (...) defines a group, and \( and \) are literals.
+   If not set, \(...\) defines a group, and ( and ) are literals.  */
+#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
+
+/* If this bit is set, then \<digit> matches <digit>.
+   If not set, then \<digit> is a back-reference.  */
+#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
+
+/* If this bit is set, then | is an alternation operator, and \| is literal. 
+   If not set, then \| is an alternation operator, and | is literal.  */
+#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
+
+/* If this bit is set, then a starting range point collating higher
+     than the ending range point, as in [z-a], is invalid.
+   If not set, then when the starting range point collates higher than
+     the ending range point, the range is ignored.  */
+#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
+
+/* If this bit is set, then an unmatched ) is ordinary.
+   If not set, then an unmatched ) is invalid.  */
+#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
+
+/* This global variable defines the particular regexp syntax to use (for
+   some interfaces).  When a regexp is compiled, the syntax used is
+   stored in the pattern buffer, so changing this does not affect
+   already-compiled regexps.  */
+extern reg_syntax_t re_syntax_options;
+
+/* Define combinations of the above bits for the standard possibilities.
+   (The [[[ comments delimit what gets put into the Texinfo file, so
+   don't delete them!)  */ 
+/* [[[begin syntaxes]]] */
+#define RE_SYNTAX_EMACS 0
+
+#define RE_SYNTAX_AWK							\
+  (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL			\
+   | RE_NO_BK_PARENS            | RE_NO_BK_REFS				\
+   | RE_NO_BK_VBAR               | RE_NO_EMPTY_RANGES			\
+   | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+#define RE_SYNTAX_POSIX_AWK 						\
+  (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)
+
+#define RE_SYNTAX_GREP							\
+  (RE_BK_PLUS_QM              | RE_CHAR_CLASSES				\
+   | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS				\
+   | RE_NEWLINE_ALT)
+
+#define RE_SYNTAX_EGREP							\
+  (RE_CHAR_CLASSES        | RE_CONTEXT_INDEP_ANCHORS			\
+   | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE			\
+   | RE_NEWLINE_ALT       | RE_NO_BK_PARENS				\
+   | RE_NO_BK_VBAR)
+
+#define RE_SYNTAX_POSIX_EGREP						\
+  (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
+
+/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
+#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
+
+#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
+
+/* Syntax bits common to both basic and extended POSIX regex syntax.  */
+#define _RE_SYNTAX_POSIX_COMMON						\
+  (RE_CHAR_CLASSES | RE_DOT_NEWLINE      | RE_DOT_NOT_NULL		\
+   | RE_INTERVALS  | RE_NO_EMPTY_RANGES)
+
+#define RE_SYNTAX_POSIX_BASIC						\
+  (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
+
+/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
+   RE_LIMITED_OPS, i.e., \? \+ \| are not recognized.  Actually, this
+   isn't minimal, since other operators, such as \`, aren't disabled.  */
+#define RE_SYNTAX_POSIX_MINIMAL_BASIC					\
+  (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
+
+#define RE_SYNTAX_POSIX_EXTENDED					\
+  (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS			\
+   | RE_CONTEXT_INDEP_OPS  | RE_NO_BK_BRACES				\
+   | RE_NO_BK_PARENS       | RE_NO_BK_VBAR				\
+   | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
+   replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added.  */
+#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED				\
+  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
+   | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES				\
+   | RE_NO_BK_PARENS        | RE_NO_BK_REFS				\
+   | RE_NO_BK_VBAR	    | RE_UNMATCHED_RIGHT_PAREN_ORD)
+/* [[[end syntaxes]]] */
+
+/* Maximum number of duplicates an interval can allow.  Some systems
+   (erroneously) define this in other header files, but we want our
+   value, so remove any previous define.  */
+#ifdef RE_DUP_MAX
+#undef RE_DUP_MAX
+#endif
+#define RE_DUP_MAX ((1 << 15) - 1) 
+
+
+/* POSIX `cflags' bits (i.e., information for `regcomp').  */
+
+/* If this bit is set, then use extended regular expression syntax.
+   If not set, then use basic regular expression syntax.  */
+#define REG_EXTENDED 1
+
+/* If this bit is set, then ignore case when matching.
+   If not set, then case is significant.  */
+#define REG_ICASE (REG_EXTENDED << 1)
+ 
+/* If this bit is set, then anchors do not match at newline
+     characters in the string.
+   If not set, then anchors do match at newlines.  */
+#define REG_NEWLINE (REG_ICASE << 1)
+
+/* If this bit is set, then report only success or fail in regexec.
+   If not set, then returns differ between not matching and errors.  */
+#define REG_NOSUB (REG_NEWLINE << 1)
+
+
+/* POSIX `eflags' bits (i.e., information for regexec).  */
+
+/* If this bit is set, then the beginning-of-line operator doesn't match
+     the beginning of the string (presumably because it's not the
+     beginning of a line).
+   If not set, then the beginning-of-line operator does match the
+     beginning of the string.  */
+#define REG_NOTBOL 1
+
+/* Like REG_NOTBOL, except for the end-of-line.  */
+#define REG_NOTEOL (1 << 1)
+
+
+/* If any error codes are removed, changed, or added, update the
+   `re_error_msg' table in regex.c.  */
+typedef enum
+{
+  REG_NOERROR = 0,	/* Success.  */
+  REG_NOMATCH,		/* Didn't find a match (for regexec).  */
+
+  /* POSIX regcomp return error codes.  (In the order listed in the
+     standard.)  */
+  REG_BADPAT,		/* Invalid pattern.  */
+  REG_ECOLLATE,		/* Not implemented.  */
+  REG_ECTYPE,		/* Invalid character class name.  */
+  REG_EESCAPE,		/* Trailing backslash.  */
+  REG_ESUBREG,		/* Invalid back reference.  */
+  REG_EBRACK,		/* Unmatched left bracket.  */
+  REG_EPAREN,		/* Parenthesis imbalance.  */ 
+  REG_EBRACE,		/* Unmatched \{.  */
+  REG_BADBR,		/* Invalid contents of \{\}.  */
+  REG_ERANGE,		/* Invalid range end.  */
+  REG_ESPACE,		/* Ran out of memory.  */
+  REG_BADRPT,		/* No preceding re for repetition op.  */
+
+  /* Error codes we've added.  */
+  REG_EEND,		/* Premature end.  */
+  REG_ESIZE,		/* Compiled pattern bigger than 2^16 bytes.  */
+  REG_ERPAREN		/* Unmatched ) or \); not returned from regcomp.  */
+} reg_errcode_t;
+
+/* This data structure represents a compiled pattern.  Before calling
+   the pattern compiler, the fields `buffer', `allocated', `fastmap',
+   `translate', and `no_sub' can be set.  After the pattern has been
+   compiled, the `re_nsub' field is available.  All other fields are
+   private to the regex routines.  */
+
+struct re_pattern_buffer
+{
+/* [[[begin pattern_buffer]]] */
+	/* Space that holds the compiled pattern.  It is declared as
+          `unsigned char *' because its elements are
+           sometimes used as array indexes.  */
+  unsigned char *buffer;
+
+	/* Number of bytes to which `buffer' points.  */
+  unsigned long allocated;
+
+	/* Number of bytes actually used in `buffer'.  */
+  unsigned long used;	
+
+        /* Syntax setting with which the pattern was compiled.  */
+  reg_syntax_t syntax;
+
+        /* Pointer to a fastmap, if any, otherwise zero.  re_search uses
+           the fastmap, if there is one, to skip over impossible
+           starting points for matches.  */
+  char *fastmap;
+
+        /* Either a translate table to apply to all characters before
+           comparing them, or zero for no translation.  The translation
+           is applied to a pattern when it is compiled and to a string
+           when it is matched.  */
+  char *translate;
+
+	/* Number of subexpressions found by the compiler.  */
+  size_t re_nsub;
+
+        /* Zero if this pattern cannot match the empty string, one else.
+           Well, in truth it's used only in `re_search_2', to see
+           whether or not we should use the fastmap, so we don't set
+           this absolutely perfectly; see `re_compile_fastmap' (the
+           `duplicate' case).  */
+  unsigned can_be_null : 1;
+
+        /* If REGS_UNALLOCATED, allocate space in the `regs' structure
+             for `max (RE_NREGS, re_nsub + 1)' groups.
+           If REGS_REALLOCATE, reallocate space if necessary.
+           If REGS_FIXED, use what's there.  */
+#define REGS_UNALLOCATED 0
+#define REGS_REALLOCATE 1
+#define REGS_FIXED 2
+  unsigned regs_allocated : 2;
+
+        /* Set to zero when `regex_compile' compiles a pattern; set to one
+           by `re_compile_fastmap' if it updates the fastmap.  */
+  unsigned fastmap_accurate : 1;
+
+        /* If set, `re_match_2' does not return information about
+           subexpressions.  */
+  unsigned no_sub : 1;
+
+        /* If set, a beginning-of-line anchor doesn't match at the
+           beginning of the string.  */ 
+  unsigned not_bol : 1;
+
+        /* Similarly for an end-of-line anchor.  */
+  unsigned not_eol : 1;
+
+        /* If true, an anchor at a newline matches.  */
+  unsigned newline_anchor : 1;
+
+/* [[[end pattern_buffer]]] */
+};
+
+typedef struct re_pattern_buffer regex_t;
+
+
+/* search.c (search_buffer) in Emacs needs this one opcode value.  It is
+   defined both in `regex.c' and here.  */
+#define RE_EXACTN_VALUE 1
+
+/* Type for byte offsets within the string.  POSIX mandates this.  */
+typedef int regoff_t;
+
+
+/* This is the structure we store register match data in.  See
+   regex.texinfo for a full description of what registers match.  */
+struct re_registers
+{
+  unsigned num_regs;
+  regoff_t *start;
+  regoff_t *end;
+};
+
+
+/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
+   `re_match_2' returns information about at least this many registers
+   the first time a `regs' structure is passed.  */
+#ifndef RE_NREGS
+#define RE_NREGS 30
+#endif
+
+
+/* POSIX specification for registers.  Aside from the different names than
+   `re_registers', POSIX uses an array of structures, instead of a
+   structure of arrays.  */
+typedef struct
+{
+  regoff_t rm_so;  /* Byte offset from string's start to substring's start.  */
+  regoff_t rm_eo;  /* Byte offset from string's start to substring's end.  */
+} regmatch_t;
+
+/* Declarations for routines.  */
+
+/* To avoid duplicating every routine declaration -- once with a
+   prototype (if we are ANSI), and once without (if we aren't) -- we
+   use the following macro to declare argument types.  This
+   unfortunately clutters up the declarations a bit, but I think it's
+   worth it.  */
+
+#if __STDC__
+
+#define _RE_ARGS(args) args
+
+#else /* not __STDC__ */
+
+#define _RE_ARGS(args) ()
+
+#endif /* not __STDC__ */
+
+/* Sets the current default syntax to SYNTAX, and return the old syntax.
+   You can also simply assign to the `re_syntax_options' variable.  */
+extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
+
+/* Compile the regular expression PATTERN, with length LENGTH
+   and syntax given by the global `re_syntax_options', into the buffer
+   BUFFER.  Return NULL if successful, and an error string if not.  */
+extern const char *re_compile_pattern
+  _RE_ARGS ((const char *pattern, int length,
+             struct re_pattern_buffer *buffer));
+
+
+/* Compile a fastmap for the compiled pattern in BUFFER; used to
+   accelerate searches.  Return 0 if successful and -2 if there was an
+   internal error.  */
+extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
+
+
+/* Search in the string STRING (with length LENGTH) for the pattern
+   compiled into BUFFER.  Start searching at position START, for RANGE
+   characters.  Return the starting position of the match, -1 for no
+   match, or -2 for an internal error.  Also return register
+   information in REGS (if REGS is nonzero and BUFFER->no_sub is zero).  */
+extern int re_search
+  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+            int length, int start, int range, struct re_registers *regs));
+
+
+/* Like `re_search', but search in the concatenation of STRING1 and
+   STRING2.  Also, stop searching at index START + STOP.  */
+extern int re_search_2
+  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+             int length1, const char *string2, int length2,
+             int start, int range, struct re_registers *regs, int stop));
+
+
+/* Like `re_search', but return how many characters in STRING the regexp
+   in BUFFER matched, starting at position START.  */
+extern int re_match
+  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+             int length, int start, struct re_registers *regs));
+
+
+/* Relates to `re_match' as `re_search_2' relates to `re_search'.  */
+extern int re_match_2 
+  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+             int length1, const char *string2, int length2,
+             int start, struct re_registers *regs, int stop));
+
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+   ENDS.  Subsequent matches using BUFFER and REGS will use this memory
+   for recording register information.  STARTS and ENDS must be
+   allocated with malloc, and must each be at least `NUM_REGS * sizeof
+   (regoff_t)' bytes long.
+
+   If NUM_REGS == 0, then subsequent matches should allocate their own
+   register data.
+
+   Unless this function is called, the first search or match using
+   PATTERN_BUFFER will allocate its own register data, without
+   freeing the old data.  */
+extern void re_set_registers
+  _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
+             unsigned num_regs, regoff_t *starts, regoff_t *ends));
+
+/* 4.2 bsd compatibility.  */
+extern char *re_comp _RE_ARGS ((const char *));
+extern int re_exec _RE_ARGS ((const char *));
+
+/* POSIX compatibility.  */
+extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags));
+extern int regexec
+  _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch,
+             regmatch_t pmatch[], int eflags));
+extern size_t regerror
+  _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf,
+             size_t errbuf_size));
+extern void regfree _RE_ARGS ((regex_t *preg));
+
+#endif /* not __REGEXP_LIBRARY_H__ */
+
+/*
+Local variables:
+make-backup-files: t
+version-control: t
+trim-versions-without-asking: nil
+End:
+*/

Added: trunk/tagmanager/lregex.c
===================================================================
--- trunk/tagmanager/lregex.c	                        (rev 0)
+++ trunk/tagmanager/lregex.c	2007-11-17 19:27:50 UTC (rev 2053)
@@ -0,0 +1,704 @@
+/*
+*   $Id: lregex.c 576 2007-06-30 04:16:23Z elliotth $
+*
+*   Copyright (c) 2000-2003, Darren Hiebert
+*
+*   This source code is released for free distribution under the terms of the
+*   GNU General Public License.
+*
+*   This module contains functions for applying regular expression matching.
+*
+*   The code for utilizing the Gnu regex package with regards to processing the
+*   regex option and checking for regex matches was adapted from routines in
+*   Gnu etags.
+*/
+
+/*
+*   INCLUDE FILES
+*/
+#include "general.h"  /* must always come first */
+
+#include <string.h>
+#include <glib.h>
+
+#ifdef HAVE_REGCOMP
+# include <ctype.h>
+# include <stddef.h>
+# ifdef HAVE_SYS_TYPES_H
+#  include <sys/types.h>  /* declare off_t (not known to regex.h on FreeBSD) */
+# endif
+# include <regex.h>
+#endif
+
+#include "main.h"
+#include "entry.h"
+#include "parse.h"
+#include "read.h"
+
+#ifdef HAVE_REGEX
+
+/*
+*   MACROS
+*/
+
+/* Back-references \0 through \9 */
+#define BACK_REFERENCE_COUNT 10
+
+#if defined (HAVE_REGCOMP) && !defined (REGCOMP_BROKEN)
+# define POSIX_REGEX
+#endif
+
+#define REGEX_NAME "Regex"
+
+/*
+*   DATA DECLARATIONS
+*/
+#if defined (POSIX_REGEX)
+
+/* Describes the tag kind produced by a tag-type regex pattern. */
+struct sKind {
+	boolean enabled;    /* makeRegexTag() emits a tag only while this is true */
+	char letter;        /* one-character kind id, copied to the tag entry's `kind' */
+	char* name;         /* kind name, copied to the tag entry's `kindName' */
+	char* description;  /* optional text for kind listings; may be NULL */
+};
+
+/* Tells which member of a regexPattern's union `u' is in use. */
+enum pType { PTRN_TAG, PTRN_CALLBACK };
+
+/* One compiled pattern registered for a language.  A PTRN_TAG entry
+ * carries the tag-name template and the kind of tag to create; a
+ * PTRN_CALLBACK entry carries a function pointer instead.
+ */
+typedef struct {
+	regex_t *pattern;  /* compiled expression; freed by clearPatternSet() */
+	enum pType type;   /* selects the active member of `u' */
+	union {
+		struct {
+			char *name_pattern;  /* tag-name template expanded by substitute() */
+			struct sKind kind;
+		} tag;
+		struct {
+			regexCallback function;
+		} callback;
+	} u;
+} regexPattern;
+
+#endif
+
+/* The patterns registered for a single language. */
+typedef struct {
+	regexPattern *patterns;  /* array of `count' entries; NULL while empty */
+	unsigned int count;      /* number of entries in `patterns' */
+} patternSet;
+
+/*
+*   DATA DEFINITIONS
+*/
+
+/* NOTE(review): not referenced in the part of the file visible here;
+ * presumably records that the regex library turned out to be unusable --
+ * confirm against the rest of the file. */
+static boolean regexBroken = FALSE;
+
+/* Array of pattern sets, indexed by language */
+static patternSet* Sets = NULL;
+static int SetUpper = -1;  /* upper language index in list */
+
+/*
+*   FUNCTION DEFINITIONS
+*/
+
+/* Release every pattern registered for `language' and leave its set
+ * empty.  Nothing is done for a language above the current upper index,
+ * since no set exists for it.
+ */
+static void clearPatternSet (const langType language)
+{
+	patternSet* target;
+	unsigned int idx;
+
+	if (language > SetUpper)
+		return;
+
+	target = &Sets [language];
+	for (idx = 0  ;  idx < target->count  ;  ++idx)
+	{
+		regexPattern *const entry = target->patterns + idx;
+
+#if defined (POSIX_REGEX)
+		regfree (entry->pattern);
+#endif
+		eFree (entry->pattern);
+		entry->pattern = NULL;
+
+		/* Only tag-type entries own strings besides the compiled regex. */
+		if (entry->type != PTRN_TAG)
+			continue;
+
+		eFree (entry->u.tag.name_pattern);
+		entry->u.tag.name_pattern = NULL;
+		eFree (entry->u.tag.kind.name);
+		entry->u.tag.kind.name = NULL;
+		if (entry->u.tag.kind.description != NULL)
+		{
+			eFree (entry->u.tag.kind.description);
+			entry->u.tag.kind.description = NULL;
+		}
+	}
+
+	if (target->patterns != NULL)
+		eFree (target->patterns);
+	target->patterns = NULL;
+	target->count = 0;
+}
+
+/*
+*   Regex pseudo-parser
+*/
+
+/* Emit a tag called `name' of the given `kind', unless that kind is
+ * disabled.
+ *
+ * name: the tag name; must be non-NULL and non-empty when the kind is
+ *       enabled.
+ * kind: supplies the kind letter and kind name for the entry; must not
+ *       be NULL.
+ */
+static void makeRegexTag (
+		const vString* const name, const struct sKind* const kind)
+{
+	/* Check `kind' before it is dereferenced, not afterwards. */
+	Assert (kind != NULL);
+	if (kind->enabled)
+	{
+		tagEntryInfo e;
+		Assert (name != NULL  &&  vStringLength (name) > 0);
+		initTagEntry (&e, vStringValue (name));
+		e.kind     = kind->letter;
+		e.kindName = kind->name;
+		makeTagEntry (&e);
+	}
+}
+
+/*
+*   Regex pattern definition
+*/
+
+/* Take a string like "/blah/" and turn it into "blah".  Works in place.
+ * The first character is the separator; scanning stops at the first
+ * unquoted occurrence of it.  A backslash-quoted separator is copied
+ * without its backslash, "\t" becomes a Tab character, and any other
+ * quoted character is copied together with its backslash.  The
+ * resulting string is null terminated.
+ *
+ * Returns a pointer to the terminating separator, or to the terminating
+ * null byte when no closing separator is found.  An empty `name' is
+ * returned unchanged.
+ */
+static char* scanSeparators (char* name)
+{
+	char sep = name [0];
+	char *copyto = name;
+	boolean quoted = FALSE;
+
+	/* An empty string has no separator; starting the scan at name [1]
+	 * would read past the terminator. */
+	if (sep == '\0')
+		return name;
+
+	for (++name ; *name != '\0' ; ++name)
+	{
+		if (quoted)
+		{
+			if (*name == sep)
+				*copyto++ = sep;
+			else if (*name == 't')
+				*copyto++ = '\t';
+			else
+			{
+				/* Something else is quoted, so preserve the quote. */
+				*copyto++ = '\\';
+				*copyto++ = *name;
+			}
+			quoted = FALSE;
+		}
+		else if (*name == '\\')
+			quoted = TRUE;
+		else if (*name == sep)
+		{
+			break;
+		}
+		else
+			*copyto++ = *name;
+	}
+	*copyto = '\0';
+	return name;
+}
+
+/* Parse `regexp', in form "/regex/name/[k,Kind/]flags" (where the separator
+ * character is whatever the first character of `regexp' is), by breaking it
+ * up into null terminated strings, removing the separators, and expanding
+ * '\t' into tabs. When complete, `regexp' points to the line matching
+ * pattern, a pointer to the name matching pattern is written to `name', a
+ * pointer to the kinds is written to `kinds' (possibly NULL), and a pointer
+ * to the trailing flags is written to `flags'.
+ *
+ * Returns TRUE on success.  If the pattern is not in the correct format
+ * (empty or unterminated regexp, missing name pattern, name pattern ending
+ * in a backslash, or missing final separator) a message is printed and
+ * FALSE is returned; `kinds' and `flags' are then left untouched.
+ */
+static boolean parseTagRegex (
+		char* const regexp, char** const name,
+		char** const kinds, char** const flags)
+{
+	boolean result = FALSE;
+	const int separator = (unsigned char) regexp [0];
+
+	*name = scanSeparators (regexp);
+	/* Characters are compared through `unsigned char', matching the way
+	 * `separator' was obtained, so separators >= 0x80 are recognised. */
+	if (*regexp == '\0')
+		printf ("regex: empty regexp");
+	else if ((unsigned char) **name != separator)
+		printf ("regex: %s: incomplete regexp", regexp);
+	else
+	{
+		char* const third = scanSeparators (*name);
+		const size_t nameLength = strlen (*name);
+		/* Each problem rejects the pattern; checking the length first
+		 * keeps the last-character test from indexing before the string. */
+		if (nameLength == 0)
+			printf ("regex: %s: regexp missing name pattern", regexp);
+		else if ((*name) [nameLength - 1] == '\\')
+			printf ("regex: error in name pattern: \"%s\"", *name);
+		else if ((unsigned char) *third != separator)
+			printf ("regex: %s: regexp missing final separator", regexp);
+		else
+		{
+			char* const fourth = scanSeparators (third);
+			if ((unsigned char) *fourth == separator)
+			{
+				/* A fourth separator means a kinds field precedes the flags. */
+				*kinds = third;
+				scanSeparators (fourth);
+				*flags = fourth;
+			}
+			else
+			{
+				*flags = third;
+				*kinds = NULL;
+			}
+			result = TRUE;
+		}
+	}
+	return result;
+}
+
+/* Append a tag-type pattern to the set of `language', first growing the
+ * array of sets when `language' lies above the current upper index.  The
+ * pointers passed in are stored as given (not copied) and the kind is
+ * recorded as enabled.
+ */
+static void addCompiledTagPattern (
+		const langType language, regex_t* const pattern,
+		char* const name, const char kind, char* const kindName,
+		char *const description)
+{
+	patternSet* owner;
+	regexPattern *slot;
+
+	if (language > SetUpper)
+	{
+		int lang = SetUpper + 1;
+		Sets = xRealloc (Sets, (language + 1), patternSet);
+		/* Sets created by the growth start out empty. */
+		while (lang <= language)
+		{
+			Sets [lang].patterns = NULL;
+			Sets [lang].count = 0;
+			++lang;
+		}
+		SetUpper = language;
+	}
+
+	owner = &Sets [language];
+	owner->patterns = xRealloc (owner->patterns, (owner->count + 1), regexPattern);
+	slot = owner->patterns + owner->count;
+	owner->count += 1;
+
+	slot->pattern = pattern;
+	slot->type    = PTRN_TAG;
+	slot->u.tag.name_pattern     = name;
+	slot->u.tag.kind.enabled     = TRUE;
+	slot->u.tag.kind.letter      = kind;
+	slot->u.tag.kind.name        = kindName;
+	slot->u.tag.kind.description = description;
+}
+
+/* Append a callback-type pattern to the set of `language', first growing
+ * the array of sets when `language' lies above the current upper index.
+ * `pattern' is stored as given, not copied.
+ */
+static void addCompiledCallbackPattern (
+		const langType language, regex_t* const pattern,
+		const regexCallback callback)
+{
+	patternSet* owner;
+	regexPattern *slot;
+
+	if (language > SetUpper)
+	{
+		int lang = SetUpper + 1;
+		Sets = xRealloc (Sets, (language + 1), patternSet);
+		/* Sets created by the growth start out empty. */
+		while (lang <= language)
+		{
+			Sets [lang].patterns = NULL;
+			Sets [lang].count = 0;
+			++lang;
+		}
+		SetUpper = language;
+	}
+
+	owner = &Sets [language];
+	owner->patterns = xRealloc (owner->patterns, (owner->count + 1), regexPattern);
+	slot = owner->patterns + owner->count;
+	owner->count += 1;
+
+	slot->pattern = pattern;
+	slot->type    = PTRN_CALLBACK;
+	slot->u.callback.function = callback;
+}
+
+#if defined (POSIX_REGEX)
+
+/* Compile `regexp' with the POSIX regex library.
+ *
+ * flags: optional string of single-letter modifiers (may be NULL):
+ *          b  use basic regular expression syntax
+ *          e  use extended regular expression syntax (the default)
+ *          i  ignore case
+ *        Any other letter is reported and otherwise ignored.
+ *
+ * REG_NEWLINE is always requested.  Returns a newly allocated regex_t,
+ * or NULL (after printing the regerror() text) when compilation fails.
+ */
+static regex_t* compileRegex (const char* const regexp, const char* const flags)
+{
+	int cflags = REG_EXTENDED | REG_NEWLINE;
+	regex_t *result = NULL;
+	int errcode;
+	int i;
+	for (i = 0  ; flags != NULL  &&  flags [i] != '\0'  ;  ++i)
+	{
+		switch ((int) flags [i])
+		{
+			case 'b': cflags &= ~REG_EXTENDED; break;
+			case 'e': cflags |= REG_EXTENDED;  break;
+			case 'i': cflags |= REG_ICASE;     break;
+			/* Report the offending flag itself, not the first flag. */
+			default: printf ("regex: unknown regex flag: '%c'", flags [i]); break;
+		}
+	}
+	result = xMalloc (1, regex_t);
+	errcode = regcomp (result, regexp, cflags);
+	if (errcode != 0)
+	{
+		char errmsg[256];
+		regerror (errcode, result, errmsg, sizeof errmsg);
+		printf ("regex: regcomp %s: %s", regexp, errmsg);
+		regfree (result);
+		eFree (result);
+		result = NULL;
+	}
+	return result;
+}
+
+#endif
+
+/* Split a kinds specification of the form "[letter][,name[,description]]"
+ * into its parts.
+ *
+ * kinds:       the specification; may be NULL or empty.
+ * kind:        receives the kind letter.  A letter is only taken when it
+ *              is a single character followed by ',' or the end of the
+ *              string; otherwise 'r' is used.
+ * kindName:    receives a newly allocated kind name, "regex" when the
+ *              specification does not supply one.
+ * description: receives a newly allocated description, or NULL when
+ *              there is none.
+ */
+static void parseKinds (
+		const char* const kinds, char* const kind, char** const kindName,
+		char **description)
+{
+	const char* rest;
+	const char* comma;
+	size_t nameLength;
+
+	*kind = '\0';
+	*kindName = NULL;
+	*description = NULL;
+
+	if (kinds == NULL  ||  kinds [0] == '\0')
+	{
+		*kind = 'r';
+		*kindName = eStrdup ("regex");
+		return;
+	}
+
+	rest = kinds;
+	if (rest [0] != ','  &&  (rest [1] == ','  ||  rest [1] == '\0'))
+	{
+		*kind = rest [0];
+		++rest;
+	}
+	else
+		*kind = 'r';
+	if (rest [0] == ',')
+		++rest;
+
+	if (rest [0] == '\0')
+	{
+		*kindName = eStrdup ("regex");
+		return;
+	}
+
+	comma = strchr (rest, ',');
+	if (comma == NULL)
+	{
+		*kindName = eStrdup (rest);
+		return;
+	}
+
+	/* The name ends at the comma; whatever follows is the description. */
+	nameLength = (size_t) (comma - rest);
+	*kindName = (char*) eMalloc (nameLength + 1);
+	memcpy (*kindName, rest, nameLength);
+	(*kindName) [nameLength] = '\0';
+	if (comma [1] != '\0')
+		*description = eStrdup (comma + 1);
+}
+
+/* Print one line describing the kind of the tag-type pattern pat [i]:
+ * its letter ('?' when unset), its description (or its name when there is
+ * no description) and " [off]" when the kind is disabled.  The line is
+ * indented by four spaces when `indent' is true.
+ */
+static void printRegexKind (const regexPattern *pat, unsigned int i, boolean indent)
+{
+	const struct sKind *tagKind;
+	const char *label;
+	char letter;
+
+	Assert (pat [i].type == PTRN_TAG);
+	tagKind = &pat [i].u.tag.kind;
+
+	letter = tagKind->letter;
+	if (letter == '\0')
+		letter = '?';
+
+	label = tagKind->description;
+	if (label == NULL)
+		label = tagKind->name;
+
+	printf ("%s%c  %s %s\n", indent ? "    " : "", letter, label,
+			tagKind->enabled ? "" : " [off]");
+}
+
+/* Apply a regex option value to `language':
+ *   - NULL or empty: remove every pattern registered for the language;
+ *   - "@file":       pass each line obtained from `file' by readLine() to
+ *                    addLanguageRegex();
+ *   - anything else: pass the value itself to addLanguageRegex().
+ * A message is printed when the file does not exist or cannot be opened.
+ */
+static void processLanguageRegex (const langType language,
+		const char* const parameter)
+{
+	const char* path;
+	FILE* stream;
+	vString* lineBuf;
+
+	if (parameter == NULL  ||  parameter [0] == '\0')
+	{
+		clearPatternSet (language);
+		return;
+	}
+	if (parameter [0] != '@')
+	{
+		addLanguageRegex (language, parameter);
+		return;
+	}
+
+	/* Skip the leading '@' to get the file name. */
+	path = parameter + 1;
+	if (! doesFileExist (path))
+	{
+		printf ("regex: cannot open regex file");
+		return;
+	}
+	stream = fopen (path, "r");
+	if (stream == NULL)
+	{
+		printf ("regex: %s", path);
+		return;
+	}
+
+	lineBuf = vStringNew ();
+	while (readLine (lineBuf, stream))
+		addLanguageRegex (language, vStringValue (lineBuf));
+	fclose (stream);
+	vStringDelete (lineBuf);
+}
+
+/*
+*   Regex pattern matching
+*/
+
+#if defined (POSIX_REGEX)
+
+/* Expands the replacement template `out' into a new vString.
+ *
+ * A backslash followed by a digit N is replaced by the text of sub-match N
+ * of `in' (taken from `pmatch'); references to group 0, to groups >= `nmatch'
+ * or to groups that did not participate in the match expand to nothing.
+ * A backslash followed by any other character yields that character.
+ * Newline and carriage-return characters in the template are dropped.
+ *
+ * The caller owns the returned string.
+ */
+static vString* substitute (
+		const char* const in, const char* out,
+		const int nmatch, const regmatch_t* const pmatch)
+{
+	vString* result = vStringNew ();
+	const char* p;
+	for (p = out  ;  *p != '\0'  ;  p++)
+	{
+		if (*p == '\\')
+		{
+			/* A backslash at the very end has nothing to escape. Stop here
+			 * rather than stepping onto the terminator, which would make
+			 * the loop increment walk past the end of the template. */
+			if (p [1] == '\0')
+				break;
+			++p;
+			/* <ctype.h> functions require an unsigned char value. */
+			if (isdigit ((unsigned char) *p))
+			{
+				const int dig = *p - '0';
+				if (0 < dig  &&  dig < nmatch  &&  pmatch [dig].rm_so != -1)
+				{
+					const int diglen = pmatch [dig].rm_eo - pmatch [dig].rm_so;
+					vStringNCatS (result, in + pmatch [dig].rm_so, diglen);
+				}
+				continue;
+			}
+			/* Escaped non-digit: fall through and copy it literally. */
+		}
+		if (*p != '\n'  &&  *p != '\r')
+			vStringPut (result, *p);
+	}
+	vStringTerminate (result);
+	return result;
+}
+
+/* Builds the tag name for a matched tag pattern by expanding the pattern's
+ * name template against the sub-matches in `pmatch', trims surrounding
+ * whitespace and emits the tag. An empty expansion is reported as a warning
+ * instead of producing a tag.
+ */
+static void matchTagPattern (const vString* const line,
+		const regexPattern* const patbuf,
+		const regmatch_t* const pmatch)
+{
+	const char *const nameTemplate = patbuf->u.tag.name_pattern;
+	vString *const tagName = substitute (vStringValue (line),
+			nameTemplate, BACK_REFERENCE_COUNT, pmatch);
+
+	vStringStripLeading (tagName);
+	vStringStripTrailing (tagName);
+	if (vStringLength (tagName) == 0)
+		error (WARNING, "%s:%ld: null expansion of name pattern \"%s\"",
+			getInputFileName (), getInputLineNumber (),
+			nameTemplate);
+	else
+		makeRegexTag (tagName, &patbuf->u.tag.kind);
+	vStringDelete (tagName);
+}
+
+/* Converts the leading run of valid sub-matches in `pmatch' into
+ * (start, length) pairs and passes them, together with the line text, to the
+ * callback registered for the pattern. Collection stops at the first unset
+ * sub-match or after BACK_REFERENCE_COUNT entries.
+ */
+static void matchCallbackPattern (
+		const vString* const line, const regexPattern* const patbuf,
+		const regmatch_t* const pmatch)
+{
+	regexMatch matches [BACK_REFERENCE_COUNT];
+	unsigned int count = 0;
+
+	while (count < BACK_REFERENCE_COUNT  &&  pmatch [count].rm_so != -1)
+	{
+		const regmatch_t* const group = pmatch + count;
+		matches [count].start  = group->rm_so;
+		matches [count].length = group->rm_eo - group->rm_so;
+		++count;
+	}
+	patbuf->u.callback.function (vStringValue (line), matches, count);
+}
+
+/* Runs a single compiled pattern against `line'. On a match the handler for
+ * the pattern's type (tag or callback) is invoked and TRUE is returned;
+ * FALSE is returned when the line does not match or the pattern type is not
+ * recognised.
+ */
+static boolean matchRegexPattern (const vString* const line,
+		const regexPattern* const patbuf)
+{
+	regmatch_t groups [BACK_REFERENCE_COUNT];
+	boolean matched = FALSE;
+
+	if (regexec (patbuf->pattern, vStringValue (line),
+				 BACK_REFERENCE_COUNT, groups, 0) != 0)
+		return FALSE;
+
+	switch (patbuf->type)
+	{
+		case PTRN_TAG:
+			matchTagPattern (line, patbuf, groups);
+			matched = TRUE;
+			break;
+		case PTRN_CALLBACK:
+			matchCallbackPattern (line, patbuf, groups);
+			matched = TRUE;
+			break;
+		default:
+			Assert ("invalid pattern type" == NULL);
+			break;
+	}
+	return matched;
+}
+
+#endif
+
+/* PUBLIC INTERFACE */
+
+/* Match against all patterns for specified language. Returns true if at least
+ * one pattern matched.
+ *
+ * Every pattern of the language is tried, even after a match has been
+ * found, so each matching pattern gets to run its handler. Languages
+ * without a pattern set (LANG_IGNORE, any other negative value, or an index
+ * above SetUpper) never match.
+ */
+extern boolean matchRegex (const vString* const line, const langType language)
+{
+	boolean result = FALSE;
+	/* The explicit lower bound keeps negative sentinel values other than
+	 * LANG_IGNORE from indexing before the start of Sets. */
+	if (language != LANG_IGNORE  &&  language >= 0  &&
+		language <= SetUpper  &&  Sets [language].count > 0)
+	{
+		const patternSet* const set = Sets + language;
+		unsigned int i;
+		for (i = 0  ;  i < set->count  ;  ++i)
+			if (matchRegexPattern (line, set->patterns + i))
+				result = TRUE;
+	}
+	return result;
+}
+
+/* Reads the current input to its end, one line at a time. The lines
+ * themselves are not examined here.
+ */
+extern void findRegexTags (void)
+{
+	for (;;)
+	{
+		if (fileReadLine () == NULL)
+			break;
+	}
+}
+
+#endif  /* HAVE_REGEX */
+
+/* Registers a tag-producing regex for `language'.
+ *
+ *   regex  the pattern to compile
+ *   name   template for the tag name (copied before being stored)
+ *   kinds  "letter,name,description" kind specification, may be NULL
+ *   flags  regex flag letters handed to compileRegex()
+ *
+ * Nothing is registered when regex support is marked broken or the pattern
+ * fails to compile. Without HAVE_REGEX this function does nothing.
+ */
+extern void addTagRegex (
+		const langType language __unused__,
+		const char* const regex __unused__,
+		const char* const name __unused__,
+		const char* const kinds __unused__,
+		const char* const flags __unused__)
+{
+#ifdef HAVE_REGEX
+	regex_t* compiled;
+	char letter;
+	char* kindName;
+	char* kindDescription;
+
+	Assert (regex != NULL);
+	Assert (name != NULL);
+	if (regexBroken)
+		return;
+
+	compiled = compileRegex (regex, flags);
+	if (compiled == NULL)
+		return;
+
+	parseKinds (kinds, &letter, &kindName, &kindDescription);
+	addCompiledTagPattern (language, compiled, eStrdup (name),
+			letter, kindName, kindDescription);
+#endif
+}
+
+/* Registers a regex for `language' whose matches are reported to `callback'
+ * instead of generating a tag directly. Nothing is registered when regex
+ * support is marked broken or the pattern fails to compile. Without
+ * HAVE_REGEX this function does nothing.
+ */
+extern void addCallbackRegex (
+		const langType language __unused__,
+		const char* const regex __unused__,
+		const char* const flags __unused__,
+		const regexCallback callback __unused__)
+{
+#ifdef HAVE_REGEX
+	regex_t* compiled;
+
+	Assert (regex != NULL);
+	if (regexBroken)
+		return;
+
+	compiled = compileRegex (regex, flags);
+	if (compiled != NULL)
+		addCompiledCallbackPattern (language, compiled, callback);
+#endif
+}
+
+/* Parses a complete regex definition string and, if it is well formed,
+ * registers it as a tag pattern for `language'.
+ *
+ * The definition is parsed on a private copy of `regex'; the caller's string
+ * is not modified. Nothing happens when regex support is marked broken or
+ * HAVE_REGEX is not defined.
+ */
+extern void addLanguageRegex (
+		const langType language __unused__, const char* const regex __unused__)
+{
+#ifdef HAVE_REGEX
+	if (! regexBroken)
+	{
+		char *const regex_pat = eStrdup (regex);
+		char *name, *kinds, *flags;
+		if (parseTagRegex (regex_pat, &name, &kinds, &flags))
+			addTagRegex (language, regex_pat, name, kinds, flags);
+		/* Release the working copy on both paths; it used to leak
+		 * whenever the definition failed to parse. */
+		eFree (regex_pat);
+	}
+#endif
+}
+
+/*
+*   Regex option parsing
+*/
+
+/* Handles an option of the form "regex-<LANG>".
+ *
+ * Returns TRUE when `option' is such an option (whether or not it could be
+ * applied) and FALSE when it is some other option that the caller should
+ * keep processing. An unknown language, or a build without regex support,
+ * is reported on stdout.
+ *
+ * The part before the dash has to be exactly "regex". Comparing only
+ * `dash - option' characters would also accept any shorter prefix ("r-x",
+ * "re-x") and even an option that starts with a dash, where the compared
+ * length is zero.
+ */
+extern boolean processRegexOption (const char *const option,
+								   const char *const parameter __unused__)
+{
+	static const char prefix [] = "regex";
+	const size_t prefixLength = sizeof prefix - 1;
+	boolean handled = FALSE;
+	const char* const dash = strchr (option, '-');
+	if (dash != NULL  &&  (size_t) (dash - option) == prefixLength  &&
+		strncmp (option, prefix, prefixLength) == 0)
+	{
+#ifdef HAVE_REGEX
+		langType language;
+		language = getNamedLanguage (dash + 1);
+		if (language == LANG_IGNORE)
+			printf ("regex: unknown language \"%s\" in --%s option", (dash + 1), option);
+		else
+			processLanguageRegex (language, parameter);
+#else
+		printf ("regex: regex support not available; required for --%s option",
+		   option);
+#endif
+		handled = TRUE;
+	}
+	return handled;
+}
+
+/* Marks the kind of every tag pattern registered for `language' as disabled.
+ * Callback patterns are left alone. Does nothing without HAVE_REGEX or when
+ * the language has no patterns.
+ */
+extern void disableRegexKinds (const langType language __unused__)
+{
+#ifdef HAVE_REGEX
+	regexPattern* pat;
+	regexPattern* end;
+
+	if (language > SetUpper  ||  Sets [language].count == 0)
+		return;
+
+	pat = Sets [language].patterns;
+	end = pat + Sets [language].count;
+	for ( ;  pat != end  ;  ++pat)
+	{
+		if (pat->type == PTRN_TAG)
+			pat->u.tag.kind.enabled = FALSE;
+	}
+#endif
+}
+
+/* Sets the enabled state of every tag pattern of `language' whose kind
+ * letter equals `kind' to `mode'. Returns TRUE when at least one pattern was
+ * updated, FALSE otherwise (always FALSE without HAVE_REGEX).
+ */
+extern boolean enableRegexKind (
+		const langType language __unused__,
+		const int kind __unused__, const boolean mode __unused__)
+{
+	boolean result = FALSE;
+#ifdef HAVE_REGEX
+	if (language <= SetUpper  &&  Sets [language].count > 0)
+	{
+		regexPattern* const patterns = Sets [language].patterns;
+		const unsigned int total = Sets [language].count;
+		unsigned int idx = 0;
+		while (idx < total)
+		{
+			regexPattern* const pat = patterns + idx;
+			if (pat->type == PTRN_TAG  &&  pat->u.tag.kind.letter == kind)
+			{
+				pat->u.tag.kind.enabled = mode;
+				result = TRUE;
+			}
+			++idx;
+		}
+	}
+#endif
+	return result;
+}
+
+/* Prints the kind of each tag pattern registered for `language', one line
+ * per pattern, optionally indented. Callback patterns are skipped. Does
+ * nothing without HAVE_REGEX or when the language has no patterns.
+ */
+extern void printRegexKinds (const langType language __unused__, boolean indent __unused__)
+{
+#ifdef HAVE_REGEX
+	const regexPattern* patterns;
+	unsigned int total;
+	unsigned int idx;
+
+	if (language > SetUpper  ||  Sets [language].count == 0)
+		return;
+
+	patterns = Sets [language].patterns;
+	total = Sets [language].count;
+	for (idx = 0  ;  idx < total  ;  ++idx)
+	{
+		if (patterns [idx].type == PTRN_TAG)
+			printRegexKind (patterns, idx, indent);
+	}
+#endif
+}
+
+/* Clears the pattern set of every language index up to SetUpper, releases
+ * the Sets array and resets the bookkeeping to the "no sets" state. Does
+ * nothing without HAVE_REGEX.
+ */
+extern void freeRegexResources (void)
+{
+#ifdef HAVE_REGEX
+	int lang = 0;
+
+	while (lang <= SetUpper)
+	{
+		clearPatternSet (lang);
+		++lang;
+	}
+	if (Sets != NULL)
+		eFree (Sets);
+	Sets = NULL;
+	SetUpper = -1;
+#endif
+}
+
+/* Check for broken regcomp() on Cygwin.
+ *
+ * Compiles a trivial probe pattern; if that fails, a warning is issued and
+ * regexBroken is set. The check is only compiled in when both HAVE_REGEX
+ * and CHECK_REGCOMP are defined.
+ */
+extern void checkRegex (void)
+{
+#if defined (HAVE_REGEX) && defined (CHECK_REGCOMP)
+	regex_t patbuf;
+	if (regcomp (&patbuf, "/hello/", 0) != 0)
+	{
+		error (WARNING, "Disabling broken regex");
+		regexBroken = TRUE;
+	}
+	else
+	{
+		/* The probe compiled fine; release what regcomp() allocated. */
+		regfree (&patbuf);
+	}
+#endif
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */

Modified: trunk/tagmanager/makefile.win32
===================================================================
--- trunk/tagmanager/makefile.win32	2007-11-17 17:03:25 UTC (rev 2052)
+++ trunk/tagmanager/makefile.win32	2007-11-17 19:27:50 UTC (rev 2053)
@@ -9,6 +9,8 @@
 
 COMPLIB=tagmanager.a
 
+REGEX_DEFINES = -DHAVE_REGCOMP -DREGEX_MALLOC -DSTDC_HEADERS=1
+
 GTK_INCLUDES= \
 	-I$(PREFIX)/include/gtk-2.0 \
 	-I$(PREFIX)/lib/gtk-2.0/include \
@@ -25,7 +27,7 @@
 CCFLAGS=-Wall -O2 -g -mms-bitfields -DPACKAGE=\"geany\" -DG_OS_WIN32 -Wno-missing-braces -Wno-char-subscripts $(INCLUDEDIRS)
 
 .c.o:
-	$(CC) $(CCFLAGS) -w -c $<
+	$(CC) $(REGEX_DEFINES) $(CCFLAGS) -w -c $<
 
 all: $(COMPLIB)
 
@@ -33,9 +35,9 @@
 	-$(RM) deps.mak *.o $(COMPLIB)
 
 $(COMPLIB): args.o c.o fortran.o make.o conf.o pascal.o perl.o php.o diff.o vhdl.o lua.o js.o \
-haskell.o haxe.o python.o regex.o rest.o sh.o ctags.o entry.o get.o keyword.o options.o parse.o basic.o \
-read.o sort.o strlist.o latex.o docbook.o tcl.o ruby.o asm.o sql.o css.o vstring.o tm_workspace.o tm_work_object.o \
-tm_source_file.o tm_project.o tm_tag.o tm_symbol.o tm_file_entry.o \
+haskell.o haxe.o python.o lregex.o rest.o sh.o ctags.o entry.o get.o keyword.o options.o parse.o basic.o \
+read.o sort.o strlist.o latex.o docbook.o tcl.o ruby.o asm.o sql.o css.o vstring.o regex.o \
+tm_workspace.o tm_work_object.o tm_source_file.o tm_project.o tm_tag.o tm_symbol.o tm_file_entry.o \
 tm_tagmanager.o
 	$(AR) rc $@ $^
 	$(RANLIB) $@

Modified: trunk/tagmanager/parse.c
===================================================================
--- trunk/tagmanager/parse.c	2007-11-17 17:03:25 UTC (rev 2052)
+++ trunk/tagmanager/parse.c	2007-11-17 19:27:50 UTC (rev 2053)
@@ -56,7 +56,7 @@
 extern void makeSimpleScopedTag (const vString* const name,
 				 kindOption* const kinds, const int kind,
 				 const char* scope, const char *scope2,
-				 const char *access)
+				 const char *laccess)
 {
     if (name != NULL  &&  vStringLength (name) > 0)
     {
@@ -67,7 +67,7 @@
         e.kind     = kinds [kind].letter;
 	e.extensionFields.scope[0] = scope;
 	e.extensionFields.scope[1] = scope2;
-	e.extensionFields.access = access;
+	e.extensionFields.access = laccess;
 
         makeTagEntry (&e);
     }
@@ -541,7 +541,7 @@
 	    for (i = 0  ;  i < lang->kindCount  ;  ++i)
 		printLangugageKindOption (lang->kinds + i);
 #ifdef HAVE_REGEX
-	printRegexKindOptions (language);
+	// printRegexKindOptions (language); // unused
 #endif
     }
 }

Modified: trunk/tagmanager/parse.h
===================================================================
--- trunk/tagmanager/parse.h	2007-11-17 17:03:25 UTC (rev 2052)
+++ trunk/tagmanager/parse.h	2007-11-17 19:27:50 UTC (rev 2053)
@@ -113,7 +113,7 @@
 /* Regex interface */
 #ifdef HAVE_REGEX
 extern void findRegexTags (void);
-extern void matchRegex (const vString* const line, const langType language);
+extern boolean matchRegex (const vString* const line, const langType language);
 #endif
 extern boolean processRegexOption (const char *const option, const char *const parameter);
 extern void addLanguageRegex (const langType language, const char* const regex);

Modified: trunk/tagmanager/php.c
===================================================================
--- trunk/tagmanager/php.c	2007-11-17 17:03:25 UTC (rev 2052)
+++ trunk/tagmanager/php.c	2007-11-17 19:27:50 UTC (rev 2053)
@@ -31,16 +31,74 @@
 	K_CLASS, K_DEFINE, K_FUNCTION, K_VARIABLE
 } phpKind;
 
+#if 0
 static kindOption PhpKinds [] = {
 	{ TRUE, 'c', "class",    "classes" },
-	{ TRUE, 'd', "macro",   "constant definitions" },
+	{ TRUE, 'd', "define",   "constant definitions" },
 	{ TRUE, 'f', "function", "functions" },
 	{ TRUE, 'v', "variable", "variables" }
 };
+#endif
 
 /*
 *   FUNCTION DEFINITIONS
 */
+
+/* JavaScript patterns are duplicated in jscript.c */
+
+/*
+ * Cygwin doesn't support non-ASCII characters in character classes.
+ * This isn't a good solution to the underlying problem, because we're still
+ * making assumptions about the character encoding.
+ * Really, these regular expressions need to concentrate on what marks the
+ * end of an identifier, and we need something like iconv to take into
+ * account the user's locale (or an override on the command-line.)
+ */
+#ifdef __CYGWIN__
+#define ALPHA "[:alpha:]"
+#define ALNUM "[:alnum:]"
+#else
+#define ALPHA "A-Za-z\x7f-\xff"
+#define ALNUM "0-9A-Za-z\x7f-\xff"
+#endif
+
+/* Registers the tag regexes for `language'.
+ *
+ * Each addTagRegex() call passes, in order: the pattern, the back-reference
+ * template used as the tag name, a "letter,name,description" kind
+ * specification, and the regex flags (NULL here).
+ */
+static void installPHPRegex (const langType language)
+{
+	/* PHP: classes, interfaces, define() constants and functions. The tag
+	 * name is the identifier captured by group 2. */
+	addTagRegex(language, "(^|[ \t])class[ \t]+([" ALPHA "_][" ALNUM "_]*)",
+		"\\2", "c,class,classes", NULL);
+	addTagRegex(language, "(^|[ \t])interface[ \t]+([" ALPHA "_][" ALNUM "_]*)",
+		"\\2", "i,interface,interfaces", NULL);
+	addTagRegex(language, "(^|[ \t])define[ \t]*\\([ \t]*['\"]?([" ALPHA "_][" ALNUM "_]*)",
+		"\\2", "d,define,constant definitions", NULL);
+	addTagRegex(language, "(^|[ \t])function[ \t]+&?[ \t]*([" ALPHA "_][" ALNUM "_]*)",
+		"\\2", "f,function,functions", NULL);
+	/* PHP variables: assignments to $name, ::$name or $this->name, and
+	 * declarations introduced by var/public/protected/private/static. The
+	 * tag name is the identifier captured by group 3. */
+	addTagRegex(language, "(^|[ \t])(\\$|::\\$|\\$this->)([" ALPHA "_][" ALNUM "_]*)[ \t]*=",
+		"\\3", "v,variable,variables", NULL);
+	addTagRegex(language, "(^|[ \t])(var|public|protected|private|static)[ \t]+\\$([" ALPHA "_][" ALNUM "_]*)[ \t]*[=;]",
+		"\\3", "v,variable,variables", NULL);
+
+	/* function regex is covered by PHP regex */
+	/* JavaScript: "name = function(" / "name: function(" ... */
+	addTagRegex (language, "(^|[ \t])([A-Za-z0-9_]+)[ \t]*[=:][ \t]*function[ \t]*\\(",
+		"\\2", "j,jsfunction,javascript functions", NULL);
+	/* ... and "object.member = function(". The same pattern is registered
+	 * twice so that a match yields two tags: the qualified "object.member"
+	 * name and the bare "member" name. */
+	addTagRegex (language, "(^|[ \t])([A-Za-z0-9_.]+)\\.([A-Za-z0-9_]+)[ \t]*=[ \t]*function[ \t]*\\(",
+		"\\2.\\3", "j,jsfunction,javascript functions", NULL);
+	addTagRegex (language, "(^|[ \t])([A-Za-z0-9_.]+)\\.([A-Za-z0-9_]+)[ \t]*=[ \t]*function[ \t]*\\(",
+		"\\3", "j,jsfunction,javascript functions", NULL);
+}
+
+/* Builds the parser definition for PHP: a regex-based parser that handles
+ * the "php", "php3" and "phtml" extensions and installs its patterns through
+ * installPHPRegex().
+ */
+extern parserDefinition* PhpParser (void)
+{
+	static const char *const extensions [] = { "php", "php3", "phtml", NULL };
+	parserDefinition* const parser = parserNew ("PHP");
+
+	parser->regex      = TRUE;
+	parser->initialize = installPHPRegex;
+	parser->extensions = extensions;
+	return parser;
+}
+
+#if 0
+
 static boolean isLetter(const int c)
 {
 	return (boolean)(isalpha(c) || (c >= 127  &&  c <= 255));
@@ -101,7 +159,7 @@
 				cp++;
 
 				while (isspace ((int) *cp))
-					++cp;
+					++cp; 
 			}
 
 			vStringClear (name);
@@ -113,12 +171,11 @@
 			vStringTerminate (name);
 			makeSimpleTag (name, PhpKinds, K_FUNCTION);
 			vStringClear (name);
-		}
-		else if ((f = strstr ((const char*) cp, "class")) != NULL &&
-			(f == (const char*) cp || isspace ((int) f [-1])) &&
-			isspace ((int) f [5]))
+		} 
+		else if (strncmp ((const char*) cp, "class", (size_t) 5) == 0 &&
+				 isspace ((int) cp [5]))
 		{
-			cp = ((const unsigned char *) f) + 5;
+			cp += 5;
 
 			while (isspace ((int) *cp))
 				++cp;
@@ -149,7 +206,7 @@
 				++cp;
 			else if (! ((*cp == '_')  || isalnum ((int) *cp)))
 				continue;
-
+	      
 			vStringClear (name);
 			while (isalnum ((int) *cp)  ||  *cp == '_')
 			{
@@ -175,5 +232,6 @@
 	return def;
 }
 
+#endif
 
 /* vi:set tabstop=4 shiftwidth=4: */

Modified: trunk/tagmanager/regex.c
===================================================================
--- trunk/tagmanager/regex.c	2007-11-17 17:03:25 UTC (rev 2052)
+++ trunk/tagmanager/regex.c	2007-11-17 19:27:50 UTC (rev 2053)
@@ -1,654 +1,4952 @@
-/*
-*
-*   Copyright (c) 2000-2001, Darren Hiebert
-*
-*   This source code is released for free distribution under the terms of the
-*   GNU General Public License.
-*
-*   This module contains functions for applying regular expression matching.
-*
-*   The code for utlizing the Gnu regex package with regards to processing the
-*   regex option and checking for regex matches was adapted from routines in
-*   Gnu etags.
-*/
+/* Extended regular expression matching and search library,
+   version 0.12, with minor changes by Darren Hiebert.
+   (Implements POSIX draft P10003.2/D11.2, except for
+   internationalization features.)
 
-/*
-*   INCLUDE FILES
-*/
-#include "general.h"	/* must always come first */
+   Copyright (C) 1993 Free Software Foundation, Inc.
 
-#include <string.h>
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
 
-#if defined (HAVE_REGCOMP) || defined (HAVE_RE_COMPILE_PATTERN)
-# include <ctype.h>
-# include <stddef.h>
-# ifdef HAVE_SYS_TYPES_H
-#  include <sys/types.h> /* declare off_t (not known to regex.h on FreeBSD) */
-# endif
-# include "regex.h"
-#endif
-#include <glib/gstdio.h>
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
 
-#include "entry.h"
-#include "main.h"
-#include "parse.h"
-#include "read.h"
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
 
-#ifdef HAVE_REGEX
+/* AIX requires this to be the first thing in the file. */
+#if defined (_AIX) && !defined (REGEX_MALLOC)
+  #pragma alloca
+#endif
 
-/*
-*   MACROS
-*/
+#define _GNU_SOURCE
 
-/* Back-references \0 through \9 */
-#define BACK_REFERENCE_COUNT 10
+/* We need this for `regex.h', and perhaps for the Emacs include files.  */
+#include <sys/types.h>
 
-#if defined (HAVE_REGCOMP) && !defined (REGCOMP_BROKEN)
-# define POSIX_REGEX
+#ifdef HAVE_CONFIG_H
+#include "config.h"
 #endif
 
-#define REGEX_NAME "Regex"
+/* The `emacs' switch turns on certain matching commands
+   that make sense only in Emacs. */
+#ifdef emacs
 
-/*
-*   DATA DECLARATIONS
-*/
-#if defined (POSIX_REGEX)
+#include "lisp.h"
+#include "buffer.h"
+#include "syntax.h"
 
-struct sKind {
-    boolean enabled;
-    char letter;
-    char* name;
-};
+/* Emacs uses `NULL' as a predicate.  */
+#undef NULL
 
-enum pType { PTRN_TAG, PTRN_CALLBACK };
+#else  /* not emacs */
 
-typedef struct {
-    regex_t *pattern;
-    enum pType type;
-    union {
-	struct {
-	    char *name_pattern;
-	    struct sKind kind;
-	} tag;
-	struct {
-	    regexCallback function;
-	} callback;
-    } u;
-} regexPattern;
+/* We used to test for `BSTRING' here, but only GCC and Emacs define
+   `BSTRING', as far as I know, and neither of them use this code.  */
+#if HAVE_STRING_H || STDC_HEADERS
+#include <string.h>
+#ifndef bcmp
+#define bcmp(s1, s2, n)	memcmp ((s1), (s2), (n))
+#endif
+#ifndef bcopy
+#define bcopy(s, d, n)	memcpy ((d), (s), (n))
+#endif
+#ifndef bzero
+#define bzero(s, n)	memset ((s), 0, (n))
+#endif
+#else
+#include <strings.h>
+#endif
 
+#ifdef STDC_HEADERS
+#include <stdlib.h>
+#else
+char *malloc ();
+char *realloc ();
 #endif
 
-typedef struct {
-    regexPattern *patterns;
-    unsigned int count;
-} patternSet;
 
-/*
-*   DATA DEFINITIONS
-*/
+/* Define the syntax stuff for \<, \>, etc.  */
 
-static boolean regexBroken = FALSE;
+/* This must be nonzero for the wordchar and notwordchar pattern
+   commands in re_match_2.  */
+#ifndef Sword
+#define Sword 1
+#endif
 
-/* Array of pattern sets, indexed by language */
-static patternSet* Sets = NULL;
-static int SetUpper = -1;	/* upper language index in list */
+#ifdef SYNTAX_TABLE
 
-/*
-*   FUNCTION DEFINITIONS
-*/
+extern char *re_syntax_table;
 
-static void clearPatternSet (const langType language)
+#else /* not SYNTAX_TABLE */
+
+/* How many characters in the character set.  */
+#define CHAR_SET_SIZE 256
+
+static char re_syntax_table[CHAR_SET_SIZE];
+
+static void
+init_syntax_once ()
 {
-    if (language < SetUpper)
-    {
-	patternSet* const set = Sets + language;
-	unsigned int i;
-	for (i = 0  ;  i < set->count  ;  ++i)
-	{
-#if defined (POSIX_REGEX)
-	    regfree (set->patterns [i].pattern);
+   register int c;
+   static int done = 0;
+
+   if (done)
+     return;
+
+   bzero (re_syntax_table, sizeof re_syntax_table);
+
+   for (c = 'a'; c <= 'z'; c++)
+     re_syntax_table[c] = Sword;
+
+   for (c = 'A'; c <= 'Z'; c++)
+     re_syntax_table[c] = Sword;
+
+   for (c = '0'; c <= '9'; c++)
+     re_syntax_table[c] = Sword;
+
+   re_syntax_table['_'] = Sword;
+
+   done = 1;
+}
+
+#endif /* not SYNTAX_TABLE */
+
+#define SYNTAX(c) re_syntax_table[c]
+
+#endif /* not emacs */
+
+/* Get the interface, including the syntax bits.  */
+#include "regex.h"
+
+/* isalpha etc. are used for the character classes.  */
+#include <ctype.h>
+
+#ifndef isascii
+#define isascii(c) 1
 #endif
-	    eFree (set->patterns [i].pattern);
-	    set->patterns [i].pattern = NULL;
 
-	    if (set->patterns [i].type == PTRN_TAG)
-	    {
-		eFree (set->patterns [i].u.tag.name_pattern);
-		set->patterns [i].u.tag.name_pattern = NULL;
-	    }
-	}
-	if (set->patterns != NULL)
-	    eFree (set->patterns);
-	set->patterns = NULL;
-	set->count = 0;
-    }
+#ifdef isblank
+#define ISBLANK(c) (isascii (c) && isblank (c))
+#else
+#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+#endif
+#ifdef isgraph
+#define ISGRAPH(c) (isascii (c) && isgraph (c))
+#else
+#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
+#endif
+
+#define ISPRINT(c) (isascii (c) && isprint (c))
+#define ISDIGIT(c) (isascii (c) && isdigit (c))
+#define ISALNUM(c) (isascii (c) && isalnum (c))
+#define ISALPHA(c) (isascii (c) && isalpha (c))
+#define ISCNTRL(c) (isascii (c) && iscntrl (c))
+#define ISLOWER(c) (isascii (c) && islower (c))
+#define ISPUNCT(c) (isascii (c) && ispunct (c))
+#define ISSPACE(c) (isascii (c) && isspace (c))
+#define ISUPPER(c) (isascii (c) && isupper (c))
+#define ISXDIGIT(c) (isascii (c) && isxdigit (c))
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+/* We remove any previous definition of `SIGN_EXTEND_CHAR',
+   since ours (we hope) works properly with all combinations of
+   machines, compilers, `char' and `unsigned char' argument types.
+   (Per Bothner suggested the basic approach.)  */
+#undef SIGN_EXTEND_CHAR
+#if __STDC__
+#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
+#else  /* not __STDC__ */
+/* As in Harbison and Steele.  */
+#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
+#endif
+
+/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
+   use `alloca' instead of `malloc'.  This is because using malloc in
+   re_search* or re_match* could cause memory leaks when C-g is used in
+   Emacs; also, malloc is slower and causes storage fragmentation.  On
+   the other hand, malloc is more portable, and easier to debug.
+
+   Because we sometimes use alloca, some routines have to be macros,
+   not functions -- `alloca'-allocated space disappears at the end of the
+   function it is called in.  */
+
+#ifdef REGEX_MALLOC
+
+#define REGEX_ALLOCATE malloc
+#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
+
+#else /* not REGEX_MALLOC  */
+
+/* Emacs already defines alloca, sometimes.  */
+#ifndef alloca
+
+/* Make alloca work the best possible way.  */
+#ifdef __GNUC__
+#define alloca __builtin_alloca
+#else /* not __GNUC__ */
+#if HAVE_ALLOCA_H
+#include <alloca.h>
+#else /* not __GNUC__ or HAVE_ALLOCA_H */
+#ifndef _AIX /* Already did AIX, up at the top.  */
+char *alloca ();
+#endif /* not _AIX */
+#endif /* not HAVE_ALLOCA_H */
+#endif /* not __GNUC__ */
+
+#endif /* not alloca */
+
+#define REGEX_ALLOCATE alloca
+
+/* Assumes a `char *destination' variable.  */
+#define REGEX_REALLOCATE(source, osize, nsize)				\
+  (destination = (char *) alloca (nsize),				\
+   bcopy (source, destination, osize),					\
+   destination)
+
+#endif /* not REGEX_MALLOC */
+
+
+/* True if `size1' is non-NULL and PTR is pointing anywhere inside
+   `string1' or just past its end.  This works if PTR is NULL, which is
+   a good thing.  */
+#define FIRST_STRING_P(ptr) 					\
+  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
+
+/* (Re)Allocate N items of type T using malloc, or fail.  */
+#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
+#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
+#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
+
+#define BYTEWIDTH 8 /* In bits.  */
+
+#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+typedef char boolean;
+#define false 0
+#define true 1
+
+/* These are the command codes that appear in compiled regular
+   expressions.  Some opcodes are followed by argument bytes.  A
+   command code can specify any interpretation whatsoever for its
+   arguments.  Zero bytes may appear in the compiled regular expression.
+
+   The value of `exactn' is needed in search.c (search_buffer) in Emacs.
+   So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
+   `exactn' we use here must also be 1.  */
+
+typedef enum
+{
+  no_op = 0,
+
+        /* Followed by one byte giving n, then by n literal bytes.  */
+  exactn = 1,
+
+        /* Matches any (more or less) character.  */
+  anychar,
+
+        /* Matches any one char belonging to specified set.  First
+           following byte is number of bitmap bytes.  Then come bytes
+           for a bitmap saying which chars are in.  Bits in each byte
+           are ordered low-bit-first.  A character is in the set if its
+           bit is 1.  A character too large to have a bit in the map is
+           automatically not in the set.  */
+  charset,
+
+        /* Same parameters as charset, but match any character that is
+           not one of those specified.  */
+  charset_not,
+
+        /* Start remembering the text that is matched, for storing in a
+           register.  Followed by one byte with the register number, in
+           the range 0 to one less than the pattern buffer's re_nsub
+           field.  Then followed by one byte with the number of groups
+           inner to this one.  (This last has to be part of the
+           start_memory only because we need it in the on_failure_jump
+           of re_match_2.)  */
+  start_memory,
+
+        /* Stop remembering the text that is matched and store it in a
+           memory register.  Followed by one byte with the register
+           number, in the range 0 to one less than `re_nsub' in the
+           pattern buffer, and one byte with the number of inner groups,
+           just like `start_memory'.  (We need the number of inner
+           groups here because we don't have any easy way of finding the
+           corresponding start_memory when we're at a stop_memory.)  */
+  stop_memory,
+
+        /* Match a duplicate of something remembered. Followed by one
+           byte containing the register number.  */
+  duplicate,
+
+        /* Fail unless at beginning of line.  */
+  begline,
+
+        /* Fail unless at end of line.  */
+  endline,
+
+        /* Succeeds if at beginning of buffer (if emacs) or at beginning
+           of string to be matched (if not).  */
+  begbuf,
+
+        /* Analogously, for end of buffer/string.  */
+  endbuf,
+
+        /* Followed by two byte relative address to which to jump.  */
+  jump,
+
+	/* Same as jump, but marks the end of an alternative.  */
+  jump_past_alt,
+
+        /* Followed by two-byte relative address of place to resume at
+           in case of failure.  */
+  on_failure_jump,
+
+        /* Like on_failure_jump, but pushes a placeholder instead of the
+           current string position when executed.  */
+  on_failure_keep_string_jump,
+
+        /* Throw away latest failure point and then jump to following
+           two-byte relative address.  */
+  pop_failure_jump,
+
+        /* Change to pop_failure_jump if know won't have to backtrack to
+           match; otherwise change to jump.  This is used to jump
+           back to the beginning of a repeat.  If what follows this jump
+           clearly won't match what the repeat does, such that we can be
+           sure that there is no use backtracking out of repetitions
+           already matched, then we change it to a pop_failure_jump.
+           Followed by two-byte address.  */
+  maybe_pop_jump,
+
+        /* Jump to following two-byte address, and push a dummy failure
+           point. This failure point will be thrown away if an attempt
+           is made to use it for a failure.  A `+' construct makes this
+           before the first repeat.  Also used as an intermediary kind
+           of jump when compiling an alternative.  */
+  dummy_failure_jump,
+
+	/* Push a dummy failure point and continue.  Used at the end of
+	   alternatives.  */
+  push_dummy_failure,
+
+        /* Followed by two-byte relative address and two-byte number n.
+           After matching N times, jump to the address upon failure.  */
+  succeed_n,
+
+        /* Followed by two-byte relative address, and two-byte number n.
+           Jump to the address N times, then fail.  */
+  jump_n,
+
+        /* Set the following two-byte relative address to the
+           subsequent two-byte number.  The address *includes* the two
+           bytes of number.  */
+  set_number_at,
+
+  wordchar,	/* Matches any word-constituent character.  */
+  notwordchar,	/* Matches any char that is not a word-constituent.  */
+
+  wordbeg,	/* Succeeds if at word beginning.  */
+  wordend,	/* Succeeds if at word end.  */
+
+  wordbound,	/* Succeeds if at a word boundary.  */
+  notwordbound	/* Succeeds if not at a word boundary.  */
+
+#ifdef emacs
+  ,before_dot,	/* Succeeds if before point.  */
+  at_dot,	/* Succeeds if at point.  */
+  after_dot,	/* Succeeds if after point.  */
+
+	/* Matches any character whose syntax is specified.  Followed by
+           a byte which contains a syntax code, e.g., Sword.  */
+  syntaxspec,
+
+	/* Matches any character whose syntax is not that specified.  */
+  notsyntaxspec
+#endif /* emacs */
+} re_opcode_t;
+
+/* Common operations on the compiled pattern.  */
+
+/* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
+
+#define STORE_NUMBER(destination, number)				\
+  do {									\
+    (destination)[0] = (number) & 0377;					\
+    (destination)[1] = (number) >> 8;					\
+  } while (0)
+
+/* Same as STORE_NUMBER, except increment DESTINATION to
+   the byte after where the number is stored.  Therefore, DESTINATION
+   must be an lvalue.  */
+
+#define STORE_NUMBER_AND_INCR(destination, number)			\
+  do {									\
+    STORE_NUMBER (destination, number);					\
+    (destination) += 2;							\
+  } while (0)
+
+/* Put into DESTINATION a number stored in two contiguous bytes starting
+   at SOURCE.  */
+
+#define EXTRACT_NUMBER(destination, source)				\
+  do {									\
+    (destination) = *(source) & 0377;					\
+    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
+  } while (0)
+
+#ifdef DEBUG
+static void
+extract_number (dest, source)
+    int *dest;
+    unsigned char *source;
+{
+  int temp = SIGN_EXTEND_CHAR (*(source + 1));
+  *dest = *source & 0377;
+  *dest += temp << 8;
 }
 
-/*
-*   Regex psuedo-parser
-*/
+#ifndef EXTRACT_MACROS /* To debug the macros.  */
+#undef EXTRACT_NUMBER
+#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
+#endif /* not EXTRACT_MACROS */
 
-static void makeRegexTag (const vString* const name,
-			  const struct sKind* const kind)
+#endif /* DEBUG */
+
+/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
+   SOURCE must be an lvalue.  */
+
+#define EXTRACT_NUMBER_AND_INCR(destination, source)			\
+  do {									\
+    EXTRACT_NUMBER (destination, source);				\
+    (source) += 2; 							\
+  } while (0)
+
+#ifdef DEBUG
+static void
+extract_number_and_incr (destination, source)
+    int *destination;
+    unsigned char **source;
 {
-    if (kind->enabled)
-    {
-	tagEntryInfo e;
-	Assert (name != NULL  &&  vStringLength (name) > 0);
-	Assert (kind != NULL);
-	initTagEntry (&e, vStringValue (name));
-	e.kind     = kind->letter;
-	e.kindName = kind->name;
-	makeTagEntry (&e);
-    }
+  extract_number (destination, *source);
+  *source += 2;
 }
 
-/*
-*   Regex pattern definition
-*/
+#ifndef EXTRACT_MACROS
+#undef EXTRACT_NUMBER_AND_INCR
+#define EXTRACT_NUMBER_AND_INCR(dest, src) \
+  extract_number_and_incr (&dest, &src)
+#endif /* not EXTRACT_MACROS */
 
-/* Take a string like "/blah/" and turn it into "blah", making sure
- * that the first and last characters are the same, and handling
- * quoted separator characters.  Actually, stops on the occurrence of
- * an unquoted separator.  Also turns "\t" into a Tab character.
- * Returns pointer to terminating separator.  Works in place.  Null
- * terminates name string.
- */
-static char* scanSeparators (char* name)
+#endif /* DEBUG */
+
+/* If DEBUG is defined, Regex prints many voluminous messages about what
+   it is doing (if the variable `debug' is nonzero).  If linked with the
+   main program in `iregex.c', you can enter patterns and strings
+   interactively.  And if linked with the main program in `main.c' and
+   the other test files, you can run the already-written tests.  */
+
+#ifdef DEBUG
+
+/* We use standard I/O for debugging.  */
+#include <stdio.h>
+
+/* It is useful to test things that ``must'' be true when debugging.  */
+#include <assert.h>
+
+static int debug = 0;
+
+#define DEBUG_STATEMENT(e) e
+#define DEBUG_PRINT1(x) if (debug) printf (x)
+#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
+#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
+#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
+#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 				\
+  if (debug) print_partial_compiled_pattern (s, e)
+#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
+  if (debug) print_double_string (w, s1, sz1, s2, sz2)
+
+
+extern void printchar ();
+
+/* Print the fastmap in human-readable form.  */
+
+void
+print_fastmap (fastmap)
+    char *fastmap;
 {
-    char sep = name [0];
-    char *copyto = name;
-    boolean quoted = FALSE;
+  unsigned was_a_range = 0;
+  unsigned i = 0;
 
-    for (++name ; *name != '\0' ; ++name)
+  while (i < (1 << BYTEWIDTH))
     {
-	if (quoted)
+      if (fastmap[i++])
 	{
-	    if (*name == sep)
-		*copyto++ = sep;
-	    else if (*name == 't')
-		*copyto++ = '\t';
-	    else
-	    {
-		/* Something else is quoted, so preserve the quote. */
-		*copyto++ = '\\';
-		*copyto++ = *name;
-	    }
-	    quoted = FALSE;
-	}
-	else if (*name == '\\')
-	    quoted = TRUE;
-	else if (*name == sep)
-	{
-	    break;
-	}
-	else
-	    *copyto++ = *name;
+	  was_a_range = 0;
+          printchar (i - 1);
+          while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
+            {
+              was_a_range = 1;
+              i++;
+            }
+	  if (was_a_range)
+            {
+              printf ("-");
+              printchar (i - 1);
+            }
+        }
     }
-    *copyto = '\0';
-    return name;
+  putchar ('\n');
 }
 
-/* Parse `regexp', in form "/regex/name/[k,Kind/]flags" (where the separator
- * character is whatever the first character of `regexp' is), by breaking it
- * up into null terminated strings, removing the separators, and expanding
- * '\t' into tabs. When complete, `regexp' points to the line matching
- * pattern, a pointer to the name matching pattern is written to `name', a
- * pointer to the kinds is written to `kinds' (possibly NULL), and a pointer
- * to the trailing flags is written to `flags'. If the pattern is not in the
- * correct format, a false value is returned.
- */
-static boolean parseTagRegex (char* const regexp, char** const name,
-			      char** const kinds, char** const flags)
+
+/* Print a compiled pattern string in human-readable form, starting at
+   the START pointer into it and ending just before the pointer END.  */
+
+void
+print_partial_compiled_pattern (start, end)
+    unsigned char *start;
+    unsigned char *end;
 {
-    boolean result = FALSE;
-    const int separator = (unsigned char) regexp [0];
+  int mcnt, mcnt2;
+  unsigned char *p = start;
+  unsigned char *pend = end;
 
-    *name = scanSeparators (regexp);
-    if (*regexp == '\0')
-	error (WARNING, "empty regexp");
-    else if (**name != separator)
-	error (WARNING, "%s: incomplete regexp", regexp);
-    else
+  if (start == NULL)
     {
-	char* const third = scanSeparators (*name);
-	if (**name == '\0')
-	    error (WARNING, "%s: regexp missing name pattern", regexp);
-	if ((*name) [strlen (*name) - 1] == '\\')
-	    error (WARNING, "error in name pattern: \"%s\"", *name);
-	if (*third != separator)
-	    error (WARNING, "%s: regexp missing final separator", regexp);
-	else
+      printf ("(null)\n");
+      return;
+    }
+
+  /* Loop over pattern commands.  */
+  while (p < pend)
+    {
+      switch ((re_opcode_t) *p++)
 	{
-	    char* const fourth = scanSeparators (third);
-	    if (*fourth == separator)
+        case no_op:
+          printf ("/no_op");
+          break;
+
+	case exactn:
+	  mcnt = *p++;
+          printf ("/exactn/%d", mcnt);
+          do
 	    {
-		*kinds = third;
-		scanSeparators (fourth);
-		*flags = fourth;
-	    }
-	    else
-	    {
-		*flags = third;
-		*kinds = NULL;
-	    }
-	    result = TRUE;
+              putchar ('/');
+	      printchar (*p++);
+            }
+          while (--mcnt);
+          break;
+
+	case start_memory:
+          mcnt = *p++;
+          printf ("/start_memory/%d/%d", mcnt, *p++);
+          break;
+
+	case stop_memory:
+          mcnt = *p++;
+	  printf ("/stop_memory/%d/%d", mcnt, *p++);
+          break;
+
+	case duplicate:
+	  printf ("/duplicate/%d", *p++);
+	  break;
+
+	case anychar:
+	  printf ("/anychar");
+	  break;
+
+	case charset:
+        case charset_not:
+          {
+            register int c;
+
+            printf ("/charset%s",
+	            (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
+
+            assert (p + *p < pend);
+
+            for (c = 0; c < *p; c++)
+              {
+                unsigned bit;
+                unsigned char map_byte = p[1 + c];
+
+                putchar ('/');
+
+		for (bit = 0; bit < BYTEWIDTH; bit++)
+                  if (map_byte & (1 << bit))
+                    printchar (c * BYTEWIDTH + bit);
+              }
+	    p += 1 + *p;
+	    break;
+	  }
+
+	case begline:
+	  printf ("/begline");
+          break;
+
+	case endline:
+          printf ("/endline");
+          break;
+
+	case on_failure_jump:
+          extract_number_and_incr (&mcnt, &p);
+  	  printf ("/on_failure_jump/0/%d", mcnt);
+          break;
+
+	case on_failure_keep_string_jump:
+          extract_number_and_incr (&mcnt, &p);
+  	  printf ("/on_failure_keep_string_jump/0/%d", mcnt);
+          break;
+
+	case dummy_failure_jump:
+          extract_number_and_incr (&mcnt, &p);
+  	  printf ("/dummy_failure_jump/0/%d", mcnt);
+          break;
+
+	case push_dummy_failure:
+          printf ("/push_dummy_failure");
+          break;
+
+        case maybe_pop_jump:
+          extract_number_and_incr (&mcnt, &p);
+  	  printf ("/maybe_pop_jump/0/%d", mcnt);
+	  break;
+
+        case pop_failure_jump:
+	  extract_number_and_incr (&mcnt, &p);
+  	  printf ("/pop_failure_jump/0/%d", mcnt);
+	  break;
+
+        case jump_past_alt:
+	  extract_number_and_incr (&mcnt, &p);
+  	  printf ("/jump_past_alt/0/%d", mcnt);
+	  break;
+
+        case jump:
+	  extract_number_and_incr (&mcnt, &p);
+  	  printf ("/jump/0/%d", mcnt);
+	  break;
+
+        case succeed_n:
+          extract_number_and_incr (&mcnt, &p);
+          extract_number_and_incr (&mcnt2, &p);
+ 	  printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
+          break;
+
+        case jump_n:
+          extract_number_and_incr (&mcnt, &p);
+          extract_number_and_incr (&mcnt2, &p);
+ 	  printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2);
+          break;
+
+        case set_number_at:
+          extract_number_and_incr (&mcnt, &p);
+          extract_number_and_incr (&mcnt2, &p);
+ 	  printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
+          break;
+
+        case wordbound:
+	  printf ("/wordbound");
+	  break;
+
+	case notwordbound:
+	  printf ("/notwordbound");
+          break;
+
+	case wordbeg:
+	  printf ("/wordbeg");
+	  break;
+
+	case wordend:
+	  printf ("/wordend");
+	  break;
+#ifdef emacs
+	case before_dot:
+	  printf ("/before_dot");
+          break;
+
+	case at_dot:
+	  printf ("/at_dot");
+          break;
+
+	case after_dot:
+	  printf ("/after_dot");
+          break;
+
+	case syntaxspec:
+          printf ("/syntaxspec");
+	  mcnt = *p++;
+	  printf ("/%d", mcnt);
+          break;
+
+	case notsyntaxspec:
+          printf ("/notsyntaxspec");
+	  mcnt = *p++;
+	  printf ("/%d", mcnt);
+	  break;
+#endif /* emacs */
+
+	case wordchar:
+	  printf ("/wordchar");
+          break;
+
+	case notwordchar:
+	  printf ("/notwordchar");
+          break;
+
+	case begbuf:
+	  printf ("/begbuf");
+          break;
+
+	case endbuf:
+	  printf ("/endbuf");
+          break;
+
+        default:
+          printf ("?%d", *(p-1));
 	}
     }
-    return result;
+  printf ("/\n");
 }
 
-static void addCompiledTagPattern (const langType language,
-				   regex_t* const pattern, char* const name,
-				   const char kind, char* const kindName)
+
+void
+print_compiled_pattern (bufp)
+    struct re_pattern_buffer *bufp;
 {
-    patternSet* set;
-    regexPattern *ptrn;
-    if (language > SetUpper)
+  unsigned char *buffer = bufp->buffer;
+
+  print_partial_compiled_pattern (buffer, buffer + bufp->used);
+  printf ("%lu bytes used/%lu bytes allocated.\n",
+          (unsigned long) bufp->used, (unsigned long) bufp->allocated);
+  if (bufp->fastmap_accurate && bufp->fastmap)
     {
-	int i;
-	Sets = xRealloc (Sets, (language + 1), patternSet);
-	for (i = SetUpper + 1  ;  i <= language  ;  ++i)
-	{
-	    Sets [i].patterns = NULL;
-	    Sets [i].count = 0;
-	}
-	SetUpper = language;
+      printf ("fastmap: ");
+      print_fastmap (bufp->fastmap);
     }
-    set = Sets + language;
-    set->patterns = xRealloc (set->patterns, (set->count + 1), regexPattern);
-    ptrn = &set->patterns [set->count];
-    set->count += 1;
 
-    ptrn->pattern = pattern;
-    ptrn->type    = PTRN_TAG;
-    ptrn->u.tag.name_pattern = name;
-    ptrn->u.tag.kind.enabled = TRUE;
-    ptrn->u.tag.kind.letter  = kind;
-    ptrn->u.tag.kind.name    = kindName;
+  printf ("re_nsub: %lu\t", (unsigned long) bufp->re_nsub);
+  printf ("regs_alloc: %d\t", bufp->regs_allocated);
+  printf ("can_be_null: %d\t", bufp->can_be_null);
+  printf ("newline_anchor: %d\n", bufp->newline_anchor);
+  printf ("no_sub: %d\t", bufp->no_sub);
+  printf ("not_bol: %d\t", bufp->not_bol);
+  printf ("not_eol: %d\t", bufp->not_eol);
+  printf ("syntax: %lu\n", (unsigned long) bufp->syntax);
+  /* Perhaps we should print the translate table?  */
 }
 
-static void addCompiledCallbackPattern (const langType language,
-					regex_t* const pattern,
-					const regexCallback callback)
+
+void
+print_double_string (where, string1, size1, string2, size2)
+    const char *where;
+    const char *string1;
+    const char *string2;
+    int size1;
+    int size2;
 {
-    patternSet* set;
-    regexPattern *ptrn;
-    if (language > SetUpper)
+  unsigned this_char;
+
+  if (where == NULL)
+    printf ("(null)");
+  else
     {
-	int i;
-	Sets = xRealloc (Sets, (language + 1), patternSet);
-	for (i = SetUpper + 1  ;  i <= language  ;  ++i)
-	{
-	    Sets [i].patterns = NULL;
-	    Sets [i].count = 0;
-	}
-	SetUpper = language;
+      if (FIRST_STRING_P (where))
+        {
+          for (this_char = where - string1; this_char < size1; this_char++)
+            printchar (string1[this_char]);
+
+          where = string2;
+        }
+
+      for (this_char = where - string2; this_char < size2; this_char++)
+        printchar (string2[this_char]);
     }
-    set = Sets + language;
-    set->patterns = xRealloc (set->patterns, (set->count + 1), regexPattern);
-    ptrn = &set->patterns [set->count];
-    set->count += 1;
+}
 
-    ptrn->pattern = pattern;
-    ptrn->type    = PTRN_CALLBACK;
-    ptrn->u.callback.function = callback;
+#else /* not DEBUG */
+
+#undef assert
+#define assert(e)
+
+#define DEBUG_STATEMENT(e)
+#define DEBUG_PRINT1(x)
+#define DEBUG_PRINT2(x1, x2)
+#define DEBUG_PRINT3(x1, x2, x3)
+#define DEBUG_PRINT4(x1, x2, x3, x4)
+#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
+#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
+
+#endif /* not DEBUG */
+
+/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
+   also be assigned to arbitrarily: each pattern buffer stores its own
+   syntax, so it can be changed between regex compilations.  */
+reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
+
+
+/* Specify the precise syntax of regexps for compilation.  This provides
+   for compatibility for various utilities which historically have
+   different, incompatible syntaxes.
+
+   The argument SYNTAX is a bit mask comprised of the various bits
+   defined in regex.h.  We return the old syntax.  */
+
+reg_syntax_t
+re_set_syntax (syntax)
+    reg_syntax_t syntax;
+{
+  reg_syntax_t ret = re_syntax_options;
+
+  re_syntax_options = syntax;
+  return ret;
 }
+
+/* This table gives an error message for each of the error codes listed
+   in regex.h.  Obviously the order here has to be the same as there.  */
 
-#if defined (POSIX_REGEX)
+static const char *re_error_msg[] =
+  { NULL,					/* REG_NOERROR */
+    "No match",					/* REG_NOMATCH */
+    "Invalid regular expression",		/* REG_BADPAT */
+    "Invalid collation character",		/* REG_ECOLLATE */
+    "Invalid character class name",		/* REG_ECTYPE */
+    "Trailing backslash",			/* REG_EESCAPE */
+    "Invalid back reference",			/* REG_ESUBREG */
+    "Unmatched [ or [^",			/* REG_EBRACK */
+    "Unmatched ( or \\(",			/* REG_EPAREN */
+    "Unmatched \\{",				/* REG_EBRACE */
+    "Invalid content of \\{\\}",		/* REG_BADBR */
+    "Invalid range end",			/* REG_ERANGE */
+    "Memory exhausted",				/* REG_ESPACE */
+    "Invalid preceding regular expression",	/* REG_BADRPT */
+    "Premature end of regular expression",	/* REG_EEND */
+    "Regular expression too big",		/* REG_ESIZE */
+    "Unmatched ) or \\)",			/* REG_ERPAREN */
+  };
+
+/* Subroutine declarations and macros for regex_compile.  */
 
-static regex_t* compileRegex (const char* const regexp, const char* const flags)
+static void store_op1 (), store_op2 ();
+static void insert_op1 (), insert_op2 ();
+static boolean at_begline_loc_p (), at_endline_loc_p ();
+static boolean group_in_compile_stack ();
+static reg_errcode_t compile_range ();
+
+/* Fetch the next character in the uncompiled pattern---translating it
+   if necessary.  Also cast from a signed character in the constant
+   string passed to us by the user to an unsigned char that we can use
+   as an array index (in, e.g., `translate').  */
+#define PATFETCH(c)							\
+  do {if (p == pend) return REG_EEND;					\
+    c = (unsigned char) *p++;						\
+    if (translate) c = translate[c]; 					\
+  } while (0)
+
+/* Fetch the next character in the uncompiled pattern, with no
+   translation.  */
+#define PATFETCH_RAW(c)							\
+  do {if (p == pend) return REG_EEND;					\
+    c = (unsigned char) *p++; 						\
+  } while (0)
+
+/* Go backwards one character in the pattern.  */
+#define PATUNFETCH p--
+
+
+/* If `translate' is non-null, return translate[D], else just D.  We
+   cast the subscript to translate because some data is declared as
+   `char *', to avoid warnings when a string constant is passed.  But
+   when we use a character as a subscript we must make it unsigned.  */
+#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
+
+
+/* Macros for outputting the compiled pattern into `buffer'.  */
+
+/* If the buffer isn't allocated when it comes in, use this.  */
+#define INIT_BUF_SIZE  32
+
+/* Make sure we have at least N more bytes of space in buffer.  */
+#define GET_BUFFER_SPACE(n)						\
+    while ((unsigned long)(b - bufp->buffer + (n)) > bufp->allocated)			\
+      EXTEND_BUFFER ()
+
+/* Make sure we have one more byte of buffer space and then add C to it.  */
+#define BUF_PUSH(c)							\
+  do {									\
+    GET_BUFFER_SPACE (1);						\
+    *b++ = (unsigned char) (c);						\
+  } while (0)
+
+
+/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
+#define BUF_PUSH_2(c1, c2)						\
+  do {									\
+    GET_BUFFER_SPACE (2);						\
+    *b++ = (unsigned char) (c1);					\
+    *b++ = (unsigned char) (c2);					\
+  } while (0)
+
+
+/* As with BUF_PUSH_2, except for three bytes.  */
+#define BUF_PUSH_3(c1, c2, c3)						\
+  do {									\
+    GET_BUFFER_SPACE (3);						\
+    *b++ = (unsigned char) (c1);					\
+    *b++ = (unsigned char) (c2);					\
+    *b++ = (unsigned char) (c3);					\
+  } while (0)
+
+
+/* Store a jump with opcode OP at LOC to location TO.  We store a
+   relative address offset by the three bytes the jump itself occupies.  */
+#define STORE_JUMP(op, loc, to) \
+  store_op1 (op, loc, (to) - (loc) - 3)
+
+/* Likewise, for a two-argument jump.  */
+#define STORE_JUMP2(op, loc, to, arg) \
+  store_op2 (op, loc, (to) - (loc) - 3, arg)
+
+/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
+#define INSERT_JUMP(op, loc, to) \
+  insert_op1 (op, loc, (to) - (loc) - 3, b)
+
+/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
+#define INSERT_JUMP2(op, loc, to, arg) \
+  insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
+
+
+/* This is not an arbitrary limit: the arguments which represent offsets
+   into the pattern are two bytes long.  So if 2^16 bytes turns out to
+   be too small, many things would have to change.  */
+#define MAX_BUF_SIZE (1L << 16)
+
+
+/* Double the buffer's size via realloc (capped at MAX_BUF_SIZE) and
+   reset the pointers that pointed into the old block to point to the
+   correct places in the new one.  If the buffer is already MAX_BUF_SIZE
+   bytes, return REG_ESIZE; if realloc fails, return REG_ESPACE.  */
+#define EXTEND_BUFFER()							\
+  do { 									\
+    unsigned char *old_buffer = bufp->buffer;				\
+    if (bufp->allocated == MAX_BUF_SIZE) 				\
+      return REG_ESIZE;							\
+    bufp->allocated <<= 1;						\
+    if (bufp->allocated > MAX_BUF_SIZE)					\
+      bufp->allocated = MAX_BUF_SIZE; 					\
+    bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
+    if (bufp->buffer == NULL)						\
+      return REG_ESPACE;						\
+    /* If the buffer moved, move all the pointers into it.  */		\
+    if (old_buffer != bufp->buffer)					\
+      {									\
+        b = (b - old_buffer) + bufp->buffer;				\
+        begalt = (begalt - old_buffer) + bufp->buffer;			\
+        if (fixup_alt_jump)						\
+          fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
+        if (laststart)							\
+          laststart = (laststart - old_buffer) + bufp->buffer;		\
+        if (pending_exact)						\
+          pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
+      }									\
+  } while (0)
+
+
+/* Since we have one byte reserved for the register number argument to
+   {start,stop}_memory, the maximum number of groups we can report
+   things about is what fits in that byte.  */
+#define MAX_REGNUM 255
+
+/* But patterns can have more than `MAX_REGNUM' registers.  We just
+   ignore the excess.  */
+typedef unsigned regnum_t;
+
+
+/* Macros for the compile stack.  */
+
+/* Since offsets can go either forwards or backwards, this type needs to
+   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
+typedef int pattern_offset_t;
+
+typedef struct
 {
-    int cflags = REG_EXTENDED | REG_NEWLINE;
-    regex_t *result = NULL;
-    int errcode;
-    int i;
-    for (i = 0  ; flags != NULL  &&  flags [i] != '\0'  ;  ++i)
-    {
-	switch ((int) flags [i])
-	{
-	    case 'b': cflags &= ~REG_EXTENDED; break;
-	    case 'e': cflags |= REG_EXTENDED;  break;
-	    case 'i': cflags |= REG_ICASE;     break;
-	    default: error (WARNING, "unknown regex flag: '%c'", *flags); break;
-	}
+  pattern_offset_t begalt_offset;
+  pattern_offset_t fixup_alt_jump;
+  pattern_offset_t inner_group_offset;
+  pattern_offset_t laststart_offset;
+  regnum_t regnum;
+} compile_stack_elt_t;
+
+
+typedef struct
+{
+  compile_stack_elt_t *stack;
+  unsigned size;
+  unsigned avail;			/* Offset of next open position.  */
+} compile_stack_type;
+
+
+#define INIT_COMPILE_STACK_SIZE 32
+
+#define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
+#define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
+
+/* The next available element.  */
+#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
+
+
+/* Set the bit for character C in the bit list at `b'; evaluates C twice.  */
+#define SET_LIST_BIT(c)                               \
+  (b[((unsigned char) (c)) / BYTEWIDTH]               \
+   |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
+
+
+/* Get the next unsigned number in the uncompiled pattern.  */
+#define GET_UNSIGNED_NUMBER(num) 					\
+  { if (p != pend)							\
+     {									\
+       PATFETCH (c); 							\
+       while (ISDIGIT (c)) 						\
+         { 								\
+           if (num < 0)							\
+              num = 0;							\
+           num = num * 10 + c - '0'; 					\
+           if (p == pend) 						\
+              break; 							\
+           PATFETCH (c);						\
+         } 								\
+       } 								\
     }
-    result = xMalloc (1, regex_t);
-    errcode = regcomp (result, regexp, cflags);
-    if (errcode != 0)
+
+#define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
+
+#define IS_CHAR_CLASS(string)						\
+   (STREQ (string, "alpha") || STREQ (string, "upper")			\
+    || STREQ (string, "lower") || STREQ (string, "digit")		\
+    || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
+    || STREQ (string, "space") || STREQ (string, "print")		\
+    || STREQ (string, "punct") || STREQ (string, "graph")		\
+    || STREQ (string, "cntrl") || STREQ (string, "blank"))
+
+/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
+   Returns one of error codes defined in `regex.h', or zero for success.
+
+   Assumes the `allocated' (and perhaps `buffer') and `translate'
+   fields are set in BUFP on entry.
+
+   If it succeeds, results are put in BUFP (if it returns an error, the
+   contents of BUFP are undefined):
+     `buffer' is the compiled pattern;
+     `syntax' is set to SYNTAX;
+     `used' is set to the length of the compiled pattern;
+     `fastmap_accurate' is zero;
+     `re_nsub' is the number of subexpressions in PATTERN;
+     `not_bol' and `not_eol' are zero;
+
+   The `fastmap' and `newline_anchor' fields are neither
+   examined nor set.  */
+
+static reg_errcode_t
+regex_compile (pattern, size, syntax, bufp)
+     const char *pattern;
+     int size;
+     reg_syntax_t syntax;
+     struct re_pattern_buffer *bufp;
+{
+  /* We fetch characters from PATTERN here.  Even though PATTERN is
+     `char *' (i.e., signed), we declare these variables as unsigned, so
+     they can be reliably used as array indices.  */
+  register unsigned char c, c1;
+
+  /* A random temporary spot in PATTERN.  */
+  const char *p1;
+
+  /* Points to the end of the buffer, where we should append.  */
+  register unsigned char *b;
+
+  /* Keeps track of unclosed groups.  */
+  compile_stack_type compile_stack;
+
+  /* Points to the current (ending) position in the pattern.  */
+  const char *p = pattern;
+  const char *pend = pattern + size;
+
+  /* How to translate the characters in the pattern.  */
+  char *translate = bufp->translate;
+
+  /* Address of the count-byte of the most recently inserted `exactn'
+     command.  This makes it possible to tell if a new exact-match
+     character can be added to that command or if the character requires
+     a new `exactn' command.  */
+  unsigned char *pending_exact = 0;
+
+  /* Address of start of the most recently finished expression.
+     This tells, e.g., postfix * where to find the start of its
+     operand.  Reset at the beginning of groups and alternatives.  */
+  unsigned char *laststart = 0;
+
+  /* Address of beginning of regexp, or inside of last group.  */
+  unsigned char *begalt;
+
+  /* Place in the uncompiled pattern (i.e., the {) to
+     which to go back if the interval is invalid.  */
+  const char *beg_interval;
+
+  /* Address of the place where a forward jump should go to the end of
+     the containing expression.  Each alternative of an `or' -- except the
+     last -- ends with a forward jump of this sort.  */
+  unsigned char *fixup_alt_jump = 0;
+
+  /* Counts open-groups as they are encountered.  Remembered for the
+     matching close-group on the compile stack, so the same register
+     number is put in the stop_memory as the start_memory.  */
+  regnum_t regnum = 0;
+
+#ifdef DEBUG
+  DEBUG_PRINT1 ("\nCompiling pattern: ");
+  if (debug)
     {
-	char errmsg[256];
-	regerror (errcode, result, errmsg, 256);
-	error (WARNING, "%s", errmsg);
-	regfree (result);
-	eFree (result);
-	result = NULL;
+      unsigned debug_count;
+
+      for (debug_count = 0; debug_count < size; debug_count++)
+        printchar (pattern[debug_count]);
+      putchar ('\n');
     }
-    return result;
-}
+#endif /* DEBUG */
 
+  /* Initialize the compile stack.  */
+  compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
+  if (compile_stack.stack == NULL)
+    return REG_ESPACE;
+
+  compile_stack.size = INIT_COMPILE_STACK_SIZE;
+  compile_stack.avail = 0;
+
+  /* Initialize the pattern buffer.  */
+  bufp->syntax = syntax;
+  bufp->fastmap_accurate = 0;
+  bufp->not_bol = bufp->not_eol = 0;
+
+  /* Set `used' to zero, so that if we return an error, the pattern
+     printer (for debugging) will think there's no pattern.  We reset it
+     at the end.  */
+  bufp->used = 0;
+
+  /* Always count groups, whether or not bufp->no_sub is set.  */
+  bufp->re_nsub = 0;
+
+#if !defined (emacs) && !defined (SYNTAX_TABLE)
+  /* Initialize the syntax table.  */
+   init_syntax_once ();
 #endif
 
-static void parseKinds (const char* const kinds,
-			char* const kind, char** const kindName)
-{
-    *kind = '\0';
-    *kindName = NULL;
-    if (kinds == NULL)
+  if (bufp->allocated == 0)
     {
-	*kind = 'r';
-	*kindName = eStrdup ("regex");
+      if (bufp->buffer)
+	{ /* If zero allocated, but buffer is non-null, try to realloc
+             enough space.  This loses if buffer's address is bogus, but
+             that is the user's responsibility.  */
+          RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
+        }
+      else
+        { /* Caller did not allocate a buffer.  Do it for them.  */
+          bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
+        }
+      if (!bufp->buffer) return REG_ESPACE;
+
+      bufp->allocated = INIT_BUF_SIZE;
     }
-    else if (kinds [0] != '\0')
+
+  begalt = b = bufp->buffer;
+
+  /* Loop through the uncompiled pattern until we're at the end.  */
+  while (p != pend)
     {
-	const char* k = kinds;
-	if (k [1] == ','  ||  k [1] == '\0')
-	    *kind = *k++;
-	if (*k == ',')
-	    ++k;
-	if (*k != '\0')
-	    *kindName = eStrdup (k);
+      PATFETCH (c);
+
+      switch (c)
+        {
+        case '^':
+          {
+            if (   /* If at start of pattern, it's an operator.  */
+                   p == pattern + 1
+                   /* If context independent, it's an operator.  */
+                || syntax & RE_CONTEXT_INDEP_ANCHORS
+                   /* Otherwise, depends on what's come before.  */
+                || at_begline_loc_p (pattern, p, syntax))
+              BUF_PUSH (begline);
+            else
+              goto normal_char;
+          }
+          break;
+
+
+        case '$':
+          {
+            if (   /* If at end of pattern, it's an operator.  */
+                   p == pend
+                   /* If context independent, it's an operator.  */
+                || syntax & RE_CONTEXT_INDEP_ANCHORS
+                   /* Otherwise, depends on what's next.  */
+                || at_endline_loc_p (p, pend, syntax))
+               BUF_PUSH (endline);
+             else
+               goto normal_char;
+           }
+           break;
+
+
+	case '+':
+        case '?':
+          if ((syntax & RE_BK_PLUS_QM)
+              || (syntax & RE_LIMITED_OPS))
+            goto normal_char;
+        handle_plus:
+        case '*':
+          /* If there is no previous pattern... */
+          if (!laststart)
+            {
+              if (syntax & RE_CONTEXT_INVALID_OPS)
+                return REG_BADRPT;
+              else if (!(syntax & RE_CONTEXT_INDEP_OPS))
+                goto normal_char;
+            }
+
+          {
+            /* Are we optimizing this jump?  */
+            boolean keep_string_p = false;
+
+            /* 1 means zero (many) matches is allowed.  */
+            char zero_times_ok = 0, many_times_ok = 0;
+
+            /* If there is a sequence of repetition chars, collapse it
+               down to just one (the right one).  We can't combine
+               interval operators with these because of, e.g., `a{2}*',
+               which should only match an even number of `a's.  */
+
+            for (;;)
+              {
+                zero_times_ok |= c != '+';
+                many_times_ok |= c != '?';
+
+                if (p == pend)
+                  break;
+
+                PATFETCH (c);
+
+                if (c == '*'
+                    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
+                  ;
+
+                else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
+                  {
+                    if (p == pend) return REG_EESCAPE;
+
+                    PATFETCH (c1);
+                    if (!(c1 == '+' || c1 == '?'))
+                      {
+                        PATUNFETCH;
+                        PATUNFETCH;
+                        break;
+                      }
+
+                    c = c1;
+                  }
+                else
+                  {
+                    PATUNFETCH;
+                    break;
+                  }
+
+                /* If we get here, we found another repeat character.  */
+               }
+
+            /* Star, etc. applied to an empty pattern is equivalent
+               to an empty pattern.  */
+            if (!laststart)
+              break;
+
+            /* Now we know whether or not zero matches is allowed
+               and also whether or not two or more matches is allowed.  */
+            if (many_times_ok)
+              { /* More than one repetition is allowed, so put in at the
+                   end a backward relative jump from `b' to before the next
+                   jump we're going to put in below (which jumps from
+                   laststart to after this jump).
+
+                   But if we are at the `*' in the exact sequence `.*\n',
+                   insert an unconditional jump backwards to the .,
+                   instead of the beginning of the loop.  This way we only
+                   push a failure point once, instead of every time
+                   through the loop.  */
+                assert (p - 1 > pattern);
+
+                /* Allocate the space for the jump.  */
+                GET_BUFFER_SPACE (3);
+
+                /* We know we are not at the first character of the pattern,
+                   because laststart was nonzero.  And we've already
+                   incremented `p', by the way, to be the character after
+                   the `*'.  Do we have to do something analogous here
+                   for null bytes, because of RE_DOT_NOT_NULL?  */
+                if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
+		    && zero_times_ok
+                    && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
+                    && !(syntax & RE_DOT_NEWLINE))
+                  { /* We have .*\n.  */
+                    STORE_JUMP (jump, b, laststart);
+                    keep_string_p = true;
+                  }
+                else
+                  /* Anything else.  */
+                  STORE_JUMP (maybe_pop_jump, b, laststart - 3);
+
+                /* We've added more stuff to the buffer.  */
+                b += 3;
+              }
+
+            /* On failure, jump from laststart to b + 3, which will be the
+               end of the buffer after this jump is inserted.  */
+            GET_BUFFER_SPACE (3);
+            INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
+                                       : on_failure_jump,
+                         laststart, b + 3);
+            pending_exact = 0;
+            b += 3;
+
+            if (!zero_times_ok)
+              {
+                /* At least one repetition is required, so insert a
+                   `dummy_failure_jump' before the initial
+                   `on_failure_jump' instruction of the loop. This
+                   effects a skip over that instruction the first time
+                   we hit that loop.  */
+                GET_BUFFER_SPACE (3);
+                INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
+                b += 3;
+              }
+            }
+	  break;
+
+
+	case '.':
+          laststart = b;
+          BUF_PUSH (anychar);
+          break;
+
+
+        case '[':
+          {
+            boolean had_char_class = false;
+
+            if (p == pend) return REG_EBRACK;
+
+            /* Ensure that we have enough space to push a charset: the
+               opcode, the length count, and the bitset; 34 bytes in all.  */
+	    GET_BUFFER_SPACE (34);
+
+            laststart = b;
+
+            /* We test `*p == '^' twice, instead of using an if
+               statement, so we only need one BUF_PUSH.  */
+            BUF_PUSH (*p == '^' ? charset_not : charset);
+            if (*p == '^')
+              p++;
+
+            /* Remember the first position in the bracket expression.  */
+            p1 = p;
+
+            /* Push the number of bytes in the bitmap.  */
+            BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
+
+            /* Clear the whole map.  */
+            bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
+
+            /* charset_not matches newline according to a syntax bit.  */
+            if ((re_opcode_t) b[-2] == charset_not
+                && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
+              SET_LIST_BIT ('\n');
+
+            /* Read in characters and ranges, setting map bits.  */
+            for (;;)
+              {
+                if (p == pend) return REG_EBRACK;
+
+                PATFETCH (c);
+
+                /* \ might escape characters inside [...] and [^...].  */
+                if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
+                  {
+                    if (p == pend) return REG_EESCAPE;
+
+                    PATFETCH (c1);
+                    SET_LIST_BIT (c1);
+                    continue;
+                  }
+
+                /* Could be the end of the bracket expression.  If it's
+                   not (i.e., when the bracket expression is `[]' so
+                   far), the ']' character bit gets set way below.  */
+                if (c == ']' && p != p1 + 1)
+                  break;
+
+                /* Look ahead to see if it's a range when the last thing
+                   was a character class.  */
+                if (had_char_class && c == '-' && *p != ']')
+                  return REG_ERANGE;
+
+                /* Look ahead to see if it's a range when the last thing
+                   was a character: if this is a hyphen not at the
+                   beginning or the end of a list, then it's the range
+                   operator.  */
+                if (c == '-'
+                    && !(p - 2 >= pattern && p[-2] == '[')
+                    && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
+                    && *p != ']')
+                  {
+                    reg_errcode_t ret
+                      = compile_range (&p, pend, translate, syntax, b);
+                    if (ret != REG_NOERROR) return ret;
+                  }
+
+                else if (p[0] == '-' && p[1] != ']')
+                  { /* This handles ranges made up of characters only.  */
+                    reg_errcode_t ret;
+
+		    /* Move past the `-'.  */
+                    PATFETCH (c1);
+
+                    ret = compile_range (&p, pend, translate, syntax, b);
+                    if (ret != REG_NOERROR) return ret;
+                  }
+
+                /* See if we're at the beginning of a possible character
+                   class.  */
+
+                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
+                  { /* Leave room for the null.  */
+                    char str[CHAR_CLASS_MAX_LENGTH + 1];
+
+                    PATFETCH (c);
+                    c1 = 0;
+
+                    /* If pattern is `[[:'.  */
+                    if (p == pend) return REG_EBRACK;
+
+                    for (;;)
+                      {
+                        PATFETCH (c);
+                        if (c == ':' || c == ']' || p == pend
+                            || c1 == CHAR_CLASS_MAX_LENGTH)
+                          break;
+                        str[c1++] = c;
+                      }
+                    str[c1] = '\0';
+
+                    /* If it isn't a word bracketed by `[:' and `:]':
+                       undo the ending character, the letters, and leave
+                       the leading `:' and `[' (but set bits for them).  */
+                    if (c == ':' && *p == ']')
+                      {
+                        int ch;
+                        boolean is_alnum = STREQ (str, "alnum");
+                        boolean is_alpha = STREQ (str, "alpha");
+                        boolean is_blank = STREQ (str, "blank");
+                        boolean is_cntrl = STREQ (str, "cntrl");
+                        boolean is_digit = STREQ (str, "digit");
+                        boolean is_graph = STREQ (str, "graph");
+                        boolean is_lower = STREQ (str, "lower");
+                        boolean is_print = STREQ (str, "print");
+                        boolean is_punct = STREQ (str, "punct");
+                        boolean is_space = STREQ (str, "space");
+                        boolean is_upper = STREQ (str, "upper");
+                        boolean is_xdigit = STREQ (str, "xdigit");
+
+                        if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
+
+                        /* Throw away the ] at the end of the character
+                           class.  */
+                        PATFETCH (c);
+
+                        if (p == pend) return REG_EBRACK;
+
+                        for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
+                          {
+                            if (   (is_alnum  && ISALNUM (ch))
+                                || (is_alpha  && ISALPHA (ch))
+                                || (is_blank  && ISBLANK (ch))
+                                || (is_cntrl  && ISCNTRL (ch))
+                                || (is_digit  && ISDIGIT (ch))
+                                || (is_graph  && ISGRAPH (ch))
+                                || (is_lower  && ISLOWER (ch))
+                                || (is_print  && ISPRINT (ch))
+                                || (is_punct  && ISPUNCT (ch))
+                                || (is_space  && ISSPACE (ch))
+                                || (is_upper  && ISUPPER (ch))
+                                || (is_xdigit && ISXDIGIT (ch)))
+                            SET_LIST_BIT (ch);
+                          }
+                        had_char_class = true;
+                      }
+                    else
+                      {
+                        c1++;
+                        while (c1--)
+                          PATUNFETCH;
+                        SET_LIST_BIT ('[');
+                        SET_LIST_BIT (':');
+                        had_char_class = false;
+                      }
+                  }
+                else
+                  {
+                    had_char_class = false;
+                    SET_LIST_BIT (c);
+                  }
+              }
+

@@ Diff output truncated at 100000 characters. @@

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.



More information about the Commits mailing list