Revision: 2062
http://geany.svn.sourceforge.net/geany/?rev=2062&view=rev
Author: frlan
Date: 2007-11-20 11:23:42 -0800 (Tue, 20 Nov 2007)
Log Message:
-----------
Update of Swedish translation
Modified Paths:
--------------
trunk/po/ChangeLog
trunk/po/POTFILES.in
trunk/po/sv.po
Modified: trunk/po/ChangeLog
===================================================================
--- trunk/po/ChangeLog 2007-11-20 18:37:20 UTC (rev 2061)
+++ trunk/po/ChangeLog 2007-11-20 19:23:42 UTC (rev 2062)
@@ -1,3 +1,14 @@
+2007-11-20 Frank Lanitz <frank(at)frank(dot)uvena(dot)de>
+
+ * sv.po: Update of Swedish translation (Thanks to Tony Mattsson)
+
+
+2007-11-18 Frank Lanitz <frank(at)frank(dot)uvena(dot)de>
+
+ * POTFILES.in: Removed old plugin svndiff from and add new plugin
+ vcdiff to list of files with translateable strings
+
+
2007-10-30 Frank Lanitz <frank(at)frank(dot)uvena(dot)de>
* de.po: Update of German translation and fix of a few typos.
Modified: trunk/po/POTFILES.in
===================================================================
--- trunk/po/POTFILES.in 2007-11-20 18:37:20 UTC (rev 2061)
+++ trunk/po/POTFILES.in 2007-11-20 19:23:42 UTC (rev 2062)
@@ -37,5 +37,5 @@
plugins/classbuilder.c
plugins/htmlchars.c
plugins/export.c
-plugins/svndiff.c
+plugins/vcdiff.c
plugins/filebrowser.c
Modified: trunk/po/sv.po
===================================================================
--- trunk/po/sv.po 2007-11-20 18:37:20 UTC (rev 2061)
+++ trunk/po/sv.po 2007-11-20 19:23:42 UTC (rev 2062)
@@ -8,7 +8,7 @@
"Project-Id-Version: Geany 0.12\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2007-10-01 11:49+0200\n"
-"PO-Revision-Date: 2007-10-23 22:07+0100\n"
+"PO-Revision-Date: 2007-11-19 01:22+0100\n"
"Last-Translator: Tony Mattsson <superxorn(a)gmail.com>\n"
"Language-Team: Swedish <geany-i18n(a)uvena.de>\n"
"MIME-Version: 1.0\n"
@@ -1063,7 +1063,7 @@
#: ../src/interface.c:3490
#: ../src/templates.c:333
msgid "None"
-msgstr "Inga"
+msgstr "Inget"
#: ../src/filetypes.c:932
#: ../src/win32.c:81
@@ -1148,7 +1148,7 @@
#: ../src/interface.c:466
#: ../src/interface.c:2032
msgid "Select _All"
-msgstr "Välj allt"
+msgstr "Markera allt"
#: ../src/interface.c:475
#: ../src/interface.c:2050
@@ -1295,11 +1295,11 @@
#: ../src/interface.c:663
msgid "Find _Selected"
-msgstr "Sök markerad"
+msgstr "Sök i markering"
#: ../src/interface.c:667
msgid "Find Pre_v Selected"
-msgstr "Sök föregående markerad"
+msgstr "Sök föregående i markering"
#: ../src/interface.c:676
msgid "Next _Message"
@@ -1388,7 +1388,7 @@
#: ../src/interface.c:791
msgid "_Write Unicode BOM"
-msgstr "Skriv Unicode BOM (bör användas för vanliga textfiler, men inte för skriptfiler)"
+msgstr "Skriv Unicode BOM"
#: ../src/interface.c:800
msgid "Set File_type"
@@ -1457,7 +1457,7 @@
#: ../src/interface.c:891
msgid "_New"
-msgstr "Ny"
+msgstr "Nytt"
#: ../src/interface.c:899
msgid "_Open"
@@ -1503,7 +1503,7 @@
#: ../src/interface.c:969
msgid "_Website"
-msgstr "Webbsite"
+msgstr "Webbsida"
#: ../src/interface.c:992
msgid "Create a new file"
@@ -1720,7 +1720,7 @@
#: ../src/interface.c:2734
msgid "Beep on errors or when compilation has finished"
-msgstr "Piper vid fel och när kompilering har avslutats"
+msgstr "Pip vid fel och när kompilering har avslutats"
#: ../src/interface.c:2737
msgid "Whether to beep if an error occurred or when the compilation process has finished."
@@ -1863,7 +1863,7 @@
#: ../src/interface.c:2982
msgid "File tabs will be placed on the left of the notebook"
-msgstr ""
+msgstr "Fil-tabbar kommer att placeras till vänster om anteckningsboken"
#: ../src/interface.c:2987
#: ../src/interface.c:3028
@@ -1874,7 +1874,7 @@
#: ../src/interface.c:2990
msgid "File tabs will be placed on the right of the notebook"
-msgstr ""
+msgstr "Fil-tabbar kommer att placeras till höger om anteckningsboken"
#: ../src/interface.c:2994
msgid "<b>Editor tabs</b>"
@@ -2137,7 +2137,7 @@
#: ../src/interface.c:3493
msgid "Match braces"
-msgstr ""
+msgstr "Matcha klamrar"
#: ../src/interface.c:3495
msgid "<b>Indentation</b>"
@@ -2173,11 +2173,11 @@
#: ../src/interface.c:3537
msgid "Unfold all children of a fold point"
-msgstr ""
+msgstr "Vik upp alla vid uppvikningspunkten"
#: ../src/interface.c:3540
msgid "Unfold all children of a fold point when unfolding it."
-msgstr ""
+msgstr "Vik ihop alla vid uppvikningspunkten"
#: ../src/interface.c:3543
msgid "Use indicators to show compile errors"
@@ -2197,7 +2197,7 @@
#: ../src/interface.c:3571
msgid "Automatic completion of often used constructs like if and for"
-msgstr ""
+msgstr "Automatisk komplettering av ofta använda konstruktioner som <i>if</i> eller <i>for</i>"
#: ../src/interface.c:3574
msgid "XML tag autocompletion"
@@ -2205,7 +2205,7 @@
#: ../src/interface.c:3577
msgid "Automatic completion of open XML tags (includes HTML tags)"
-msgstr ""
+msgstr "Autokomplettera öppna XML-taggar"
#: ../src/interface.c:3580
msgid "Symbol autocompletion"
@@ -2213,11 +2213,11 @@
#: ../src/interface.c:3583
msgid "Automatic completion of known symbols in open files (function names, global variables, ...)"
-msgstr ""
+msgstr "Autokomplettera kända symboler i öppna filer (funktionsnamn, globala variabler etc.)"
#: ../src/interface.c:3590
msgid "Rows of autocompletion list:"
-msgstr ""
+msgstr "Rader i autokompletteringslistan:"
#: ../src/interface.c:3599
msgid "Number of rows to display in the autocompletion list."
@@ -2261,7 +2261,7 @@
#: ../src/interface.c:3698
msgid "Ensure new line at file end"
-msgstr "Skriver alltid nyradstecken i slutet av filen"
+msgstr "Skriv alltid nyradstecken i slutet av filen"
#: ../src/interface.c:3702
msgid "Ensures that at the end of the file is a new line"
@@ -2393,7 +2393,7 @@
#: ../src/interface.c:4046
msgid "Mail address:"
-msgstr "Postadress:"
+msgstr "E-post:"
#: ../src/interface.c:4053
msgid "Initials:"
@@ -2708,7 +2708,7 @@
#: ../src/keybindings.c:275
msgid "Go to matching brace"
-msgstr ""
+msgstr "Gå till matchande klammer"
#: ../src/keybindings.c:278
msgid "Toggle marker"
@@ -2834,7 +2834,7 @@
#: ../src/main.c:114
msgid "Set initial column number for the first opened file (useful in conjunction with --line)"
-msgstr ""
+msgstr "Sätt kolumn för markören vid öppnande av fil"
#: ../src/main.c:115
msgid "Use an alternate configuration directory"
@@ -3351,7 +3351,7 @@
#: ../src/symbols.c:524
msgid "Type constructors"
-msgstr ""
+msgstr "Typ-konstruktörer"
#: ../src/symbols.c:525
#: ../src/symbols.c:552
@@ -3364,7 +3364,7 @@
#: ../src/symbols.c:530
msgid "Sections"
-msgstr ""
+msgstr "Markeringar"
#: ../src/symbols.c:531
msgid "Keys"
@@ -3376,7 +3376,7 @@
#: ../src/symbols.c:538
msgid "Environment"
-msgstr ""
+msgstr "Miljö"
#: ../src/symbols.c:540
#: ../src/symbols.c:580
@@ -3403,7 +3403,7 @@
#: ../src/symbols.c:554
msgid "Local"
-msgstr ""
+msgstr "Lokal"
#: ../src/symbols.c:555
msgid "Our"
@@ -3466,7 +3466,7 @@
#: ../src/symbols.c:668
msgid "Structs / Typedefs"
-msgstr ""
+msgstr "Strukturer och typdefinitioner"
#: ../src/symbols.c:674
msgid "Macros"
@@ -3539,7 +3539,7 @@
#: ../src/tools.c:225
msgid "You can send the current selection to any of these commands and the output of the command replaces the current selection."
-msgstr ""
+msgstr "Du kan skicka markeringen till vilket av dessa kommandon som helst."
#: ../src/tools.c:416
#: ../src/tools.c:420
@@ -3560,7 +3560,7 @@
#: ../src/tools.c:547
msgid "Range:"
-msgstr ""
+msgstr "Område:"
#: ../src/tools.c:559
msgid "Lines:"
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
Revision: 2054
http://geany.svn.sourceforge.net/geany/?rev=2054&view=rev
Author: eht16
Date: 2007-11-18 07:09:28 -0800 (Sun, 18 Nov 2007)
Log Message:
-----------
Add hidden pref to hide symbol list treeview expander (GTK >= 2.12).
Double click on symbol list sections now expands/collapses them.
Modified Paths:
--------------
trunk/ChangeLog
trunk/doc/geany.html
trunk/doc/geany.txt
trunk/src/keyfile.c
trunk/src/plugindata.h
trunk/src/prefs.h
trunk/src/treeviews.c
Modified: trunk/ChangeLog
===================================================================
--- trunk/ChangeLog 2007-11-17 19:27:50 UTC (rev 2053)
+++ trunk/ChangeLog 2007-11-18 15:09:28 UTC (rev 2054)
@@ -1,3 +1,11 @@
+2007-11-18 Enrico Tröger <enrico(dot)troeger(at)uvena(dot)de>
+
+ * doc/geany.html, doc/geany.txt, src/keyfile.c, src/plugindata.h,
+ src/prefs.h, src/treeviews.c:
+ Add hidden pref to hide symbol list treeview expander (GTK >= 2.12).
+ Double click on symbol list sections now expands/collapses them.
+
+
2007-11-17 Enrico Tröger <enrico(dot)troeger(at)uvena(dot)de>
* src/templates.c, src/utils.c, src/utils.h:
Modified: trunk/doc/geany.html
===================================================================
--- trunk/doc/geany.html 2007-11-17 19:27:50 UTC (rev 2053)
+++ trunk/doc/geany.html 2007-11-18 15:09:28 UTC (rev 2054)
@@ -6,7 +6,7 @@
<meta name="generator" content="Docutils 0.4.1: http://docutils.sourceforge.net/" />
<title>Geany</title>
<meta name="authors" content="Enrico Tröger Nick Treleaven Frank Lanitz" />
-<meta name="date" content="2007-11-12" />
+<meta name="date" content="2007-11-17" />
<style type="text/css">
/*
@@ -133,7 +133,7 @@
<br />Nick Treleaven
<br />Frank Lanitz</td></tr>
<tr><th class="docinfo-name">Date:</th>
-<td>2007-11-12</td></tr>
+<td>2007-11-17</td></tr>
<tr><th class="docinfo-name">Version:</th>
<td>0.13</td></tr>
</tbody>
@@ -1499,7 +1499,8 @@
<pre class="literal-block">
gtk-print-preview-command = "epdfview %f"
</pre>
-<p>at the end of the file. Of course, you can also use xpdf, kpdf or whatever.</p>
+<p>at the end of the file. Of course, you can also use xpdf, kpdf or whatever
+as the print preview command.</p>
<p>Unfortunately, native GTK printing support is only available if Geany was
built against GTK 2.10 (or above) <strong>and</strong> is running with GTK 2.10 (or above).
If not, Geany provides basic printing support. This means you can print a
@@ -2603,6 +2604,12 @@
an existing line.</td>
<td>false</td>
</tr>
+<tr><td>show_symbol_list_expanders</td>
+<td>Whether to show or hide the small expander
+icons on the symbol list treeview (only
+available with GTK 2.12 or above).</td>
+<td>true</td>
+</tr>
<tr><td><strong>[VTE]</strong></td>
<td> </td>
<td> </td>
@@ -3127,7 +3134,7 @@
<div class="footer">
<hr class="footer" />
<a class="reference" href="geany.txt">View document source</a>.
-Generated on: 2007-11-17 13:39 UTC.
+Generated on: 2007-11-18 15:02 UTC.
Generated by <a class="reference" href="http://docutils.sourceforge.net/">Docutils</a> from <a class="reference" href="http://docutils.sourceforge.net/rst.html">reStructuredText</a> source.
</div>
Modified: trunk/doc/geany.txt
===================================================================
--- trunk/doc/geany.txt 2007-11-17 19:27:50 UTC (rev 2053)
+++ trunk/doc/geany.txt 2007-11-18 15:09:28 UTC (rev 2054)
@@ -2349,6 +2349,9 @@
commands`_).
auto_complete_whilst_editing Whether to allow autocompletion when editing false
an existing line.
+show_symbol_list_expanders Whether to show or hide the small expander true
+ icons on the symbol list treeview (only
+ available with GTK 2.12 or above).
**[VTE]**
enable_bash_keys Whether to allow bash shell keyboard true
shortcuts like Ctrl-W to delete the last
Modified: trunk/src/keyfile.c
===================================================================
--- trunk/src/keyfile.c 2007-11-17 19:27:50 UTC (rev 2053)
+++ trunk/src/keyfile.c 2007-11-18 15:09:28 UTC (rev 2054)
@@ -339,6 +339,9 @@
write_hidden_pref_boolean(config, PACKAGE, "brace_match_ltgt", editor_prefs.brace_match_ltgt);
write_hidden_pref_boolean(config, PACKAGE, "use_gtk_word_boundaries", editor_prefs.use_gtk_word_boundaries);
write_hidden_pref_boolean(config, PACKAGE, "auto_complete_whilst_editing", editor_prefs.auto_complete_whilst_editing);
+#if GTK_CHECK_VERSION(2, 12, 0)
+ write_hidden_pref_boolean(config, PACKAGE, "show_symbol_list_expanders", prefs.show_symbol_list_expanders);
+#endif
#ifdef HAVE_VTE
if (vte_info.have_vte)
write_hidden_pref_boolean(config, "VTE", "enable_bash_keys", vc->enable_bash_keys);
@@ -459,6 +462,9 @@
prefs.editor_font = utils_get_setting_string(config, PACKAGE, "editor_font", GEANY_DEFAULT_FONT_EDITOR);
prefs.tagbar_font = utils_get_setting_string(config, PACKAGE, "tagbar_font", GEANY_DEFAULT_FONT_SYMBOL_LIST);
prefs.msgwin_font = utils_get_setting_string(config, PACKAGE, "msgwin_font", GEANY_DEFAULT_FONT_MSG_WINDOW);
+#if GTK_CHECK_VERSION(2, 12, 0)
+ prefs.show_symbol_list_expanders = utils_get_setting_boolean(config, PACKAGE, "show_symbol_list_expanders", TRUE);
+#endif
// display, editor
editor_prefs.long_line_type = utils_get_setting_integer(config, PACKAGE, "long_line_type", 0);
Modified: trunk/src/plugindata.h
===================================================================
--- trunk/src/plugindata.h 2007-11-17 19:27:50 UTC (rev 2053)
+++ trunk/src/plugindata.h 2007-11-18 15:09:28 UTC (rev 2054)
@@ -71,12 +71,12 @@
/* The API version should be incremented whenever any plugin data types below are
* modified or appended to. */
-static const gint api_version = 29;
+static const gint api_version = 30;
/* The ABI version should be incremented whenever existing fields in the plugin
* data types below have to be changed or reordered. It should stay the same if fields
* are only appended, as this doesn't affect existing fields. */
-static const gint abi_version = 14;
+static const gint abi_version = 15;
/* This performs runtime checks that try to ensure:
* 1. Geany ABI data types are compatible with this plugin.
Modified: trunk/src/prefs.h
===================================================================
--- trunk/src/prefs.h 2007-11-17 19:27:50 UTC (rev 2053)
+++ trunk/src/prefs.h 2007-11-18 15:09:28 UTC (rev 2054)
@@ -53,6 +53,7 @@
gint tab_pos_msgwin;
gint tab_pos_sidebar;
gboolean statusbar_visible;
+ gboolean show_symbol_list_expanders;
/* toolbar */
gboolean toolbar_visible;
@@ -82,7 +83,6 @@
gchar *tools_browser_cmd;
gchar *tools_make_cmd;
gchar *tools_term_cmd;
- gchar *tools_print_cmd; /// TODO unused: remove this at next abi_verison change
gchar *tools_grep_cmd;
gchar *context_action_cmd;
Modified: trunk/src/treeviews.c
===================================================================
--- trunk/src/treeviews.c 2007-11-17 19:27:50 UTC (rev 2053)
+++ trunk/src/treeviews.c 2007-11-18 15:09:28 UTC (rev 2054)
@@ -120,6 +120,10 @@
gtk_tree_view_set_enable_search(GTK_TREE_VIEW(tree), FALSE);
+#if GTK_CHECK_VERSION(2, 12, 0)
+ gtk_tree_view_set_show_expanders(GTK_TREE_VIEW(tree), prefs.show_symbol_list_expanders);
+#endif
+
// selection handling
select = gtk_tree_view_get_selection(GTK_TREE_VIEW(tree));
gtk_tree_selection_set_mode(select, GTK_SELECTION_SINGLE);
@@ -643,17 +647,39 @@
static gboolean on_treeviews_button_press_event(GtkWidget *widget, GdkEventButton *event,
- gpointer user_data)
+ gpointer user_data)
{
- if (event->button == 1 && GPOINTER_TO_INT(user_data) == TREEVIEW_SYMBOL)
- { // allow reclicking of taglist treeview item
+ if (event->type == GDK_2BUTTON_PRESS && GPOINTER_TO_INT(user_data) == TREEVIEW_SYMBOL)
+ { // double click on parent node(section) expands/collapses it
+ GtkTreeModel *model;
+ GtkTreeSelection *selection;
+ GtkTreeIter iter;
+
+ selection = gtk_tree_view_get_selection(GTK_TREE_VIEW(widget));
+ if (gtk_tree_selection_get_selected(selection, &model, &iter))
+ {
+ if (gtk_tree_model_iter_has_child(model, &iter))
+ {
+ GtkTreePath *path = gtk_tree_model_get_path(model, &iter);
+
+ if (gtk_tree_view_row_expanded(GTK_TREE_VIEW(widget), path))
+ gtk_tree_view_collapse_row(GTK_TREE_VIEW(widget), path);
+ else
+ gtk_tree_view_expand_row(GTK_TREE_VIEW(widget), path, FALSE);
+
+ gtk_tree_path_free(path);
+ return TRUE;
+ }
+ }
+ }
+ else if (event->button == 1 && GPOINTER_TO_INT(user_data) == TREEVIEW_SYMBOL)
+ { // allow reclicking of taglist treeview item
GtkTreeSelection *select = gtk_tree_view_get_selection(GTK_TREE_VIEW(widget));
// delay the query of selection state because this callback is executed before GTK
// changes the selection (g_signal_connect_after would be better but it doesn't work)
g_idle_add((GSourceFunc) on_taglist_tree_selection_changed, select);
}
-
- if (event->button == 3)
+ else if (event->button == 3)
{ // popupmenu to hide or clear the active treeview
if (GPOINTER_TO_INT(user_data) == TREEVIEW_OPENFILES)
{
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
Revision: 2053
http://geany.svn.sourceforge.net/geany/?rev=2053&view=rev
Author: eht16
Date: 2007-11-17 11:27:50 -0800 (Sat, 17 Nov 2007)
Log Message:
-----------
Fix two more compiler warnings.
Use php.c and lregex.c from CTags SVN (closes #1795810).
Add regex.c and regex.h (GNU regex) for regex support on Windows.
Modified Paths:
--------------
trunk/ChangeLog
trunk/tagmanager/Makefile.am
trunk/tagmanager/include/Makefile.am
trunk/tagmanager/makefile.win32
trunk/tagmanager/parse.c
trunk/tagmanager/parse.h
trunk/tagmanager/php.c
trunk/tagmanager/regex.c
trunk/tagmanager/tm_tag.c
Added Paths:
-----------
trunk/tagmanager/include/regex.h
trunk/tagmanager/lregex.c
Modified: trunk/ChangeLog
===================================================================
--- trunk/ChangeLog 2007-11-17 17:03:25 UTC (rev 2052)
+++ trunk/ChangeLog 2007-11-17 19:27:50 UTC (rev 2053)
@@ -11,6 +11,13 @@
Add native GTK printing support.
* src/printing.c: Set line width for page header, handle empty filename
in page header correctly.
+ * tagmanager/Makefile.am, tagmanager/lregex.c,
+ tagmanager/makefile.win32, tagmanager/parse.c, tagmanager/parse.h,
+ tagmanager/php.c, tagmanager/regex.c, tagmanager/tm_tag.c,
+ tagmanager/include/Makefile.am, tagmanager/include/regex.h:
+ Fix two more compiler warnings.
+ Use php.c and lregex.c from CTags SVN (closes #1795810).
+ Add regex.c and regex.h (GNU regex) for regex support on Windows.
2007-11-14 Nick Treleaven <nick(dot)treleaven(at)btinternet(dot)com>
Modified: trunk/tagmanager/Makefile.am
===================================================================
--- trunk/tagmanager/Makefile.am 2007-11-17 17:03:25 UTC (rev 2052)
+++ trunk/tagmanager/Makefile.am 2007-11-17 19:27:50 UTC (rev 2053)
@@ -7,6 +7,8 @@
# -DGDK_PIXBUF_DEPRECATED \
# -DGTK_DISABLE_DEPRECATED -DGNOME_DISABLE_DEPRECATED
+# regex.c is the GNU regex implementation needed for Windows
+EXTRA_DIST = regex.c
noinst_LIBRARIES = libtagmanager.a
libtagmanager_a_SOURCES =\
@@ -39,6 +41,7 @@
make.c\
asm.c\
latex.c\
+ lregex.c\
pascal.c\
perl.c\
rest.c\
@@ -46,7 +49,6 @@
sql.c\
php.c\
python.c\
- regex.c\
tcl.c\
sh.c\
vhdl.c\
Modified: trunk/tagmanager/include/Makefile.am
===================================================================
--- trunk/tagmanager/include/Makefile.am 2007-11-17 17:03:25 UTC (rev 2052)
+++ trunk/tagmanager/include/Makefile.am 2007-11-17 19:27:50 UTC (rev 2053)
@@ -1,4 +1,5 @@
noinst_HEADERS = \
+ regex.h \
tm_project.h\
tm_source_file.h\
tm_tag.h\
Added: trunk/tagmanager/include/regex.h
===================================================================
--- trunk/tagmanager/include/regex.h (rev 0)
+++ trunk/tagmanager/include/regex.h 2007-11-17 19:27:50 UTC (rev 2053)
@@ -0,0 +1,490 @@
+/* Definitions for data structures and routines for the regular
+ expression library, version 0.12.
+
+ Copyright (C) 1985, 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#ifndef __REGEXP_LIBRARY_H__
+#define __REGEXP_LIBRARY_H__
+
+/* POSIX says that <sys/types.h> must be included (by the caller) before
+ <regex.h>. */
+
+#ifdef VMS
+/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
+ should be there. */
+#include <stddef.h>
+#endif
+
+
+/* The following bits are used to determine the regexp syntax we
+ recognize. The set/not-set meanings are chosen so that Emacs syntax
+ remains the value 0. The bits are given in alphabetical order, and
+ the definitions shifted by one from the previous bit; thus, when we
+ add or remove a bit, only one other definition need change. */
+typedef unsigned reg_syntax_t;
+
+/* If this bit is not set, then \ inside a bracket expression is literal.
+ If set, then such a \ quotes the following character. */
+#define RE_BACKSLASH_ESCAPE_IN_LISTS (1)
+
+/* If this bit is not set, then + and ? are operators, and \+ and \? are
+ literals.
+ If set, then \+ and \? are operators and + and ? are literals. */
+#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
+
+/* If this bit is set, then character classes are supported. They are:
+ [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
+ [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
+ If not set, then character classes are not supported. */
+#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
+
+/* If this bit is set, then ^ and $ are always anchors (outside bracket
+ expressions, of course).
+ If this bit is not set, then it depends:
+ ^ is an anchor if it is at the beginning of a regular
+ expression or after an open-group or an alternation operator;
+ $ is an anchor if it is at the end of a regular expression, or
+ before a close-group or an alternation operator.
+
+ This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
+ POSIX draft 11.2 says that * etc. in leading positions is undefined.
+ We already implemented a previous draft which made those constructs
+ invalid, though, so we haven't changed the code back. */
+#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
+
+/* If this bit is set, then special characters are always special
+ regardless of where they are in the pattern.
+ If this bit is not set, then special characters are special only in
+ some contexts; otherwise they are ordinary. Specifically,
+ * + ? and intervals are only special when not after the beginning,
+ open-group, or alternation operator. */
+#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
+
+/* If this bit is set, then *, +, ?, and { cannot be first in an re or
+ immediately after an alternation or begin-group operator. */
+#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
+
+/* If this bit is set, then . matches newline.
+ If not set, then it doesn't. */
+#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
+
+/* If this bit is set, then . doesn't match NUL.
+ If not set, then it does. */
+#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
+
+/* If this bit is set, nonmatching lists [^...] do not match newline.
+ If not set, they do. */
+#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
+
+/* If this bit is set, either \{...\} or {...} defines an
+ interval, depending on RE_NO_BK_BRACES.
+ If not set, \{, \}, {, and } are literals. */
+#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+
+/* If this bit is set, +, ? and | aren't recognized as operators.
+ If not set, they are. */
+#define RE_LIMITED_OPS (RE_INTERVALS << 1)
+
+/* If this bit is set, newline is an alternation operator.
+ If not set, newline is literal. */
+#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
+
+/* If this bit is set, then `{...}' defines an interval, and \{ and \}
+ are literals.
+ If not set, then `\{...\}' defines an interval. */
+#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
+
+/* If this bit is set, (...) defines a group, and \( and \) are literals.
+ If not set, \(...\) defines a group, and ( and ) are literals. */
+#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
+
+/* If this bit is set, then \<digit> matches <digit>.
+ If not set, then \<digit> is a back-reference. */
+#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
+
+/* If this bit is set, then | is an alternation operator, and \| is literal.
+ If not set, then \| is an alternation operator, and | is literal. */
+#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
+
+/* If this bit is set, then an ending range point collating higher
+ than the starting range point, as in [z-a], is invalid.
+ If not set, then when ending range point collates higher than the
+ starting range point, the range is ignored. */
+#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
+
+/* If this bit is set, then an unmatched ) is ordinary.
+ If not set, then an unmatched ) is invalid. */
+#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
+
+/* This global variable defines the particular regexp syntax to use (for
+ some interfaces). When a regexp is compiled, the syntax used is
+ stored in the pattern buffer, so changing this does not affect
+ already-compiled regexps. */
+extern reg_syntax_t re_syntax_options;
+
+/* Define combinations of the above bits for the standard possibilities.
+ (The [[[ comments delimit what gets put into the Texinfo file, so
+ don't delete them!) */
+/* [[[begin syntaxes]]] */
+#define RE_SYNTAX_EMACS 0
+
+#define RE_SYNTAX_AWK \
+ (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+#define RE_SYNTAX_POSIX_AWK \
+ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)
+
+#define RE_SYNTAX_GREP \
+ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
+ | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
+ | RE_NEWLINE_ALT)
+
+#define RE_SYNTAX_EGREP \
+ (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
+ | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
+ | RE_NO_BK_VBAR)
+
+#define RE_SYNTAX_POSIX_EGREP \
+ (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
+
+/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
+#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
+
+#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
+
+/* Syntax bits common to both basic and extended POSIX regex syntax. */
+#define _RE_SYNTAX_POSIX_COMMON \
+ (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
+ | RE_INTERVALS | RE_NO_EMPTY_RANGES)
+
+#define RE_SYNTAX_POSIX_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
+
+/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
+ RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
+ isn't minimal, since other operators, such as \`, aren't disabled. */
+#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
+
+#define RE_SYNTAX_POSIX_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
+ replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */
+#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
+/* [[[end syntaxes]]] */
+
+/* Maximum number of duplicates an interval can allow. Some systems
+ (erroneously) define this in other header files, but we want our
+ value, so remove any previous define. */
+#ifdef RE_DUP_MAX
+#undef RE_DUP_MAX
+#endif
+#define RE_DUP_MAX ((1 << 15) - 1)
+
+
+/* POSIX `cflags' bits (i.e., information for `regcomp'). */
+
+/* If this bit is set, then use extended regular expression syntax.
+ If not set, then use basic regular expression syntax. */
+#define REG_EXTENDED 1
+
+/* If this bit is set, then ignore case when matching.
+ If not set, then case is significant. */
+#define REG_ICASE (REG_EXTENDED << 1)
+
+/* If this bit is set, then anchors do not match at newline
+ characters in the string.
+ If not set, then anchors do match at newlines. */
+#define REG_NEWLINE (REG_ICASE << 1)
+
+/* If this bit is set, then report only success or fail in regexec.
+ If not set, then returns differ between not matching and errors. */
+#define REG_NOSUB (REG_NEWLINE << 1)
+
+
+/* POSIX `eflags' bits (i.e., information for regexec). */
+
+/* If this bit is set, then the beginning-of-line operator doesn't match
+ the beginning of the string (presumably because it's not the
+ beginning of a line).
+ If not set, then the beginning-of-line operator does match the
+ beginning of the string. */
+#define REG_NOTBOL 1
+
+/* Like REG_NOTBOL, except for the end-of-line. */
+#define REG_NOTEOL (1 << 1)
+
+
+/* If any error codes are removed, changed, or added, update the
+ `re_error_msg' table in regex.c. */
+typedef enum
+{
+ REG_NOERROR = 0, /* Success. */
+ REG_NOMATCH, /* Didn't find a match (for regexec). */
+
+ /* POSIX regcomp return error codes. (In the order listed in the
+ standard.) */
+ REG_BADPAT, /* Invalid pattern. */
+ REG_ECOLLATE, /* Not implemented. */
+ REG_ECTYPE, /* Invalid character class name. */
+ REG_EESCAPE, /* Trailing backslash. */
+ REG_ESUBREG, /* Invalid back reference. */
+ REG_EBRACK, /* Unmatched left bracket. */
+ REG_EPAREN, /* Parenthesis imbalance. */
+ REG_EBRACE, /* Unmatched \{. */
+ REG_BADBR, /* Invalid contents of \{\}. */
+ REG_ERANGE, /* Invalid range end. */
+ REG_ESPACE, /* Ran out of memory. */
+ REG_BADRPT, /* No preceding re for repetition op. */
+
+ /* Error codes we've added. */
+ REG_EEND, /* Premature end. */
+ REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
+ REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */
+} reg_errcode_t;
+
+/* This data structure represents a compiled pattern. Before calling
+ the pattern compiler, the fields `buffer', `allocated', `fastmap',
+ `translate', and `no_sub' can be set. After the pattern has been
+ compiled, the `re_nsub' field is available. All other fields are
+ private to the regex routines. */
+
+struct re_pattern_buffer
+{
+/* [[[begin pattern_buffer]]] */
+ /* Space that holds the compiled pattern. It is declared as
+ `unsigned char *' because its elements are
+ sometimes used as array indexes. */
+ unsigned char *buffer;
+
+ /* Number of bytes to which `buffer' points. */
+ unsigned long allocated;
+
+ /* Number of bytes actually used in `buffer'. */
+ unsigned long used;
+
+ /* Syntax setting with which the pattern was compiled. */
+ reg_syntax_t syntax;
+
+ /* Pointer to a fastmap, if any, otherwise zero. re_search uses
+ the fastmap, if there is one, to skip over impossible
+ starting points for matches. */
+ char *fastmap;
+
+ /* Either a translate table to apply to all characters before
+ comparing them, or zero for no translation. The translation
+ is applied to a pattern when it is compiled and to a string
+ when it is matched. */
+ char *translate;
+
+ /* Number of subexpressions found by the compiler. */
+ size_t re_nsub;
+
+ /* Zero if this pattern cannot match the empty string, one else.
+ Well, in truth it's used only in `re_search_2', to see
+ whether or not we should use the fastmap, so we don't set
+ this absolutely perfectly; see `re_compile_fastmap' (the
+ `duplicate' case). */
+ unsigned can_be_null : 1;
+
+ /* If REGS_UNALLOCATED, allocate space in the `regs' structure
+ for `max (RE_NREGS, re_nsub + 1)' groups.
+ If REGS_REALLOCATE, reallocate space if necessary.
+ If REGS_FIXED, use what's there. */
+#define REGS_UNALLOCATED 0
+#define REGS_REALLOCATE 1
+#define REGS_FIXED 2
+ unsigned regs_allocated : 2;
+
+ /* Set to zero when `regex_compile' compiles a pattern; set to one
+ by `re_compile_fastmap' if it updates the fastmap. */
+ unsigned fastmap_accurate : 1;
+
+ /* If set, `re_match_2' does not return information about
+ subexpressions. */
+ unsigned no_sub : 1;
+
+ /* If set, a beginning-of-line anchor doesn't match at the
+ beginning of the string. */
+ unsigned not_bol : 1;
+
+ /* Similarly for an end-of-line anchor. */
+ unsigned not_eol : 1;
+
+ /* If true, an anchor at a newline matches. */
+ unsigned newline_anchor : 1;
+
+/* [[[end pattern_buffer]]] */
+};
+
+typedef struct re_pattern_buffer regex_t;
+
+
+/* search.c (search_buffer) in Emacs needs this one opcode value. It is
+ defined both in `regex.c' and here. */
+#define RE_EXACTN_VALUE 1
+
+/* Type for byte offsets within the string. POSIX mandates this. */
+typedef int regoff_t;
+
+
+/* This is the structure we store register match data in. See
+ regex.texinfo for a full description of what registers match. */
+struct re_registers
+{
+ unsigned num_regs;
+ regoff_t *start;
+ regoff_t *end;
+};
+
+
+/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
+ `re_match_2' returns information about at least this many registers
+ the first time a `regs' structure is passed. */
+#ifndef RE_NREGS
+#define RE_NREGS 30
+#endif
+
+
+/* POSIX specification for registers. Aside from the different names than
+ `re_registers', POSIX uses an array of structures, instead of a
+ structure of arrays. */
+typedef struct
+{
+ regoff_t rm_so; /* Byte offset from string's start to substring's start. */
+ regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
+} regmatch_t;
+
+/* Declarations for routines. */
+
+/* To avoid duplicating every routine declaration -- once with a
+ prototype (if we are ANSI), and once without (if we aren't) -- we
+ use the following macro to declare argument types. This
+ unfortunately clutters up the declarations a bit, but I think it's
+ worth it. */
+
+#if __STDC__
+
+#define _RE_ARGS(args) args
+
+#else /* not __STDC__ */
+
+#define _RE_ARGS(args) ()
+
+#endif /* not __STDC__ */
+
+/* Sets the current default syntax to SYNTAX, and return the old syntax.
+ You can also simply assign to the `re_syntax_options' variable. */
+extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
+
+/* Compile the regular expression PATTERN, with length LENGTH
+ and syntax given by the global `re_syntax_options', into the buffer
+ BUFFER. Return NULL if successful, and an error string if not. */
+extern const char *re_compile_pattern
+ _RE_ARGS ((const char *pattern, int length,
+ struct re_pattern_buffer *buffer));
+
+
+/* Compile a fastmap for the compiled pattern in BUFFER; used to
+ accelerate searches. Return 0 if successful and -2 if there was an
+ internal error. */
+extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
+
+
+/* Search in the string STRING (with length LENGTH) for the pattern
+ compiled into BUFFER. Start searching at position START, for RANGE
+ characters. Return the starting position of the match, -1 for no
+ match, or -2 for an internal error. Also return register
+ information in REGS (if REGS is nonzero and BUFFER->no_sub is zero). */
+extern int re_search
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+ int length, int start, int range, struct re_registers *regs));
+
+
+/* Like `re_search', but search in the concatenation of STRING1 and
+ STRING2. Also, stop searching at index START + STOP. */
+extern int re_search_2
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+ int length1, const char *string2, int length2,
+ int start, int range, struct re_registers *regs, int stop));
+
+
+/* Like `re_search', but return how many characters in STRING the regexp
+ in BUFFER matched, starting at position START. */
+extern int re_match
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+ int length, int start, struct re_registers *regs));
+
+
+/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
+extern int re_match_2
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+ int length1, const char *string2, int length2,
+ int start, struct re_registers *regs, int stop));
+
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+ ENDS. Subsequent matches using BUFFER and REGS will use this memory
+ for recording register information. STARTS and ENDS must be
+ allocated with malloc, and must each be at least `NUM_REGS * sizeof
+ (regoff_t)' bytes long.
+
+ If NUM_REGS == 0, then subsequent matches should allocate their own
+ register data.
+
+ Unless this function is called, the first search or match using
+ PATTERN_BUFFER will allocate its own register data, without
+ freeing the old data. */
+extern void re_set_registers
+ _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
+ unsigned num_regs, regoff_t *starts, regoff_t *ends));
+
+/* 4.2 bsd compatibility. */
+extern char *re_comp _RE_ARGS ((const char *));
+extern int re_exec _RE_ARGS ((const char *));
+
+/* POSIX compatibility. */
+extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags));
+extern int regexec
+ _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch,
+ regmatch_t pmatch[], int eflags));
+extern size_t regerror
+ _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf,
+ size_t errbuf_size));
+extern void regfree _RE_ARGS ((regex_t *preg));
+
+#endif /* not __REGEXP_LIBRARY_H__ */
+
+/*
+Local variables:
+make-backup-files: t
+version-control: t
+trim-versions-without-asking: nil
+End:
+*/
Added: trunk/tagmanager/lregex.c
===================================================================
--- trunk/tagmanager/lregex.c (rev 0)
+++ trunk/tagmanager/lregex.c 2007-11-17 19:27:50 UTC (rev 2053)
@@ -0,0 +1,704 @@
+/*
+* $Id: lregex.c 576 2007-06-30 04:16:23Z elliotth $
+*
+* Copyright (c) 2000-2003, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for applying regular expression matching.
+*
+* The code for utilizing the Gnu regex package with regards to processing the
+* regex option and checking for regex matches was adapted from routines in
+* Gnu etags.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+#include <glib.h>
+
+#ifdef HAVE_REGCOMP
+# include <ctype.h>
+# include <stddef.h>
+# ifdef HAVE_SYS_TYPES_H
+# include <sys/types.h> /* declare off_t (not known to regex.h on FreeBSD) */
+# endif
+# include <regex.h>
+#endif
+
+#include "main.h"
+#include "entry.h"
+#include "parse.h"
+#include "read.h"
+
+#ifdef HAVE_REGEX
+
+/*
+* MACROS
+*/
+
+/* Back-references \0 through \9 */
+#define BACK_REFERENCE_COUNT 10
+
+#if defined (HAVE_REGCOMP) && !defined (REGCOMP_BROKEN)
+# define POSIX_REGEX
+#endif
+
+#define REGEX_NAME "Regex"
+
+/*
+* DATA DECLARATIONS
+*/
+#if defined (POSIX_REGEX)
+
+struct sKind {
+ boolean enabled;
+ char letter;
+ char* name;
+ char* description;
+};
+
+enum pType { PTRN_TAG, PTRN_CALLBACK };
+
+typedef struct {
+ regex_t *pattern;
+ enum pType type;
+ union {
+ struct {
+ char *name_pattern;
+ struct sKind kind;
+ } tag;
+ struct {
+ regexCallback function;
+ } callback;
+ } u;
+} regexPattern;
+
+#endif
+
+typedef struct {
+ regexPattern *patterns;
+ unsigned int count;
+} patternSet;
+
+/*
+* DATA DEFINITIONS
+*/
+
+static boolean regexBroken = FALSE;
+
+/* Array of pattern sets, indexed by language */
+static patternSet* Sets = NULL;
+static int SetUpper = -1; /* upper language index in list */
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+static void clearPatternSet (const langType language)
+{
+ if (language <= SetUpper)
+ {
+ patternSet* const set = Sets + language;
+ unsigned int i;
+ for (i = 0 ; i < set->count ; ++i)
+ {
+ regexPattern *p = &set->patterns [i];
+#if defined (POSIX_REGEX)
+ regfree (p->pattern);
+#endif
+ eFree (p->pattern);
+ p->pattern = NULL;
+
+ if (p->type == PTRN_TAG)
+ {
+ eFree (p->u.tag.name_pattern);
+ p->u.tag.name_pattern = NULL;
+ eFree (p->u.tag.kind.name);
+ p->u.tag.kind.name = NULL;
+ if (p->u.tag.kind.description != NULL)
+ {
+ eFree (p->u.tag.kind.description);
+ p->u.tag.kind.description = NULL;
+ }
+ }
+ }
+ if (set->patterns != NULL)
+ eFree (set->patterns);
+ set->patterns = NULL;
+ set->count = 0;
+ }
+}
+
+/*
+* Regex pseudo-parser
+*/
+
+static void makeRegexTag (
+ const vString* const name, const struct sKind* const kind)
+{
+ if (kind->enabled)
+ {
+ tagEntryInfo e;
+ Assert (name != NULL && vStringLength (name) > 0);
+ Assert (kind != NULL);
+ initTagEntry (&e, vStringValue (name));
+ e.kind = kind->letter;
+ e.kindName = kind->name;
+ makeTagEntry (&e);
+ }
+}
+
+/*
+* Regex pattern definition
+*/
+
+/* Take a string like "/blah/" and turn it into "blah", making sure
+ * that the first and last characters are the same, and handling
+ * quoted separator characters. Actually, stops on the occurrence of
+ * an unquoted separator. Also turns "\t" into a Tab character.
+ * Returns pointer to terminating separator. Works in place. Null
+ * terminates name string.
+ */
+static char* scanSeparators (char* name)
+{
+ char sep = name [0];
+ char *copyto = name;
+ boolean quoted = FALSE;
+
+ for (++name ; *name != '\0' ; ++name)
+ {
+ if (quoted)
+ {
+ if (*name == sep)
+ *copyto++ = sep;
+ else if (*name == 't')
+ *copyto++ = '\t';
+ else
+ {
+ /* Something else is quoted, so preserve the quote. */
+ *copyto++ = '\\';
+ *copyto++ = *name;
+ }
+ quoted = FALSE;
+ }
+ else if (*name == '\\')
+ quoted = TRUE;
+ else if (*name == sep)
+ {
+ break;
+ }
+ else
+ *copyto++ = *name;
+ }
+ *copyto = '\0';
+ return name;
+}
+
+/* Parse `regexp', in form "/regex/name/[k,Kind/]flags" (where the separator
+ * character is whatever the first character of `regexp' is), by breaking it
+ * up into null terminated strings, removing the separators, and expanding
+ * '\t' into tabs. When complete, `regexp' points to the line matching
+ * pattern, a pointer to the name matching pattern is written to `name', a
+ * pointer to the kinds is written to `kinds' (possibly NULL), and a pointer
+ * to the trailing flags is written to `flags'. If the pattern is not in the
+ * correct format, a false value is returned.
+ */
+static boolean parseTagRegex (
+		char* const regexp, char** const name,
+		char** const kinds, char** const flags)
+{
+	boolean result = FALSE;
+	const int separator = (unsigned char) regexp [0];
+
+	*name = scanSeparators (regexp);	/* regexp is unquoted in place; *name -> 2nd separator */
+	if (*regexp == '\0')
+		printf ("regex: empty regexp");
+	else if (**name != separator)
+		printf ("regex: %s: incomplete regexp", regexp);
+	else
+	{
+		char* const third = scanSeparators (*name);	/* *name now holds the unquoted name pattern */
+		if (**name == '\0')
+			printf ("regex: %s: regexp missing name pattern", regexp);
+		else if ((*name) [strlen (*name) - 1] == '\\')	/* "else": never index an empty name at strlen - 1 */
+			printf ("regex: error in name pattern: \"%s\"", *name);
+		else if (*third != separator)	/* every diagnosed error leaves result FALSE */
+			printf ("regex: %s: regexp missing final separator", regexp);
+		else
+		{
+			char* const fourth = scanSeparators (third);
+			if (*fourth == separator)
+			{
+				/* four fields: the third one is the kind spec, flags follow */
+				*kinds = third;
+				scanSeparators (fourth);
+				*flags = fourth;
+			}
+			else
+			{
+				/* three fields only: no kind spec, third field is the flags */
+				*flags = third;
+				*kinds = NULL;
+			}
+			result = TRUE;
+		}
+	}
+	return result;
+}
+
+static void addCompiledTagPattern (
+ const langType language, regex_t* const pattern,
+ char* const name, const char kind, char* const kindName,
+ char *const description)
+{
+ patternSet* set;
+ regexPattern *ptrn;
+ if (language > SetUpper)
+ {
+ int i;
+ Sets = xRealloc (Sets, (language + 1), patternSet);
+ for (i = SetUpper + 1 ; i <= language ; ++i)
+ {
+ Sets [i].patterns = NULL;
+ Sets [i].count = 0;
+ }
+ SetUpper = language;
+ }
+ set = Sets + language;
+ set->patterns = xRealloc (set->patterns, (set->count + 1), regexPattern);
+ ptrn = &set->patterns [set->count];
+ set->count += 1;
+
+ ptrn->pattern = pattern;
+ ptrn->type = PTRN_TAG;
+ ptrn->u.tag.name_pattern = name;
+ ptrn->u.tag.kind.enabled = TRUE;
+ ptrn->u.tag.kind.letter = kind;
+ ptrn->u.tag.kind.name = kindName;
+ ptrn->u.tag.kind.description = description;
+}
+
+static void addCompiledCallbackPattern (
+ const langType language, regex_t* const pattern,
+ const regexCallback callback)
+{
+ patternSet* set;
+ regexPattern *ptrn;
+ if (language > SetUpper)
+ {
+ int i;
+ Sets = xRealloc (Sets, (language + 1), patternSet);
+ for (i = SetUpper + 1 ; i <= language ; ++i)
+ {
+ Sets [i].patterns = NULL;
+ Sets [i].count = 0;
+ }
+ SetUpper = language;
+ }
+ set = Sets + language;
+ set->patterns = xRealloc (set->patterns, (set->count + 1), regexPattern);
+ ptrn = &set->patterns [set->count];
+ set->count += 1;
+
+ ptrn->pattern = pattern;
+ ptrn->type = PTRN_CALLBACK;
+ ptrn->u.callback.function = callback;
+}
+
+#if defined (POSIX_REGEX)
+
+static regex_t* compileRegex (const char* const regexp, const char* const flags)
+{
+ int cflags = REG_EXTENDED | REG_NEWLINE;
+ regex_t *result = NULL;
+ int errcode;
+ int i;
+ for (i = 0 ; flags != NULL && flags [i] != '\0' ; ++i)
+ {
+ switch ((int) flags [i])
+ {
+ case 'b': cflags &= ~REG_EXTENDED; break;
+ case 'e': cflags |= REG_EXTENDED; break;
+ case 'i': cflags |= REG_ICASE; break;
+ default: printf ("regex: unknown regex flag: '%c'", *flags); break;
+ }
+ }
+ result = xMalloc (1, regex_t);
+ errcode = regcomp (result, regexp, cflags);
+ if (errcode != 0)
+ {
+ char errmsg[256];
+ regerror (errcode, result, errmsg, 256);
+ printf ("regex: regcomp %s: %s", regexp, errmsg);
+ regfree (result);
+ eFree (result);
+ result = NULL;
+ }
+ return result;
+}
+
+#endif
+
+static void parseKinds (
+ const char* const kinds, char* const kind, char** const kindName,
+ char **description)
+{
+ *kind = '\0';
+ *kindName = NULL;
+ *description = NULL;
+ if (kinds == NULL || kinds [0] == '\0')
+ {
+ *kind = 'r';
+ *kindName = eStrdup ("regex");
+ }
+ else if (kinds [0] != '\0')
+ {
+ const char* k = kinds;
+ if (k [0] != ',' && (k [1] == ',' || k [1] == '\0'))
+ *kind = *k++;
+ else
+ *kind = 'r';
+ if (*k == ',')
+ ++k;
+ if (k [0] == '\0')
+ *kindName = eStrdup ("regex");
+ else
+ {
+ const char *const comma = strchr (k, ',');
+ if (comma == NULL)
+ *kindName = eStrdup (k);
+ else
+ {
+ *kindName = (char*) eMalloc (comma - k + 1);
+ strncpy (*kindName, k, comma - k);
+ (*kindName) [comma - k] = '\0';
+ k = comma + 1;
+ if (k [0] != '\0')
+ *description = eStrdup (k);
+ }
+ }
+ }
+}
+
+static void printRegexKind (const regexPattern *pat, unsigned int i, boolean indent)
+{
+ const struct sKind *const kind = &pat [i].u.tag.kind;
+ const char *const indentation = indent ? " " : "";
+ Assert (pat [i].type == PTRN_TAG);
+ printf ("%s%c %s %s\n", indentation,
+ kind->letter != '\0' ? kind->letter : '?',
+ kind->description != NULL ? kind->description : kind->name,
+ kind->enabled ? "" : " [off]");
+}
+
+static void processLanguageRegex (const langType language,
+ const char* const parameter)
+{
+ if (parameter == NULL || parameter [0] == '\0')
+ clearPatternSet (language);
+ else if (parameter [0] != '@')
+ addLanguageRegex (language, parameter);
+ else if (! doesFileExist (parameter + 1))
+ printf ("regex: cannot open regex file");
+ else
+ {
+ const char* regexfile = parameter + 1;
+ FILE* const fp = fopen (regexfile, "r");
+ if (fp == NULL)
+ printf ("regex: %s", regexfile);
+ else
+ {
+ vString* const regex = vStringNew ();
+ while (readLine (regex, fp))
+ addLanguageRegex (language, vStringValue (regex));
+ fclose (fp);
+ vStringDelete (regex);
+ }
+ }
+}
+
+/*
+* Regex pattern matching
+*/
+
+#if defined (POSIX_REGEX)
+/* Expand back-references \1..\9 in `out' with the `pmatch' spans of `in'; newlines are dropped. Returns a new vString. */
+static vString* substitute (
+		const char* const in, const char* out,
+		const int nmatch, const regmatch_t* const pmatch)
+{
+	vString* result = vStringNew ();
+	const char* p;
+	for (p = out ; *p != '\0' ; p++)
+	{
+		if (*p == '\\' && isdigit ((unsigned char) *++p))	/* unsigned char: isdigit() is undefined for negative values */
+		{
+			const int dig = *p - '0';
+			if (0 < dig && dig < nmatch && pmatch [dig].rm_so != -1)	/* \0 and unmatched groups expand to nothing */
+				vStringNCatS (result, in + pmatch [dig].rm_so,
+					pmatch [dig].rm_eo - pmatch [dig].rm_so);
+		}
+		else if (*p == '\0')
+			break;	/* lone trailing backslash: do not let p++ step past the terminator */
+		else if (*p != '\n' && *p != '\r')
+			vStringPut (result, *p);
+	}
+	vStringTerminate (result);
+	return result;
+}
+
+static void matchTagPattern (const vString* const line,
+ const regexPattern* const patbuf,
+ const regmatch_t* const pmatch)
+{
+ vString *const name = substitute (vStringValue (line),
+ patbuf->u.tag.name_pattern, BACK_REFERENCE_COUNT, pmatch);
+ vStringStripLeading (name);
+ vStringStripTrailing (name);
+ if (vStringLength (name) > 0)
+ makeRegexTag (name, &patbuf->u.tag.kind);
+ else
+ error (WARNING, "%s:%ld: null expansion of name pattern \"%s\"",
+ getInputFileName (), getInputLineNumber (),
+ patbuf->u.tag.name_pattern);
+ vStringDelete (name);
+}
+
+static void matchCallbackPattern (
+ const vString* const line, const regexPattern* const patbuf,
+ const regmatch_t* const pmatch)
+{
+ regexMatch matches [BACK_REFERENCE_COUNT];
+ unsigned int count = 0;
+ int i;
+ for (i = 0 ; i < BACK_REFERENCE_COUNT && pmatch [i].rm_so != -1 ; ++i)
+ {
+ matches [i].start = pmatch [i].rm_so;
+ matches [i].length = pmatch [i].rm_eo - pmatch [i].rm_so;
+ ++count;
+ }
+ patbuf->u.callback.function (vStringValue (line), matches, count);
+}
+
+static boolean matchRegexPattern (const vString* const line,
+ const regexPattern* const patbuf)
+{
+ boolean result = FALSE;
+ regmatch_t pmatch [BACK_REFERENCE_COUNT];
+ const int match = regexec (patbuf->pattern, vStringValue (line),
+ BACK_REFERENCE_COUNT, pmatch, 0);
+ if (match == 0)
+ {
+ result = TRUE;
+ if (patbuf->type == PTRN_TAG)
+ matchTagPattern (line, patbuf, pmatch);
+ else if (patbuf->type == PTRN_CALLBACK)
+ matchCallbackPattern (line, patbuf, pmatch);
+ else
+ {
+ Assert ("invalid pattern type" == NULL);
+ result = FALSE;
+ }
+ }
+ return result;
+}
+
+#endif
+
+/* PUBLIC INTERFACE */
+
+/* Match against all patterns for specified language. Returns true if at least
+ * one pattern matched.
+ */
+extern boolean matchRegex (const vString* const line, const langType language)
+{
+ boolean result = FALSE;
+ if (language != LANG_IGNORE && language <= SetUpper &&
+ Sets [language].count > 0)
+ {
+ const patternSet* const set = Sets + language;
+ unsigned int i;
+ for (i = 0 ; i < set->count ; ++i)
+ if (matchRegexPattern (line, set->patterns + i))
+ result = TRUE;
+ }
+ return result;
+}
+
+extern void findRegexTags (void)
+{
+ /* merely read all lines of the file */
+ while (fileReadLine () != NULL)
+ ;
+}
+
+#endif /* HAVE_REGEX */
+
+extern void addTagRegex (
+ const langType language __unused__,
+ const char* const regex __unused__,
+ const char* const name __unused__,
+ const char* const kinds __unused__,
+ const char* const flags __unused__)
+{
+#ifdef HAVE_REGEX
+ Assert (regex != NULL);
+ Assert (name != NULL);
+ if (! regexBroken)
+ {
+ regex_t* const cp = compileRegex (regex, flags);
+ if (cp != NULL)
+ {
+ char kind;
+ char* kindName;
+ char* description;
+ parseKinds (kinds, &kind, &kindName, &description);
+ addCompiledTagPattern (language, cp, eStrdup (name),
+ kind, kindName, description);
+ }
+ }
+#endif
+}
+
+extern void addCallbackRegex (
+ const langType language __unused__,
+ const char* const regex __unused__,
+ const char* const flags __unused__,
+ const regexCallback callback __unused__)
+{
+#ifdef HAVE_REGEX
+ Assert (regex != NULL);
+ if (! regexBroken)
+ {
+ regex_t* const cp = compileRegex (regex, flags);
+ if (cp != NULL)
+ addCompiledCallbackPattern (language, cp, callback);
+ }
+#endif
+}
+/* Parse a "/regex/name/[kinds/]flags" definition and register it as a tag pattern for `language'. */
+extern void addLanguageRegex (
+		const langType language __unused__, const char* const regex __unused__)
+{
+#ifdef HAVE_REGEX
+	if (! regexBroken)
+	{
+		char *const regex_pat = eStrdup (regex);	/* scratch copy: parseTagRegex splits it in place */
+		char *name, *kinds, *flags;
+		if (parseTagRegex (regex_pat, &name, &kinds, &flags))
+			addTagRegex (language, regex_pat, name, kinds, flags);
+		/* addTagRegex compiles or duplicates everything it keeps, so the scratch
+		 * copy is released on every path, including when parsing fails. */
+		eFree (regex_pat);
+	}
+#endif
+}
+
+/*
+* Regex option parsing
+*/
+
+extern boolean processRegexOption (const char *const option,
+ const char *const parameter __unused__)
+{
+ boolean handled = FALSE;
+ const char* const dash = strchr (option, '-');
+ if (dash != NULL && strncmp (option, "regex", dash - option) == 0)
+ {
+#ifdef HAVE_REGEX
+ langType language;
+ language = getNamedLanguage (dash + 1);
+ if (language == LANG_IGNORE)
+ printf ("regex: unknown language \"%s\" in --%s option", (dash + 1), option);
+ else
+ processLanguageRegex (language, parameter);
+#else
+ printf ("regex: regex support not available; required for --%s option",
+ option);
+#endif
+ handled = TRUE;
+ }
+ return handled;
+}
+
+extern void disableRegexKinds (const langType language __unused__)
+{
+#ifdef HAVE_REGEX
+ if (language <= SetUpper && Sets [language].count > 0)
+ {
+ patternSet* const set = Sets + language;
+ unsigned int i;
+ for (i = 0 ; i < set->count ; ++i)
+ if (set->patterns [i].type == PTRN_TAG)
+ set->patterns [i].u.tag.kind.enabled = FALSE;
+ }
+#endif
+}
+
+extern boolean enableRegexKind (
+ const langType language __unused__,
+ const int kind __unused__, const boolean mode __unused__)
+{
+ boolean result = FALSE;
+#ifdef HAVE_REGEX
+ if (language <= SetUpper && Sets [language].count > 0)
+ {
+ patternSet* const set = Sets + language;
+ unsigned int i;
+ for (i = 0 ; i < set->count ; ++i)
+ if (set->patterns [i].type == PTRN_TAG &&
+ set->patterns [i].u.tag.kind.letter == kind)
+ {
+ set->patterns [i].u.tag.kind.enabled = mode;
+ result = TRUE;
+ }
+ }
+#endif
+ return result;
+}
+
+extern void printRegexKinds (const langType language __unused__, boolean indent __unused__)
+{
+#ifdef HAVE_REGEX
+ if (language <= SetUpper && Sets [language].count > 0)
+ {
+ patternSet* const set = Sets + language;
+ unsigned int i;
+ for (i = 0 ; i < set->count ; ++i)
+ if (set->patterns [i].type == PTRN_TAG)
+ printRegexKind (set->patterns, i, indent);
+ }
+#endif
+}
+
+extern void freeRegexResources (void)
+{
+#ifdef HAVE_REGEX
+ int i;
+ for (i = 0 ; i <= SetUpper ; ++i)
+ clearPatternSet (i);
+ if (Sets != NULL)
+ eFree (Sets);
+ Sets = NULL;
+ SetUpper = -1;
+#endif
+}
+
+/* Check for broken regcomp() on Cygwin; on failure sets regexBroken so no patterns get compiled. */
+extern void checkRegex (void)
+{
+#if defined (HAVE_REGEX) && defined (CHECK_REGCOMP)
+	regex_t patbuf;
+	if (regcomp (&patbuf, "/hello/", 0) != 0)
+	{
+		error (WARNING, "Disabling broken regex");
+		regexBroken = TRUE;
+	}
+	else regfree (&patbuf);	/* probe only: release the successfully compiled pattern */
+#endif
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
Modified: trunk/tagmanager/makefile.win32
===================================================================
--- trunk/tagmanager/makefile.win32 2007-11-17 17:03:25 UTC (rev 2052)
+++ trunk/tagmanager/makefile.win32 2007-11-17 19:27:50 UTC (rev 2053)
@@ -9,6 +9,8 @@
COMPLIB=tagmanager.a
+REGEX_DEFINES = -DHAVE_REGCOMP -DREGEX_MALLOC -DSTDC_HEADERS=1
+
GTK_INCLUDES= \
-I$(PREFIX)/include/gtk-2.0 \
-I$(PREFIX)/lib/gtk-2.0/include \
@@ -25,7 +27,7 @@
CCFLAGS=-Wall -O2 -g -mms-bitfields -DPACKAGE=\"geany\" -DG_OS_WIN32 -Wno-missing-braces -Wno-char-subscripts $(INCLUDEDIRS)
.c.o:
- $(CC) $(CCFLAGS) -w -c $<
+ $(CC) $(REGEX_DEFINES) $(CCFLAGS) -w -c $<
all: $(COMPLIB)
@@ -33,9 +35,9 @@
-$(RM) deps.mak *.o $(COMPLIB)
$(COMPLIB): args.o c.o fortran.o make.o conf.o pascal.o perl.o php.o diff.o vhdl.o lua.o js.o \
-haskell.o haxe.o python.o regex.o rest.o sh.o ctags.o entry.o get.o keyword.o options.o parse.o basic.o \
-read.o sort.o strlist.o latex.o docbook.o tcl.o ruby.o asm.o sql.o css.o vstring.o tm_workspace.o tm_work_object.o \
-tm_source_file.o tm_project.o tm_tag.o tm_symbol.o tm_file_entry.o \
+haskell.o haxe.o python.o lregex.o rest.o sh.o ctags.o entry.o get.o keyword.o options.o parse.o basic.o \
+read.o sort.o strlist.o latex.o docbook.o tcl.o ruby.o asm.o sql.o css.o vstring.o regex.o \
+tm_workspace.o tm_work_object.o tm_source_file.o tm_project.o tm_tag.o tm_symbol.o tm_file_entry.o \
tm_tagmanager.o
$(AR) rc $@ $^
$(RANLIB) $@
Modified: trunk/tagmanager/parse.c
===================================================================
--- trunk/tagmanager/parse.c 2007-11-17 17:03:25 UTC (rev 2052)
+++ trunk/tagmanager/parse.c 2007-11-17 19:27:50 UTC (rev 2053)
@@ -56,7 +56,7 @@
extern void makeSimpleScopedTag (const vString* const name,
kindOption* const kinds, const int kind,
const char* scope, const char *scope2,
- const char *access)
+ const char *laccess)
{
if (name != NULL && vStringLength (name) > 0)
{
@@ -67,7 +67,7 @@
e.kind = kinds [kind].letter;
e.extensionFields.scope[0] = scope;
e.extensionFields.scope[1] = scope2;
- e.extensionFields.access = access;
+ e.extensionFields.access = laccess;
makeTagEntry (&e);
}
@@ -541,7 +541,7 @@
for (i = 0 ; i < lang->kindCount ; ++i)
printLangugageKindOption (lang->kinds + i);
#ifdef HAVE_REGEX
- printRegexKindOptions (language);
+ // printRegexKindOptions (language); // unused
#endif
}
}
Modified: trunk/tagmanager/parse.h
===================================================================
--- trunk/tagmanager/parse.h 2007-11-17 17:03:25 UTC (rev 2052)
+++ trunk/tagmanager/parse.h 2007-11-17 19:27:50 UTC (rev 2053)
@@ -113,7 +113,7 @@
/* Regex interface */
#ifdef HAVE_REGEX
extern void findRegexTags (void);
-extern void matchRegex (const vString* const line, const langType language);
+extern boolean matchRegex (const vString* const line, const langType language);
#endif
extern boolean processRegexOption (const char *const option, const char *const parameter);
extern void addLanguageRegex (const langType language, const char* const regex);
Modified: trunk/tagmanager/php.c
===================================================================
--- trunk/tagmanager/php.c 2007-11-17 17:03:25 UTC (rev 2052)
+++ trunk/tagmanager/php.c 2007-11-17 19:27:50 UTC (rev 2053)
@@ -31,16 +31,74 @@
K_CLASS, K_DEFINE, K_FUNCTION, K_VARIABLE
} phpKind;
+#if 0
static kindOption PhpKinds [] = {
{ TRUE, 'c', "class", "classes" },
- { TRUE, 'd', "macro", "constant definitions" },
+ { TRUE, 'd', "define", "constant definitions" },
{ TRUE, 'f', "function", "functions" },
{ TRUE, 'v', "variable", "variables" }
};
+#endif
/*
* FUNCTION DEFINITIONS
*/
+
+/* JavaScript patterns are duplicated in jscript.c */
+
+/*
+ * Cygwin doesn't support non-ASCII characters in character classes.
+ * This isn't a good solution to the underlying problem, because we're still
+ * making assumptions about the character encoding.
+ * Really, these regular expressions need to concentrate on what marks the
+ * end of an identifier, and we need something like iconv to take into
+ * account the user's locale (or an override on the command-line.)
+ */
+#ifdef __CYGWIN__
+#define ALPHA "[:alpha:]"
+#define ALNUM "[:alnum:]"
+#else
+#define ALPHA "A-Za-z\x7f-\xff"
+#define ALNUM "0-9A-Za-z\x7f-\xff"
+#endif
+
+static void installPHPRegex (const langType language)
+{
+ addTagRegex(language, "(^|[ \t])class[ \t]+([" ALPHA "_][" ALNUM "_]*)",
+ "\\2", "c,class,classes", NULL);
+ addTagRegex(language, "(^|[ \t])interface[ \t]+([" ALPHA "_][" ALNUM "_]*)",
+ "\\2", "i,interface,interfaces", NULL);
+ addTagRegex(language, "(^|[ \t])define[ \t]*\\([ \t]*['\"]?([" ALPHA "_][" ALNUM "_]*)",
+ "\\2", "d,define,constant definitions", NULL);
+ addTagRegex(language, "(^|[ \t])function[ \t]+&?[ \t]*([" ALPHA "_][" ALNUM "_]*)",
+ "\\2", "f,function,functions", NULL);
+ addTagRegex(language, "(^|[ \t])(\\$|::\\$|\\$this->)([" ALPHA "_][" ALNUM "_]*)[ \t]*=",
+ "\\3", "v,variable,variables", NULL);
+ addTagRegex(language, "(^|[ \t])(var|public|protected|private|static)[ \t]+\\$([" ALPHA "_][" ALNUM "_]*)[ \t]*[=;]",
+ "\\3", "v,variable,variables", NULL);
+
+ /* function regex is covered by PHP regex */
+ addTagRegex (language, "(^|[ \t])([A-Za-z0-9_]+)[ \t]*[=:][ \t]*function[ \t]*\\(",
+ "\\2", "j,jsfunction,javascript functions", NULL);
+ addTagRegex (language, "(^|[ \t])([A-Za-z0-9_.]+)\\.([A-Za-z0-9_]+)[ \t]*=[ \t]*function[ \t]*\\(",
+ "\\2.\\3", "j,jsfunction,javascript functions", NULL);
+ addTagRegex (language, "(^|[ \t])([A-Za-z0-9_.]+)\\.([A-Za-z0-9_]+)[ \t]*=[ \t]*function[ \t]*\\(",
+ "\\3", "j,jsfunction,javascript functions", NULL);
+}
+
+/* Create parser definition structure */
+extern parserDefinition* PhpParser (void)
+{
+ static const char *const extensions [] = { "php", "php3", "phtml", NULL };
+ parserDefinition* def = parserNew ("PHP");
+ def->extensions = extensions;
+ def->initialize = installPHPRegex;
+ def->regex = TRUE;
+ return def;
+}
+
+#if 0
+
static boolean isLetter(const int c)
{
return (boolean)(isalpha(c) || (c >= 127 && c <= 255));
@@ -101,7 +159,7 @@
cp++;
while (isspace ((int) *cp))
- ++cp;
+ ++cp;
}
vStringClear (name);
@@ -113,12 +171,11 @@
vStringTerminate (name);
makeSimpleTag (name, PhpKinds, K_FUNCTION);
vStringClear (name);
- }
- else if ((f = strstr ((const char*) cp, "class")) != NULL &&
- (f == (const char*) cp || isspace ((int) f [-1])) &&
- isspace ((int) f [5]))
+ }
+ else if (strncmp ((const char*) cp, "class", (size_t) 5) == 0 &&
+ isspace ((int) cp [5]))
{
- cp = ((const unsigned char *) f) + 5;
+ cp += 5;
while (isspace ((int) *cp))
++cp;
@@ -149,7 +206,7 @@
++cp;
else if (! ((*cp == '_') || isalnum ((int) *cp)))
continue;
-
+
vStringClear (name);
while (isalnum ((int) *cp) || *cp == '_')
{
@@ -175,5 +232,6 @@
return def;
}
+#endif
/* vi:set tabstop=4 shiftwidth=4: */
Modified: trunk/tagmanager/regex.c
===================================================================
--- trunk/tagmanager/regex.c 2007-11-17 17:03:25 UTC (rev 2052)
+++ trunk/tagmanager/regex.c 2007-11-17 19:27:50 UTC (rev 2053)
@@ -1,654 +1,4952 @@
-/*
-*
-* Copyright (c) 2000-2001, Darren Hiebert
-*
-* This source code is released for free distribution under the terms of the
-* GNU General Public License.
-*
-* This module contains functions for applying regular expression matching.
-*
-* The code for utlizing the Gnu regex package with regards to processing the
-* regex option and checking for regex matches was adapted from routines in
-* Gnu etags.
-*/
+/* Extended regular expression matching and search library,
+ version 0.12, with minor changes by Darren Hiebert.
+ (Implements POSIX draft P10003.2/D11.2, except for
+ internationalization features.)
-/*
-* INCLUDE FILES
-*/
-#include "general.h" /* must always come first */
+ Copyright (C) 1993 Free Software Foundation, Inc.
-#include <string.h>
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
-#if defined (HAVE_REGCOMP) || defined (HAVE_RE_COMPILE_PATTERN)
-# include <ctype.h>
-# include <stddef.h>
-# ifdef HAVE_SYS_TYPES_H
-# include <sys/types.h> /* declare off_t (not known to regex.h on FreeBSD) */
-# endif
-# include "regex.h"
-#endif
-#include <glib/gstdio.h>
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
-#include "entry.h"
-#include "main.h"
-#include "parse.h"
-#include "read.h"
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
-#ifdef HAVE_REGEX
+/* AIX requires this to be the first thing in the file. */
+#if defined (_AIX) && !defined (REGEX_MALLOC)
+ #pragma alloca
+#endif
-/*
-* MACROS
-*/
+#define _GNU_SOURCE
-/* Back-references \0 through \9 */
-#define BACK_REFERENCE_COUNT 10
+/* We need this for `regex.h', and perhaps for the Emacs include files. */
+#include <sys/types.h>
-#if defined (HAVE_REGCOMP) && !defined (REGCOMP_BROKEN)
-# define POSIX_REGEX
+#ifdef HAVE_CONFIG_H
+#include "config.h"
#endif
-#define REGEX_NAME "Regex"
+/* The `emacs' switch turns on certain matching commands
+ that make sense only in Emacs. */
+#ifdef emacs
-/*
-* DATA DECLARATIONS
-*/
-#if defined (POSIX_REGEX)
+#include "lisp.h"
+#include "buffer.h"
+#include "syntax.h"
-struct sKind {
- boolean enabled;
- char letter;
- char* name;
-};
+/* Emacs uses `NULL' as a predicate. */
+#undef NULL
-enum pType { PTRN_TAG, PTRN_CALLBACK };
+#else /* not emacs */
-typedef struct {
- regex_t *pattern;
- enum pType type;
- union {
- struct {
- char *name_pattern;
- struct sKind kind;
- } tag;
- struct {
- regexCallback function;
- } callback;
- } u;
-} regexPattern;
+/* We used to test for `BSTRING' here, but only GCC and Emacs define
+ `BSTRING', as far as I know, and neither of them use this code. */
+#if HAVE_STRING_H || STDC_HEADERS
+#include <string.h>
+#ifndef bcmp
+#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
+#endif
+#ifndef bcopy
+#define bcopy(s, d, n) memcpy ((d), (s), (n))
+#endif
+#ifndef bzero
+#define bzero(s, n) memset ((s), 0, (n))
+#endif
+#else
+#include <strings.h>
+#endif
+#ifdef STDC_HEADERS
+#include <stdlib.h>
+#else
+char *malloc ();
+char *realloc ();
#endif
-typedef struct {
- regexPattern *patterns;
- unsigned int count;
-} patternSet;
-/*
-* DATA DEFINITIONS
-*/
+/* Define the syntax stuff for \<, \>, etc. */
-static boolean regexBroken = FALSE;
+/* This must be nonzero for the wordchar and notwordchar pattern
+ commands in re_match_2. */
+#ifndef Sword
+#define Sword 1
+#endif
-/* Array of pattern sets, indexed by language */
-static patternSet* Sets = NULL;
-static int SetUpper = -1; /* upper language index in list */
+#ifdef SYNTAX_TABLE
-/*
-* FUNCTION DEFINITIONS
-*/
+extern char *re_syntax_table;
-static void clearPatternSet (const langType language)
+#else /* not SYNTAX_TABLE */
+
+/* How many characters in the character set. */
+#define CHAR_SET_SIZE 256
+
+static char re_syntax_table[CHAR_SET_SIZE];
+
+static void
+init_syntax_once ()
{
- if (language < SetUpper)
- {
- patternSet* const set = Sets + language;
- unsigned int i;
- for (i = 0 ; i < set->count ; ++i)
- {
-#if defined (POSIX_REGEX)
- regfree (set->patterns [i].pattern);
+ register int c;
+ static int done = 0;
+
+ if (done)
+ return;
+
+ bzero (re_syntax_table, sizeof re_syntax_table);
+
+ for (c = 'a'; c <= 'z'; c++)
+ re_syntax_table[c] = Sword;
+
+ for (c = 'A'; c <= 'Z'; c++)
+ re_syntax_table[c] = Sword;
+
+ for (c = '0'; c <= '9'; c++)
+ re_syntax_table[c] = Sword;
+
+ re_syntax_table['_'] = Sword;
+
+ done = 1;
+}
+
+#endif /* not SYNTAX_TABLE */
+
+#define SYNTAX(c) re_syntax_table[c]
+
+#endif /* not emacs */
+
+/* Get the interface, including the syntax bits. */
+#include "regex.h"
+
+/* isalpha etc. are used for the character classes. */
+#include <ctype.h>
+
+#ifndef isascii
+#define isascii(c) 1
#endif
- eFree (set->patterns [i].pattern);
- set->patterns [i].pattern = NULL;
- if (set->patterns [i].type == PTRN_TAG)
- {
- eFree (set->patterns [i].u.tag.name_pattern);
- set->patterns [i].u.tag.name_pattern = NULL;
- }
- }
- if (set->patterns != NULL)
- eFree (set->patterns);
- set->patterns = NULL;
- set->count = 0;
- }
+#ifdef isblank
+#define ISBLANK(c) (isascii (c) && isblank (c))
+#else
+#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+#endif
+#ifdef isgraph
+#define ISGRAPH(c) (isascii (c) && isgraph (c))
+#else
+#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
+#endif
+
+#define ISPRINT(c) (isascii (c) && isprint (c))
+#define ISDIGIT(c) (isascii (c) && isdigit (c))
+#define ISALNUM(c) (isascii (c) && isalnum (c))
+#define ISALPHA(c) (isascii (c) && isalpha (c))
+#define ISCNTRL(c) (isascii (c) && iscntrl (c))
+#define ISLOWER(c) (isascii (c) && islower (c))
+#define ISPUNCT(c) (isascii (c) && ispunct (c))
+#define ISSPACE(c) (isascii (c) && isspace (c))
+#define ISUPPER(c) (isascii (c) && isupper (c))
+#define ISXDIGIT(c) (isascii (c) && isxdigit (c))
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+/* We remove any previous definition of `SIGN_EXTEND_CHAR',
+ since ours (we hope) works properly with all combinations of
+ machines, compilers, `char' and `unsigned char' argument types.
+ (Per Bothner suggested the basic approach.) */
+#undef SIGN_EXTEND_CHAR
+#if __STDC__
+#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
+#else /* not __STDC__ */
+/* As in Harbison and Steele. */
+#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
+#endif
+
+/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
+ use `alloca' instead of `malloc'. This is because using malloc in
+ re_search* or re_match* could cause memory leaks when C-g is used in
+ Emacs; also, malloc is slower and causes storage fragmentation. On
+ the other hand, malloc is more portable, and easier to debug.
+
+ Because we sometimes use alloca, some routines have to be macros,
+ not functions -- `alloca'-allocated space disappears at the end of the
+ function it is called in. */
+
+#ifdef REGEX_MALLOC
+
+#define REGEX_ALLOCATE malloc
+#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
+
+#else /* not REGEX_MALLOC */
+
+/* Emacs already defines alloca, sometimes. */
+#ifndef alloca
+
+/* Make alloca work the best possible way. */
+#ifdef __GNUC__
+#define alloca __builtin_alloca
+#else /* not __GNUC__ */
+#if HAVE_ALLOCA_H
+#include <alloca.h>
+#else /* not __GNUC__ or HAVE_ALLOCA_H */
+#ifndef _AIX /* Already did AIX, up at the top. */
+char *alloca ();
+#endif /* not _AIX */
+#endif /* not HAVE_ALLOCA_H */
+#endif /* not __GNUC__ */
+
+#endif /* not alloca */
+
+#define REGEX_ALLOCATE alloca
+
+/* Assumes a `char *destination' variable. */
+#define REGEX_REALLOCATE(source, osize, nsize) \
+ (destination = (char *) alloca (nsize), \
+ bcopy (source, destination, osize), \
+ destination)
+
+#endif /* not REGEX_MALLOC */
+
+
+/* True if `size1' is nonzero and PTR is pointing anywhere inside
+ `string1' or just past its end. This works if PTR is NULL, which is
+ a good thing. */
+#define FIRST_STRING_P(ptr) \
+ (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
+
+/* (Re)Allocate N items of type T using malloc, or fail. */
+#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
+#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
+#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
+
+#define BYTEWIDTH 8 /* In bits. */
+
+#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+typedef char boolean;
+#define false 0
+#define true 1
+
+/* These are the command codes that appear in compiled regular
+ expressions. Some opcodes are followed by argument bytes. A
+ command code can specify any interpretation whatsoever for its
+ arguments. Zero bytes may appear in the compiled regular expression.
+
+ The value of `exactn' is needed in search.c (search_buffer) in Emacs.
+ So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
+ `exactn' we use here must also be 1. */
+
+typedef enum
+{
+ no_op = 0,
+
+ /* Followed by one byte giving n, then by n literal bytes. */
+ exactn = 1,
+
+ /* Matches any (more or less) character. */
+ anychar,
+
+ /* Matches any one char belonging to specified set. First
+ following byte is number of bitmap bytes. Then come bytes
+ for a bitmap saying which chars are in. Bits in each byte
+ are ordered low-bit-first. A character is in the set if its
+ bit is 1. A character too large to have a bit in the map is
+ automatically not in the set. */
+ charset,
+
+ /* Same parameters as charset, but match any character that is
+ not one of those specified. */
+ charset_not,
+
+ /* Start remembering the text that is matched, for storing in a
+ register. Followed by one byte with the register number, in
+ the range 0 to one less than the pattern buffer's re_nsub
+ field. Then followed by one byte with the number of groups
+ inner to this one. (This last has to be part of the
+ start_memory only because we need it in the on_failure_jump
+ of re_match_2.) */
+ start_memory,
+
+ /* Stop remembering the text that is matched and store it in a
+ memory register. Followed by one byte with the register
+ number, in the range 0 to one less than `re_nsub' in the
+ pattern buffer, and one byte with the number of inner groups,
+ just like `start_memory'. (We need the number of inner
+ groups here because we don't have any easy way of finding the
+ corresponding start_memory when we're at a stop_memory.) */
+ stop_memory,
+
+ /* Match a duplicate of something remembered. Followed by one
+ byte containing the register number. */
+ duplicate,
+
+ /* Fail unless at beginning of line. */
+ begline,
+
+ /* Fail unless at end of line. */
+ endline,
+
+ /* Succeeds if at beginning of buffer (if emacs) or at beginning
+ of string to be matched (if not). */
+ begbuf,
+
+ /* Analogously, for end of buffer/string. */
+ endbuf,
+
+ /* Followed by two-byte relative address to which to jump. */
+ jump,
+
+ /* Same as jump, but marks the end of an alternative. */
+ jump_past_alt,
+
+ /* Followed by two-byte relative address of place to resume at
+ in case of failure. */
+ on_failure_jump,
+
+ /* Like on_failure_jump, but pushes a placeholder instead of the
+ current string position when executed. */
+ on_failure_keep_string_jump,
+
+ /* Throw away latest failure point and then jump to following
+ two-byte relative address. */
+ pop_failure_jump,
+
+ /* Change to pop_failure_jump if we know we won't have to backtrack to
+ match; otherwise change to jump. This is used to jump
+ back to the beginning of a repeat. If what follows this jump
+ clearly won't match what the repeat does, such that we can be
+ sure that there is no use backtracking out of repetitions
+ already matched, then we change it to a pop_failure_jump.
+ Followed by two-byte address. */
+ maybe_pop_jump,
+
+ /* Jump to following two-byte address, and push a dummy failure
+ point. This failure point will be thrown away if an attempt
+ is made to use it for a failure. A `+' construct makes this
+ before the first repeat. Also used as an intermediary kind
+ of jump when compiling an alternative. */
+ dummy_failure_jump,
+
+ /* Push a dummy failure point and continue. Used at the end of
+ alternatives. */
+ push_dummy_failure,
+
+ /* Followed by two-byte relative address and two-byte number n.
+ After matching N times, jump to the address upon failure. */
+ succeed_n,
+
+ /* Followed by two-byte relative address, and two-byte number n.
+ Jump to the address N times, then fail. */
+ jump_n,
+
+ /* Set the following two-byte relative address to the
+ subsequent two-byte number. The address *includes* the two
+ bytes of number. */
+ set_number_at,
+
+ wordchar, /* Matches any word-constituent character. */
+ notwordchar, /* Matches any char that is not a word-constituent. */
+
+ wordbeg, /* Succeeds if at word beginning. */
+ wordend, /* Succeeds if at word end. */
+
+ wordbound, /* Succeeds if at a word boundary. */
+ notwordbound /* Succeeds if not at a word boundary. */
+
+#ifdef emacs
+ ,before_dot, /* Succeeds if before point. */
+ at_dot, /* Succeeds if at point. */
+ after_dot, /* Succeeds if after point. */
+
+ /* Matches any character whose syntax is specified. Followed by
+ a byte which contains a syntax code, e.g., Sword. */
+ syntaxspec,
+
+ /* Matches any character whose syntax is not that specified. */
+ notsyntaxspec
+#endif /* emacs */
+} re_opcode_t;
+
+/* Common operations on the compiled pattern. */
+
+/* Store NUMBER in two contiguous bytes starting at DESTINATION. */
+
+#define STORE_NUMBER(destination, number) \
+ do { \
+ (destination)[0] = (number) & 0377; \
+ (destination)[1] = (number) >> 8; \
+ } while (0)
+
+/* Same as STORE_NUMBER, except increment DESTINATION to
+ the byte after where the number is stored. Therefore, DESTINATION
+ must be an lvalue. */
+
+#define STORE_NUMBER_AND_INCR(destination, number) \
+ do { \
+ STORE_NUMBER (destination, number); \
+ (destination) += 2; \
+ } while (0)
+
+/* Put into DESTINATION a number stored in two contiguous bytes starting
+ at SOURCE. */
+
+#define EXTRACT_NUMBER(destination, source) \
+ do { \
+ (destination) = *(source) & 0377; \
+ (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \
+ } while (0)
+
+#ifdef DEBUG
+static void
+extract_number (dest, source)
+ int *dest;
+ unsigned char *source;
+{
+ int temp = SIGN_EXTEND_CHAR (*(source + 1));
+ *dest = *source & 0377;
+ *dest += temp << 8;
}
-/*
-* Regex psuedo-parser
-*/
+#ifndef EXTRACT_MACROS /* To debug the macros. */
+#undef EXTRACT_NUMBER
+#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
+#endif /* not EXTRACT_MACROS */
-static void makeRegexTag (const vString* const name,
- const struct sKind* const kind)
+#endif /* DEBUG */
+
+/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
+ SOURCE must be an lvalue. */
+
+#define EXTRACT_NUMBER_AND_INCR(destination, source) \
+ do { \
+ EXTRACT_NUMBER (destination, source); \
+ (source) += 2; \
+ } while (0)
+
+#ifdef DEBUG
+static void
+extract_number_and_incr (destination, source)
+ int *destination;
+ unsigned char **source;
{
- if (kind->enabled)
- {
- tagEntryInfo e;
- Assert (name != NULL && vStringLength (name) > 0);
- Assert (kind != NULL);
- initTagEntry (&e, vStringValue (name));
- e.kind = kind->letter;
- e.kindName = kind->name;
- makeTagEntry (&e);
- }
+ extract_number (destination, *source);
+ *source += 2;
}
-/*
-* Regex pattern definition
-*/
+#ifndef EXTRACT_MACROS
+#undef EXTRACT_NUMBER_AND_INCR
+#define EXTRACT_NUMBER_AND_INCR(dest, src) \
+ extract_number_and_incr (&dest, &src)
+#endif /* not EXTRACT_MACROS */
-/* Take a string like "/blah/" and turn it into "blah", making sure
- * that the first and last characters are the same, and handling
- * quoted separator characters. Actually, stops on the occurrence of
- * an unquoted separator. Also turns "\t" into a Tab character.
- * Returns pointer to terminating separator. Works in place. Null
- * terminates name string.
- */
-static char* scanSeparators (char* name)
+#endif /* DEBUG */
+
+/* If DEBUG is defined, Regex prints many voluminous messages about what
+ it is doing (if the variable `debug' is nonzero). If linked with the
+ main program in `iregex.c', you can enter patterns and strings
+ interactively. And if linked with the main program in `main.c' and
+ the other test files, you can run the already-written tests. */
+
+#ifdef DEBUG
+
+/* We use standard I/O for debugging. */
+#include <stdio.h>
+
+/* It is useful to test things that ``must'' be true when debugging. */
+#include <assert.h>
+
+static int debug = 0;
+
+#define DEBUG_STATEMENT(e) e
+#define DEBUG_PRINT1(x) if (debug) printf (x)
+#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
+#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
+#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
+#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
+ if (debug) print_partial_compiled_pattern (s, e)
+#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
+ if (debug) print_double_string (w, s1, sz1, s2, sz2)
+
+
+extern void printchar ();
+
+/* Print the fastmap in human-readable form. */
+
+void
+print_fastmap (fastmap)
+ char *fastmap;
{
- char sep = name [0];
- char *copyto = name;
- boolean quoted = FALSE;
+ unsigned was_a_range = 0;
+ unsigned i = 0;
- for (++name ; *name != '\0' ; ++name)
+ while (i < (1 << BYTEWIDTH))
{
- if (quoted)
+ if (fastmap[i++])
{
- if (*name == sep)
- *copyto++ = sep;
- else if (*name == 't')
- *copyto++ = '\t';
- else
- {
- /* Something else is quoted, so preserve the quote. */
- *copyto++ = '\\';
- *copyto++ = *name;
- }
- quoted = FALSE;
- }
- else if (*name == '\\')
- quoted = TRUE;
- else if (*name == sep)
- {
- break;
- }
- else
- *copyto++ = *name;
+ was_a_range = 0;
+ printchar (i - 1);
+ while (i < (1 << BYTEWIDTH) && fastmap[i])
+ {
+ was_a_range = 1;
+ i++;
+ }
+ if (was_a_range)
+ {
+ printf ("-");
+ printchar (i - 1);
+ }
+ }
}
- *copyto = '\0';
- return name;
+ putchar ('\n');
}
-/* Parse `regexp', in form "/regex/name/[k,Kind/]flags" (where the separator
- * character is whatever the first character of `regexp' is), by breaking it
- * up into null terminated strings, removing the separators, and expanding
- * '\t' into tabs. When complete, `regexp' points to the line matching
- * pattern, a pointer to the name matching pattern is written to `name', a
- * pointer to the kinds is written to `kinds' (possibly NULL), and a pointer
- * to the trailing flags is written to `flags'. If the pattern is not in the
- * correct format, a false value is returned.
- */
-static boolean parseTagRegex (char* const regexp, char** const name,
- char** const kinds, char** const flags)
+
+/* Print a compiled pattern string in human-readable form, starting at
+ the START pointer into it and ending just before the pointer END. */
+
+void
+print_partial_compiled_pattern (start, end)
+ unsigned char *start;
+ unsigned char *end;
{
- boolean result = FALSE;
- const int separator = (unsigned char) regexp [0];
+ int mcnt, mcnt2;
+ unsigned char *p = start;
+ unsigned char *pend = end;
- *name = scanSeparators (regexp);
- if (*regexp == '\0')
- error (WARNING, "empty regexp");
- else if (**name != separator)
- error (WARNING, "%s: incomplete regexp", regexp);
- else
+ if (start == NULL)
{
- char* const third = scanSeparators (*name);
- if (**name == '\0')
- error (WARNING, "%s: regexp missing name pattern", regexp);
- if ((*name) [strlen (*name) - 1] == '\\')
- error (WARNING, "error in name pattern: \"%s\"", *name);
- if (*third != separator)
- error (WARNING, "%s: regexp missing final separator", regexp);
- else
+ printf ("(null)\n");
+ return;
+ }
+
+ /* Loop over pattern commands. */
+ while (p < pend)
+ {
+ switch ((re_opcode_t) *p++)
{
- char* const fourth = scanSeparators (third);
- if (*fourth == separator)
+ case no_op:
+ printf ("/no_op");
+ break;
+
+ case exactn:
+ mcnt = *p++;
+ printf ("/exactn/%d", mcnt);
+ do
{
- *kinds = third;
- scanSeparators (fourth);
- *flags = fourth;
- }
- else
- {
- *flags = third;
- *kinds = NULL;
- }
- result = TRUE;
+ putchar ('/');
+ printchar (*p++);
+ }
+ while (--mcnt);
+ break;
+
+ case start_memory:
+ mcnt = *p++;
+ printf ("/start_memory/%d/%d", mcnt, *p++);
+ break;
+
+ case stop_memory:
+ mcnt = *p++;
+ printf ("/stop_memory/%d/%d", mcnt, *p++);
+ break;
+
+ case duplicate:
+ printf ("/duplicate/%d", *p++);
+ break;
+
+ case anychar:
+ printf ("/anychar");
+ break;
+
+ case charset:
+ case charset_not:
+ {
+ register int c;
+
+ printf ("/charset%s",
+ (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
+
+ assert (p + *p < pend);
+
+ for (c = 0; c < *p; c++)
+ {
+ unsigned bit;
+ unsigned char map_byte = p[1 + c];
+
+ putchar ('/');
+
+ for (bit = 0; bit < BYTEWIDTH; bit++)
+ if (map_byte & (1 << bit))
+ printchar (c * BYTEWIDTH + bit);
+ }
+ p += 1 + *p;
+ break;
+ }
+
+ case begline:
+ printf ("/begline");
+ break;
+
+ case endline:
+ printf ("/endline");
+ break;
+
+ case on_failure_jump:
+ extract_number_and_incr (&mcnt, &p);
+ printf ("/on_failure_jump/0/%d", mcnt);
+ break;
+
+ case on_failure_keep_string_jump:
+ extract_number_and_incr (&mcnt, &p);
+ printf ("/on_failure_keep_string_jump/0/%d", mcnt);
+ break;
+
+ case dummy_failure_jump:
+ extract_number_and_incr (&mcnt, &p);
+ printf ("/dummy_failure_jump/0/%d", mcnt);
+ break;
+
+ case push_dummy_failure:
+ printf ("/push_dummy_failure");
+ break;
+
+ case maybe_pop_jump:
+ extract_number_and_incr (&mcnt, &p);
+ printf ("/maybe_pop_jump/0/%d", mcnt);
+ break;
+
+ case pop_failure_jump:
+ extract_number_and_incr (&mcnt, &p);
+ printf ("/pop_failure_jump/0/%d", mcnt);
+ break;
+
+ case jump_past_alt:
+ extract_number_and_incr (&mcnt, &p);
+ printf ("/jump_past_alt/0/%d", mcnt);
+ break;
+
+ case jump:
+ extract_number_and_incr (&mcnt, &p);
+ printf ("/jump/0/%d", mcnt);
+ break;
+
+ case succeed_n:
+ extract_number_and_incr (&mcnt, &p);
+ extract_number_and_incr (&mcnt2, &p);
+ printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
+ break;
+
+ case jump_n:
+ extract_number_and_incr (&mcnt, &p);
+ extract_number_and_incr (&mcnt2, &p);
+ printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2);
+ break;
+
+ case set_number_at:
+ extract_number_and_incr (&mcnt, &p);
+ extract_number_and_incr (&mcnt2, &p);
+ printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
+ break;
+
+ case wordbound:
+ printf ("/wordbound");
+ break;
+
+ case notwordbound:
+ printf ("/notwordbound");
+ break;
+
+ case wordbeg:
+ printf ("/wordbeg");
+ break;
+
+ case wordend:
+ printf ("/wordend");
+
+#ifdef emacs
+ case before_dot:
+ printf ("/before_dot");
+ break;
+
+ case at_dot:
+ printf ("/at_dot");
+ break;
+
+ case after_dot:
+ printf ("/after_dot");
+ break;
+
+ case syntaxspec:
+ printf ("/syntaxspec");
+ mcnt = *p++;
+ printf ("/%d", mcnt);
+ break;
+
+ case notsyntaxspec:
+ printf ("/notsyntaxspec");
+ mcnt = *p++;
+ printf ("/%d", mcnt);
+ break;
+#endif /* emacs */
+
+ case wordchar:
+ printf ("/wordchar");
+ break;
+
+ case notwordchar:
+ printf ("/notwordchar");
+ break;
+
+ case begbuf:
+ printf ("/begbuf");
+ break;
+
+ case endbuf:
+ printf ("/endbuf");
+ break;
+
+ default:
+ printf ("?%d", *(p-1));
}
}
- return result;
+ printf ("/\n");
}
-static void addCompiledTagPattern (const langType language,
- regex_t* const pattern, char* const name,
- const char kind, char* const kindName)
+
+void
+print_compiled_pattern (bufp)
+ struct re_pattern_buffer *bufp;
{
- patternSet* set;
- regexPattern *ptrn;
- if (language > SetUpper)
+ unsigned char *buffer = bufp->buffer;
+
+ print_partial_compiled_pattern (buffer, buffer + bufp->used);
+ printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
+
+ if (bufp->fastmap_accurate && bufp->fastmap)
{
- int i;
- Sets = xRealloc (Sets, (language + 1), patternSet);
- for (i = SetUpper + 1 ; i <= language ; ++i)
- {
- Sets [i].patterns = NULL;
- Sets [i].count = 0;
- }
- SetUpper = language;
+ printf ("fastmap: ");
+ print_fastmap (bufp->fastmap);
}
- set = Sets + language;
- set->patterns = xRealloc (set->patterns, (set->count + 1), regexPattern);
- ptrn = &set->patterns [set->count];
- set->count += 1;
- ptrn->pattern = pattern;
- ptrn->type = PTRN_TAG;
- ptrn->u.tag.name_pattern = name;
- ptrn->u.tag.kind.enabled = TRUE;
- ptrn->u.tag.kind.letter = kind;
- ptrn->u.tag.kind.name = kindName;
+ printf ("re_nsub: %d\t", bufp->re_nsub);
+ printf ("regs_alloc: %d\t", bufp->regs_allocated);
+ printf ("can_be_null: %d\t", bufp->can_be_null);
+ printf ("newline_anchor: %d\n", bufp->newline_anchor);
+ printf ("no_sub: %d\t", bufp->no_sub);
+ printf ("not_bol: %d\t", bufp->not_bol);
+ printf ("not_eol: %d\t", bufp->not_eol);
+ printf ("syntax: %d\n", bufp->syntax);
+ /* Perhaps we should print the translate table? */
}
-static void addCompiledCallbackPattern (const langType language,
- regex_t* const pattern,
- const regexCallback callback)
+
+void
+print_double_string (where, string1, size1, string2, size2)
+ const char *where;
+ const char *string1;
+ const char *string2;
+ int size1;
+ int size2;
{
- patternSet* set;
- regexPattern *ptrn;
- if (language > SetUpper)
+ unsigned this_char;
+
+ if (where == NULL)
+ printf ("(null)");
+ else
{
- int i;
- Sets = xRealloc (Sets, (language + 1), patternSet);
- for (i = SetUpper + 1 ; i <= language ; ++i)
- {
- Sets [i].patterns = NULL;
- Sets [i].count = 0;
- }
- SetUpper = language;
+ if (FIRST_STRING_P (where))
+ {
+ for (this_char = where - string1; this_char < size1; this_char++)
+ printchar (string1[this_char]);
+
+ where = string2;
+ }
+
+ for (this_char = where - string2; this_char < size2; this_char++)
+ printchar (string2[this_char]);
}
- set = Sets + language;
- set->patterns = xRealloc (set->patterns, (set->count + 1), regexPattern);
- ptrn = &set->patterns [set->count];
- set->count += 1;
+}
- ptrn->pattern = pattern;
- ptrn->type = PTRN_CALLBACK;
- ptrn->u.callback.function = callback;
+#else /* not DEBUG */
+
+#undef assert
+#define assert(e)
+
+#define DEBUG_STATEMENT(e)
+#define DEBUG_PRINT1(x)
+#define DEBUG_PRINT2(x1, x2)
+#define DEBUG_PRINT3(x1, x2, x3)
+#define DEBUG_PRINT4(x1, x2, x3, x4)
+#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
+#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
+
+#endif /* not DEBUG */
+
+/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
+ also be assigned to arbitrarily: each pattern buffer stores its own
+ syntax, so it can be changed between regex compilations. */
+reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
+
+
+/* Specify the precise syntax of regexps for compilation. This provides
+ for compatibility for various utilities which historically have
+ different, incompatible syntaxes.
+
+ The argument SYNTAX is a bit mask comprised of the various bits
+ defined in regex.h. We return the old syntax. */
+
+reg_syntax_t
+re_set_syntax (syntax)
+ reg_syntax_t syntax;
+{
+ reg_syntax_t ret = re_syntax_options;
+
+ re_syntax_options = syntax;
+ return ret;
}
+
+/* This table gives an error message for each of the error codes listed
+ in regex.h. Obviously the order here has to be the same as there. */
-#if defined (POSIX_REGEX)
+static const char *re_error_msg[] =
+ { NULL, /* REG_NOERROR */
+ "No match", /* REG_NOMATCH */
+ "Invalid regular expression", /* REG_BADPAT */
+ "Invalid collation character", /* REG_ECOLLATE */
+ "Invalid character class name", /* REG_ECTYPE */
+ "Trailing backslash", /* REG_EESCAPE */
+ "Invalid back reference", /* REG_ESUBREG */
+ "Unmatched [ or [^", /* REG_EBRACK */
+ "Unmatched ( or \\(", /* REG_EPAREN */
+ "Unmatched \\{", /* REG_EBRACE */
+ "Invalid content of \\{\\}", /* REG_BADBR */
+ "Invalid range end", /* REG_ERANGE */
+ "Memory exhausted", /* REG_ESPACE */
+ "Invalid preceding regular expression", /* REG_BADRPT */
+ "Premature end of regular expression", /* REG_EEND */
+ "Regular expression too big", /* REG_ESIZE */
+ "Unmatched ) or \\)", /* REG_ERPAREN */
+ };
+
+/* Subroutine declarations and macros for regex_compile. */
-static regex_t* compileRegex (const char* const regexp, const char* const flags)
+static void store_op1 (), store_op2 ();
+static void insert_op1 (), insert_op2 ();
+static boolean at_begline_loc_p (), at_endline_loc_p ();
+static boolean group_in_compile_stack ();
+static reg_errcode_t compile_range ();
+
+/* Fetch the next character in the uncompiled pattern---translating it
+ if necessary. Also cast from a signed character in the constant
+ string passed to us by the user to an unsigned char that we can use
+ as an array index (in, e.g., `translate'). */
+#define PATFETCH(c) \
+ do {if (p == pend) return REG_EEND; \
+ c = (unsigned char) *p++; \
+ if (translate) c = translate[c]; \
+ } while (0)
+
+/* Fetch the next character in the uncompiled pattern, with no
+ translation. */
+#define PATFETCH_RAW(c) \
+ do {if (p == pend) return REG_EEND; \
+ c = (unsigned char) *p++; \
+ } while (0)
+
+/* Go backwards one character in the pattern. */
+#define PATUNFETCH p--
+
+
+/* If `translate' is non-null, return translate[D], else just D. We
+ cast the subscript to translate because some data is declared as
+ `char *', to avoid warnings when a string constant is passed. But
+ when we use a character as a subscript we must make it unsigned. */
+#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
+
+
+/* Macros for outputting the compiled pattern into `buffer'. */
+
+/* If the buffer isn't allocated when it comes in, use this. */
+#define INIT_BUF_SIZE 32
+
+/* Make sure we have at least N more bytes of space in buffer. */
+#define GET_BUFFER_SPACE(n) \
+ while ((unsigned long)(b - bufp->buffer + (n)) > bufp->allocated) \
+ EXTEND_BUFFER ()
+
+/* Make sure we have one more byte of buffer space and then add C to it. */
+#define BUF_PUSH(c) \
+ do { \
+ GET_BUFFER_SPACE (1); \
+ *b++ = (unsigned char) (c); \
+ } while (0)
+
+
+/* Ensure we have two more bytes of buffer space and then append C1 and C2. */
+#define BUF_PUSH_2(c1, c2) \
+ do { \
+ GET_BUFFER_SPACE (2); \
+ *b++ = (unsigned char) (c1); \
+ *b++ = (unsigned char) (c2); \
+ } while (0)
+
+
+/* As with BUF_PUSH_2, except for three bytes. */
+#define BUF_PUSH_3(c1, c2, c3) \
+ do { \
+ GET_BUFFER_SPACE (3); \
+ *b++ = (unsigned char) (c1); \
+ *b++ = (unsigned char) (c2); \
+ *b++ = (unsigned char) (c3); \
+ } while (0)
+
+
+/* Store a jump with opcode OP at LOC to location TO. We store a
+ relative address offset by the three bytes the jump itself occupies. */
+#define STORE_JUMP(op, loc, to) \
+ store_op1 (op, loc, (to) - (loc) - 3)
+
+/* Likewise, for a two-argument jump. */
+#define STORE_JUMP2(op, loc, to, arg) \
+ store_op2 (op, loc, (to) - (loc) - 3, arg)
+
+/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */
+#define INSERT_JUMP(op, loc, to) \
+ insert_op1 (op, loc, (to) - (loc) - 3, b)
+
+/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */
+#define INSERT_JUMP2(op, loc, to, arg) \
+ insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
+
+
+/* This is not an arbitrary limit: the arguments which represent offsets
+ into the pattern are two bytes long. So if 2^16 bytes turns out to
+ be too small, many things would have to change. */
+#define MAX_BUF_SIZE (1L << 16)
+
+
+/* Extend the buffer to twice its current size via realloc and
+ reset the pointers that pointed into the old block to point to the
+ correct places in the new one. If extending the buffer results in it
+ being larger than MAX_BUF_SIZE, then flag memory exhausted. */
+#define EXTEND_BUFFER() \
+ do { \
+ unsigned char *old_buffer = bufp->buffer; \
+ if (bufp->allocated == MAX_BUF_SIZE) \
+ return REG_ESIZE; \
+ bufp->allocated <<= 1; \
+ if (bufp->allocated > MAX_BUF_SIZE) \
+ bufp->allocated = MAX_BUF_SIZE; \
+ bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
+ if (bufp->buffer == NULL) \
+ return REG_ESPACE; \
+ /* If the buffer moved, move all the pointers into it. */ \
+ if (old_buffer != bufp->buffer) \
+ { \
+ b = (b - old_buffer) + bufp->buffer; \
+ begalt = (begalt - old_buffer) + bufp->buffer; \
+ if (fixup_alt_jump) \
+ fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
+ if (laststart) \
+ laststart = (laststart - old_buffer) + bufp->buffer; \
+ if (pending_exact) \
+ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
+ } \
+ } while (0)
+
+
+/* Since we have one byte reserved for the register number argument to
+ {start,stop}_memory, the maximum number of groups we can report
+ things about is what fits in that byte. */
+#define MAX_REGNUM 255
+
+/* But patterns can have more than `MAX_REGNUM' registers. We just
+ ignore the excess. */
+typedef unsigned regnum_t;
+
+
+/* Macros for the compile stack. */
+
+/* Since offsets can go either forwards or backwards, this type needs to
+ be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
+typedef int pattern_offset_t;
+
+typedef struct
{
- int cflags = REG_EXTENDED | REG_NEWLINE;
- regex_t *result = NULL;
- int errcode;
- int i;
- for (i = 0 ; flags != NULL && flags [i] != '\0' ; ++i)
- {
- switch ((int) flags [i])
- {
- case 'b': cflags &= ~REG_EXTENDED; break;
- case 'e': cflags |= REG_EXTENDED; break;
- case 'i': cflags |= REG_ICASE; break;
- default: error (WARNING, "unknown regex flag: '%c'", *flags); break;
- }
+ pattern_offset_t begalt_offset;
+ pattern_offset_t fixup_alt_jump;
+ pattern_offset_t inner_group_offset;
+ pattern_offset_t laststart_offset;
+ regnum_t regnum;
+} compile_stack_elt_t;
+
+
+typedef struct
+{
+ compile_stack_elt_t *stack;
+ unsigned size;
+ unsigned avail; /* Offset of next open position. */
+} compile_stack_type;
+
+
+#define INIT_COMPILE_STACK_SIZE 32
+
+#define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
+#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
+
+/* The next available element. */
+#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
+
+
+/* Set the bit for character C in a list. */
+#define SET_LIST_BIT(c) \
+ (b[((unsigned char) (c)) / BYTEWIDTH] \
+ |= 1 << (((unsigned char) c) % BYTEWIDTH))
+
+
+/* Get the next unsigned number in the uncompiled pattern. */
+#define GET_UNSIGNED_NUMBER(num) \
+ { if (p != pend) \
+ { \
+ PATFETCH (c); \
+ while (ISDIGIT (c)) \
+ { \
+ if (num < 0) \
+ num = 0; \
+ num = num * 10 + c - '0'; \
+ if (p == pend) \
+ break; \
+ PATFETCH (c); \
+ } \
+ } \
}
- result = xMalloc (1, regex_t);
- errcode = regcomp (result, regexp, cflags);
- if (errcode != 0)
+
+#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
+
+#define IS_CHAR_CLASS(string) \
+ (STREQ (string, "alpha") || STREQ (string, "upper") \
+ || STREQ (string, "lower") || STREQ (string, "digit") \
+ || STREQ (string, "alnum") || STREQ (string, "xdigit") \
+ || STREQ (string, "space") || STREQ (string, "print") \
+ || STREQ (string, "punct") || STREQ (string, "graph") \
+ || STREQ (string, "cntrl") || STREQ (string, "blank"))
+
+/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
+ Returns one of error codes defined in `regex.h', or zero for success.
+
+ Assumes the `allocated' (and perhaps `buffer') and `translate'
+ fields are set in BUFP on entry.
+
+ If it succeeds, results are put in BUFP (if it returns an error, the
+ contents of BUFP are undefined):
+ `buffer' is the compiled pattern;
+ `syntax' is set to SYNTAX;
+ `used' is set to the length of the compiled pattern;
+ `fastmap_accurate' is zero;
+ `re_nsub' is the number of subexpressions in PATTERN;
+ `not_bol' and `not_eol' are zero;
+
+ The `fastmap' and `newline_anchor' fields are neither
+ examined nor set. */
+
+static reg_errcode_t
+regex_compile (pattern, size, syntax, bufp)
+ const char *pattern;
+ int size;
+ reg_syntax_t syntax;
+ struct re_pattern_buffer *bufp;
+{
+ /* We fetch characters from PATTERN here. Even though PATTERN is
+ `char *' (i.e., signed), we declare these variables as unsigned, so
+ they can be reliably used as array indices. */
+ register unsigned char c, c1;
+
+ /* A random temporary spot in PATTERN. */
+ const char *p1;
+
+ /* Points to the end of the buffer, where we should append. */
+ register unsigned char *b;
+
+ /* Keeps track of unclosed groups. */
+ compile_stack_type compile_stack;
+
+ /* Points to the current (ending) position in the pattern. */
+ const char *p = pattern;
+ const char *pend = pattern + size;
+
+ /* How to translate the characters in the pattern. */
+ char *translate = bufp->translate;
+
+ /* Address of the count-byte of the most recently inserted `exactn'
+ command. This makes it possible to tell if a new exact-match
+ character can be added to that command or if the character requires
+ a new `exactn' command. */
+ unsigned char *pending_exact = 0;
+
+ /* Address of start of the most recently finished expression.
+ This tells, e.g., postfix * where to find the start of its
+ operand. Reset at the beginning of groups and alternatives. */
+ unsigned char *laststart = 0;
+
+ /* Address of beginning of regexp, or inside of last group. */
+ unsigned char *begalt;
+
+ /* Place in the uncompiled pattern (i.e., the {) to
+ which to go back if the interval is invalid. */
+ const char *beg_interval;
+
+ /* Address of the place where a forward jump should go to the end of
+ the containing expression. Each alternative of an `or' -- except the
+ last -- ends with a forward jump of this sort. */
+ unsigned char *fixup_alt_jump = 0;
+
+ /* Counts open-groups as they are encountered. Remembered for the
+ matching close-group on the compile stack, so the same register
+ number is put in the stop_memory as the start_memory. */
+ regnum_t regnum = 0;
+
+#ifdef DEBUG
+ DEBUG_PRINT1 ("\nCompiling pattern: ");
+ if (debug)
{
- char errmsg[256];
- regerror (errcode, result, errmsg, 256);
- error (WARNING, "%s", errmsg);
- regfree (result);
- eFree (result);
- result = NULL;
+ unsigned debug_count;
+
+ for (debug_count = 0; debug_count < size; debug_count++)
+ printchar (pattern[debug_count]);
+ putchar ('\n');
}
- return result;
-}
+#endif /* DEBUG */
+ /* Initialize the compile stack. */
+ compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
+ if (compile_stack.stack == NULL)
+ return REG_ESPACE;
+
+ compile_stack.size = INIT_COMPILE_STACK_SIZE;
+ compile_stack.avail = 0;
+
+ /* Initialize the pattern buffer. */
+ bufp->syntax = syntax;
+ bufp->fastmap_accurate = 0;
+ bufp->not_bol = bufp->not_eol = 0;
+
+ /* Set `used' to zero, so that if we return an error, the pattern
+ printer (for debugging) will think there's no pattern. We reset it
+ at the end. */
+ bufp->used = 0;
+
+ /* Always count groups, whether or not bufp->no_sub is set. */
+ bufp->re_nsub = 0;
+
+#if !defined (emacs) && !defined (SYNTAX_TABLE)
+ /* Initialize the syntax table. */
+ init_syntax_once ();
#endif
-static void parseKinds (const char* const kinds,
- char* const kind, char** const kindName)
-{
- *kind = '\0';
- *kindName = NULL;
- if (kinds == NULL)
+ if (bufp->allocated == 0)
{
- *kind = 'r';
- *kindName = eStrdup ("regex");
+ if (bufp->buffer)
+ { /* If zero allocated, but buffer is non-null, try to realloc
+ enough space. This loses if buffer's address is bogus, but
+ that is the user's responsibility. */
+ RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
+ }
+ else
+ { /* Caller did not allocate a buffer. Do it for them. */
+ bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
+ }
+ if (!bufp->buffer) return REG_ESPACE;
+
+ bufp->allocated = INIT_BUF_SIZE;
}
- else if (kinds [0] != '\0')
+
+ begalt = b = bufp->buffer;
+
+ /* Loop through the uncompiled pattern until we're at the end. */
+ while (p != pend)
{
- const char* k = kinds;
- if (k [1] == ',' || k [1] == '\0')
- *kind = *k++;
- if (*k == ',')
- ++k;
- if (*k != '\0')
- *kindName = eStrdup (k);
+ PATFETCH (c);
+
+ switch (c)
+ {
+ case '^':
+ {
+ if ( /* If at start of pattern, it's an operator. */
+ p == pattern + 1
+ /* If context independent, it's an operator. */
+ || syntax & RE_CONTEXT_INDEP_ANCHORS
+ /* Otherwise, depends on what's come before. */
+ || at_begline_loc_p (pattern, p, syntax))
+ BUF_PUSH (begline);
+ else
+ goto normal_char;
+ }
+ break;
+
+
+ case '$':
+ {
+ if ( /* If at end of pattern, it's an operator. */
+ p == pend
+ /* If context independent, it's an operator. */
+ || syntax & RE_CONTEXT_INDEP_ANCHORS
+ /* Otherwise, depends on what's next. */
+ || at_endline_loc_p (p, pend, syntax))
+ BUF_PUSH (endline);
+ else
+ goto normal_char;
+ }
+ break;
+
+
+ case '+':
+ case '?':
+ if ((syntax & RE_BK_PLUS_QM)
+ || (syntax & RE_LIMITED_OPS))
+ goto normal_char;
+ handle_plus:
+ case '*':
+ /* If there is no previous pattern... */
+ if (!laststart)
+ {
+ if (syntax & RE_CONTEXT_INVALID_OPS)
+ return REG_BADRPT;
+ else if (!(syntax & RE_CONTEXT_INDEP_OPS))
+ goto normal_char;
+ }
+
+ {
+ /* Are we optimizing this jump? */
+ boolean keep_string_p = false;
+
+ /* 1 means zero (many) matches is allowed. */
+ char zero_times_ok = 0, many_times_ok = 0;
+
+ /* If there is a sequence of repetition chars, collapse it
+ down to just one (the right one). We can't combine
+ interval operators with these because of, e.g., `a{2}*',
+ which should only match an even number of `a's. */
+
+ for (;;)
+ {
+ zero_times_ok |= c != '+';
+ many_times_ok |= c != '?';
+
+ if (p == pend)
+ break;
+
+ PATFETCH (c);
+
+ if (c == '*'
+ || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
+ ;
+
+ else if (syntax & RE_BK_PLUS_QM && c == '\\')
+ {
+ if (p == pend) return REG_EESCAPE;
+
+ PATFETCH (c1);
+ if (!(c1 == '+' || c1 == '?'))
+ {
+ PATUNFETCH;
+ PATUNFETCH;
+ break;
+ }
+
+ c = c1;
+ }
+ else
+ {
+ PATUNFETCH;
+ break;
+ }
+
+ /* If we get here, we found another repeat character. */
+ }
+
+ /* Star, etc. applied to an empty pattern is equivalent
+ to an empty pattern. */
+ if (!laststart)
+ break;
+
+ /* Now we know whether or not zero matches are allowed
+ and also whether or not two or more matches are allowed. */
+ if (many_times_ok)
+ { /* More than one repetition is allowed, so put in at the
+ end a backward relative jump from `b' to before the next
+ jump we're going to put in below (which jumps from
+ laststart to after this jump).
+
+ But if we are at the `*' in the exact sequence `.*\n',
+ insert an unconditional jump backwards to the .,
+ instead of the beginning of the loop. This way we only
+ push a failure point once, instead of every time
+ through the loop. */
+ assert (p - 1 > pattern);
+
+ /* Allocate the space for the jump. */
+ GET_BUFFER_SPACE (3);
+
+ /* We know we are not at the first character of the pattern,
+ because laststart was nonzero. And we've already
+ incremented `p', by the way, to be the character after
+ the `*'. Do we have to do something analogous here
+ for null bytes, because of RE_DOT_NOT_NULL? */
+ if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
+ && zero_times_ok
+ && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
+ && !(syntax & RE_DOT_NEWLINE))
+ { /* We have .*\n. */
+ STORE_JUMP (jump, b, laststart);
+ keep_string_p = true;
+ }
+ else
+ /* Anything else. */
+ STORE_JUMP (maybe_pop_jump, b, laststart - 3);
+
+ /* We've added more stuff to the buffer. */
+ b += 3;
+ }
+
+ /* On failure, jump from laststart to b + 3, which will be the
+ end of the buffer after this jump is inserted. */
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
+ : on_failure_jump,
+ laststart, b + 3);
+ pending_exact = 0;
+ b += 3;
+
+ if (!zero_times_ok)
+ {
+ /* At least one repetition is required, so insert a
+ `dummy_failure_jump' before the initial
+ `on_failure_jump' instruction of the loop. This
+ effects a skip over that instruction the first time
+ we hit that loop. */
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
+ b += 3;
+ }
+ }
+ break;
+
+
+ case '.':
+ laststart = b;
+ BUF_PUSH (anychar);
+ break;
+
+
+ case '[':
+ {
+ boolean had_char_class = false;
+
+ if (p == pend) return REG_EBRACK;
+
+ /* Ensure that we have enough space to push a charset: the
+ opcode, the length count, and the bitset; 34 bytes in all. */
+ GET_BUFFER_SPACE (34);
+
+ laststart = b;
+
+ /* We test `*p == '^' twice, instead of using an if
+ statement, so we only need one BUF_PUSH. */
+ BUF_PUSH (*p == '^' ? charset_not : charset);
+ if (*p == '^')
+ p++;
+
+ /* Remember the first position in the bracket expression. */
+ p1 = p;
+
+ /* Push the number of bytes in the bitmap. */
+ BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
+
+ /* Clear the whole map. */
+ bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
+
+ /* charset_not matches newline according to a syntax bit. */
+ if ((re_opcode_t) b[-2] == charset_not
+ && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
+ SET_LIST_BIT ('\n');
+
+ /* Read in characters and ranges, setting map bits. */
+ for (;;)
+ {
+ if (p == pend) return REG_EBRACK;
+
+ PATFETCH (c);
+
+ /* \ might escape characters inside [...] and [^...]. */
+ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
+ {
+ if (p == pend) return REG_EESCAPE;
+
+ PATFETCH (c1);
+ SET_LIST_BIT (c1);
+ continue;
+ }
+
+ /* Could be the end of the bracket expression. If it's
+ not (i.e., when the bracket expression is `[]' so
+ far), the ']' character bit gets set way below. */
+ if (c == ']' && p != p1 + 1)
+ break;
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character class. */
+ if (had_char_class && c == '-' && *p != ']')
+ return REG_ERANGE;
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character: if this is a hyphen not at the
+ beginning or the end of a list, then it's the range
+ operator. */
+ if (c == '-'
+ && !(p - 2 >= pattern && p[-2] == '[')
+ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
+ && *p != ']')
+ {
+ reg_errcode_t ret
+ = compile_range (&p, pend, translate, syntax, b);
+ if (ret != REG_NOERROR) return ret;
+ }
+
+ else if (p[0] == '-' && p[1] != ']')
+ { /* This handles ranges made up of characters only. */
+ reg_errcode_t ret;
+
+ /* Move past the `-'. */
+ PATFETCH (c1);
+
+ ret = compile_range (&p, pend, translate, syntax, b);
+ if (ret != REG_NOERROR) return ret;
+ }
+
+ /* See if we're at the beginning of a possible character
+ class. */
+
+ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
+ { /* Leave room for the null. */
+ char str[CHAR_CLASS_MAX_LENGTH + 1];
+
+ PATFETCH (c);
+ c1 = 0;
+
+ /* If pattern is `[[:'. */
+ if (p == pend) return REG_EBRACK;
+
+ for (;;)
+ {
+ PATFETCH (c);
+ if (c == ':' || c == ']' || p == pend
+ || c1 == CHAR_CLASS_MAX_LENGTH)
+ break;
+ str[c1++] = c;
+ }
+ str[c1] = '\0';
+
+ /* If it isn't a word bracketed by `[:' and `:]':
+ undo the ending character, the letters, and leave
+ the leading `:' and `[' (but set bits for them). */
+ if (c == ':' && *p == ']')
+ {
+ int ch;
+ boolean is_alnum = STREQ (str, "alnum");
+ boolean is_alpha = STREQ (str, "alpha");
+ boolean is_blank = STREQ (str, "blank");
+ boolean is_cntrl = STREQ (str, "cntrl");
+ boolean is_digit = STREQ (str, "digit");
+ boolean is_graph = STREQ (str, "graph");
+ boolean is_lower = STREQ (str, "lower");
+ boolean is_print = STREQ (str, "print");
+ boolean is_punct = STREQ (str, "punct");
+ boolean is_space = STREQ (str, "space");
+ boolean is_upper = STREQ (str, "upper");
+ boolean is_xdigit = STREQ (str, "xdigit");
+
+ if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
+
+ /* Throw away the ] at the end of the character
+ class. */
+ PATFETCH (c);
+
+ if (p == pend) return REG_EBRACK;
+
+ for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
+ {
+ if ( (is_alnum && ISALNUM (ch))
+ || (is_alpha && ISALPHA (ch))
+ || (is_blank && ISBLANK (ch))
+ || (is_cntrl && ISCNTRL (ch))
+ || (is_digit && ISDIGIT (ch))
+ || (is_graph && ISGRAPH (ch))
+ || (is_lower && ISLOWER (ch))
+ || (is_print && ISPRINT (ch))
+ || (is_punct && ISPUNCT (ch))
+ || (is_space && ISSPACE (ch))
+ || (is_upper && ISUPPER (ch))
+ || (is_xdigit && ISXDIGIT (ch)))
+ SET_LIST_BIT (ch);
+ }
+ had_char_class = true;
+ }
+ else
+ {
+ c1++;
+ while (c1--)
+ PATUNFETCH;
+ SET_LIST_BIT ('[');
+ SET_LIST_BIT (':');
+ had_char_class = false;
+ }
+ }
+ else
+ {
+ had_char_class = false;
+ SET_LIST_BIT (c);
+ }
+ }
+
@@ Diff output truncated at 100000 characters. @@
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.