Revision: 4261 http://geany.svn.sourceforge.net/geany/?rev=4261&view=rev Author: ntrel Date: 2009-09-29 11:07:10 +0000 (Tue, 29 Sep 2009)
Log Message: ----------- Change Perl tag parser to ctags SVN r601. This removes support for buggy local/my/our but it parses constant/format/labels and should be less buggy overall (closes #2861232).
Modified Paths: -------------- trunk/ChangeLog trunk/src/symbols.c trunk/tagmanager/perl.c
Modified: trunk/ChangeLog =================================================================== --- trunk/ChangeLog 2009-09-28 16:27:14 UTC (rev 4260) +++ trunk/ChangeLog 2009-09-29 11:07:10 UTC (rev 4261) @@ -1,3 +1,11 @@ +2009-09-29 Nick Treleaven <nick(dot)treleaven(at)btinternet(dot)com> + + * src/symbols.c, tagmanager/perl.c: + Change Perl tag parser to ctags SVN r601. This removes support for + buggy local/my/our but it parses constant/format/labels and should + be less buggy overall (closes #2861232). + + 2009-09-28 Nick Treleaven <nick(dot)treleaven(at)btinternet(dot)com>
* src/keybindings.c, src/sidebar.c, src/sidebar.h,
Modified: trunk/src/symbols.c =================================================================== --- trunk/src/symbols.c 2009-09-28 16:27:14 UTC (rev 4260) +++ trunk/src/symbols.c 2009-09-29 11:07:10 UTC (rev 4261) @@ -650,15 +650,12 @@ case GEANY_FILETYPES_PERL: { tag_list_add_groups(tag_store, - &(tv_iters.tag_class), _("Package"), NULL, + &(tv_iters.tag_namespace), _("Package"), "classviewer-namespace", &(tv_iters.tag_function), _("Functions"), "classviewer-method", - &(tv_iters.tag_member), _("My"), NULL, - &(tv_iters.tag_macro), _("Local"), NULL, - &(tv_iters.tag_variable), _("Our"), NULL, + &(tv_iters.tag_macro), _("Labels"), NULL, + &(tv_iters.tag_type), _("Constants"), NULL, + &(tv_iters.tag_other), _("Other"), NULL, NULL); - /*&(tv_iters.tag_struct), _("Label"), NULL,*/ - /*&(tv_iters.tag_namespace), _("Begin"), NULL,*/ - /*&(tv_iters.tag_other), _("Other"), NULL, NULL);*/ break; } case GEANY_FILETYPES_PHP:
Modified: trunk/tagmanager/perl.c =================================================================== --- trunk/tagmanager/perl.c 2009-09-28 16:27:14 UTC (rev 4260) +++ trunk/tagmanager/perl.c 2009-09-29 11:07:10 UTC (rev 4261) @@ -1,6 +1,7 @@ /* +* $Id$ * -* Copyright (c) 2000-2001, Darren Hiebert +* Copyright (c) 2000-2003, Darren Hiebert * * This source code is released for free distribution under the terms of the * GNU General Public License. @@ -12,220 +13,370 @@ /* * INCLUDE FILES */ -#include "general.h" /* must always come first */ +#include "general.h" /* must always come first */
#include <string.h>
+#include "entry.h" +#include "options.h" #include "read.h" +#include "main.h" #include "vstring.h"
+#define TRACE_PERL_C 0 +#define TRACE if (TRACE_PERL_C) printf("perl.c:%d: ", __LINE__), printf + /* * DATA DEFINITIONS */ typedef enum { - K_SUBROUTINE, - K_PACKAGE, - K_LOCAL, - K_MY, - K_OUR + K_NONE = -1, + K_CONSTANT, + K_FORMAT, + K_LABEL, + K_PACKAGE, + K_SUBROUTINE, + K_SUBROUTINE_DECLARATION } perlKind;
static kindOption PerlKinds [] = { - { TRUE, 'f', "function", "functions" }, - { TRUE, 'c', "class", "packages" }, - { TRUE, 'l', "macro", "local variables" }, - { TRUE, 'm', "member", "my variables" }, - { TRUE, 'v', "variable", "our variables" } + { TRUE, 'e', "enumerator", "constants" }, + { TRUE, 'o', "other", "formats" }, + { TRUE, 'm', "macro", "labels" }, + { TRUE, 'p', "package", "packages" }, + { TRUE, 'f', "function", "subroutines" }, + { FALSE, 'p', "prototype", "subroutine declarations" }, };
/* * FUNCTION DEFINITIONS */
-static const unsigned char *createTagString(const unsigned char *str, int type) +static boolean isIdentifier1 (int c) { - vString *n = vStringNew(); - while (! isspace ((int) *str) && *str != '\0' && *str != '=' && *str != ';' && - *str != ',' && *str != ')' && *str != '$') - { - vStringPut (n, (int) *str); - str++; - } + return (boolean) (isalpha (c) || c == '_'); +}
- vStringTerminate (n); - if (vStringLength (n) > 0) - makeSimpleTag (n, PerlKinds, type); - vStringDelete (n); +static boolean isIdentifier (int c) +{ + return (boolean) (isalnum (c) || c == '_'); +}
-/* if ((*(const char*)str) == ')') - return str-1; - else -*/ return str; +static boolean isPodWord (const char *word) +{ + boolean result = FALSE; + if (isalpha (*word)) + { + const char *const pods [] = { + "head1", "head2", "head3", "head4", "over", "item", "back", + "pod", "begin", "end", "for" + }; + const size_t count = sizeof (pods) / sizeof (pods [0]); + const char *white = strpbrk (word, " \t"); + const size_t len = (white!=NULL) ? (size_t)(white-word) : strlen (word); + char *const id = (char*) eMalloc (len + 1); + size_t i; + strncpy (id, word, len); + id [len] = '\0'; + for (i = 0 ; i < count && ! result ; ++i) + { + if (strcmp (id, pods [i]) == 0) + result = TRUE; + } + eFree (id); + } + return result; }
+/* + * Perl subroutine declaration may look like one of the following: + * + * sub abc; + * sub abc :attr; + * sub abc (proto); + * sub abc (proto) :attr; + * + * Note that there may be more than one attribute. Attributes may + * have things in parentheses (they look like arguments). Anything + * inside of those parentheses goes. Prototypes may contain semi-colons. + * The matching end when we encounter (outside of any parentheses) either + * a semi-colon (that'd be a declaration) or an left curly brace + * (definition). + * + * This is pretty complicated parsing (plus we all know that only perl can + * parse Perl), so we are only promising best effort here. + * + * If we can't determine what this is (due to a file ending, for example), + * we will return FALSE. + */ +static boolean isSubroutineDeclaration (const unsigned char *cp) +{ + boolean attr = FALSE; + int nparens = 0; + + do { + for ( ; *cp; ++cp) { +SUB_DECL_SWITCH: + switch (*cp) { + case ':': + if (nparens) + break; + else if (TRUE == attr) + return FALSE; /* Invalid attribute name */ + else + attr = TRUE; + break; + case '(': + ++nparens; + break; + case ')': + --nparens; + break; + case ' ': + case '\t': + break; + case ';': + if (!nparens) + return TRUE; + case '{': + if (!nparens) + return FALSE; + default: + if (attr) { + if (isIdentifier1(*cp)) { + cp++; + while (isIdentifier (*cp)) + cp++; + attr = FALSE; + goto SUB_DECL_SWITCH; /* Instead of --cp; */ + } else { + return FALSE; + } + } else if (nparens) { + break; + } else { + return FALSE; + } + } + } + } while (NULL != (cp = fileReadLine ())); + + return FALSE; +} + /* Algorithm adapted from from GNU etags. * Perl support by Bart Robinson lomew@cs.utah.edu * Perl sub names: look for /^ [ \t\n]sub [ \t\n]+ [^ \t\n{ (]+/ */ static void findPerlTags (void) { - vString *name = vStringNew (); - boolean skipPodDoc = FALSE; - const unsigned char *line; - perlKind kind; + vString *name = vStringNew (); + vString *package = NULL; + boolean skipPodDoc = FALSE; + const unsigned char *line;
- while ((line = fileReadLine ()) != NULL) - { - const unsigned char *cp = line; - - if (skipPodDoc) + while ((line = fileReadLine ()) != NULL) { - if (strcmp ((const char*) line, "=cut") == 0) - skipPodDoc = FALSE; - continue; - } - else if (line [0] == '=') - { - skipPodDoc = (boolean) (strncmp ( - (const char*) line + 1, "cut", (size_t) 3) != 0); - continue; - } - else if (strcmp ((const char*) line, "__DATA__") == 0) - break; - else if (strcmp ((const char*) line, "__END__") == 0) - break; - else if (line [0] == '#') - continue; + boolean spaceRequired = FALSE; + boolean qualified = FALSE; + const unsigned char *cp = line; + perlKind kind = K_NONE; + tagEntryInfo e;
- while (isspace (*cp)) - cp++; + if (skipPodDoc) + { + if (strncmp ((const char*) line, "=cut", (size_t) 4) == 0) + skipPodDoc = FALSE; + continue; + } + else if (line [0] == '=') + { + skipPodDoc = isPodWord ((const char*)line + 1); + continue; + } + else if (strcmp ((const char*) line, "__DATA__") == 0) + break; + else if (strcmp ((const char*) line, "__END__") == 0) + break; + else if (line [0] == '#') + continue;
- if (strncmp((const char*) cp, "my", (size_t) 2) == 0) - { - cp += 2; - while (isspace (*cp)) cp++; + while (isspace (*cp)) + cp++;
- /* parse something like my($bla) */ - if (*(const char*) cp == '(') - { - cp++; - while (*(const char*) cp != ')') + if (strncmp((const char*) cp, "sub", (size_t) 3) == 0) + { + TRACE("this looks like a sub\n"); + cp += 3; + kind = K_SUBROUTINE; + spaceRequired = TRUE; + qualified = TRUE; + } + else if (strncmp((const char*) cp, "use", (size_t) 3) == 0) + { + cp += 3; + if (!isspace(*cp)) + continue; + while (*cp && isspace (*cp)) + ++cp; + if (strncmp((const char*) cp, "constant", (size_t) 8) != 0) + continue; + cp += 8; + kind = K_CONSTANT; + spaceRequired = TRUE; + qualified = TRUE; + } + else if (strncmp((const char*) cp, "package", (size_t) 7) == 0) + { + /* This will point to space after 'package' so that a tag + can be made */ + const unsigned char *space = cp += 7; + + if (package == NULL) + package = vStringNew (); + else + vStringClear (package); + while (isspace (*cp)) + cp++; + while ((int) *cp != ';' && !isspace ((int) *cp)) { - while (isspace (*(const char*) cp)) cp++; - if (*(const char*) cp == ',') cp++; /* to skip ',' */ - while (isspace (*(const char*) cp)) cp++; - cp++; /* to skip $ sign */ - cp = createTagString(cp, K_MY); - while (isspace (*(const char*) cp)) cp++; + vStringPut (package, (int) *cp); + cp++; } - } - /* parse my $bla */ + vStringCatS (package, "::"); + + cp = space; /* Rewind */ + kind = K_PACKAGE; + spaceRequired = TRUE; + qualified = TRUE; + } + else if (strncmp((const char*) cp, "format", (size_t) 6) == 0) + { + cp += 6; + kind = K_FORMAT; + spaceRequired = TRUE; + qualified = TRUE; + } else { - cp++; /* to skip the $ sign */ + if (isIdentifier1 (*cp)) + { + const unsigned char *p = cp; + while (isIdentifier (*p)) + ++p; + while (isspace (*p)) + ++p; + if ((int) *p == ':' && (int) *(p + 1) != ':') + kind = K_LABEL; + } + } + if (kind != K_NONE) + { + TRACE("cp0: %s\n", (const char *) cp); + if (spaceRequired && *cp && !isspace (*cp)) + continue;
- if (! isalpha (*(const char*) cp)) continue; + TRACE("cp1: %s\n", (const char *) cp); + while (isspace (*cp)) + cp++;
- createTagString (cp, K_MY); - } - } - else if (strncmp((const char*) cp, "our", (size_t) 3) == 0) - { - cp += 3; - while (isspace (*cp)) cp++; + while (!*cp || '#' == *cp) { /* Gobble up empty lines + and comments */ + cp = fileReadLine (); + if (!cp) + goto END_MAIN_WHILE; + while (isspace (*cp)) + cp++; + }
- /* parse something like my($bla) */ - if (*(const char*) cp == '(') - { - cp++; - while (*(const char*) cp != ')') + while (isIdentifier (*cp) || (K_PACKAGE == kind && ':' == *cp)) { - while (isspace (*(const char*) cp)) cp++; - if (*(const char*) cp == ',') cp++; /* to skip ',' */ - while (isspace (*(const char*) cp)) cp++; - cp++; /* to skip $ sign */ - cp = createTagString(cp, K_OUR); - while (isspace (*(const char*) cp)) cp++; + vStringPut (name, (int) *cp); + cp++; } - } - /* parse my $bla */ - else - { - cp++; /* to skip the $ sign */
- if (! isalpha (*(const char*) cp)) continue; + if (K_FORMAT == kind && + vStringLength (name) == 0 && /* cp did not advance */ + '=' == *cp) + { + /* format's name is optional. If it's omitted, 'STDOUT' + is assumed. */ + vStringCatS (name, "STDOUT"); + }
- createTagString (cp, K_OUR); - } - } - else if (strncmp((const char*) cp, "local", (size_t) 5) == 0) - { - cp += 5; - while (isspace (*cp)) cp++; + vStringTerminate (name); + TRACE("name: %s\n", name->buffer);
- /* parse something like my($bla) */ - if (*(const char*) cp == '(') - { - cp++; - while (*(const char*) cp != ')') - { - while (isspace (*(const char*) cp)) cp++; - if (*(const char*) cp == ',') cp++; /* to skip ',' */ - while (isspace (*(const char*) cp)) cp++; - cp++; /* to skip $ sign */ - cp = createTagString(cp, K_LOCAL); - while (isspace (*(const char*) cp)) cp++; + if (0 == vStringLength(name)) { + vStringClear(name); + continue; } - } - /* parse my $bla */ - else - { - cp++; /* to skip the $ sign */
- if (! isalpha (*(const char*) cp)) continue; + if (K_SUBROUTINE == kind) + { + /* + * isSubroutineDeclaration() may consume several lines. So + * we record line positions. + */ + initTagEntry(&e, vStringValue(name));
- createTagString (cp, K_LOCAL); + if (TRUE == isSubroutineDeclaration(cp)) { + if (TRUE == PerlKinds[K_SUBROUTINE_DECLARATION].enabled) { + kind = K_SUBROUTINE_DECLARATION; + } else { + vStringClear (name); + continue; + } + } + + e.kind = PerlKinds[kind].letter; + e.kindName = PerlKinds[kind].name; + + makeTagEntry(&e); + + if (Option.include.qualifiedTags && qualified && + package != NULL && vStringLength (package) > 0) + { + vString *const qualifiedName = vStringNew (); + vStringCopy (qualifiedName, package); + vStringCat (qualifiedName, name); + e.name = vStringValue(qualifiedName); + makeTagEntry(&e); + vStringDelete (qualifiedName); + } + } else if (vStringLength (name) > 0) + { + makeSimpleTag (name, PerlKinds, kind); + if (Option.include.qualifiedTags && qualified && + K_PACKAGE != kind && + package != NULL && vStringLength (package) > 0) + { + vString *const qualifiedName = vStringNew (); + vStringCopy (qualifiedName, package); + vStringCat (qualifiedName, name); + makeSimpleTag (qualifiedName, PerlKinds, kind); + vStringDelete (qualifiedName); + } + } + vStringClear (name); } } - else if (strncmp((const char*) cp, "sub", (size_t) 3) == 0 || - strncmp((const char*) cp, "package", (size_t) 7) == 0) - { - if (strncmp((const char*) cp, "sub", (size_t) 3) == 0) - { - cp += 3; - kind = K_SUBROUTINE; - } else { - cp += 7; - kind = K_PACKAGE; - } - if (!isspace(*cp)) /* woops, not followed by a space */ - continue;
- while (isspace (*cp)) - cp++; - while (! isspace ((int) *cp) && *cp != '\0' && *cp != '{' && *cp != '(' && *cp != ';') - { - vStringPut (name, (int) *cp); - cp++; - } - vStringTerminate (name); - if (vStringLength (name) > 0) - makeSimpleTag (name, PerlKinds, kind); - vStringClear (name); - } - } - vStringDelete (name); +END_MAIN_WHILE: + vStringDelete (name); + if (package != NULL) + vStringDelete (package); }
extern parserDefinition* PerlParser (void) { - static const char *const extensions [] = { "pl", "pm", "perl", NULL }; - parserDefinition* def = parserNew ("Perl"); - def->kinds = PerlKinds; - def->kindCount = KIND_COUNT (PerlKinds); - def->extensions = extensions; - def->parser = findPerlTags; - return def; + static const char *const extensions [] = { "pl", "pm", "plx", "perl", NULL }; + parserDefinition* def = parserNew ("Perl"); + def->kinds = PerlKinds; + def->kindCount = KIND_COUNT (PerlKinds); + def->extensions = extensions; + def->parser = findPerlTags; + return def; }
-/* vi:set tabstop=8 shiftwidth=4: */ +/* vi:set tabstop=4 shiftwidth=4 noexpandtab: */
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.