SF.net SVN: geany: [2660] trunk
eht16 at users.sourceforge.net
eht16 at xxxxx
Sun Jun 8 14:04:02 UTC 2008
Revision: 2660
http://geany.svn.sourceforge.net/geany/?rev=2660&view=rev
Author: eht16
Date: 2008-06-08 07:04:01 -0700 (Sun, 08 Jun 2008)
Log Message:
-----------
Use Python from CTags SVN.
Adapt variable parsing code from the old parser code.
Fix three bugs (see CTags bugs #1988026, 1988027 and 1988130).
Modified Paths:
--------------
trunk/ChangeLog
trunk/tagmanager/python.c
Modified: trunk/ChangeLog
===================================================================
--- trunk/ChangeLog 2008-06-08 12:18:20 UTC (rev 2659)
+++ trunk/ChangeLog 2008-06-08 14:04:01 UTC (rev 2660)
@@ -5,6 +5,10 @@
libiconv (closes #1986134).
Improve the regular expression for detecting encoding cookies to
allow more variants (e.g. "encoding: utf-8").
+ * tagmanager/python.c:
+ Use Python from CTags SVN.
+ Adapt variable parsing code from the old parser code.
+ Fix three bugs (see CTags bugs #1988026, 1988027 and 1988130).
2008-06-07 Frank Lanitz <frank(at)frank(dot)uvena(dot)de>
Modified: trunk/tagmanager/python.c
===================================================================
--- trunk/tagmanager/python.c 2008-06-08 12:18:20 UTC (rev 2659)
+++ trunk/tagmanager/python.c 2008-06-08 14:04:01 UTC (rev 2660)
@@ -1,6 +1,7 @@
/*
+* $Id$
*
-* Copyright (c) 2000-2001, Darren Hiebert
+* Copyright (c) 2000-2003, Darren Hiebert
*
* This source code is released for free distribution under the terms of the
* GNU General Public License.
@@ -8,41 +9,59 @@
* This module contains functions for generating tags for Python language
* files.
*/
-
/*
* INCLUDE FILES
*/
-#include "general.h" /* must always come first */
-#include <glib.h>
+#include "general.h" /* must always come first */
+
#include <string.h>
-#include "parse.h"
+#include "entry.h"
+#include "options.h"
#include "read.h"
+#include "main.h"
#include "vstring.h"
/*
* DATA DEFINITIONS
*/
typedef enum {
- K_CLASS, K_FUNCTION, K_METHOD, K_VARIABLE
+ K_CLASS, K_FUNCTION, K_MEMBER, K_VARIABLE
} pythonKind;
-static kindOption PythonKinds [] = {
- { TRUE, 'c', "class", "classes" },
- { TRUE, 'f', "function", "functions" },
- { TRUE, 'm', "member", "methods" },
- { TRUE, 'v', "variable", "variables" }
+static kindOption PythonKinds[] = {
+ {TRUE, 'c', "class", "classes"},
+ {TRUE, 'f', "function", "functions"},
+ {TRUE, 'm', "member", "class members"},
+ {TRUE, 'v', "variable", "variables"}
};
-typedef struct _lastClass {
- gchar *name;
- gint indent;
-} lastClass;
+typedef struct NestingLevel NestingLevel;
+typedef struct NestingLevels NestingLevels;
+struct NestingLevel
+{
+ int indentation;
+ vString *name;
+ boolean is_class;
+};
+
+struct NestingLevels
+{
+ NestingLevel *levels;
+ int n;
+ int allocated;
+};
+
+static char const * const singletriple = "'''";
+static char const * const doubletriple = "\"\"\"";
+
/*
* FUNCTION DEFINITIONS
*/
+#define vStringLast(vs) ((vs)->buffer[(vs)->length - 1])
+
static boolean isIdentifierFirstCharacter (int c)
{
return (boolean) (isalpha (c) || c == '_');
@@ -53,181 +72,478 @@
return (boolean) (isalnum (c) || c == '_');
}
-
-/* remove all previous classes with more indent than the current one */
-static GList *clean_class_list(GList *list, gint indent)
+/* Given a string with the contents of a line directly after the "def" keyword,
+ * extract all relevant information and create a tag.
+ */
+static void makeFunctionTag (vString *const function,
+ vString *const parent, int is_class_parent)
{
- GList *tmp, *tmp2;
+ tagEntryInfo tag;
+ initTagEntry (&tag, vStringValue (function));
- tmp = g_list_first(list);
- while (tmp != NULL)
+ tag.kindName = "function";
+ tag.kind = 'f';
+
+ if (vStringLength (parent) > 0)
{
- if (((lastClass*)tmp->data)->indent >= indent)
+ if (is_class_parent)
{
- g_free(((lastClass*)tmp->data)->name);
- g_free(tmp->data);
- tmp2 = tmp->next;
-
- list = g_list_remove(list, tmp->data);
- tmp = tmp2;
+ tag.kindName = "member";
+ tag.kind = 'm';
+ tag.extensionFields.scope [0] = "class";
+ tag.extensionFields.scope [1] = vStringValue (parent);
}
else
{
- tmp = tmp->next;
+ tag.extensionFields.scope [0] = "function";
+ tag.extensionFields.scope [1] = vStringValue (parent);
}
}
- return list;
+ /* If a function starts with __, we mark it as file scope.
+ * FIXME: What is the proper way to signal such attributes?
+ * TODO: What does functions/classes starting with _ and __ mean in python?
+ */
+ if (strncmp (vStringValue (function), "__", 2) == 0 &&
+ strcmp (vStringValue (function), "__init__") != 0)
+ {
+ tag.extensionFields.access = "private";
+ tag.isFileScope = TRUE;
+ }
+ else
+ {
+ tag.extensionFields.access = "public";
+ }
+ makeTagEntry (&tag);
}
+/* Given a string with the contents of the line directly after the "class"
+ * keyword, extract all necessary information and create a tag.
+ */
+static void makeClassTag (vString *const class, vString *const inheritance,
+ vString *const parent, int is_class_parent)
+{
+ tagEntryInfo tag;
+ initTagEntry (&tag, vStringValue (class));
+ tag.kindName = "class";
+ tag.kind = 'c';
+ if (vStringLength (parent) > 0)
+ {
+ if (is_class_parent)
+ {
+ tag.extensionFields.scope [0] = "class";
+ tag.extensionFields.scope [1] = vStringValue (parent);
+ }
+ else
+ {
+ tag.extensionFields.scope [0] = "function";
+ tag.extensionFields.scope [1] = vStringValue (parent);
+ }
+ }
+ tag.extensionFields.inheritance = vStringValue (inheritance);
+ makeTagEntry (&tag);
+}
-static void findPythonTags (void)
+static void makeVariableTag (vString *const var, vString *const parent)
{
- GList *parents = NULL, *tmp; /* list of classes which are around the token */
- vString *name = vStringNew ();
- gint indent;
- const unsigned char *line;
- boolean inMultilineString = FALSE;
- boolean wasInMultilineString = FALSE;
- lastClass *lastclass = NULL;
- boolean inFunction = FALSE;
- gint fn_indent = 0;
+ tagEntryInfo tag;
+ initTagEntry (&tag, vStringValue (var));
+ tag.kindName = "variable";
+ tag.kind = 'v';
+ if (vStringLength (parent) > 0)
+ {
+ tag.extensionFields.scope [0] = "class";
+ tag.extensionFields.scope [1] = vStringValue (parent);
+ }
+ makeTagEntry (&tag);
+}
- while ((line = fileReadLine ()) != NULL)
- {
- const unsigned char *cp = line;
- indent = 0;
- while (*cp != '\0')
+/* Skip a single or double quoted string. */
+static const char *skipString (const char *cp)
+{
+ const char *start = cp;
+ int escaped = 0;
+ for (cp++; *cp; cp++)
{
- if (*cp=='"' &&
- strncmp ((const char*) cp, "\"\"\"", (size_t) 3) == 0)
- {
- inMultilineString = (boolean) !inMultilineString;
- if (! inMultilineString)
- wasInMultilineString = TRUE;
- cp += 3;
- }
- if (*cp=='\'' &&
- strncmp ((const char*) cp, "'''", (size_t) 3) == 0)
- {
- inMultilineString = (boolean) !inMultilineString;
- if (! inMultilineString)
- wasInMultilineString = TRUE;
- cp += 3;
- }
+ if (escaped)
+ escaped--;
+ else if (*cp == '\\')
+ escaped++;
+ else if (*cp == *start)
+ return cp + 1;
+ }
+ return cp;
+}
- if (*cp == '\0' || wasInMultilineString)
+/* Skip everything up to an identifier start. */
+static const char *skipEverything (const char *cp)
+{
+ for (; *cp; cp++)
+ {
+ if (*cp == '"' || *cp == '\'')
{
- wasInMultilineString = FALSE;
- break; /* at end of multiline string */
+ cp = skipString(cp);
+ if (!*cp) break;
}
+ if (isIdentifierFirstCharacter ((int) *cp))
+ return cp;
+ }
+ return cp;
+}
- /* update indent-sensitive things */
- if (!inMultilineString && !isspace(*cp))
+/* Skip an identifier. */
+static const char *skipIdentifier (const char *cp)
+{
+ while (isIdentifierCharacter ((int) *cp))
+ cp++;
+ return cp;
+}
+
+static const char *findDefinitionOrClass (const char *cp)
+{
+ while (*cp)
+ {
+ cp = skipEverything (cp);
+ if (!strncmp(cp, "def", 3) || !strncmp(cp, "class", 5))
{
- if (inFunction)
+ return cp;
+ }
+ cp = skipIdentifier (cp);
+ }
+ return NULL;
+}
+
+static const char *skipSpace (const char *cp)
+{
+ while (isspace ((int) *cp))
+ ++cp;
+ return cp;
+}
+
+/* Starting at ''cp'', parse an identifier into ''identifier''. */
+static const char *parseIdentifier (const char *cp, vString *const identifier)
+{
+ vStringClear (identifier);
+ while (isIdentifierCharacter ((int) *cp))
+ {
+ vStringPut (identifier, (int) *cp);
+ ++cp;
+ }
+ vStringTerminate (identifier);
+ return cp;
+}
+
+static void parseClass (const char *cp, vString *const class,
+ vString *const parent, int is_class_parent)
+{
+ vString *const inheritance = vStringNew ();
+ vStringClear (inheritance);
+ cp = parseIdentifier (cp, class);
+ cp = skipSpace (cp);
+ if (*cp == '(')
+ {
+ ++cp;
+ while (*cp != ')')
+ {
+ if (*cp == '\0')
{
- if (indent < fn_indent)
- inFunction = FALSE;
+ /* Closing parenthesis can be in follow up line. */
+ cp = (const char *) fileReadLine ();
+ if (!cp) break;
+ vStringPut (inheritance, ' ');
+ continue;
}
- if (lastclass != NULL)
- {
- if (indent <= lastclass->indent)
- {
- GList *last;
+ vStringPut (inheritance, *cp);
+ ++cp;
+ }
+ vStringTerminate (inheritance);
+ }
+ makeClassTag (class, inheritance, parent, is_class_parent);
+ vStringDelete (inheritance);
+}
- parents = clean_class_list(parents, indent);
- last = g_list_last(parents);
- if (last != NULL)
- lastclass = last->data;
- else
- lastclass = NULL;
- }
- }
+static void parseFunction (const char *cp, vString *const def,
+ vString *const parent, int is_class_parent)
+{
+ cp = parseIdentifier (cp, def);
+ makeFunctionTag (def, parent, is_class_parent);
+}
+
+/* Get the combined name of a nested symbol. Classes are separated with ".",
+ * functions with "/". For example this code:
+ * class MyClass:
+ * def myFunction:
+ * def SubFunction:
+ * class SubClass:
+ * def Method:
+ * pass
+ * Would produce this string:
+ * MyClass.MyFunction/SubFunction/SubClass.Method
+ */
+static boolean constructParentString(NestingLevels *nls, int indent,
+ vString *result)
+{
+ int i;
+ NestingLevel *prev = NULL;
+ int is_class = FALSE;
+ vStringClear (result);
+ for (i = 0; i < nls->n; i++)
+ {
+ NestingLevel *nl = nls->levels + i;
+ if (indent <= nl->indentation)
+ break;
+ if (prev)
+ {
+ if (prev->is_class)
+ vStringCatS(result, ".");
+ else
+ vStringCatS(result, "/");
}
+ vStringCat(result, nl->name);
+ is_class = nl->is_class;
+ prev = nl;
+ }
+ return is_class;
+}
- if (inMultilineString)
- ++cp;
- else if (isspace ((int) *cp))
+/* check whether parent's indentation level is higher than the current level and if so, remove it */
+static void checkParent(NestingLevels *nls, int indent, vString *parent)
+{
+ int i;
+ NestingLevel *n;
+
+ for (i = 0; i < nls->n; i++)
+ {
+ n = nls->levels + i;
+ /* is there a better way to compare two vStrings? */
+ if (strcmp(vStringValue(parent), vStringValue(n->name)) == 0)
{
- /* count indentation amount of current line
- * the indentation has to be made with tabs only _or_ spaces only, if they are mixed
- * the code below gets confused */
- if (cp == line)
+ if (n && indent <= n->indentation)
{
- do
- {
- indent++;
- cp++;
- } while (isspace(*cp));
+ /* invalidate this level by clearing its name */
+ vStringClear(n->name);
}
- else
- cp++; /* non-indent whitespace */
+ break;
}
- else if (*cp == '#')
- break;
- else if (strncmp ((const char*) cp, "class", (size_t) 5) == 0)
- {
- cp += 5;
- if (isspace ((int) *cp))
+ }
+}
+
+static NestingLevels *newNestingLevels(void)
+{
+ NestingLevels *nls = xCalloc (1, NestingLevels);
+ return nls;
+}
+
+static void freeNestingLevels(NestingLevels *nls)
+{
+ int i;
+ for (i = 0; i < nls->allocated; i++)
+ vStringDelete(nls->levels[i].name);
+ if (nls->levels) eFree(nls->levels);
+ eFree(nls);
+}
+
+/* TODO: This is totally out of place in python.c, but strlist.h is not usable.
+ * Maybe should just move these three functions to a separate file, even if no
+ * other parser uses them.
+ */
+static void addNestingLevel(NestingLevels *nls, int indentation,
+ vString *name, boolean is_class)
+{
+ int i;
+ NestingLevel *nl = NULL;
+
+ for (i = 0; i < nls->n; i++)
+ {
+ nl = nls->levels + i;
+ if (indentation <= nl->indentation) break;
+ }
+ if (i == nls->n)
+ {
+ if (i >= nls->allocated)
+ {
+ nls->allocated++;
+ nls->levels = xRealloc(nls->levels,
+ nls->allocated, NestingLevel);
+ nls->levels[i].name = vStringNew();
+ }
+ nl = nls->levels + i;
+ }
+ nls->n = i + 1;
+
+ vStringCopy(nl->name, name);
+ nl->indentation = indentation;
+ nl->is_class = is_class;
+}
+
+/* Checks whether a triple string was quoted before.
+ */
+static boolean isTripleQuoted(char const *start, char const *end, char quote_char)
+{
+ char const *cp = start;
+
+ while (cp < end && *cp != quote_char)
+ cp++;
+
+ return (cp < end);
+}
+
+/* Return a pointer to the start of the next triple string, or NULL. Store
+ * the kind of triple string in "which" if the return is not NULL.
+ */
+static char *find_triple_start(char const *string, char const **which)
+{
+ char *s;
+ *which = NULL;
+ if ((s = strstr (string, doubletriple)))
+ {
+ /* prevent parsing quoted triple strings */
+ if (isTripleQuoted (string, s, '\''))
+ return NULL;
+
+ *which = doubletriple;
+ }
+ else if ((s = strstr (string, singletriple)))
+ {
+ /* prevent parsing quoted triple strings */
+ if (isTripleQuoted (string, s, '"'))
+ return NULL;
+
+ *which = singletriple;
+ }
+ return s;
+}
+
+/* Find the end of a triple string as pointed to by "which", and update "which"
+ * with any other triple strings following in the given string.
+ */
+static void find_triple_end(char const *string, char const **which)
+{
+ char const *s = string;
+ while (1)
+ {
+ /* Check if the sting ends in the same line. */
+ s = strstr (string, *which);
+ if (!s) break;
+ s += 3;
+ *which = NULL;
+ /* If yes, check if another one starts in the same line. */
+ s = find_triple_start(s, which);
+ if (!s) break;
+ s += 3;
+ }
+}
+
+static void findPythonTags (void)
+{
+ vString *const continuation = vStringNew ();
+ vString *const name = vStringNew ();
+ vString *const parent = vStringNew();
+
+ NestingLevels *const nesting_levels = newNestingLevels();
+
+ const char *line;
+ int line_skip = 0;
+ char const *longStringLiteral = NULL;
+
+ while ((line = (const char *) fileReadLine ()) != NULL)
+ {
+ const char *cp = line;
+ char *longstring;
+ const char *keyword;
+ int indent;
+
+ cp = skipSpace (cp);
+
+ if (*cp == '\0') /* skip blank line */
+ continue;
+
+ /* skip comment if we are not inside a triple string */
+ if (*cp == '#' && ! longStringLiteral)
+ continue;
+
+ /* Deal with line continuation. */
+ if (!line_skip) vStringClear(continuation);
+ vStringCatS(continuation, line);
+ vStringStripTrailing(continuation);
+ if (vStringLast(continuation) == '\\')
+ {
+ vStringChop(continuation);
+ vStringCatS(continuation, " ");
+ line_skip = 1;
+ continue;
+ }
+ cp = line = vStringValue(continuation);
+ cp = skipSpace (cp);
+ indent = cp - line;
+ line_skip = 0;
+
+ checkParent(nesting_levels, indent, parent);
+
+ /* Deal with multiline string ending. */
+ if (longStringLiteral)
+ {
+ find_triple_end(cp, &longStringLiteral);
+ continue;
+ }
+
+ /* Deal with multiline string start. */
+ longstring = find_triple_start(cp, &longStringLiteral);
+ if (longstring)
+ {
+ /* Note: For our purposes, the line just ends at the first long
+ * string. I.e. we don't parse for any tags in the rest of the
+ * line, but we do look for the string ending of course.
+ */
+ *longstring = '\0';
+
+ longstring += 3;
+ find_triple_end(longstring, &longStringLiteral);
+ }
+
+ /* Deal with def and class keywords. */
+ keyword = findDefinitionOrClass (cp);
+ if (keyword)
+ {
+ boolean found = FALSE;
+ boolean is_class = FALSE;
+ if (!strncmp (keyword, "def ", 4))
{
- lastClass *newclass = g_new(lastClass, 1);
+ cp = skipSpace (keyword + 3);
+ found = TRUE;
+ }
+ else if (!strncmp (keyword, "class ", 6))
+ {
+ cp = skipSpace (keyword + 5);
+ found = TRUE;
+ is_class = TRUE;
+ }
- while (isspace ((int) *cp))
- ++cp;
- while (isalnum ((int) *cp) || *cp == '_')
- {
- vStringPut (name, (int) *cp);
- ++cp;
- }
- vStringTerminate (name);
+ if (found)
+ {
+ boolean is_parent_class;
- newclass->name = g_strdup(vStringValue(name));
- newclass->indent = indent;
- parents = g_list_append(parents, newclass);
- if (lastclass == NULL)
- makeSimpleTag (name, PythonKinds, K_CLASS);
+ is_parent_class =
+ constructParentString(nesting_levels, indent, parent);
+
+ if (is_class)
+ parseClass (cp, name, parent, is_parent_class);
else
- makeSimpleScopedTag (name, PythonKinds, K_CLASS,
- PythonKinds[K_CLASS].name, lastclass->name, "public");
- vStringClear (name);
+ parseFunction(cp, name, parent, is_parent_class);
- lastclass = newclass;
- break; /* ignore rest of line so that lastclass is not reset immediately */
+ addNestingLevel(nesting_levels, indent, name, is_class);
+ vStringClear(name);
}
- }
- else if (strncmp ((const char*) cp, "def", (size_t) 3) == 0)
- {
- cp += 3;
- if (isspace ((int) *cp))
- {
- while (isspace ((int) *cp))
- ++cp;
- while (isalnum ((int) *cp) || *cp == '_')
- {
- vStringPut (name, (int) *cp);
- ++cp;
- }
- vStringTerminate (name);
- if (!isspace(*line) || lastclass == NULL || strlen(lastclass->name) <= 0)
- makeSimpleTag (name, PythonKinds, K_FUNCTION);
- else
- makeSimpleScopedTag (name, PythonKinds, K_METHOD,
- PythonKinds[K_CLASS].name, lastclass->name, "public");
- vStringClear (name);
-
- inFunction = TRUE;
- fn_indent = indent + 1;
- break; /* ignore rest of line so inFunction is not cancelled immediately */
}
- }
- else if (!inFunction && *(const char*)cp == '=')
+ /* Find global and class variables */
+ if ((cp = strstr(line, "=")))
{
/* Parse global and class variable names (C.x) from assignment statements.
* Object attributes (obj.x) are ignored.
* Assignment to a tuple 'x, y = 2, 3' not supported.
* TODO: ignore duplicate tags from reassignment statements. */
- const guchar *sp, *eq, *start;
+ const char *sp, *eq, *start;
+ boolean parent_is_class;
eq = cp + 1;
while (*eq)
@@ -238,6 +554,8 @@
break; /* allow 'x = func(b=2,y=2,' lines */
eq++;
}
+ if (*eq == '=')
+ continue;
/* go backwards to the start of the line, checking we have valid chars */
start = cp - 1;
while (start >= line && isspace ((int) *start))
@@ -260,50 +578,34 @@
}
vStringTerminate (name);
- if (lastclass == NULL)
- makeSimpleTag (name, PythonKinds, K_VARIABLE);
- else
- makeSimpleScopedTag (name, PythonKinds, K_VARIABLE,
- PythonKinds[K_CLASS].name, lastclass->name, "public"); /* class member variables */
+ parent_is_class = constructParentString(nesting_levels, indent, parent);
+ /* skip variables in methods */
+ if (! parent_is_class && vStringLength(parent) > 0)
+ continue;
+ makeVariableTag (name, parent);
+
vStringClear (name);
-
skipvar:
- ++cp;
+ ; /* dummy */
}
- else if (*cp != '\0')
- {
- do
- ++cp;
- while (isalnum ((int) *cp) || *cp == '_');
- }
}
- }
- vStringDelete (name);
-
- /* clear the remaining elements in the list */
- tmp = g_list_first(parents);
- while (tmp != NULL)
- {
- if (tmp->data)
- {
- g_free(((lastClass*)tmp->data)->name);
- g_free(tmp->data);
- }
- tmp = tmp->next;
- }
- g_list_free(parents);
+ /* Clean up all memory we allocated. */
+ vStringDelete (parent);
+ vStringDelete (name);
+ vStringDelete (continuation);
+ freeNestingLevels (nesting_levels);
}
-extern parserDefinition* PythonParser (void)
+extern parserDefinition *PythonParser (void)
{
- static const char *const extensions [] = { "py", "python", NULL };
- parserDefinition* def = parserNew ("Python");
- def->kinds = PythonKinds;
- def->kindCount = KIND_COUNT (PythonKinds);
- def->extensions = extensions;
- def->parser = findPythonTags;
- return def;
+ static const char *const extensions[] = { "py", "pyx", "pxd", "scons", "python", NULL };
+ parserDefinition *def = parserNew ("Python");
+ def->kinds = PythonKinds;
+ def->kindCount = KIND_COUNT (PythonKinds);
+ def->extensions = extensions;
+ def->parser = findPythonTags;
+ return def;
}
-/* vi:set tabstop=8 shiftwidth=4: */
+/* vi:set tabstop=4 shiftwidth=4: */
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
More information about the Commits
mailing list