[geany/geany] 37f76c: Use uctags version of python

Jiří Techet git-noreply at xxxxx
Mon Mar 14 19:22:47 UTC 2022

Branch:      refs/heads/master
Author:      Jiří Techet <techet at gmail.com>
Committer:   Jiří Techet <techet at gmail.com>
Date:        Mon, 14 Mar 2022 19:22:47 UTC
Commit:      37f76c9a7b593f266104777b13dc61be5314fe48

Log Message:
Use uctags version of python

Modified Paths:

Modified: ctags/Makefile.am
2 lines changed, 1 insertions(+), 1 deletions(-)
@@ -81,7 +81,7 @@ parsers = \
 	parsers/perl.h \
 	parsers/php.c \
 	parsers/powershell.c \
-	parsers/geany_python.c \
+	parsers/python.c \
 	parsers/r.c \
 	parsers/r.h \
 	parsers/rst.c \

Modified: ctags/parsers/geany_python.c
862 lines changed, 0 insertions(+), 862 deletions(-)
@@ -1,862 +0,0 @@
-*   Copyright (c) 2000-2003, Darren Hiebert
-*   This source code is released for free distribution under the terms of the
-*   GNU General Public License version 2 or (at your option) any later version.
-*   This module contains functions for generating tags for Python language
-*   files.
-#include "general.h"  /* must always come first */
-#include <string.h>
-#include "entry.h"
-#include "nestlevel.h"
-#include "options.h"
-#include "read.h"
-#include "parse.h"
-#include "vstring.h"
-#include "routines.h"
-#include "debug.h"
-#include "xtag.h"
-struct corkInfo {
-	int index;
-struct nestingLevelUserData {
-	int indentation;
-#define PY_NL_INDENTATION(nl) ((struct nestingLevelUserData *)nestingLevelGetUserData(nl))->indentation
-typedef enum {
-} pythonKind;
-static kindDefinition PythonKinds[] = {
-	{true, 'c', "class",    "classes"},
-	{true, 'f', "function", "functions"},
-	{true, 'm', "member",   "class members"},
-	{true, 'v', "variable", "variables"},
-	{true, 'x', "unknown", "name referring a classe/variable/function/module defined in other module"}
-typedef enum {
-} pythonAccess;
-static const char *const PythonAccesses[] = {
-	"public", "private", "protected"
-static char const * const singletriple = "'''";
-static char const * const doubletriple = "\"\"\"";
-static bool isIdentifierFirstCharacter (int c)
-	return (bool) (isalpha (c) || c == '_');
-static bool isIdentifierCharacter (int c)
-	return (bool) (isalnum (c) || c == '_');
-/* follows PEP-8, and always reports single-underscores as protected
- * See:
- * - http://www.python.org/dev/peps/pep-0008/#method-names-and-instance-variables
- * - http://www.python.org/dev/peps/pep-0008/#designing-for-inheritance
- */
-static pythonAccess accessFromIdentifier (const vString *const ident,
-	pythonKind kind, bool has_parent, bool parent_is_class)
-	const char *const p = vStringValue (ident);
-	const size_t len = vStringLength (ident);
-	/* inside a function/method, private */
-	if (has_parent && !parent_is_class)
-		return A_PRIVATE;
-	/* not starting with "_", public */
-	else if (len < 1 || p[0] != '_')
-		return A_PUBLIC;
-	/* "__...__": magic methods */
-	else if (kind == K_METHOD && parent_is_class &&
-			 len > 3 && p[1] == '_' && p[len - 2] == '_' && p[len - 1] == '_')
-		return A_PUBLIC;
-	/* "__...": name mangling */
-	else if (parent_is_class && len > 1 && p[1] == '_')
-		return A_PRIVATE;
-	/* "_...": suggested as non-public, but easily accessible */
-	else
-		return A_PROTECTED;
-static void addAccessFields (tagEntryInfo *const entry,
-	const vString *const ident, pythonKind kind,
-	bool has_parent, bool parent_is_class)
-	pythonAccess access;
-	access = accessFromIdentifier (ident, kind, has_parent, parent_is_class);
-	entry->extensionFields.access = PythonAccesses [access];
-	/* FIXME: should we really set isFileScope in addition to access? */
-	if (access == A_PRIVATE)
-		entry->isFileScope = true;
-/* Given a string with the contents of a line directly after the "def" keyword,
- * extract all relevant information and create a tag.
- */
-static struct corkInfo makeFunctionTag (vString *const function,
-	vString *const parent, int is_class_parent, const char *arglist)
-	tagEntryInfo tag;
-	int corkIndex;
-	struct corkInfo info;
-	if (vStringLength (parent) > 0)
-	{
-		if (is_class_parent)
-		{
-			initTagEntry (&tag, vStringValue (function), K_METHOD);
-			tag.extensionFields.scopeKindIndex = K_CLASS;
-		}
-		else
-		{
-			initTagEntry (&tag, vStringValue (function), K_FUNCTION);
-			tag.extensionFields.scopeKindIndex = K_FUNCTION;
-		}
-		tag.extensionFields.scopeName = vStringValue (parent);
-	}
-	else
-		initTagEntry (&tag, vStringValue (function), K_FUNCTION);
-	tag.extensionFields.signature = arglist;
-	addAccessFields (&tag, function, is_class_parent ? K_METHOD : K_FUNCTION,
-		vStringLength (parent) > 0, is_class_parent);
-	corkIndex = makeTagEntry (&tag);
-	info.index = corkIndex;
-	return info;
-/* Given a string with the contents of the line directly after the "class"
- * keyword, extract all necessary information and create a tag.
- */
-static int makeClassTag (vString *const class, vString *const inheritance,
-	vString *const parent, int is_class_parent)
-	tagEntryInfo tag;
-	initTagEntry (&tag, vStringValue (class), K_CLASS);
-	if (vStringLength (parent) > 0)
-	{
-		if (is_class_parent)
-		{
-			tag.extensionFields.scopeKindIndex = K_CLASS;
-			tag.extensionFields.scopeName = vStringValue (parent);
-		}
-		else
-		{
-			tag.extensionFields.scopeKindIndex = K_FUNCTION;
-			tag.extensionFields.scopeName = vStringValue (parent);
-		}
-	}
-	tag.extensionFields.inheritance = vStringValue (inheritance);
-	addAccessFields (&tag, class, K_CLASS, vStringLength (parent) > 0,
-		is_class_parent);
-	return makeTagEntry (&tag);
-static void makeVariableTag (vString *const var, vString *const parent,
-	bool is_class_parent)
-	tagEntryInfo tag;
-	initTagEntry (&tag, vStringValue (var), K_VARIABLE);
-	if (vStringLength (parent) > 0)
-	{
-		tag.extensionFields.scopeKindIndex = K_CLASS;
-		tag.extensionFields.scopeName = vStringValue (parent);
-	}
-	addAccessFields (&tag, var, K_VARIABLE, vStringLength (parent) > 0,
-		is_class_parent);
-	makeTagEntry (&tag);
-/* Skip a single or double quoted string. */
-static const char *skipString (const char *cp)
-	const char *start = cp;
-	int escaped = 0;
-	for (cp++; *cp; cp++)
-	{
-		if (escaped)
-			escaped--;
-		else if (*cp == '\\')
-			escaped++;
-		else if (*cp == *start)
-			return cp + 1;
-	}
-	return cp;
-/* Skip everything up to an identifier start. */
-static const char *skipEverything (const char *cp)
-	int match;
-	for (; *cp; cp++)
-	{
-		if (*cp == '#')
-			return strchr(cp, '\0');
-		match = 0;
-		if (*cp == '"' || *cp == '\'')
-			match = 1;
-		/* these checks find unicode, binary (Python 3) and raw strings */
-		if (!match)
-		{
-			bool r_first = (*cp == 'r' || *cp == 'R');
-			/* "r" | "R" | "u" | "U" | "b" | "B" */
-			if (r_first || *cp == 'u' || *cp == 'U' ||  *cp == 'b' || *cp == 'B')
-			{
-				unsigned int i = 1;
-				/*  r_first -> "rb" | "rB" | "Rb" | "RB"
-				   !r_first -> "ur" | "UR" | "Ur" | "uR" | "br" | "Br" | "bR" | "BR" */
-				if (( r_first && (cp[i] == 'b' || cp[i] == 'B')) ||
-					(!r_first && (cp[i] == 'r' || cp[i] == 'R')))
-					i++;
-				if (cp[i] == '\'' || cp[i] == '"')
-				{
-					match = 1;
-					cp += i;
-				}
-			}
-		}
-		if (match)
-		{
-			cp = skipString(cp);
-			if (!*cp) break;
-		}
-		if (isIdentifierFirstCharacter ((int) *cp))
-			return cp;
-		if (match)
-			cp--; /* avoid jumping over the character after a skipped string */
-	}
-	return cp;
-/* Skip an identifier. */
-static const char *skipIdentifier (const char *cp)
-	while (isIdentifierCharacter ((int) *cp))
-		cp++;
-	return cp;
-static const char *findDefinitionOrClass (const char *cp)
-	while (*cp)
-	{
-		cp = skipEverything (cp);
-		if (!strncmp(cp, "def", 3) || !strncmp(cp, "class", 5) ||
-			!strncmp(cp, "cdef", 4) || !strncmp(cp, "cpdef", 5))
-		{
-			return cp;
-		}
-		cp = skipIdentifier (cp);
-	}
-	return NULL;
-static const char *skipSpace (const char *cp)
-	while (isspace ((int) *cp))
-		++cp;
-	return cp;
-/* Starting at ''cp'', parse an identifier into ''identifier''. */
-static const char *parseIdentifier (const char *cp, vString *const identifier)
-	vStringClear (identifier);
-	while (isIdentifierCharacter ((int) *cp))
-	{
-		vStringPut (identifier, (int) *cp);
-		++cp;
-	}
-	return cp;
-static int parseClass (const char *cp, vString *const class,
-	vString *const parent, int is_class_parent)
-	int corkIndex;
-	vString *const inheritance = vStringNew ();
-	vStringClear (inheritance);
-	cp = parseIdentifier (cp, class);
-	cp = skipSpace (cp);
-	if (*cp == '(')
-	{
-		++cp;
-		while (*cp != ')')
-		{
-			if (*cp == '\0')
-			{
-				/* Closing parenthesis can be in follow up line. */
-				cp = (const char *) readLineFromInputFile ();
-				if (!cp) break;
-				vStringPut (inheritance, ' ');
-				continue;
-			}
-			vStringPut (inheritance, *cp);
-			++cp;
-		}
-	}
-	corkIndex = makeClassTag (class, inheritance, parent, is_class_parent);
-	vStringDelete (inheritance);
-	return corkIndex;
-static void parseImports (const char *cp)
-	const char *pos;
-	vString *name, *name_next;
-	cp = skipEverything (cp);
-	if ((pos = strstr (cp, "import")) == NULL)
-		return;
-	cp = pos + 6;
-	/* continue only if there is some space between the keyword and the identifier */
-	if (! isspace (*cp))
-		return;
-	cp++;
-	cp = skipSpace (cp);
-	name = vStringNew ();
-	name_next = vStringNew ();
-	cp = skipEverything (cp);
-	while (*cp)
-	{
-		cp = parseIdentifier (cp, name);
-		cp = skipEverything (cp);
-		/* we parse the next possible import statement as well to be able to ignore 'foo' in
-		 * 'import foo as bar' */
-		parseIdentifier (cp, name_next);
-		/* take the current tag only if the next one is not "as" */
-		if (strcmp (vStringValue (name_next), "as") != 0 &&
-			strcmp (vStringValue (name), "as") != 0)
-		{
-			makeSimpleTag (name, K_IMPORT);
-		}
-	}
-	vStringDelete (name);
-	vStringDelete (name_next);
-/* modified from lcpp.c getArglistFromStr().
- * warning: terminates rest of string past arglist!
- * note: does not ignore brackets inside strings! */
-static char *parseArglist(const char *buf)
-	char *start, *end;
-	int level;
-	if (NULL == buf)
-		return NULL;
-	if (NULL == (start = strchr(buf, '(')))
-		return NULL;
-	for (level = 1, end = start + 1; level > 0; ++end)
-	{
-		if ('\0' == *end)
-			break;
-		else if ('(' == *end)
-			++ level;
-		else if (')' == *end)
-			-- level;
-	}
-	*end = '\0';
-	return strdup(start);
-static struct corkInfo parseFunction (const char *cp, vString *const def,
-	vString *const parent, int is_class_parent)
-	char *arglist;
-	struct corkInfo info;
-	cp = parseIdentifier (cp, def);
-	arglist = parseArglist (cp);
-	info = makeFunctionTag (def, parent, is_class_parent, arglist);
-	if (arglist != NULL)
-		eFree (arglist);
-	return info;
-/* Get the combined name of a nested symbol. Classes are separated with ".",
- * functions with "/". For example this code:
- * class MyClass:
- *     def myFunction:
- *         def SubFunction:
- *             class SubClass:
- *                 def Method:
- *                     pass
- * Would produce this string:
- * MyClass.MyFunction/SubFunction/SubClass.Method
- */
-static bool constructParentString(NestingLevels *nls, int indent,
-	vString *result)
-	int i;
-	NestingLevel *prev = NULL;
-	int is_class = false;
-	vStringClear (result);
-	for (i = 0; i < nls->n; i++)
-	{
-		NestingLevel *nl = nestingLevelsGetNthFromRoot (nls, i);
-		tagEntryInfo *e;
-		if (indent <= PY_NL_INDENTATION(nl))
-			break;
-		if (prev)
-		{
-			vStringCatS(result, ".");	/* make Geany symbol list grouping work properly */
-			if (prev->kindIndex == K_CLASS)
-				vStringCatS(result, ".");
-			else
-				vStringCatS(result, "/");
-		}
-		e = getEntryOfNestingLevel (nl);
-		if (e)
-		{
-			vStringCatS(result, e->name);
-			is_class = (e->kindIndex == K_CLASS);
-		}
-		else
-			is_class = false;
-		prev = nl;
-	}
-	return is_class;
-/* Check indentation level and truncate nesting levels accordingly */
-static void checkIndent(NestingLevels *nls, int indent)
-	int i;
-	NestingLevel *n;
-	for (i = 0; i < nls->n; i++)
-	{
-		n = nestingLevelsGetNthFromRoot (nls, i);
-		if (n && indent <= PY_NL_INDENTATION(n))
-		{
-			/* truncate levels */
-			nls->n = i;
-			break;
-		}
-	}
-static void addNestingLevel(NestingLevels *nls, int indentation, struct corkInfo *info)
-	int i;
-	NestingLevel *nl = NULL;
-	for (i = 0; i < nls->n; i++)
-	{
-		nl = nestingLevelsGetNthFromRoot(nls, i);
-		if (indentation <= PY_NL_INDENTATION(nl)) break;
-	}
-	if (i == nls->n)
-		nl = nestingLevelsPush(nls, info->index);
-	else
-		/* reuse existing slot */
-		nl = nestingLevelsTruncate (nls, i + 1, info->index);
-	PY_NL_INDENTATION(nl) = indentation;
-/* Return a pointer to the start of the next triple string, or NULL. Store
- * the kind of triple string in "which" if the return is not NULL.
- */
-static char const *find_triple_start(char const *string, char const **which)
-	char const *cp = string;
-	for (; *cp; cp++)
-	{
-		if (*cp == '#')
-			break;
-		if (*cp == '"' || *cp == '\'')
-		{
-			if (strncmp(cp, doubletriple, 3) == 0)
-			{
-				*which = doubletriple;
-				return cp;
-			}
-			if (strncmp(cp, singletriple, 3) == 0)
-			{
-				*which = singletriple;
-				return cp;
-			}
-			cp = skipString(cp);
-			if (!*cp) break;
-			cp--; /* avoid jumping over the character after a skipped string */
-		}
-	}
-	return NULL;
-/* Find the end of a triple string as pointed to by "which", and update "which"
- * with any other triple strings following in the given string.
- */
-static void find_triple_end(char const *string, char const **which)
-	char const *s = string;
-	while (1)
-	{
-		/* Check if the string ends in the same line. */
-		s = strstr (s, *which);
-		if (!s) break;
-		s += 3;
-		*which = NULL;
-		/* If yes, check if another one starts in the same line. */
-		s = find_triple_start(s, which);
-		if (!s) break;
-		s += 3;
-	}
-static const char *findVariable(const char *line)
-	/* Parse global and class variable names (C.x) from assignment statements.
-	 * Object attributes (obj.x) are ignored.
-	 * Assignment to a tuple 'x, y = 2, 3' not supported.
-	 * TODO: ignore duplicate tags from reassignment statements. */
-	const char *cp, *sp, *eq, *start;
-	cp = strstr(line, "=");
-	if (!cp)
-		return NULL;
-	eq = cp + 1;
-	while (*eq)
-	{
-		if (*eq == '=')
-			return NULL;	/* ignore '==' operator and 'x=5,y=6)' function lines */
-		if (*eq == '(' || *eq == '#')
-			break;	/* allow 'x = func(b=2,y=2,' lines and comments at the end of line */
-		eq++;
-	}
-	/* go backwards to the start of the line, checking we have valid chars */
-	start = cp - 1;
-	while (start >= line && isspace ((int) *start))
-		--start;
-	while (start >= line && isIdentifierCharacter ((int) *start))
-		--start;
-	if (!isIdentifierFirstCharacter(*(start + 1)))
-		return NULL;
-	sp = start;
-	while (sp >= line && isspace ((int) *sp))
-		--sp;
-	if ((sp + 1) != line)	/* the line isn't a simple variable assignment */
-		return NULL;
-	/* the line is valid, parse the variable name */
-	++start;
-	return start;
-/* Skip type declaration that optionally follows a cdef/cpdef */
-static const char *skipTypeDecl (const char *cp, bool *is_class)
-	const char *lastStart = cp, *ptr = cp;
-	int loopCount = 0;
-	ptr = skipSpace(cp);
-	if (!strncmp("extern", ptr, 6)) {
-		ptr += 6;
-		ptr = skipSpace(ptr);
-		if (!strncmp("from", ptr, 4)) { return NULL; }
-	}
-	if (!strncmp("class", ptr, 5)) {
-		ptr += 5 ;
-		*is_class = true;
-		ptr = skipSpace(ptr);
-		return ptr;
-	}
-	/* limit so that we don't pick off "int item=obj()" */
-	while (*ptr && loopCount++ < 2) {
-		while (*ptr && *ptr != '=' && *ptr != '(' && !isspace(*ptr)) {
-			/* skip over e.g. 'cpdef numpy.ndarray[dtype=double, ndim=1]' */
-			if(*ptr == '[') {
-				while (*ptr && *ptr != ']') ptr++;
-				if (*ptr) ptr++;
-			} else {
-				ptr++;
-			}
-		}
-		if (!*ptr || *ptr == '=') return NULL;
-		if (*ptr == '(') {
-			return lastStart; /* if we stopped on a '(' we are done */
-		}
-		ptr = skipSpace(ptr);
-		lastStart = ptr;
-		while (*lastStart == '*') lastStart++;  /* cdef int *identifier */
-	}
-	return NULL;
-/* checks if there is a lambda at position of cp, and return its argument list
- * if so.
- * We don't return the lambda name since it is useless for now since we already
- * know it when we call this function, and it would be a little slower. */
-static bool varIsLambda (const char *cp, char **arglist)
-	bool is_lambda = false;
-	cp = skipSpace (cp);
-	cp = skipIdentifier (cp); /* skip the lambda's name */
-	cp = skipSpace (cp);
-	if (*cp == '=')
-	{
-		cp++;
-		cp = skipSpace (cp);
-		if (strncmp (cp, "lambda", 6) == 0)
-		{
-			const char *tmp;
-			cp += 6; /* skip the lambda */
-			tmp = skipSpace (cp);
-			/* check if there is a space after lambda to detect assignations
-			 * starting with 'lambdaXXX' */
-			if (tmp != cp)
-			{
-				vString *args = vStringNew ();
-				cp = tmp;
-				vStringPut (args, '(');
-				for (; *cp != 0 && *cp != ':'; cp++)
-					vStringPut (args, *cp);
-				vStringPut (args, ')');
-				if (arglist)
-					*arglist = strdup (vStringValue (args));
-				vStringDelete (args);
-				is_lambda = true;
-			}
-		}
-	}
-	return is_lambda;
-/* checks if @p cp has keyword @p keyword at the start, and fills @p cp_n with
- * the position of the next non-whitespace after the keyword */
-static bool matchKeyword (const char *keyword, const char *cp, const char **cp_n)
-	size_t kw_len = strlen (keyword);
-	if (strncmp (cp, keyword, kw_len) == 0 && isspace (cp[kw_len]))
-	{
-		*cp_n = skipSpace (&cp[kw_len + 1]);
-		return true;
-	}
-	return false;
-static void findPythonTags (void)
-	vString *const continuation = vStringNew ();
-	vString *const name = vStringNew ();
-	vString *const parent = vStringNew();
-	NestingLevels *const nesting_levels = nestingLevelsNew(sizeof (struct nestingLevelUserData));
-	const char *line;
-	int line_skip = 0;
-	char const *longStringLiteral = NULL;
-	while ((line = (const char *) readLineFromInputFile ()) != NULL)
-	{
-		const char *cp = line, *candidate;
-		char const *longstring;
-		char const *keyword, *variable;
-		int indent;
-		cp = skipSpace (cp);
-		if (*cp == '\0')  /* skip blank line */
-			continue;
-		/* Skip comment if we are not inside a multi-line string. */
-		if (*cp == '#' && !longStringLiteral)
-			continue;
-		/* Deal with line continuation. */
-		if (!line_skip) vStringClear(continuation);
-		vStringCatS(continuation, line);
-		vStringStripTrailing(continuation);
-		if (vStringLast(continuation) == '\\')
-		{
-			vStringChop(continuation);
-			vStringCatS(continuation, " ");
-			line_skip = 1;
-			continue;
-		}
-		cp = line = vStringValue(continuation);
-		cp = skipSpace (cp);
-		indent = cp - line;
-		line_skip = 0;
-		/* Deal with multiline string ending. */
-		if (longStringLiteral)
-		{
-			find_triple_end(cp, &longStringLiteral);
-			continue;
-		}
-		checkIndent(nesting_levels, indent);
-		/* Find global and class variables */
-		variable = findVariable(line);
-		if (variable)
-		{
-			const char *start = variable;
-			char *arglist;
-			bool parent_is_class;
-			vStringClear (name);
-			while (isIdentifierCharacter ((int) *start))
-			{
-				vStringPut (name, (int) *start);
-				++start;
-			}
-			parent_is_class = constructParentString(nesting_levels, indent, parent);
-			if (varIsLambda (variable, &arglist))
-			{
-				/* show class members or top-level script lambdas only */
-				if (parent_is_class || vStringLength(parent) == 0)
-					makeFunctionTag (name, parent, parent_is_class, arglist);
-				eFree (arglist);
-			}
-			else
-			{
-				/* skip variables in methods */
-				if (parent_is_class || vStringLength(parent) == 0)
-					makeVariableTag (name, parent, parent_is_class);
-			}
-		}
-		/* Deal with multiline string start. */
-		longstring = find_triple_start(cp, &longStringLiteral);
-		if (longstring)
-		{
-			longstring += 3;
-			find_triple_end(longstring, &longStringLiteral);
-			/* We don't parse for any tags in the rest of the line. */
-			continue;
-		}
-		/* Deal with def and class keywords. */
-		keyword = findDefinitionOrClass (cp);
-		if (keyword)
-		{
-			bool found = false;
-			bool is_class = false;
-			if (matchKeyword ("def", keyword, &cp))
-			{
-				found = true;
-			}
-			else if (matchKeyword ("class", keyword, &cp))
-			{
-				found = true;
-				is_class = true;
-			}
-			else if (matchKeyword ("cdef", keyword, &cp))
-			{
-				candidate = skipTypeDecl (cp, &is_class);
-				if (candidate)
-				{
-					found = true;
-					cp = candidate;
-				}
-			}
-			else if (matchKeyword ("cpdef", keyword, &cp))
-			{
-				candidate = skipTypeDecl (cp, &is_class);
-				if (candidate)
-				{
-					found = true;
-					cp = candidate;
-				}
-			}
-			if (found)
-			{
-				bool is_parent_class;
-				struct corkInfo info;
-				is_parent_class =
-					constructParentString(nesting_levels, indent, parent);
-				if (is_class)
-				{
-					info.index = parseClass (cp, name, parent, is_parent_class);
-				}
-				else
-					info = parseFunction(cp, name, parent, is_parent_class);
-				addNestingLevel(nesting_levels, indent, &info);
-			}
-			continue;
-		}
-		/* Find and parse imports */
-		parseImports(line);
-	}
-	/* Force popping all nesting levels. */
-	checkIndent(nesting_levels, 0);
-	/* Clean up all memory we allocated. */
-	vStringDelete (parent);
-	vStringDelete (name);
-	vStringDelete (continuation);
-	nestingLevelsFree (nesting_levels);
-extern parserDefinition *PythonParser (void)
-    static const char *const extensions[] = { "py", "pyx", "pxd", "pxi" ,"scons", NULL };
-	parserDefinition *def = parserNew ("Python");
-	def->kindTable = PythonKinds;
-	def->kindCount = ARRAY_SIZE (PythonKinds);
-	def->extensions = extensions;
-	def->parser = findPythonTags;
-	def->useCork = CORK_QUEUE;
-	return def;

Modified: ctags/parsers/python.c
1597 lines changed, 1597 insertions(+), 0 deletions(-)
@@ -0,0 +1,1597 @@
+*   Copyright (c) 2000-2003, Darren Hiebert
+*   Copyright (c) 2014-2016, Colomban Wendling <ban at herbesfolles.org>
+*   This source code is released for free distribution under the terms of the
+*   GNU General Public License version 2 or (at your option) any later version.
+*   This module contains functions for generating tags for Python language
+*   files.
+#include "general.h"  /* must always come first */
+#include <string.h>
+#include "entry.h"
+#include "nestlevel.h"
+#include "read.h"
+#include "parse.h"
+#include "vstring.h"
+#include "keyword.h"
+#include "routines.h"
+#include "debug.h"
+#include "xtag.h"
+#include "objpool.h"
+#define isIdentifierChar(c) \
+	(isalnum (c) || (c) == '_' || (c) >= 0x80)
+#define newToken() (objPoolGet (TokenPool))
+#define deleteToken(t) (objPoolPut (TokenPool, (t)))
+enum {
+	KEYWORD_async,
+	KEYWORD_cdef,
+	KEYWORD_class,
+	KEYWORD_cpdef,
+	KEYWORD_def,
+	KEYWORD_extern,
+	KEYWORD_from,
+	KEYWORD_import,
+	KEYWORD_inline,
+	KEYWORD_lambda,
+	KEYWORD_pass,
+	KEYWORD_return,
+typedef int keywordId; /* to allow KEYWORD_NONE */
+typedef enum {
+} accessType;
+static const char *const PythonAccesses[COUNT_ACCESS] = {
+	"private",
+	"protected",
+	"public"
+typedef enum {
+} pythonField;
+static fieldDefinition PythonFields[COUNT_FIELD] = {
+	{ .name = "decorators",
+	  .description = "decorators on functions and classes",
+	  .enabled = false },
+	{ .name = "nameref",
+	  .description = "the original name for the tag",
+	  .enabled = true },
+typedef enum {
+} pythonKind;
+typedef enum {
+} pythonModuleRole;
+typedef enum {
+} pythonUnknownRole;
+/* Roles related to `import'
+ * ==========================
+ * import X              X = (kind:module, role:imported)
+ *
+ * import X as Y         X = (kind:module, role:indirectlyImported),
+ *                       Y = (kind:namespace, nameref:module:X)
+ *                       ------------------------------------------------
+ *                       Don't confuse the kind of Y with namespace role of module kind.
+ *
+ * from X import *       X = (kind:module,  role:namespace)
+ *
+ * from X import Y       X = (kind:module,  role:namespace),
+ *                       Y = (kind:unknown, role:imported, scope:module:X)
+ *
+ * from X import Y as Z  X = (kind:module,  role:namespace),
+ *                       Y = (kind:unknown, role:indirectlyImported, scope:module:X)
+ *                       Z = (kind:unknown, nameref:unknown:Y) */
+static roleDefinition PythonModuleRoles [] = {
+	{ true, "imported",
+	  "imported modules" },
+	{ true, "namespace",
+	  "namespace from where classes/variables/functions are imported" },
+	{ true, "indirectlyImported",
+	  "module imported in alternative name" },
+static roleDefinition PythonUnknownRoles [] = {
+	{ true, "imported",   "imported from the other module" },
+	{ true, "indirectlyImported",
+	  "classes/variables/functions/modules imported in alternative name" },
+static kindDefinition PythonKinds[COUNT_KIND] = {
+	{true, 'c', "class",    "classes"},
+	{true, 'f', "function", "functions"},
+	{true, 'm', "member",   "class members"},
+	{true, 'v', "variable", "variables"},
+	{true, 'I', "namespace", "name referring a module defined in other file"},
+	{true, 'i', "module",    "modules",
+	 .referenceOnly = true,  ATTACH_ROLES(PythonModuleRoles)},
+	{true, 'x', "unknown",   "name referring a class/variable/function/module defined in other module",
+	 .referenceOnly = false, ATTACH_ROLES(PythonUnknownRoles)},
+	{false, 'z', "parameter", "function parameters" },
+	{false, 'l', "local",    "local variables" },
+static const keywordTable PythonKeywordTable[] = {
+	/* keyword			keyword ID */
+	{ "as",				KEYWORD_as				},
+	{ "async",			KEYWORD_async			},
+	{ "cdef",			KEYWORD_cdef			},
+	{ "cimport",		KEYWORD_import			},
+	{ "class",			KEYWORD_class			},
+	{ "cpdef",			KEYWORD_cpdef			},
+	{ "def",			KEYWORD_def				},
+	{ "extern",			KEYWORD_extern			},
+	{ "from",			KEYWORD_from			},
+	{ "import",			KEYWORD_import			},
+	{ "inline",			KEYWORD_inline			},
+	{ "lambda",			KEYWORD_lambda			},
+	{ "pass",			KEYWORD_pass			},
+	{ "return",			KEYWORD_return			},
+typedef enum eTokenType {
+	/* 0..255 are the byte's value */
+	TOKEN_EOF = 256,
+	TOKEN_ARROW,				/* -> */
+} tokenType;
+typedef struct {
+	int				type;
+	keywordId		keyword;
+	vString *		string;
+	int				indent;
+	unsigned long 	lineNumber;
+	MIOPos			filePosition;
+} tokenInfo;
+struct pythonNestingLevelUserData {
+	int indentation;
+#define PY_NL(nl) ((struct pythonNestingLevelUserData *) nestingLevelGetUserData (nl))
+static langType Lang_python;
+static unsigned int TokenContinuationDepth = 0;
+static tokenInfo *NextToken = NULL;
+static NestingLevels *PythonNestingLevels = NULL;
+static objPool *TokenPool = NULL;
+/* follows PEP-8, and always reports single-underscores as protected
+ * See:
+ * - http://www.python.org/dev/peps/pep-0008/#method-names-and-instance-variables
+ * - http://www.python.org/dev/peps/pep-0008/#designing-for-inheritance
+ */
+static accessType accessFromIdentifier (const vString *const ident,
+                                        pythonKind kind, int parentKind)
+	const char *const p = vStringValue (ident);
+	const size_t len = vStringLength (ident);
+	/* inside a function/method, private */
+	if (parentKind != -1 && parentKind != K_CLASS)
+	/* not starting with "_", public */
+	else if (len < 1 || p[0] != '_')
+		return ACCESS_PUBLIC;
+	/* "__...__": magic methods */
+	else if (kind == K_FUNCTION && parentKind == K_CLASS &&
+	         len > 3 && p[1] == '_' && p[len - 2] == '_' && p[len - 1] == '_')
+		return ACCESS_PUBLIC;
+	/* "__...": name mangling */
+	else if (parentKind == K_CLASS && len > 1 && p[1] == '_')
+	/* "_...": suggested as non-public, but easily accessible */
+	else
+static void initPythonEntry (tagEntryInfo *const e, const tokenInfo *const token,
+                             const pythonKind kind)
+	accessType access;
+	int parentKind = -1;
+	NestingLevel *nl;
+	initTagEntry (e, vStringValue (token->string), kind);
+	e->lineNumber	= token->lineNumber;
+	e->filePosition	= token->filePosition;
+	nl = nestingLevelsGetCurrent (PythonNestingLevels);
+	if (nl)
+	{
+		tagEntryInfo *nlEntry = getEntryOfNestingLevel (nl);
+		e->extensionFields.scopeIndex = nl->corkIndex;
+		/* nlEntry can be NULL if a kind was disabled.  But what can we do
+		 * here?  Even disabled kinds should count for the hierarchy I
+		 * guess -- as it'd otherwise be wrong -- but with cork we're
+		 * fucked up as there's nothing to look up.  Damn. */
+		if (nlEntry)
+		{
+			parentKind = nlEntry->kindIndex;
+			/* functions directly inside classes are methods, fix it up */
+			if (kind == K_FUNCTION && parentKind == K_CLASS)
+				e->kindIndex = K_METHOD;
+		}
+	}
+	access = accessFromIdentifier (token->string, kind, parentKind);
+	e->extensionFields.access = PythonAccesses[access];
+	/* FIXME: should we really set isFileScope in addition to access? */
+	if (access == ACCESS_PRIVATE)
+		e->isFileScope = true;
+static int makeClassTag (const tokenInfo *const token,
+                         const vString *const inheritance,
+                         const vString *const decorators)
+	if (PythonKinds[K_CLASS].enabled)
+	{
+		tagEntryInfo e;
+		initPythonEntry (&e, token, K_CLASS);
+		e.extensionFields.inheritance = inheritance ? vStringValue (inheritance) : "";
+		if (decorators && vStringLength (decorators) > 0)
+		{
+			attachParserField (&e, false, PythonFields[F_DECORATORS].ftype,
+			                   vStringValue (decorators));
+		}
+		return makeTagEntry (&e);
+	}
+	return CORK_NIL;
+static int makeFunctionTag (const tokenInfo *const token,
+                            const vString *const arglist,
+                            const vString *const decorators)
+	if (PythonKinds[K_FUNCTION].enabled)
+	{
+		tagEntryInfo e;
+		initPythonEntry (&e, token, K_FUNCTION);
+		if (arglist)
+			e.extensionFields.signature = vStringValue (arglist);
+		if (decorators && vStringLength (decorators) > 0)
+		{
+			attachParserField (&e, false, PythonFields[F_DECORATORS].ftype,
+			                   vStringValue (decorators));
+		}
+		return makeTagEntry (&e);
+	}
+	return CORK_NIL;
+static int makeSimplePythonTag (const tokenInfo *const token, pythonKind const kind)
+	if (PythonKinds[kind].enabled)
+	{
+		tagEntryInfo e;
+		initPythonEntry (&e, token, kind);
+		return makeTagEntry (&e);
+	}
+	return CORK_NIL;
+static int makeSimplePythonRefTag (const tokenInfo *const token,
+                                   const vString *const altName,
+                                   pythonKind const kind,
+                                   int roleIndex, xtagType xtag)
+	if (isXtagEnabled (XTAG_REFERENCE_TAGS) &&
+	    PythonKinds[kind].roles[roleIndex].enabled)
+	{
+		tagEntryInfo e;
+		initRefTagEntry (&e, vStringValue (altName ? altName : token->string),
+		                 kind, roleIndex);
+		e.lineNumber	= token->lineNumber;
+		e.filePosition	= token->filePosition;
+		if (xtag != XTAG_UNKNOWN)
+			markTagExtraBit (&e, xtag);
+		return makeTagEntry (&e);
+	}
+	return CORK_NIL;
+static void *newPoolToken (void *createArg CTAGS_ATTR_UNUSED)
+	tokenInfo *token = xMalloc (1, tokenInfo);
+	token->string = vStringNew ();
+	return token;
+static void deletePoolToken (void *data)
+	tokenInfo *token = data;
+	vStringDelete (token->string);
+	eFree (token);
+static void clearPoolToken (void *data)
+	tokenInfo *token = data;
+	token->type			= TOKEN_UNDEFINED;
+	token->keyword		= KEYWORD_NONE;
+	token->indent		= 0;
+	token->lineNumber   = getInputLineNumber ();
+	token->filePosition = getInputFilePosition ();
+	vStringClear (token->string);
+static void copyToken (tokenInfo *const dest, const tokenInfo *const src)
+	dest->lineNumber = src->lineNumber;
+	dest->filePosition = src->filePosition;
+	dest->type = src->type;
+	dest->keyword = src->keyword;
+	dest->indent = src->indent;
+	vStringCopy(dest->string, src->string);
+/* Reads the body of a single or double quoted string into STRING.
+ * The opening quote must already have been consumed.  Reading stops at
+ * DELIMITER (consumed, not stored), at an unescaped end of line (pushed
+ * back to the input) or at EOF.
+ * A backslash is dropped and the character following it is stored
+ * verbatim, whatever it is. */
+static void readString (vString *const string, const int delimiter)
+{
+	int escaped = 0;
+	int c;
+	while ((c = getcFromInputFile ()) != EOF)
+	{
+		if (escaped)
+		{
+			vStringPut (string, c);
+			escaped--;
+		}
+		else if (c == '\\')
+			escaped++;
+		else if (c == delimiter || c == '\n' || c == '\r')
+		{
+			/* an unterminated string ends with its line: leave the
+			 * newline for the tokenizer */
+			if (c != delimiter)
+				ungetcToInputFile (c);
+			break;
+		}
+		else
+			vStringPut (string, c);
+	}
+}
+/* Reads the body of a triple-quoted string into STRING.
+ * The three opening quotes must already have been consumed.  Reading stops
+ * after three consecutive unescaped DELIMITER characters (not stored) or
+ * at EOF.  Shorter runs of the delimiter belong to the string and are
+ * stored.  Backslashes are dropped; the character following one is stored
+ * verbatim. */
+static void readTripleString (vString *const string, const int delimiter)
+{
+	int c;
+	int escaped = 0;
+	int n = 0;	/* length of the current run of unescaped delimiters */
+	while ((c = getcFromInputFile ()) != EOF)
+	{
+		if (c == delimiter && ! escaped)
+		{
+			if (++n >= 3)
+				break;
+		}
+		else
+		{
+			/* the pending delimiters did not close the string: flush
+			 * them as regular content */
+			for (; n > 0; n--)
+				vStringPut (string, delimiter);
+			if (c != '\\' || escaped)
+				vStringPut (string, c);
+			n = 0;
+		}
+		if (escaped)
+			escaped--;
+		else if (c == '\\')
+			escaped++;
+	}
+}
+/* Appends to STRING the identifier starting with FIRSTCHAR (already read
+ * by the caller).  The first character that is not an identifier character
+ * is pushed back to the input. */
+static void readIdentifier (vString *const string, const int firstChar)
+{
+	int c = firstChar;
+	do
+	{
+		vStringPut (string, (char) c);
+		c = getcFromInputFile ();
+	}
+	while (isIdentifierChar (c));
+	ungetcToInputFile (c);
+}
+/* Pushes a copy of TOKEN back so that the next readTokenFull() call
+ * returns it.  Only one token can be held back at a time. */
+static void ungetToken (tokenInfo *const token)
+{
+	Assert (NextToken == NULL);
+	NextToken = newToken ();
+	copyToken (NextToken, token);
+}
+/* Reads the next token from the input into TOKEN.
+ *
+ * A token held back by ungetToken() is returned first.  Otherwise:
+ *  - blanks (space, tab, form feed) are skipped; when INCLWHITESPACES is
+ *    true and at least one blank was skipped before something other than
+ *    a newline, a single-space TOKEN_WHITESPACE is returned instead;
+ *  - strings are returned as TOKEN_STRING including their quotes, with the
+ *    token position set to where the string ends;
+ *  - "==" and the "<op>=" forms are TOKEN_OPERATOR, "->" is TOKEN_ARROW,
+ *    and any other single character uses the character itself as type;
+ *  - a backslash followed by a newline joins the lines;
+ *  - comments and newlines yield TOKEN_INDENT carrying the indentation of
+ *    the next non-empty line, except inside brackets where they are either
+ *    whitespace (INCLWHITESPACES) or skipped altogether.
+ *
+ * TokenContinuationDepth is updated on every bracket token returned. */
+static void readTokenFull (tokenInfo *const token, bool inclWhitespaces)
+{
+	int c;
+	int n;
+	/* if we've got a token held back, emit it */
+	if (NextToken)
+	{
+		copyToken (token, NextToken);
+		deleteToken (NextToken);
+		NextToken = NULL;
+		return;
+	}
+	token->type		= TOKEN_UNDEFINED;
+	token->keyword	= KEYWORD_NONE;
+	vStringClear (token->string);
+
+	/* re-entry point after input that produces no token (line
+	 * continuations, newlines skipped inside brackets) */
+getNextChar:
+
+	n = 0;
+	do
+	{
+		c = getcFromInputFile ();
+		n++;
+	}
+	while (c == ' ' || c == '\t' || c == '\f');
+	token->lineNumber   = getInputLineNumber ();
+	token->filePosition = getInputFilePosition ();
+	/* n counts the non-blank character too, so n > 1 means "some blanks" */
+	if (inclWhitespaces && n > 1 && c != '\r' && c != '\n')
+	{
+		ungetcToInputFile (c);
+		vStringPut (token->string, ' ');
+		token->type = TOKEN_WHITESPACE;
+		return;
+	}
+	switch (c)
+	{
+		case EOF:
+			token->type = TOKEN_EOF;
+			break;
+		case '\'':
+		case '"':
+		{
+			int d = getcFromInputFile ();
+			token->type = TOKEN_STRING;
+			vStringPut (token->string, c);
+			if (d != c)
+			{
+				ungetcToInputFile (d);
+				readString (token->string, c);
+			}
+			else if ((d = getcFromInputFile ()) == c)
+				readTripleString (token->string, c);
+			else /* empty string */
+				ungetcToInputFile (d);
+			vStringPut (token->string, c);
+			token->lineNumber = getInputLineNumber ();
+			token->filePosition = getInputFilePosition ();
+			break;
+		}
+		case '=':
+		{
+			int d = getcFromInputFile ();
+			vStringPut (token->string, c);
+			if (d == c)
+			{
+				vStringPut (token->string, d);
+				token->type = TOKEN_OPERATOR;
+			}
+			else
+			{
+				ungetcToInputFile (d);
+				token->type = c;
+			}
+			break;
+		}
+		case '-':
+		{
+			int d = getcFromInputFile ();
+			if (d == '>')
+			{
+				vStringPut (token->string, c);
+				vStringPut (token->string, d);
+				token->type = TOKEN_ARROW;
+				break;
+			}
+			ungetcToInputFile (d);
+			/* fall through */
+		}
+		case '+':
+		case '*':
+		case '%':
+		case '<':
+		case '>':
+		case '/':
+		{
+			int d = getcFromInputFile ();
+			vStringPut (token->string, c);
+			if (d != '=')
+			{
+				ungetcToInputFile (d);
+				token->type = c;
+			}
+			else
+			{
+				vStringPut (token->string, d);
+				token->type = TOKEN_OPERATOR;
+			}
+			break;
+		}
+		/* eats newline to implement line continuation  */
+		case '\\':
+		{
+			int d = getcFromInputFile ();
+			if (d == '\r')
+				d = getcFromInputFile ();
+			if (d != '\n')
+				ungetcToInputFile (d);
+			goto getNextChar;
+		}
+		case '#': /* comment */
+		case '\r': /* newlines for indent */
+		case '\n':
+		{
+			int indent = 0;
+			do
+			{
+				if (c == '#')
+				{
+					do
+						c = getcFromInputFile ();
+					while (c != EOF && c != '\r' && c != '\n');
+				}
+				if (c == '\r')
+				{
+					int d = getcFromInputFile ();
+					if (d != '\n')
+						ungetcToInputFile (d);
+				}
+				indent = 0;
+				while ((c = getcFromInputFile ()) == ' ' || c == '\t' || c == '\f')
+				{
+					/* tabs advance to the next multiple of 8 */
+					if (c == '\t')
+						indent += 8 - (indent % 8);
+					else if (c == '\f') /* yeah, it's weird */
+						indent = 0;
+					else
+						indent++;
+				}
+			} /* skip completely empty lines, so retry */
+			while (c == '\r' || c == '\n' || c == '#');
+			ungetcToInputFile (c);
+			if (TokenContinuationDepth > 0)
+			{
+				if (inclWhitespaces)
+				{
+					vStringPut (token->string, ' ');
+					token->type = TOKEN_WHITESPACE;
+				}
+				else
+					goto getNextChar;
+			}
+			else
+			{
+				token->type = TOKEN_INDENT;
+				token->indent = indent;
+			}
+			break;
+		}
+		default:
+			if (! isIdentifierChar (c))
+			{
+				vStringPut (token->string, c);
+				token->type = c;
+			}
+			else
+			{
+				/* FIXME: handle U, B, R and F string prefixes? */
+				readIdentifier (token->string, c);
+				token->keyword = lookupKeyword (vStringValue (token->string), Lang_python);
+				if (token->keyword == KEYWORD_NONE)
+					token->type = TOKEN_IDENTIFIER;
+				else
+					token->type = TOKEN_KEYWORD;
+			}
+			break;
+	}
+	/* handle implicit continuation lines not to emit INDENT inside brackets
+	 * https://docs.python.org/3.6/reference/lexical_analysis.html#implicit-line-joining */
+	if (token->type == '(' ||
+	    token->type == '{' ||
+	    token->type == '[')
+	{
+		TokenContinuationDepth ++;
+	}
+	else if (TokenContinuationDepth > 0 &&
+	         (token->type == ')' ||
+	          token->type == '}' ||
+	          token->type == ']'))
+	{
+		TokenContinuationDepth --;
+	}
+}
+/* Reads the next token into TOKEN without reporting whitespace tokens. */
+static void readToken (tokenInfo *const token)
+{
+	readTokenFull (token, false);
+}
+/*================================= parsing =================================*/
+/* Appends the textual form of TOKEN to REPR.  Indent and whitespace tokens
+ * are collapsed into one space, which is never added at the very start of
+ * REPR nor right after another space. */
+static void reprCat (vString *const repr, const tokenInfo *const token)
+{
+	if (token->type != TOKEN_INDENT &&
+	    token->type != TOKEN_WHITESPACE)
+	{
+		vStringCat (repr, token->string);
+	}
+	else if (vStringLength (repr) > 0 && vStringLast (repr) != ' ')
+	{
+		vStringPut (repr, ' ');
+	}
+}
+/* If TOKEN is of type TOPEN, consumes tokens up to the matching TCLOSE
+ * (nested pairs are balanced) or to EOF.  Otherwise nothing is read.
+ *
+ * When REPR is non-NULL the tokens read are appended to it; the outermost
+ * opening and closing tokens are included only if REPROUTERPAIR is true.
+ *
+ * Returns true when TOKEN is of type TCLOSE on exit. */
+static bool skipOverPair (tokenInfo *const token, int tOpen, int tClose,
+                             vString *const repr, bool reprOuterPair)
+{
+	if (token->type == tOpen)
+	{
+		int depth = 1;
+		if (repr && reprOuterPair)
+			reprCat (repr, token);
+		do
+		{
+			readTokenFull (token, true);
+			/* depth is still the value from before this token, so
+			 * "tClose at depth 1" is the outermost closing token */
+			if (repr && (reprOuterPair || token->type != tClose || depth > 1))
+			{
+				reprCat (repr, token);
+			}
+			if (token->type == tOpen)
+				depth ++;
+			else if (token->type == tClose)
+				depth --;
+		}
+		while (token->type != TOKEN_EOF && depth > 0);
+	}
+	return token->type == tClose;
+}
+/* Consumes the argument list of a lambda, starting at TOKEN (the first
+ * token after the `lambda` keyword), and stops on the `:` that ends it,
+ * on an indent token or on EOF.  Bracketed groups and lambdas nested in
+ * default values are skipped as a whole.
+ *
+ * When REPR is non-NULL the text consumed is appended to it.
+ * Always returns false. */
+static bool skipLambdaArglist (tokenInfo *const token, vString *const repr)
+{
+	while (token->type != TOKEN_EOF && token->type != ':' &&
+	       /* avoid reading too much, just in case */
+	       token->type != TOKEN_INDENT)
+	{
+		bool readNext = true;
+		if (token->type == '(')
+			readNext = skipOverPair (token, '(', ')', repr, true);
+		else if (token->type == '[')
+			readNext = skipOverPair (token, '[', ']', repr, true);
+		else if (token->type == '{')
+			readNext = skipOverPair (token, '{', '}', repr, true);
+		else if (token->keyword == KEYWORD_lambda)
+		{ /* handle lambdas in a default value */
+			if (repr)
+				reprCat (repr, token);
+			readTokenFull (token, true);
+			readNext = skipLambdaArglist (token, repr);
+			/* the nested lambda's own ':' must not end this list */
+			if (token->type == ':')
+				readNext = true;
+			if (readNext && repr)
+				reprCat (repr, token);
+		}
+		else if (repr)
+		{
+			reprCat (repr, token);
+		}
+		if (readNext)
+			readTokenFull (token, true);
+	}
+	return false;
+}
+/* Reads a possibly dotted name (`a.b.c`, or a relative one such as `.a`)
+ * into NAMETOKEN as a single TOKEN_IDENTIFIER whose string is the whole
+ * dotted text and whose position is that of the last component read.
+ * The first token that is not part of the name is pushed back.
+ * If the next token starts no name at all, NAMETOKEN is simply that
+ * token. */
+static void readQualifiedName (tokenInfo *const nameToken)
+{
+	readToken (nameToken);
+	if (nameToken->type == TOKEN_IDENTIFIER ||
+	    nameToken->type == '.')
+	{
+		vString *qualifiedName = vStringNew ();
+		tokenInfo *token = newToken ();
+		while (nameToken->type == TOKEN_IDENTIFIER ||
+		       nameToken->type == '.')
+		{
+			vStringCat (qualifiedName, nameToken->string);
+			copyToken (token, nameToken);
+			readToken (nameToken);
+		}
+		/* put the last, non-matching, token back */
+		ungetToken (nameToken);
+		copyToken (nameToken, token);
+		nameToken->type = TOKEN_IDENTIFIER;
+		vStringCopy (nameToken->string, qualifiedName);
+		deleteToken (token);
+		vStringDelete (qualifiedName);
+	}
+}
+/* Finds the name declared by a `cdef`/`cpdef` statement.
+ *
+ * On success TOKEN holds the name and true is returned.  *KIND is set to
+ * K_CLASS for `cdef class NAME` and to K_FUNCTION when an identifier
+ * directly followed by `(` is found; otherwise it is left untouched.
+ * Returns false for `cdef extern from ...` / `cdef import from ...` and
+ * whenever the token the search stops on is not an identifier. */
+static bool readCDefName (tokenInfo *const token, pythonKind *kind)
+{
+	readToken (token);
+	if (token->keyword == KEYWORD_extern ||
+	    token->keyword == KEYWORD_import)
+	{
+		readToken (token);
+		if (token->keyword == KEYWORD_from)
+			return false;
+	}
+	if (token->keyword == KEYWORD_class)
+	{
+		*kind = K_CLASS;
+		readToken (token);
+	}
+	else
+	{
+		/* skip the optional type declaration -- everything on the same line
+		 * until an identifier followed by "(". */
+		tokenInfo *candidate = newToken ();
+		while (token->type != TOKEN_EOF &&
+		       token->type != TOKEN_INDENT &&
+		       token->type != '=' &&
+		       token->type != ',' &&
+		       token->type != ':')
+		{
+			if (token->type == '[')
+			{
+				if (skipOverPair (token, '[', ']', NULL, false))
+					readToken (token);
+			}
+			else if (token->type == '(')
+			{
+				if (skipOverPair (token, '(', ')', NULL, false))
+					readToken (token);
+			}
+			else if (token->type == TOKEN_IDENTIFIER)
+			{
+				copyToken (candidate, token);
+				readToken (token);
+				if (token->type == '(')
+				{ /* okay, we really found a function, use this */
+					*kind = K_FUNCTION;
+					ungetToken (token);
+					copyToken (token, candidate);
+					break;
+				}
+			}
+			else
+				readToken (token);
+		}
+		deleteToken (candidate);
+	}
+	return token->type == TOKEN_IDENTIFIER;
+}
+/* Parses the optional `: TYPE` annotation following a parameter name.
+ *
+ * Everything consumed is also appended to ARGLIST, so that the signature
+ * being built stays complete.  Parsing stops, with the token pushed back,
+ * at a `,` or `=` outside of any bracket, or at the closing bracket of the
+ * enclosing parameter list.
+ *
+ * Returns a newly allocated string holding the annotation text, owned by
+ * the caller, or NULL when there is no annotation or EOF is hit first. */
+static vString *parseParamTypeAnnotation (tokenInfo *const token,
+										  vString *arglist)
+{
+	readToken (token);
+	if (token->type != ':')
+	{
+		ungetToken (token);
+		return NULL;
+	}
+	reprCat (arglist, token);
+	int depth = 0;
+	vString *t = vStringNew ();
+	while (true)
+	{
+		readTokenFull (token, true);
+		if (token->type == TOKEN_WHITESPACE)
+		{
+			reprCat (arglist, token);
+			continue;
+		}
+		else if (token->type == TOKEN_EOF)
+			break;
+		if (token->type == '(' ||
+			token->type == '[' ||
+			token->type == '{')
+			depth ++;
+		else if (token->type == ')' ||
+				 token->type == ']' ||
+				 token->type == '}')
+			depth --;
+		/* depth < 0: that was the bracket closing the parameter list */
+		if (depth < 0
+			|| (depth == 0 && (token->type == '='
+							   || token->type == ',')))
+		{
+			ungetToken (token);
+			return t;
+		}
+		reprCat (arglist, token);
+		reprCat (t, token);
+	}
+	vStringDelete (t);
+	return NULL;
+}
+/* Parses the optional `-> TYPE` annotation following a parameter list.
+ *
+ * Parsing stops, with the token pushed back, at the first `:` found
+ * outside of any bracket.
+ *
+ * Returns a newly allocated string holding the annotation text, owned by
+ * the caller, or NULL when there is no `->` or EOF is hit first. */
+static vString *parseReturnTypeAnnotation (tokenInfo *const token)
+{
+	readToken (token);
+	if (token->type != TOKEN_ARROW)
+	{
+		ungetToken (token);
+		return NULL;
+	}
+	int depth = 0;
+	vString *t = vStringNew ();
+	while (true)
+	{
+		readToken (token);
+		if (token->type == TOKEN_EOF)
+			break;
+		if (token->type == '(' ||
+			token->type == '[' ||
+			token->type == '{')
+			depth ++;
+		else if (token->type == ')' ||
+				 token->type == ']' ||
+				 token->type == '}')
+			depth --;
+		if (depth == 0 && token->type == ':')
+		{
+			ungetToken (token);
+			return t;
+		}
+		else
+			reprCat (t, token);
+	}
+	vStringDelete (t);
+	return NULL;
+}
+/* Parses a `class`/`def` statement, or a `cdef`/`cpdef` one when ISCDEF
+ * is true, with TOKEN being the introducing keyword.
+ *
+ * Emits the class or function tag (with DECORATORS, and with the text
+ * between the parentheses as inheritance list or signature), pushes a
+ * nesting level for it, then emits one tag per recorded parameter
+ * together with its type annotation if any.  At most 16 parameters are
+ * recorded; further ones only appear in the signature.  For functions, a
+ * return type annotation is stored as the tag's typeref.
+ *
+ * Returns false, without emitting anything, when no name is found;
+ * true otherwise. */
+static bool parseClassOrDef (tokenInfo *const token,
+                                const vString *const decorators,
+                                pythonKind kind, bool isCDef)
+{
+	vString *arglist = NULL;
+	tokenInfo *name = NULL;
+	tokenInfo *parameterTokens[16] = { NULL };
+	vString   *parameterTypes [ARRAY_SIZE(parameterTokens)] = { NULL };
+	unsigned int parameterCount = 0;
+	NestingLevel *lv;
+	int corkIndex;
+	if (isCDef)
+	{
+		if (! readCDefName (token, &kind))
+			return false;
+	}
+	else
+	{
+		readToken (token);
+		if (token->type != TOKEN_IDENTIFIER)
+			return false;
+	}
+	name = newToken ();
+	copyToken (name, token);
+	readToken (token);
+	/* collect parameters or inheritance */
+	if (token->type == '(')
+	{
+		int prevTokenType = token->type;
+		int depth = 1;
+		arglist = vStringNew ();
+		/* a class keeps its inheritance list without the parentheses */
+		if (kind != K_CLASS)
+			reprCat (arglist, token);
+		do
+		{
+			if (token->type != TOKEN_WHITESPACE &&
+			    /* for easy `*args` and `**kwargs` support, we also ignore
+			     * `*`, which anyway can't otherwise screw us up */
+			    token->type != '*')
+			{
+				prevTokenType = token->type;
+			}
+			readTokenFull (token, true);
+			if (kind != K_CLASS || token->type != ')' || depth > 1)
+				reprCat (arglist, token);
+			if (token->type == '(' ||
+			    token->type == '[' ||
+			    token->type == '{')
+				depth ++;
+			else if (token->type == ')' ||
+			         token->type == ']' ||
+			         token->type == '}')
+				depth --;
+			/* an identifier right after `(` or `,` at the top level of
+			 * the list is a parameter name */
+			else if (kind != K_CLASS && depth == 1 &&
+			         token->type == TOKEN_IDENTIFIER &&
+			         (prevTokenType == '(' || prevTokenType == ',') &&
+			         parameterCount < ARRAY_SIZE (parameterTokens) &&
+			         PythonKinds[K_PARAMETER].enabled)
+			{
+				tokenInfo *parameterName = newToken ();
+				copyToken (parameterName, token);
+				parameterTokens[parameterCount] = parameterName;
+				parameterTypes [parameterCount++] = parseParamTypeAnnotation (token, arglist);
+			}
+		}
+		while (token->type != TOKEN_EOF && depth > 0);
+	}
+	if (kind == K_CLASS)
+		corkIndex = makeClassTag (name, arglist, decorators);
+	else
+		corkIndex = makeFunctionTag (name, arglist, decorators);
+	lv = nestingLevelsPush (PythonNestingLevels, corkIndex);
+	PY_NL (lv)->indentation = token->indent;
+	deleteToken (name);
+	vStringDelete (arglist);
+	/* parameters are tagged only now, after the nesting level of their
+	 * function has been pushed */
+	if (parameterCount > 0)
+	{
+		unsigned int i;
+		for (i = 0; i < parameterCount; i++)
+		{
+			int paramCorkIndex = makeSimplePythonTag (parameterTokens[i], K_PARAMETER);
+			deleteToken (parameterTokens[i]);
+			tagEntryInfo *e = getEntryInCorkQueue (paramCorkIndex);
+			if (e && parameterTypes[i])
+			{
+				/* the tag entry takes ownership of the type string */
+				e->extensionFields.typeRef [0] = eStrdup ("typename");
+				e->extensionFields.typeRef [1] = vStringDeleteUnwrap (parameterTypes[i]);
+				parameterTypes[i] = NULL;
+			}
+			vStringDelete (parameterTypes[i]); /* NULL is acceptable. */
+		}
+	}
+	tagEntryInfo *e;
+	vString *t;
+	if (kind != K_CLASS
+		&& (e = getEntryInCorkQueue (corkIndex))
+		&& (t = parseReturnTypeAnnotation (token)))
+	{
+		e->extensionFields.typeRef [0] = eStrdup ("typename");
+		e->extensionFields.typeRef [1] = vStringDeleteUnwrap (t);
+	}
+	return true;
+}
+/* Parses an `import ...` or `from X import ...` statement, with TOKEN
+ * being the `import` or `from` keyword, and emits:
+ *  - a reference tag for every module or name imported, and
+ *  - a definition tag for every `as` alias, linked to the name it stands
+ *    for through the nameref field when that field is enabled.
+ * A parenthesized list is accepted after `from X import`.
+ *
+ * Always returns false: on return TOKEN is the first token that is not
+ * part of the statement, so the caller must not read another one. */
+static bool parseImport (tokenInfo *const token)
+{
+	tokenInfo *fromModule = NULL;
+	if (token->keyword == KEYWORD_from)
+	{
+		readQualifiedName (token);
+		if (token->type == TOKEN_IDENTIFIER)
+		{
+			fromModule = newToken ();
+			copyToken (fromModule, token);
+			readToken (token);
+		}
+	}
+	if (token->keyword == KEYWORD_import)
+	{
+		bool parenthesized = false;
+		/* only meaningful, and only read, when fromModule is set */
+		int moduleIndex = CORK_NIL;
+		if (fromModule)
+		{
+			/* from X import ...
+			 * --------------------
+			 * X = (kind:module, role:namespace) */
+			moduleIndex = makeSimplePythonRefTag (fromModule, NULL, K_MODULE,
+												  PYTHON_MODULE_NAMESPACE,
+												  XTAG_UNKNOWN);
+		}
+		do
+		{
+			readQualifiedName (token);
+			/* support for `from x import (...)` */
+			if (fromModule && ! parenthesized && token->type == '(')
+			{
+				parenthesized = true;
+				readQualifiedName (token);
+			}
+			if (token->type == TOKEN_IDENTIFIER)
+			{
+				tokenInfo *name = newToken ();
+				copyToken (name, token);
+				readToken (token);
+				/* if there is an "as", use it as the name */
+				if (token->keyword == KEYWORD_as)
+				{
+					readToken (token);
+					if (token->type == TOKEN_IDENTIFIER)
+					{
+						if (fromModule)
+						{
+							/* from x import Y as Z
+							 * ----------------------------
+							 * x = (kind:module,  role:namespace),
+							 * Y = (kind:unknown, role:indirectlyImported, scope:module:X),
+							 * Z = (kind:unknown, nameref:unknown:Y) */
+							int index;
+							/* Y */
+							index = makeSimplePythonRefTag (name, NULL, K_UNKNOWN,
+															PYTHON_UNKNOWN_INDIRECTLY_IMPORTED,
+															XTAG_UNKNOWN);
+							/* fill the scope field for Y */
+							tagEntryInfo *e = getEntryInCorkQueue (index);
+							if (e)
+								e->extensionFields.scopeIndex = moduleIndex;
+							/* Z */
+							index = makeSimplePythonTag (token, K_UNKNOWN);
+							/* fill the nameref field for Z */
+							if (PythonFields[F_NAMEREF].enabled)
+							{
+								vString *nameref = vStringNewInit (PythonKinds [K_UNKNOWN].name);
+								vStringPut (nameref, ':');
+								vStringCat (nameref, name->string);
+								attachParserFieldToCorkEntry (index, PythonFields[F_NAMEREF].ftype,
+															  vStringValue (nameref));
+								vStringDelete (nameref);
+							}
+						}
+						else
+						{
+							/* import x as Y
+							 * ----------------------------
+							 * x = (kind:module, role:indirectlyImported)
+							 * Y = (kind:namespace, nameref:module:x)*/
+							/* x */
+							makeSimplePythonRefTag (name, NULL, K_MODULE,
+							                        PYTHON_MODULE_INDIRECTLY_IMPORTED,
+							                        XTAG_UNKNOWN);
+							/* Y */
+							int index = makeSimplePythonTag (token, K_NAMESPACE);
+							/* fill the nameref field for Y */
+							if (PythonFields[F_NAMEREF].enabled)
+							{
+								vString *nameref = vStringNewInit (PythonKinds [K_MODULE].name);
+								vStringPut (nameref, ':');
+								vStringCat (nameref, name->string);
+								attachParserFieldToCorkEntry (index, PythonFields[F_NAMEREF].ftype,
+															  vStringValue (nameref));
+								vStringDelete (nameref);
+							}
+						}
+						copyToken (name, token);
+						readToken (token);
+					}
+				}
+				else
+				{
+					if (fromModule)
+					{
+						/* from x import Y
+						   --------------
+						   x = (kind:module,  role:namespace),
+						   Y = (kind:unknown, role:imported, scope:module:x) */
+						/* Y */
+						int index = makeSimplePythonRefTag (name, NULL, K_UNKNOWN,
+															PYTHON_UNKNOWN_IMPORTED,
+															XTAG_UNKNOWN);
+						/* fill the scope field for Y */
+						tagEntryInfo *e = getEntryInCorkQueue (index);
+						if (e)
+							e->extensionFields.scopeIndex = moduleIndex;
+					}
+					else
+					{
+						/* import X
+						   --------------
+						   X = (kind:module, role:imported) */
+						makeSimplePythonRefTag (name, NULL, K_MODULE,
+						                        PYTHON_MODULE_IMPORTED,
+						                        XTAG_UNKNOWN);
+					}
+				}
+				deleteToken (name);
+			}
+		}
+		while (token->type == ',');
+		if (parenthesized && token->type == ')')
+			readToken (token);
+	}
+	if (fromModule)
+		deleteToken (fromModule);
+	return false;
+}
+/* Consumes the type annotation of a variable and appends its text to REPR.
+ *
+ * this only handles the most common cases, but an annotation can be any
+ * expression in theory.
+ * this function assumes there must be an annotation, and doesn't do any check
+ * on the token on which it is called: the caller should do that part.
+ *
+ * Handled: one leading token or bracketed group, followed by any number
+ * of subscripts, calls, `.NAME` accesses and `| TYPE` alternatives.
+ * Always returns false: on return TOKEN is the first token after the
+ * annotation. */
+static bool skipVariableTypeAnnotation (tokenInfo *const token, vString *const repr)
+{
+	bool readNext = true;
+	readToken (token);
+	switch (token->type)
+	{
+		case '[': readNext = skipOverPair (token, '[', ']', repr, true); break;
+		case '(': readNext = skipOverPair (token, '(', ')', repr, true); break;
+		case '{': readNext = skipOverPair (token, '{', '}', repr, true); break;
+		default: reprCat (repr, token);
+	}
+	if (readNext)
+		readToken (token);
+	/* skip subscripts and calls */
+	while (token->type == '[' || token->type == '(' || token->type == '.' || token->type == '|')
+	{
+		switch (token->type)
+		{
+			case '[': readNext = skipOverPair (token, '[', ']', repr, true); break;
+			case '(': readNext = skipOverPair (token, '(', ')', repr, true); break;
+			case '|':
+				/* the other side of a union is a full annotation; the
+				 * recursive call leaves the token after it already read */
+				reprCat (repr, token);
+				skipVariableTypeAnnotation (token, repr);
+				readNext = false;
+				break;
+			case '.':
+				reprCat (repr, token);
+				readToken (token);
+				readNext = token->type == TOKEN_IDENTIFIER;
+				if (readNext)
+					reprCat (repr, token);
+				break;
+			default:  readNext = false; break;
+		}
+		if (readNext)
+			readToken (token);
+	}
+	return false;
+}
+/* Parses a statement starting with an identifier, in case it is an
+ * assignment, and emits tags of kind KIND for the names assigned.
+ *
+ * Up to 8 comma-separated targets are collected, each with its optional
+ * type annotation.  Dotted targets (`a.b`) are skipped and get no tag.
+ * Nothing is emitted unless a `=` follows the targets.  Each target is
+ * then matched with the initializer at the same position: a `lambda`
+ * makes the target a function tag, or -- if the target is annotated -- a
+ * variable tag referring to an anonymous function tag.
+ *
+ * Always returns false: on return TOKEN is the first token that was not
+ * consumed, so the caller must not read another one. */
+static bool parseVariable (tokenInfo *const token, const pythonKind kind)
+{
+	/* In order to support proper tag type for lambdas in multiple
+	 * assignations, we first collect all the names, and then try and map
+	 * an assignation to it */
+	tokenInfo *nameTokens[8] = { NULL };
+	vString   *nameTypes [ARRAY_SIZE (nameTokens)] = { NULL };
+	unsigned int nameCount = 0;
+	vString *type = vStringNew();
+	/* first, collect variable name tokens */
+	while (token->type == TOKEN_IDENTIFIER &&
+	       nameCount < ARRAY_SIZE (nameTokens))
+	{
+		unsigned int i;
+		tokenInfo *name = newToken ();
+		copyToken (name, token);
+		readToken (token);
+		if (token->type == '.')
+		{
+			/* FIXME: what to do with dotted names?  We currently ignore them
+			 *        as we need to do something not to break the whole
+			 *        declaration, but the expected behavior is questionable */
+			deleteToken (name);
+			name = NULL;
+			do
+			{
+				readToken (token);
+			}
+			while (token->type == TOKEN_IDENTIFIER ||
+			       token->type == '.');
+		}
+		/* a NULL entry keeps the position of a skipped (dotted) target */
+		i = nameCount++;
+		nameTokens[i] = name;
+		/* (parse and) skip annotations.  we need not to be too permissive because we
+		 * aren't yet sure we're actually parsing a variable. */
+		if (token->type == ':' && skipVariableTypeAnnotation (token, type))
+			readToken (token);
+		if (vStringLength (type) > 0)
+		{
+			/* hand the filled buffer over and start a fresh one */
+			nameTypes[i] = type;
+			type = vStringNew ();
+		}
+		if (token->type == ',')
+			readToken (token);
+		else
+			break;
+	}
+	vStringDelete (type);
+	/* then, if it's a proper assignation, try and map assignations so that
+	 * we catch lambdas and alike */
+	if (token->type == '=')
+	{
+		unsigned int i = 0;
+		do
+		{
+			const tokenInfo *const nameToken = nameTokens[i];
+			vString **typeSlot = &(nameTypes[i++]);
+			readToken (token);
+			if (! nameToken)
+				/* nothing */;
+			else if (token->keyword != KEYWORD_lambda)
+			{
+				int index = makeSimplePythonTag (nameToken, kind);
+				tagEntryInfo *e = getEntryInCorkQueue (index);
+				if (e && *typeSlot)
+				{
+					/* the tag entry takes ownership of the type string */
+					e->extensionFields.typeRef [0] = eStrdup ("typename");
+					e->extensionFields.typeRef [1] = vStringDeleteUnwrap (*typeSlot);
+					*typeSlot = NULL;
+				}
+			}
+			else
+			{
+				tokenInfo *anon  = NULL;
+				vString *arglist = vStringNew ();
+				if (*typeSlot)
+				{
+					anon = newToken ();
+					copyToken (anon, token);
+				}
+				readToken (token);
+				vStringPut (arglist, '(');
+				skipLambdaArglist (token, arglist);
+				vStringPut (arglist, ')');
+				if (*typeSlot)
+				{
+					/* How to handle lambda assigned to a variable
+					 * --------------------------------------------
+					 *
+					 * input.py:
+					 *
+					 * 	  id = lambda var: var
+					 * 	  id_t: Callable[[int], int] = lambda var: var
+					 *
+					 * `id' is tagged as a function kind object like:
+					 *
+					 *    id	input.py	/^id = lambda var: var$/;"	function
+					 *
+					 * For `id_t' we cannot do the same as `id'.
+					 *
+					 * We should not store `Callable[[int], int]' to typeref
+					 * field of the tag of `id_t' if the tag has "function" as
+					 * its kind because users expect the typeref field of a
+					 * function kind represents a type for the value returned
+					 * from the function (return type).
+					 *
+					 * the unexpected tag:
+					 *
+					 *    id_t	input.py	/^id_t: Callable[[int], int] = lambda var: var$/;"	function \
+					 *                          typeref:typename:Callable[[int], int]
+					 *
+					 * If we make a tag for `id_t' as a function, we should
+					 * attach `typeref:typename:int' and `signature:(int)'. To
+					 * achieve this, we have to make ctags analyze
+					 * `Callable[[int], int]'.  However, we want to avoid the
+					 * level of analyzing.
+					 *
+					 * For recording `Callable[[int], int]', a valuable
+					 * information in the input, we use indirection.
+					 *
+					 *    id_t	input.py	/^id_t: Callable[[int], int] = lambda var: var$/;"	variable \
+					 *                          typeref:typename:Callable[[int], int]	nameref:function:anonFuncNNN
+					 *    anonFuncNNN	input.py	/^id_t: Callable[[int], int] = lambda var: var$/;"	function \
+					 *                          extras:anonymous
+					 */
+					int vindex = makeSimplePythonTag (nameToken, kind);
+					vStringClear (anon->string);
+					anonGenerate (anon->string, "anonFunc", K_FUNCTION);
+					int findex = makeFunctionTag (anon, arglist, NULL);
+					tagEntryInfo *fe = getEntryInCorkQueue (findex);
+					if (fe)
+						markTagExtraBit (fe, XTAG_ANONYMOUS);
+					tagEntryInfo *ve = getEntryInCorkQueue (vindex);
+					if (ve)
+					{
+						ve->extensionFields.typeRef [0] = eStrdup ("typename");
+						ve->extensionFields.typeRef [1] = vStringDeleteUnwrap (*typeSlot);
+						*typeSlot = NULL;
+						vString *nameref = vStringNewInit (PythonKinds [K_FUNCTION].name);
+						vStringPut (nameref, ':');
+						vStringCat (nameref, anon->string);
+						attachParserField (ve, true, PythonFields[F_NAMEREF].ftype,
+										   vStringValue (nameref));
+						vStringDelete (nameref);
+					}
+					if (anon)
+						deleteToken (anon);
+				}
+				else
+					makeFunctionTag (nameToken, arglist, NULL);
+				vStringDelete (arglist);
+			}
+			/* skip until next initializer */
+			while ((TokenContinuationDepth > 0 || token->type != ',') &&
+			       token->type != TOKEN_EOF &&
+			       token->type != ';' &&
+			       token->type != TOKEN_INDENT)
+			{
+				readToken (token);
+			}
+		}
+		while (token->type == ',' && i < nameCount);
+		/* if we got leftover to initialize, just make variables out of them.
+		 * This handles cases like `a, b, c = (c, d, e)` -- or worse */
+		for (; i < nameCount; i++)
+		{
+			if (nameTokens[i])
+				makeSimplePythonTag (nameTokens[i], kind);
+		}
+	}
+	/* release whatever was not handed over to a tag entry */
+	while (nameCount > 0)
+	{
+		if (nameTokens[--nameCount])
+			deleteToken (nameTokens[nameCount]);
+		vStringDelete (nameTypes[nameCount]); /* NULL is acceptable. */
+	}
+	return false;
+}
+/* Closes every nesting level whose indentation is greater than or equal
+ * to the one carried by TOKEN: the level is popped, and the tag it refers
+ * to, if any, gets TOKEN's line number as its end line. */
+static void setIndent (tokenInfo *const token)
+{
+	NestingLevel *lv = nestingLevelsGetCurrent (PythonNestingLevels);
+	while (lv && PY_NL (lv)->indentation >= token->indent)
+	{
+		tagEntryInfo *e = getEntryInCorkQueue (lv->corkIndex);
+		if (e)
+			e->extensionFields.endLine = token->lineNumber;
+		nestingLevelsPop (PythonNestingLevels);
+		lv = nestingLevelsGetCurrent (PythonNestingLevels);
+	}
+}
+/* Parser entry point: tokenizes the whole input and dispatches each
+ * statement to the matching routine (class/def, cdef/cpdef, imports,
+ * assignments, decorators).
+ *
+ * The sub-parsers return whether the main loop still has to read the
+ * next token (true), or whether the current token has not been consumed
+ * yet and must go through the loop as is (false). */
+static void findPythonTags (void)
+{
+	tokenInfo *const token = newToken ();
+	vString *decorators = vStringNew ();
+	bool atStatementStart = true;
+	TokenContinuationDepth = 0;
+	NextToken = NULL;
+	PythonNestingLevels = nestingLevelsNew (sizeof (struct pythonNestingLevelUserData));
+	readToken (token);
+	while (token->type != TOKEN_EOF)
+	{
+		/* type of the token this iteration started with: the token itself
+		 * is overwritten while parsing */
+		tokenType iterationTokenType = token->type;
+		bool readNext = true;
+		/* skip async keyword that confuses decorator parsing before a def */
+		if (token->keyword == KEYWORD_async)
+			readToken (token);
+		if (token->type == TOKEN_INDENT)
+			setIndent (token);
+		else if (token->keyword == KEYWORD_class ||
+		         token->keyword == KEYWORD_def)
+		{
+			pythonKind kind = token->keyword == KEYWORD_class ? K_CLASS : K_FUNCTION;
+			readNext = parseClassOrDef (token, decorators, kind, false);
+		}
+		else if (token->keyword == KEYWORD_cdef ||
+		         token->keyword == KEYWORD_cpdef)
+		{
+			readNext = parseClassOrDef (token, decorators, K_FUNCTION, true);
+		}
+		else if (token->keyword == KEYWORD_from ||
+		         token->keyword == KEYWORD_import)
+		{
+			readNext = parseImport (token);
+		}
+		else if (token->type == '(')
+		{ /* skip parentheses to avoid finding stuff inside them */
+			readNext = skipOverPair (token, '(', ')', NULL, false);
+		}
+		else if (token->type == TOKEN_IDENTIFIER && atStatementStart)
+		{
+			/* directly inside anything that is not a class, an assignment
+			 * makes a local variable */
+			NestingLevel *lv = nestingLevelsGetCurrent (PythonNestingLevels);
+			tagEntryInfo *lvEntry = getEntryOfNestingLevel (lv);
+			pythonKind kind = K_VARIABLE;
+			if (lvEntry && lvEntry->kindIndex != K_CLASS)
+				kind = K_LOCAL_VARIABLE;
+			readNext = parseVariable (token, kind);
+		}
+		else if (token->type == '@' && atStatementStart &&
+		         PythonFields[F_DECORATORS].enabled)
+		{
+			/* collect decorators */
+			readQualifiedName (token);
+			if (token->type != TOKEN_IDENTIFIER)
+				readNext = false;
+			else
+			{
+				if (vStringLength (decorators) > 0)
+					vStringPut (decorators, ',');
+				vStringCat (decorators, token->string);
+				readToken (token);
+				readNext = skipOverPair (token, '(', ')', decorators, true);
+			}
+		}
+		/* clear collected decorators for any non-decorator tokens non-indent
+		 * token.  decorator collection takes care of skipping the possible
+		 * argument list, so we should never hit here parsing a decorator */
+		if (iterationTokenType != TOKEN_INDENT &&
+		    iterationTokenType != '@' &&
+		    PythonFields[F_DECORATORS].enabled)
+		{
+			vStringClear (decorators);
+		}
+		atStatementStart = (token->type == TOKEN_INDENT || token->type == ';');
+		if (readNext)
+			readToken (token);
+	}
+	nestingLevelsFree (PythonNestingLevels);
+	vStringDelete (decorators);
+	deleteToken (token);
+	Assert (NextToken == NULL);
+}
+/* Parser initialization hook: records the language id assigned to Python
+ * and creates the pool the tokens are taken from. */
+static void initialize (const langType language)
+{
+	Lang_python = language;
+	TokenPool = objPoolNew (16, newPoolToken, deletePoolToken, clearPoolToken, NULL);
+}
+/* Parser finalization hook: destroys the token pool, provided the parser
+ * was initialized (INITIALIZED) and the pool therefore exists. */
+static void finalize (langType language CTAGS_ATTR_UNUSED, bool initialized)
+{
+	if (!initialized)
+		return;
+	objPoolDelete (TokenPool);
+}
+/* Builds the definition of the Python parser: file extensions and aliases
+ * it is selected by, its kind, keyword and field tables, and its
+ * callbacks.  Tags go through the cork queue (the parser fills in scopes
+ * and other fields after the entries are made), and fully qualified tags
+ * are requested to be generated automatically. */
+extern parserDefinition* PythonParser (void)
+{
+	static const char *const extensions[] = { "py", "pyx", "pxd", "pxi", "scons",
+											  "wsgi", NULL };
+	static const char *const aliases[] = { "python[23]*", "scons", NULL };
+	parserDefinition *def = parserNew ("Python");
+	def->kindTable = PythonKinds;
+	def->kindCount = ARRAY_SIZE (PythonKinds);
+	def->extensions = extensions;
+	def->aliases = aliases;
+	def->parser = findPythonTags;
+	def->initialize = initialize;
+	def->finalize = finalize;
+	def->keywordTable = PythonKeywordTable;
+	def->keywordCount = ARRAY_SIZE (PythonKeywordTable);
+	def->fieldTable = PythonFields;
+	def->fieldCount = ARRAY_SIZE (PythonFields);
+	def->useCork = CORK_QUEUE;
+	def->requestAutomaticFQTag = true;
+	return def;
+}

This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).

More information about the Commits mailing list