Branch: refs/heads/master Author: Jiří Techet techet@gmail.com Committer: Jiří Techet techet@gmail.com Date: Mon, 14 Mar 2022 19:22:47 UTC Commit: 37f76c9a7b593f266104777b13dc61be5314fe48 https://github.com/geany/geany/commit/37f76c9a7b593f266104777b13dc61be5314fe...
Log Message: ----------- Use uctags version of python
Modified Paths: -------------- ctags/Makefile.am ctags/parsers/geany_python.c ctags/parsers/python.c
Modified: ctags/Makefile.am 2 lines changed, 1 insertions(+), 1 deletions(-) =================================================================== @@ -81,7 +81,7 @@ parsers = \ parsers/perl.h \ parsers/php.c \ parsers/powershell.c \ - parsers/geany_python.c \ + parsers/python.c \ parsers/r.c \ parsers/r.h \ parsers/rst.c \
Modified: ctags/parsers/geany_python.c 862 lines changed, 0 insertions(+), 862 deletions(-) =================================================================== @@ -1,862 +0,0 @@ -/* -* Copyright (c) 2000-2003, Darren Hiebert -* -* This source code is released for free distribution under the terms of the -* GNU General Public License version 2 or (at your option) any later version. -* -* This module contains functions for generating tags for Python language -* files. -*/ -/* -* INCLUDE FILES -*/ -#include "general.h" /* must always come first */ - -#include <string.h> - -#include "entry.h" -#include "nestlevel.h" -#include "options.h" -#include "read.h" -#include "parse.h" -#include "vstring.h" -#include "routines.h" -#include "debug.h" -#include "xtag.h" - -/* -* DATA DEFINITIONS -*/ - -struct corkInfo { - int index; -}; - -struct nestingLevelUserData { - int indentation; -}; -#define PY_NL_INDENTATION(nl) ((struct nestingLevelUserData *)nestingLevelGetUserData(nl))->indentation - - -typedef enum { - K_CLASS, K_FUNCTION, K_METHOD, K_VARIABLE, K_IMPORT -} pythonKind; - -static kindDefinition PythonKinds[] = { - {true, 'c', "class", "classes"}, - {true, 'f', "function", "functions"}, - {true, 'm', "member", "class members"}, - {true, 'v', "variable", "variables"}, - {true, 'x', "unknown", "name referring a classe/variable/function/module defined in other module"} -}; - -typedef enum { - A_PUBLIC, A_PRIVATE, A_PROTECTED -} pythonAccess; - -static const char *const PythonAccesses[] = { - "public", "private", "protected" -}; - -static char const * const singletriple = "'''"; -static char const * const doubletriple = """""; - -/* -* FUNCTION DEFINITIONS -*/ - -static bool isIdentifierFirstCharacter (int c) -{ - return (bool) (isalpha (c) || c == '_'); -} - -static bool isIdentifierCharacter (int c) -{ - return (bool) (isalnum (c) || c == '_'); -} - -/* follows PEP-8, and always reports single-underscores as protected - * See: - * - 
http://www.python.org/dev/peps/pep-0008/#method-names-and-instance-variables - * - http://www.python.org/dev/peps/pep-0008/#designing-for-inheritance - */ -static pythonAccess accessFromIdentifier (const vString *const ident, - pythonKind kind, bool has_parent, bool parent_is_class) -{ - const char *const p = vStringValue (ident); - const size_t len = vStringLength (ident); - - /* inside a function/method, private */ - if (has_parent && !parent_is_class) - return A_PRIVATE; - /* not starting with "_", public */ - else if (len < 1 || p[0] != '_') - return A_PUBLIC; - /* "__...__": magic methods */ - else if (kind == K_METHOD && parent_is_class && - len > 3 && p[1] == '_' && p[len - 2] == '_' && p[len - 1] == '_') - return A_PUBLIC; - /* "__...": name mangling */ - else if (parent_is_class && len > 1 && p[1] == '_') - return A_PRIVATE; - /* "_...": suggested as non-public, but easily accessible */ - else - return A_PROTECTED; -} - -static void addAccessFields (tagEntryInfo *const entry, - const vString *const ident, pythonKind kind, - bool has_parent, bool parent_is_class) -{ - pythonAccess access; - - access = accessFromIdentifier (ident, kind, has_parent, parent_is_class); - entry->extensionFields.access = PythonAccesses [access]; - /* FIXME: should we really set isFileScope in addition to access? */ - if (access == A_PRIVATE) - entry->isFileScope = true; -} - -/* Given a string with the contents of a line directly after the "def" keyword, - * extract all relevant information and create a tag. 
- */ -static struct corkInfo makeFunctionTag (vString *const function, - vString *const parent, int is_class_parent, const char *arglist) -{ - tagEntryInfo tag; - int corkIndex; - struct corkInfo info; - - if (vStringLength (parent) > 0) - { - if (is_class_parent) - { - initTagEntry (&tag, vStringValue (function), K_METHOD); - tag.extensionFields.scopeKindIndex = K_CLASS; - } - else - { - initTagEntry (&tag, vStringValue (function), K_FUNCTION); - tag.extensionFields.scopeKindIndex = K_FUNCTION; - } - tag.extensionFields.scopeName = vStringValue (parent); - } - else - initTagEntry (&tag, vStringValue (function), K_FUNCTION); - - tag.extensionFields.signature = arglist; - - addAccessFields (&tag, function, is_class_parent ? K_METHOD : K_FUNCTION, - vStringLength (parent) > 0, is_class_parent); - - corkIndex = makeTagEntry (&tag); - - info.index = corkIndex; - return info; -} - -/* Given a string with the contents of the line directly after the "class" - * keyword, extract all necessary information and create a tag. 
- */ -static int makeClassTag (vString *const class, vString *const inheritance, - vString *const parent, int is_class_parent) -{ - tagEntryInfo tag; - initTagEntry (&tag, vStringValue (class), K_CLASS); - if (vStringLength (parent) > 0) - { - if (is_class_parent) - { - tag.extensionFields.scopeKindIndex = K_CLASS; - tag.extensionFields.scopeName = vStringValue (parent); - } - else - { - tag.extensionFields.scopeKindIndex = K_FUNCTION; - tag.extensionFields.scopeName = vStringValue (parent); - } - } - tag.extensionFields.inheritance = vStringValue (inheritance); - addAccessFields (&tag, class, K_CLASS, vStringLength (parent) > 0, - is_class_parent); - return makeTagEntry (&tag); -} - -static void makeVariableTag (vString *const var, vString *const parent, - bool is_class_parent) -{ - tagEntryInfo tag; - initTagEntry (&tag, vStringValue (var), K_VARIABLE); - if (vStringLength (parent) > 0) - { - tag.extensionFields.scopeKindIndex = K_CLASS; - tag.extensionFields.scopeName = vStringValue (parent); - } - addAccessFields (&tag, var, K_VARIABLE, vStringLength (parent) > 0, - is_class_parent); - makeTagEntry (&tag); -} - -/* Skip a single or double quoted string. */ -static const char *skipString (const char *cp) -{ - const char *start = cp; - int escaped = 0; - for (cp++; *cp; cp++) - { - if (escaped) - escaped--; - else if (*cp == '\') - escaped++; - else if (*cp == *start) - return cp + 1; - } - return cp; -} - -/* Skip everything up to an identifier start. 
*/ -static const char *skipEverything (const char *cp) -{ - int match; - for (; *cp; cp++) - { - if (*cp == '#') - return strchr(cp, '\0'); - - match = 0; - if (*cp == '"' || *cp == ''') - match = 1; - - /* these checks find unicode, binary (Python 3) and raw strings */ - if (!match) - { - bool r_first = (*cp == 'r' || *cp == 'R'); - - /* "r" | "R" | "u" | "U" | "b" | "B" */ - if (r_first || *cp == 'u' || *cp == 'U' || *cp == 'b' || *cp == 'B') - { - unsigned int i = 1; - - /* r_first -> "rb" | "rB" | "Rb" | "RB" - !r_first -> "ur" | "UR" | "Ur" | "uR" | "br" | "Br" | "bR" | "BR" */ - if (( r_first && (cp[i] == 'b' || cp[i] == 'B')) || - (!r_first && (cp[i] == 'r' || cp[i] == 'R'))) - i++; - - if (cp[i] == ''' || cp[i] == '"') - { - match = 1; - cp += i; - } - } - } - if (match) - { - cp = skipString(cp); - if (!*cp) break; - } - if (isIdentifierFirstCharacter ((int) *cp)) - return cp; - if (match) - cp--; /* avoid jumping over the character after a skipped string */ - } - return cp; -} - -/* Skip an identifier. */ -static const char *skipIdentifier (const char *cp) -{ - while (isIdentifierCharacter ((int) *cp)) - cp++; - return cp; -} - -static const char *findDefinitionOrClass (const char *cp) -{ - while (*cp) - { - cp = skipEverything (cp); - if (!strncmp(cp, "def", 3) || !strncmp(cp, "class", 5) || - !strncmp(cp, "cdef", 4) || !strncmp(cp, "cpdef", 5)) - { - return cp; - } - cp = skipIdentifier (cp); - } - return NULL; -} - -static const char *skipSpace (const char *cp) -{ - while (isspace ((int) *cp)) - ++cp; - return cp; -} - -/* Starting at ''cp'', parse an identifier into ''identifier''. 
*/ -static const char *parseIdentifier (const char *cp, vString *const identifier) -{ - vStringClear (identifier); - while (isIdentifierCharacter ((int) *cp)) - { - vStringPut (identifier, (int) *cp); - ++cp; - } - return cp; -} - -static int parseClass (const char *cp, vString *const class, - vString *const parent, int is_class_parent) -{ - int corkIndex; - vString *const inheritance = vStringNew (); - vStringClear (inheritance); - cp = parseIdentifier (cp, class); - cp = skipSpace (cp); - if (*cp == '(') - { - ++cp; - while (*cp != ')') - { - if (*cp == '\0') - { - /* Closing parenthesis can be in follow up line. */ - cp = (const char *) readLineFromInputFile (); - if (!cp) break; - vStringPut (inheritance, ' '); - continue; - } - vStringPut (inheritance, *cp); - ++cp; - } - } - corkIndex = makeClassTag (class, inheritance, parent, is_class_parent); - vStringDelete (inheritance); - return corkIndex; -} - -static void parseImports (const char *cp) -{ - const char *pos; - vString *name, *name_next; - - cp = skipEverything (cp); - - if ((pos = strstr (cp, "import")) == NULL) - return; - - cp = pos + 6; - - /* continue only if there is some space between the keyword and the identifier */ - if (! isspace (*cp)) - return; - - cp++; - cp = skipSpace (cp); - - name = vStringNew (); - name_next = vStringNew (); - - cp = skipEverything (cp); - while (*cp) - { - cp = parseIdentifier (cp, name); - - cp = skipEverything (cp); - /* we parse the next possible import statement as well to be able to ignore 'foo' in - * 'import foo as bar' */ - parseIdentifier (cp, name_next); - - /* take the current tag only if the next one is not "as" */ - if (strcmp (vStringValue (name_next), "as") != 0 && - strcmp (vStringValue (name), "as") != 0) - { - makeSimpleTag (name, K_IMPORT); - } - } - vStringDelete (name); - vStringDelete (name_next); -} - -/* modified from lcpp.c getArglistFromStr(). - * warning: terminates rest of string past arglist! 
- * note: does not ignore brackets inside strings! */ -static char *parseArglist(const char *buf) -{ - char *start, *end; - int level; - if (NULL == buf) - return NULL; - if (NULL == (start = strchr(buf, '('))) - return NULL; - for (level = 1, end = start + 1; level > 0; ++end) - { - if ('\0' == *end) - break; - else if ('(' == *end) - ++ level; - else if (')' == *end) - -- level; - } - *end = '\0'; - return strdup(start); -} - -static struct corkInfo parseFunction (const char *cp, vString *const def, - vString *const parent, int is_class_parent) -{ - char *arglist; - struct corkInfo info; - - cp = parseIdentifier (cp, def); - arglist = parseArglist (cp); - info = makeFunctionTag (def, parent, is_class_parent, arglist); - if (arglist != NULL) - eFree (arglist); - return info; -} - -/* Get the combined name of a nested symbol. Classes are separated with ".", - * functions with "/". For example this code: - * class MyClass: - * def myFunction: - * def SubFunction: - * class SubClass: - * def Method: - * pass - * Would produce this string: - * MyClass.MyFunction/SubFunction/SubClass.Method - */ -static bool constructParentString(NestingLevels *nls, int indent, - vString *result) -{ - int i; - NestingLevel *prev = NULL; - int is_class = false; - vStringClear (result); - for (i = 0; i < nls->n; i++) - { - NestingLevel *nl = nestingLevelsGetNthFromRoot (nls, i); - tagEntryInfo *e; - - if (indent <= PY_NL_INDENTATION(nl)) - break; - if (prev) - { - vStringCatS(result, "."); /* make Geany symbol list grouping work properly */ -/* - if (prev->kindIndex == K_CLASS) - vStringCatS(result, "."); - else - vStringCatS(result, "/"); -*/ - } - - e = getEntryOfNestingLevel (nl); - if (e) - { - vStringCatS(result, e->name); - is_class = (e->kindIndex == K_CLASS); - } - else - is_class = false; - - prev = nl; - } - return is_class; -} - -/* Check indentation level and truncate nesting levels accordingly */ -static void checkIndent(NestingLevels *nls, int indent) -{ - int i; - 
NestingLevel *n; - - for (i = 0; i < nls->n; i++) - { - n = nestingLevelsGetNthFromRoot (nls, i); - if (n && indent <= PY_NL_INDENTATION(n)) - { - /* truncate levels */ - nls->n = i; - break; - } - } -} - -static void addNestingLevel(NestingLevels *nls, int indentation, struct corkInfo *info) -{ - int i; - NestingLevel *nl = NULL; - - for (i = 0; i < nls->n; i++) - { - nl = nestingLevelsGetNthFromRoot(nls, i); - if (indentation <= PY_NL_INDENTATION(nl)) break; - } - if (i == nls->n) - nl = nestingLevelsPush(nls, info->index); - else - /* reuse existing slot */ - nl = nestingLevelsTruncate (nls, i + 1, info->index); - - PY_NL_INDENTATION(nl) = indentation; -} - -/* Return a pointer to the start of the next triple string, or NULL. Store - * the kind of triple string in "which" if the return is not NULL. - */ -static char const *find_triple_start(char const *string, char const **which) -{ - char const *cp = string; - - for (; *cp; cp++) - { - if (*cp == '#') - break; - if (*cp == '"' || *cp == ''') - { - if (strncmp(cp, doubletriple, 3) == 0) - { - *which = doubletriple; - return cp; - } - if (strncmp(cp, singletriple, 3) == 0) - { - *which = singletriple; - return cp; - } - cp = skipString(cp); - if (!*cp) break; - cp--; /* avoid jumping over the character after a skipped string */ - } - } - return NULL; -} - -/* Find the end of a triple string as pointed to by "which", and update "which" - * with any other triple strings following in the given string. - */ -static void find_triple_end(char const *string, char const **which) -{ - char const *s = string; - while (1) - { - /* Check if the string ends in the same line. */ - s = strstr (s, *which); - if (!s) break; - s += 3; - *which = NULL; - /* If yes, check if another one starts in the same line. */ - s = find_triple_start(s, which); - if (!s) break; - s += 3; - } -} - -static const char *findVariable(const char *line) -{ - /* Parse global and class variable names (C.x) from assignment statements. 
- * Object attributes (obj.x) are ignored. - * Assignment to a tuple 'x, y = 2, 3' not supported. - * TODO: ignore duplicate tags from reassignment statements. */ - const char *cp, *sp, *eq, *start; - - cp = strstr(line, "="); - if (!cp) - return NULL; - eq = cp + 1; - while (*eq) - { - if (*eq == '=') - return NULL; /* ignore '==' operator and 'x=5,y=6)' function lines */ - if (*eq == '(' || *eq == '#') - break; /* allow 'x = func(b=2,y=2,' lines and comments at the end of line */ - eq++; - } - - /* go backwards to the start of the line, checking we have valid chars */ - start = cp - 1; - while (start >= line && isspace ((int) *start)) - --start; - while (start >= line && isIdentifierCharacter ((int) *start)) - --start; - if (!isIdentifierFirstCharacter(*(start + 1))) - return NULL; - sp = start; - while (sp >= line && isspace ((int) *sp)) - --sp; - if ((sp + 1) != line) /* the line isn't a simple variable assignment */ - return NULL; - /* the line is valid, parse the variable name */ - ++start; - return start; -} - -/* Skip type declaration that optionally follows a cdef/cpdef */ -static const char *skipTypeDecl (const char *cp, bool *is_class) -{ - const char *lastStart = cp, *ptr = cp; - int loopCount = 0; - ptr = skipSpace(cp); - if (!strncmp("extern", ptr, 6)) { - ptr += 6; - ptr = skipSpace(ptr); - if (!strncmp("from", ptr, 4)) { return NULL; } - } - if (!strncmp("class", ptr, 5)) { - ptr += 5 ; - *is_class = true; - ptr = skipSpace(ptr); - return ptr; - } - /* limit so that we don't pick off "int item=obj()" */ - while (*ptr && loopCount++ < 2) { - while (*ptr && *ptr != '=' && *ptr != '(' && !isspace(*ptr)) { - /* skip over e.g. 
'cpdef numpy.ndarray[dtype=double, ndim=1]' */ - if(*ptr == '[') { - while (*ptr && *ptr != ']') ptr++; - if (*ptr) ptr++; - } else { - ptr++; - } - } - if (!*ptr || *ptr == '=') return NULL; - if (*ptr == '(') { - return lastStart; /* if we stopped on a '(' we are done */ - } - ptr = skipSpace(ptr); - lastStart = ptr; - while (*lastStart == '*') lastStart++; /* cdef int *identifier */ - } - return NULL; -} - -/* checks if there is a lambda at position of cp, and return its argument list - * if so. - * We don't return the lambda name since it is useless for now since we already - * know it when we call this function, and it would be a little slower. */ -static bool varIsLambda (const char *cp, char **arglist) -{ - bool is_lambda = false; - - cp = skipSpace (cp); - cp = skipIdentifier (cp); /* skip the lambda's name */ - cp = skipSpace (cp); - if (*cp == '=') - { - cp++; - cp = skipSpace (cp); - if (strncmp (cp, "lambda", 6) == 0) - { - const char *tmp; - - cp += 6; /* skip the lambda */ - tmp = skipSpace (cp); - /* check if there is a space after lambda to detect assignations - * starting with 'lambdaXXX' */ - if (tmp != cp) - { - vString *args = vStringNew (); - - cp = tmp; - vStringPut (args, '('); - for (; *cp != 0 && *cp != ':'; cp++) - vStringPut (args, *cp); - vStringPut (args, ')'); - if (arglist) - *arglist = strdup (vStringValue (args)); - vStringDelete (args); - is_lambda = true; - } - } - } - return is_lambda; -} - -/* checks if @p cp has keyword @p keyword at the start, and fills @p cp_n with - * the position of the next non-whitespace after the keyword */ -static bool matchKeyword (const char *keyword, const char *cp, const char **cp_n) -{ - size_t kw_len = strlen (keyword); - if (strncmp (cp, keyword, kw_len) == 0 && isspace (cp[kw_len])) - { - *cp_n = skipSpace (&cp[kw_len + 1]); - return true; - } - return false; -} - -static void findPythonTags (void) -{ - vString *const continuation = vStringNew (); - vString *const name = vStringNew (); - vString 
*const parent = vStringNew(); - - NestingLevels *const nesting_levels = nestingLevelsNew(sizeof (struct nestingLevelUserData)); - - const char *line; - int line_skip = 0; - char const *longStringLiteral = NULL; - - while ((line = (const char *) readLineFromInputFile ()) != NULL) - { - const char *cp = line, *candidate; - char const *longstring; - char const *keyword, *variable; - int indent; - - cp = skipSpace (cp); - - if (*cp == '\0') /* skip blank line */ - continue; - - /* Skip comment if we are not inside a multi-line string. */ - if (*cp == '#' && !longStringLiteral) - continue; - - /* Deal with line continuation. */ - if (!line_skip) vStringClear(continuation); - vStringCatS(continuation, line); - vStringStripTrailing(continuation); - if (vStringLast(continuation) == '\') - { - vStringChop(continuation); - vStringCatS(continuation, " "); - line_skip = 1; - continue; - } - cp = line = vStringValue(continuation); - cp = skipSpace (cp); - indent = cp - line; - line_skip = 0; - - /* Deal with multiline string ending. */ - if (longStringLiteral) - { - find_triple_end(cp, &longStringLiteral); - continue; - } - - checkIndent(nesting_levels, indent); - - /* Find global and class variables */ - variable = findVariable(line); - if (variable) - { - const char *start = variable; - char *arglist; - bool parent_is_class; - - vStringClear (name); - while (isIdentifierCharacter ((int) *start)) - { - vStringPut (name, (int) *start); - ++start; - } - - parent_is_class = constructParentString(nesting_levels, indent, parent); - if (varIsLambda (variable, &arglist)) - { - /* show class members or top-level script lambdas only */ - if (parent_is_class || vStringLength(parent) == 0) - makeFunctionTag (name, parent, parent_is_class, arglist); - eFree (arglist); - } - else - { - /* skip variables in methods */ - if (parent_is_class || vStringLength(parent) == 0) - makeVariableTag (name, parent, parent_is_class); - } - } - - /* Deal with multiline string start. 
*/ - longstring = find_triple_start(cp, &longStringLiteral); - if (longstring) - { - longstring += 3; - find_triple_end(longstring, &longStringLiteral); - /* We don't parse for any tags in the rest of the line. */ - continue; - } - - /* Deal with def and class keywords. */ - keyword = findDefinitionOrClass (cp); - if (keyword) - { - bool found = false; - bool is_class = false; - if (matchKeyword ("def", keyword, &cp)) - { - found = true; - } - else if (matchKeyword ("class", keyword, &cp)) - { - found = true; - is_class = true; - } - else if (matchKeyword ("cdef", keyword, &cp)) - { - candidate = skipTypeDecl (cp, &is_class); - if (candidate) - { - found = true; - cp = candidate; - } - - } - else if (matchKeyword ("cpdef", keyword, &cp)) - { - candidate = skipTypeDecl (cp, &is_class); - if (candidate) - { - found = true; - cp = candidate; - } - } - - if (found) - { - bool is_parent_class; - struct corkInfo info; - - is_parent_class = - constructParentString(nesting_levels, indent, parent); - - if (is_class) - { - info.index = parseClass (cp, name, parent, is_parent_class); - } - else - info = parseFunction(cp, name, parent, is_parent_class); - - addNestingLevel(nesting_levels, indent, &info); - } - continue; - } - /* Find and parse imports */ - parseImports(line); - } - - /* Force popping all nesting levels. */ - checkIndent(nesting_levels, 0); - - /* Clean up all memory we allocated. */ - vStringDelete (parent); - vStringDelete (name); - vStringDelete (continuation); - nestingLevelsFree (nesting_levels); -} - -extern parserDefinition *PythonParser (void) -{ - static const char *const extensions[] = { "py", "pyx", "pxd", "pxi" ,"scons", NULL }; - parserDefinition *def = parserNew ("Python"); - def->kindTable = PythonKinds; - def->kindCount = ARRAY_SIZE (PythonKinds); - def->extensions = extensions; - def->parser = findPythonTags; - def->useCork = CORK_QUEUE; - return def; -}
Modified: ctags/parsers/python.c 1597 lines changed, 1597 insertions(+), 0 deletions(-) =================================================================== @@ -0,0 +1,1597 @@ +/* +* Copyright (c) 2000-2003, Darren Hiebert +* Copyright (c) 2014-2016, Colomban Wendling ban@herbesfolles.org +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +* +* This module contains functions for generating tags for Python language +* files. +*/ + +#include "general.h" /* must always come first */ + +#include <string.h> + +#include "entry.h" +#include "nestlevel.h" +#include "read.h" +#include "parse.h" +#include "vstring.h" +#include "keyword.h" +#include "routines.h" +#include "debug.h" +#include "xtag.h" +#include "objpool.h" + +#define isIdentifierChar(c) \ + (isalnum (c) || (c) == '_' || (c) >= 0x80) +#define newToken() (objPoolGet (TokenPool)) +#define deleteToken(t) (objPoolPut (TokenPool, (t))) + +enum { + KEYWORD_as, + KEYWORD_async, + KEYWORD_cdef, + KEYWORD_class, + KEYWORD_cpdef, + KEYWORD_def, + KEYWORD_extern, + KEYWORD_from, + KEYWORD_import, + KEYWORD_inline, + KEYWORD_lambda, + KEYWORD_pass, + KEYWORD_return, +}; +typedef int keywordId; /* to allow KEYWORD_NONE */ + +typedef enum { + ACCESS_PRIVATE, + ACCESS_PROTECTED, + ACCESS_PUBLIC, + COUNT_ACCESS +} accessType; + +static const char *const PythonAccesses[COUNT_ACCESS] = { + "private", + "protected", + "public" +}; + +typedef enum { + F_DECORATORS, + F_NAMEREF, + COUNT_FIELD +} pythonField; + +static fieldDefinition PythonFields[COUNT_FIELD] = { + { .name = "decorators", + .description = "decorators on functions and classes", + .enabled = false }, + { .name = "nameref", + .description = "the original name for the tag", + .enabled = true }, +}; + +typedef enum { + K_CLASS, + K_FUNCTION, + K_METHOD, + K_VARIABLE, + K_NAMESPACE, + K_MODULE, + K_UNKNOWN, + K_PARAMETER, + K_LOCAL_VARIABLE, + COUNT_KIND +} 
pythonKind; + +typedef enum { + PYTHON_MODULE_IMPORTED, + PYTHON_MODULE_NAMESPACE, + PYTHON_MODULE_INDIRECTLY_IMPORTED, +} pythonModuleRole; + +typedef enum { + PYTHON_UNKNOWN_IMPORTED, + PYTHON_UNKNOWN_INDIRECTLY_IMPORTED, +} pythonUnknownRole; + +/* Roles related to `import' + * ========================== + * import X X = (kind:module, role:imported) + * + * import X as Y X = (kind:module, role:indirectlyImported), + * Y = (kind:namespace, nameref:module:X) + * ------------------------------------------------ + * Don't confuse the kind of Y with namespace role of module kind. + * + * from X import * X = (kind:module, role:namespace) + * + * from X import Y X = (kind:module, role:namespace), + * Y = (kind:unknown, role:imported, scope:module:X) + * + * from X import Y as Z X = (kind:module, role:namespace), + * Y = (kind:unknown, role:indirectlyImported, scope:module:X) + * Z = (kind:unknown, nameref:unknown:Y) */ + +static roleDefinition PythonModuleRoles [] = { + { true, "imported", + "imported modules" }, + { true, "namespace", + "namespace from where classes/variables/functions are imported" }, + { true, "indirectlyImported", + "module imported in alternative name" }, +}; + +static roleDefinition PythonUnknownRoles [] = { + { true, "imported", "imported from the other module" }, + { true, "indirectlyImported", + "classes/variables/functions/modules imported in alternative name" }, +}; + +static kindDefinition PythonKinds[COUNT_KIND] = { + {true, 'c', "class", "classes"}, + {true, 'f', "function", "functions"}, + {true, 'm', "member", "class members"}, + {true, 'v', "variable", "variables"}, + {true, 'I', "namespace", "name referring a module defined in other file"}, + {true, 'i', "module", "modules", + .referenceOnly = true, ATTACH_ROLES(PythonModuleRoles)}, + {true, 'x', "unknown", "name referring a class/variable/function/module defined in other module", + .referenceOnly = false, ATTACH_ROLES(PythonUnknownRoles)}, + {false, 'z', "parameter", "function 
parameters" }, + {false, 'l', "local", "local variables" }, +}; + +static const keywordTable PythonKeywordTable[] = { + /* keyword keyword ID */ + { "as", KEYWORD_as }, + { "async", KEYWORD_async }, + { "cdef", KEYWORD_cdef }, + { "cimport", KEYWORD_import }, + { "class", KEYWORD_class }, + { "cpdef", KEYWORD_cpdef }, + { "def", KEYWORD_def }, + { "extern", KEYWORD_extern }, + { "from", KEYWORD_from }, + { "import", KEYWORD_import }, + { "inline", KEYWORD_inline }, + { "lambda", KEYWORD_lambda }, + { "pass", KEYWORD_pass }, + { "return", KEYWORD_return }, +}; + +typedef enum eTokenType { + /* 0..255 are the byte's value */ + TOKEN_EOF = 256, + TOKEN_UNDEFINED, + TOKEN_INDENT, + TOKEN_KEYWORD, + TOKEN_OPERATOR, + TOKEN_IDENTIFIER, + TOKEN_STRING, + TOKEN_ARROW, /* -> */ + TOKEN_WHITESPACE, +} tokenType; + +typedef struct { + int type; + keywordId keyword; + vString * string; + int indent; + unsigned long lineNumber; + MIOPos filePosition; +} tokenInfo; + +struct pythonNestingLevelUserData { + int indentation; +}; +#define PY_NL(nl) ((struct pythonNestingLevelUserData *) nestingLevelGetUserData (nl)) + +static langType Lang_python; +static unsigned int TokenContinuationDepth = 0; +static tokenInfo *NextToken = NULL; +static NestingLevels *PythonNestingLevels = NULL; +static objPool *TokenPool = NULL; + + +/* follows PEP-8, and always reports single-underscores as protected + * See: + * - http://www.python.org/dev/peps/pep-0008/#method-names-and-instance-variables + * - http://www.python.org/dev/peps/pep-0008/#designing-for-inheritance + */ +static accessType accessFromIdentifier (const vString *const ident, + pythonKind kind, int parentKind) +{ + const char *const p = vStringValue (ident); + const size_t len = vStringLength (ident); + + /* inside a function/method, private */ + if (parentKind != -1 && parentKind != K_CLASS) + return ACCESS_PRIVATE; + /* not starting with "_", public */ + else if (len < 1 || p[0] != '_') + return ACCESS_PUBLIC; + /* "__...__": magic 
methods */ + else if (kind == K_FUNCTION && parentKind == K_CLASS && + len > 3 && p[1] == '_' && p[len - 2] == '_' && p[len - 1] == '_') + return ACCESS_PUBLIC; + /* "__...": name mangling */ + else if (parentKind == K_CLASS && len > 1 && p[1] == '_') + return ACCESS_PRIVATE; + /* "_...": suggested as non-public, but easily accessible */ + else + return ACCESS_PROTECTED; +} + +static void initPythonEntry (tagEntryInfo *const e, const tokenInfo *const token, + const pythonKind kind) +{ + accessType access; + int parentKind = -1; + NestingLevel *nl; + + initTagEntry (e, vStringValue (token->string), kind); + + e->lineNumber = token->lineNumber; + e->filePosition = token->filePosition; + + nl = nestingLevelsGetCurrent (PythonNestingLevels); + if (nl) + { + tagEntryInfo *nlEntry = getEntryOfNestingLevel (nl); + + e->extensionFields.scopeIndex = nl->corkIndex; + + /* nlEntry can be NULL if a kind was disabled. But what can we do + * here? Even disabled kinds should count for the hierarchy I + * guess -- as it'd otherwise be wrong -- but with cork we're + * fucked up as there's nothing to look up. Damn. */ + if (nlEntry) + { + parentKind = nlEntry->kindIndex; + + /* functions directly inside classes are methods, fix it up */ + if (kind == K_FUNCTION && parentKind == K_CLASS) + e->kindIndex = K_METHOD; + } + } + + access = accessFromIdentifier (token->string, kind, parentKind); + e->extensionFields.access = PythonAccesses[access]; + /* FIXME: should we really set isFileScope in addition to access? */ + if (access == ACCESS_PRIVATE) + e->isFileScope = true; +} + +static int makeClassTag (const tokenInfo *const token, + const vString *const inheritance, + const vString *const decorators) +{ + if (PythonKinds[K_CLASS].enabled) + { + tagEntryInfo e; + + initPythonEntry (&e, token, K_CLASS); + + e.extensionFields.inheritance = inheritance ? 
vStringValue (inheritance) : ""; + if (decorators && vStringLength (decorators) > 0) + { + attachParserField (&e, false, PythonFields[F_DECORATORS].ftype, + vStringValue (decorators)); + } + + return makeTagEntry (&e); + } + + return CORK_NIL; +} + +static int makeFunctionTag (const tokenInfo *const token, + const vString *const arglist, + const vString *const decorators) +{ + if (PythonKinds[K_FUNCTION].enabled) + { + tagEntryInfo e; + + initPythonEntry (&e, token, K_FUNCTION); + + if (arglist) + e.extensionFields.signature = vStringValue (arglist); + if (decorators && vStringLength (decorators) > 0) + { + attachParserField (&e, false, PythonFields[F_DECORATORS].ftype, + vStringValue (decorators)); + } + + return makeTagEntry (&e); + } + + return CORK_NIL; +} + +static int makeSimplePythonTag (const tokenInfo *const token, pythonKind const kind) +{ + if (PythonKinds[kind].enabled) + { + tagEntryInfo e; + + initPythonEntry (&e, token, kind); + return makeTagEntry (&e); + } + + return CORK_NIL; +} + +static int makeSimplePythonRefTag (const tokenInfo *const token, + const vString *const altName, + pythonKind const kind, + int roleIndex, xtagType xtag) +{ + if (isXtagEnabled (XTAG_REFERENCE_TAGS) && + PythonKinds[kind].roles[roleIndex].enabled) + { + tagEntryInfo e; + + initRefTagEntry (&e, vStringValue (altName ? 
altName : token->string), + kind, roleIndex); + + e.lineNumber = token->lineNumber; + e.filePosition = token->filePosition; + + if (xtag != XTAG_UNKNOWN) + markTagExtraBit (&e, xtag); + + return makeTagEntry (&e); + } + + return CORK_NIL; +} + +static void *newPoolToken (void *createArg CTAGS_ATTR_UNUSED) +{ + tokenInfo *token = xMalloc (1, tokenInfo); + token->string = vStringNew (); + return token; +} + +static void deletePoolToken (void *data) +{ + tokenInfo *token = data; + vStringDelete (token->string); + eFree (token); +} + +static void clearPoolToken (void *data) +{ + tokenInfo *token = data; + + token->type = TOKEN_UNDEFINED; + token->keyword = KEYWORD_NONE; + token->indent = 0; + token->lineNumber = getInputLineNumber (); + token->filePosition = getInputFilePosition (); + vStringClear (token->string); +} + +static void copyToken (tokenInfo *const dest, const tokenInfo *const src) +{ + dest->lineNumber = src->lineNumber; + dest->filePosition = src->filePosition; + dest->type = src->type; + dest->keyword = src->keyword; + dest->indent = src->indent; + vStringCopy(dest->string, src->string); +} + +/* Skip a single or double quoted string. */ +static void readString (vString *const string, const int delimiter) +{ + int escaped = 0; + int c; + + while ((c = getcFromInputFile ()) != EOF) + { + if (escaped) + { + vStringPut (string, c); + escaped--; + } + else if (c == '\') + escaped++; + else if (c == delimiter || c == '\n' || c == '\r') + { + if (c != delimiter) + ungetcToInputFile (c); + break; + } + else + vStringPut (string, c); + } +} + +/* Skip a single or double triple quoted string. */ +static void readTripleString (vString *const string, const int delimiter) +{ + int c; + int escaped = 0; + int n = 0; + while ((c = getcFromInputFile ()) != EOF) + { + if (c == delimiter && ! 
escaped) + { + if (++n >= 3) + break; + } + else + { + for (; n > 0; n--) + vStringPut (string, delimiter); + if (c != '\' || escaped) + vStringPut (string, c); + n = 0; + } + + if (escaped) + escaped--; + else if (c == '\') + escaped++; + } +} + +static void readIdentifier (vString *const string, const int firstChar) +{ + int c = firstChar; + do + { + vStringPut (string, (char) c); + c = getcFromInputFile (); + } + while (isIdentifierChar (c)); + ungetcToInputFile (c); +} + +static void ungetToken (tokenInfo *const token) +{ + Assert (NextToken == NULL); + NextToken = newToken (); + copyToken (NextToken, token); +} + +static void readTokenFull (tokenInfo *const token, bool inclWhitespaces) +{ + int c; + int n; + + /* if we've got a token held back, emit it */ + if (NextToken) + { + copyToken (token, NextToken); + deleteToken (NextToken); + NextToken = NULL; + return; + } + + token->type = TOKEN_UNDEFINED; + token->keyword = KEYWORD_NONE; + vStringClear (token->string); + +getNextChar: + + n = 0; + do + { + c = getcFromInputFile (); + n++; + } + while (c == ' ' || c == '\t' || c == '\f'); + + token->lineNumber = getInputLineNumber (); + token->filePosition = getInputFilePosition (); + + if (inclWhitespaces && n > 1 && c != '\r' && c != '\n') + { + ungetcToInputFile (c); + vStringPut (token->string, ' '); + token->type = TOKEN_WHITESPACE; + return; + } + + switch (c) + { + case EOF: + token->type = TOKEN_EOF; + break; + + case ''': + case '"': + { + int d = getcFromInputFile (); + token->type = TOKEN_STRING; + vStringPut (token->string, c); + if (d != c) + { + ungetcToInputFile (d); + readString (token->string, c); + } + else if ((d = getcFromInputFile ()) == c) + readTripleString (token->string, c); + else /* empty string */ + ungetcToInputFile (d); + vStringPut (token->string, c); + token->lineNumber = getInputLineNumber (); + token->filePosition = getInputFilePosition (); + break; + } + + case '=': + { + int d = getcFromInputFile (); + vStringPut (token->string, 
c); + if (d == c) + { + vStringPut (token->string, d); + token->type = TOKEN_OPERATOR; + } + else + { + ungetcToInputFile (d); + token->type = c; + } + break; + } + + case '-': + { + int d = getcFromInputFile (); + if (d == '>') + { + vStringPut (token->string, c); + vStringPut (token->string, d); + token->type = TOKEN_ARROW; + break; + } + ungetcToInputFile (d); + /* fall through */ + } + case '+': + case '*': + case '%': + case '<': + case '>': + case '/': + { + int d = getcFromInputFile (); + vStringPut (token->string, c); + if (d != '=') + { + ungetcToInputFile (d); + token->type = c; + } + else + { + vStringPut (token->string, d); + token->type = TOKEN_OPERATOR; + } + break; + } + + /* eats newline to implement line continuation */ + case '\': + { + int d = getcFromInputFile (); + if (d == '\r') + d = getcFromInputFile (); + if (d != '\n') + ungetcToInputFile (d); + goto getNextChar; + } + + case '#': /* comment */ + case '\r': /* newlines for indent */ + case '\n': + { + int indent = 0; + do + { + if (c == '#') + { + do + c = getcFromInputFile (); + while (c != EOF && c != '\r' && c != '\n'); + } + if (c == '\r') + { + int d = getcFromInputFile (); + if (d != '\n') + ungetcToInputFile (d); + } + indent = 0; + while ((c = getcFromInputFile ()) == ' ' || c == '\t' || c == '\f') + { + if (c == '\t') + indent += 8 - (indent % 8); + else if (c == '\f') /* yeah, it's weird */ + indent = 0; + else + indent++; + } + } /* skip completely empty lines, so retry */ + while (c == '\r' || c == '\n' || c == '#'); + ungetcToInputFile (c); + if (TokenContinuationDepth > 0) + { + if (inclWhitespaces) + { + vStringPut (token->string, ' '); + token->type = TOKEN_WHITESPACE; + } + else + goto getNextChar; + } + else + { + token->type = TOKEN_INDENT; + token->indent = indent; + } + break; + } + + default: + if (! isIdentifierChar (c)) + { + vStringPut (token->string, c); + token->type = c; + } + else + { + /* FIXME: handle U, B, R and F string prefixes? 
*/ + readIdentifier (token->string, c); + token->keyword = lookupKeyword (vStringValue (token->string), Lang_python); + if (token->keyword == KEYWORD_NONE) + token->type = TOKEN_IDENTIFIER; + else + token->type = TOKEN_KEYWORD; + } + break; + } + + /* handle implicit continuation lines not to emit INDENT inside brackets + * https://docs.python.org/3.6/reference/lexical_analysis.html#implicit-line-jo... */ + if (token->type == '(' || + token->type == '{' || + token->type == '[') + { + TokenContinuationDepth ++; + } + else if (TokenContinuationDepth > 0 && + (token->type == ')' || + token->type == '}' || + token->type == ']')) + { + TokenContinuationDepth --; + } +} + +static void readToken (tokenInfo *const token) +{ + readTokenFull (token, false); +} + +/*================================= parsing =================================*/ + + +static void reprCat (vString *const repr, const tokenInfo *const token) +{ + if (token->type != TOKEN_INDENT && + token->type != TOKEN_WHITESPACE) + { + vStringCat (repr, token->string); + } + else if (vStringLength (repr) > 0 && vStringLast (repr) != ' ') + { + vStringPut (repr, ' '); + } +} + +static bool skipOverPair (tokenInfo *const token, int tOpen, int tClose, + vString *const repr, bool reprOuterPair) +{ + if (token->type == tOpen) + { + int depth = 1; + + if (repr && reprOuterPair) + reprCat (repr, token); + do + { + readTokenFull (token, true); + if (repr && (reprOuterPair || token->type != tClose || depth > 1)) + { + reprCat (repr, token); + } + if (token->type == tOpen) + depth ++; + else if (token->type == tClose) + depth --; + } + while (token->type != TOKEN_EOF && depth > 0); + } + + return token->type == tClose; +} + +static bool skipLambdaArglist (tokenInfo *const token, vString *const repr) +{ + while (token->type != TOKEN_EOF && token->type != ':' && + /* avoid reading too much, just in case */ + token->type != TOKEN_INDENT) + { + bool readNext = true; + + if (token->type == '(') + readNext = skipOverPair (token, 
'(', ')', repr, true); + else if (token->type == '[') + readNext = skipOverPair (token, '[', ']', repr, true); + else if (token->type == '{') + readNext = skipOverPair (token, '{', '}', repr, true); + else if (token->keyword == KEYWORD_lambda) + { /* handle lambdas in a default value */ + if (repr) + reprCat (repr, token); + readTokenFull (token, true); + readNext = skipLambdaArglist (token, repr); + if (token->type == ':') + readNext = true; + if (readNext && repr) + reprCat (repr, token); + } + else if (repr) + { + reprCat (repr, token); + } + + if (readNext) + readTokenFull (token, true); + } + return false; +} + +static void readQualifiedName (tokenInfo *const nameToken) +{ + readToken (nameToken); + + if (nameToken->type == TOKEN_IDENTIFIER || + nameToken->type == '.') + { + vString *qualifiedName = vStringNew (); + tokenInfo *token = newToken (); + + while (nameToken->type == TOKEN_IDENTIFIER || + nameToken->type == '.') + { + vStringCat (qualifiedName, nameToken->string); + copyToken (token, nameToken); + + readToken (nameToken); + } + /* put the last, non-matching, token back */ + ungetToken (nameToken); + + copyToken (nameToken, token); + nameToken->type = TOKEN_IDENTIFIER; + vStringCopy (nameToken->string, qualifiedName); + + deleteToken (token); + vStringDelete (qualifiedName); + } +} + +static bool readCDefName (tokenInfo *const token, pythonKind *kind) +{ + readToken (token); + + if (token->keyword == KEYWORD_extern || + token->keyword == KEYWORD_import) + { + readToken (token); + if (token->keyword == KEYWORD_from) + return false; + } + + if (token->keyword == KEYWORD_class) + { + *kind = K_CLASS; + readToken (token); + } + else + { + /* skip the optional type declaration -- everything on the same line + * until an identifier followed by "(". 
*/ + tokenInfo *candidate = newToken (); + + while (token->type != TOKEN_EOF && + token->type != TOKEN_INDENT && + token->type != '=' && + token->type != ',' && + token->type != ':') + { + if (token->type == '[') + { + if (skipOverPair (token, '[', ']', NULL, false)) + readToken (token); + } + else if (token->type == '(') + { + if (skipOverPair (token, '(', ')', NULL, false)) + readToken (token); + } + else if (token->type == TOKEN_IDENTIFIER) + { + copyToken (candidate, token); + readToken (token); + if (token->type == '(') + { /* okay, we really found a function, use this */ + *kind = K_FUNCTION; + ungetToken (token); + copyToken (token, candidate); + break; + } + } + else + readToken (token); + } + + deleteToken (candidate); + } + + return token->type == TOKEN_IDENTIFIER; +} + +static vString *parseParamTypeAnnotation (tokenInfo *const token, + vString *arglist) +{ + readToken (token); + if (token->type != ':') + { + ungetToken (token); + return NULL; + } + + reprCat (arglist, token); + int depth = 0; + vString *t = vStringNew (); + while (true) + { + readTokenFull (token, true); + if (token->type == TOKEN_WHITESPACE) + { + reprCat (arglist, token); + continue; + } + else if (token->type == TOKEN_EOF) + break; + + if (token->type == '(' || + token->type == '[' || + token->type == '{') + depth ++; + else if (token->type == ')' || + token->type == ']' || + token->type == '}') + depth --; + + if (depth < 0 + || (depth == 0 && (token->type == '=' + || token->type == ','))) + { + ungetToken (token); + return t; + } + reprCat (arglist, token); + reprCat (t, token); + } + vStringDelete (t); + return NULL; +} + +static vString *parseReturnTypeAnnotation (tokenInfo *const token) +{ + readToken (token); + if (token->type != TOKEN_ARROW) + { + ungetToken (token); + return NULL; + } + + int depth = 0; + vString *t = vStringNew (); + while (true) + { + readToken (token); + if (token->type == TOKEN_EOF) + break; + + if (token->type == '(' || + token->type == '[' || + 
token->type == '{') + depth ++; + else if (token->type == ')' || + token->type == ']' || + token->type == '}') + depth --; + if (depth == 0 && token->type == ':') + { + ungetToken (token); + return t; + } + else + reprCat (t, token); + } + vStringDelete (t); + return NULL; +} + +static bool parseClassOrDef (tokenInfo *const token, + const vString *const decorators, + pythonKind kind, bool isCDef) +{ + vString *arglist = NULL; + tokenInfo *name = NULL; + tokenInfo *parameterTokens[16] = { NULL }; + vString *parameterTypes [ARRAY_SIZE(parameterTokens)] = { NULL }; + unsigned int parameterCount = 0; + NestingLevel *lv; + int corkIndex; + + if (isCDef) + { + if (! readCDefName (token, &kind)) + return false; + } + else + { + readToken (token); + if (token->type != TOKEN_IDENTIFIER) + return false; + } + + name = newToken (); + copyToken (name, token); + + readToken (token); + /* collect parameters or inheritance */ + if (token->type == '(') + { + int prevTokenType = token->type; + int depth = 1; + + arglist = vStringNew (); + if (kind != K_CLASS) + reprCat (arglist, token); + + do + { + if (token->type != TOKEN_WHITESPACE && + /* for easy `*args` and `**kwargs` support, we also ignore + * `*`, which anyway can't otherwise screw us up */ + token->type != '*') + { + prevTokenType = token->type; + } + + readTokenFull (token, true); + if (kind != K_CLASS || token->type != ')' || depth > 1) + reprCat (arglist, token); + + if (token->type == '(' || + token->type == '[' || + token->type == '{') + depth ++; + else if (token->type == ')' || + token->type == ']' || + token->type == '}') + depth --; + else if (kind != K_CLASS && depth == 1 && + token->type == TOKEN_IDENTIFIER && + (prevTokenType == '(' || prevTokenType == ',') && + parameterCount < ARRAY_SIZE (parameterTokens) && + PythonKinds[K_PARAMETER].enabled) + { + tokenInfo *parameterName = newToken (); + + copyToken (parameterName, token); + parameterTokens[parameterCount] = parameterName; + parameterTypes 
[parameterCount++] = parseParamTypeAnnotation (token, arglist); + } + } + while (token->type != TOKEN_EOF && depth > 0); + } + + if (kind == K_CLASS) + corkIndex = makeClassTag (name, arglist, decorators); + else + corkIndex = makeFunctionTag (name, arglist, decorators); + + lv = nestingLevelsPush (PythonNestingLevels, corkIndex); + PY_NL (lv)->indentation = token->indent; + + deleteToken (name); + vStringDelete (arglist); + + if (parameterCount > 0) + { + unsigned int i; + + for (i = 0; i < parameterCount; i++) + { + int paramCorkIndex = makeSimplePythonTag (parameterTokens[i], K_PARAMETER); + deleteToken (parameterTokens[i]); + tagEntryInfo *e = getEntryInCorkQueue (paramCorkIndex); + if (e && parameterTypes[i]) + { + e->extensionFields.typeRef [0] = eStrdup ("typename"); + e->extensionFields.typeRef [1] = vStringDeleteUnwrap (parameterTypes[i]); + parameterTypes[i] = NULL; + } + vStringDelete (parameterTypes[i]); /* NULL is acceptable. */ + } + } + + tagEntryInfo *e; + vString *t; + if (kind != K_CLASS + && (e = getEntryInCorkQueue (corkIndex)) + && (t = parseReturnTypeAnnotation (token))) + { + e->extensionFields.typeRef [0] = eStrdup ("typename"); + e->extensionFields.typeRef [1] = vStringDeleteUnwrap (t); + } + + return true; +} + +static bool parseImport (tokenInfo *const token) +{ + tokenInfo *fromModule = NULL; + + if (token->keyword == KEYWORD_from) + { + readQualifiedName (token); + if (token->type == TOKEN_IDENTIFIER) + { + fromModule = newToken (); + copyToken (fromModule, token); + readToken (token); + } + } + + if (token->keyword == KEYWORD_import) + { + bool parenthesized = false; + int moduleIndex; + + if (fromModule) + { + /* from X import ... + * -------------------- + * X = (kind:module, role:namespace) */ + moduleIndex = makeSimplePythonRefTag (fromModule, NULL, K_MODULE, + PYTHON_MODULE_NAMESPACE, + XTAG_UNKNOWN); + } + + do + { + readQualifiedName (token); + + /* support for `from x import (...)` */ + if (fromModule && ! 
parenthesized && token->type == '(') + { + parenthesized = true; + readQualifiedName (token); + } + + if (token->type == TOKEN_IDENTIFIER) + { + tokenInfo *name = newToken (); + + copyToken (name, token); + readToken (token); + /* if there is an "as", use it as the name */ + if (token->keyword == KEYWORD_as) + { + readToken (token); + if (token->type == TOKEN_IDENTIFIER) + { + if (fromModule) + { + /* from x import Y as Z + * ---------------------------- + * x = (kind:module, role:namespace), + * Y = (kind:unknown, role:indirectlyImported, scope:module:X), + * Z = (kind:unknown, nameref:unknown:Y) */ + int index; + + /* Y */ + index = makeSimplePythonRefTag (name, NULL, K_UNKNOWN, + PYTHON_UNKNOWN_INDIRECTLY_IMPORTED, + XTAG_UNKNOWN); + /* fill the scope field for Y */ + tagEntryInfo *e = getEntryInCorkQueue (index); + if (e) + e->extensionFields.scopeIndex = moduleIndex; + + /* Z */ + index = makeSimplePythonTag (token, K_UNKNOWN); + /* fill the nameref filed for Y */ + if (PythonFields[F_NAMEREF].enabled) + { + vString *nameref = vStringNewInit (PythonKinds [K_UNKNOWN].name); + vStringPut (nameref, ':'); + vStringCat (nameref, name->string); + attachParserFieldToCorkEntry (index, PythonFields[F_NAMEREF].ftype, + vStringValue (nameref)); + vStringDelete (nameref); + } + } + else + { + /* import x as Y + * ---------------------------- + * x = (kind:module, role:indirectlyImported) + * Y = (kind:namespace, nameref:module:x)*/ + /* x */ + makeSimplePythonRefTag (name, NULL, K_MODULE, + PYTHON_MODULE_INDIRECTLY_IMPORTED, + XTAG_UNKNOWN); + /* Y */ + int index = makeSimplePythonTag (token, K_NAMESPACE); + /* fill the nameref filed for Y */ + if (PythonFields[F_NAMEREF].enabled) + { + vString *nameref = vStringNewInit (PythonKinds [K_MODULE].name); + vStringPut (nameref, ':'); + vStringCat (nameref, name->string); + attachParserFieldToCorkEntry (index, PythonFields[F_NAMEREF].ftype, + vStringValue (nameref)); + vStringDelete (nameref); + } + } + + copyToken (name, 
token); + readToken (token); + } + } + else + { + if (fromModule) + { + /* from x import Y + -------------- + x = (kind:module, role:namespace), + Y = (kind:unknown, role:imported, scope:module:x) */ + /* Y */ + int index = makeSimplePythonRefTag (name, NULL, K_UNKNOWN, + PYTHON_UNKNOWN_IMPORTED, + XTAG_UNKNOWN); + /* fill the scope field for Y */ + tagEntryInfo *e = getEntryInCorkQueue (index); + if (e) + e->extensionFields.scopeIndex = moduleIndex; + } + else + { + /* import X + -------------- + X = (kind:module, role:imported) */ + makeSimplePythonRefTag (name, NULL, K_MODULE, + PYTHON_MODULE_IMPORTED, + XTAG_UNKNOWN); + } + } + + deleteToken (name); + } + } + while (token->type == ','); + + if (parenthesized && token->type == ')') + readToken (token); + } + + if (fromModule) + deleteToken (fromModule); + + return false; +} + +/* this only handles the most common cases, but an annotation can be any + * expression in theory. + * this function assumes there must be an annotation, and doesn't do any check + * on the token on which it is called: the caller should do that part. */ +static bool skipVariableTypeAnnotation (tokenInfo *const token, vString *const repr) +{ + bool readNext = true; + + readToken (token); + switch (token->type) + { + case '[': readNext = skipOverPair (token, '[', ']', repr, true); break; + case '(': readNext = skipOverPair (token, '(', ')', repr, true); break; + case '{': readNext = skipOverPair (token, '{', '}', repr, true); break; + default: reprCat (repr, token); + } + if (readNext) + readToken (token); + /* skip subscripts and calls */ + while (token->type == '[' || token->type == '(' || token->type == '.' 
|| token->type == '|') + { + switch (token->type) + { + case '[': readNext = skipOverPair (token, '[', ']', repr, true); break; + case '(': readNext = skipOverPair (token, '(', ')', repr, true); break; + case '|': + reprCat (repr, token); + skipVariableTypeAnnotation (token, repr); + readNext = false; + break; + case '.': + reprCat (repr, token); + readToken (token); + readNext = token->type == TOKEN_IDENTIFIER; + if (readNext) + reprCat (repr, token); + break; + default: readNext = false; break; + } + if (readNext) + readToken (token); + } + + return false; +} + +static bool parseVariable (tokenInfo *const token, const pythonKind kind) +{ + /* In order to support proper tag type for lambdas in multiple + * assignations, we first collect all the names, and then try and map + * an assignation to it */ + tokenInfo *nameTokens[8] = { NULL }; + vString *nameTypes [ARRAY_SIZE (nameTokens)] = { NULL }; + unsigned int nameCount = 0; + vString *type = vStringNew(); + + /* first, collect variable name tokens */ + while (token->type == TOKEN_IDENTIFIER && + nameCount < ARRAY_SIZE (nameTokens)) + { + unsigned int i; + tokenInfo *name = newToken (); + copyToken (name, token); + + readToken (token); + if (token->type == '.') + { + /* FIXME: what to do with dotted names? We currently ignore them + * as we need to do something not to break the whole + * declaration, but the expected behavior is questionable */ + deleteToken (name); + name = NULL; + + do + { + readToken (token); + } + while (token->type == TOKEN_IDENTIFIER || + token->type == '.'); + } + + i = nameCount++; + nameTokens[i] = name; + + /* (parse and) skip annotations. we need not to be too permissive because we + * aren't yet sure we're actually parsing a variable. 
*/ + if (token->type == ':' && skipVariableTypeAnnotation (token, type)) + readToken (token); + + if (vStringLength (type) > 0) + { + nameTypes[i] = type; + type = vStringNew (); + } + + if (token->type == ',') + readToken (token); + else + break; + } + vStringDelete (type); + + /* then, if it's a proper assignation, try and map assignations so that + * we catch lambdas and alike */ + if (token->type == '=') + { + unsigned int i = 0; + + do + { + const tokenInfo *const nameToken = nameTokens[i]; + vString **type = &(nameTypes[i++]); + + readToken (token); + + if (! nameToken) + /* nothing */; + else if (token->keyword != KEYWORD_lambda) + { + int index = makeSimplePythonTag (nameToken, kind); + tagEntryInfo *e = getEntryInCorkQueue (index); + if (e && *type) + { + e->extensionFields.typeRef [0] = eStrdup ("typename"); + e->extensionFields.typeRef [1] = vStringDeleteUnwrap (*type); + *type = NULL; + } + } + else + { + tokenInfo *anon = NULL; + vString *arglist = vStringNew (); + if (*type) + { + anon = newToken (); + copyToken (anon, token); + } + readToken (token); + vStringPut (arglist, '('); + skipLambdaArglist (token, arglist); + vStringPut (arglist, ')'); + if (*type) + { + /* How to handle lambda assigned to a variable + * -------------------------------------------- + * + * input.py: + * + * id = lambda var: var + * id_t: Callable[[int], int] = lambda var: var + * + * `id' is tagged as a function kind object like: + * + * id input.py /^id = lambda var: var$/;" function + * + * For `id_t' we cannot do the same as `id'. + * + * We should not store `Callable[[int], int]' to typeref + * field of the tag of `id_t' if the tag has "function" as + * its kind because users expect the typeref field of a + * function kind represents a type for the value returned + * from the function (return type). 
+ * + * the unexpected tag: + * + * id_t input.py /^id_t: Callable[[int], int] = lambda var: var$/;" function \ + * typeref:typename:Callable[[int], int] + * + * If we make a tag for `id_t' as a function, we should + * attach `typeref:typename:int' and `signature:(int)'. To + * achieve this, we have to make ctags analyze + * `Callable[[int], int]'. However, we want to avoid the + * level of analyzing. + * + * For recording `Callable[[int], int]', a valuable + * information in the input, we use indirection. + * + * id_t input.py /^id_t: Callable[[int], int] = lambda var: var$/;" variable \ + * typeref:typename:Callable[[int], int] nameref:function:anonFuncNNN + * anonFuncNNN input.py /^id_t: Callable[[int], int] = lambda var: var$/;" function \ + * extras:anonymous + */ + int vindex = makeSimplePythonTag (nameToken, kind); + vStringClear (anon->string); + anonGenerate (anon->string, "anonFunc", K_FUNCTION); + int findex = makeFunctionTag (anon, arglist, NULL); + tagEntryInfo *fe = getEntryInCorkQueue (findex); + if (fe) + markTagExtraBit (fe, XTAG_ANONYMOUS); + + tagEntryInfo *ve = getEntryInCorkQueue (vindex); + if (ve) + { + ve->extensionFields.typeRef [0] = eStrdup ("typename"); + ve->extensionFields.typeRef [1] = vStringDeleteUnwrap (*type); + *type = NULL; + vString *nameref = vStringNewInit (PythonKinds [K_FUNCTION].name); + vStringPut (nameref, ':'); + vStringCat (nameref, anon->string); + attachParserField (ve, true, PythonFields[F_NAMEREF].ftype, + vStringValue (nameref)); + vStringDelete (nameref); + } + if (anon) + deleteToken (anon); + } + else + makeFunctionTag (nameToken, arglist, NULL); + vStringDelete (arglist); + } + + /* skip until next initializer */ + while ((TokenContinuationDepth > 0 || token->type != ',') && + token->type != TOKEN_EOF && + token->type != ';' && + token->type != TOKEN_INDENT) + { + readToken (token); + } + } + while (token->type == ',' && i < nameCount); + + /* if we got leftover to initialize, just make variables out of them. 
+ * This handles cases like `a, b, c = (c, d, e)` -- or worse */ + for (; i < nameCount; i++) + { + if (nameTokens[i]) + makeSimplePythonTag (nameTokens[i], kind); + } + } + + while (nameCount > 0) + { + if (nameTokens[--nameCount]) + deleteToken (nameTokens[nameCount]); + vStringDelete (nameTypes[nameCount]); /* NULL is acceptable. */ + } + + return false; +} + +/* pops any level >= to indent */ +static void setIndent (tokenInfo *const token) +{ + NestingLevel *lv = nestingLevelsGetCurrent (PythonNestingLevels); + + while (lv && PY_NL (lv)->indentation >= token->indent) + { + tagEntryInfo *e = getEntryInCorkQueue (lv->corkIndex); + if (e) + e->extensionFields.endLine = token->lineNumber; + + nestingLevelsPop (PythonNestingLevels); + lv = nestingLevelsGetCurrent (PythonNestingLevels); + } +} + +static void findPythonTags (void) +{ + tokenInfo *const token = newToken (); + vString *decorators = vStringNew (); + bool atStatementStart = true; + + TokenContinuationDepth = 0; + NextToken = NULL; + PythonNestingLevels = nestingLevelsNew (sizeof (struct pythonNestingLevelUserData)); + + readToken (token); + while (token->type != TOKEN_EOF) + { + tokenType iterationTokenType = token->type; + bool readNext = true; + + /* skip async keyword that confuses decorator parsing before a def */ + if (token->keyword == KEYWORD_async) + readToken (token); + + if (token->type == TOKEN_INDENT) + setIndent (token); + else if (token->keyword == KEYWORD_class || + token->keyword == KEYWORD_def) + { + pythonKind kind = token->keyword == KEYWORD_class ? 
K_CLASS : K_FUNCTION; + + readNext = parseClassOrDef (token, decorators, kind, false); + } + else if (token->keyword == KEYWORD_cdef || + token->keyword == KEYWORD_cpdef) + { + readNext = parseClassOrDef (token, decorators, K_FUNCTION, true); + } + else if (token->keyword == KEYWORD_from || + token->keyword == KEYWORD_import) + { + readNext = parseImport (token); + } + else if (token->type == '(') + { /* skip parentheses to avoid finding stuff inside them */ + readNext = skipOverPair (token, '(', ')', NULL, false); + } + else if (token->type == TOKEN_IDENTIFIER && atStatementStart) + { + NestingLevel *lv = nestingLevelsGetCurrent (PythonNestingLevels); + tagEntryInfo *lvEntry = getEntryOfNestingLevel (lv); + pythonKind kind = K_VARIABLE; + + if (lvEntry && lvEntry->kindIndex != K_CLASS) + kind = K_LOCAL_VARIABLE; + + readNext = parseVariable (token, kind); + } + else if (token->type == '@' && atStatementStart && + PythonFields[F_DECORATORS].enabled) + { + /* collect decorators */ + readQualifiedName (token); + if (token->type != TOKEN_IDENTIFIER) + readNext = false; + else + { + if (vStringLength (decorators) > 0) + vStringPut (decorators, ','); + vStringCat (decorators, token->string); + readToken (token); + readNext = skipOverPair (token, '(', ')', decorators, true); + } + } + + /* clear collected decorators for any non-decorator tokens non-indent + * token. 
decorator collection takes care of skipping the possible + * argument list, so we should never hit here parsing a decorator */ + if (iterationTokenType != TOKEN_INDENT && + iterationTokenType != '@' && + PythonFields[F_DECORATORS].enabled) + { + vStringClear (decorators); + } + + atStatementStart = (token->type == TOKEN_INDENT || token->type == ';'); + + if (readNext) + readToken (token); + } + + nestingLevelsFree (PythonNestingLevels); + vStringDelete (decorators); + deleteToken (token); + Assert (NextToken == NULL); +} + +static void initialize (const langType language) +{ + Lang_python = language; + + TokenPool = objPoolNew (16, newPoolToken, deletePoolToken, clearPoolToken, NULL); +} + +static void finalize (langType language CTAGS_ATTR_UNUSED, bool initialized) +{ + if (!initialized) + return; + + objPoolDelete (TokenPool); +} + +extern parserDefinition* PythonParser (void) +{ + static const char *const extensions[] = { "py", "pyx", "pxd", "pxi", "scons", + "wsgi", NULL }; + static const char *const aliases[] = { "python[23]*", "scons", NULL }; + parserDefinition *def = parserNew ("Python"); + def->kindTable = PythonKinds; + def->kindCount = ARRAY_SIZE (PythonKinds); + def->extensions = extensions; + def->aliases = aliases; + def->parser = findPythonTags; + def->initialize = initialize; + def->finalize = finalize; + def->keywordTable = PythonKeywordTable; + def->keywordCount = ARRAY_SIZE (PythonKeywordTable); + def->fieldTable = PythonFields; + def->fieldCount = ARRAY_SIZE (PythonFields); + def->useCork = CORK_QUEUE; + def->requestAutomaticFQTag = true; + return def; +}
-------------- This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).