Branch: refs/heads/master
Author: Enrico Tröger <enrico.troeger(a)uvena.de>
Committer: Enrico Tröger <enrico.troeger(a)uvena.de>
Date: Sat, 27 May 2023 13:25:42 UTC
Commit: 2fac5ba2c012421b6cf269027d48d4fd8072005c
https://github.com/geany/www.geany.org/commit/2fac5ba2c012421b6cf269027d48d4fd8072005c
Log Message:
-----------
Ignore "safety" warning about CVE-2020-19002 - blog app is not used
Modified Paths:
--------------
tox.ini
Modified: tox.ini
3 lines changed, 2 insertions(+), 1 deletions(-)
===================================================================
@@ -36,7 +36,8 @@ deps =
safety
-r{toxinidir}/requirements.txt
commands =
- {envbindir}/safety check
+ # ignore CVE-2020-19002 as it does not affect us (we don't use the blog app)
+ {envbindir}/safety check --ignore 54309
[flake8]
exclude = build,.git,docs,migrations,local_settings.py,local_settings.docker.py
--------------
This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).
Branch: refs/heads/master
Author: Enrico Tröger <enrico.troeger(a)uvena.de>
Committer: Enrico Tröger <enrico.troeger(a)uvena.de>
Date: Sun, 07 May 2023 07:34:16 UTC
Commit: d350dad27a45656cd9c84a0ac9dfb3a0412d6604
https://github.com/geany/geany/commit/d350dad27a45656cd9c84a0ac9dfb3a0412d6604
Log Message:
-----------
Rewrite Python standard library tags creation script for Python 3
Modified Paths:
--------------
data/tags/std.py.tags
scripts/create_py_tags.py
Modified: data/tags/std.py.tags
21550 lines changed, 15586 insertions(+), 5964 deletions(-)
===================================================================
No diff available, check online
Modified: scripts/create_py_tags.py
514 lines changed, 286 insertions(+), 228 deletions(-)
===================================================================
@@ -7,310 +7,368 @@
#
# This script should be run in the top source directory.
#
-# Parses all files given on command line for Python classes or functions and write
-# them into data/tags/std.py.tags (internal tagmanager format).
+# Parses all files in the directories given on command line for Python classes or functions and
+# write them into data/tags/std.py.tags (internal tagmanager format).
# If called without command line arguments, a preset of common Python libs is used.
#
# WARNING
-# Be aware that running this script will actually *import* modules in the specified directory
+# Be aware that running this script will actually *import* all modules given on the command line
# or in the standard library path of your Python installation. Dependent on what Python modules
# you have installed, this might not be want you want and can have weird side effects.
# You have been warned.
#
# It should be however relatively safe to execute this script from a fresh Python installation
-# installed into a dedicated prefix. Then nothing else is necessary as to change the interpreter
-# with which you start this script.
+# installed into a dedicated prefix or from an empty virtualenv or ideally in a Docker container
+# in the Geany project directory:
+# docker run --rm -it --user $(id -u):$(id -g) -v $(pwd):/data --workdir /data python:3.11-alpine python scripts/create_py_tags.py
#
import datetime
-import imp
+import importlib.util
import inspect
import os
+import platform
import re
import sys
-
-PYTHON_LIB_DIRECTORY = os.path.dirname(os.__file__)
-PYTHON_LIB_IGNORE_PACKAGES = ('test', 'dist-packages', 'site-packages', 'Tools')
-# some modules execute funky code when they are imported which we really don't want here
-# (though if you feel funny, try: 'import antigravity')
-PYTHON_LIB_IGNORE_MODULES = ('antigravity.py', 'idlelib/idle.py', 'multiprocessing/util.py')
-PYTHON_KEYWORDS = ('and', 'as', 'assert', 'break', 'class', 'continue', 'def', 'del', 'elif',
- 'else', 'except', 'exec', 'finally', 'for', 'from', 'global', 'if', 'import',
- 'in', 'is', 'lambda', 'not', 'or', 'pass', 'print', 'raise', 'return', 'try',
- 'while', 'with', 'yield', 'False', 'None', 'True')
-
-# (from tagmanager/tm_tag.c:32)
-TA_NAME = '%c' % 200,
-TA_TYPE = '%c' % 204
-TA_ARGLIST = '%c' % 205
-TA_SCOPE = '%c' % 206
-
-# TMTagType (tagmanager/tm_tag.h:47)
-TYPE_CLASS = '%d' % 1
-TYPE_FUNCTION = '%d' % 128
-
-tag_filename = 'data/tags/std.py.tags'
-tag_regexp = '^[ \t]*(def|class)[ \t]+([a-zA-Z0-9_]+)[ \t]*(\(.*\))[:]'
-
-def joinseq(seq):
- if len(seq) == 1:
- return '(' + seq[0] + ',)'
- else:
- return '(' + ', '.join(seq) + ')'
-
-def strseq(object, convert, join=joinseq):
- """Recursively walk a sequence, stringifying each element."""
- if type(object) in (list, tuple):
- return join(map(lambda o, c=convert, j=join: strseq(o, c, j), object))
- else:
- return convert(object)
+import sysconfig
+import warnings
+from pathlib import Path
+
+# treat all DeprecationWarnings as errors so we can catch them to ignore the corresponding modules
+warnings.filterwarnings('error', category=DeprecationWarning)
+
+PYTHON_LIB_DIRECTORY = Path(os.__file__).parent
+PYTHON_LIB_IGNORE_PACKAGES = ['dist-packages', 'distutils', 'encodings', 'idlelib', 'lib2to3',
+ 'site-packages', 'test', 'turtledemo', 'Tools']
+# some modules/classes are deprecated or execute funky code when they are imported
+# which we really don't want here (though if you feel funny, try: 'import antigravity')
+PYTHON_LIB_IGNORE_MODULES = ('__phello__.foo', 'antigravity', 'asyncio.windows_events',
+ 'asyncio.windows_utils', 'ctypes.wintypes', 'ensurepip._bundled',
+ 'lib2to3', 'multiprocessing.popen_spawn_win32', 'this', 'turtle')
+PYTHON_LIB_IGNORE_CLASSES = ('typing.io', 'typing.re')
+
+# Python kinds
+KIND_CLASS = 'class'
+KIND_FUNCTION = 'function'
+KIND_MEMBER = 'member'
+
+TAG_FILENAME = 'data/tags/std.py.tags'
+TAG_REGEXP = re.compile(r'^[ \t]*(def|class)[ \t]+([a-zA-Z0-9_]+)[ \t]*(\(.*\))[:]')
+OBJECT_MEMORY_ADDRESS_REGEXP = re.compile(r'<(.+?) at 0x[0-9a-f]+(?:.+)>', flags=re.IGNORECASE)
+
+CTAGS_FILE_HEADER = f'''!_TAG_FILE_FORMAT 2 /extended format; --format=1 will not append ;" to lines/
+!_TAG_FILE_SORTED 1 /0=unsorted, 1=sorted, 2=foldcase/
+!_TAG_OUTPUT_EXCMD mixed /number, pattern, mixed, or combineV2/
+!_TAG_OUTPUT_FILESEP slash /slash or backslash/
+!_TAG_OUTPUT_MODE u-ctags /u-ctags or e-ctags/
+!_TAG_PATTERN_LENGTH_LIMIT 96 /0 for no limit/
+!_TAG_PROGRAM_NAME scripts/create_py_tags.py Automatically generated file - do not edit (created on {datetime.datetime.now().ctime()} with Python {platform.python_version()})
+'''
+
+# pylint: disable=no-else-return,no-self-use
-########################################################################
class Parser:
- #----------------------------------------------------------------------
def __init__(self):
self.tags = {}
- self.re_matcher = re.compile(tag_regexp)
- #----------------------------------------------------------------------
- def _get_superclass(self, _object):
+ def _add_tag(self, object_name, object_, kind, module_path=None, parent=''):
"""
- Python class base-finder
- (found on http://mail.python.org/pipermail/python-list/2002-November/173949.html)
+ Verify the found tag name and if it is valid, add it to the list
- @param _object (object)
- @return superclass (object)
+ @param object_ (instance)
+ @param tag_type (str)
+ @param parent (str)
"""
- try:
- #~ TODO print inspect.getmro(c)
- if isinstance(_object, type):
- return _object.__bases__[0].__name__
- else:
- return _object.__mro__[1].__name__
- except IndexError:
- return ''
+ if len(object_name) < 4 or is_private_identifier(object_name):
+ return # skip short and private tags
+ if object_ is not None and not is_relevant_identifier(object_):
+ return
+
+ tag_key = (module_path, parent, object_name)
+ if tag_key not in self.tags:
+ signature = self._create_signature(object_) if object_ is not None else None
+ self.tags[tag_key] = self._format_tag(object_name, kind, signature, parent)
- #----------------------------------------------------------------------
- def _formatargspec(self, args, varargs=None, varkw=None, defaults=None,
- formatarg=str,
- formatvarargs=lambda name: '*' + name,
- formatvarkw=lambda name: '**' + name,
- formatvalue=lambda value: '=' + repr(value),
- join=joinseq):
- """Format an argument spec from the 4 values returned by getargspec.
-
- The first four arguments are (args, varargs, varkw, defaults). The
- other four arguments are the corresponding optional formatting functions
- that are called to turn names and values into strings. The ninth
- argument is an optional function to format the sequence of arguments."""
- specs = []
- if defaults:
- firstdefault = len(args) - len(defaults)
- for i in range(len(args)):
- spec = strseq(args[i], formatarg, join)
- if defaults and i >= firstdefault:
- d = defaults[i - firstdefault]
- # this is the difference from the original formatargspec() function
- # to use nicer names then the default repr() output
- if hasattr(d, '__name__'):
- d = d.__name__
- spec = spec + formatvalue(d)
- specs.append(spec)
- if varargs is not None:
- specs.append(formatvarargs(varargs))
- if varkw is not None:
- specs.append(formatvarkw(varkw))
- return ', '.join(specs)
-
- #----------------------------------------------------------------------
- def _add_tag(self, obj, tag_type, parent=''):
+ def _format_tag(self, tagname, kind, signature, parent):
+ signature_field = f'\tsignature:{signature}' if signature else ''
+ parent_field = f'\tclass:{parent}' if parent else ''
+
+ return f'{tagname}\t/unknown\t1;"\tkind:{kind}{parent_field}{signature_field}\n'
+
+ def _get_safe_parameter_default_value(self, value):
"""
- Verify the found tag name and if it is valid, add it to the list
+ Replace possibly sensitive or just much information from the default value
+ """
+ # prevent evaluating of `os.environ` in cgi.print_environ(environ=os.environ) which
+ # would lead to include the current full environment variables to be included
+ # in the tags file
+ if isinstance(value, (dict, os._Environ)) and value: # pylint: disable=protected-access
+ return f'<default-value-stripped {type(value)}>'
+ if isinstance(value, str):
+ # remove interpreter paths
+ if sys.executable in value:
+ return '/nonexistent/bin/python3'
+ # remove interpreter paths
+ if sys.prefix in value:
+ return '/nonexistent'
+
+ # for all other default values, return the string representation,
+ # assuming it is shorter than repr()
+ value_str = str(value)
+
+ # remove object hex addresses, e.g
+ # subTest(self, msg='<object object at 0x7f14bdfcd5a0>', **params)
+ if OBJECT_MEMORY_ADDRESS_REGEXP.search(value_str):
+ return OBJECT_MEMORY_ADDRESS_REGEXP.sub(r'<\1>', value_str)
+
+ return value_str
+
+ def _stringify_parameter_default_if_necessary(self, parameter):
+ """
+ Replace default values of the parameters with their string variants if they are not
+ basic types. This is to avoid signatures like (`ssl.SSLContext.load_default_certs`):
+ create_default_contextÌ128Í(purpose=<Purpose.SERVER_AUTH: _ASN1Object(nid=129, shortname='serverAuth', longname='TLS Web Server Authentication', oid='1.3.6.1.5.5.7.3.1')>, *, cafile=None, capath=None, cadata=None)ÎSSLContext # noqa pylint: disable=line-too-long
+ and create instead:
+ create_default_contextÌ128Í(purpose='Purpose.SERVER_AUTH', *, cafile=None, capath=None, cadata=None)
+
+ This is not perfect as it might suggest that the `purpose` parameter accepts a string.
+ But having the full `repr()` result is even worse.
+ """
+ if not parameter.default or parameter.default is parameter.empty:
+ return parameter
+ if isinstance(parameter.default, (bool, int, float)):
+ return parameter
- @param obj (instance)
- @param tag_type (str)
- @param parent (str)
+ new_default = self._get_safe_parameter_default_value(parameter.default)
+ return parameter.replace(default=new_default)
+
+ def _create_signature(self, object_):
+ """
+ Create signature for the given `object_`.
"""
- args = ''
- scope = ''
try:
- args = self._formatargspec(inspect.getfullargspec(obj))
- except (TypeError, KeyError):
- pass
- if parent:
- if tag_type == TYPE_CLASS:
- args = '(%s)' % parent
+ signature = inspect.signature(object_)
+ except (ValueError, TypeError):
+ # inspect.signature() throws ValueError and TypeError for unsupported callables,
+ # so we need to ignore the signature for this callable
+ return ''
+
+ new_parameters = []
+ for parameter_name in signature.parameters:
+ parameter = signature.parameters[parameter_name]
+ if parameter.default and not isinstance(parameter.default, parameter.empty):
+ new_parameter = self._stringify_parameter_default_if_necessary(parameter)
+ new_parameters.append(new_parameter)
else:
- scope = '%s%s' % (TA_SCOPE, parent)
- if isinstance(obj, str):
- tagname = obj
- else:
- tagname = obj.__name__
- # check for duplicates
- if len(tagname) < 4:
- # skip short tags
- return
- tag = '%s%s%s%s%s%s\n' % (tagname, TA_TYPE, tag_type, TA_ARGLIST, args, scope)
+ new_parameters.append(parameter)
- if not tagname in self.tags and not tagname_is_like_keyword(tagname):
- self.tags[tagname] = tag
+ return signature.replace(parameters=new_parameters)
- #----------------------------------------------------------------------
- def process_file(self, filename):
+ def process_module(self, module_path, module_filename):
"""
- Read the file specified by filename and look for class and function definitions
-
- @param filename (str)
+ Import the given module path and look for class and function definitions
"""
+ module = None
+ symbols = None
+ module_error = None
+
+ if module_path.endswith('__main__'):
+ return # ignore any executable modules, importing them would execute the module
+
try:
- module = imp.load_source('tags_file_module', filename)
- except IOError as e:
- # file not found
- print('%s: %s' % (filename, e))
+ module = importlib.import_module(module_path)
+ except DeprecationWarning as exc:
+ print(f'Ignoring deprecated module "{module_path}" (reason: {exc})')
return
- except Exception:
- module = None
-
- if module:
- symbols = inspect.getmembers(module, callable)
- for obj_name, obj in symbols:
- try:
- name = obj.__name__
- except AttributeError:
- name = obj_name
- if not name or not isinstance(name, str) or is_private_identifier(name):
- # skip non-public tags
- continue
- if inspect.isfunction(obj):
- self._add_tag(obj, TYPE_FUNCTION)
- elif inspect.isclass(obj):
- self._add_tag(obj, TYPE_CLASS, self._get_superclass(obj))
- try:
- methods = inspect.getmembers(obj, inspect.ismethod)
- except (TypeError, AttributeError):
- methods = []
- for m_name, m_obj in methods:
- # skip non-public tags
- if is_private_identifier(m_name) or not inspect.ismethod(m_obj):
- continue
- self._add_tag(m_obj, TYPE_FUNCTION, name)
+ except Exception as exc:
+ module_error = str(exc)
+ else:
+ symbols = inspect.getmembers(module)
+
+ if symbols:
+ self._process_module_with_inspect(symbols, module_path)
else:
- # plain regular expression based parsing
- filep = open(filename)
- for line in filep:
- m = self.re_matcher.match(line)
- if m:
- tag_type_str, tagname, args = m.groups()
+ # If error is empty, there are probably just no symbols in the module, e.g. on empty
+ # __init__.py files. Try to parse them anyway. But log module_errors.
+ if module_error:
+ print(f'Using fallback parser for: {module_path} ({module_filename}, reason: {module_error})')
+
+ self._process_module_with_fallback_parser(module_filename)
+
+ def _process_module_with_inspect(self, symbols, module_path):
+ """
+ Try to analyse all symbols in the module as found by `inspect.getmembers`.
+ """
+ for obj_name, obj in symbols:
+ if is_import(obj, module_path):
+ continue
+
+ # function and similar callables
+ if inspect.isroutine(obj):
+ self._add_tag(obj_name, obj, KIND_FUNCTION, module_path)
+ # class
+ elif inspect.isclass(obj):
+ if _ignore_class(module_path, obj_name):
+ continue
+ self._add_tag(obj_name, obj, KIND_CLASS, module_path)
+ methods = inspect.getmembers(obj)
+ # methods
+ for m_name, m_obj in methods:
+ self._add_tag(m_name, m_obj, KIND_MEMBER, module_path, parent=obj_name)
+
+ def _process_module_with_fallback_parser(self, module_filename):
+ """
+ Plain regular expression based parsing, used as fallback if `inspect`'ing the module is not possible
+ """
+ with open(module_filename, encoding='utf-8') as filep:
+ for line_number, line in enumerate(filep):
+ match = TAG_REGEXP.match(line)
+ if match:
+ tag_type_str, tagname, args = match.groups()
if not tagname or is_private_identifier(tagname):
- # skip non-public tags
continue
- if tag_type_str == 'class':
- tag_type = TYPE_CLASS
- else:
- tag_type = TYPE_FUNCTION
- args = args.strip()
- tag = '%s%s%s%s%s\n' % (tagname, TA_TYPE, tag_type, TA_ARGLIST, args)
- if not tagname in self.tags and not tagname_is_like_keyword(tagname):
- self.tags[tagname] = tag
- filep.close()
-
- #----------------------------------------------------------------------
+ if tagname in self.tags:
+ continue
+
+ kind = KIND_CLASS if tag_type_str == 'class' else KIND_FUNCTION
+ signature = args.strip()
+ self.tags[tagname] = self._format_tag(tagname, kind, signature, parent=None)
+
def add_builtins(self):
"""
Add the contents of __builtins__ as simple tags
"""
- for tag_name in dir(__builtins__):
- # check if the tag name starts with upper case, then we assume it is a class
- # note that this is a very very simple heuristic to determine the type and will give
- # false positives
- if tag_name[0].isupper():
- tag_type = TYPE_CLASS
- else:
- tag_type = TYPE_FUNCTION
-
- self._add_tag(tag_name, tag_type)
+ builtins = inspect.getmembers(__builtins__)
+ for b_name, b_obj in builtins:
+ if inspect.isclass(b_obj):
+ self._add_tag(b_name, b_obj, KIND_CLASS)
+ elif is_relevant_identifier(b_obj):
+ self._add_tag(b_name, b_obj, KIND_FUNCTION)
- #----------------------------------------------------------------------
def write_to_file(self, filename):
"""
Sort the found tags and write them into the file specified by filename
@param filename (str)
"""
- result = list(self.tags.values())
- # sort the tags
- result.sort()
+ result = sorted(self.tags.values())
# write them
- with open(filename, 'wb') as target_file:
- target_file.write(
- ('# format=tagmanager - Automatically generated file - do not edit (created on %s)\n' % \
- datetime.datetime.now().ctime()).encode('latin-1'))
+ with open(filename, 'w') as target_file:
+ target_file.write(CTAGS_FILE_HEADER)
for symbol in result:
- if not symbol == '\n': # skip empty lines
- target_file.write(symbol.encode('latin-1'))
+ if symbol != '\n': # skip empty lines
+ target_file.write(symbol)
-#----------------------------------------------------------------------
-def tagname_is_like_keyword(tagname):
- """ignore tags which start with a keyword to avoid annoying completions of 'pass_' and similar ones"""
- # this is not really efficient but in this script speed doesn't really matter
- for keyword in PYTHON_KEYWORDS:
- if tagname.startswith(keyword):
- return True
+def is_import(object_, module_path):
+ object_module = getattr(object_, '__module__', None)
+ if object_module and object_module != module_path:
+ return True
+
return False
-#----------------------------------------------------------------------
def is_private_identifier(tagname):
return tagname.startswith('_') or tagname.endswith('_')
-#----------------------------------------------------------------------
-def get_module_filenames(path):
- def ignore_package(package):
- for ignore in PYTHON_LIB_IGNORE_PACKAGES:
- if ignore in package:
- return True
- return False
+def is_relevant_identifier(object_):
+ # TODO add "inspect.isdatadescriptor" for properties
+ # TODO maybe also consider attributes, e.g. by checking against __dict__ or so
+ return \
+ inspect.ismethod(object_) or \
+ inspect.isclass(object_) or \
+ inspect.isfunction(object_) or \
+ inspect.isgeneratorfunction(object_) or \
+ inspect.isgenerator(object_) or \
+ inspect.iscoroutinefunction(object_) or \
+ inspect.iscoroutine(object_) or \
+ inspect.isawaitable(object_) or \
+ inspect.isasyncgenfunction(object_) or \
+ inspect.isasyncgen(object_) or \
+ inspect.isroutine(object_) or \
+ inspect.isabstract(object_)
+
+
+def _setup_global_package_ignore_list():
+ """Read the python-config path from LIBPL and strip the prefix part
+ (e.g. /usr/lib/python3.8/config-3.8-x86_64-linux-gnu gets config-3.8-x86_64-linux-gnu)
+ """
+ python_config_dir = Path(sysconfig.get_config_var('LIBPL'))
+ try:
+ python_config_package = python_config_dir.relative_to(PYTHON_LIB_DIRECTORY)
+ except ValueError:
+ python_config_package = python_config_dir
+
+ PYTHON_LIB_IGNORE_PACKAGES.append(python_config_package.as_posix())
+
+
+def _ignore_package(package):
+ for ignore in PYTHON_LIB_IGNORE_PACKAGES:
+ if ignore in package:
+ return True
+ return False
+
+
+def _ignore_module(module):
+ return module in PYTHON_LIB_IGNORE_MODULES
+
+
+def _ignore_class(module, class_):
+ return f'{module}.{class_}' in PYTHON_LIB_IGNORE_CLASSES
+
+def _get_module_list(*paths):
# the loop is quite slow but it doesn't matter for this script
- filenames = list()
- python_lib_directory_len = len(PYTHON_LIB_DIRECTORY)
- for base, dirs, files in os.walk(path):
- package = base[(python_lib_directory_len + 1):]
- if ignore_package(package):
- continue
- for filename in files:
- module_name = os.path.join(package, filename)
- if module_name in PYTHON_LIB_IGNORE_MODULES:
+ modules = []
+ for path in paths:
+ for module_filename in path.rglob('*.py'):
+ module_name = module_filename.stem
+ package_path = module_filename.relative_to(path)
+ package = '.'.join(package_path.parent.parts)
+ # construct full module path (e.g. xml.sax.xmlreader)
+ if module_name == '__init__':
+ module_path = package
+ elif package:
+ module_path = f'{package}.{module_name}'
+ else:
+ module_path = module_name
+
+ # ignore unwanted modules and packages
+ if _ignore_package(package):
+ continue
+ if _ignore_module(module_path):
continue
- if filename.endswith('.py'):
- module_filename = os.path.join(base, filename)
- filenames.append(module_filename)
- return filenames
+
+ modules.append((module_path, module_filename))
+
+ # sort module list for nicer output
+ return sorted(modules)
-#----------------------------------------------------------------------
def main():
- # process files given on command line
+ _setup_global_package_ignore_list()
+ # process modules given on command line
args = sys.argv[1:]
- if not args:
- args = get_module_filenames(PYTHON_LIB_DIRECTORY)
+ if args:
+ modules = _get_module_list(*args)
+ else:
+ modules = _get_module_list(PYTHON_LIB_DIRECTORY)
parser = Parser()
parser.add_builtins()
- for filename in args:
+ for module_path, module_filename in modules:
try:
- parser.process_file(filename)
- except (SystemExit, ImportError, TypeError):
- continue
+ parser.process_module(module_path, module_filename)
+ except Exception as exc:
+ print(f'{exc.__class__.__name__} in {module_path}: {exc}')
+ raise
- parser.write_to_file(tag_filename)
+ parser.write_to_file(TAG_FILENAME)
if __name__ == '__main__':
main()
-
--------------
This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).
Branch: refs/heads/master
Author: Enrico Tröger <enrico.troeger(a)uvena.de>
Committer: Enrico Tröger <enrico.troeger(a)uvena.de>
Date: Wed, 10 May 2023 21:46:46 UTC
Commit: 706ee56f0f0b2c09744c380b033b2ff44682a95e
https://github.com/geany/geany/commit/706ee56f0f0b2c09744c380b033b2ff44682a95e
Log Message:
-----------
Fix doc typos
Modified Paths:
--------------
scripts/create_py_tags.py
Modified: scripts/create_py_tags.py
4 lines changed, 2 insertions(+), 2 deletions(-)
===================================================================
@@ -8,13 +8,13 @@
# This script should be run in the top source directory.
#
# Parses all files in the directories given on command line for Python classes or functions and
-# write them into data/tags/std.py.tags (internal tagmanager format).
+# write them into data/tags/std.py.tags (ctags format).
# If called without command line arguments, a preset of common Python libs is used.
#
# WARNING
# Be aware that running this script will actually *import* all modules given on the command line
# or in the standard library path of your Python installation. Dependent on what Python modules
-# you have installed, this might not be want you want and can have weird side effects.
+# you have installed, this might not be what you want and can have weird side effects.
# You have been warned.
#
# It should be however relatively safe to execute this script from a fresh Python installation
--------------
This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).
Branch: refs/heads/master
Author: Enrico Tröger <enrico.troeger(a)uvena.de>
Committer: GitHub <noreply(a)github.com>
Date: Sun, 21 May 2023 17:21:36 UTC
Commit: d6ce258cec790da1e81db48476507bb114c7e482
https://github.com/geany/geany/commit/d6ce258cec790da1e81db48476507bb114c7e482
Log Message:
-----------
Merge pull request #3039 from eht16/py3_tags_v2
Rewrite Python standard library tags creation script for Python 3
Modified Paths:
--------------
data/tags/std.py.tags
scripts/create_py_tags.py
Modified: data/tags/std.py.tags
21550 lines changed, 15586 insertions(+), 5964 deletions(-)
===================================================================
No diff available, check online
Modified: scripts/create_py_tags.py
513 lines changed, 292 insertions(+), 221 deletions(-)
===================================================================
@@ -1,5 +1,4 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
+#!/usr/bin/env python3
#
# Author: Enrico Tröger
# License: GPL v2 or later
@@ -8,296 +7,368 @@
#
# This script should be run in the top source directory.
#
-# Parses all files given on command line for Python classes or functions and write
-# them into data/tags/std.py.tags (internal tagmanager format).
+# Parses all files in the directories given on command line for Python classes or functions and
+# write them into data/tags/std.py.tags (ctags format).
# If called without command line arguments, a preset of common Python libs is used.
#
# WARNING
-# Be aware that running this script will actually *import* modules in the specified directory
+# Be aware that running this script will actually *import* all modules given on the command line
# or in the standard library path of your Python installation. Dependent on what Python modules
-# you have installed, this might not be want you want and can have weird side effects.
+# you have installed, this might not be what you want and can have weird side effects.
# You have been warned.
#
# It should be however relatively safe to execute this script from a fresh Python installation
-# installed into a dedicated prefix. Then nothing else is necessary as to change the interpreter
-# with which you start this script.
+# installed into a dedicated prefix or from an empty virtualenv or ideally in a Docker container
+# in the Geany project directory:
+# docker run --rm -it --user $(id -u):$(id -g) -v $(pwd):/data --workdir /data python:3.11-alpine python scripts/create_py_tags.py
#
import datetime
-import imp
+import importlib.util
import inspect
import os
+import platform
import re
import sys
-import types
+import sysconfig
+import warnings
+from pathlib import Path
+
+# treat all DeprecationWarnings as errors so we can catch them to ignore the corresponding modules
+warnings.filterwarnings('error', category=DeprecationWarning)
+
+PYTHON_LIB_DIRECTORY = Path(os.__file__).parent
+PYTHON_LIB_IGNORE_PACKAGES = ['dist-packages', 'distutils', 'encodings', 'idlelib', 'lib2to3',
+ 'site-packages', 'test', 'turtledemo', 'Tools']
+# some modules/classes are deprecated or execute funky code when they are imported
+# which we really don't want here (though if you feel funny, try: 'import antigravity')
+PYTHON_LIB_IGNORE_MODULES = ('__phello__.foo', 'antigravity', 'asyncio.windows_events',
+ 'asyncio.windows_utils', 'ctypes.wintypes', 'ensurepip._bundled',
+ 'lib2to3', 'multiprocessing.popen_spawn_win32', 'this', 'turtle')
+PYTHON_LIB_IGNORE_CLASSES = ('typing.io', 'typing.re')
+
+# Python kinds
+KIND_CLASS = 'class'
+KIND_FUNCTION = 'function'
+KIND_MEMBER = 'member'
+
+TAG_FILENAME = 'data/tags/std.py.tags'
+TAG_REGEXP = re.compile(r'^[ \t]*(def|class)[ \t]+([a-zA-Z0-9_]+)[ \t]*(\(.*\))[:]')
+OBJECT_MEMORY_ADDRESS_REGEXP = re.compile(r'<(.+?) at 0x[0-9a-f]+(?:.+)>', flags=re.IGNORECASE)
+
+CTAGS_FILE_HEADER = f'''!_TAG_FILE_FORMAT 2 /extended format; --format=1 will not append ;" to lines/
+!_TAG_FILE_SORTED 1 /0=unsorted, 1=sorted, 2=foldcase/
+!_TAG_OUTPUT_EXCMD mixed /number, pattern, mixed, or combineV2/
+!_TAG_OUTPUT_FILESEP slash /slash or backslash/
+!_TAG_OUTPUT_MODE u-ctags /u-ctags or e-ctags/
+!_TAG_PATTERN_LENGTH_LIMIT 96 /0 for no limit/
+!_TAG_PROGRAM_NAME scripts/create_py_tags.py Automatically generated file - do not edit (created on {datetime.datetime.now().ctime()} with Python {platform.python_version()})
+'''
+
+# pylint: disable=no-else-return,no-self-use
-PYTHON_LIB_DIRECTORY = os.path.dirname(os.__file__)
-PYTHON_LIB_IGNORE_PACKAGES = (u'test', u'dist-packages', u'site-packages', 'Tools')
-# some modules execute funky code when they are imported which we really don't want here
-# (though if you feel funny, try: 'import antigravity')
-PYTHON_LIB_IGNORE_MODULES = (u'antigravity.py', u'idlelib/idle.py', u'multiprocessing/util.py')
-PYTHON_KEYWORDS = ('and', 'as', 'assert', 'break', 'class', 'continue', 'def', 'del', 'elif',
- 'else', 'except', 'exec', 'finally', 'for', 'from', 'global', 'if', 'import',
- 'in', 'is', 'lambda', 'not', 'or', 'pass', 'print', 'raise', 'return', 'try',
- 'while', 'with', 'yield', 'False', 'None', 'True')
-# (from tagmanager/tm_tag.c:32)
-TA_NAME = '%c' % 200,
-TA_TYPE = '%c' % 204
-TA_ARGLIST = '%c' % 205
-TA_SCOPE = '%c' % 206
-
-# TMTagType (tagmanager/tm_tag.h:47)
-TYPE_CLASS = '%d' % 1
-TYPE_FUNCTION = '%d' % 128
-
-tag_filename = 'data/tags/std.py.tags'
-tag_regexp = '^[ \t]*(def|class)[ \t]+([a-zA-Z0-9_]+)[ \t]*(\(.*\))[:]'
-
-
-########################################################################
class Parser:
- #----------------------------------------------------------------------
def __init__(self):
self.tags = {}
- self.re_matcher = re.compile(tag_regexp)
- #----------------------------------------------------------------------
- def _get_superclass(self, _object):
+ def _add_tag(self, object_name, object_, kind, module_path=None, parent=''):
"""
- Python class base-finder
- (found on http://mail.python.org/pipermail/python-list/2002-November/173949.html)
+ Verify the found tag name and if it is valid, add it to the list
- @param _object (object)
- @return superclass (object)
+ @param object_ (instance)
+ @param tag_type (str)
+ @param parent (str)
"""
- try:
- #~ TODO print inspect.getmro(c)
- if type(_object) == types.ClassType:
- return _object.__bases__[0].__name__
- else:
- return _object.__mro__[1].__name__
- except IndexError:
- return ''
+ if len(object_name) < 4 or is_private_identifier(object_name):
+ return # skip short and private tags
+ if object_ is not None and not is_relevant_identifier(object_):
+ return
+
+ tag_key = (module_path, parent, object_name)
+ if tag_key not in self.tags:
+ signature = self._create_signature(object_) if object_ is not None else None
+ self.tags[tag_key] = self._format_tag(object_name, kind, signature, parent)
+
+ def _format_tag(self, tagname, kind, signature, parent):
+ signature_field = f'\tsignature:{signature}' if signature else ''
+ parent_field = f'\tclass:{parent}' if parent else ''
- #----------------------------------------------------------------------
- def _formatargspec(self, args, varargs=None, varkw=None, defaults=None,
- formatarg=str,
- formatvarargs=lambda name: '*' + name,
- formatvarkw=lambda name: '**' + name,
- formatvalue=lambda value: '=' + repr(value),
- join=inspect.joinseq):
- """Format an argument spec from the 4 values returned by getargspec.
-
- The first four arguments are (args, varargs, varkw, defaults). The
- other four arguments are the corresponding optional formatting functions
- that are called to turn names and values into strings. The ninth
- argument is an optional function to format the sequence of arguments."""
- specs = []
- if defaults:
- firstdefault = len(args) - len(defaults)
- for i in range(len(args)):
- spec = inspect.strseq(args[i], formatarg, join)
- if defaults and i >= firstdefault:
- d = defaults[i - firstdefault]
- # this is the difference from the original formatargspec() function
- # to use nicer names then the default repr() output
- if hasattr(d, '__name__'):
- d = d.__name__
- spec = spec + formatvalue(d)
- specs.append(spec)
- if varargs is not None:
- specs.append(formatvarargs(varargs))
- if varkw is not None:
- specs.append(formatvarkw(varkw))
- return ', '.join(specs)
-
- #----------------------------------------------------------------------
- def _add_tag(self, obj, tag_type, parent=''):
+ return f'{tagname}\t/unknown\t1;"\tkind:{kind}{parent_field}{signature_field}\n'
+
+ def _get_safe_parameter_default_value(self, value):
"""
- Verify the found tag name and if it is valid, add it to the list
+ Replace possibly sensitive or just much information from the default value
+ """
+ # prevent evaluating of `os.environ` in cgi.print_environ(environ=os.environ) which
+ # would lead to include the current full environment variables to be included
+ # in the tags file
+ if isinstance(value, (dict, os._Environ)) and value: # pylint: disable=protected-access
+ return f'<default-value-stripped {type(value)}>'
+ if isinstance(value, str):
+ # remove interpreter paths
+ if sys.executable in value:
+ return '/nonexistent/bin/python3'
+ # remove interpreter paths
+ if sys.prefix in value:
+ return '/nonexistent'
+
+ # for all other default values, return the string representation,
+ # assuming it is shorter than repr()
+ value_str = str(value)
+
+ # remove object hex addresses, e.g
+ # subTest(self, msg='<object object at 0x7f14bdfcd5a0>', **params)
+ if OBJECT_MEMORY_ADDRESS_REGEXP.search(value_str):
+ return OBJECT_MEMORY_ADDRESS_REGEXP.sub(r'<\1>', value_str)
+
+ return value_str
+
+ def _stringify_parameter_default_if_necessary(self, parameter):
+ """
+ Replace default values of the parameters with their string variants if they are not
+ basic types. This is to avoid signatures like (`ssl.SSLContext.load_default_certs`):
+ create_default_contextÌ128Í(purpose=<Purpose.SERVER_AUTH: _ASN1Object(nid=129, shortname='serverAuth', longname='TLS Web Server Authentication', oid='1.3.6.1.5.5.7.3.1')>, *, cafile=None, capath=None, cadata=None)ÎSSLContext # noqa pylint: disable=line-too-long
+ and create instead:
+ create_default_contextÌ128Í(purpose='Purpose.SERVER_AUTH', *, cafile=None, capath=None, cadata=None)
+
+ This is not perfect as it might suggest that the `purpose` parameter accepts a string.
+ But having the full `repr()` result is even worse.
+ """
+ if not parameter.default or parameter.default is parameter.empty:
+ return parameter
+ if isinstance(parameter.default, (bool, int, float)):
+ return parameter
- @param obj (instance)
- @param tag_type (str)
- @param parent (str)
+ new_default = self._get_safe_parameter_default_value(parameter.default)
+ return parameter.replace(default=new_default)
+
+ def _create_signature(self, object_):
+ """
+ Create signature for the given `object_`.
"""
- args = ''
- scope = ''
try:
- args = apply(self._formatargspec, inspect.getargspec(obj))
- except (TypeError, KeyError):
- pass
- if parent:
- if tag_type == TYPE_CLASS:
- args = '(%s)' % parent
+ signature = inspect.signature(object_)
+ except (ValueError, TypeError):
+ # inspect.signature() throws ValueError and TypeError for unsupported callables,
+ # so we need to ignore the signature for this callable
+ return ''
+
+ new_parameters = []
+ for parameter_name in signature.parameters:
+ parameter = signature.parameters[parameter_name]
+ if parameter.default and not isinstance(parameter.default, parameter.empty):
+ new_parameter = self._stringify_parameter_default_if_necessary(parameter)
+ new_parameters.append(new_parameter)
else:
- scope = '%s%s' % (TA_SCOPE, parent)
- if isinstance(obj, basestring):
- tagname = obj
- else:
- tagname = obj.__name__
- # check for duplicates
- if len(tagname) < 4:
- # skip short tags
- return
- tag = '%s%s%s%s%s%s\n' % (tagname, TA_TYPE, tag_type, TA_ARGLIST, args, scope)
+ new_parameters.append(parameter)
- if not tagname in self.tags and not tagname_is_like_keyword(tagname):
- self.tags[tagname] = tag
+ return signature.replace(parameters=new_parameters)
- #----------------------------------------------------------------------
- def process_file(self, filename):
+ def process_module(self, module_path, module_filename):
"""
- Read the file specified by filename and look for class and function definitions
-
- @param filename (str)
+ Import the given module path and look for class and function definitions
"""
+ module = None
+ symbols = None
+ module_error = None
+
+ if module_path.endswith('__main__'):
+ return # ignore any executable modules, importing them would execute the module
+
try:
- module = imp.load_source('tags_file_module', filename)
- except IOError, e:
- # file not found
- print '%s: %s' % (filename, e)
+ module = importlib.import_module(module_path)
+ except DeprecationWarning as exc:
+ print(f'Ignoring deprecated module "{module_path}" (reason: {exc})')
return
- except Exception:
- module = None
-
- if module:
- symbols = inspect.getmembers(module, callable)
- for obj_name, obj in symbols:
- try:
- name = obj.__name__
- except AttributeError:
- name = obj_name
- if not name or not isinstance(name, basestring) or is_private_identifier(name):
- # skip non-public tags
- continue
- if inspect.isfunction(obj):
- self._add_tag(obj, TYPE_FUNCTION)
- elif inspect.isclass(obj):
- self._add_tag(obj, TYPE_CLASS, self._get_superclass(obj))
- try:
- methods = inspect.getmembers(obj, inspect.ismethod)
- except (TypeError, AttributeError):
- methods = []
- for m_name, m_obj in methods:
- # skip non-public tags
- if is_private_identifier(m_name) or not inspect.ismethod(m_obj):
- continue
- self._add_tag(m_obj, TYPE_FUNCTION, name)
+ except Exception as exc:
+ module_error = str(exc)
+ else:
+ symbols = inspect.getmembers(module)
+
+ if symbols:
+ self._process_module_with_inspect(symbols, module_path)
else:
- # plain regular expression based parsing
- filep = open(filename)
- for line in filep:
- m = self.re_matcher.match(line)
- if m:
- tag_type_str, tagname, args = m.groups()
+ # If error is empty, there are probably just no symbols in the module, e.g. on empty
+ # __init__.py files. Try to parse them anyway. But log module_errors.
+ if module_error:
+ print(f'Using fallback parser for: {module_path} ({module_filename}, reason: {module_error})')
+
+ self._process_module_with_fallback_parser(module_filename)
+
+ def _process_module_with_inspect(self, symbols, module_path):
+ """
+ Try to analyse all symbols in the module as found by `inspect.getmembers`.
+ """
+ for obj_name, obj in symbols:
+ if is_import(obj, module_path):
+ continue
+
+ # function and similar callables
+ if inspect.isroutine(obj):
+ self._add_tag(obj_name, obj, KIND_FUNCTION, module_path)
+ # class
+ elif inspect.isclass(obj):
+ if _ignore_class(module_path, obj_name):
+ continue
+ self._add_tag(obj_name, obj, KIND_CLASS, module_path)
+ methods = inspect.getmembers(obj)
+ # methods
+ for m_name, m_obj in methods:
+ self._add_tag(m_name, m_obj, KIND_MEMBER, module_path, parent=obj_name)
+
+ def _process_module_with_fallback_parser(self, module_filename):
+ """
+ Plain regular expression based parsing, used as fallback if `inspect`'ing the module is not possible
+ """
+ with open(module_filename, encoding='utf-8') as filep:
+ for line_number, line in enumerate(filep):
+ match = TAG_REGEXP.match(line)
+ if match:
+ tag_type_str, tagname, args = match.groups()
if not tagname or is_private_identifier(tagname):
- # skip non-public tags
continue
- if tag_type_str == 'class':
- tag_type = TYPE_CLASS
- else:
- tag_type = TYPE_FUNCTION
- args = args.strip()
- tag = '%s%s%s%s%s\n' % (tagname, TA_TYPE, tag_type, TA_ARGLIST, args)
- if not tagname in self.tags and not tagname_is_like_keyword(tagname):
- self.tags[tagname] = tag
- filep.close()
-
- #----------------------------------------------------------------------
+ if tagname in self.tags:
+ continue
+
+ kind = KIND_CLASS if tag_type_str == 'class' else KIND_FUNCTION
+ signature = args.strip()
+ self.tags[tagname] = self._format_tag(tagname, kind, signature, parent=None)
+
def add_builtins(self):
"""
Add the contents of __builtins__ as simple tags
"""
- for tag_name in dir(__builtins__):
- # check if the tag name starts with upper case, then we assume it is a class
- # note that this is a very very simple heuristic to determine the type and will give
- # false positives
- if tag_name[0].isupper():
- tag_type = TYPE_CLASS
- else:
- tag_type = TYPE_FUNCTION
-
- self._add_tag(tag_name, tag_type)
+ builtins = inspect.getmembers(__builtins__)
+ for b_name, b_obj in builtins:
+ if inspect.isclass(b_obj):
+ self._add_tag(b_name, b_obj, KIND_CLASS)
+ elif is_relevant_identifier(b_obj):
+ self._add_tag(b_name, b_obj, KIND_FUNCTION)
- #----------------------------------------------------------------------
def write_to_file(self, filename):
"""
Sort the found tags and write them into the file specified by filename
@param filename (str)
"""
- result = self.tags.values()
- # sort the tags
- result.sort()
+ result = sorted(self.tags.values())
# write them
- target_file = open(filename, 'wb')
- target_file.write(
- '# format=tagmanager - Automatically generated file - do not edit (created on %s)\n' % \
- datetime.datetime.now().ctime())
- for symbol in result:
- if not symbol == '\n': # skip empty lines
- target_file.write(symbol)
- target_file.close()
-
-
-#----------------------------------------------------------------------
-def tagname_is_like_keyword(tagname):
- """ignore tags which start with a keyword to avoid annoying completions of 'pass_' and similar ones"""
- # this is not really efficient but in this script speed doesn't really matter
- for keyword in PYTHON_KEYWORDS:
- if tagname.startswith(keyword):
- return True
+ with open(filename, 'w') as target_file:
+ target_file.write(CTAGS_FILE_HEADER)
+ for symbol in result:
+ if symbol != '\n': # skip empty lines
+ target_file.write(symbol)
+
+
+def is_import(object_, module_path):
+ object_module = getattr(object_, '__module__', None)
+ if object_module and object_module != module_path:
+ return True
+
return False
-#----------------------------------------------------------------------
def is_private_identifier(tagname):
return tagname.startswith('_') or tagname.endswith('_')
-#----------------------------------------------------------------------
-def get_module_filenames(path):
- def ignore_package(package):
- for ignore in PYTHON_LIB_IGNORE_PACKAGES:
- if ignore in package:
- return True
- return False
+def is_relevant_identifier(object_):
+ # TODO add "inspect.isdatadescriptor" for properties
+ # TODO maybe also consider attributes, e.g. by checking against __dict__ or so
+ return \
+ inspect.ismethod(object_) or \
+ inspect.isclass(object_) or \
+ inspect.isfunction(object_) or \
+ inspect.isgeneratorfunction(object_) or \
+ inspect.isgenerator(object_) or \
+ inspect.iscoroutinefunction(object_) or \
+ inspect.iscoroutine(object_) or \
+ inspect.isawaitable(object_) or \
+ inspect.isasyncgenfunction(object_) or \
+ inspect.isasyncgen(object_) or \
+ inspect.isroutine(object_) or \
+ inspect.isabstract(object_)
+
+
+def _setup_global_package_ignore_list():
+ """Read the python-config path from LIBPL and strip the prefix part
+ (e.g. /usr/lib/python3.8/config-3.8-x86_64-linux-gnu gets config-3.8-x86_64-linux-gnu)
+ """
+ python_config_dir = Path(sysconfig.get_config_var('LIBPL'))
+ try:
+ python_config_package = python_config_dir.relative_to(PYTHON_LIB_DIRECTORY)
+ except ValueError:
+ python_config_package = python_config_dir
+
+ PYTHON_LIB_IGNORE_PACKAGES.append(python_config_package.as_posix())
+
+
+def _ignore_package(package):
+ for ignore in PYTHON_LIB_IGNORE_PACKAGES:
+ if ignore in package:
+ return True
+ return False
+
+
+def _ignore_module(module):
+ return module in PYTHON_LIB_IGNORE_MODULES
+
+
+def _ignore_class(module, class_):
+ return f'{module}.{class_}' in PYTHON_LIB_IGNORE_CLASSES
+
+def _get_module_list(*paths):
# the loop is quite slow but it doesn't matter for this script
- filenames = list()
- python_lib_directory_len = len(PYTHON_LIB_DIRECTORY)
- for base, dirs, files in os.walk(path):
- package = base[(python_lib_directory_len + 1):]
- if ignore_package(package):
- continue
- for filename in files:
- module_name = os.path.join(package, filename)
- if module_name in PYTHON_LIB_IGNORE_MODULES:
+ modules = []
+ for path in paths:
+ for module_filename in path.rglob('*.py'):
+ module_name = module_filename.stem
+ package_path = module_filename.relative_to(path)
+ package = '.'.join(package_path.parent.parts)
+ # construct full module path (e.g. xml.sax.xmlreader)
+ if module_name == '__init__':
+ module_path = package
+ elif package:
+ module_path = f'{package}.{module_name}'
+ else:
+ module_path = module_name
+
+ # ignore unwanted modules and packages
+ if _ignore_package(package):
+ continue
+ if _ignore_module(module_path):
continue
- if filename.endswith('.py'):
- module_filename = os.path.join(base, filename)
- filenames.append(module_filename)
- return filenames
+
+ modules.append((module_path, module_filename))
+
+ # sort module list for nicer output
+ return sorted(modules)
-#----------------------------------------------------------------------
def main():
- # process files given on command line
+ _setup_global_package_ignore_list()
+ # process modules given on command line
args = sys.argv[1:]
- if not args:
- args = get_module_filenames(PYTHON_LIB_DIRECTORY)
+ if args:
+ modules = _get_module_list(*args)
+ else:
+ modules = _get_module_list(PYTHON_LIB_DIRECTORY)
parser = Parser()
parser.add_builtins()
- for filename in args:
- parser.process_file(filename)
+ for module_path, module_filename in modules:
+ try:
+ parser.process_module(module_path, module_filename)
+ except Exception as exc:
+ print(f'{exc.__class__.__name__} in {module_path}: {exc}')
+ raise
- parser.write_to_file(tag_filename)
+ parser.write_to_file(TAG_FILENAME)
if __name__ == '__main__':
main()
-
--------------
This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).
Branch: refs/heads/master
Author: Enrico Tröger <enrico.troeger(a)uvena.de>
Committer: Enrico Tröger <enrico.troeger(a)uvena.de>
Date: Wed, 10 May 2023 21:34:36 UTC
Commit: 721550ca76caa155dc3ea2c7e0edb4710ef6c7e9
https://github.com/geany/geany/commit/721550ca76caa155dc3ea2c7e0edb4710ef6c…
Log Message:
-----------
Port create_php_tags to Python3 and generate new PHP tags file
Related to #2615.
Modified Paths:
--------------
data/tags/std.php.tags
scripts/create_php_tags.py
Modified: data/tags/std.php.tags
10332 lines changed, 3972 insertions(+), 6360 deletions(-)
===================================================================
No diff available, check online
Modified: scripts/create_php_tags.py
26 lines changed, 11 insertions(+), 15 deletions(-)
===================================================================
@@ -1,5 +1,4 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
+#!/usr/bin/env python3
#
# Author: Enrico Tröger
# License: GPL v2 or later
@@ -9,10 +8,10 @@
# From those defintions all function tags are extracted and written
# to ../data/tags/std.php.tags (relative to the script's location, not $CWD).
+import re
from json import loads
from os.path import dirname, join
-from urllib2 import urlopen
-import re
+from urllib.request import urlopen
UPSTREAM_TAG_DEFINITION = 'http://doc.php.net/downloads/json/php_manual_en.json'
@@ -32,14 +31,12 @@
TYPE_VARIABLE = 16384
-#----------------------------------------------------------------------
def normalize_name(name):
""" Replace namespace separator with class separators, as Geany only
understands the latter """
return name.replace('\\', '::')
-#----------------------------------------------------------------------
def split_scope(name):
""" Splits the scope from the member, and returns (scope, member).
Returned scope is None if the name is not a member """
@@ -48,10 +45,9 @@ def split_scope(name):
if sep_pos < 0:
return None, name
else:
- return name[:sep_pos], name[sep_pos+2:]
+ return name[:sep_pos], name[sep_pos + 2:]
-#----------------------------------------------------------------------
def parse_and_create_php_tags_file():
# download upstream definition
response = urlopen(UPSTREAM_TAG_DEFINITION)
@@ -64,7 +60,7 @@ def parse_and_create_php_tags_file():
definitions = loads(html)
# generate tags
- tag_list = list()
+ tag_list = []
for tag_name, tag_definition in definitions.items():
prototype_re = PROTOTYPE_RE.format(tag_name=re.escape(tag_name))
match = re.match(prototype_re, tag_definition['prototype'])
@@ -81,24 +77,24 @@ def parse_and_create_php_tags_file():
# Also create a class tag when encountering a __construct()
if tag_name == '__construct' and scope is not None:
scope, tag_name = split_scope(scope)
- tag_list.append((tag_name, TYPE_CLASS, None, arg_list, scope))
+ tag_list.append((tag_name, TYPE_CLASS, None, arg_list, scope or ''))
# write tags
script_dir = dirname(__file__)
tags_file_path = join(script_dir, '..', 'data', 'tags', 'std.php.tags')
- with open(tags_file_path, 'w') as tags_file:
+ with open(tags_file_path, 'w', encoding='iso-8859-1') as tags_file:
tags_file.write('# format=tagmanager\n')
for tag_name, tag_type, return_type, arg_list, scope in sorted(tag_list):
- tag_line = '{}'.format(tag_name)
- for attr, type in [(tag_type, TA_TYPE),
+ tag_line = f'{tag_name}'
+ for attr, type_ in [(tag_type, TA_TYPE),
(arg_list, TA_ARGLIST),
(return_type, TA_VARTYPE),
(scope, TA_SCOPE)]:
if attr is not None:
- tag_line += '{type:c}{attr}'.format(type=type, attr=attr)
+ tag_line += f'{type_:c}{attr}'
tags_file.write(tag_line + '\n')
- print(u'Created: {} with {} tags'.format(tags_file_path, len(tag_list)))
+ print(f'Created: {tags_file_path} with {len(tag_list)} tags')
if __name__ == '__main__':
--------------
This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).