[geany/geany] 8a6fbd: Add PowerShell tag parser

Enrico Tröger git-noreply at xxxxx
Sun Jun 28 13:46:23 UTC 2015

Branch:      refs/heads/master
Author:      Enrico Tröger <enrico.troeger at uvena.de>
Committer:   Enrico Tröger <enrico.troeger at uvena.de>
Date:        Sun, 28 Jun 2015 13:46:23 UTC
Commit:      8a6fbd978691fa324bd5c5ff171bcd508e3b466c

Log Message:
Add PowerShell tag parser

Modified Paths:

Modified: src/filetypes.c
2 lines changed, 1 insertions(+), 1 deletions(-)
@@ -188,7 +188,7 @@ static void init_builtin_filetypes(void)
 	FT_INIT( ASCIIDOC,   ASCIIDOC,     "Asciidoc",         NULL,                      SOURCE_FILE, MARKUP   );
 	FT_INIT( ABAQUS,     ABAQUS,       "Abaqus",           NULL,                      SOURCE_FILE, SCRIPT   );
 	FT_INIT( BATCH,      NONE,         "Batch",            NULL,                      SCRIPT,      SCRIPT   );
-	FT_INIT( POWERSHELL, NONE,         "PowerShell",       NULL,                      SOURCE_FILE, SCRIPT   );
+	FT_INIT( POWERSHELL, POWERSHELL,   "PowerShell",       NULL,                      SOURCE_FILE, SCRIPT   );
 	FT_INIT( RUST,       RUST,         "Rust",             NULL,                      SOURCE_FILE, COMPILED );
 	FT_INIT( COFFEESCRIPT, NONE,       "CoffeeScript",     NULL,                      SOURCE_FILE, SCRIPT   );
 	FT_INIT( GO,         GO,           "Go",               NULL,                      SOURCE_FILE, COMPILED );

Modified: src/symbols.c
3 lines changed, 2 insertions(+), 1 deletions(-)
@@ -286,7 +286,7 @@ GString *symbols_find_typenames_as_string(gint lang, gboolean global)
  * type.
  * @param ft_id File type identifier.
  * @return The context separator string.
- * 
+ *
  * Returns non-printing sequence "\x03" ie ETX (end of text) for filetypes
  * without a context separator.
@@ -302,6 +302,7 @@ const gchar *symbols_get_context_separator(gint ft_id)
 		case GEANY_FILETYPES_GLSL:	/* for structs */
 		/*case GEANY_FILETYPES_RUBY:*/ /* not sure what to use atm*/
 			return "::";

Modified: tagmanager/ctags/Makefile.am
1 lines changed, 1 insertions(+), 0 deletions(-)
@@ -43,6 +43,7 @@ parsers = \
 	pascal.c \
 	perl.c \
 	php.c \
+	powershell.c \
 	python.c \
 	r.c \
 	rest.c \

Modified: tagmanager/ctags/makefile.win32
2 lines changed, 1 insertions(+), 1 deletions(-)
@@ -45,7 +45,7 @@ clean:
 	-$(RM) deps.mak *.o $(COMPLIB)
 $(COMPLIB): abaqus.o abc.o args.o c.o cobol.o erlang.o fortran.o make.o conf.o pascal.o perl.o php.o diff.o vhdl.o verilog.o lua.o js.o json.o \
-actionscript.o nsis.o objc.o \
+actionscript.o nsis.o objc.o powershell.o \
 haskell.o haxe.o html.o python.o lregex.o asciidoc.o rest.o sh.o ctags.o entry.o get.o keyword.o nestlevel.o \
 options.o \
 parse.o basic.o read.o sort.o strlist.o latex.o markdown.o matlab.o docbook.o tcl.o ruby.o rust.o asm.o sql.o txt2tags.o css.o \

Modified: tagmanager/ctags/parsers.h
3 lines changed, 2 insertions(+), 1 deletions(-)
@@ -65,7 +65,8 @@
     RustParser, \
     GoParser, \
     JsonParser, \
-    ZephirParser
+    ZephirParser, \
+    PowerShellParser
 #endif	/* _PARSERS_H */

Modified: tagmanager/ctags/powershell.c
628 lines changed, 628 insertions(+), 0 deletions(-)
@@ -0,0 +1,628 @@
+*   Copyright (c) 2015, Enrico Tröger <enrico.troeger at uvena.de>
+*   Loosely based on the PHP tags parser since the syntax is somewhat similar
+*   regarding variable and function definitions.
+*   This source code is released for free distribution under the terms of the
+*   GNU General Public License.
+*   This module contains code for generating tags for Windows PowerShell scripts.
+#include "general.h"  /* must always come first */
+#include "main.h"
+#include "parse.h"
+#include "read.h"
+#include "vstring.h"
+#include "keyword.h"
+#include "entry.h"
+#include <string.h>
+#define SCOPE_SEPARATOR "::" /* inserted by addToScope() between nested scope names */
+#define ARRAY_LENGTH(array) (sizeof array / sizeof array[0]) /* element count; valid for real arrays only (not pointers), argument is not parenthesized */
+static const char *const accessTypes[] = { /* PowerShell scope prefixes that are reported as a tag's access value;
+ * findValidAccessType() matches against these case-insensitively and returns the entry found here */
+	"global",
+	"local",
+	"script",
+	"private"
+typedef enum { /* tag kinds; values of this type index PowerShellKinds[] (the enumerators are not visible in this excerpt) */
+} powerShellKind;
+static kindOption PowerShellKinds[COUNT_KIND] = { /* one row per tag kind: enabled flag, kind letter, kind name, plural label */
+	{ TRUE, 'f', "function",	"functions" },
+	{ TRUE, 'v', "variable",	"variables" }
+typedef enum eTokenType { /* lexical token classes assigned by readToken() (the enumerators are not visible in this excerpt) */
+} tokenType;
+typedef struct { /* one lexed token together with the scope it was read in */
+	tokenType		type; /* token class, set by readToken() */
+	vString *		string; /* token text: identifier, variable name or string contents */
+	vString *		scope; /* enclosing scope names joined with SCOPE_SEPARATOR; empty at top level */
+	unsigned long	lineNumber; /* taken from getSourceLineNumber() */
+	MIOPos			filePosition; /* taken from getInputFilePosition() */
+	int 			parentKind; /* index into PowerShellKinds[] of the enclosing scope, -1 if none */
+} tokenInfo;
+/* Map a PowerShell scope prefix (e.g. "Global", "script") to its canonical
+ * entry in accessTypes[], comparing case-insensitively.
+ *
+ * access: scope prefix text, or ACCESS_UNDEFINED.
+ * Returns the matching accessTypes[] element, or ACCESS_UNDEFINED when
+ * `access` is undefined or does not name a known scope.
+ */
+static const char *findValidAccessType (const char *const access)
+{
+	unsigned int i;
+	if (access == ACCESS_UNDEFINED)
+		return ACCESS_UNDEFINED; /* early out to save the for-loop if possible */
+	for (i = 0; i < ARRAY_LENGTH(accessTypes); i++)
+	{
+		if (accessTypes[i] == ACCESS_UNDEFINED)
+			continue;
+		if (strcasecmp (access, accessTypes[i]) == 0)
+			return accessTypes[i];
+		/* the for-header advances i; incrementing it here as well would skip
+		 * every other entry, so "local" and "private" would never match */
+	}
+	return ACCESS_UNDEFINED; /* prefix is not a known scope */
+}
+static void initPowerShellEntry (tagEntryInfo *const e, const tokenInfo *const token,
+								 const powerShellKind kind, const char *const access) /* Fill tag entry `e` from `token`.
+ * Sets the tag name (token text), line number, file position and the kind
+ * name/letter looked up in PowerShellKinds[kind].
+ * access: stored as the access extension field unless it is NULL.
+ * When the token carries a non-empty scope, the scope extension fields are
+ * set to the parent kind's name and the scope string. */
+	initTagEntry (e, vStringValue (token->string));
+	e->lineNumber	= token->lineNumber;
+	e->filePosition	= token->filePosition;
+	e->kindName		= PowerShellKinds[kind].name;
+	e->kind			= (char) PowerShellKinds[kind].letter;
+	if (access != NULL)
+		e->extensionFields.access = access;
+	if (vStringLength (token->scope) > 0)
+	{
+		int parentKind = token->parentKind;
+		Assert (parentKind >= 0); /* a non-empty scope is expected to come with a recorded parent kind */
+		e->extensionFields.scope[0] = PowerShellKinds[parentKind].name;
+		e->extensionFields.scope[1] = vStringValue (token->scope);
+	}
+static void makeSimplePowerShellTag (const tokenInfo *const token, const powerShellKind kind,
+									 const char *const access) /* Emit a tag of the given kind for `token`,
+ * with `access` as its access field; does nothing when that kind is disabled in PowerShellKinds[]. */
+	if (PowerShellKinds[kind].enabled)
+	{
+		tagEntryInfo e;
+		initPowerShellEntry (&e, token, kind, access);
+		makeTagEntry (&e);
+	}
+static void makeFunctionTag (const tokenInfo *const token, const vString *const arglist,
+							 const char *const access) /* Emit a K_FUNCTION tag for `token`.
+ * arglist: textual argument list stored in the arglist extension field; may be NULL (field is then left untouched).
+ * Does nothing when function tags are disabled in PowerShellKinds[]. */
+	if (PowerShellKinds[K_FUNCTION].enabled)
+	{
+		tagEntryInfo e;
+		initPowerShellEntry (&e, token, K_FUNCTION, access);
+		if (arglist)
+			e.extensionFields.arglist = vStringValue (arglist);
+		makeTagEntry (&e);
+	}
+static tokenInfo *newToken (void) /* Allocate a token with empty text and scope, type TOKEN_UNDEFINED,
+ * no parent kind (-1) and the current input line/position. Release it with deleteToken(). */
+	tokenInfo *const token = xMalloc (1, tokenInfo);
+	token->type			= TOKEN_UNDEFINED;
+	token->string		= vStringNew ();
+	token->scope		= vStringNew ();
+	token->lineNumber   = getSourceLineNumber ();
+	token->filePosition = getInputFilePosition ();
+	token->parentKind	= -1;
+	return token;
+static void deleteToken (tokenInfo *const token) /* Free a token created by newToken(), including its text and scope strings. */
+	vStringDelete (token->string);
+	vStringDelete (token->scope);
+	eFree (token);
+static void copyToken (tokenInfo *const dest, const tokenInfo *const src,
+					   boolean scope) /* Copy position, type, text and parent kind from `src` to `dest`;
+ * the scope string is copied only when `scope` is true, otherwise dest keeps its own. */
+	dest->lineNumber = src->lineNumber;
+	dest->filePosition = src->filePosition;
+	dest->type = src->type;
+	vStringCopy (dest->string, src->string);
+	dest->parentKind = src->parentKind;
+	if (scope)
+		vStringCopy (dest->scope, src->scope);
+static void addToScope (tokenInfo *const token, const vString *const extra) /* Append `extra` to the token's scope,
+ * preceded by SCOPE_SEPARATOR when the scope is not empty. */
+	if (vStringLength (token->scope) > 0)
+		vStringCatS (token->scope, SCOPE_SEPARATOR);
+	vStringCatS (token->scope, vStringValue (extra));
+	vStringTerminate (token->scope);
+static boolean isIdentChar (const int c) /* True for characters accepted inside identifiers: alphanumerics,
+ * ':' (scope prefix separator), '_', '-' and any value >= 0x80. */
+	return (isalnum (c) || c == ':' || c == '_' || c == '-' || c >= 0x80);
+static int skipToCharacter (const int c) /* Consume input up to and including the next `c`.
+ * Returns `c`, or EOF if the input ended first. */
+	int d;
+	do
+	{
+		d = fileGetc ();
+	} while (d != EOF  &&  d != c);
+	return d;
+static void parseString (vString *const string, const int delimiter) /* Read a quoted string into `string`.
+ * The opening quote has already been consumed; reading stops at `delimiter`
+ * (which is consumed but not stored) or at EOF.
+ * NOTE(review): PowerShell's escape character is the backtick, not the
+ * backslash handled below - confirm this handling is intended. */
+	while (TRUE)
+	{
+		int c = fileGetc ();
+		if (c == '\\' && (c = fileGetc ()) != EOF) /* backslash: store the following character verbatim, even a delimiter */
+			vStringPut (string, (char) c);
+		else if (c == EOF || c == delimiter)
+			break;
+		else
+			vStringPut (string, (char) c);
+	}
+	vStringTerminate (string);
+static void parseIdentifier (vString *const string, const int firstChar) /* Append `firstChar` and all following
+ * identifier characters (see isIdentChar) to `string`; the first non-identifier character is pushed back. */
+	int c = firstChar;
+	do
+	{
+		vStringPut (string, (char) c);
+		c = fileGetc ();
+	} while (isIdentChar (c));
+	fileUngetc (c);
+	vStringTerminate (string);
+static boolean isTokenFunction (vString *const name) /* True when `name` is the keyword "function" or "filter", in any letter case. */
+	return (strcasecmp (vStringValue (name), "function") == 0 ||
+			strcasecmp (vStringValue (name), "filter") == 0);
+static boolean isSpace (int c) /* True for tab, space, vertical tab, newline, carriage return and form feed. */
+	return (c == '\t' || c == ' ' || c == '\v' ||
+			c == '\n' || c == '\r' || c == '\f');
+static int skipWhitespaces (int c) /* Starting with the already-read character `c`, read on while it is
+ * whitespace (see isSpace); returns the first non-whitespace character, which may be EOF. */
+	while (isSpace (c))
+		c = fileGetc ();
+	return c;
+static int skipSingleComment (void) /* Consume the rest of the current line, including its line ending.
+ * Returns the character that ended the scan: '\n', '\r' or EOF. */
+	int c;
+	do
+	{
+		c = fileGetc ();
+		if (c == '\r')
+		{
+			int next = fileGetc ();
+			if (next != '\n')
+				fileUngetc (next); /* lone CR: keep the following character for the caller */
+			else
+				c = next; /* CR LF counts as a single line ending */
+		}
+	} while (c != EOF && c != '\n' && c != '\r');
+	return c;
+static void readToken (tokenInfo *const token) /* Read the next token from the input into `token`.
+ * Resets the token's type and text, skips leading whitespace and comments,
+ * then sets type, text (for strings, variables and identifiers) and the
+ * line/position. The token's scope and parentKind are not modified.
+ * NOTE(review): the target label of the `goto getNextChar` statements is not
+ * visible in this excerpt. */
+	int c;
+	token->type		= TOKEN_UNDEFINED;
+	vStringClear (token->string);
+	c = fileGetc ();
+	c = skipWhitespaces (c);
+	token->lineNumber   = getSourceLineNumber ();
+	token->filePosition = getInputFilePosition ();
+	switch (c)
+	{
+		case EOF: token->type = TOKEN_EOF;					break;
+		case '(': token->type = TOKEN_OPEN_PAREN;			break;
+		case ')': token->type = TOKEN_CLOSE_PAREN;			break;
+		case ';': token->type = TOKEN_SEMICOLON;			break;
+		case ',': token->type = TOKEN_COMMA;				break;
+		case '.': token->type = TOKEN_PERIOD;				break;
+		case ':': token->type = TOKEN_COLON;				break;
+		case '{': token->type = TOKEN_OPEN_CURLY;			break;
+		case '}': token->type = TOKEN_CLOSE_CURLY;			break;
+		case '[': token->type = TOKEN_OPEN_SQUARE;			break;
+		case ']': token->type = TOKEN_CLOSE_SQUARE;			break;
+		case '=': token->type = TOKEN_EQUAL_SIGN;			break;
+		case '\'':
+		case '"':
+			token->type = TOKEN_STRING;
+			parseString (token->string, c);
+			token->lineNumber = getSourceLineNumber (); /* position is re-read after the string has been consumed */
+			token->filePosition = getInputFilePosition ();
+			break;
+		case '<':
+		{
+			int d = fileGetc ();
+			if (d == '#')
+			{
+				/* <# ... #> multiline comment */
+				do
+				{
+					c = skipToCharacter ('#');
+					if (c != EOF)
+					{
+						c = fileGetc ();
+						if (c == '>')
+							break;
+						else
+							fileUngetc (c); /* '#' not followed by '>': keep scanning from this character */
+					}
+				} while (c != EOF);
+				goto getNextChar;
+			}
+			else
+			{
+				fileUngetc (d);
+				token->type = TOKEN_UNDEFINED;
+			}
+			break;
+		}
+		case '#': /* comment */
+			skipSingleComment ();
+			goto getNextChar;
+			break;
+		case '+':
+		case '-':
+		case '*':
+		case '/':
+		case '%':
+		{
+			int d = fileGetc ();
+			if (d != '=') /* a following '=' is swallowed as part of the operator (e.g. "+=") */
+				fileUngetc (d);
+			token->type = TOKEN_OPERATOR;
+			break;
+		}
+		case '$': /* variable start */
+		{
+			int d = fileGetc ();
+			if (! isIdentChar (d))
+			{
+				fileUngetc (d);
+				token->type = TOKEN_UNDEFINED;
+			}
+			else
+			{
+				parseIdentifier (token->string, d); /* the leading '$' is not stored in the token text */
+				token->type = TOKEN_VARIABLE;
+			}
+			break;
+		}
+		default:
+			if (! isIdentChar (c))
+				token->type = TOKEN_UNDEFINED;
+			else
+			{
+				parseIdentifier (token->string, c);
+				if (isTokenFunction (token->string)) /* "function" / "filter" */
+					token->type = TOKEN_KEYWORD;
+				else
+					token->type = TOKEN_IDENTIFIER;
+			}
+			break;
+	}
+static void enterScope (tokenInfo *const parentToken,
+						const vString *const extraScope,
+						const int parentKind); /* forward declaration: parseFunction() calls enterScope() before its definition */
+/* Strip a possible PowerShell scope specification and convert it to accessType.
+ *
+ * If the token text contains a ':', everything before the first colon is taken
+ * as the scope prefix; the prefix and the colon are removed from token->string
+ * in place and the prefix is looked up with findValidAccessType().
+ * Returns that lookup result, or ACCESS_UNDEFINED when the text has no colon
+ * (the token is then left unchanged). */
+static const char *parsePowerShellScope (tokenInfo *const token)
+	const char *access = ACCESS_UNDEFINED;
+	const char *const tokenName = vStringValue (token->string);
+	const char *powershellScopeEnd;
+	powershellScopeEnd = strchr (tokenName, ':');
+	if (powershellScopeEnd)
+	{
+		size_t powershellScopeLen;
+		vString * powershellScope = vStringNew ();
+		powershellScopeLen = (size_t)(powershellScopeEnd - tokenName);
+		/* extract the scope */
+		vStringNCopyS (powershellScope, tokenName, powershellScopeLen);
+		vStringTerminate (powershellScope);
+		/* cut the resulting scope string from the identifier */
+		memmove (token->string->buffer,
+				 /* +1 to skip the leading colon */
+				 token->string->buffer + powershellScopeLen + 1,
+				 /* +1 for the skipped leading colon and - 1 to include the trailing \0 byte */
+				 token->string->length + 1 - powershellScopeLen - 1);
+		token->string->length -= powershellScopeLen + 1; /* prefix plus the colon were removed */
+		access = findValidAccessType (vStringValue (powershellScope));
+		vStringDelete (powershellScope);
+	}
+	return access;
+/* Parse a function or filter definition.
+ *
+ * 	function myfunc($foo, $bar) {}
+ *
+ * Reads the name following the current token, strips an optional scope prefix
+ * from it, collects a display form of the parenthesised argument list if there
+ * is one, emits a function tag and parses the body with enterScope().
+ * Returns TRUE when the caller should read the next token itself, FALSE when
+ * `token` already holds a token the caller still has to process.
+ */
+static boolean parseFunction (tokenInfo *const token)
+	boolean readNext = TRUE;
+	tokenInfo *nameFree = NULL;
+	const char *access;
+	readToken (token);
+	if (token->type != TOKEN_IDENTIFIER)
+		return FALSE;
+	access = parsePowerShellScope (token);
+	nameFree = newToken ();
+	copyToken (nameFree, token, TRUE); /* keep the name: `token` is reused while reading the arguments */
+	readToken (token);
+	if (token->type == TOKEN_OPEN_PAREN)
+	{
+		vString *arglist = vStringNew ();
+		int depth = 1; /* nesting level of parentheses; the list ends when it drops to 0 */
+		vStringPut (arglist, '(');
+		do
+		{
+			readToken (token);
+			switch (token->type)
+			{
+				case TOKEN_OPEN_PAREN:  depth++; break;
+				case TOKEN_CLOSE_PAREN: depth--; break;
+				default: break;
+			}
+			/* display part */
+			switch (token->type)
+			{
+				case TOKEN_CLOSE_CURLY:		vStringPut (arglist, '}');		break;
+				case TOKEN_CLOSE_PAREN:		vStringPut (arglist, ')');		break;
+				case TOKEN_CLOSE_SQUARE:	vStringPut (arglist, ']');		break;
+				case TOKEN_COLON:			vStringPut (arglist, ':');		break;
+				case TOKEN_COMMA:			vStringCatS (arglist, ", ");	break;
+				case TOKEN_EQUAL_SIGN:		vStringCatS (arglist, " = ");	break;
+				case TOKEN_OPEN_CURLY:		vStringPut (arglist, '{');		break;
+				case TOKEN_OPEN_PAREN:		vStringPut (arglist, '(');		break;
+				case TOKEN_OPEN_SQUARE:		vStringPut (arglist, '[');		break;
+				case TOKEN_PERIOD:			vStringPut (arglist, '.');		break;
+				case TOKEN_SEMICOLON:		vStringPut (arglist, ';');		break;
+				case TOKEN_STRING:			vStringCatS (arglist, "'...'");	break; /* string contents are not shown */
+				case TOKEN_KEYWORD:
+				{
+					switch (vStringLast (arglist))
+					{
+						case 0:
+						case ' ':
+						case '{':
+						case '(':
+						case '[':
+						case '.':
+							/* no need for a space between those and the identifier */
+							break;
+						default:
+							vStringPut (arglist, ' ');
+							break;
+					}
+					/* NOTE(review): as shown, only TOKEN_KEYWORD reaches this point, so the
+					 * test below cannot be true; further case labels were presumably lost
+					 * from this excerpt - confirm against the original file */
+					if (token->type == TOKEN_VARIABLE)
+						vStringPut (arglist, '$');
+					vStringCat (arglist, token->string);
+					break;
+				}
+				default: break;
+			}
+		}
+		while (token->type != TOKEN_EOF && depth > 0);
+		vStringTerminate (arglist);
+		makeFunctionTag (nameFree, arglist, access);
+		vStringDelete (arglist);
+		readToken (token);
+	}
+	else if (token->type == TOKEN_OPEN_CURLY)
+	{	/* filters don't need to have an arglist */
+		makeFunctionTag (nameFree, NULL, access);
+	}
+	if (token->type == TOKEN_OPEN_CURLY)
+		enterScope (token, nameFree->string, K_FUNCTION); /* parse the body with the function name added to the scope */
+	else
+		readNext = FALSE;
+	if (nameFree)
+		deleteToken (nameFree);
+	return readNext;
+/* Parses declarations of the form
+ * 	$var = VALUE
+ *
+ * Called with `token` holding the variable. A variable tag is emitted only
+ * when the variable is followed by '=' and the current scope is not a
+ * function; an optional scope prefix is stripped from the name and reported
+ * as the tag's access.
+ * Returns TRUE when the caller should read the next token, FALSE when `token`
+ * already holds the token that followed the variable.
+ */
+static boolean parseVariable (tokenInfo *const token)
+	tokenInfo *name;
+	boolean readNext = TRUE;
+	const char *access;
+	name = newToken ();
+	copyToken (name, token, TRUE); /* keep the variable: `token` is overwritten by the lookahead below */
+	readToken (token);
+	if (token->type == TOKEN_EQUAL_SIGN)
+	{
+		if (token->parentKind != K_FUNCTION)
+		{	/* ignore local variables (i.e. within a function) */
+			access = parsePowerShellScope (name);
+			makeSimplePowerShellTag (name, K_VARIABLE, access);
+			readNext = TRUE;
+		}
+	}
+	else
+		readNext = FALSE;
+	deleteToken (name);
+	return readNext;
+static void enterScope (tokenInfo *const parentToken,
+						const vString *const extraScope,
+						const int parentKind) /* Parse tokens until the closing '}' of the current block or EOF.
+ * Works on a private copy of `parentToken`. When `extraScope` is non-NULL it
+ * is appended to the copy's scope and `parentKind` becomes its parent kind, so
+ * tags found inside are reported within that scope.
+ * On return `parentToken` holds the last token read (without the inner scope)
+ * and its original parentKind.
+ * NOTE(review): the case labels of the switch below are not visible in this
+ * excerpt; from the calls they presumably dispatch on '{', the
+ * function/filter keyword and variables - confirm against the original file. */
+	tokenInfo *token = newToken ();
+	int origParentKind = parentToken->parentKind;
+	copyToken (token, parentToken, TRUE);
+	if (extraScope)
+	{
+		addToScope (token, extraScope);
+		token->parentKind = parentKind;
+	}
+	readToken (token);
+	while (token->type != TOKEN_EOF &&
+		   token->type != TOKEN_CLOSE_CURLY)
+	{
+		boolean readNext = TRUE;
+		switch (token->type)
+		{
+				enterScope (token, NULL, -1);
+				break;
+				readNext = parseFunction (token);
+				break;
+				readNext = parseVariable (token);
+				break;
+			default: break;
+		}
+		if (readNext) /* the parse helpers return FALSE when they already read one token ahead */
+			readToken (token);
+	}
+	copyToken (parentToken, token, FALSE); /* hand the terminating token back, keeping the parent's own scope */
+	parentToken->parentKind = origParentKind;
+	deleteToken (token);
+static void findPowerShellTags (void) /* Parser entry point: parse the whole input at top-level scope.
+ * enterScope() also returns on a '}' token, so it is called again until EOF is reached. */
+	tokenInfo *const token = newToken ();
+	do
+	{
+		enterScope (token, NULL, -1);
+	}
+	while (token->type != TOKEN_EOF); /* keep going even with unmatched braces */
+	deleteToken (token);
+extern parserDefinition* PowerShellParser (void) /* Build the parser definition named "PowerShell":
+ * tag kinds from PowerShellKinds[], file extensions "ps1" and "psm1",
+ * and findPowerShellTags() as the parse routine. */
+	static const char *const extensions [] = { "ps1", "psm1", NULL };
+	parserDefinition* def = parserNew ("PowerShell");
+	def->kinds      = PowerShellKinds;
+	def->kindCount  = KIND_COUNT (PowerShellKinds);
+	def->extensions = extensions;
+	def->parser     = findPowerShellTags;
+	return def;
+/* vi:set tabstop=4 shiftwidth=4: */

Modified: tagmanager/src/tm_parser.h
1 lines changed, 1 insertions(+), 0 deletions(-)
@@ -72,6 +72,7 @@ typedef enum
 } TMParserType;

Modified: tests/ctags/Makefile.am
1 lines changed, 1 insertions(+), 0 deletions(-)
@@ -260,6 +260,7 @@ test_sources = \
 	simple.mak						\
 	simple.php						\
 	simple.pl						\
+	simple.ps1						\
 	simple.py						\
 	simple.rb						\
 	simple.sh						\

Modified: tests/ctags/simple.ps1
112 lines changed, 112 insertions(+), 0 deletions(-)
@@ -0,0 +1,112 @@
+# pseudo #!/PowerShell :)
+# test file for the CTags/Geany PowerShell tag parser
+# based on real world code but simplified for automated tests
+multiline comment including a function and variable, both should be ignored:
+$IgnoreThisVaribale = "Stop"
+function IgnoreThisFunction($arg1)  {
+    Write-Host "dummy"
+# immediately stop the script if an error occurs
+$ErrorActionPreference = "Stop"
+# a global scoped variable
+$Global:Settings = $null
+# a local scoped variable
+$Local:ALocalVar = $null
+# a usual variable
+$BasePath = split-path -parent $Global:MyInvocation.InvocationName
+FUNCTION Read-Configuration-File() {
+    $Hostname = [System.Environment]::MachineName
+    $ConfigurationFileName = $BasePath + "\script-${Hostname}.conf"
+    LogMessage "Read configuration '${ConfigurationFileName}'"
+    $ConfigFileContent = Get-Content -raw $ConfigurationFileName
+    $Global:Settings = Convertfrom-Stringdata $ConfigFileContent
+Function LogMessageOK()
+    $x = [Console]::WindowWidth - 6
+    $y = [Console]::CursorTop
+    Try {
+        [Console]::setcursorposition($x, $y)
+    } Catch [system.exception] {
+        # intentionally left empty for redirect of outputs to file
+    }
+    Write-Host -foregroundcolor "green" "[ok]"
+function LogMessage() {
+    param(
+        [Parameter(Mandatory=$false)][switch] $NoNewLine,
+        [Parameter(Mandatory=$true)][string] $Message
+    )
+    $Date = Get-Date -UFormat "%Y-%m-%d %T: "
+    Write-Host -foregroundcolor "yellow" -NoNewLine $Date
+    if ($NoNewLine) {
+        Write-Host -foregroundcolor "green" -NoNewLine $Message
+    } else {
+        Write-Host -foregroundcolor "green" $Message
+    }
+function global:A-Global-Scope-Function() {
+    Write-Host "dummy"
+filter Script:MyFilter {
+    filter-something
+Filter Private:MyPrivateFilter {
+    filter-something
+function LoadTemplate($template) {
+    # woah, this is real magic,
+    # see http://stackoverflow.com/questions/10754582/string-interpolation-of-hashtable-values-in-powershell
+    # Set all unbound variables (@args) in the local context
+    while ($args)
+    {
+        ($key, $val, $args) = $args
+        Set-Variable -Name $key.SubString(1, $key.Length-2) -Value $val
+    }
+    $ExecutionContext.InvokeCommand.ExpandString($template)
+function TopLevelFunction() {
+    function SecondLevelNestedFunction() {
+        function ThirdLevelNestedFunction() {
+            doSomething()
+        }
+        ThirdLevelNestedFunction
+    }
+    SecondLevelNestedFunction
+function Main() {
+    Read-Configuration-File
+    LogMessage $("Working on Environment '{0}'" -f $Settings["EnvironmentName"])
+    LogMessage "do something ..."
+    Stripped-Down-Code
+    LogMessageOK

Modified: tests/ctags/simple.ps1.tags
16 lines changed, 16 insertions(+), 0 deletions(-)
@@ -0,0 +1,16 @@
+# format=tagmanager

Modified: wscript
1 lines changed, 1 insertions(+), 0 deletions(-)
@@ -106,6 +106,7 @@ ctags_sources = set([
+    'tagmanager/ctags/powershell.c',

This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).

More information about the Commits mailing list