[geany/geany] 4bfac4: Merge branch 'wip/better-php-parser'

Colomban Wendling git-noreply at xxxxx
Wed Jul 3 17:27:19 UTC 2013


Branch:      refs/heads/master
Author:      Colomban Wendling <ban at herbesfolles.org>
Committer:   Colomban Wendling <ban at herbesfolles.org>
Date:        Wed, 03 Jul 2013 17:27:19 UTC
Commit:      4bfac4fcdacb93f58d36496769c5f03e7603788a
             https://github.com/geany/geany/commit/4bfac4fcdacb93f58d36496769c5f03e7603788a

Log Message:
-----------
Merge branch 'wip/better-php-parser'


Modified Paths:
--------------
    data/filetypes.html
    src/symbols.c
    tagmanager/ctags/php.c
    tagmanager/ctags/vstring.h
    tests/ctags/Makefile.am
    tests/ctags/case_sensitivity.php
    tests/ctags/case_sensitivity.php.tags
    tests/ctags/classes.php.tags
    tests/ctags/mode.php
    tests/ctags/mode.php.tags
    tests/ctags/namespaces.php.tags
    tests/ctags/namespaces2.php.tags
    tests/ctags/php5_5_class_kw.php
    tests/ctags/php5_5_class_kw.php.tags
    tests/ctags/simple.php.tags
    tests/ctags/traits.php.tags

Modified: data/filetypes.html
2 files changed, 1 insertions(+), 1 deletions(-)
===================================================================
@@ -81,7 +81,7 @@
 javascript=abs abstract acos anchor asin atan atan2 big bold boolean break byte case catch ceil char charAt charCodeAt class concat const continue cos Date debugger default delete do double else enum escape eval exp export extends false final finally fixed float floor fontcolor fontsize for fromCharCode function goto if implements import in indexOf Infinity instanceof int interface isFinite isNaN italics join lastIndexOf length link log long Math max MAX_VALUE min MIN_VALUE NaN native NEGATIVE_INFINITY new null Number package parseFloat parseInt pop POSITIVE_INFINITY pow private protected public push random return reverse round shift short sin slice small sort splice split sqrt static strike string String sub substr substring sup super switch synchronized tan this throw throws toLowerCase toString toUpperCase transient true try typeof undefined unescape unshift valueOf var void volatile while with
 vbscript=and as boolean byref byte byval call case class const continue currency date dim do double each else elseif empty end error exit false for function get global goto if in integer long loop me new next not nothing object on optional or private property public put redim rem resume select set single string sub then to true type until variant wend while with
 python=and assert break class continue complex def del elif else except exec finally for from global if import in inherit is int lambda not or pass print raise return tuple try unicode while yield long float str list
-php=abstract and array as bool boolean break case catch cfunction __class__ class clone const continue declare default die __dir__ directory do double echo else elseif empty enddeclare endfor endforeach endif endswitch endwhile eval exception exit extends false __file__ final float for foreach __function__ function goto global if implements include include_once instanceof int integer interface isset __line__ list __method__ namespace __namespace__ new null object old_function or parent php_user_filter print private protected public real require require_once resource return self __sleep static stdclass string switch this throw true try unset use var __wakeup while xor insteadof trait
+php=abstract and array as bool boolean break case catch cfunction __class__ class clone const continue declare default die __dir__ directory do double echo else elseif empty enddeclare endfor endforeach endif endswitch endwhile eval exception exit extends false __file__ final finally float for foreach __function__ function goto global if implements include include_once instanceof insteadof int integer interface isset __line__ list __method__ namespace __namespace__ new null object old_function or parent php_user_filter print private protected public real require require_once resource return self __sleep static stdclass string switch this throw trait true try unset use var __wakeup while xor yield
 sgml=ELEMENT DOCTYPE ATTLIST ENTITY NOTATION
 
 


Modified: src/symbols.c
3 files changed, 3 insertions(+), 0 deletions(-)
===================================================================
@@ -299,6 +299,7 @@ const gchar *symbols_get_context_separator(gint ft_id)
 		case GEANY_FILETYPES_CPP:
 		case GEANY_FILETYPES_GLSL:	/* for structs */
 		/*case GEANY_FILETYPES_RUBY:*/ /* not sure what to use atm*/
+		case GEANY_FILETYPES_PHP:
 			return "::";
 
 		/* avoid confusion with other possible separators in group/section name */
@@ -788,11 +789,13 @@ static void add_top_level_items(GeanyDocument *doc)
 		case GEANY_FILETYPES_PHP:
 		{
 			tag_list_add_groups(tag_store,
+				&(tv_iters.tag_namespace), _("Namespaces"), "classviewer-namespace",
 				&(tv_iters.tag_interface), _("Interfaces"), "classviewer-struct",
 				&(tv_iters.tag_class), _("Classes"), "classviewer-class",
 				&(tv_iters.tag_function), _("Functions"), "classviewer-method",
 				&(tv_iters.tag_macro), _("Constants"), "classviewer-macro",
 				&(tv_iters.tag_variable), _("Variables"), "classviewer-var",
+				&(tv_iters.tag_struct), _("Traits"), "classviewer-struct",
 				NULL);
 			break;
 		}


Modified: tagmanager/ctags/php.c
1556 files changed, 1361 insertions(+), 195 deletions(-)
===================================================================
@@ -1,281 +1,1447 @@
 /*
-*   Copyright (c) 2000, Jesus Castagnetto <jmcastagnetto at zkey.com>
+*   Copyright (c) 2013, Colomban Wendling <ban at herbesfolles.org>
 *
 *   This source code is released for free distribution under the terms of the
 *   GNU General Public License.
 *
-*   This module contains functions for generating tags for the PHP web page
-*   scripting language. Only recognizes functions and classes, not methods or
-*   variables.
-*
-*   Parsing PHP defines by Pavel Hlousek <pavel.hlousek at seznam.cz>, Apr 2003.
+*   This module contains code for generating tags for the PHP scripting
+*   language.
 */
 
 /*
 *   INCLUDE FILES
 */
 #include "general.h"  /* must always come first */
-
-#include <string.h>
 #include "main.h"
 #include "parse.h"
 #include "read.h"
 #include "vstring.h"
+#include "keyword.h"
+#include "entry.h"
+
+
+#define SCOPE_SEPARATOR "::"
+
+
+typedef enum {
+	KEYWORD_NONE = -1,
+	KEYWORD_abstract,
+	KEYWORD_and,
+	KEYWORD_as,
+	KEYWORD_break,
+	KEYWORD_callable,
+	KEYWORD_case,
+	KEYWORD_catch,
+	KEYWORD_class,
+	KEYWORD_clone,
+	KEYWORD_const,
+	KEYWORD_continue,
+	KEYWORD_declare,
+	KEYWORD_define,
+	KEYWORD_default,
+	KEYWORD_do,
+	KEYWORD_echo,
+	KEYWORD_else,
+	KEYWORD_elif,
+	KEYWORD_enddeclare,
+	KEYWORD_endfor,
+	KEYWORD_endforeach,
+	KEYWORD_endif,
+	KEYWORD_endswitch,
+	KEYWORD_endwhile,
+	KEYWORD_extends,
+	KEYWORD_final,
+	KEYWORD_finally,
+	KEYWORD_for,
+	KEYWORD_foreach,
+	KEYWORD_function,
+	KEYWORD_global,
+	KEYWORD_goto,
+	KEYWORD_if,
+	KEYWORD_implements,
+	KEYWORD_include,
+	KEYWORD_include_once,
+	KEYWORD_instanceof,
+	KEYWORD_insteadof,
+	KEYWORD_interface,
+	KEYWORD_namespace,
+	KEYWORD_new,
+	KEYWORD_or,
+	KEYWORD_print,
+	KEYWORD_private,
+	KEYWORD_protected,
+	KEYWORD_public,
+	KEYWORD_require,
+	KEYWORD_require_once,
+	KEYWORD_return,
+	KEYWORD_static,
+	KEYWORD_switch,
+	KEYWORD_throw,
+	KEYWORD_trait,
+	KEYWORD_try,
+	KEYWORD_use,
+	KEYWORD_var,
+	KEYWORD_while,
+	KEYWORD_xor,
+	KEYWORD_yield
+} keywordId;
 
-/*
-*   DATA DEFINITIONS
-*/
 typedef enum {
-	K_CLASS, K_DEFINE, K_FUNCTION, K_VARIABLE
+	ACCESS_UNDEFINED,
+	ACCESS_PRIVATE,
+	ACCESS_PROTECTED,
+	ACCESS_PUBLIC,
+	COUNT_ACCESS
+} accessType;
+
+typedef enum {
+	IMPL_UNDEFINED,
+	IMPL_ABSTRACT,
+	COUNT_IMPL
+} implType;
+
+typedef enum {
+	K_CLASS,
+	K_DEFINE,
+	K_FUNCTION,
+	K_INTERFACE,
+	K_LOCAL_VARIABLE,
+	K_NAMESPACE,
+	K_TRAIT,
+	K_VARIABLE,
+	COUNT_KIND
 } phpKind;
 
-#if 0
-static kindOption PhpKinds [] = {
-	{ TRUE, 'c', "class",    "classes" },
-	{ TRUE, 'd', "define",   "constant definitions" },
-	{ TRUE, 'f', "function", "functions" },
-	{ TRUE, 'v', "variable", "variables" }
+static kindOption PhpKinds[COUNT_KIND] = {
+	{ TRUE, 'c', "class",		"classes" },
+	{ TRUE, 'm', "macro",		"constant definitions" },
+	{ TRUE, 'f', "function",	"functions" },
+	{ TRUE, 'i', "interface",	"interfaces" },
+	{ FALSE, 'l', "local",		"local variables" },
+	{ TRUE, 'n', "namespace",	"namespaces" },
+	{ TRUE, 's', "struct",		"traits" },
+	{ TRUE, 'v', "variable",	"variables" }
 };
-#endif
 
-/*
-*   FUNCTION DEFINITIONS
-*/
+typedef struct {
+	const char *name;
+	keywordId id;
+} keywordDesc;
 
-/* JavaScript patterns are duplicated in jscript.c */
+static const keywordDesc PhpKeywordTable[] = {
+	/* keyword			keyword ID */
+	{ "abstract",		KEYWORD_abstract		},
+	{ "and",			KEYWORD_and				},
+	{ "as",				KEYWORD_as				},
+	{ "break",			KEYWORD_break			},
+	{ "callable",		KEYWORD_callable		},
+	{ "case",			KEYWORD_case			},
+	{ "catch",			KEYWORD_catch			},
+	{ "cfunction",		KEYWORD_function		}, /* nobody knows what the hell this is, but it seems to behave much like "function" so bind it to it */
+	{ "class",			KEYWORD_class			},
+	{ "clone",			KEYWORD_clone			},
+	{ "const",			KEYWORD_const			},
+	{ "continue",		KEYWORD_continue		},
+	{ "declare",		KEYWORD_declare			},
+	{ "define",			KEYWORD_define			}, /* this isn't really a keyword but we handle it so it's easier this way */
+	{ "default",		KEYWORD_default			},
+	{ "do",				KEYWORD_do				},
+	{ "echo",			KEYWORD_echo			},
+	{ "else",			KEYWORD_else			},
+	{ "elseif",			KEYWORD_elif			},
+	{ "enddeclare",		KEYWORD_enddeclare		},
+	{ "endfor",			KEYWORD_endfor			},
+	{ "endforeach",		KEYWORD_endforeach		},
+	{ "endif",			KEYWORD_endif			},
+	{ "endswitch",		KEYWORD_endswitch		},
+	{ "endwhile",		KEYWORD_endwhile		},
+	{ "extends",		KEYWORD_extends			},
+	{ "final",			KEYWORD_final			},
+	{ "finally",		KEYWORD_finally			},
+	{ "for",			KEYWORD_for				},
+	{ "foreach",		KEYWORD_foreach			},
+	{ "function",		KEYWORD_function		},
+	{ "global",			KEYWORD_global			},
+	{ "goto",			KEYWORD_goto			},
+	{ "if",				KEYWORD_if				},
+	{ "implements",		KEYWORD_implements		},
+	{ "include",		KEYWORD_include			},
+	{ "include_once",	KEYWORD_include_once	},
+	{ "instanceof",		KEYWORD_instanceof		},
+	{ "insteadof",		KEYWORD_insteadof		},
+	{ "interface",		KEYWORD_interface		},
+	{ "namespace",		KEYWORD_namespace		},
+	{ "new",			KEYWORD_new				},
+	{ "or",				KEYWORD_or				},
+	{ "print",			KEYWORD_print			},
+	{ "private",		KEYWORD_private			},
+	{ "protected",		KEYWORD_protected		},
+	{ "public",			KEYWORD_public			},
+	{ "require",		KEYWORD_require			},
+	{ "require_once",	KEYWORD_require_once	},
+	{ "return",			KEYWORD_return			},
+	{ "static",			KEYWORD_static			},
+	{ "switch",			KEYWORD_switch			},
+	{ "throw",			KEYWORD_throw			},
+	{ "trait",			KEYWORD_trait			},
+	{ "try",			KEYWORD_try				},
+	{ "use",			KEYWORD_use				},
+	{ "var",			KEYWORD_var				},
+	{ "while",			KEYWORD_while			},
+	{ "xor",			KEYWORD_xor				},
+	{ "yield",			KEYWORD_yield			}
+};
 
-/*
- * Cygwin doesn't support non-ASCII characters in character classes.
- * This isn't a good solution to the underlying problem, because we're still
- * making assumptions about the character encoding.
- * Really, these regular expressions need to concentrate on what marks the
- * end of an identifier, and we need something like iconv to take into
- * account the user's locale (or an override on the command-line.)
- */
-/*
-#ifdef __CYGWIN__
-#define ALPHA "[:alpha:]"
-#define ALNUM "[:alnum:]"
-#else
-#define ALPHA "A-Za-z\x7f-\xff"
-#define ALNUM "0-9A-Za-z\x7f-\xff"
-#endif
-*/
-/* "A-Za-z\x7f-\xff" fails on other locales than "C" and so skip it */
-#define ALPHA "[:alpha:]"
-#define ALNUM "[:alnum:]"
-
-static void function_cb(const char *line, const regexMatch *matches, unsigned int count);
-
-static void installPHPRegex (const langType language)
-{
-	addTagRegex(language, "^[ \t]*((final|abstract)[ \t]+)*class[ \t]+([" ALPHA "_][" ALNUM "_]*)",
-		"\\3", "c,class,classes", NULL);
-	addTagRegex(language, "^[ \t]*interface[ \t]+([" ALPHA "_][" ALNUM "_]*)",
-		"\\1", "i,interface,interfaces", NULL);
-	addTagRegex(language, "^[ \t]*define[ \t]*\\([ \t]*['\"]?([" ALPHA "_][" ALNUM "_]*)",
-		"\\1", "m,macro,macros", NULL);
-	addTagRegex(language, "^[ \t]*const[ \t]*([" ALPHA "_][" ALNUM "_]*)[ \t]*[=;]",
-		"\\1", "m,macro,macros", NULL);
-	addCallbackRegex(language,
-		"^[ \t]*((public|protected|private|static|final)[ \t]+)*function[ \t]+&?[ \t]*([" ALPHA "_][" ALNUM "_]*)[[:space:]]*(\\(.*\\)|\\(.*)",
-		NULL, function_cb);
-	addTagRegex(language, "^[ \t]*(\\$|::\\$|\\$this->)([" ALPHA "_][" ALNUM "_]*)[ \t]*=",
-		"\\2", "v,variable,variables", NULL);
-	addTagRegex(language, "^[ \t]*((var|public|protected|private|static)[ \t]+)+\\$([" ALPHA "_][" ALNUM "_]*)[ \t]*[=;]",
-		"\\3", "v,variable,variables", NULL);
-
-	/* function regex is covered by PHP regex */
-	addTagRegex (language, "(^|[ \t])([A-Za-z0-9_]+)[ \t]*[=:][ \t]*function[ \t]*\\(",
-		"\\2", "j,jsfunction,javascript functions", NULL);
-	addTagRegex (language, "(^|[ \t])([A-Za-z0-9_.]+)\\.([A-Za-z0-9_]+)[ \t]*=[ \t]*function[ \t]*\\(",
-		"\\2.\\3", "j,jsfunction,javascript functions", NULL);
-	addTagRegex (language, "(^|[ \t])([A-Za-z0-9_.]+)\\.([A-Za-z0-9_]+)[ \t]*=[ \t]*function[ \t]*\\(",
-		"\\3", "j,jsfunction,javascript functions", NULL);
-}
-
-
-static void function_cb(const char *line, const regexMatch *matches, unsigned int count)
-{
-	char *name, *arglist;
-	char kind = 'f';
-	static const char *kindName = "function";
-	tagEntryInfo e;
-	const regexMatch *match_funcname = NULL;
-	const regexMatch *match_arglist = NULL;
-
-	if (count > 2)
-	{
-		match_funcname = &matches[count - 2];
-		match_arglist = &matches[count - 1];
-	}
-
-	if (match_funcname != NULL)
-	{
-		name = xMalloc(match_funcname->length + 1, char);
-		strncpy(name, line + match_funcname->start, match_funcname->length);
-		*(name+match_funcname->length) = '\x0';
-		arglist = xMalloc(match_arglist->length + 1, char);
-		strncpy(arglist, line + match_arglist->start, match_arglist->length);
-		*(arglist+match_arglist->length) = '\x0';
-
-		initTagEntry (&e, name);
-		e.kind = kind;
-		e.kindName = kindName;
-		e.extensionFields.arglist = arglist;
+
+typedef enum eTokenType {
+	TOKEN_UNDEFINED,
+	TOKEN_EOF,
+	TOKEN_CHARACTER,
+	TOKEN_CLOSE_PAREN,
+	TOKEN_SEMICOLON,
+	TOKEN_COLON,
+	TOKEN_COMMA,
+	TOKEN_KEYWORD,
+	TOKEN_OPEN_PAREN,
+	TOKEN_OPERATOR,
+	TOKEN_IDENTIFIER,
+	TOKEN_STRING,
+	TOKEN_PERIOD,
+	TOKEN_OPEN_CURLY,
+	TOKEN_CLOSE_CURLY,
+	TOKEN_EQUAL_SIGN,
+	TOKEN_OPEN_SQUARE,
+	TOKEN_CLOSE_SQUARE,
+	TOKEN_VARIABLE,
+	TOKEN_AMPERSAND
+} tokenType;
+
+typedef struct {
+	tokenType		type;
+	keywordId		keyword;
+	vString *		string;
+	vString *		scope;
+	unsigned long 	lineNumber;
+	MIOPos			filePosition;
+	int 			parentKind; /* -1 if none */
+} tokenInfo;
+
+static langType Lang_php;
+
+static boolean InPhp = FALSE; /* whether we are between <? ?> */
+
+/* current statement details */
+struct {
+	accessType access;
+	implType impl;
+} CurrentStatement;
+
+/* Current namespace */
+vString *CurrentNamesapce;
+
+
+/* Registers every entry of PhpKeywordTable through addKeyword(), under the
+ * Lang_php language, using the entry's keywordId as the keyword value. */
+static void buildPhpKeywordHash (void)
+{
+	const keywordDesc *entry = PhpKeywordTable;
+	const keywordDesc *const end = PhpKeywordTable +
+		sizeof (PhpKeywordTable) / sizeof (PhpKeywordTable[0]);
+
+	for (; entry != end; entry++)
+		addKeyword (entry->name, Lang_php, (int) entry->id);
+}
+
+/* Returns the textual name of an accessType, as stored in a tag's access
+ * field.  `access` must be below COUNT_ACCESS (checked with Assert). */
+static const char *accessToString (const accessType access)
+{
+	static const char *const accessNames[COUNT_ACCESS] = {
+		"undefined", "private", "protected", "public"
+	};
+	const char *label;
+
+	Assert (access < COUNT_ACCESS);
+	label = accessNames[access];
+
+	return label;
+}
+
+/* Returns the textual name of an implType, as stored in a tag's
+ * implementation field.  `impl` must be below COUNT_IMPL (checked with
+ * Assert). */
+static const char *implToString (const implType impl)
+{
+	static const char *const implNames[COUNT_IMPL] = {
+		"undefined", "abstract"
+	};
+	const char *label;
+
+	Assert (impl < COUNT_IMPL);
+	label = implNames[impl];
+
+	return label;
+}
+
+/* Fills `e` with a tag entry of kind `kind` for `token`.
+ *
+ * The entry takes its name, line number and file position from the token.
+ * When `access` is not ACCESS_UNDEFINED, its name is stored in the access
+ * field.  The scope is the current namespace followed by the token's own
+ * scope, joined with SCOPE_SEPARATOR; the scope kind is the token's
+ * parentKind when the token has a scope, and K_NAMESPACE otherwise.
+ *
+ * The scope string lives in a function-static buffer that the next call
+ * overwrites, so the entry has to be used before calling this again. */
+static void initPhpEntry (tagEntryInfo *const e, const tokenInfo *const token,
+						  const phpKind kind, const accessType access)
+{
+	static vString *fullScope = NULL;
+	int scopeKind = -1;
+
+	if (fullScope != NULL)
+		vStringClear (fullScope);
+	else
+		fullScope = vStringNew ();
+
+	/* the namespace, if any, is the outermost part of the scope */
+	if (vStringLength (CurrentNamesapce) > 0)
+	{
+		scopeKind = K_NAMESPACE;
+		vStringCopy (fullScope, CurrentNamesapce);
+	}
+	/* the token's own scope comes next, and it decides the scope kind */
+	if (vStringLength (token->scope) > 0)
+	{
+		if (vStringLength (fullScope) > 0)
+			vStringCatS (fullScope, SCOPE_SEPARATOR);
+		vStringCat (fullScope, token->scope);
+		scopeKind = token->parentKind;
+	}
+
+	initTagEntry (e, vStringValue (token->string));
+
+	e->kind			= (char) PhpKinds[kind].letter;
+	e->kindName		= PhpKinds[kind].name;
+	e->filePosition	= token->filePosition;
+	e->lineNumber	= token->lineNumber;
+
+	if (access != ACCESS_UNDEFINED)
+		e->extensionFields.access = accessToString (access);
+
+	if (vStringLength (fullScope) > 0)
+	{
+		Assert (scopeKind >= 0);
+
+		vStringTerminate (fullScope);
+		e->extensionFields.scope[0] = PhpKinds[scopeKind].name;
+		e->extensionFields.scope[1] = vStringValue (fullScope);
+	}
+}
+
+static void makeSimplePhpTag (const tokenInfo *const token, const phpKind kind,
+							  const accessType access)
+{
+	if (PhpKinds[kind].enabled)
+	{
+		tagEntryInfo e;
+
+		initPhpEntry (&e, token, kind, access);
 		makeTagEntry (&e);
+	}
+}
+
+static void makeNamespacePhpTag (const tokenInfo *const token, const vString *const name)
+{
+	if (PhpKinds[K_NAMESPACE].enabled)
+	{
+		tagEntryInfo e;
 
-		eFree(name);
-		eFree(arglist);
+		initTagEntry (&e, vStringValue (name));
+
+		e.lineNumber	= token->lineNumber;
+		e.filePosition	= token->filePosition;
+		e.kindName		= PhpKinds[K_NAMESPACE].name;
+		e.kind			= (char) PhpKinds[K_NAMESPACE].letter;
+
+		makeTagEntry (&e);
 	}
 }
 
-/* Create parser definition structure */
-extern parserDefinition* PhpParser (void)
+static void makeClassOrIfaceTag (const phpKind kind, const tokenInfo *const token,
+								 vString *const inheritance, const implType impl)
 {
-	static const char *const extensions [] = { "php", "php3", "phtml", NULL };
-	parserDefinition* def = parserNew ("PHP");
-	def->extensions = extensions;
-	def->initialize = installPHPRegex;
-	def->regex      = TRUE;
-	return def;
+	if (PhpKinds[kind].enabled)
+	{
+		tagEntryInfo e;
+
+		initPhpEntry (&e, token, kind, ACCESS_UNDEFINED);
+
+		if (impl != IMPL_UNDEFINED)
+			e.extensionFields.implementation = implToString (impl);
+		if (vStringLength (inheritance) > 0)
+			e.extensionFields.inheritance = vStringValue (inheritance);
+
+		makeTagEntry (&e);
+	}
+}
+
+/* Emits a function tag for `token`, unless function tags are disabled.
+ *
+ * `arglist` may be NULL; otherwise its content becomes the tag's argument
+ * list.  `impl` is recorded in the implementation field unless it is
+ * IMPL_UNDEFINED, and `access` is handed over to initPhpEntry(). */
+static void makeFunctionTag (const tokenInfo *const token,
+							 const vString *const arglist,
+							 const accessType access, const implType impl)
+{
+	tagEntryInfo e;
+
+	if (! PhpKinds[K_FUNCTION].enabled)
+		return;
+
+	initPhpEntry (&e, token, K_FUNCTION, access);
+
+	if (arglist)
+		e.extensionFields.arglist = vStringValue (arglist);
+	if (impl != IMPL_UNDEFINED)
+		e.extensionFields.implementation = implToString (impl);
+
+	makeTagEntry (&e);
+}
+
+/* Allocates a token with empty string and scope, no type, no keyword and no
+ * parent kind, positioned at the current source line and input position.
+ * The result is to be released with deleteToken(). */
+static tokenInfo *newToken (void)
+{
+	tokenInfo *const fresh = xMalloc (1, tokenInfo);
+
+	fresh->string		= vStringNew ();
+	fresh->scope		= vStringNew ();
+	fresh->lineNumber   = getSourceLineNumber ();
+	fresh->filePosition = getInputFilePosition ();
+
+	fresh->type			= TOKEN_UNDEFINED;
+	fresh->keyword		= KEYWORD_NONE;
+	fresh->parentKind	= -1;
+
+	return fresh;
+}
+
+/* Releases a token together with the two strings it owns. */
+static void deleteToken (tokenInfo *const token)
+{
+	vStringDelete (token->scope);
+	vStringDelete (token->string);
+	eFree (token);
+}
+
+/* Copies `src` into `dest`: type, keyword, position, parent kind and string
+ * are always copied, the scope only when `scope` is TRUE. */
+static void copyToken (tokenInfo *const dest, const tokenInfo *const src,
+					   boolean scope)
+{
+	if (scope)
+		vStringCopy (dest->scope, src->scope);
+	vStringCopy (dest->string, src->string);
+
+	dest->type = src->type;
+	dest->keyword = src->keyword;
+	dest->parentKind = src->parentKind;
+	dest->lineNumber = src->lineNumber;
+	dest->filePosition = src->filePosition;
 }
 
 #if 0
+#include <stdio.h>
 
-static boolean isLetter(const int c)
+static const char *tokenTypeName (const tokenType type)
 {
-	return (boolean)(isalpha(c) || (c >= 127  &&  c <= 255));
+	switch (type)
+	{
+		case TOKEN_UNDEFINED:		return "undefined";
+		case TOKEN_EOF:				return "EOF";
+		case TOKEN_CHARACTER:		return "character";
+		case TOKEN_CLOSE_PAREN:		return "')'";
+		case TOKEN_SEMICOLON:		return "';'";
+		case TOKEN_COLON:			return "':'";
+		case TOKEN_COMMA:			return "','";
+		case TOKEN_OPEN_PAREN:		return "'('";
+		case TOKEN_OPERATOR:		return "operator";
+		case TOKEN_IDENTIFIER:		return "identifier";
+		case TOKEN_KEYWORD:			return "keyword";
+		case TOKEN_STRING:			return "string";
+		case TOKEN_PERIOD:			return "'.'";
+		case TOKEN_OPEN_CURLY:		return "'{'";
+		case TOKEN_CLOSE_CURLY:		return "'}'";
+		case TOKEN_EQUAL_SIGN:		return "'='";
+		case TOKEN_OPEN_SQUARE:		return "'['";
+		case TOKEN_CLOSE_SQUARE:	return "']'";
+		case TOKEN_VARIABLE:		return "variable";
+	}
+	return NULL;
 }
 
-static boolean isVarChar1(const int c)
+static void printToken (const tokenInfo *const token)
 {
-	return (boolean)(isLetter (c)  ||  c == '_');
+	fprintf (stderr, "%p:\n\ttype:\t%s\n\tline:\t%lu\n\tscope:\t%s\n", (void *) token,
+			 tokenTypeName (token->type),
+			 token->lineNumber,
+			 vStringValue (token->scope));
+	switch (token->type)
+	{
+		case TOKEN_IDENTIFIER:
+		case TOKEN_STRING:
+		case TOKEN_VARIABLE:
+			fprintf (stderr, "\tcontent:\t%s\n", vStringValue (token->string));
+			break;
+
+		case TOKEN_KEYWORD:
+		{
+			size_t n = sizeof PhpKeywordTable / sizeof PhpKeywordTable[0];
+			size_t i;
+
+			fprintf (stderr, "\tkeyword:\t");
+			for (i = 0; i < n; i++)
+			{
+				if (PhpKeywordTable[i].id == token->keyword)
+				{
+					fprintf (stderr, "%s\n", PhpKeywordTable[i].name);
+					break;
+				}
+			}
+			if (i >= n)
+				fprintf (stderr, "(unknown)\n");
+		}
+
+		default: break;
+	}
 }
+#endif
 
-static boolean isVarChar(const int c)
+static void addToScope (tokenInfo *const token, const vString *const extra)
 {
-	return (boolean)(isVarChar1 (c) || isdigit (c));
+	if (vStringLength (token->scope) > 0)
+		vStringCatS (token->scope, SCOPE_SEPARATOR);
+	vStringCatS (token->scope, vStringValue (extra));
+	vStringTerminate(token->scope);
 }
 
-static void findPhpTags (void)
+/* Tells whether `c` can be part of a PHP identifier: an ASCII letter or
+ * digit, an underscore, or any byte with the high bit set (0x80 and above).
+ *
+ * `c` is a value as obtained from fileGetc(), so it may be EOF.  EOF is
+ * negative and must not be tested with `c & 0x80`: that expression is
+ * non-zero for -1, which made EOF pass for an identifier character and left
+ * parseIdentifier() unable to leave its loop when an identifier was the very
+ * last thing in the input.  Comparing with `>=` rejects EOF. */
+static boolean isIdentChar (const int c)
+{
+	return (isalnum (c) || c == '_' || c >= 0x80);
+}
+
+/* Consumes input up to and including the next occurrence of `c`.
+ * Returns `c` when it was found, or EOF if the input ended first. */
+static int skipToCharacter (const int c)
+{
+	for (;;)
+	{
+		const int got = fileGetc ();
+
+		if (got == EOF || got == c)
+			return got;
+	}
+}
+
+/* Reads the body of a quoted string into `string`, the opening quote having
+ * been consumed already.  Reading stops at the first unescaped `delimiter`
+ * (which is consumed) or at EOF.  A backslash is dropped and the character
+ * following it is stored as is, so an escaped delimiter does not end the
+ * string. */
+static void parseString (vString *const string, const int delimiter)
+{
+	int c;
+
+	for (c = fileGetc (); c != EOF; c = fileGetc ())
+	{
+		if (c == '\\')
+		{
+			/* store the escaped character instead of the backslash */
+			c = fileGetc ();
+			if (c == EOF)
+				break;
+		}
+		else if (c == delimiter)
+			break;
+
+		vStringPut (string, (char) c);
+	}
+	vStringTerminate (string);
+}
+
+/* reads an HereDoc or a NowDoc (the part after the <<<).
+ * 	<<<[ \t]*(ID|'ID'|"ID")
+ * 	...
+ * 	ID;?
+ *
+ * note that:
+ *  1) starting ID must be immediately followed by a newline;
+ *  2) closing ID is the same as opening one;
+ *  3) closing ID must be immediately followed by a newline or a semicolon
+ *     then a newline.
+ *
+ * Example of a *single* valid heredoc:
+ * 	<<< FOO
+ * 	something
+ * 	something else
+ * 	FOO this is not an end
+ * 	FOO; this isn't either
+ * 	FOO; # neither this is
+ * 	FOO;
+ * 	# previous line was the end, but the semicolon wasn't required
+ *
+ * The content is appended to `string`.  If the opening is malformed (no
+ * delimiter, unbalanced quote or missing newline) the last character read is
+ * put back and `string` is left as it was.  If the input ends before the
+ * closing delimiter, what was read so far is kept.
+ */
+static void parseHeredoc (vString *const string)
+{
+	int c;
+	unsigned int len;
+	char delimiter[64]; /* arbitrary limit, but more is crazy anyway */
+	int quote = 0;
+
+	do
+	{
+		c = fileGetc ();
+	}
+	while (c == ' ' || c == '\t');
+
+	if (c == '\'' || c == '"')
+	{
+		quote = c;
+		c = fileGetc ();
+	}
+	for (len = 0; len < (sizeof delimiter / sizeof delimiter[0]) - 1; len++)
+	{
+		/* EOF is tested explicitly so it can never end up in the delimiter */
+		if (c == EOF || ! isIdentChar (c))
+			break;
+		delimiter[len] = (char) c;
+		c = fileGetc ();
+	}
+	delimiter[len] = 0;
+
+	if (len == 0) /* no delimiter, give up */
+		goto error;
+	if (quote)
+	{
+		if (c != quote) /* no closing quote for quoted identifier, give up */
+			goto error;
+		c = fileGetc ();
+	}
+	if (c != '\r' && c != '\n') /* missing newline, give up */
+		goto error;
+
+	do
+	{
+		c = fileGetc ();
+
+		if (c == EOF)
+			break; /* unterminated: don't store EOF as if it was a byte */
+		else if (c != '\r' && c != '\n')
+			vStringPut (string, (char) c);
+		else
+		{
+			/* new line, check for a delimiter right after */
+			int nl = c;
+			int extra = EOF;
+
+			c = fileGetc ();
+			/* compare as unsigned char: `c` is in the 0..255 range whereas a
+			 * plain char may be negative for bytes >= 0x80, which would
+			 * prevent such a delimiter from ever matching */
+			for (len = 0;
+				 delimiter[len] != 0 && c == (unsigned char) delimiter[len];
+				 len++)
+				c = fileGetc ();
+
+			if (delimiter[len] != 0)
+				fileUngetc (c);
+			else
+			{
+				/* line start matched the delimiter, now check whether there
+				 * is anything after it */
+				if (c == '\r' || c == '\n')
+				{
+					fileUngetc (c);
+					break;
+				}
+				else if (c == ';')
+				{
+					int d = fileGetc ();
+					if (d == '\r' || d == '\n')
+					{
+						/* put back the semicolon since it's not part of the
+						 * string.  we can't put back the newline, but it's a
+						 * whitespace character nobody cares about anyway */
+						fileUngetc (';');
+						break;
+					}
+					else
+					{
+						/* put semicolon in the string and continue */
+						extra = ';';
+						fileUngetc (d);
+					}
+				}
+				else
+				{
+					/* something else follows the delimiter: it is regular
+					 * content, put it back so the next round stores it
+					 * instead of losing it */
+					fileUngetc (c);
+				}
+			}
+			/* if we are here it wasn't a delimiter, so put everything in the
+			 * string */
+			vStringPut (string, (char) nl);
+			vStringNCatS (string, delimiter, len);
+			if (extra != EOF)
+				vStringPut (string, (char) extra);
+		}
+	}
+	while (c != EOF);
+
+	vStringTerminate (string);
+
+	return;
+
+error:
+	fileUngetc (c);
+}
+
+static void parseIdentifier (vString *const string, const int firstChar)
 {
-	vString *name = vStringNew ();
-	const unsigned char *line;
+	int c = firstChar;
+	do
+	{
+		vStringPut (string, (char) c);
+		c = fileGetc ();
+	} while (isIdentChar (c));
+	fileUngetc (c);
+	vStringTerminate (string);
+}
 
-	while ((line = fileReadLine ()) != NULL)
+/* Looks `name` up among the keywords of `language`, ignoring case: the name
+ * is lowercased into a temporary string and the result of lookupKeyword()
+ * for that string is returned. */
+static keywordId analyzeToken (vString *const name, langType language)
+{
+	vString *const lowered = vStringNew ();
+	keywordId id;
+
+	vStringCopyToLower (lowered, name);
+	id = lookupKeyword (vStringValue (lowered), language);
+	vStringDelete (lowered);
+
+	return id;
+}
+
+/* Starting from the already-read character `c`, reads on for as long as the
+ * current character is a tab, space, line feed or carriage return.
+ * Returns the first character that is none of these (possibly `c` itself). */
+static int skipWhitespaces (int c)
+{
+	for (;;)
+	{
+		switch (c)
+		{
+			case '\t':
+			case ' ':
+			case '\n':
+			case '\r':
+				c = fileGetc ();
+				break;
+
+			default:
+				return c;
+		}
+	}
+}
+
+/* <script[:white:]+language[:white:]*=[:white:]*(php|'php'|"php")[:white:]*>
+ * 
+ * This is ugly, but the whole "<script language=php>" tag is and we can't
+ * really do better without adding a lot of code only for this */
+static boolean isOpenScriptLanguagePhp (int c)
+{
+	int quote = 0;
+
+	/* <script[:white:]+language[:white:]*= */
+	if (c                                   != '<' ||
+		tolower ((c = fileGetc ()))         != 's' ||
+		tolower ((c = fileGetc ()))         != 'c' ||
+		tolower ((c = fileGetc ()))         != 'r' ||
+		tolower ((c = fileGetc ()))         != 'i' ||
+		tolower ((c = fileGetc ()))         != 'p' ||
+		tolower ((c = fileGetc ()))         != 't' ||
+		((c = fileGetc ()) != '\t' &&
+		  c                != ' '  &&
+		  c                != '\n' &&
+		  c                != '\r')                ||
+		tolower ((c = skipWhitespaces (c))) != 'l' ||
+		tolower ((c = fileGetc ()))         != 'a' ||
+		tolower ((c = fileGetc ()))         != 'n' ||
+		tolower ((c = fileGetc ()))         != 'g' ||
+		tolower ((c = fileGetc ()))         != 'u' ||
+		tolower ((c = fileGetc ()))         != 'a' ||
+		tolower ((c = fileGetc ()))         != 'g' ||
+		tolower ((c = fileGetc ()))         != 'e' ||
+		(c = skipWhitespaces (fileGetc ())) != '=')
+		return FALSE;
+
+	/* (php|'php'|"php")> */
+	c = skipWhitespaces (fileGetc ());
+	if (c == '"' || c == '\'')
 	{
-		const unsigned char *cp = line;
-		const char* f;
+		quote = c;
+		c = fileGetc ();
+	}
+	if (tolower (c)                         != 'p' ||
+		tolower ((c = fileGetc ()))         != 'h' ||
+		tolower ((c = fileGetc ()))         != 'p' ||
+		(quote != 0 && (c = fileGetc ()) != quote) ||
+		(c = skipWhitespaces (fileGetc ())) != '>')
+		return FALSE;
 
-		while (isspace (*cp))
-			cp++;
+	return TRUE;
+}
 
-		if (*(const char*)cp == '$'  &&  isVarChar1 (*(const char*)(cp+1)))
+static int findPhpStart (void)
+{
+	int c;
+	do
+	{
+		if ((c = fileGetc ()) == '<')
 		{
-			cp += 1;
-			vStringClear (name);
-			while (isVarChar ((int) *cp))
+			c = fileGetc ();
+			/* <? and <?php, but not <?xml */
+			if (c == '?')
 			{
-				vStringPut (name, (int) *cp);
-				++cp;
+				/* don't enter PHP mode on "<?xml", yet still support short open tags (<?) */
+				if (tolower ((c = fileGetc ())) != 'x' ||
+					tolower ((c = fileGetc ())) != 'm' ||
+					tolower ((c = fileGetc ())) != 'l')
+				{
+					break;
+				}
 			}
-			while (isspace ((int) *cp))
-				++cp;
-			if (*(const char*) cp == '=')
+			/* <script language="php"> */
+			else
 			{
-				vStringTerminate (name);
-				makeSimpleTag (name, PhpKinds, K_VARIABLE);
-				vStringClear (name);
+				fileUngetc (c);
+				if (isOpenScriptLanguagePhp ('<'))
+					break;
 			}
 		}
-		else if ((f = strstr ((const char*) cp, "function")) != NULL &&
-			(f == (const char*) cp || isspace ((int) f [-1])) &&
-			isspace ((int) f [8]))
+	}
+	while (c != EOF);
+
+	return c;
+}
+
+static int skipSingleComment (void)
+{
+	int c;
+	do
+	{
+		c = fileGetc ();
+		if (c == '\r')
 		{
-			cp = ((const unsigned char *) f) + 8;
+			int next = fileGetc ();
+			if (next != '\n')
+				fileUngetc (next);
+			else
+				c = next;
+		}
+		/* ?> in single-line comments leaves PHP mode */
+		else if (c == '?')
+		{
+			int next = fileGetc ();
+			if (next == '>')
+				InPhp = FALSE;
+			else
+				fileUngetc (next);
+		}
+	} while (InPhp && c != EOF && c != '\n' && c != '\r');
+	return c;
+}
 
-			while (isspace ((int) *cp))
-				++cp;
+static void readToken (tokenInfo *const token)
+{
+	int c;
 
-			if (*cp == '&')	/* skip reference character and following whitespace */
+	token->type		= TOKEN_UNDEFINED;
+	token->keyword	= KEYWORD_NONE;
+	vStringClear (token->string);
+
+getNextChar:
+
+	if (! InPhp)
+	{
+		c = findPhpStart ();
+		if (c != EOF)
+			InPhp = TRUE;
+	}
+	else
+		c = fileGetc ();
+
+	while (c == '\t' || c == ' ' || c == '\n' || c == '\r')
+	{
+		c = fileGetc ();
+	}
+
+	token->lineNumber   = getSourceLineNumber ();
+	token->filePosition = getInputFilePosition ();
+
+	switch (c)
+	{
+		case EOF: token->type = TOKEN_EOF;					break;
+		case '(': token->type = TOKEN_OPEN_PAREN;			break;
+		case ')': token->type = TOKEN_CLOSE_PAREN;			break;
+		case ';': token->type = TOKEN_SEMICOLON;			break;
+		case ',': token->type = TOKEN_COMMA;				break;
+		case '.': token->type = TOKEN_PERIOD;				break;
+		case ':': token->type = TOKEN_COLON;				break;
+		case '{': token->type = TOKEN_OPEN_CURLY;			break;
+		case '}': token->type = TOKEN_CLOSE_CURLY;			break;
+		case '[': token->type = TOKEN_OPEN_SQUARE;			break;
+		case ']': token->type = TOKEN_CLOSE_SQUARE;			break;
+		case '&': token->type = TOKEN_AMPERSAND;			break;
+
+		case '=':
+		{
+			int d = fileGetc ();
+			if (d == '=' || d == '>')
+				token->type = TOKEN_OPERATOR;
+			else
 			{
-				cp++;
+				fileUngetc (d);
+				token->type = TOKEN_EQUAL_SIGN;
+			}
+			break;
+		}
+
+		case '\'':
+		case '"':
+			token->type = TOKEN_STRING;
+			parseString (token->string, c);
+			token->lineNumber = getSourceLineNumber ();
+			token->filePosition = getInputFilePosition ();
+			break;
 
-				while (isspace ((int) *cp))
-					++cp;
+		case '<':
+		{
+			int d = fileGetc ();
+			if (d == '/')
+			{
+				/* </script[:white:]*> */
+				if (tolower ((d = fileGetc ())) == 's' &&
+					tolower ((d = fileGetc ())) == 'c' &&
+					tolower ((d = fileGetc ())) == 'r' &&
+					tolower ((d = fileGetc ())) == 'i' &&
+					tolower ((d = fileGetc ())) == 'p' &&
+					tolower ((d = fileGetc ())) == 't' &&
+					(d = skipWhitespaces (fileGetc ())) == '>')
+				{
+					InPhp = FALSE;
+					goto getNextChar;
+				}
+				else
+				{
+					fileUngetc (d);
+					token->type = TOKEN_UNDEFINED;
+				}
 			}
+			else if (d == '<' && (d = fileGetc ()) == '<')
+			{
+				token->type = TOKEN_STRING;
+				parseHeredoc (token->string);
+			}
+			else
+			{
+				fileUngetc (d);
+				token->type = TOKEN_UNDEFINED;
+			}
+			break;
+		}
+
+		case '#': /* comment */
+			skipSingleComment ();
+			goto getNextChar;
+			break;
 
-			vStringClear (name);
-			while (isalnum ((int) *cp)  ||  *cp == '_')
+		case '+':
+		case '-':
+		case '*':
+		case '%':
+		{
+			int d = fileGetc ();
+			if (d != '=')
+				fileUngetc (d);
+			token->type = TOKEN_OPERATOR;
+			break;
+		}
+
+		case '/': /* division or comment start */
+		{
+			int d = fileGetc ();
+			if (d == '/') /* single-line comment */
+			{
+				skipSingleComment ();
+				goto getNextChar;
+			}
+			else if (d == '*')
+			{
+				do
+				{
+					c = skipToCharacter ('*');
+					if (c != EOF)
+					{
+						c = fileGetc ();
+						if (c == '/')
+							break;
+						else
+							fileUngetc (c);
+					}
+				} while (c != EOF && c != '\0');
+				goto getNextChar;
+			}
+			else
+			{
+				if (d != '=')
+					fileUngetc (d);
+				token->type = TOKEN_OPERATOR;
+			}
+			break;
+		}
+
+		case '$': /* variable start */
+		{
+			int d = fileGetc ();
+			if (! isIdentChar (d))
+			{
+				fileUngetc (d);
+				token->type = TOKEN_UNDEFINED;
+			}
+			else
 			{
-				vStringPut (name, (int) *cp);
-				++cp;
+				parseIdentifier (token->string, d);
+				token->type = TOKEN_VARIABLE;
 			}
-			vStringTerminate (name);
-			makeSimpleTag (name, PhpKinds, K_FUNCTION);
-			vStringClear (name);
+			break;
 		}
-		else if (strncmp ((const char*) cp, "class", (size_t) 5) == 0 &&
-				 isspace ((int) cp [5]))
+
+		case '?': /* maybe the end of the PHP chunk */
 		{
-			cp += 5;
+			int d = fileGetc ();
+			if (d == '>')
+			{
+				InPhp = FALSE;
+				goto getNextChar;
+			}
+			else
+			{
+				fileUngetc (d);
+				token->type = TOKEN_UNDEFINED;
+			}
+			break;
+		}
 
-			while (isspace ((int) *cp))
-				++cp;
-			vStringClear (name);
-			while (isalnum ((int) *cp)  ||  *cp == '_')
+		default:
+			if (! isIdentChar (c))
+				token->type = TOKEN_UNDEFINED;
+			else
 			{
-				vStringPut (name, (int) *cp);
-				++cp;
+				parseIdentifier (token->string, c);
+				token->keyword = analyzeToken (token->string, Lang_php);
+				if (token->keyword == KEYWORD_NONE)
+					token->type = TOKEN_IDENTIFIER;
+				else
+					token->type = TOKEN_KEYWORD;
 			}
-			vStringTerminate (name);
-			makeSimpleTag (name, PhpKinds, K_CLASS);
-			vStringClear (name);
+			break;
+	}
+
+	if (token->type == TOKEN_SEMICOLON ||
+		token->type == TOKEN_OPEN_CURLY ||
+		token->type == TOKEN_CLOSE_CURLY)
+	{
+		/* reset current statement details on statement end, and when entering
+		 * a deeper scope.
+		 * it is a bit ugly to do this in readToken(), but it makes everything
+		 * a lot simpler. */
+		CurrentStatement.access = ACCESS_UNDEFINED;
+		CurrentStatement.impl = IMPL_UNDEFINED;
+	}
+}
+
+static void enterScope (tokenInfo *const parentToken,
+						const vString *const extraScope,
+						const int parentKind);
+
+/* parses a class or an interface:
+ * 	class Foo {}
+ * 	class Foo extends Bar {}
+ * 	class Foo extends Bar implements iFoo, iBar {}
+ * 	interface iFoo {}
+ * 	interface iBar extends iFoo {} */
+static boolean parseClassOrIface (tokenInfo *const token, const phpKind kind)
+{
+	boolean readNext = TRUE;
+	implType impl = CurrentStatement.impl;
+	tokenInfo *name;
+	vString *inheritance = NULL;
+
+	readToken (token);
+	if (token->type != TOKEN_IDENTIFIER)
+		return FALSE;
+
+	name = newToken ();
+	copyToken (name, token, TRUE);
+
+	inheritance = vStringNew ();
+	/* skip until the open bracket and assume every identifier (not keyword)
+	 * is an inheritance (like in "class Foo extends Bar implements iA, iB") */
+	do
+	{
+		readToken (token);
+
+		if (token->type == TOKEN_IDENTIFIER)
+		{
+			if (vStringLength (inheritance) > 0)
+				vStringPut (inheritance, ',');
+			vStringCat (inheritance, token->string);
 		}
-		else if (strncmp ((const char*) cp, "define", (size_t) 6) == 0 &&
-				 ! isalnum ((int) cp [6]))
+	}
+	while (token->type != TOKEN_EOF &&
+		   token->type != TOKEN_OPEN_CURLY);
+
+	makeClassOrIfaceTag (kind, name, inheritance, impl);
+
+	if (token->type == TOKEN_OPEN_CURLY)
+		enterScope (token, name->string, K_CLASS);
+	else
+		readNext = FALSE;
+
+	deleteToken (name);
+	vStringDelete (inheritance);
+
+	return readNext;
+}
+
+/* parses a trait:
+ * 	trait Foo {} */
+static boolean parseTrait (tokenInfo *const token)
+{
+	boolean readNext = TRUE;
+	tokenInfo *name;
+
+	readToken (token);
+	if (token->type != TOKEN_IDENTIFIER)
+		return FALSE;
+
+	name = newToken ();
+	copyToken (name, token, TRUE);
+
+	makeSimplePhpTag (name, K_TRAIT, ACCESS_UNDEFINED);
+
+	readToken (token);
+	if (token->type == TOKEN_OPEN_CURLY)
+		enterScope (token, name->string, K_TRAIT);
+	else
+		readNext = FALSE;
+
+	deleteToken (name);
+
+	return readNext;
+}
+
+/* parse a function
+ *
+ * if @name is NULL, parses a normal function
+ * 	function myfunc($foo, $bar) {}
+ * 	function &myfunc($foo, $bar) {}
+ *
+ * if @name is not NULL, parses an anonymous function with name @name
+ * 	$foo = function($foo, $bar) {} */
+static boolean parseFunction (tokenInfo *const token, const tokenInfo *name)
+{
+	boolean readNext = TRUE;
+	accessType access = CurrentStatement.access;
+	implType impl = CurrentStatement.impl;
+	tokenInfo *nameFree = NULL;
+
+	if (! name)
+	{
+		readToken (token);
+		/* skip a possible leading ampersand (return by reference) */
+		if (token->type == TOKEN_AMPERSAND)
+			readToken (token);
+		if (token->type != TOKEN_IDENTIFIER)
+			return FALSE;
+
+		name = nameFree = newToken ();
+		copyToken (nameFree, token, TRUE);
+	}
+
+	readToken (token);
+	if (token->type == TOKEN_OPEN_PAREN)
+	{
+		vString *arglist = vStringNew ();
+		int depth = 1;
+
+		vStringPut (arglist, '(');
+		do
 		{
-			cp += 6;
-
-			while (isspace ((int) *cp))
-				++cp;
-			if (*cp != '(')
-				continue;
-			++cp;
-
-			while (isspace ((int) *cp))
-				++cp;
-			if ((*cp == '\'') || (*cp == '"'))
-				++cp;
-			else if (! ((*cp == '_')  || isalnum ((int) *cp)))
-				continue;
-
-			vStringClear (name);
-			while (isalnum ((int) *cp)  ||  *cp == '_')
+			readToken (token);
+
+			switch (token->type)
 			{
-				vStringPut (name, (int) *cp);
-				++cp;
+				case TOKEN_OPEN_PAREN:  depth++; break;
+				case TOKEN_CLOSE_PAREN: depth--; break;
+				default: break;
 			}
-			vStringTerminate (name);
-			makeSimpleTag (name, PhpKinds, K_DEFINE);
-			vStringClear (name);
+			/* display part */
+			switch (token->type)
+			{
+				case TOKEN_AMPERSAND:		vStringPut (arglist, '&');		break;
+				case TOKEN_CLOSE_CURLY:		vStringPut (arglist, '}');		break;
+				case TOKEN_CLOSE_PAREN:		vStringPut (arglist, ')');		break;
+				case TOKEN_CLOSE_SQUARE:	vStringPut (arglist, ']');		break;
+				case TOKEN_COLON:			vStringPut (arglist, ':');		break;
+				case TOKEN_COMMA:			vStringCatS (arglist, ", ");	break;
+				case TOKEN_EQUAL_SIGN:		vStringCatS (arglist, " = ");	break;
+				case TOKEN_OPEN_CURLY:		vStringPut (arglist, '{');		break;
+				case TOKEN_OPEN_PAREN:		vStringPut (arglist, '(');		break;
+				case TOKEN_OPEN_SQUARE:		vStringPut (arglist, '[');		break;
+				case TOKEN_PERIOD:			vStringPut (arglist, '.');		break;
+				case TOKEN_SEMICOLON:		vStringPut (arglist, ';');		break;
+				case TOKEN_STRING:			vStringCatS (arglist, "'...'");	break;
+
+				case TOKEN_IDENTIFIER:
+				case TOKEN_KEYWORD:
+				case TOKEN_VARIABLE:
+				{
+					switch (vStringLast (arglist))
+					{
+						case 0:
+						case ' ':
+						case '{':
+						case '(':
+						case '[':
+						case '.':
+							/* no need for a space between those and the identifier */
+							break;
+
+						default:
+							vStringPut (arglist, ' ');
+							break;
+					}
+					if (token->type == TOKEN_VARIABLE)
+						vStringPut (arglist, '$');
+					vStringCat (arglist, token->string);
+					break;
+				}
+
+				default: break;
+			}
+		}
+		while (token->type != TOKEN_EOF && depth > 0);
+
+		vStringTerminate (arglist);
+
+		makeFunctionTag (name, arglist, access, impl);
+		vStringDelete (arglist);
+
+		readToken (token); /* normally it's an open brace or a semicolon */
+	}
+	if (token->type == TOKEN_OPEN_CURLY)
+		enterScope (token, name->string, K_FUNCTION);
+	else
+		readNext = FALSE;
+
+	if (nameFree)
+		deleteToken (nameFree);
+
+	return readNext;
+}
+
+/* parses declarations of the form
+ * 	const NAME = VALUE */
+static boolean parseConstant (tokenInfo *const token)
+{
+	tokenInfo *name;
+
+	readToken (token); /* skip const keyword */
+	if (token->type != TOKEN_IDENTIFIER)
+		return FALSE;
+
+	name = newToken ();
+	copyToken (name, token, TRUE);
+
+	readToken (token);
+	if (token->type == TOKEN_EQUAL_SIGN)
+		makeSimplePhpTag (name, K_DEFINE, ACCESS_UNDEFINED);
+
+	deleteToken (name);
+
+	return token->type == TOKEN_EQUAL_SIGN;
+}
+
+/* parses declarations of the form
+ * 	define('NAME', 'VALUE')
+ * 	define(NAME, 'VALUE') */
+static boolean parseDefine (tokenInfo *const token)
+{
+	int depth = 1;
+
+	readToken (token); /* skip "define" identifier */
+	if (token->type != TOKEN_OPEN_PAREN)
+		return FALSE;
+
+	readToken (token);
+	if (token->type == TOKEN_STRING ||
+		token->type == TOKEN_IDENTIFIER)
+	{
+		makeSimplePhpTag (token, K_DEFINE, ACCESS_UNDEFINED);
+		readToken (token);
+	}
+
+	/* skip until the close parenthesis.
+	 * no need to handle nested blocks since they would be invalid
+	 * in this context anyway (the VALUE may only be a scalar, like
+	 * 	42
+	 * 	(42)
+	 * and alike) */
+	while (token->type != TOKEN_EOF && depth > 0)
+	{
+		switch (token->type)
+		{
+			case TOKEN_OPEN_PAREN:	depth++; break;
+			case TOKEN_CLOSE_PAREN:	depth--; break;
+			default: break;
+		}
+		readToken (token);
+	}
+
+	return FALSE;
+}
+
+/* parses declarations of the form
+ * 	$var = VALUE
+ * 	$var; */
+static boolean parseVariable (tokenInfo *const token)
+{
+	tokenInfo *name;
+	boolean readNext = TRUE;
+	accessType access = CurrentStatement.access;
+
+	name = newToken ();
+	copyToken (name, token, TRUE);
+
+	readToken (token);
+	if (token->type == TOKEN_EQUAL_SIGN)
+	{
+		phpKind kind = K_VARIABLE;
+
+		if (token->parentKind == K_FUNCTION)
+			kind = K_LOCAL_VARIABLE;
+
+		readToken (token);
+		if (token->type == TOKEN_KEYWORD &&
+			token->keyword == KEYWORD_function &&
+			PhpKinds[kind].enabled)
+		{
+			if (parseFunction (token, name))
+				readToken (token);
+			readNext = (boolean) (token->type == TOKEN_SEMICOLON);
+		}
+		else
+		{
+			makeSimplePhpTag (name, kind, access);
+			readNext = FALSE;
+		}
+	}
+	else if (token->type == TOKEN_SEMICOLON)
+	{
+		/* generate tags for variable declarations in classes
+		 * 	class Foo {
+		 * 		protected $foo;
+		 * 	}
+		 * but don't get fooled by stuff like $foo = $bar; */
+		if (token->parentKind == K_CLASS || token->parentKind == K_INTERFACE)
+			makeSimplePhpTag (name, K_VARIABLE, access);
+	}
+	else
+		readNext = FALSE;
+
+	deleteToken (name);
+
+	return readNext;
+}
+
+/* parses namespace declarations
+ * 	namespace Foo {}
+ * 	namespace Foo\Bar {}
+ * 	namespace Foo;
+ * 	namespace Foo\Bar;
+ * 	namespace;
+ * 	namespace {} */
+static boolean parseNamespace (tokenInfo *const token)
+{
+	tokenInfo *nsToken = newToken ();
+
+	vStringClear (CurrentNamesapce);
+	copyToken (nsToken, token, FALSE);
+
+	do
+	{
+		readToken (token);
+		if (token->type == TOKEN_IDENTIFIER)
+		{
+			if (vStringLength (CurrentNamesapce) > 0)
+				vStringPut (CurrentNamesapce, '\\');
+			vStringCat (CurrentNamesapce, token->string);
 		}
 	}
-	vStringDelete (name);
+	while (token->type != TOKEN_EOF &&
+		   token->type != TOKEN_SEMICOLON &&
+		   token->type != TOKEN_OPEN_CURLY);
+
+	vStringTerminate (CurrentNamesapce);
+	if (vStringLength (CurrentNamesapce) > 0)
+		makeNamespacePhpTag (nsToken, CurrentNamesapce);
+
+	if (token->type == TOKEN_OPEN_CURLY)
+		enterScope (token, NULL, -1);
+
+	deleteToken (nsToken);
+
+	return TRUE;
+}
+
+static void enterScope (tokenInfo *const parentToken,
+						const vString *const extraScope,
+						const int parentKind)
+{
+	tokenInfo *token = newToken ();
+	int origParentKind = parentToken->parentKind;
+
+	copyToken (token, parentToken, TRUE);
+
+	if (extraScope)
+	{
+		addToScope (token, extraScope);
+		token->parentKind = parentKind;
+	}
+
+	readToken (token);
+	while (token->type != TOKEN_EOF &&
+		   token->type != TOKEN_CLOSE_CURLY)
+	{
+		boolean readNext = TRUE;
+
+		switch (token->type)
+		{
+			case TOKEN_OPEN_CURLY:
+				enterScope (token, NULL, -1);
+				break;
+
+			case TOKEN_KEYWORD:
+				switch (token->keyword)
+				{
+					case KEYWORD_class:		readNext = parseClassOrIface (token, K_CLASS);		break;
+					case KEYWORD_interface:	readNext = parseClassOrIface (token, K_INTERFACE);	break;
+					case KEYWORD_trait:		readNext = parseTrait (token);						break;
+					case KEYWORD_function:	readNext = parseFunction (token, NULL);				break;
+					case KEYWORD_const:		readNext = parseConstant (token);					break;
+					case KEYWORD_define:	readNext = parseDefine (token);						break;
+
+					case KEYWORD_namespace:	readNext = parseNamespace (token);	break;
+
+					case KEYWORD_private:	CurrentStatement.access = ACCESS_PRIVATE;	break;
+					case KEYWORD_protected:	CurrentStatement.access = ACCESS_PROTECTED;	break;
+					case KEYWORD_public:	CurrentStatement.access = ACCESS_PUBLIC;	break;
+					case KEYWORD_var:		CurrentStatement.access = ACCESS_PUBLIC;	break;
+
+					case KEYWORD_abstract:	CurrentStatement.impl = IMPL_ABSTRACT;		break;
+
+					default: break;
+				}
+				break;
+
+			case TOKEN_VARIABLE:
+				readNext = parseVariable (token);
+				break;
+
+			default: break;
+		}
+
+		if (readNext)
+			readToken (token);
+	}
+
+	copyToken (parentToken, token, FALSE);
+	parentToken->parentKind = origParentKind;
+	deleteToken (token);
+}
+
+static void findPhpTags (void)
+{
+	tokenInfo *const token = newToken ();
+
+	InPhp = FALSE;
+	CurrentStatement.access = ACCESS_UNDEFINED;
+	CurrentStatement.impl = IMPL_UNDEFINED;
+	CurrentNamesapce = vStringNew ();
+
+	do
+	{
+		enterScope (token, NULL, -1);
+	}
+	while (token->type != TOKEN_EOF); /* keep going even with unmatched braces */
+
+	vStringDelete (CurrentNamesapce);
+	deleteToken (token);
+}
+
+static void initialize (const langType language)
+{
+	Lang_php = language;
+	buildPhpKeywordHash ();
 }
 
 extern parserDefinition* PhpParser (void)
 {
-	static const char *const extensions [] = { "php", "php3", "phtml", NULL };
+	static const char *const extensions [] = { "php", "php3", "php4", "php5", "phtml", NULL };
 	parserDefinition* def = parserNew ("PHP");
 	def->kinds      = PhpKinds;
 	def->kindCount  = KIND_COUNT (PhpKinds);
 	def->extensions = extensions;
 	def->parser     = findPhpTags;
+	def->initialize = initialize;
 	return def;
 }
 
-#endif
-
 /* vi:set tabstop=4 shiftwidth=4: */


Modified: tagmanager/ctags/vstring.h
1 files changed, 1 insertions(+), 0 deletions(-)
===================================================================
@@ -33,6 +33,7 @@
 
 #define vStringValue(vs)	((vs)->buffer)
 #define vStringItem(vs,i)	((vs)->buffer[i])
+#define vStringLast(vs)		((vs)->buffer[(vs)->length - 1])
 #define vStringLength(vs)	((vs)->length)
 #define vStringSize(vs)		((vs)->size)
 #define vStringCat(vs,s)	vStringCatS((vs), vStringValue((s)))


Modified: tests/ctags/Makefile.am
2 files changed, 2 insertions(+), 0 deletions(-)
===================================================================
@@ -105,6 +105,7 @@ test_sources = \
 	bug960316.v						\
 	bug961001.v						\
 	byte.f							\
+	case_sensitivity.php			\
 	char-selector.f90				\
 	classes.php						\
 	common.f						\
@@ -185,6 +186,7 @@ test_sources = \
 	objectivec_property.mm			\
 	objectivec_protocol.mm			\
 	Package.pm						\
+	php5_5_class_kw.php				\
 	procedure_pointer_module.f90	\
 	property.cs						\
 	prototype.h						\


Modified: tests/ctags/case_sensitivity.php
40 files changed, 40 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,40 @@
+<?php
+// PHP is case insensitive about keywords
+
+class A {}
+
+CLASS B {}
+
+Class C {}
+
+ClAsS D {}
+
+
+
+function a() {}
+
+FUNCTION b() {}
+
+Function c() {}
+
+FuNcTiOn d() {}
+
+
+
+trait tA {}
+
+TRAIT tB {}
+
+Trait tC {}
+
+TrAiT tD {}
+
+
+
+interface iA {}
+
+INTERFACE iB {}
+
+Interface iC {}
+
+InTeRfAcE iD {}


Modified: tests/ctags/case_sensitivity.php.tags
17 files changed, 17 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,17 @@
+# format=tagmanager
+A�1�0
+B�1�0
+C�1�0
+D�1�0
+a�16�()�0
+b�16�()�0
+c�16�()�0
+d�16�()�0
+iA�32�0
+iB�32�0
+iC�32�0
+iD�32�0
+tA�2048�0
+tB�2048�0
+tC�2048�0
+tD�2048�0


Modified: tests/ctags/classes.php.tags
10 files changed, 5 insertions(+), 5 deletions(-)
===================================================================
@@ -1,8 +1,8 @@
 # format=tagmanager
 Bar�1�0
 Foo�1�0
-__construct�16�($a, $b)�0
-__construct�16�()�0
-method1�16�($arg)�0
-method1�16�()�0
-method2�16�()�0
+__construct�16�()�Bar�0
+__construct�16�($a, $b)�Foo�0
+method1�16�()�Bar�0
+method1�16�($arg)�Foo�0
+method2�16�()�Foo�0


Modified: tests/ctags/mode.php
73 files changed, 73 insertions(+), 0 deletions(-)
===================================================================
@@ -7,6 +7,12 @@ functions:
 	b
 	c
 	d
+	e
+	f
+	g
+	h
+	i
+	j
 
 
 function bug0() {
@@ -50,3 +56,70 @@ function bug5() {
 	}
 	<?php // back to PHP mode, still inside function d()
 }
+
+// any open tag matches any close tag, so this is valid
+</script> // leaves PHP mode
+
+function bug4() {}
+
+?> <!-- just in case -->
+
+<script language="php"> // entered PHP mode
+
+function e() {
+	return 42;
+}
+
+?> // left PHP mode
+
+function bug5() {}
+
+// some valid long tag opening with inner whitespaces
+
+<script
+	language
+	=
+	php
+> // entered
+function f() {}
+</script
+	> // left
+
+function bug6() {}
+
+<script	language=	'php'	> // enter
+function g() {}
+</script > // leave
+
+function bug7() {}
+
+<?php
+// this WONT leave PHP mode, it's in a comment  </script>
+function h() {}
+?>
+
+function bug8() {}
+
+<?php
+
+function i() {}
+// any open tag matches any close tag, so this is valid
+</script         	  	 
+ 	  	        >
+
+function bug9() {}
+
+// this won't enter PHP, no spaces are allowed between the "<" and "script"
+< script language = php >
+
+function bug10() {}
+
+// does nothing, just resets mode for some tools not aware of the "script" thing
+?>
+
+<!-- <script> is OK anywhere, even in XML strings -->
+<p attr="<script language=php>
+function j() {}
+</script>">
+
+</p>


Modified: tests/ctags/mode.php.tags
12 files changed, 6 insertions(+), 6 deletions(-)
===================================================================
@@ -1,11 +1,11 @@
 # format=tagmanager
 a�16�()�0
 b�16�()�0
-bug0�16�()�0
-bug1�16�()�0
-bug2�16�()�0
-bug3�16�()�0
-bug4�16�()�0
-bug5�16�()�0
 c�16�()�0
 d�16�()�0
+e�16�()�0
+f�16�()�0
+g�16�()�0
+h�16�()�0
+i�16�()�0
+j�16�()�0


Modified: tests/ctags/namespaces.php.tags
18 files changed, 11 insertions(+), 7 deletions(-)
===================================================================
@@ -1,10 +1,14 @@
 # format=tagmanager
-B�1�0
-C�1�0
-__construct�16�()�0
-a�16�()�0
-b�16�()�0
-c�16�()�0
-d�16�()�0
+B�1�Bar\Baz�0
+Bar\Baz�256�0
+C�1�Foo�0
+Foo�256�0
+__construct�16�()�Bar\Baz::B�0
+__construct�16�()�Foo::C�0
+a�16�()�Bar\Baz�0
+a�16�()�Foo�0
+b�16�()�Foo�0
+c�16�()�Bar\Baz::B�0
+d�16�()�Foo::C�0
 inRoot�16�()�0
 meToo�16�()�0


Modified: tests/ctags/namespaces2.php.tags
18 files changed, 11 insertions(+), 7 deletions(-)
===================================================================
@@ -1,8 +1,12 @@
 # format=tagmanager
-B�1�0
-C�1�0
-__construct�16�()�0
-a�16�()�0
-b�16�()�0
-c�16�()�0
-d�16�()�0
+B�1�Bar\Baz�0
+Bar\Baz�256�0
+C�1�Foo�0
+Foo�256�0
+__construct�16�()�Bar\Baz::B�0
+__construct�16�()�Foo::C�0
+a�16�()�Bar\Baz�0
+a�16�()�Foo�0
+b�16�()�Foo�0
+c�16�()�Bar\Baz::B�0
+d�16�()�Foo::C�0


Modified: tests/ctags/php5_5_class_kw.php
19 files changed, 19 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,19 @@
+<?php
+
+class A {
+  
+}
+
+echo A::class . "\n";
+
+class B {
+  public function __construct () {
+    echo this::class;
+  }
+}
+
+new B();
+
+class C {
+  
+}


Modified: tests/ctags/php5_5_class_kw.php.tags
5 files changed, 5 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,5 @@
+# format=tagmanager
+A�1�0
+B�1�0
+C�1�0
+__construct�16�()�B�0


Modified: tests/ctags/simple.php.tags
7 files changed, 4 insertions(+), 3 deletions(-)
===================================================================
@@ -1,11 +1,12 @@
 # format=tagmanager
+4site�16384�0
 CONSTANT�65536�0
 Cart�1�0
 Var�16384�0
 _4site�16384�0
-add_item�16�($artnr, $num)�0
+add_item�16�($artnr, $num)�Cart�0
 foo�16�($arg_1, $arg_2, ..., $arg_n)�0
-items�16384�0
-remove_item�16�($artnr, $num)�0
+items�16384�Cart�0
+remove_item�16�($artnr, $num)�Cart�0
 t�yte�16384�0
 var�16384�0


Modified: tests/ctags/traits.php.tags
10 files changed, 6 insertions(+), 4 deletions(-)
===================================================================
@@ -1,7 +1,9 @@
 # format=tagmanager
 A�1�0
 B�1�0
-__construct�16�()�0
-__construct�16�($p)�0
-stuff�16�()�0
-stuff�16�($arg1, $arg2)�0
+__construct�16�()�A�0
+__construct�16�($p)�B�0
+stuff�16�($arg1, $arg2)�tBar�0
+stuff�16�()�tFoo�0
+tBar�2048�0
+tFoo�2048�0



--------------
This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).


More information about the Commits mailing list