[geany/geany] e454b8: Merge branch 'json-tag-parser' into json

Colomban Wendling git-noreply at xxxxx
Tue Feb 24 17:12:27 UTC 2015


Branch:      refs/heads/master
Author:      Colomban Wendling <ban at herbesfolles.org>
Committer:   Colomban Wendling <ban at herbesfolles.org>
Date:        Sat, 31 Jan 2015 21:17:26 UTC
Commit:      e454b8962cdb68cf8d41813ffc55a1a620925845
             https://github.com/geany/geany/commit/e454b8962cdb68cf8d41813ffc55a1a620925845

Log Message:
-----------
Merge branch 'json-tag-parser' into json

Import JSON tag parser from CTags:
https://github.com/fishman/ctags/blob/master/json.c


Modified Paths:
--------------
    data/filetypes.JSON.conf
    tagmanager/ctags/Makefile.am
    tagmanager/ctags/json.c
    tagmanager/ctags/makefile.win32
    tagmanager/ctags/parsers.h
    tagmanager/src/tm_parser.h
    tests/ctags/Makefile.am
    tests/ctags/simple.json
    tests/ctags/simple.json.tags
    wscript

Modified: data/filetypes.JSON.conf
1 lines changed, 1 insertions(+), 0 deletions(-)
===================================================================
@@ -5,6 +5,7 @@ primary=true false null
 
 [settings]
 lexer_filetype=Javascript
+tag_parser=JSON
 extension=json
 mime_type=application/json
 


Modified: tagmanager/ctags/Makefile.am
1 lines changed, 1 insertions(+), 0 deletions(-)
===================================================================
@@ -29,6 +29,7 @@ parsers = \
 	haxe.c \
 	html.c \
 	js.c \
+	json.c \
 	latex.c \
 	lregex.c \
 	lua.c \


Modified: tagmanager/ctags/json.c
400 lines changed, 400 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,400 @@
+/*
+ * Copyright (c) 2014, Colomban Wendling <colomban at geany.org>
+ *
+ * This source code is released for free distribution under the terms of the
+ * GNU General Public License.
+ */
+/*
+ * This module contains functions for generating tags for JSON files.
+ *
+ * http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf
+ *
+ * This implementation is forgiving and allows many constructs that are not
+ * actually valid but that don't conflict with the format.  This is intended to
+ * better support partly broken or unfinished files.
+ */
+
+#include "general.h"
+
+#include <string.h>
+#include "main.h"
+#include "entry.h"
+#include "keyword.h"
+#include "parse.h"
+#include "read.h"
+#include "vstring.h"
+
+typedef enum {
+	TOKEN_EOF,
+	TOKEN_UNDEFINED,
+	TOKEN_OPEN_SQUARE,
+	TOKEN_CLOSE_SQUARE,
+	TOKEN_OPEN_CURLY,
+	TOKEN_CLOSE_CURLY,
+	TOKEN_COLON,
+	TOKEN_COMMA,
+	TOKEN_TRUE,
+	TOKEN_FALSE,
+	TOKEN_NULL,
+	TOKEN_NUMBER,
+	TOKEN_STRING
+} tokenType;
+
+typedef enum {
+	TAG_NONE = -1,
+	TAG_OBJECT,
+	TAG_ARRAY,
+	TAG_NUMBER,
+	TAG_STRING,
+	TAG_BOOLEAN,
+	TAG_NULL,
+	TAG_COUNT
+} jsonKind;
+
+typedef struct {
+	tokenType		type;
+	jsonKind		scopeKind;
+	vString			*string;
+	vString			*scope;
+	unsigned long	lineNumber;
+	MIOPos			filePosition;
+} tokenInfo;
+
+typedef enum {
+	KEYWORD_true,
+	KEYWORD_false,
+	KEYWORD_null
+} keywordId;
+
+static langType Lang_json;
+
+static kindOption JsonKinds [] = {
+	{ TRUE,  'o', "member",		"objects"	},
+	{ TRUE,  'a', "member",		"arrays"	},
+	{ TRUE,  'n', "member",		"numbers"	},
+	{ TRUE,  's', "member",		"strings"	},
+	{ TRUE,  'b', "member",		"booleans"	},
+	{ TRUE,  'z', "member",		"nulls"		}
+};
+
+static tokenInfo *newToken (void)
+{
+	tokenInfo *const token = xMalloc (1, tokenInfo);
+
+	token->type			= TOKEN_UNDEFINED;
+	token->scopeKind	= TAG_NONE;
+	token->string		= vStringNew ();
+	token->scope		= vStringNew ();
+	token->lineNumber	= getSourceLineNumber ();
+	token->filePosition	= getInputFilePosition ();
+
+	return token;
+}
+
+static void deleteToken (tokenInfo *const token)
+{
+	vStringDelete (token->string);
+	vStringDelete (token->scope);
+	eFree (token);
+}
+
+static void copyToken (tokenInfo *const dest, tokenInfo *const src)
+{
+	dest->type = src->type;
+	dest->scopeKind = src->scopeKind;
+	vStringCopy (dest->string, src->string);
+	vStringCopy (dest->scope, src->scope);
+	dest->lineNumber = src->lineNumber;
+	dest->filePosition = src->filePosition;
+}
+
+static void makeJsonTag (tokenInfo *const token, const jsonKind kind)
+{
+	tagEntryInfo e;
+
+	if (! JsonKinds[kind].enabled)
+		return;
+
+	initTagEntry (&e, vStringValue (token->string));
+
+	e.lineNumber	= token->lineNumber;
+	e.filePosition	= token->filePosition;
+	e.kindName		= JsonKinds[kind].name;
+	e.kind			= JsonKinds[kind].letter;
+
+	if (vStringLength (token->scope) > 0)
+	{
+		Assert (token->scopeKind > TAG_NONE && token->scopeKind < TAG_COUNT);
+
+		e.extensionFields.scope[0] = JsonKinds[token->scopeKind].name;
+		e.extensionFields.scope[1] = vStringValue (token->scope);
+	}
+
+	makeTagEntry (&e);
+}
+
+static boolean isIdentChar (int c)
+{
+	return (isalnum (c) || c == '+' || c == '-' || c == '.');
+}
+
+static void readTokenFull (tokenInfo *const token,
+						   boolean includeStringRepr)
+{
+	int c;
+
+	token->type = TOKEN_UNDEFINED;
+	vStringClear (token->string);
+
+	do
+		c = fileGetc ();
+	while (c == '\t' || c == ' ' || c == '\r' || c == '\n');
+
+	token->lineNumber   = getSourceLineNumber ();
+	token->filePosition = getInputFilePosition ();
+
+	switch (c)
+	{
+		case EOF: token->type = TOKEN_EOF;			break;
+		case '[': token->type = TOKEN_OPEN_SQUARE;	break;
+		case ']': token->type = TOKEN_CLOSE_SQUARE;	break;
+		case '{': token->type = TOKEN_OPEN_CURLY;	break;
+		case '}': token->type = TOKEN_CLOSE_CURLY;	break;
+		case ':': token->type = TOKEN_COLON;		break;
+		case ',': token->type = TOKEN_COMMA;		break;
+
+		case '"':
+		{
+			boolean escaped = FALSE;
+			token->type = TOKEN_STRING;
+			while (TRUE)
+			{
+				c = fileGetc ();
+				/* we don't handle unicode escapes but they are safe */
+				if (escaped)
+					escaped = FALSE;
+				else if (c == '\\')
+					escaped = TRUE;
+				else if (c >= 0x00 && c <= 0x1F)
+					break; /* break on invalid, unescaped, control characters */
+				else if (c == '"' || c == EOF)
+					break;
+				if (includeStringRepr)
+					vStringPut (token->string, c);
+			}
+			vStringTerminate (token->string);
+			break;
+		}
+
+		default:
+			if (! isIdentChar (c))
+				token->type = TOKEN_UNDEFINED;
+			else
+			{
+				do
+				{
+					vStringPut (token->string, c);
+					c = fileGetc ();
+				}
+				while (c != EOF && isIdentChar (c));
+				vStringTerminate (token->string);
+				fileUngetc (c);
+				switch (lookupKeyword (vStringValue (token->string), Lang_json))
+				{
+					case KEYWORD_true:	token->type = TOKEN_TRUE;	break;
+					case KEYWORD_false:	token->type = TOKEN_FALSE;	break;
+					case KEYWORD_null:	token->type = TOKEN_NULL;	break;
+					default:			token->type = TOKEN_NUMBER;	break;
+				}
+			}
+			break;
+	}
+}
+
+#define readToken(t) (readTokenFull ((t), FALSE))
+
+static void pushScope (tokenInfo *const token,
+					   const tokenInfo *const parent,
+					   const jsonKind parentKind)
+{
+	if (vStringLength (token->scope) > 0)
+		vStringPut (token->scope, '.');
+	vStringCat (token->scope, parent->string);
+	vStringTerminate (token->scope);
+	token->scopeKind = parentKind;
+}
+
+static void popScope (tokenInfo *const token,
+					  const tokenInfo *const parent)
+{
+	char *dot = strrchr (token->scope->buffer, '.');
+
+	if (! dot)
+		vStringClear (token->scope);
+	else
+	{
+		*dot = 0;
+		token->scope->length = dot - token->scope->buffer;
+	}
+	token->scopeKind = parent->scopeKind;
+}
+
+#define skipToOneOf2(token, type1, type2) \
+	(skipToOneOf3 (token, type1, type2, TOKEN_EOF /* dummy */))
+
+#define skipTo(token, type) \
+	(skipToOneOf3 (token, type, /* dummies */ TOKEN_EOF, TOKEN_EOF))
+
+static void skipToOneOf3 (tokenInfo *const token,
+						  const tokenType type1,
+						  const tokenType type2,
+						  const tokenType type3)
+{
+	while (token->type != TOKEN_EOF &&
+		   token->type != type1 &&
+		   token->type != type2 &&
+		   token->type != type3)
+	{
+		readToken (token);
+		if (token->type == TOKEN_OPEN_CURLY)
+		{
+			skipTo (token, TOKEN_CLOSE_CURLY);
+			readToken (token);
+		}
+		else if (token->type == TOKEN_OPEN_SQUARE)
+		{
+			skipTo (token, TOKEN_CLOSE_SQUARE);
+			readToken (token);
+		}
+	}
+}
+
+static jsonKind tokenToKind (const tokenType type)
+{
+	switch (type)
+	{
+		case TOKEN_OPEN_CURLY:	return TAG_OBJECT;
+		case TOKEN_OPEN_SQUARE:	return TAG_ARRAY;
+		case TOKEN_STRING:		return TAG_STRING;
+		case TOKEN_TRUE:
+		case TOKEN_FALSE:		return TAG_BOOLEAN;
+		case TOKEN_NUMBER:		return TAG_NUMBER;
+		default:				return TAG_NULL;
+	}
+}
+
+static void parseValue (tokenInfo *const token)
+{
+	if (token->type == TOKEN_OPEN_CURLY)
+	{
+		tokenInfo *name = newToken ();
+
+		do
+		{
+			readTokenFull (token, TRUE);
+			if (token->type == TOKEN_STRING)
+			{
+				jsonKind tagKind = TAG_NULL; /* default in case of invalid value */
+
+				copyToken (name, token);
+
+				/* skip any possible garbage before the value */
+				skipToOneOf3 (token, TOKEN_CLOSE_CURLY, TOKEN_COLON, TOKEN_COMMA);
+
+				if (token->type == TOKEN_COLON)
+				{
+					readToken (token);
+					tagKind = tokenToKind (token->type);
+
+					pushScope (token, name, tagKind);
+					parseValue (token);
+					popScope (token, name);
+				}
+
+				makeJsonTag (name, tagKind);
+			}
+			/* skip to the end of the construct */
+			skipToOneOf2 (token, TOKEN_CLOSE_CURLY, TOKEN_COMMA);
+		}
+		while (token->type != TOKEN_EOF &&
+			   token->type != TOKEN_CLOSE_CURLY);
+
+		if (token->type == TOKEN_CLOSE_CURLY)
+			readToken (token);
+
+		deleteToken (name);
+	}
+	else if (token->type == TOKEN_OPEN_SQUARE)
+	{
+		tokenInfo *name = newToken ();
+		char buf[32];
+		unsigned int nth = 0;
+
+		readToken (token);
+		while (token->type != TOKEN_EOF &&
+			   token->type != TOKEN_CLOSE_SQUARE)
+		{
+			jsonKind tagKind;
+
+			tagKind = tokenToKind (token->type);
+
+			copyToken (name, token);
+			snprintf (buf, sizeof buf, "%u", nth++);
+			vStringCopyS (name->string, buf);
+
+			makeJsonTag (name, tagKind);
+			pushScope (token, name, tagKind);
+			parseValue (token);
+			popScope (token, name);
+
+			/* skip to the end of the construct */
+			skipToOneOf2 (token, TOKEN_CLOSE_SQUARE, TOKEN_COMMA);
+			if (token->type != TOKEN_CLOSE_SQUARE)
+				readToken (token);
+		}
+
+		if (token->type == TOKEN_CLOSE_SQUARE)
+			readToken (token);
+
+		deleteToken (name);
+	}
+}
+
+static void findJsonTags (void)
+{
+	tokenInfo *const token = newToken ();
+
+	/* We allow multiple top-level elements, although it's not actually valid
+	 * JSON.  An interesting side effect of this is that we allow a leading
+	 * Unicode BOM mark -- even though, while harmless, many JSON parsers will choke on it */
+	do
+	{
+		readToken (token);
+		parseValue (token);
+	}
+	while (token->type != TOKEN_EOF);
+
+	deleteToken (token);
+}
+
+static void initialize (const langType language)
+{
+	Lang_json = language;
+	addKeyword ("true", language, KEYWORD_true);
+	addKeyword ("false", language, KEYWORD_false);
+	addKeyword ("null", language, KEYWORD_null);
+}
+
+/* Create parser definition structure */
+extern parserDefinition* JsonParser (void)
+{
+	static const char *const extensions [] = { "json", NULL };
+	parserDefinition *const def = parserNew ("JSON");
+	def->extensions = extensions;
+	def->kinds		= JsonKinds;
+	def->kindCount	= KIND_COUNT (JsonKinds);
+	def->parser		= findJsonTags;
+	def->initialize = initialize;
+
+	return def;
+}


Modified: tagmanager/ctags/makefile.win32
2 lines changed, 1 insertions(+), 1 deletions(-)
===================================================================
@@ -44,7 +44,7 @@ all: $(COMPLIB)
 clean:
 	-$(RM) deps.mak *.o $(COMPLIB)
 
-$(COMPLIB): abaqus.o abc.o args.o c.o cobol.o fortran.o make.o conf.o pascal.o perl.o php.o diff.o vhdl.o verilog.o lua.o js.o \
+$(COMPLIB): abaqus.o abc.o args.o c.o cobol.o fortran.o make.o conf.o pascal.o perl.o php.o diff.o vhdl.o verilog.o lua.o js.o json.o \
 actionscript.o nsis.o objc.o \
 haskell.o haxe.o html.o python.o lregex.o asciidoc.o rest.o sh.o ctags.o entry.o get.o keyword.o nestlevel.o \
 options.o \


Modified: tagmanager/ctags/parsers.h
3 lines changed, 2 insertions(+), 1 deletions(-)
===================================================================
@@ -62,7 +62,8 @@
     AsciidocParser, \
     AbaqusParser, \
     RustParser, \
-    GoParser
+    GoParser, \
+    JsonParser
 
 #endif	/* _PARSERS_H */
 


Modified: tagmanager/src/tm_parser.h
1 lines changed, 1 insertions(+), 0 deletions(-)
===================================================================
@@ -69,6 +69,7 @@ typedef enum
 	TM_PARSER_ABAQUS,
 	TM_PARSER_RUST,
 	TM_PARSER_GO,
+	TM_PARSER_JSON,
 	TM_PARSER_COUNT
 } TMParserType;
 


Modified: tests/ctags/Makefile.am
1 lines changed, 1 insertions(+), 0 deletions(-)
===================================================================
@@ -246,6 +246,7 @@ test_sources = \
 	simple.d						\
 	simple.html						\
 	simple.js						\
+	simple.json						\
 	simple.ksh						\
 	simple.lua						\
 	simple.mak						\


Modified: tests/ctags/simple.json
25 lines changed, 25 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,25 @@
+{
+  "firstName": "John",
+  "lastName": "Smith",
+  "isAlive": true,
+  "age": 25,
+  "height_cm": 167.6,
+  "address": {
+    "streetAddress": "21 2nd Street",
+    "city": "New York",
+    "state": "NY",
+    "postalCode": "10021-3100"
+  },
+  "phoneNumbers": [
+    {
+      "type": "home",
+      "number": "212 555-1234"
+    },
+    {
+      "type": "office",
+      "number": "646 555-4567"
+    }
+  ],
+  "children": [],
+  "spouse": null
+}


Modified: tests/ctags/simple.json.tags
20 lines changed, 20 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,20 @@
+# format=tagmanager
+0�64�phoneNumbers�0
+1�64�phoneNumbers�0
+address�64�0
+age�64�0
+children�64�0
+city�64�address�0
+firstName�64�0
+height_cm�64�0
+isAlive�64�0
+lastName�64�0
+number�64�phoneNumbers.0�0
+number�64�phoneNumbers.1�0
+phoneNumbers�64�0
+postalCode�64�address�0
+spouse�64�0
+state�64�address�0
+streetAddress�64�address�0
+type�64�phoneNumbers.0�0
+type�64�phoneNumbers.1�0


Modified: wscript
1 lines changed, 1 insertions(+), 0 deletions(-)
===================================================================
@@ -87,6 +87,7 @@ ctags_sources = set([
     'tagmanager/ctags/haxe.c',
     'tagmanager/ctags/html.c',
     'tagmanager/ctags/js.c',
+    'tagmanager/ctags/json.c',
     'tagmanager/ctags/keyword.c',
     'tagmanager/ctags/latex.c',
     'tagmanager/ctags/lregex.c',



--------------
This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).


More information about the Commits mailing list