Branch:      refs/heads/master
Author:      Colomban Wendling <ban@herbesfolles.org>
Committer:   Colomban Wendling <ban@herbesfolles.org>
Date:        Sat, 31 Jan 2015 21:14:50 UTC
Commit:      2ff1386d9690147af9d5089d319dd57aca044151
             https://github.com/geany/geany/commit/2ff1386d9690147af9d5089d319dd57aca044151
Log Message:
-----------
Add new parser for JSON
Modified Paths:
--------------
    tagmanager/ctags/Makefile.am
    tagmanager/ctags/json.c
    tagmanager/ctags/makefile.win32
    tagmanager/ctags/parsers.h
    tagmanager/src/tm_parser.h
    tests/ctags/simple.json
    tests/ctags/simple.json.tags
    wscript
Modified: tagmanager/ctags/Makefile.am
1 lines changed, 1 insertions(+), 0 deletions(-)
===================================================================
@@ -29,6 +29,7 @@ parsers = \
 	haxe.c \
 	html.c \
 	js.c \
+	json.c \
 	latex.c \
 	lregex.c \
 	lua.c \
Modified: tagmanager/ctags/json.c
396 lines changed, 396 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,396 @@
+/*
+ * Copyright (c) 2014, Colomban Wendling <colomban@geany.org>
+ *
+ * This source code is released for free distribution under the terms of the
+ * GNU General Public License.
+ */
+/*
+ * This module contains functions for generating tags for JSON files.
+ *
+ * http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf
+ *
+ * This implementation is forgiving and allows many constructs that are not
+ * actually valid but that don't conflict with the format. This is intended to
+ * better support partly broken or unfinished files.
+ */
+
+#include "general.h"
+
+#include <string.h>
+#include "main.h"
+#include "entry.h"
+#include "keyword.h"
+#include "parse.h"
+#include "read.h"
+#include "vstring.h"
+
+typedef enum {
+	TOKEN_EOF,
+	TOKEN_UNDEFINED,
+	TOKEN_OPEN_SQUARE,
+	TOKEN_CLOSE_SQUARE,
+	TOKEN_OPEN_CURLY,
+	TOKEN_CLOSE_CURLY,
+	TOKEN_COLON,
+	TOKEN_COMMA,
+	TOKEN_TRUE,
+	TOKEN_FALSE,
+	TOKEN_NULL,
+	TOKEN_NUMBER,
+	TOKEN_STRING
+} tokenType;
+
+typedef enum {
+	TAG_NONE = -1,
+	TAG_OBJECT,
+	TAG_ARRAY,
+	TAG_NUMBER,
+	TAG_STRING,
+	TAG_BOOLEAN,
+	TAG_NULL,
+	TAG_COUNT
+} jsonKind;
+
+typedef struct {
+	tokenType type;
+	jsonKind scopeKind;
+	vString *string;
+	vString *scope;
+	unsigned long lineNumber;
+	MIOPos filePosition;
+} tokenInfo;
+
+typedef enum {
+	KEYWORD_true,
+	KEYWORD_false,
+	KEYWORD_null
+} keywordId;
+
+static langType Lang_json;
+
+static kindOption JsonKinds [] = {
+	{ TRUE, 'o', "object", "objects" },
+	{ TRUE, 'a', "array", "arrays" },
+	{ TRUE, 'n', "number", "numbers" },
+	{ TRUE, 's', "string", "strings" },
+	{ TRUE, 'b', "boolean", "booleans" },
+	{ TRUE, 'z', "null", "nulls" }
+};
+
+static tokenInfo *newToken (void)
+{
+	tokenInfo *const token = xMalloc (1, tokenInfo);
+
+	token->type = TOKEN_UNDEFINED;
+	token->scopeKind = TAG_NONE;
+	token->string = vStringNew ();
+	token->scope = vStringNew ();
+	token->lineNumber = getSourceLineNumber ();
+	token->filePosition = getInputFilePosition ();
+
+	return token;
+}
+
+static void deleteToken (tokenInfo *const token)
+{
+	vStringDelete (token->string);
+	vStringDelete (token->scope);
+	eFree (token);
+}
+
+static void copyToken (tokenInfo *const dest, tokenInfo *const src)
+{
+	dest->type = src->type;
+	dest->scopeKind = src->scopeKind;
+	vStringCopy (dest->string, src->string);
+	vStringCopy (dest->scope, src->scope);
+	dest->lineNumber = src->lineNumber;
+	dest->filePosition = src->filePosition;
+}
+
+static void makeJsonTag (tokenInfo *const token, const jsonKind kind)
+{
+	tagEntryInfo e;
+
+	if (! JsonKinds[kind].enabled)
+		return;
+
+	initTagEntry (&e, vStringValue (token->string));
+
+	e.lineNumber = token->lineNumber;
+	e.filePosition = token->filePosition;
+	e.kindName = JsonKinds[kind].name;
+	e.kind = JsonKinds[kind].letter;
+
+	if (vStringLength (token->scope) > 0)
+	{
+		Assert (token->scopeKind > TAG_NONE && token->scopeKind < TAG_COUNT);
+
+		e.extensionFields.scope[0] = JsonKinds[token->scopeKind].name;
+		e.extensionFields.scope[1] = vStringValue (token->scope);
+	}
+
+	makeTagEntry (&e);
+}
+
+static boolean isIdentChar (int c)
+{
+	return (isalnum (c) || c == '+' || c == '-' || c == '.');
+}
+
+static void readToken (tokenInfo *const token)
+{
+	int c;
+
+	token->type = TOKEN_UNDEFINED;
+	vStringClear (token->string);
+
+	do
+		c = fileGetc ();
+	while (c == '\t' || c == ' ' || c == '\r' || c == '\n');
+
+	token->lineNumber = getSourceLineNumber ();
+	token->filePosition = getInputFilePosition ();
+
+	switch (c)
+	{
+		case EOF: token->type = TOKEN_EOF; break;
+		case '[': token->type = TOKEN_OPEN_SQUARE; break;
+		case ']': token->type = TOKEN_CLOSE_SQUARE; break;
+		case '{': token->type = TOKEN_OPEN_CURLY; break;
+		case '}': token->type = TOKEN_CLOSE_CURLY; break;
+		case ':': token->type = TOKEN_COLON; break;
+		case ',': token->type = TOKEN_COMMA; break;
+
+		case '"':
+		{
+			boolean escaped = FALSE;
+			token->type = TOKEN_STRING;
+			while (TRUE)
+			{
+				c = fileGetc ();
+				/* we don't handle unicode escapes but they are safe */
+				if (escaped)
+					escaped = FALSE;
+				else if (c == '\\')
+					escaped = TRUE;
+				else if (c >= 0x00 && c <= 0x1F)
+					break; /* break on invalid, unescaped, control characters */
+				else if (c == '"' || c == EOF)
+					break;
+				vStringPut (token->string, c);
+			}
+			vStringTerminate (token->string);
+			break;
+		}
+
+		default:
+			if (! isIdentChar (c))
+				token->type = TOKEN_UNDEFINED;
+			else
+			{
+				do
+				{
+					vStringPut (token->string, c);
+					c = fileGetc ();
+				}
+				while (c != EOF && isIdentChar (c));
+				vStringTerminate (token->string);
+				fileUngetc (c);
+				switch (lookupKeyword (vStringValue (token->string), Lang_json))
+				{
+					case KEYWORD_true: token->type = TOKEN_TRUE; break;
+					case KEYWORD_false: token->type = TOKEN_FALSE; break;
+					case KEYWORD_null: token->type = TOKEN_NULL; break;
+					default: token->type = TOKEN_NUMBER; break;
+				}
+			}
+			break;
+	}
+}
+
+static void pushScope (tokenInfo *const token,
+                       const tokenInfo *const parent,
+                       const jsonKind parentKind)
+{
+	if (vStringLength (token->scope) > 0)
+		vStringPut (token->scope, '.');
+	vStringCat (token->scope, parent->string);
+	vStringTerminate (token->scope);
+	token->scopeKind = parentKind;
+}
+
+static void popScope (tokenInfo *const token,
+                      const tokenInfo *const parent)
+{
+	char *dot = strrchr (token->scope->buffer, '.');
+
+	if (! dot)
+		vStringClear (token->scope);
+	else
+	{
+		*dot = 0;
+		token->scope->length = dot - token->scope->buffer;
+	}
+	token->scopeKind = parent->scopeKind;
+}
+
+#define skipToOneOf2(token, type1, type2) \
+	(skipToOneOf3 (token, type1, type2, TOKEN_EOF /* dummy */))
+
+#define skipTo(token, type) \
+	(skipToOneOf3 (token, type, /* dummies */ TOKEN_EOF, TOKEN_EOF))
+
+static void skipToOneOf3 (tokenInfo *const token,
+                          const tokenType type1,
+                          const tokenType type2,
+                          const tokenType type3)
+{
+	while (token->type != TOKEN_EOF &&
+	       token->type != type1 &&
+	       token->type != type2 &&
+	       token->type != type3)
+	{
+		readToken (token);
+		if (token->type == TOKEN_OPEN_CURLY)
+		{
+			skipTo (token, TOKEN_CLOSE_CURLY);
+			readToken (token);
+		}
+		else if (token->type == TOKEN_OPEN_SQUARE)
+		{
+			skipTo (token, TOKEN_CLOSE_SQUARE);
+			readToken (token);
+		}
+	}
+}
+
+static jsonKind tokenToKind (const tokenType type)
+{
+	switch (type)
+	{
+		case TOKEN_OPEN_CURLY: return TAG_OBJECT;
+		case TOKEN_OPEN_SQUARE: return TAG_ARRAY;
+		case TOKEN_STRING: return TAG_STRING;
+		case TOKEN_TRUE:
+		case TOKEN_FALSE: return TAG_BOOLEAN;
+		case TOKEN_NUMBER: return TAG_NUMBER;
+		default: return TAG_NULL;
+	}
+}
+
+static void parseValue (tokenInfo *const token)
+{
+	if (token->type == TOKEN_OPEN_CURLY)
+	{
+		tokenInfo *name = newToken ();
+
+		do
+		{
+			readToken (token);
+			if (token->type == TOKEN_STRING)
+			{
+				jsonKind tagKind = TAG_NULL; /* default in case of invalid value */
+
+				copyToken (name, token);
+
+				/* skip any possible garbage before the value */
+				skipToOneOf3 (token, TOKEN_CLOSE_CURLY, TOKEN_COLON, TOKEN_COMMA);
+
+				if (token->type == TOKEN_COLON)
+				{
+					readToken (token);
+					tagKind = tokenToKind (token->type);
+
+					pushScope (token, name, tagKind);
+					parseValue (token);
+					popScope (token, name);
+				}
+
+				makeJsonTag (name, tagKind);
+			}
+			/* skip to the end of the construct */
+			skipToOneOf2 (token, TOKEN_CLOSE_CURLY, TOKEN_COMMA);
+		}
+		while (token->type != TOKEN_EOF &&
+		       token->type != TOKEN_CLOSE_CURLY);
+
+		if (token->type == TOKEN_CLOSE_CURLY)
+			readToken (token);
+
+		deleteToken (name);
+	}
+	else if (token->type == TOKEN_OPEN_SQUARE)
+	{
+		tokenInfo *name = newToken ();
+		char buf[32];
+		unsigned int nth = 0;
+
+		readToken (token);
+		while (token->type != TOKEN_EOF &&
+		       token->type != TOKEN_CLOSE_SQUARE)
+		{
+			jsonKind tagKind;
+
+			tagKind = tokenToKind (token->type);
+
+			copyToken (name, token);
+			snprintf (buf, sizeof buf, "%u", nth++);
+			vStringCopyS (name->string, buf);
+
+			makeJsonTag (name, tagKind);
+			pushScope (token, name, tagKind);
+			parseValue (token);
+			popScope (token, name);
+
+			/* skip to the end of the construct */
+			skipToOneOf2 (token, TOKEN_CLOSE_SQUARE, TOKEN_COMMA);
+			if (token->type != TOKEN_CLOSE_SQUARE)
+				readToken (token);
+		}
+
+		if (token->type == TOKEN_CLOSE_SQUARE)
+			readToken (token);
+
+		deleteToken (name);
+	}
+}
+
+static void findJsonTags (void)
+{
+	tokenInfo *const token = newToken ();
+
+	/* We allow multiple top-level elements, although it's not actually valid
+	 * JSON.  An interesting side effect of this is that we allow a leading
+	 * Unicode BOM mark -- even though OK here, many JSON parsers will choke on it */
+	do
+	{
+		readToken (token);
+		parseValue (token);
+	}
+	while (token->type != TOKEN_EOF);
+
+	deleteToken (token);
+}
+
+static void initialize (const langType language)
+{
+	Lang_json = language;
+	addKeyword ("true", language, KEYWORD_true);
+	addKeyword ("false", language, KEYWORD_false);
+	addKeyword ("null", language, KEYWORD_null);
+}
+
+/* Create parser definition structure */
+extern parserDefinition* JsonParser (void)
+{
+	static const char *const extensions [] = { "json", NULL };
+	parserDefinition *const def = parserNew ("JSON");
+	def->extensions = extensions;
+	def->kinds = JsonKinds;
+	def->kindCount = KIND_COUNT (JsonKinds);
+	def->parser = findJsonTags;
+	def->initialize = initialize;
+
+	return def;
+}
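[Editor's note] The dotted scope names that show up in the expected test output further down (e.g. "phoneNumbers.0") are produced by pushScope()/popScope() above. As a minimal illustration only -- not part of the commit, using plain C strings and hypothetical helper names (push_scope/pop_scope, fixed 256-byte buffer) instead of the ctags vString API -- the same bookkeeping looks like this:

/* Sketch only: mirrors the dotted-scope bookkeeping of pushScope()/popScope(). */
#include <stdio.h>
#include <string.h>

static char scope[256];

static void push_scope (const char *name)
{
	/* separate components with a dot, except for the first one */
	if (scope[0] != '\0')
		strncat (scope, ".", sizeof scope - strlen (scope) - 1);
	strncat (scope, name, sizeof scope - strlen (scope) - 1);
}

static void pop_scope (void)
{
	char *dot = strrchr (scope, '.');

	if (dot)
		*dot = '\0';      /* drop only the last component */
	else
		scope[0] = '\0';  /* single component left: clear it */
}

int main (void)
{
	/* how "phoneNumbers.0" is built for the test file: the array key becomes
	 * a scope component, each element is then named by its index */
	push_scope ("phoneNumbers");
	push_scope ("0");
	printf ("%s\n", scope);    /* prints: phoneNumbers.0 */
	pop_scope ();
	pop_scope ();
	printf ("[%s]\n", scope);  /* prints: [] -- scope is empty again */
	return 0;
}

In the parser itself the scope additionally records the parent's kind (object or array), which is what yields the "object:" / "array:" prefixes in the expected tags file below.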
Modified: tagmanager/ctags/makefile.win32
2 lines changed, 1 insertions(+), 1 deletions(-)
===================================================================
@@ -44,7 +44,7 @@ all: $(COMPLIB)
 clean:
 	-$(RM) deps.mak *.o $(COMPLIB)
 
-$(COMPLIB): abaqus.o abc.o args.o c.o cobol.o fortran.o make.o conf.o pascal.o perl.o php.o diff.o vhdl.o verilog.o lua.o js.o \
+$(COMPLIB): abaqus.o abc.o args.o c.o cobol.o fortran.o make.o conf.o pascal.o perl.o php.o diff.o vhdl.o verilog.o lua.o js.o json.o \
 	actionscript.o nsis.o objc.o \
 	haskell.o haxe.o html.o python.o lregex.o asciidoc.o rest.o sh.o ctags.o entry.o get.o keyword.o nestlevel.o \
 	options.o \
Modified: tagmanager/ctags/parsers.h
3 lines changed, 2 insertions(+), 1 deletions(-)
===================================================================
@@ -62,7 +62,8 @@
 	AsciidocParser, \
 	AbaqusParser, \
 	RustParser, \
-	GoParser
+	GoParser, \
+	JsonParser
 
 #endif /* _PARSERS_H */
Modified: tagmanager/src/tm_parser.h
1 lines changed, 1 insertions(+), 0 deletions(-)
===================================================================
@@ -69,6 +69,7 @@ typedef enum
 	TM_PARSER_ABAQUS,
 	TM_PARSER_RUST,
 	TM_PARSER_GO,
+	TM_PARSER_JSON,
 	TM_PARSER_COUNT
 } TMParserType;
Modified: tests/ctags/simple.json
25 lines changed, 25 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,25 @@
+{
+  "firstName": "John",
+  "lastName": "Smith",
+  "isAlive": true,
+  "age": 25,
+  "height_cm": 167.6,
+  "address": {
+    "streetAddress": "21 2nd Street",
+    "city": "New York",
+    "state": "NY",
+    "postalCode": "10021-3100"
+  },
+  "phoneNumbers": [
+    {
+      "type": "home",
+      "number": "212 555-1234"
+    },
+    {
+      "type": "office",
+      "number": "646 555-4567"
+    }
+  ],
+  "children": [],
+  "spouse": null
+}
Modified: tests/ctags/simple.json.tags
19 lines changed, 19 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,19 @@
+0	input.json	/^    {$/;"	o	array:phoneNumbers
+1	input.json	/^    {$/;"	o	array:phoneNumbers
+address	input.json	/^  "address": {$/;"	o
+age	input.json	/^  "age": 25,$/;"	n
+children	input.json	/^  "children": [],$/;"	a
+city	input.json	/^    "city": "New York",$/;"	s	object:address
+firstName	input.json	/^  "firstName": "John",$/;"	s
+height_cm	input.json	/^  "height_cm": 167.6,$/;"	n
+isAlive	input.json	/^  "isAlive": true,$/;"	b
+lastName	input.json	/^  "lastName": "Smith",$/;"	s
+number	input.json	/^      "number": "212 555-1234"$/;"	s	object:phoneNumbers.0
+number	input.json	/^      "number": "646 555-4567"$/;"	s	object:phoneNumbers.1
+phoneNumbers	input.json	/^  "phoneNumbers": [$/;"	a
+postalCode	input.json	/^    "postalCode": "10021-3100"$/;"	s	object:address
+spouse	input.json	/^  "spouse": null$/;"	z
+state	input.json	/^    "state": "NY",$/;"	s	object:address
+streetAddress	input.json	/^    "streetAddress": "21 2nd Street",$/;"	s	object:address
+type	input.json	/^    "type": "home",$/;"	s	object:phoneNumbers.0
+type	input.json	/^    "type": "office",$/;"	s	object:phoneNumbers.1
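[Editor's note] A quick decode of one expected line above, assuming the usual tab-separated ctags tag-file layout:

number                                -> tag name
input.json                            -> file the tag was found in
/^      "number": "212 555-1234"$/;"  -> ex-command search pattern
s                                     -> kind letter ('s' = string, per JsonKinds)
object:phoneNumbers.0                 -> scope: first element of the "phoneNumbers" array

The numeric scope components come from parseValue()'s array branch, which names each element after its index (nth).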
Modified: wscript
1 lines changed, 1 insertions(+), 0 deletions(-)
===================================================================
@@ -87,6 +87,7 @@ ctags_sources = set([
     'tagmanager/ctags/haxe.c',
     'tagmanager/ctags/html.c',
     'tagmanager/ctags/js.c',
+    'tagmanager/ctags/json.c',
     'tagmanager/ctags/keyword.c',
     'tagmanager/ctags/latex.c',
     'tagmanager/ctags/lregex.c',
--------------
This E-Mail was brought to you by github_commit_mail.py
(Source: https://github.com/geany/infrastructure)