Branch: refs/heads/master Author: Colomban Wendling ban@herbesfolles.org Committer: Colomban Wendling ban@herbesfolles.org Date: Tue, 02 Dec 2014 14:03:20 UTC Commit: f08af8046f8cbbd7d48fc767155d1fe805c1c45d https://github.com/geany/geany/commit/f08af8046f8cbbd7d48fc767155d1fe805c1c45d
Log Message: ----------- Merge branch 'js-update'
Import back JavaScript parser changes from fishman/ctags.
Modified Paths: -------------- tagmanager/ctags/js.c tests/ctags/Makefile.am tests/ctags/js-broken-strings.js tests/ctags/js-broken-strings.js.tags tests/ctags/js-string-continuation.js tests/ctags/js-string-continuation.js.tags
Modified: tagmanager/ctags/js.c 102 lines changed, 51 insertions(+), 51 deletions(-) =================================================================== @@ -19,7 +19,6 @@ #include "general.h" /* must always come first */ #include <ctype.h> /* to define isalpha () */ #include <string.h> -#include <setjmp.h> #include <mio/mio.h> #ifdef DEBUG #include <stdio.h> @@ -41,8 +40,6 @@ * DATA DECLARATIONS */
-typedef enum eException { ExceptionNone, ExceptionEOF } exception_t; - /* * Tracks class and function names already created */ @@ -85,6 +82,7 @@ typedef struct sKeywordDesc {
typedef enum eTokenType { TOKEN_UNDEFINED, + TOKEN_EOF, TOKEN_CHARACTER, TOKEN_CLOSE_PAREN, TOKEN_SEMICOLON, @@ -126,8 +124,6 @@ static tokenType LastTokenType;
static langType Lang_js;
-static jmp_buf Exception; - typedef enum { JSTAG_FUNCTION, JSTAG_CLASS, @@ -364,11 +360,32 @@ static void parseString (vString *const string, const int delimiter) end = TRUE; else if (c == '\\') { - c = fileGetc(); /* This maybe a ' or ". */ - vStringPut(string, c); + /* Eat the escape sequence (\", \', etc). We properly handle + * <LineContinuation> by eating a whole <CR><LF> not to see <LF> + * as an unescaped character, which is invalid and handled below. + * Also, handle the fact that <LineContinuation> produces an empty + * sequence. + * See ECMA-262 7.8.4 */ + c = fileGetc(); + if (c != '\r' && c != '\n') + vStringPut(string, c); + else if (c == '\r') + { + c = fileGetc(); + if (c != '\n') + fileUngetc (c); + } } else if (c == delimiter) end = TRUE; + else if (c == '\r' || c == '\n') + { + /* those are invalid when not escaped */ + end = TRUE; + /* we don't want to eat the newline itself to let the automatic + * semicolon insertion code kick in */ + fileUngetc (c); + } else vStringPut (string, c); } @@ -458,7 +475,7 @@ static void readTokenFull (tokenInfo *const token, boolean include_newlines, vSt
switch (c) { - case EOF: longjmp (Exception, (int)ExceptionEOF); break; + case EOF: token->type = TOKEN_EOF; break; case '(': token->type = TOKEN_OPEN_PAREN; break; case ')': token->type = TOKEN_CLOSE_PAREN; break; case ';': token->type = TOKEN_SEMICOLON; break; @@ -669,32 +686,18 @@ static void skipArgumentList (tokenInfo *const token, boolean include_newlines, { int nest_level = 0;
- /* - * Other databases can have arguments with fully declared - * datatypes: - * ( name varchar(30), text binary(10) ) - * So we must check for nested open and closing parantheses - */ - if (isType (token, TOKEN_OPEN_PAREN)) /* arguments? */ { nest_level++; if (repr) vStringPut (repr, '('); - while (! (isType (token, TOKEN_CLOSE_PAREN) && (nest_level == 0))) + while (nest_level > 0 && ! isType (token, TOKEN_EOF)) { readTokenFull (token, FALSE, repr); if (isType (token, TOKEN_OPEN_PAREN)) - { nest_level++; - } - if (isType (token, TOKEN_CLOSE_PAREN)) - { - if (nest_level > 0) - { - nest_level--; - } - } + else if (isType (token, TOKEN_CLOSE_PAREN)) + nest_level--; } readTokenFull (token, include_newlines, NULL); } @@ -713,20 +716,13 @@ static void skipArrayList (tokenInfo *const token, boolean include_newlines) if (isType (token, TOKEN_OPEN_SQUARE)) /* arguments? */ { nest_level++; - while (! (isType (token, TOKEN_CLOSE_SQUARE) && (nest_level == 0))) + while (nest_level > 0 && ! isType (token, TOKEN_EOF)) { readToken (token); if (isType (token, TOKEN_OPEN_SQUARE)) - { nest_level++; - } - if (isType (token, TOKEN_CLOSE_SQUARE)) - { - if (nest_level > 0) - { - nest_level--; - } - } + else if (isType (token, TOKEN_CLOSE_SQUARE)) + nest_level--; } readTokenFull (token, include_newlines, NULL); } @@ -762,8 +758,9 @@ static boolean findCmdTerm (tokenInfo *const token, boolean include_newlines) * Read until we find either a semicolon or closing brace. * Any nested braces will be handled within. */ - while (! ( isType (token, TOKEN_SEMICOLON) || - isType (token, TOKEN_CLOSE_CURLY) ) ) + while (! isType (token, TOKEN_SEMICOLON) && + ! isType (token, TOKEN_CLOSE_CURLY) && + ! 
isType (token, TOKEN_EOF)) { /* Handle nested blocks */ if ( isType (token, TOKEN_OPEN_CURLY)) @@ -1132,7 +1129,8 @@ static boolean parseBlock (tokenInfo *const token, tokenInfo *const orig_parent) * If we find a statement without a terminator consider the * block finished, otherwise the stack will be off by one. */ - } while (! isType (token, TOKEN_CLOSE_CURLY) && read_next_token ); + } while (! isType (token, TOKEN_EOF) && + ! isType (token, TOKEN_CLOSE_CURLY) && read_next_token); }
deleteToken (parent); @@ -1209,7 +1207,8 @@ static boolean parseMethods (tokenInfo *const token, tokenInfo *const class)
/* skip whatever is the value */ while (! isType (token, TOKEN_COMMA) && - ! isType (token, TOKEN_CLOSE_CURLY)) + ! isType (token, TOKEN_CLOSE_CURLY) && + ! isType (token, TOKEN_EOF)) { if (isType (token, TOKEN_OPEN_CURLY)) { @@ -1329,7 +1328,8 @@ static boolean parseStatement (tokenInfo *const token, tokenInfo *const parent,
while (! isType (token, TOKEN_CLOSE_CURLY) && ! isType (token, TOKEN_SEMICOLON) && - ! isType (token, TOKEN_EQUAL_SIGN) ) + ! isType (token, TOKEN_EQUAL_SIGN) && + ! isType (token, TOKEN_EOF)) { if (isType (token, TOKEN_OPEN_CURLY)) parseBlock (token, parent); @@ -1403,9 +1403,10 @@ static boolean parseStatement (tokenInfo *const token, tokenInfo *const parent, readToken (method_body_token); vStringCopy (method_body_token->scope, token->scope);
- while (! ( isType (method_body_token, TOKEN_SEMICOLON) || - isType (method_body_token, TOKEN_CLOSE_CURLY) || - isType (method_body_token, TOKEN_OPEN_CURLY)) ) + while (! isType (method_body_token, TOKEN_SEMICOLON) && + ! isType (method_body_token, TOKEN_CLOSE_CURLY) && + ! isType (method_body_token, TOKEN_OPEN_CURLY) && + ! isType (method_body_token, TOKEN_EOF)) { if ( isType (method_body_token, TOKEN_OPEN_PAREN) ) skipArgumentList(method_body_token, FALSE, @@ -1728,7 +1729,7 @@ static boolean parseStatement (tokenInfo *const token, tokenInfo *const parent,
if (parenDepth > 0) { - while (parenDepth > 0) + while (parenDepth > 0 && ! isType (token, TOKEN_EOF)) { if (isType (token, TOKEN_OPEN_PAREN)) parenDepth++; @@ -1795,7 +1796,8 @@ static void parseUI5 (tokenInfo *const token) if (isType (token, TOKEN_PERIOD)) { readToken (token); - while (! isType (token, TOKEN_OPEN_PAREN) ) + while (! isType (token, TOKEN_OPEN_PAREN) && + ! isType (token, TOKEN_EOF)) { readToken (token); } @@ -1813,7 +1815,8 @@ static void parseUI5 (tokenInfo *const token) do { parseMethods (token, name); - } while (! isType (token, TOKEN_CLOSE_CURLY) ); + } while (! isType (token, TOKEN_CLOSE_CURLY) && + ! isType (token, TOKEN_EOF)); }
deleteToken (name); @@ -1884,7 +1887,7 @@ static void parseJsFile (tokenInfo *const token) parseUI5 (token); else parseLine (token, token, FALSE); - } while (TRUE); + } while (! isType (token, TOKEN_EOF)); }
static void initialize (const langType language) @@ -1897,15 +1900,12 @@ static void initialize (const langType language) static void findJsTags (void) { tokenInfo *const token = newToken (); - exception_t exception;
ClassNames = stringListNew (); FunctionNames = stringListNew (); LastTokenType = TOKEN_UNDEFINED;
- exception = (exception_t) (setjmp (Exception)); - while (exception == ExceptionNone) - parseJsFile (token); + parseJsFile (token);
stringListDelete (ClassNames); stringListDelete (FunctionNames);
Modified: tests/ctags/Makefile.am 2 lines changed, 2 insertions(+), 0 deletions(-) =================================================================== @@ -159,12 +159,14 @@ test_sources = \ intro.tex \ invalid_name.f90 \ java_enum.java \ + js-broken-strings.js \ js-class-related-unterminated.js \ js-const.js \ js-implicit-semicolons.js \ js-let.js \ js-scope.js \ js-signature.js \ + js-string-continuation.js \ js-sub-block-scope.js \ js-unknown-construct-nesting.js \ jsFunc_tutorial.js \
Modified: tests/ctags/js-broken-strings.js 10 lines changed, 10 insertions(+), 0 deletions(-) =================================================================== @@ -0,0 +1,10 @@ +// this file willfully uses CR-LF line endings to check their handling + +var s1 = "I'm invalid because not terminated + +var s2 = "I'm valid, I have a line continuation:\ +; function bug1(){}"; + +var s3 = "I'm invalid because I'm not terminated either \ +var bug2 = 'this is inside the s3 string' +var s4 = 'this is a separate, valid string'
Modified: tests/ctags/js-broken-strings.js.tags 5 lines changed, 5 insertions(+), 0 deletions(-) =================================================================== @@ -0,0 +1,5 @@ +# format=tagmanager +s1�16384�0 +s2�16384�0 +s3�16384�0 +s4�16384�0
Modified: tests/ctags/js-string-continuation.js 19 lines changed, 19 insertions(+), 0 deletions(-) =================================================================== @@ -0,0 +1,19 @@ + +var o = { + "first": function(){}, + "sec\ +ond": function(){}, + "\ +t\ +h\ +i\ +r\ +d\ +": function(){}, + "fourth": function(){}, +}; + +o.first(); +o.second(); +o.third(); +o.fourth();
Modified: tests/ctags/js-string-continuation.js.tags 6 lines changed, 6 insertions(+), 0 deletions(-) =================================================================== @@ -0,0 +1,6 @@ +# format=tagmanager +first�128�()�o�0 +fourth�128�()�o�0 +o�16384�0 +second�128�()�o�0 +third�128�()�o�0
-------------- This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).