Branch: refs/heads/master Author: Colomban Wendling ban@herbesfolles.org Committer: Colomban Wendling ban@herbesfolles.org Date: Tue, 11 Nov 2014 01:01:41 UTC Commit: f765463af0aa8dad6d39399da2c138e8b5041bc4 https://github.com/geany/geany/commit/f765463af0aa8dad6d39399da2c138e8b5041b...
Log Message: ----------- Import new CSS parser from fishman-ctags
Some highlights: * Fixes handling of comments * Adds support for attribute and namespace selectors * Adds support for @supports blocks * Fixes tag type for many selectors * Adds support for pseudo-classes with arguments
Modified Paths: -------------- tagmanager/ctags/css.c tests/ctags/css-at-rules.css.tags tests/ctags/css-at-supports.css.tags tests/ctags/css-attribute-selectors.css.tags tests/ctags/css-namespace-selectors.css.tags tests/ctags/css-pseudo-classes.css.tags tests/ctags/css-simple.css.tags tests/ctags/css-singlequote-in-comment-issue2.css.tags tests/ctags/css-tag-types.css.tags
Modified: tagmanager/ctags/css.c 388 lines changed, 212 insertions(+), 176 deletions(-) =================================================================== @@ -1,224 +1,259 @@ /*************************************************************************** * css.c - * Character-based parser for Css definitions - * Author - Iago Rubio <iagorubio(at)users.sourceforge.net> - * - Bronisław Białek <after89(at)gmail.com> + * Token-based parser for CSS definitions + * Author - Colomban Wendling colomban@geany.org **************************************************************************/ #include "general.h"
-#include <string.h> -#include <ctype.h> +#include <string.h> +#include <ctype.h>
-#include "parse.h" -#include "read.h" +#include "entry.h" +#include "parse.h" +#include "read.h"
typedef enum eCssKinds { - K_NONE = -1, K_SELECTOR, K_ID, K_CLASS + K_CLASS, K_SELECTOR, K_ID } cssKind;
static kindOption CssKinds [] = { - { TRUE, 's', "struct", "selectors" }, - { TRUE, 'v', "variable", "identities" }, - { TRUE, 'c', "class", "classes" } + { TRUE, 'c', "class", "classes" }, + { TRUE, 's', "struct", "selectors" }, + { TRUE, 'v', "variable", "identities" } };
-typedef enum _CssParserState { /* state of parsing */ - P_STATE_NONE, /* default state */ - P_STATE_IN_COMMENT, /* into a comment, only multi line in CSS */ - P_STATE_IN_SINGLE_STRING, /* into a single quoted string */ - P_STATE_IN_DOUBLE_STRING, /* into a double quoted string */ - P_STATE_IN_DEFINITION, /* on the body of the style definition, nothing for us */ - P_STATE_IN_MEDIA, /* on a @media declaration, can be multi-line */ - P_STATE_IN_IMPORT, /* on a @import declaration, can be multi-line */ - P_STATE_IN_NAMESPACE, /* on a @namespace declaration */ - P_STATE_IN_PAGE, /* on a @page declaration */ - P_STATE_IN_FONTFACE, /* on a @font-face declaration */ - P_STATE_AT_END /* end of parsing */ -} CssParserState; - -static void makeCssSimpleTag( vString *name, cssKind kind, boolean delete ) +typedef enum { + /* any ASCII */ + TOKEN_EOF = 257, + TOKEN_SELECTOR, + TOKEN_STRING +} tokenType; + +typedef struct { + tokenType type; + vString *string; +} tokenInfo; + + +static boolean isSelectorChar (const int c) { - vStringTerminate (name); - makeSimpleTag (name, CssKinds, kind); - vStringClear (name); - if( delete ) - vStringDelete (name); + /* attribute selectors are handled separately */ + return (isalnum (c) || + c == '_' || // allowed char + c == '-' || // allowed char + c == '+' || // allow all sibling in a single tag + c == '>' || // allow all child in a single tag + c == '|' || // allow namespace separator + c == '(' || // allow pseudo-class arguments + c == ')' || + c == '.' || // allow classes and selectors + c == ':' || // allow pseudo classes + c == '*' || // allow globs as P + * + c == '#'); // allow ids }
-static boolean isCssDeclarationAllowedChar( const unsigned char *cp ) +static void parseSelector (vString *const string, const int firstChar) { - return isalnum ((int) *cp) || - isspace ((int) *cp) || - *cp == '_' || /* allowed char */ - *cp == '-' || /* allowed char */ - *cp == '+' || /* allow all sibling in a single tag */ - *cp == '>' || /* allow all child in a single tag */ - *cp == '{' || /* allow the start of the declaration */ - *cp == '.' || /* allow classes and selectors */ - *cp == ',' || /* allow multiple declarations */ - *cp == ':' || /* allow pseudo classes */ - *cp == '*' || /* allow globs as P + * */ - *cp == '#'; /* allow ids */ + int c = firstChar; + do + { + vStringPut (string, (char) c); + c = fileGetc (); + } while (isSelectorChar (c)); + fileUngetc (c); + vStringTerminate (string); }
-static CssParserState parseCssDeclaration( const unsigned char **position, cssKind kind, const char *aname) +static void readToken (tokenInfo *const token) { - const unsigned char *cp = *position; - vString *name = vStringNew (); - vStringCopyS(name, aname); - - /* pick to the end of line including children and sibling - * if declaration is multiline go for the next line */ - while ( isCssDeclarationAllowedChar(cp) || - *cp == '\0' ) /* track the end of line into the loop */ + int c; + + vStringClear (token->string); + +getNextChar: + + c = fileGetc (); + while (isspace (c)) + c = fileGetc (); + + token->type = c; + switch (c) { - if( *cp == ',' ) + case EOF: token->type = TOKEN_EOF; break; + + case '\'': + case '"': { - makeCssSimpleTag(name, kind, TRUE); - *position = cp; - return P_STATE_NONE; + const int delimiter = c; + do + { + vStringPut (token->string, c); + c = fileGetc (); + if (c == '\\') + c = fileGetc (); + } + while (c != EOF && c != delimiter); + if (c != EOF) + vStringPut (token->string, c); + token->type = TOKEN_STRING; + break; } - else if( *cp == '{' || *cp == '\0' ) - { /* assume that line end is the same as a starting definition (i.e. the { is on the next line */ - makeCssSimpleTag(name, kind, TRUE); - *position = cp; - return P_STATE_IN_DEFINITION; + + case '/': /* maybe comment start */ + { + int d = fileGetc (); + if (d != '*') + { + fileUngetc (d); + vStringPut (token->string, c); + token->type = c; + } + else + { + d = fileGetc (); + do + { + c = d; + d = fileGetc (); + } + while (d != EOF && ! (c == '*' && d == '/')); + goto getNextChar; + } + break; }
- vStringPut (name, (int) *cp); - ++cp; + default: + if (! isSelectorChar (c)) + { + vStringPut (token->string, c); + token->type = c; + } + else + { + parseSelector (token->string, c); + token->type = TOKEN_SELECTOR; + } + break; } +}
- makeCssSimpleTag(name, kind, TRUE); - *position = cp; +/* sets selector kind in @p kind if found, otherwise don't touches @p kind */ +static cssKind classifySelector (const vString *const selector) +{ + size_t i;
- return P_STATE_NONE; + for (i = vStringLength (selector); i > 0; --i) + { + char c = vStringItem (selector, i - 1); + if (c == '.') + return K_CLASS; + else if (c == '#') + return K_ID; + } + return K_SELECTOR; }
-static CssParserState parseCssLine( const unsigned char *line, CssParserState state ) +static void findCssTags (void) { - vString *aux; - vString *stack = vStringNew (); + boolean readNextToken = TRUE; + tokenInfo token;
- while( *line != '\0' ) /* fileReadLine returns NULL terminated strings */ + token.string = vStringNew (); + + do { - vStringClear (stack); - while (state == P_STATE_NONE && - (isspace ((int) *line) || isalnum ((int) *line) || ( *line == '*' && *(line-1) != '/' ))) - { - if ((stack->length > 0 && isspace((int) *line)) || isalnum ((int) *line) || *line == '*') { - vStringPut(stack, (int) *line); - } + if (readNextToken) + readToken (&token);
- ++line; + readNextToken = TRUE; + + if (token.type == '@') + { /* At-rules, from the "@" to the next block or semicolon */ + boolean useContents; + readToken (&token); + useContents = (strcmp (vStringValue (token.string), "media") == 0 || + strcmp (vStringValue (token.string), "supports") == 0); + while (token.type != TOKEN_EOF && + token.type != ';' && token.type != '{') + { + readToken (&token); + } + /* HACK: we *eat* the opening '{' for medias and the like so that + * the content is parsed as if it was at the root */ + readNextToken = useContents && token.type == '{'; } - vStringTerminate (stack); + else if (token.type == TOKEN_SELECTOR) + { /* collect selectors and make a tag */ + cssKind kind = K_SELECTOR; + MIOPos filePosition; + unsigned long lineNumber; + vString *selector = vStringNew (); + do + { + if (vStringLength (selector) > 0) + vStringPut (selector, ' '); + vStringCat (selector, token.string);
- switch( state ) - { - case P_STATE_NONE: - if( *line == '.' ) /* a class */ - state = parseCssDeclaration( &line, K_CLASS, vStringValue(stack) ); - else if( *line == '#' ) /* an id */ - state = parseCssDeclaration( &line, K_ID, vStringValue(stack) ); - else if( *line == '@' ) /* at-rules, we'll ignore them */ + kind = classifySelector (token.string); + lineNumber = getSourceLineNumber (); + filePosition = getInputFilePosition (); + + readToken (&token); + + /* handle attribute selectors */ + if (token.type == '[') { - ++line; - aux = vStringNew(); - while( !isspace((int) *line) ) + int depth = 1; + while (depth > 0 && token.type != TOKEN_EOF) { - vStringPut (aux, (int) *line); - ++line; + vStringCat (selector, token.string); + readToken (&token); + if (token.type == '[') + depth++; + else if (token.type == ']') + depth--; } - vStringTerminate (aux); - if( strcmp( aux->buffer, "media" ) == 0 ) - state = P_STATE_IN_MEDIA; - else if ( strcmp( aux->buffer, "import" ) == 0 ) - state = P_STATE_IN_IMPORT; - else if ( strcmp( aux->buffer, "namespace" ) == 0 ) - state = P_STATE_IN_NAMESPACE; - else if ( strcmp( aux->buffer, "page" ) == 0 ) - state = P_STATE_IN_PAGE; - else if ( strcmp( aux->buffer, "font-face" ) == 0 ) - state = P_STATE_IN_FONTFACE; - vStringDelete (aux); + if (token.type != TOKEN_EOF) + vStringCat (selector, token.string); + readToken (&token); } - else if( *line == '*' && *(line-1) == '/' ) /* multi-line comment */ - state = P_STATE_IN_COMMENT; - else if ( stack->length > 0 ) - state = parseCssDeclaration( &line, K_SELECTOR, vStringValue(stack) ); + } + while (token.type == TOKEN_SELECTOR); + /* we already consumed the next token, don't read it twice */ + readNextToken = FALSE;
- break; - case P_STATE_IN_COMMENT: - if( *line == '/' && *(line-1) == '*') - state = P_STATE_NONE; - break; - case P_STATE_IN_SINGLE_STRING: - if( *line == '\'' && *(line-1) != '\\' ) - state = P_STATE_IN_DEFINITION; /* PAGE, FONTFACE and DEFINITION are treated the same way */ - break; - case P_STATE_IN_DOUBLE_STRING: - if( *line=='"' && *(line-1) != '\\' ) - state = P_STATE_IN_DEFINITION; /* PAGE, FONTFACE and DEFINITION are treated the same way */ - break; - case P_STATE_IN_MEDIA: - /* skip to start of media body or line end */ - while( *line != '{' ) - { - if( *line == '\0' ) - break; - ++line; - } - if( *line == '{' ) - state = P_STATE_NONE; - break; - case P_STATE_IN_IMPORT: - case P_STATE_IN_NAMESPACE: - /* skip to end of declaration or line end */ - while( *line != ';' ) - { - if( *line == '\0' ) - break; - ++line; - } - if( *line == ';' ) - state = P_STATE_NONE; - break; - case P_STATE_IN_PAGE: - case P_STATE_IN_FONTFACE: - case P_STATE_IN_DEFINITION: - if( *line == '\0' ) - line = fileReadLine (); - if( *line == '}' ) - state = P_STATE_NONE; - else if( *line == '\'' ) - state = P_STATE_IN_SINGLE_STRING; - else if( *line == '"' ) - state = P_STATE_IN_DOUBLE_STRING; - break; - case P_STATE_AT_END: - return state; - break; + vStringTerminate (selector); + if (CssKinds[kind].enabled) + { + tagEntryInfo e; + initTagEntry (&e, vStringValue (selector)); + + e.lineNumber = lineNumber; + e.filePosition = filePosition; + e.kindName = CssKinds[kind].name; + e.kind = (char) CssKinds[kind].letter; + + makeTagEntry (&e); + } + vStringDelete (selector); + } + else if (token.type == '{') + { /* skip over { ... } */ + int depth = 1; + while (depth > 0 && token.type != TOKEN_EOF) + { + readToken (&token); + if (token.type == '{') + depth++; + else if (token.type == '}') + depth--; + } } - if (line == NULL) return P_STATE_AT_END; - line++; } - vStringDelete (stack); + while (token.type != TOKEN_EOF);
- return state; -} - -static void findCssTags (void) -{ - const unsigned char *line; - CssParserState state = P_STATE_NONE; - - while ( (line = fileReadLine ()) != NULL ) - { - state = parseCssLine( line, state ); - if( state==P_STATE_AT_END ) return; - } + vStringDelete (token.string); }
/* parser definition */ @@ -232,3 +267,4 @@ extern parserDefinition* CssParser (void) def->parser = findCssTags; return def; } +
Modified: tests/ctags/css-at-rules.css.tags 4 lines changed, 2 insertions(+), 2 deletions(-) =================================================================== @@ -1,3 +1,3 @@ # format=tagmanager -body �2048�0 -html �2048�0 +body�2048�0 +html�2048�0
Modified: tests/ctags/css-at-supports.css.tags 5 lines changed, 1 insertions(+), 4 deletions(-) =================================================================== @@ -1,5 +1,2 @@ # format=tagmanager -html �2048�0 -or �2048�0 -test-property2: 42�2048�0 -test-property: 42�2048�0 +html�2048�0
Modified: tests/ctags/css-attribute-selectors.css.tags 16 lines changed, 5 insertions(+), 11 deletions(-) =================================================================== @@ -1,12 +1,6 @@ # format=tagmanager -a�2048�0 -background-color: light-green�2048�0 -body �2048�0 -color: green�2048�0 -color: red�2048�0 -en�2048�0 -href�2048�0 -http:�2048�0 -https:�2048�0 -lang�2048�0 -p �2048�0 +a[href^="http://"]�2048�0 +a[href^="https://"]�2048�0 +a[lang~=en]�2048�0 +body�2048�0 +p�2048�0
Modified: tests/ctags/css-namespace-selectors.css.tags 7 lines changed, 3 insertions(+), 4 deletions(-) =================================================================== @@ -1,5 +1,4 @@ # format=tagmanager -*�2048�0 -a �2048�0 -div �2048�0 -svg�2048�0 +*|div�2048�0 +svg|a�2048�0 +|a�2048�0
Modified: tests/ctags/css-pseudo-classes.css.tags 33 lines changed, 11 insertions(+), 22 deletions(-) =================================================================== @@ -1,23 +1,12 @@ # format=tagmanager -0n+1�2048�0 -1�2048�0 -10n+0�2048�0 -2n�2048�0 -2n+1�2048�0 -a:lang�2048�0 -after �2048�0 -background-color: gray �2048�0 -background-color: light-gray �2048�0 -body :not�2048�0 -color: green�2048�0 -color: lime�2048�0 -color: red�2048�0 -en�2048�0 -even�2048�0 -first-child�2048�0 -n+3�2048�0 -odd�2048�0 -span�2048�0 -span:not�2048�0 -span:nth-child�2048�0 -tr:nth-child�2048�0 +a:lang(en):after�2048�0 +body :not(span)�2048�0 +span:not(:first-child)�2048�0 +span:nth-child(-n+3)�2048�0 +span:nth-child(0n+1)�2048�0 +span:nth-child(1)�2048�0 +tr:nth-child(10n+0)�2048�0 +tr:nth-child(2n)�2048�0 +tr:nth-child(2n+1)�2048�0 +tr:nth-child(even)�2048�0 +tr:nth-child(odd)�2048�0
Modified: tests/ctags/css-simple.css.tags 18 lines changed, 9 insertions(+), 9 deletions(-) =================================================================== @@ -1,11 +1,11 @@ # format=tagmanager -#footer �16384�0 -* �2048�0 -.foo a�1�0 -.foo b �1�0 -.header �1�0 -.red �1�0 -div.magic �1�0 +#footer�16384�0 +*�2048�0 +.foo a�2048�0 +.foo b�2048�0 +.header�1�0 +.red�1�0 +div.magic�1�0 html�2048�0 -ul > li > a �2048�0 -ul li �2048�0 +ul > li > a�2048�0 +ul li�2048�0
Modified: tests/ctags/css-singlequote-in-comment-issue2.css.tags 1 lines changed, 1 insertions(+), 0 deletions(-) =================================================================== @@ -1,2 +1,3 @@ # format=tagmanager a�2048�0 +b�2048�0
Modified: tests/ctags/css-tag-types.css.tags 40 lines changed, 20 insertions(+), 20 deletions(-) =================================================================== @@ -1,36 +1,36 @@ # format=tagmanager #a #foo�16384�0 #a #foo #bar�16384�0 -#a #foo .bar�16384�0 -#a #foo bar�16384�0 -#a .foo�16384�0 +#a #foo .bar�1�0 +#a #foo bar�2048�0 +#a .foo�1�0 #a .foo #bar�16384�0 -#a .foo .bar�16384�0 -#a .foo bar�16384�0 -#a foo�16384�0 +#a .foo .bar�1�0 +#a .foo bar�2048�0 +#a foo�2048�0 #a foo #bar�16384�0 -#a foo .bar�16384�0 -#a foo bar�16384�0 -.a #foo�1�0 -.a #foo #bar�1�0 +#a foo .bar�1�0 +#a foo bar�2048�0 +.a #foo�16384�0 +.a #foo #bar�16384�0 .a #foo .bar�1�0 -.a #foo bar�1�0 +.a #foo bar�2048�0 .a .foo�1�0 -.a .foo #bar�1�0 +.a .foo #bar�16384�0 .a .foo .bar�1�0 -.a .foo bar�1�0 -.a foo�1�0 -.a foo #bar�1�0 +.a .foo bar�2048�0 +.a foo�2048�0 +.a foo #bar�16384�0 .a foo .bar�1�0 -.a foo bar�1�0 +.a foo bar�2048�0 a #foo�16384�0 a #foo #bar�16384�0 -a #foo .bar�16384�0 -a #foo bar�16384�0 +a #foo .bar�1�0 +a #foo bar�2048�0 a .foo�1�0 -a .foo #bar�1�0 +a .foo #bar�16384�0 a .foo .bar�1�0 -a .foo bar�1�0 +a .foo bar�2048�0 a foo�2048�0 a foo #bar�16384�0 a foo .bar�1�0
-------------- This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).