[geany/geany] f76546: Import new CSS parser from fishman-ctags
Colomban Wendling
git-noreply at xxxxx
Tue Nov 11 01:01:41 UTC 2014
Branch: refs/heads/master
Author: Colomban Wendling <ban at herbesfolles.org>
Committer: Colomban Wendling <ban at herbesfolles.org>
Date: Tue, 11 Nov 2014 01:01:41 UTC
Commit: f765463af0aa8dad6d39399da2c138e8b5041bc4
https://github.com/geany/geany/commit/f765463af0aa8dad6d39399da2c138e8b5041bc4
Log Message:
-----------
Import new CSS parser from fishman-ctags
Some highlights:
* Fixes handling of comments
* Adds support for attribute and namespace selectors
* Adds support for @supports blocks
* Fixes tag type for many selectors
* Adds support for pseudo-classes with arguments
Modified Paths:
--------------
tagmanager/ctags/css.c
tests/ctags/css-at-rules.css.tags
tests/ctags/css-at-supports.css.tags
tests/ctags/css-attribute-selectors.css.tags
tests/ctags/css-namespace-selectors.css.tags
tests/ctags/css-pseudo-classes.css.tags
tests/ctags/css-simple.css.tags
tests/ctags/css-singlequote-in-comment-issue2.css.tags
tests/ctags/css-tag-types.css.tags
Modified: tagmanager/ctags/css.c
388 lines changed, 212 insertions(+), 176 deletions(-)
===================================================================
@@ -1,224 +1,259 @@
/***************************************************************************
* css.c
- * Character-based parser for Css definitions
- * Author - Iago Rubio <iagorubio(at)users.sourceforge.net>
- * - Bronisław Białek <after89(at)gmail.com>
+ * Token-based parser for CSS definitions
+ * Author - Colomban Wendling <colomban at geany.org>
**************************************************************************/
#include "general.h"
-#include <string.h>
-#include <ctype.h>
+#include <string.h>
+#include <ctype.h>
-#include "parse.h"
-#include "read.h"
+#include "entry.h"
+#include "parse.h"
+#include "read.h"
typedef enum eCssKinds {
- K_NONE = -1, K_SELECTOR, K_ID, K_CLASS
+ K_CLASS, K_SELECTOR, K_ID
} cssKind;
static kindOption CssKinds [] = {
- { TRUE, 's', "struct", "selectors" },
- { TRUE, 'v', "variable", "identities" },
- { TRUE, 'c', "class", "classes" }
+ { TRUE, 'c', "class", "classes" },
+ { TRUE, 's', "struct", "selectors" },
+ { TRUE, 'v', "variable", "identities" }
};
-typedef enum _CssParserState { /* state of parsing */
- P_STATE_NONE, /* default state */
- P_STATE_IN_COMMENT, /* into a comment, only multi line in CSS */
- P_STATE_IN_SINGLE_STRING, /* into a single quoted string */
- P_STATE_IN_DOUBLE_STRING, /* into a double quoted string */
- P_STATE_IN_DEFINITION, /* on the body of the style definition, nothing for us */
- P_STATE_IN_MEDIA, /* on a @media declaration, can be multi-line */
- P_STATE_IN_IMPORT, /* on a @import declaration, can be multi-line */
- P_STATE_IN_NAMESPACE, /* on a @namespace declaration */
- P_STATE_IN_PAGE, /* on a @page declaration */
- P_STATE_IN_FONTFACE, /* on a @font-face declaration */
- P_STATE_AT_END /* end of parsing */
-} CssParserState;
-
-static void makeCssSimpleTag( vString *name, cssKind kind, boolean delete )
+typedef enum {
+ /* any ASCII */
+ TOKEN_EOF = 257,
+ TOKEN_SELECTOR,
+ TOKEN_STRING
+} tokenType;
+
+typedef struct {
+ tokenType type;
+ vString *string;
+} tokenInfo;
+
+
+static boolean isSelectorChar (const int c)
{
- vStringTerminate (name);
- makeSimpleTag (name, CssKinds, kind);
- vStringClear (name);
- if( delete )
- vStringDelete (name);
+ /* attribute selectors are handled separately */
+ return (isalnum (c) ||
+ c == '_' || // allowed char
+ c == '-' || // allowed char
+ c == '+' || // allow all sibling in a single tag
+ c == '>' || // allow all child in a single tag
+ c == '|' || // allow namespace separator
+ c == '(' || // allow pseudo-class arguments
+ c == ')' ||
+ c == '.' || // allow classes and selectors
+ c == ':' || // allow pseudo classes
+ c == '*' || // allow globs as P + *
+ c == '#'); // allow ids
}
-static boolean isCssDeclarationAllowedChar( const unsigned char *cp )
+static void parseSelector (vString *const string, const int firstChar)
{
- return isalnum ((int) *cp) ||
- isspace ((int) *cp) ||
- *cp == '_' || /* allowed char */
- *cp == '-' || /* allowed char */
- *cp == '+' || /* allow all sibling in a single tag */
- *cp == '>' || /* allow all child in a single tag */
- *cp == '{' || /* allow the start of the declaration */
- *cp == '.' || /* allow classes and selectors */
- *cp == ',' || /* allow multiple declarations */
- *cp == ':' || /* allow pseudo classes */
- *cp == '*' || /* allow globs as P + * */
- *cp == '#'; /* allow ids */
+ int c = firstChar;
+ do
+ {
+ vStringPut (string, (char) c);
+ c = fileGetc ();
+ } while (isSelectorChar (c));
+ fileUngetc (c);
+ vStringTerminate (string);
}
-static CssParserState parseCssDeclaration( const unsigned char **position, cssKind kind, const char *aname)
+static void readToken (tokenInfo *const token)
{
- const unsigned char *cp = *position;
- vString *name = vStringNew ();
- vStringCopyS(name, aname);
-
- /* pick to the end of line including children and sibling
- * if declaration is multiline go for the next line */
- while ( isCssDeclarationAllowedChar(cp) ||
- *cp == '\0' ) /* track the end of line into the loop */
+ int c;
+
+ vStringClear (token->string);
+
+getNextChar:
+
+ c = fileGetc ();
+ while (isspace (c))
+ c = fileGetc ();
+
+ token->type = c;
+ switch (c)
{
- if( *cp == ',' )
+ case EOF: token->type = TOKEN_EOF; break;
+
+ case '\'':
+ case '"':
{
- makeCssSimpleTag(name, kind, TRUE);
- *position = cp;
- return P_STATE_NONE;
+ const int delimiter = c;
+ do
+ {
+ vStringPut (token->string, c);
+ c = fileGetc ();
+ if (c == '\\')
+ c = fileGetc ();
+ }
+ while (c != EOF && c != delimiter);
+ if (c != EOF)
+ vStringPut (token->string, c);
+ token->type = TOKEN_STRING;
+ break;
}
- else if( *cp == '{' || *cp == '\0' )
- { /* assume that line end is the same as a starting definition (i.e. the { is on the next line */
- makeCssSimpleTag(name, kind, TRUE);
- *position = cp;
- return P_STATE_IN_DEFINITION;
+
+ case '/': /* maybe comment start */
+ {
+ int d = fileGetc ();
+ if (d != '*')
+ {
+ fileUngetc (d);
+ vStringPut (token->string, c);
+ token->type = c;
+ }
+ else
+ {
+ d = fileGetc ();
+ do
+ {
+ c = d;
+ d = fileGetc ();
+ }
+ while (d != EOF && ! (c == '*' && d == '/'));
+ goto getNextChar;
+ }
+ break;
}
- vStringPut (name, (int) *cp);
- ++cp;
+ default:
+ if (! isSelectorChar (c))
+ {
+ vStringPut (token->string, c);
+ token->type = c;
+ }
+ else
+ {
+ parseSelector (token->string, c);
+ token->type = TOKEN_SELECTOR;
+ }
+ break;
}
+}
- makeCssSimpleTag(name, kind, TRUE);
- *position = cp;
+/* returns the kind of @p selector: K_CLASS or K_ID depending on whether a '.' or a '#' appears last in it, K_SELECTOR if it contains neither */
+static cssKind classifySelector (const vString *const selector)
+{
+ size_t i;
- return P_STATE_NONE;
+ for (i = vStringLength (selector); i > 0; --i)
+ {
+ char c = vStringItem (selector, i - 1);
+ if (c == '.')
+ return K_CLASS;
+ else if (c == '#')
+ return K_ID;
+ }
+ return K_SELECTOR;
}
-static CssParserState parseCssLine( const unsigned char *line, CssParserState state )
+static void findCssTags (void)
{
- vString *aux;
- vString *stack = vStringNew ();
+ boolean readNextToken = TRUE;
+ tokenInfo token;
- while( *line != '\0' ) /* fileReadLine returns NULL terminated strings */
+ token.string = vStringNew ();
+
+ do
{
- vStringClear (stack);
- while (state == P_STATE_NONE &&
- (isspace ((int) *line) || isalnum ((int) *line) || ( *line == '*' && *(line-1) != '/' )))
- {
- if ((stack->length > 0 && isspace((int) *line)) || isalnum ((int) *line) || *line == '*') {
- vStringPut(stack, (int) *line);
- }
+ if (readNextToken)
+ readToken (&token);
- ++line;
+ readNextToken = TRUE;
+
+ if (token.type == '@')
+ { /* At-rules, from the "@" to the next block or semicolon */
+ boolean useContents;
+ readToken (&token);
+ useContents = (strcmp (vStringValue (token.string), "media") == 0 ||
+ strcmp (vStringValue (token.string), "supports") == 0);
+ while (token.type != TOKEN_EOF &&
+ token.type != ';' && token.type != '{')
+ {
+ readToken (&token);
+ }
+ /* HACK: we *eat* the opening '{' for medias and the like so that
+ * the content is parsed as if it was at the root */
+ readNextToken = useContents && token.type == '{';
}
- vStringTerminate (stack);
+ else if (token.type == TOKEN_SELECTOR)
+ { /* collect selectors and make a tag */
+ cssKind kind = K_SELECTOR;
+ MIOPos filePosition;
+ unsigned long lineNumber;
+ vString *selector = vStringNew ();
+ do
+ {
+ if (vStringLength (selector) > 0)
+ vStringPut (selector, ' ');
+ vStringCat (selector, token.string);
- switch( state )
- {
- case P_STATE_NONE:
- if( *line == '.' ) /* a class */
- state = parseCssDeclaration( &line, K_CLASS, vStringValue(stack) );
- else if( *line == '#' ) /* an id */
- state = parseCssDeclaration( &line, K_ID, vStringValue(stack) );
- else if( *line == '@' ) /* at-rules, we'll ignore them */
+ kind = classifySelector (token.string);
+ lineNumber = getSourceLineNumber ();
+ filePosition = getInputFilePosition ();
+
+ readToken (&token);
+
+ /* handle attribute selectors */
+ if (token.type == '[')
{
- ++line;
- aux = vStringNew();
- while( !isspace((int) *line) )
+ int depth = 1;
+ while (depth > 0 && token.type != TOKEN_EOF)
{
- vStringPut (aux, (int) *line);
- ++line;
+ vStringCat (selector, token.string);
+ readToken (&token);
+ if (token.type == '[')
+ depth++;
+ else if (token.type == ']')
+ depth--;
}
- vStringTerminate (aux);
- if( strcmp( aux->buffer, "media" ) == 0 )
- state = P_STATE_IN_MEDIA;
- else if ( strcmp( aux->buffer, "import" ) == 0 )
- state = P_STATE_IN_IMPORT;
- else if ( strcmp( aux->buffer, "namespace" ) == 0 )
- state = P_STATE_IN_NAMESPACE;
- else if ( strcmp( aux->buffer, "page" ) == 0 )
- state = P_STATE_IN_PAGE;
- else if ( strcmp( aux->buffer, "font-face" ) == 0 )
- state = P_STATE_IN_FONTFACE;
- vStringDelete (aux);
+ if (token.type != TOKEN_EOF)
+ vStringCat (selector, token.string);
+ readToken (&token);
}
- else if( *line == '*' && *(line-1) == '/' ) /* multi-line comment */
- state = P_STATE_IN_COMMENT;
- else if ( stack->length > 0 )
- state = parseCssDeclaration( &line, K_SELECTOR, vStringValue(stack) );
+ }
+ while (token.type == TOKEN_SELECTOR);
+ /* we already consumed the next token, don't read it twice */
+ readNextToken = FALSE;
- break;
- case P_STATE_IN_COMMENT:
- if( *line == '/' && *(line-1) == '*')
- state = P_STATE_NONE;
- break;
- case P_STATE_IN_SINGLE_STRING:
- if( *line == '\'' && *(line-1) != '\\' )
- state = P_STATE_IN_DEFINITION; /* PAGE, FONTFACE and DEFINITION are treated the same way */
- break;
- case P_STATE_IN_DOUBLE_STRING:
- if( *line=='"' && *(line-1) != '\\' )
- state = P_STATE_IN_DEFINITION; /* PAGE, FONTFACE and DEFINITION are treated the same way */
- break;
- case P_STATE_IN_MEDIA:
- /* skip to start of media body or line end */
- while( *line != '{' )
- {
- if( *line == '\0' )
- break;
- ++line;
- }
- if( *line == '{' )
- state = P_STATE_NONE;
- break;
- case P_STATE_IN_IMPORT:
- case P_STATE_IN_NAMESPACE:
- /* skip to end of declaration or line end */
- while( *line != ';' )
- {
- if( *line == '\0' )
- break;
- ++line;
- }
- if( *line == ';' )
- state = P_STATE_NONE;
- break;
- case P_STATE_IN_PAGE:
- case P_STATE_IN_FONTFACE:
- case P_STATE_IN_DEFINITION:
- if( *line == '\0' )
- line = fileReadLine ();
- if( *line == '}' )
- state = P_STATE_NONE;
- else if( *line == '\'' )
- state = P_STATE_IN_SINGLE_STRING;
- else if( *line == '"' )
- state = P_STATE_IN_DOUBLE_STRING;
- break;
- case P_STATE_AT_END:
- return state;
- break;
+ vStringTerminate (selector);
+ if (CssKinds[kind].enabled)
+ {
+ tagEntryInfo e;
+ initTagEntry (&e, vStringValue (selector));
+
+ e.lineNumber = lineNumber;
+ e.filePosition = filePosition;
+ e.kindName = CssKinds[kind].name;
+ e.kind = (char) CssKinds[kind].letter;
+
+ makeTagEntry (&e);
+ }
+ vStringDelete (selector);
+ }
+ else if (token.type == '{')
+ { /* skip over { ... } */
+ int depth = 1;
+ while (depth > 0 && token.type != TOKEN_EOF)
+ {
+ readToken (&token);
+ if (token.type == '{')
+ depth++;
+ else if (token.type == '}')
+ depth--;
+ }
}
- if (line == NULL) return P_STATE_AT_END;
- line++;
}
- vStringDelete (stack);
+ while (token.type != TOKEN_EOF);
- return state;
-}
-
-static void findCssTags (void)
-{
- const unsigned char *line;
- CssParserState state = P_STATE_NONE;
-
- while ( (line = fileReadLine ()) != NULL )
- {
- state = parseCssLine( line, state );
- if( state==P_STATE_AT_END ) return;
- }
+ vStringDelete (token.string);
}
/* parser definition */
@@ -232,3 +267,4 @@ extern parserDefinition* CssParser (void)
def->parser = findCssTags;
return def;
}
+
Modified: tests/ctags/css-at-rules.css.tags
4 lines changed, 2 insertions(+), 2 deletions(-)
===================================================================
@@ -1,3 +1,3 @@
# format=tagmanager
-body �2048�0
-html �2048�0
+body�2048�0
+html�2048�0
Modified: tests/ctags/css-at-supports.css.tags
5 lines changed, 1 insertions(+), 4 deletions(-)
===================================================================
@@ -1,5 +1,2 @@
# format=tagmanager
-html �2048�0
-or �2048�0
-test-property2: 42�2048�0
-test-property: 42�2048�0
+html�2048�0
Modified: tests/ctags/css-attribute-selectors.css.tags
16 lines changed, 5 insertions(+), 11 deletions(-)
===================================================================
@@ -1,12 +1,6 @@
# format=tagmanager
-a�2048�0
-background-color: light-green�2048�0
-body �2048�0
-color: green�2048�0
-color: red�2048�0
-en�2048�0
-href�2048�0
-http:�2048�0
-https:�2048�0
-lang�2048�0
-p �2048�0
+a[href^="http://"]�2048�0
+a[href^="https://"]�2048�0
+a[lang~=en]�2048�0
+body�2048�0
+p�2048�0
Modified: tests/ctags/css-namespace-selectors.css.tags
7 lines changed, 3 insertions(+), 4 deletions(-)
===================================================================
@@ -1,5 +1,4 @@
# format=tagmanager
-*�2048�0
-a �2048�0
-div �2048�0
-svg�2048�0
+*|div�2048�0
+svg|a�2048�0
+|a�2048�0
Modified: tests/ctags/css-pseudo-classes.css.tags
33 lines changed, 11 insertions(+), 22 deletions(-)
===================================================================
@@ -1,23 +1,12 @@
# format=tagmanager
-0n+1�2048�0
-1�2048�0
-10n+0�2048�0
-2n�2048�0
-2n+1�2048�0
-a:lang�2048�0
-after �2048�0
-background-color: gray �2048�0
-background-color: light-gray �2048�0
-body :not�2048�0
-color: green�2048�0
-color: lime�2048�0
-color: red�2048�0
-en�2048�0
-even�2048�0
-first-child�2048�0
-n+3�2048�0
-odd�2048�0
-span�2048�0
-span:not�2048�0
-span:nth-child�2048�0
-tr:nth-child�2048�0
+a:lang(en):after�2048�0
+body :not(span)�2048�0
+span:not(:first-child)�2048�0
+span:nth-child(-n+3)�2048�0
+span:nth-child(0n+1)�2048�0
+span:nth-child(1)�2048�0
+tr:nth-child(10n+0)�2048�0
+tr:nth-child(2n)�2048�0
+tr:nth-child(2n+1)�2048�0
+tr:nth-child(even)�2048�0
+tr:nth-child(odd)�2048�0
Modified: tests/ctags/css-simple.css.tags
18 lines changed, 9 insertions(+), 9 deletions(-)
===================================================================
@@ -1,11 +1,11 @@
# format=tagmanager
-#footer �16384�0
-* �2048�0
-.foo a�1�0
-.foo b �1�0
-.header �1�0
-.red �1�0
-div.magic �1�0
+#footer�16384�0
+*�2048�0
+.foo a�2048�0
+.foo b�2048�0
+.header�1�0
+.red�1�0
+div.magic�1�0
html�2048�0
-ul > li > a �2048�0
-ul li �2048�0
+ul > li > a�2048�0
+ul li�2048�0
Modified: tests/ctags/css-singlequote-in-comment-issue2.css.tags
1 lines changed, 1 insertions(+), 0 deletions(-)
===================================================================
@@ -1,2 +1,3 @@
# format=tagmanager
a�2048�0
+b�2048�0
Modified: tests/ctags/css-tag-types.css.tags
40 lines changed, 20 insertions(+), 20 deletions(-)
===================================================================
@@ -1,36 +1,36 @@
# format=tagmanager
#a #foo�16384�0
#a #foo #bar�16384�0
-#a #foo .bar�16384�0
-#a #foo bar�16384�0
-#a .foo�16384�0
+#a #foo .bar�1�0
+#a #foo bar�2048�0
+#a .foo�1�0
#a .foo #bar�16384�0
-#a .foo .bar�16384�0
-#a .foo bar�16384�0
-#a foo�16384�0
+#a .foo .bar�1�0
+#a .foo bar�2048�0
+#a foo�2048�0
#a foo #bar�16384�0
-#a foo .bar�16384�0
-#a foo bar�16384�0
-.a #foo�1�0
-.a #foo #bar�1�0
+#a foo .bar�1�0
+#a foo bar�2048�0
+.a #foo�16384�0
+.a #foo #bar�16384�0
.a #foo .bar�1�0
-.a #foo bar�1�0
+.a #foo bar�2048�0
.a .foo�1�0
-.a .foo #bar�1�0
+.a .foo #bar�16384�0
.a .foo .bar�1�0
-.a .foo bar�1�0
-.a foo�1�0
-.a foo #bar�1�0
+.a .foo bar�2048�0
+.a foo�2048�0
+.a foo #bar�16384�0
.a foo .bar�1�0
-.a foo bar�1�0
+.a foo bar�2048�0
a #foo�16384�0
a #foo #bar�16384�0
-a #foo .bar�16384�0
-a #foo bar�16384�0
+a #foo .bar�1�0
+a #foo bar�2048�0
a .foo�1�0
-a .foo #bar�1�0
+a .foo #bar�16384�0
a .foo .bar�1�0
-a .foo bar�1�0
+a .foo bar�2048�0
a foo�2048�0
a foo #bar�16384�0
a foo .bar�1�0
--------------
This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).
More information about the Commits mailing list