[geany/geany] f76546: Import new CSS parser from fishman-ctags

Colomban Wendling git-noreply at xxxxx
Tue Nov 11 01:01:41 UTC 2014


Branch:      refs/heads/master
Author:      Colomban Wendling <ban at herbesfolles.org>
Committer:   Colomban Wendling <ban at herbesfolles.org>
Date:        Tue, 11 Nov 2014 01:01:41 UTC
Commit:      f765463af0aa8dad6d39399da2c138e8b5041bc4
             https://github.com/geany/geany/commit/f765463af0aa8dad6d39399da2c138e8b5041bc4

Log Message:
-----------
Import new CSS parser from fishman-ctags

Some highlights:
* Fixes handling of comments
* Adds support for attribute and namespace selectors
* Adds support for @supports blocks
* Fixes tag type for many selectors
* Adds support for pseudo-classes with arguments


Modified Paths:
--------------
    tagmanager/ctags/css.c
    tests/ctags/css-at-rules.css.tags
    tests/ctags/css-at-supports.css.tags
    tests/ctags/css-attribute-selectors.css.tags
    tests/ctags/css-namespace-selectors.css.tags
    tests/ctags/css-pseudo-classes.css.tags
    tests/ctags/css-simple.css.tags
    tests/ctags/css-singlequote-in-comment-issue2.css.tags
    tests/ctags/css-tag-types.css.tags

Modified: tagmanager/ctags/css.c
388 lines changed, 212 insertions(+), 176 deletions(-)
===================================================================
@@ -1,224 +1,259 @@
 /***************************************************************************
  * css.c
- * Character-based parser for Css definitions
- * Author - Iago Rubio <iagorubio(at)users.sourceforge.net>
- *        - Bronisław Białek <after89(at)gmail.com>
+ * Token-based parser for CSS definitions
+ * Author - Colomban Wendling <colomban at geany.org>
  **************************************************************************/
 #include "general.h"
 
-#include <string.h>
-#include <ctype.h>
+#include <string.h> 
+#include <ctype.h> 
 
-#include "parse.h"
-#include "read.h"
+#include "entry.h"
+#include "parse.h" 
+#include "read.h" 
 
 
 typedef enum eCssKinds {
-    K_NONE = -1, K_SELECTOR, K_ID, K_CLASS
+	K_CLASS, K_SELECTOR, K_ID
 } cssKind;
 
 static kindOption CssKinds [] = {
-    { TRUE, 's', "struct",  "selectors"  },
-    { TRUE, 'v', "variable",  "identities"  },
-    { TRUE, 'c', "class", "classes" }
+	{ TRUE, 'c', "class",		"classes" },
+	{ TRUE, 's', "struct",		"selectors" },
+	{ TRUE, 'v', "variable",	"identities" }
 };
 
-typedef enum _CssParserState {	/* state of parsing */
-	P_STATE_NONE,  				/* default state */
-	P_STATE_IN_COMMENT,			/* into a comment, only multi line in CSS */
-	P_STATE_IN_SINGLE_STRING,	/* into a single quoted string */
-	P_STATE_IN_DOUBLE_STRING,	/* into a double quoted string */
-	P_STATE_IN_DEFINITION,		/* on the body of the style definition, nothing for us */
-	P_STATE_IN_MEDIA,			/* on a @media declaration, can be multi-line */
-	P_STATE_IN_IMPORT,			/* on a @import declaration, can be multi-line */
-	P_STATE_IN_NAMESPACE,		/* on a @namespace declaration */
-	P_STATE_IN_PAGE,			/* on a @page declaration */
-	P_STATE_IN_FONTFACE,		/* on a @font-face declaration */
-	P_STATE_AT_END				/* end of parsing */
-} CssParserState;
-
-static void makeCssSimpleTag( vString *name, cssKind kind, boolean delete )
+typedef enum {
+	/* any ASCII */
+	TOKEN_EOF = 257,
+	TOKEN_SELECTOR,
+	TOKEN_STRING
+} tokenType;
+
+typedef struct {
+	tokenType type;
+	vString *string;
+} tokenInfo;
+
+
+static boolean isSelectorChar (const int c)
 {
-	vStringTerminate (name);
-	makeSimpleTag (name, CssKinds, kind);
-	vStringClear (name);
-	if( delete )
-		vStringDelete (name);
+	/* attribute selectors are handled separately */
+	return (isalnum (c) ||
+			c == '_' || // allowed char
+			c == '-' || // allowed char
+			c == '+' || // allow all sibling in a single tag
+			c == '>' || // allow all child in a single tag
+			c == '|' || // allow namespace separator
+			c == '(' || // allow pseudo-class arguments
+			c == ')' ||
+			c == '.' || // allow classes and selectors
+			c == ':' || // allow pseudo classes
+			c == '*' || // allow globs as P + *
+			c == '#');  // allow ids
 }
 
-static boolean isCssDeclarationAllowedChar( const unsigned char *cp )
+static void parseSelector (vString *const string, const int firstChar)
 {
-	return  isalnum ((int) *cp) ||
-			isspace ((int) *cp) ||
-			*cp == '_' ||	/* allowed char */
-			*cp == '-' ||	/* allowed char */
-			*cp == '+' ||   /* allow all sibling in a single tag */
-			*cp == '>' ||   /* allow all child in a single tag */
-			*cp == '{' || 	/* allow the start of the declaration */
-			*cp == '.' || 	/* allow classes and selectors */
-			*cp == ',' || 	/* allow multiple declarations */
-			*cp == ':' ||   /* allow pseudo classes */
-			*cp == '*' || 	/* allow globs as P + * */
-			*cp == '#';		/* allow ids */
+	int c = firstChar;
+	do
+	{
+		vStringPut (string, (char) c);
+		c = fileGetc ();
+	} while (isSelectorChar (c));
+	fileUngetc (c);
+	vStringTerminate (string);
 }
 
-static CssParserState parseCssDeclaration( const unsigned char **position, cssKind kind, const char *aname)
+static void readToken (tokenInfo *const token)
 {
-	const unsigned char *cp = *position;
-	vString *name = vStringNew ();
-	vStringCopyS(name, aname);
-
-	/* pick to the end of line including children and sibling
-	 * if declaration is multiline go for the next line */
-	while ( isCssDeclarationAllowedChar(cp) ||
-			*cp == '\0' ) 	/* track the end of line into the loop */
+	int c;
+
+	vStringClear (token->string);
+
+getNextChar:
+
+	c = fileGetc ();
+	while (isspace (c))
+		c = fileGetc ();
+
+	token->type = c;
+	switch (c)
 	{
-		if( *cp == ',' )
+		case EOF: token->type = TOKEN_EOF; break;
+
+		case '\'':
+		case '"':
 		{
-			makeCssSimpleTag(name, kind, TRUE);
-			*position = cp;
-			return P_STATE_NONE;
+			const int delimiter = c;
+			do
+			{
+				vStringPut (token->string, c);
+				c = fileGetc ();
+				if (c == '\\')
+					c = fileGetc ();
+			}
+			while (c != EOF && c != delimiter);
+			if (c != EOF)
+				vStringPut (token->string, c);
+			token->type = TOKEN_STRING;
+			break;
 		}
-		else if( *cp == '{' || *cp == '\0' )
-		{ /* assume that line end is the same as a starting definition (i.e. the { is on the next line */
-			makeCssSimpleTag(name, kind, TRUE);
-			*position = cp;
-			return P_STATE_IN_DEFINITION;
+
+		case '/': /* maybe comment start */
+		{
+			int d = fileGetc ();
+			if (d != '*')
+			{
+				fileUngetc (d);
+				vStringPut (token->string, c);
+				token->type = c;
+			}
+			else
+			{
+				d = fileGetc ();
+				do
+				{
+					c = d;
+					d = fileGetc ();
+				}
+				while (d != EOF && ! (c == '*' && d == '/'));
+				goto getNextChar;
+			}
+			break;
 		}
 
-		vStringPut (name, (int) *cp);
-		++cp;
+		default:
+			if (! isSelectorChar (c))
+			{
+				vStringPut (token->string, c);
+				token->type = c;
+			}
+			else
+			{
+				parseSelector (token->string, c);
+				token->type = TOKEN_SELECTOR;
+			}
+			break;
 	}
+}
 
-	makeCssSimpleTag(name, kind, TRUE);
-	*position = cp;
+/* returns the kind of @p selector, decided by the last '.' (K_CLASS) or '#' (K_ID) it contains, or K_SELECTOR if it has neither */
+static cssKind classifySelector (const vString *const selector)
+{
+	size_t i;
 
-	return P_STATE_NONE;
+	for (i = vStringLength (selector); i > 0; --i)
+	{
+		char c = vStringItem (selector, i - 1);
+		if (c == '.')
+			return K_CLASS;
+		else if (c == '#')
+			return K_ID;
+	}
+	return K_SELECTOR;
 }
 
-static CssParserState parseCssLine( const unsigned char *line, CssParserState state )
+static void findCssTags (void)
 {
-	vString *aux;
-	vString *stack = vStringNew ();
+	boolean readNextToken = TRUE;
+	tokenInfo token;
 
-	while( *line != '\0' ) /* fileReadLine returns NULL terminated strings */
+	token.string = vStringNew ();
+
+	do
 	{
-		vStringClear (stack);
-		while (state == P_STATE_NONE &&
-			(isspace ((int) *line) || isalnum ((int) *line) || ( *line == '*' && *(line-1) != '/' )))
-		{
-			if ((stack->length > 0 && isspace((int) *line)) || isalnum ((int) *line) || *line == '*') {
-				vStringPut(stack, (int) *line);
-			}
+		if (readNextToken)
+			readToken (&token);
 
-			++line;
+		readNextToken = TRUE;
+
+		if (token.type == '@')
+		{ /* At-rules, from the "@" to the next block or semicolon */
+			boolean useContents;
+			readToken (&token);
+			useContents = (strcmp (vStringValue (token.string), "media") == 0 ||
+						   strcmp (vStringValue (token.string), "supports") == 0);
+			while (token.type != TOKEN_EOF &&
+				   token.type != ';' && token.type != '{')
+			{
+				readToken (&token);
+			}
+			/* HACK: we *eat* the opening '{' for medias and the like so that
+			 *       the content is parsed as if it was at the root */
+			readNextToken = useContents && token.type == '{';
 		}
-		vStringTerminate (stack);
+		else if (token.type == TOKEN_SELECTOR)
+		{ /* collect selectors and make a tag */
+			cssKind kind = K_SELECTOR;
+			MIOPos filePosition;
+			unsigned long lineNumber;
+			vString *selector = vStringNew ();
+			do
+			{
+				if (vStringLength (selector) > 0)
+					vStringPut (selector, ' ');
+				vStringCat (selector, token.string);
 
-		switch( state )
-		{
-			case P_STATE_NONE:
-				if( *line == '.' ) /* a class */
-					state = parseCssDeclaration( &line, K_CLASS, vStringValue(stack) );
-				else if( *line == '#' ) /* an id */
-					state = parseCssDeclaration( &line, K_ID, vStringValue(stack) );
-				else if( *line == '@' ) /* at-rules, we'll ignore them */
+				kind = classifySelector (token.string);
+				lineNumber = getSourceLineNumber ();
+				filePosition = getInputFilePosition ();
+
+				readToken (&token);
+
+				/* handle attribute selectors */
+				if (token.type == '[')
 				{
-					++line;
-					aux = vStringNew();
-					while( !isspace((int) *line) )
+					int depth = 1;
+					while (depth > 0 && token.type != TOKEN_EOF)
 					{
-						vStringPut (aux, (int) *line);
-						++line;
+						vStringCat (selector, token.string);
+						readToken (&token);
+						if (token.type == '[')
+							depth++;
+						else if (token.type == ']')
+							depth--;
 					}
-					vStringTerminate (aux);
-					if( strcmp( aux->buffer, "media" ) == 0 )
-						state = P_STATE_IN_MEDIA;
-					else if ( strcmp( aux->buffer, "import" ) == 0 )
-						state = P_STATE_IN_IMPORT;
-					else if ( strcmp( aux->buffer, "namespace" ) == 0 )
-						state = P_STATE_IN_NAMESPACE;
-					else if ( strcmp( aux->buffer, "page" ) == 0 )
-						state = P_STATE_IN_PAGE;
-					else if ( strcmp( aux->buffer, "font-face" ) == 0 )
-						state = P_STATE_IN_FONTFACE;
-					vStringDelete (aux);
+					if (token.type != TOKEN_EOF)
+						vStringCat (selector, token.string);
+					readToken (&token);
 				}
-				else if( *line == '*' && *(line-1) == '/' ) /* multi-line comment */
-					state = P_STATE_IN_COMMENT;
-				else if ( stack->length > 0 )
-					state = parseCssDeclaration( &line, K_SELECTOR, vStringValue(stack) );
+			}
+			while (token.type == TOKEN_SELECTOR);
+			/* we already consumed the next token, don't read it twice */
+			readNextToken = FALSE;
 
-			break;
-			case P_STATE_IN_COMMENT:
-				if( *line == '/' && *(line-1) == '*')
-					state = P_STATE_NONE;
-			break;
-			case  P_STATE_IN_SINGLE_STRING:
-				if( *line == '\'' && *(line-1) != '\\' )
-					state = P_STATE_IN_DEFINITION; /* PAGE, FONTFACE and DEFINITION are treated the same way */
-			break;
-			case  P_STATE_IN_DOUBLE_STRING:
-				if( *line=='"' && *(line-1) != '\\' )
-					state = P_STATE_IN_DEFINITION; /* PAGE, FONTFACE and DEFINITION are treated the same way */
-			break;
-			case  P_STATE_IN_MEDIA:
-				/* skip to start of media body or line end */
-				while( *line != '{' )
-				{
-					if( *line == '\0' )
-						break;
-					++line;
-				}
-				if( *line == '{' )
-						state = P_STATE_NONE;
-			break;
-			case  P_STATE_IN_IMPORT:
-			case  P_STATE_IN_NAMESPACE:
-				/* skip to end of declaration or line end */
-				while( *line != ';' )
-				{
-					if( *line == '\0' )
-						break;
-					++line;
-				}
-				if( *line == ';' )
-					state = P_STATE_NONE;
-			break;
-			case P_STATE_IN_PAGE:
-			case P_STATE_IN_FONTFACE:
-			case P_STATE_IN_DEFINITION:
-				if( *line == '\0' )
-					line = fileReadLine ();
-				if( *line == '}' )
-					state = P_STATE_NONE;
-				else if( *line == '\'' )
-					state = P_STATE_IN_SINGLE_STRING;
-				else if( *line == '"' )
-					state = P_STATE_IN_DOUBLE_STRING;
-			break;
-			case P_STATE_AT_END:
-				return state;
-			break;
+			vStringTerminate (selector);
+			if (CssKinds[kind].enabled)
+			{
+				tagEntryInfo e;
+				initTagEntry (&e, vStringValue (selector));
+
+				e.lineNumber	= lineNumber;
+				e.filePosition	= filePosition;
+				e.kindName		= CssKinds[kind].name;
+				e.kind			= (char) CssKinds[kind].letter;
+
+				makeTagEntry (&e);
+			}
+			vStringDelete (selector);
+		}
+		else if (token.type == '{')
+		{ /* skip over { ... } */
+			int depth = 1;
+			while (depth > 0 && token.type != TOKEN_EOF)
+			{
+				readToken (&token);
+				if (token.type == '{')
+					depth++;
+				else if (token.type == '}')
+					depth--;
+			}
 		}
-		if (line == NULL) return P_STATE_AT_END;
-		line++;
 	}
-	vStringDelete (stack);
+	while (token.type != TOKEN_EOF);
 
-	return state;
-}
-
-static void findCssTags (void)
-{
-    const unsigned char *line;
-	CssParserState state = P_STATE_NONE;
-
-    while ( (line = fileReadLine ()) != NULL )
-    {
-		state = parseCssLine( line, state );
-		if( state==P_STATE_AT_END ) return;
-    }
+	vStringDelete (token.string);
 }
 
 /* parser definition */
@@ -232,3 +267,4 @@ extern parserDefinition* CssParser (void)
     def->parser     = findCssTags;
     return def;
 }
+


Modified: tests/ctags/css-at-rules.css.tags
4 lines changed, 2 insertions(+), 2 deletions(-)
===================================================================
@@ -1,3 +1,3 @@
 # format=tagmanager
-body �2048�0
-html �2048�0
+body�2048�0
+html�2048�0


Modified: tests/ctags/css-at-supports.css.tags
5 lines changed, 1 insertions(+), 4 deletions(-)
===================================================================
@@ -1,5 +1,2 @@
 # format=tagmanager
-html �2048�0
-or �2048�0
-test-property2: 42�2048�0
-test-property: 42�2048�0
+html�2048�0


Modified: tests/ctags/css-attribute-selectors.css.tags
16 lines changed, 5 insertions(+), 11 deletions(-)
===================================================================
@@ -1,12 +1,6 @@
 # format=tagmanager
-a�2048�0
-background-color: light-green�2048�0
-body �2048�0
-color: green�2048�0
-color: red�2048�0
-en�2048�0
-href�2048�0
-http:�2048�0
-https:�2048�0
-lang�2048�0
-p �2048�0
+a[href^="http://"]�2048�0
+a[href^="https://"]�2048�0
+a[lang~=en]�2048�0
+body�2048�0
+p�2048�0


Modified: tests/ctags/css-namespace-selectors.css.tags
7 lines changed, 3 insertions(+), 4 deletions(-)
===================================================================
@@ -1,5 +1,4 @@
 # format=tagmanager
-*�2048�0
-a �2048�0
-div �2048�0
-svg�2048�0
+*|div�2048�0
+svg|a�2048�0
+|a�2048�0


Modified: tests/ctags/css-pseudo-classes.css.tags
33 lines changed, 11 insertions(+), 22 deletions(-)
===================================================================
@@ -1,23 +1,12 @@
 # format=tagmanager
-0n+1�2048�0
-1�2048�0
-10n+0�2048�0
-2n�2048�0
-2n+1�2048�0
-a:lang�2048�0
-after �2048�0
-background-color: gray �2048�0
-background-color: light-gray �2048�0
-body :not�2048�0
-color: green�2048�0
-color: lime�2048�0
-color: red�2048�0
-en�2048�0
-even�2048�0
-first-child�2048�0
-n+3�2048�0
-odd�2048�0
-span�2048�0
-span:not�2048�0
-span:nth-child�2048�0
-tr:nth-child�2048�0
+a:lang(en):after�2048�0
+body :not(span)�2048�0
+span:not(:first-child)�2048�0
+span:nth-child(-n+3)�2048�0
+span:nth-child(0n+1)�2048�0
+span:nth-child(1)�2048�0
+tr:nth-child(10n+0)�2048�0
+tr:nth-child(2n)�2048�0
+tr:nth-child(2n+1)�2048�0
+tr:nth-child(even)�2048�0
+tr:nth-child(odd)�2048�0


Modified: tests/ctags/css-simple.css.tags
18 lines changed, 9 insertions(+), 9 deletions(-)
===================================================================
@@ -1,11 +1,11 @@
 # format=tagmanager
-#footer �16384�0
-* �2048�0
-.foo a�1�0
-.foo b �1�0
-.header �1�0
-.red �1�0
-div.magic �1�0
+#footer�16384�0
+*�2048�0
+.foo a�2048�0
+.foo b�2048�0
+.header�1�0
+.red�1�0
+div.magic�1�0
 html�2048�0
-ul > li > a �2048�0
-ul li �2048�0
+ul > li > a�2048�0
+ul li�2048�0


Modified: tests/ctags/css-singlequote-in-comment-issue2.css.tags
1 lines changed, 1 insertions(+), 0 deletions(-)
===================================================================
@@ -1,2 +1,3 @@
 # format=tagmanager
 a�2048�0
+b�2048�0


Modified: tests/ctags/css-tag-types.css.tags
40 lines changed, 20 insertions(+), 20 deletions(-)
===================================================================
@@ -1,36 +1,36 @@
 # format=tagmanager
 #a #foo�16384�0
 #a #foo #bar�16384�0
-#a #foo .bar�16384�0
-#a #foo bar�16384�0
-#a .foo�16384�0
+#a #foo .bar�1�0
+#a #foo bar�2048�0
+#a .foo�1�0
 #a .foo #bar�16384�0
-#a .foo .bar�16384�0
-#a .foo bar�16384�0
-#a foo�16384�0
+#a .foo .bar�1�0
+#a .foo bar�2048�0
+#a foo�2048�0
 #a foo #bar�16384�0
-#a foo .bar�16384�0
-#a foo bar�16384�0
-.a #foo�1�0
-.a #foo #bar�1�0
+#a foo .bar�1�0
+#a foo bar�2048�0
+.a #foo�16384�0
+.a #foo #bar�16384�0
 .a #foo .bar�1�0
-.a #foo bar�1�0
+.a #foo bar�2048�0
 .a .foo�1�0
-.a .foo #bar�1�0
+.a .foo #bar�16384�0
 .a .foo .bar�1�0
-.a .foo bar�1�0
-.a foo�1�0
-.a foo #bar�1�0
+.a .foo bar�2048�0
+.a foo�2048�0
+.a foo #bar�16384�0
 .a foo .bar�1�0
-.a foo bar�1�0
+.a foo bar�2048�0
 a #foo�16384�0
 a #foo #bar�16384�0
-a #foo .bar�16384�0
-a #foo bar�16384�0
+a #foo .bar�1�0
+a #foo bar�2048�0
 a .foo�1�0
-a .foo #bar�1�0
+a .foo #bar�16384�0
 a .foo .bar�1�0
-a .foo bar�1�0
+a .foo bar�2048�0
 a foo�2048�0
 a foo #bar�16384�0
 a foo .bar�1�0



--------------
This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).


More information about the Commits mailing list