[geany/geany] b7b34e: Rewrite the Txt2tags parser for better conformance and features

Colomban Wendling git-noreply at xxxxx
Sat Nov 29 22:41:02 UTC 2014


Branch:      refs/heads/master
Author:      Colomban Wendling <ban at herbesfolles.org>
Committer:   Colomban Wendling <ban at herbesfolles.org>
Date:        Tue, 19 Aug 2014 14:11:23 UTC
Commit:      b7b34ec451c70927520f57ed773bd1b0736af2c0
             https://github.com/geany/geany/commit/b7b34ec451c70927520f57ed773bd1b0736af2c0

Log Message:
-----------
Rewrite the Txt2tags parser for better conformance and features

This fixes parsing indented titles and titles with embedded delimiter
characters, and adds support for title nesting information.

Syntax: http://txt2tags.org/rules.html

Closes [feature-requests:#690].


Modified Paths:
--------------
    src/symbols.c
    tagmanager/ctags/txt2tags.c

Modified: src/symbols.c
1 lines changed, 1 insertions(+), 0 deletions(-)
===================================================================
@@ -315,6 +315,7 @@ const gchar *symbols_get_context_separator(gint ft_id)
 
 		/* no context separator */
 		case GEANY_FILETYPES_ASCIIDOC:
+		case GEANY_FILETYPES_TXT2TAGS:
 			return "\x03";
 
 		default:


Modified: tagmanager/ctags/txt2tags.c
176 lines changed, 120 insertions(+), 56 deletions(-)
===================================================================
@@ -1,5 +1,6 @@
 /*
 *   Copyright (c) 2009, Eric Forgeot
+*   Copyright (c) 2014, Colomban Wendling <colomban at geany.org>
 *
 *   Based on work by Jon Strait
 *
@@ -19,102 +20,165 @@
 
 #include "parse.h"
 #include "read.h"
+#include "nestlevel.h"
 #include "vstring.h"
 
+
+/* as any character may happen in an input, use something highly unlikely */
+#define SCOPE_SEPARATOR '\x3' /* ASCII ETX */
+
 /*
 *   DATA DEFINITIONS
 */
 
 typedef enum {
-	K_SECTION = 0, K_HEADER
+	K_SECTION = 0
 } Txt2tagsKind;
 
 static kindOption Txt2tagsKinds[] = {
-	{ TRUE, 'm', "member", "sections" },
-	{ TRUE, 's', "struct",  "header1"}
+	{ TRUE, 'm', "member", "sections" }
 };
 
 /*
 *   FUNCTION DEFINITIONS
 */
 
-static void parse_title (vString* const name, const char control_char)
-{
-	char *text = vStringValue(name);
-	char *p = text;
-	int offset_start = 0;
-	boolean in_or_after_title = FALSE;
-
-	while (p != NULL && *p != '\0')
-	{
-		if (*p == control_char)
-		{
-			if (in_or_after_title)
-				break;
-			else
-				offset_start++;
-		}
-		else
-			in_or_after_title = TRUE;
-		p++;
-	}
-	*p = '\0';
-	vStringCopyS(name, text + offset_start);
-	vStringStripLeading(name);
-	vStringStripTrailing(name);
-}
-
-static void makeTxt2tagsTag (const vString* const name, boolean name_before, Txt2tagsKind type)
+static void makeTxt2tagsTag (const vString* const name,
+                             const NestingLevels *const nls,
+                             Txt2tagsKind type)
 {
 	tagEntryInfo e;
+	vString *scope = NULL;
 	kindOption *kind = &Txt2tagsKinds[type];
 	initTagEntry (&e, vStringValue(name));
 
-	if (name_before)
-		e.lineNumber--;	/* we want the line before the underline chars */
 	e.kindName = kind->name;
 	e.kind = kind->letter;
 
+	if (nls->n > 0) {
+		int i;
+		kindOption *parentKind;
+
+		scope = vStringNew();
+		for (i = 0; i < nls->n; i++) {
+			if (vStringLength(scope) > 0)
+				vStringPut(scope, SCOPE_SEPARATOR);
+			vStringCat(scope, nls->levels[i].name);
+		}
+		parentKind = &Txt2tagsKinds[nls->levels[nls->n - 1].type];
+
+		e.extensionFields.scope[0] = parentKind->name;
+		e.extensionFields.scope[1] = vStringValue(scope);
+	}
+
 	makeTagEntry(&e);
+
+	vStringDelete(scope);
+}
+
+/* matches: ^\s*[=_-]{20,}\s*$ */
+static boolean isTxt2tagsLine (const unsigned char *line)
+{
+	unsigned int len;
+
+	while (isspace(*line)) line++;
+	for (len = 0; *line == '=' || *line == '-' || *line == '_'; len++)
+		line++;
+	while (isspace(*line)) line++;
+
+	return len >= 20 && *line == 0;
+}
+
+static boolean parseTxt2tagsTitle (const unsigned char *line,
+                                   vString *const title,
+                                   int *const depth_)
+{
+	const int MAX_TITLE_DEPTH = 5; /* maximum length of a title delimiter */
+	unsigned char delim;
+	int delim_delta = 0;
+	const unsigned char *end;
+
+	/* skip leading spaces, but not tabs (probably because they create quotes) */
+	while (*line == ' ') line++;
+
+	/* normal/numbered titles */
+	if (*line != '=' && *line != '+')
+		return FALSE;
+
+	delim = *line;
+
+	/* find the start delimiter length */
+	while (*line == delim && delim_delta < MAX_TITLE_DEPTH+1)
+	{
+		line++;
+		delim_delta++;
+	}
+	while (isspace(*line))
+		line++;
+
+	if (delim_delta > MAX_TITLE_DEPTH) /* invalid */
+		return FALSE;
+
+	*depth_ = delim_delta;
+
+	/* find the end delimiter */
+	end = line + strlen((const char *) line) - 1;
+	while (end > line && isspace(*end)) end--;
+	/* skip a possible label: \[[A-Za-z0-9_-]+\] */
+	if (*end == ']')
+	{
+		end--;
+		while (end > line && (isalnum(*end) || *end == '_' || *end == '-'))
+			end--;
+		if (*end != '[') /* invalid */
+			return FALSE;
+		end--;
+	}
+	while (end > line && *end == delim && delim_delta >= 0)
+	{
+		delim_delta--;
+		end--;
+	}
+	while (end > line && isspace(*end)) end--;
+	end++;
+
+	/* if start and end delimiters are not identical, or the name is empty */
+	if (delim_delta != 0 || (end - line) <= 0)
+		return FALSE;
+
+	vStringNCopyS(title, (const char *) line, end - line);
+	return TRUE;
 }
 
 static void findTxt2tagsTags (void)
 {
+	NestingLevels *nls = nestingLevelsNew();
 	vString *name = vStringNew();
 	const unsigned char *line;
 
 	while ((line = fileReadLine()) != NULL)
 	{
-		/*int name_len = vStringLength(name);*/
+		int depth;
 
-		/* underlines must be the same length or more */
-		/*if (name_len > 0 &&	(line[0] == '=' || line[0] == '-') && issame((const char*) line))
+		if (isTxt2tagsLine(line))
+			; /* skip, so as not to improperly match titles */
+		else if (parseTxt2tagsTitle(line, name, &depth))
 		{
-			makeTxt2tagsTag(name, TRUE);
-		}*/
-		if (line[0] == '=' || line[0] == '+') {
- 			/*vStringClear(name);*/
-			vStringCatS(name, (const char *) line);
-			vStringTerminate(name);
-			parse_title(name, line[0]);
-			makeTxt2tagsTag(name, FALSE, K_SECTION);
-		}
-		/* TODO what exactly should this match?
-		 * K_HEADER ('struct') isn't matched in src/symbols.c */
-		else if (strcmp((char*)line, "°") == 0) {
-			/*vStringClear(name);*/
-			vStringCatS(name, (const char *) line);
-			vStringTerminate(name);
-			makeTxt2tagsTag(name, FALSE, K_HEADER);
-		}
-		else {
-			vStringClear (name);
-			if (! isspace(*line))
-				vStringCatS(name, (const char*) line);
+			NestingLevel *nl = nestingLevelsGetCurrent(nls);
+			while (nl && nl->indentation >= depth)
+			{
+				nestingLevelsPop(nls);
+				nl = nestingLevelsGetCurrent(nls);
+			}
+
 			vStringTerminate(name);
+			makeTxt2tagsTag(name, nls, K_SECTION);
+			nestingLevelsPush(nls, name, K_SECTION);
+			nestingLevelsGetCurrent(nls)->indentation = depth;
 		}
 	}
 	vStringDelete (name);
+	nestingLevelsFree(nls);
 }
 
 extern parserDefinition* Txt2tagsParser (void)



--------------
This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).


More information about the Commits mailing list