[geany/geany] 67456e: Copied BibTeX-parser from https://github.com/universal-ctags/ctags/pull/2137 - Commits

30 Oct 2019


      Branch:      refs/heads/master
Author:      Mirco Schoenfeld mirco.schoenfeld@tum.de
Committer:   Mirco Schoenfeld mirco.schoenfeld@tum.de
Date:        Wed, 18 Sep 2019 11:27:35 UTC
Commit:      67456e0cc3b801121c00ac4853d65d4dfb946e34
             https://github.com/geany/geany/commit/67456e0cc3b801121c00ac4853d65d4dfb946e34
Log Message:
-----------
Copied BibTeX-parser from https://github.com/universal-ctags/ctags/pull/2137
Modified Paths:
--------------
    ctags/main/parsers.h
    ctags/parsers/bibtex.c
    src/symbols.c
    src/tagmanager/tm_parser.c
Modified: ctags/main/parsers.h
2 lines changed, 1 insertions(+), 1 deletions(-)
===================================================================
@@ -24,7 +24,7 @@
    PhpParser, \
    PythonParser, \
    TexParser, \
-	BibParser, \
+	BibtexParser, \
    AsmParser, \
    ConfParser, \
    SqlParser, \
Modified: ctags/parsers/bibtex.c
493 lines changed, 372 insertions(+), 121 deletions(-)
===================================================================
@@ -12,169 +12,420 @@
  */
/*
-*   INCLUDE FILES
-*/
-#include "general.h"    /* must always come first */
-
-#include <ctype.h>
+ *	 INCLUDE FILES
+ */
+#include "general.h"	/* must always come first */
+#include <ctype.h>	/* to define isalpha () */
 #include <string.h>
+#include "debug.h"
+#include "entry.h"
+#include "keyword.h"
 #include "parse.h"
 #include "read.h"
-#include "vstring.h"
 #include "routines.h"
+#include "vstring.h"
+
+/*
+ *	 MACROS
+ */
+#define isType(token,t)		(bool) ((token)->type == (t))
+#define isKeyword(token,k)	(bool) ((token)->keyword == (k))
+#define isIdentChar(c) \
+	(isalpha (c) || isdigit (c) || (c) == '_' || (c) == '-' || (c) == '+')
+
+/*
+ *	 DATA DECLARATIONS
+ */
/*
-*   DATA DEFINITIONS
-*/
+ * Used to specify type of keyword.
+ */
+enum eKeywordId {
+	KEYWORD_article,
+	KEYWORD_book,
+	KEYWORD_booklet,
+	KEYWORD_conference,
+	KEYWORD_inbook,
+	KEYWORD_incollection,
+	KEYWORD_inproceedings,
+	KEYWORD_manual,
+	KEYWORD_mastersthesis,
+	KEYWORD_misc,
+	KEYWORD_phdthesis,
+	KEYWORD_proceedings,
+	KEYWORD_string,
+	KEYWORD_techreport,
+	KEYWORD_unpublished
+};
+typedef int keywordId; /* to allow KEYWORD_NONE */
+
+enum eTokenType {
+	/* 0..255 are the byte's value.  Some are named for convenience */
+	TOKEN_OPEN_CURLY = '{',
+	/* above is special types */
+	TOKEN_UNDEFINED = 256,
+	TOKEN_KEYWORD,
+	TOKEN_IDENTIFIER
+};
+typedef int tokenType;
+
+typedef struct sTokenInfo {
+	tokenType		type;
+	keywordId		keyword;
+	vString *		string;
+	unsigned long 	lineNumber;
+	MIOPos 			filePosition;
+} tokenInfo;
+
+/*
+ *	DATA DEFINITIONS
+ */
+
+static langType Lang_bib;
+
 typedef enum {
-	K_ARTICLE,
-  K_BOOK,
-  K_PAPER,
-  K_THESIS,
-  K_OTHER
-} BibKind;
-
-static kindDefinition BibKinds[] = {
-	{ true, 'f', "function",        "@article @inbook @incollection" },
-	{ true, 'c', "class",        "@book @booklet @proceedings" },
-	{ true, 'm', "member",        "@inproceedings @conference" },
-	{ true, 'v', "variable",        "@phdthesis @mastersthesis" },
-	{ true, 's', "struct",        "@manual @misc @techreport" }
+	BIBTAG_ARTICLE,
+	BIBTAG_BOOK,
+	BIBTAG_BOOKLET,
+	BIBTAG_CONFERENCE,
+	BIBTAG_INBOOK,
+	BIBTAG_INCOLLECTION,
+	BIBTAG_INPROCEEDINGS,
+	BIBTAG_MANUAL,
+	BIBTAG_MASTERSTHESIS,
+	BIBTAG_MISC,
+	BIBTAG_PHDTHESIS,
+	BIBTAG_PROCEEDINGS,
+	BIBTAG_STRING,
+	BIBTAG_TECHREPORT,
+	BIBTAG_UNPUBLISHED,
+	BIBTAG_COUNT
+} bibKind;
+
+static kindDefinition BibKinds [] = {
+	{ true,  'a', "article",				"article"				},
+	{ true,  'b', "book",						"book"					},
+	{ true,  'B', "booklet",				"booklet"				},
+	{ true,  'c', "conference",			"conference"		},
+	{ true,  'i', "inbook",					"inbook"				},
+	{ true,  'I', "incollection",		"incollection"	},
+	{ true,  'j', "inproceedings",	"inproceedings"	},
+	{ true,  'm', "manual",					"manual"				},
+	{ true,  'M', "mastersthesis",	"mastersthesis"	},
+	{ true,  'n', "misc",						"misc"					},
+	{ true,  'p', "phdthesis",			"phdthesis"			},
+	{ true,  'P', "proceedings",		"proceedings"		},
+	{ true,  's', "string",					"string"				},
+	{ true,  't', "techreport",			"techreport"		},
+	{ true,  'u', "unpublished",		"unpublished"		}
 };
+static const keywordTable BibKeywordTable [] = {
+	/* keyword			  keyword ID */
+	{ "article",	    KEYWORD_article				},
+	{ "book",	        KEYWORD_book				  },
+	{ "booklet",	    KEYWORD_booklet				},
+	{ "conference",	  KEYWORD_conference		},
+	{ "inbook",	      KEYWORD_inbook				},
+	{ "incollection",	KEYWORD_incollection	},
+	{ "inproceedings",KEYWORD_inproceedings	},
+	{ "manual",	      KEYWORD_manual				},
+	{ "mastersthesis",KEYWORD_mastersthesis	},
+	{ "misc",	        KEYWORD_misc				  },
+	{ "phdthesis",	  KEYWORD_phdthesis			},
+	{ "proceedings",	KEYWORD_proceedings		},
+	{ "string",				KEYWORD_string				},
+	{ "techreport",	  KEYWORD_techreport		},
+	{ "unpublished",	KEYWORD_unpublished		}
+};
+  
 /*
-*   FUNCTION DEFINITIONS
-*/
-#define BIB_LABEL  (1<<2)
+ *	 FUNCTION DEFINITIONS
+ */
-static int getBibWord(const char * ref, const char **ptr)
+static tokenInfo *newToken (void)
 {
-	const char *p = *ptr;
+	tokenInfo *const token = xMalloc (1, tokenInfo);
-	while ((*ref != '\0') && (*p != '\0') && (tolower(*ref) == tolower(*p)))
-		ref++, p++;
+	token->type			= TOKEN_UNDEFINED;
+	token->keyword		= KEYWORD_NONE;
+	token->string		= vStringNew ();
+	token->lineNumber   = getInputLineNumber ();
+	token->filePosition = getInputFilePosition ();
+	return token;
+}
-	if (*ref)
-		return false;
+static void deleteToken (tokenInfo *const token)
+{
+	vStringDelete (token->string);
+	eFree (token);
+}
-	*ptr = p;
-	return true;
+/*
+ *	 Tag generation functions
+ */
+static void makeBibTag (tokenInfo *const token, bibKind kind)
+{
+	if (BibKinds [kind].enabled)
+	{
+		const char *const name = vStringValue (token->string);
+		tagEntryInfo e;
+		initTagEntry (&e, name, kind);
+
+		e.lineNumber   = token->lineNumber;
+		e.filePosition = token->filePosition;
+
+		makeTagEntry (&e);
+	}
 }
-static void createBibTag(int flags, BibKind kind, const char * l)
+/*
+ *	 Parsing functions
+ */
+
+/*
+ *	Reads an identifier (see isIdentChar) beginning with "firstChar"
+ *	and appends it to "string".
+ */
+static void parseIdentifier (vString *const string, const int firstChar)
 {
-	vString *name = vStringNew ();
+	int c = firstChar;
+	Assert (isIdentChar (c));
+	do
+	{
+		vStringPut (string, c);
+		c = getcFromInputFile ();
+	} while (c != EOF && isIdentChar (c));
+	if (c != EOF)
+		ungetcToInputFile (c);		/* unget non-identifier character */
+}
+
+static bool readToken (tokenInfo *const token)
+{
+	int c;
+
+	token->type			= TOKEN_UNDEFINED;
+	token->keyword		= KEYWORD_NONE;
+	vStringClear (token->string);
+
+getNextChar:
-	while ((*l == ' '))
-		l++;
-	if (flags & (BIB_LABEL))
+	do
    {
-		if (*l != '{')
-			goto no_tag;
-		l++;
+		c = getcFromInputFile ();
+	}
+	while (c == '\t' || c == ' ' || c == '\n');
+
+	token->lineNumber   = getInputLineNumber ();
+	token->filePosition = getInputFilePosition ();
+
+	token->type = (unsigned char) c;
+	switch (c)
+	{
+		case EOF: return false;
+
+		case '@':
+					/*
+					 * All Bib entries start with an at symbol.
+					 * Check if the next character is an alpha character;
+					 * otherwise it cannot start a BibTeX entry type.
+					 */
+					c = getcFromInputFile ();
+					if (! isalpha (c))
+					  ungetcToInputFile (c);
+					else
+					{
+						vStringPut (token->string, '@');
+						parseIdentifier (token->string, c);
+						token->keyword = lookupCaseKeyword (vStringValue (token->string) + 1, Lang_bib);
+						if (isKeyword (token, KEYWORD_NONE))
+							token->type = TOKEN_IDENTIFIER;
+						else
+							token->type = TOKEN_KEYWORD;
+					}
+					break;
+		case '%':
+					skipToCharacterInInputFile ('\n'); /* % are single line comments */
+					goto getNextChar;
+					break;
+		default:
+					if (isIdentChar (c))
+					{
+						parseIdentifier (token->string, c);
+						token->type = TOKEN_IDENTIFIER;
+					}
+					break;
+	}
+	return true;
+}
+
+static void copyToken (tokenInfo *const dest, tokenInfo *const src)
+{
+	dest->lineNumber = src->lineNumber;
+	dest->filePosition = src->filePosition;
+	dest->type = src->type;
+	dest->keyword = src->keyword;
+	vStringCopy (dest->string, src->string);
+}
-		do
+/*
+ *	 Scanning functions
+ */
+
+static bool parseTag (tokenInfo *const token, bibKind kind)
+{
+	tokenInfo *	const name = newToken ();
+	vString *		currentid;
+	bool				eof = false;
+
+	currentid = vStringNew ();
+	/*
+	 * Bib entries are of these formats:
+	 *   @article{identifier,
+	 *   author="John Doe"}
+	 *
+	 * When a keyword is found, the identifier that follows the
+	 * opening curly brace is used as the tag name.
+	 *
+	 */
+	if (isType (token, TOKEN_KEYWORD))
+	{
+		copyToken (name, token);
+		if (!readToken (token))
    	{
-			vStringPut(name, (int) *l);
-			++l;
-		} while ((*l != '\0') && (*l != ',') && (*l != '}'));
-		if (name->buffer[0] != ',')
-			makeSimpleTag(name, kind);
+			eof = true;
+			goto out;
+		}
    }
-	else
+
+	if (isType (token, TOKEN_OPEN_CURLY))
    {
-		vStringPut(name, (int) *l);
-		makeSimpleTag(name, kind);
+		if (!readToken (token))
+		{
+			eof = true;
+			goto out;
+		}
+		if (isType (token, TOKEN_IDENTIFIER)){
+			vStringCat (currentid, token->string);
+			vStringStripTrailing (currentid);
+			if (vStringLength (currentid) > 0)
+			{
+				vStringCopy (name->string, currentid);
+				makeBibTag (name, kind);
+			}
+		}
+		else
+		{ // should find an identifier for bib item at first place
+			eof = true;
+			goto out;
+		}
    }
+
-no_tag:
-	vStringDelete(name);
+ out:
+	deleteToken (name);
+	vStringDelete (currentid);
+	return eof;
 }
-static void findBibTags(void)
+static void parseBibFile (tokenInfo *const token)
 {
-	const char *line;
+	bool eof = false;
-	while ((line = (const char*)readLineFromInputFile()) != NULL)
+	do
    {
-		const char *cp = line;
-		/*int escaped = 0;*/
+		if (!readToken (token))
+			break;
-		for (; *cp != '\0'; cp++)
+		if (isType (token, TOKEN_KEYWORD))
    	{
-			if (*cp == '%')
-				break;
-			if (*cp == '@')
+			switch (token->keyword)
    		{
-				cp++;
-
-				if (getBibWord("article", &cp))
-				{
-					createBibTag(BIB_LABEL, K_ARTICLE, cp);
-					continue;
-				}else if (getBibWord("inbook", &cp))
-				{
-					createBibTag(BIB_LABEL, K_ARTICLE, cp);
-					continue;
-				}else if (getBibWord("incollection", &cp))
-				{
-					createBibTag(BIB_LABEL, K_ARTICLE, cp);
-					continue;
-				}else if (getBibWord("book", &cp))
-				{
-					createBibTag(BIB_LABEL, K_BOOK, cp);
-					continue;
-				}else if (getBibWord("booklet", &cp))
-				{
-					createBibTag(BIB_LABEL, K_BOOK, cp);
-					continue;
-				}else if (getBibWord("proceedings", &cp))
-				{
-					createBibTag(BIB_LABEL, K_BOOK, cp);
-					continue;
-				}else if (getBibWord("inproceedings", &cp))
-				{
-					createBibTag(BIB_LABEL, K_PAPER, cp);
-					continue;
-				}else if (getBibWord("conference", &cp))
-				{
-					createBibTag(BIB_LABEL, K_PAPER, cp);
-					continue;
-				}else if (getBibWord("phdthesis", &cp))
-				{
-					createBibTag(BIB_LABEL, K_THESIS, cp);
-					continue;
-				}else if (getBibWord("mastersthesis", &cp))
-				{
-					createBibTag(BIB_LABEL, K_THESIS, cp);
-					continue;
-				}else if (getBibWord("manual", &cp))
-				{
-					createBibTag(BIB_LABEL, K_OTHER, cp);
-					continue;
-				}else if (getBibWord("misc", &cp))
-				{
-					createBibTag(BIB_LABEL, K_OTHER, cp);
-					continue;
-				}else if (getBibWord("techreport", &cp))
-				{
-					createBibTag(BIB_LABEL, K_OTHER, cp);
-					continue;
-				}
+				case KEYWORD_article:
+					eof = parseTag (token, BIBTAG_ARTICLE);
+					break;
+				case KEYWORD_book:
+					eof = parseTag (token, BIBTAG_BOOK);
+					break;
+				case KEYWORD_booklet:
+					eof = parseTag (token, BIBTAG_BOOKLET);
+					break;
+				case KEYWORD_conference:
+					eof = parseTag (token, BIBTAG_CONFERENCE);
+					break;
+				case KEYWORD_inbook:
+					eof = parseTag (token, BIBTAG_INBOOK);
+					break;
+				case KEYWORD_incollection:
+					eof = parseTag (token, BIBTAG_INCOLLECTION);
+					break;
+				case KEYWORD_inproceedings:
+					eof = parseTag (token, BIBTAG_INPROCEEDINGS);
+					break;
+				case KEYWORD_manual:
+					eof = parseTag (token, BIBTAG_MANUAL);
+					break;
+				case KEYWORD_mastersthesis:
+					eof = parseTag (token, BIBTAG_MASTERSTHESIS);
+					break;
+				case KEYWORD_misc:
+					eof = parseTag (token, BIBTAG_MISC);
+					break;
+				case KEYWORD_phdthesis:
+					eof = parseTag (token, BIBTAG_PHDTHESIS);
+					break;
+				case KEYWORD_proceedings:
+					eof = parseTag (token, BIBTAG_PROCEEDINGS);
+					break;
+				case KEYWORD_string:
+					eof = parseTag (token, BIBTAG_STRING);
+					break;
+				case KEYWORD_techreport:
+					eof = parseTag (token, BIBTAG_TECHREPORT);
+					break;
+				case KEYWORD_unpublished:
+					eof = parseTag (token, BIBTAG_UNPUBLISHED);
+					break;
+				default:
+					break;
    		}
    	}
-	}
+		if (eof)
+			break;
+	} while (true);
+}
+
+static void initialize (const langType language)
+{
+	Lang_bib = language;
+}
+
+static void findBibTags (void)
+{
+	tokenInfo *const token = newToken ();
+
+	parseBibFile (token);
+
+	deleteToken (token);
 }
-extern parserDefinition* BibParser (void)
+/* Create parser definition structure */
+extern parserDefinition* BibtexParser (void)
 {
+	Assert (ARRAY_SIZE (BibKinds) == BIBTAG_COUNT);
    static const char *const extensions [] = { "bib", NULL };
-	parserDefinition * def = parserNew ("Bib");
-	def->kindTable  = BibKinds;
-	def->kindCount  = ARRAY_SIZE (BibKinds);
+	parserDefinition *const def = parserNew ("BibTeX");
    def->extensions = extensions;
-	def->parser     = findBibTags;
+	/*
+	 * New definitions for parsing instead of regex
+	 */
+	def->kindTable		= BibKinds;
+	def->kindCount		= ARRAY_SIZE (BibKinds);
+	def->parser				= findBibTags;
+	def->initialize		= initialize;
+	def->keywordTable	= BibKeywordTable;
+	def->keywordCount	= ARRAY_SIZE (BibKeywordTable);
    return def;
 }
Modified: src/symbols.c
7 lines changed, 5 insertions(+), 2 deletions(-)
===================================================================
@@ -528,11 +528,14 @@ static void add_top_level_items(GeanyDocument *doc)
    	case GEANY_FILETYPES_BIBTEX:
    	{
    		tag_list_add_groups(tag_store,
-				&(tv_iters.tag_function), _("Journal Articles"), ICON_NONE,
+				&(tv_iters.tag_function), _("Articles"), ICON_NONE,
+				&(tv_iters.tag_macro), _("Book Chapters"), ICON_NONE,
    			&(tv_iters.tag_class), _("Books & Conference Proceedings"), ICON_NONE,
    			&(tv_iters.tag_member), _("Conference Papers"), ICON_NONE,
    			&(tv_iters.tag_variable), _("Theses"), ICON_NONE,
-				&(tv_iters.tag_struct), _("Other"), ICON_NONE,
+				&(tv_iters.tag_namespace), _("Strings"), ICON_NONE,
+				&(tv_iters.tag_externvar), _("Unpublished"), ICON_NONE,
+				&(tv_iters.tag_other), _("Other"), ICON_NONE,
    			NULL);
    		break;
    	}
Modified: src/tagmanager/tm_parser.c
20 lines changed, 15 insertions(+), 5 deletions(-)
===================================================================
@@ -125,11 +125,21 @@ static TMParserMapEntry map_LATEX[] = {
    {'s', tm_tag_struct_t},
 };
 static TMParserMapEntry map_BIBTEX[] = {
-	{'f', tm_tag_function_t},
-	{'c', tm_tag_class_t},
-	{'m', tm_tag_member_t},
-	{'v', tm_tag_variable_t},
-	{'s', tm_tag_struct_t},
+	{'a', tm_tag_function_t},
+	{'b', tm_tag_class_t},
+	{'B', tm_tag_class_t},
+	{'c', tm_tag_member_t},
+	{'i', tm_tag_macro_t},
+	{'I', tm_tag_macro_t},
+	{'j', tm_tag_member_t},
+	{'m', tm_tag_other_t},
+	{'M', tm_tag_variable_t},
+	{'n', tm_tag_other_t},
+	{'p', tm_tag_variable_t},
+	{'P', tm_tag_class_t},
+	{'s', tm_tag_namespace_t},
+	{'t', tm_tag_other_t},
+	{'u', tm_tag_externvar_t},
 };
static TMParserMapEntry map_ASM[] = {
--------------
This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).