[geany/geany] 67456e: Copied BibTeX-parser from https://github.com/universal-ctags/ctags/pull/2137
Mirco Schoenfeld
git-noreply at xxxxx
Wed Oct 30 00:12:12 UTC 2019
Branch: refs/heads/master
Author: Mirco Schoenfeld <mirco.schoenfeld at tum.de>
Committer: Mirco Schoenfeld <mirco.schoenfeld at tum.de>
Date: Wed, 18 Sep 2019 11:27:35 UTC
Commit: 67456e0cc3b801121c00ac4853d65d4dfb946e34
https://github.com/geany/geany/commit/67456e0cc3b801121c00ac4853d65d4dfb946e34
Log Message:
-----------
Copied BibTeX-parser from https://github.com/universal-ctags/ctags/pull/2137
Modified Paths:
--------------
ctags/main/parsers.h
ctags/parsers/bibtex.c
src/symbols.c
src/tagmanager/tm_parser.c
Modified: ctags/main/parsers.h
2 lines changed, 1 insertions(+), 1 deletions(-)
===================================================================
@@ -24,7 +24,7 @@
PhpParser, \
PythonParser, \
TexParser, \
- BibParser, \
+ BibtexParser, \
AsmParser, \
ConfParser, \
SqlParser, \
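Context note (added in this mail, not part of the commit): the identifier listed here must match the parser constructor exported by ctags/parsers/bibtex.c, which the diff below renames from BibParser to BibtexParser to keep the two in step. A minimal sketch of how such a list entry is typically consumed, using an illustrative table name rather than the actual ctags source:

	extern parserDefinition* BibtexParser (void);           /* declaration derived from the list entry */
	parserDefinition* (* const parserCtors []) (void) = {   /* constructor table; "parserCtors" is hypothetical */
		BibtexParser, /* ... other parsers ... */
	};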
Modified: ctags/parsers/bibtex.c
493 lines changed, 372 insertions(+), 121 deletions(-)
===================================================================
@@ -12,169 +12,420 @@
*/
/*
-* INCLUDE FILES
-*/
-#include "general.h" /* must always come first */
-
-#include <ctype.h>
+ * INCLUDE FILES
+ */
+#include "general.h" /* must always come first */
+#include <ctype.h> /* to define isalpha () */
#include <string.h>
+#include "debug.h"
+#include "entry.h"
+#include "keyword.h"
#include "parse.h"
#include "read.h"
-#include "vstring.h"
#include "routines.h"
+#include "vstring.h"
+
+/*
+ * MACROS
+ */
+#define isType(token,t) (bool) ((token)->type == (t))
+#define isKeyword(token,k) (bool) ((token)->keyword == (k))
+#define isIdentChar(c) \
+ (isalpha (c) || isdigit (c) || (c) == '_' || (c) == '-' || (c) == '+')
+
+/*
+ * DATA DECLARATIONS
+ */
/*
-* DATA DEFINITIONS
-*/
+ * Used to specify type of keyword.
+ */
+enum eKeywordId {
+ KEYWORD_article,
+ KEYWORD_book,
+ KEYWORD_booklet,
+ KEYWORD_conference,
+ KEYWORD_inbook,
+ KEYWORD_incollection,
+ KEYWORD_inproceedings,
+ KEYWORD_manual,
+ KEYWORD_mastersthesis,
+ KEYWORD_misc,
+ KEYWORD_phdthesis,
+ KEYWORD_proceedings,
+ KEYWORD_string,
+ KEYWORD_techreport,
+ KEYWORD_unpublished
+};
+typedef int keywordId; /* to allow KEYWORD_NONE */
+
+enum eTokenType {
+ /* 0..255 are the byte's value. Some are named for convenience */
+ TOKEN_OPEN_CURLY = '{',
+ /* above is special types */
+ TOKEN_UNDEFINED = 256,
+ TOKEN_KEYWORD,
+ TOKEN_IDENTIFIER
+};
+typedef int tokenType;
+
+typedef struct sTokenInfo {
+ tokenType type;
+ keywordId keyword;
+ vString * string;
+ unsigned long lineNumber;
+ MIOPos filePosition;
+} tokenInfo;
+
+/*
+ * DATA DEFINITIONS
+ */
+
+static langType Lang_bib;
+
typedef enum {
- K_ARTICLE,
- K_BOOK,
- K_PAPER,
- K_THESIS,
- K_OTHER
-} BibKind;
-
-static kindDefinition BibKinds[] = {
- { true, 'f', "function", "@article @inbook @incollection" },
- { true, 'c', "class", "@book @booklet @proceedings" },
- { true, 'm', "member", "@inproceedings @conference" },
- { true, 'v', "variable", "@phdthesis @mastersthesis" },
- { true, 's', "struct", "@manual @misc @techreport" }
+ BIBTAG_ARTICLE,
+ BIBTAG_BOOK,
+ BIBTAG_BOOKLET,
+ BIBTAG_CONFERENCE,
+ BIBTAG_INBOOK,
+ BIBTAG_INCOLLECTION,
+ BIBTAG_INPROCEEDINGS,
+ BIBTAG_MANUAL,
+ BIBTAG_MASTERSTHESIS,
+ BIBTAG_MISC,
+ BIBTAG_PHDTHESIS,
+ BIBTAG_PROCEEDINGS,
+ BIBTAG_STRING,
+ BIBTAG_TECHREPORT,
+ BIBTAG_UNPUBLISHED,
+ BIBTAG_COUNT
+} bibKind;
+
+static kindDefinition BibKinds [] = {
+ { true, 'a', "article", "article" },
+ { true, 'b', "book", "book" },
+ { true, 'B', "booklet", "booklet" },
+ { true, 'c', "conference", "conference" },
+ { true, 'i', "inbook", "inbook" },
+ { true, 'I', "incollection", "incollection" },
+ { true, 'j', "inproceedings", "inproceedings" },
+ { true, 'm', "manual", "manual" },
+ { true, 'M', "mastersthesis", "mastersthesis" },
+ { true, 'n', "misc", "misc" },
+ { true, 'p', "phdthesis", "phdthesis" },
+ { true, 'P', "proceedings", "proceedings" },
+ { true, 's', "string", "string" },
+ { true, 't', "techreport", "techreport" },
+ { true, 'u', "unpublished", "unpublished" }
};
+static const keywordTable BibKeywordTable [] = {
+ /* keyword keyword ID */
+ { "article", KEYWORD_article },
+ { "book", KEYWORD_book },
+ { "booklet", KEYWORD_booklet },
+ { "conference", KEYWORD_conference },
+ { "inbook", KEYWORD_inbook },
+ { "incollection", KEYWORD_incollection },
+ { "inproceedings",KEYWORD_inproceedings },
+ { "manual", KEYWORD_manual },
+ { "mastersthesis",KEYWORD_mastersthesis },
+ { "misc", KEYWORD_misc },
+ { "phdthesis", KEYWORD_phdthesis },
+ { "proceedings", KEYWORD_proceedings },
+ { "string", KEYWORD_string },
+ { "techreport", KEYWORD_techreport },
+ { "unpublished", KEYWORD_unpublished }
+};
+
/*
-* FUNCTION DEFINITIONS
-*/
-#define BIB_LABEL (1<<2)
+ * FUNCTION DEFINITIONS
+ */
-static int getBibWord(const char * ref, const char **ptr)
+static tokenInfo *newToken (void)
{
- const char *p = *ptr;
+ tokenInfo *const token = xMalloc (1, tokenInfo);
- while ((*ref != '\0') && (*p != '\0') && (tolower(*ref) == tolower(*p)))
- ref++, p++;
+ token->type = TOKEN_UNDEFINED;
+ token->keyword = KEYWORD_NONE;
+ token->string = vStringNew ();
+ token->lineNumber = getInputLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ return token;
+}
- if (*ref)
- return false;
+static void deleteToken (tokenInfo *const token)
+{
+ vStringDelete (token->string);
+ eFree (token);
+}
- *ptr = p;
- return true;
+/*
+ * Tag generation functions
+ */
+static void makeBibTag (tokenInfo *const token, bibKind kind)
+{
+ if (BibKinds [kind].enabled)
+ {
+ const char *const name = vStringValue (token->string);
+ tagEntryInfo e;
+ initTagEntry (&e, name, kind);
+
+ e.lineNumber = token->lineNumber;
+ e.filePosition = token->filePosition;
+
+ makeTagEntry (&e);
+ }
}
-static void createBibTag(int flags, BibKind kind, const char * l)
+/*
+ * Parsing functions
+ */
+
+/*
+ * Read a C identifier beginning with "firstChar" and places it into
+ * "name".
+ */
+static void parseIdentifier (vString *const string, const int firstChar)
{
- vString *name = vStringNew ();
+ int c = firstChar;
+ Assert (isIdentChar (c));
+ do
+ {
+ vStringPut (string, c);
+ c = getcFromInputFile ();
+ } while (c != EOF && isIdentChar (c));
+ if (c != EOF)
+ ungetcToInputFile (c); /* unget non-identifier character */
+}
+
+static bool readToken (tokenInfo *const token)
+{
+ int c;
+
+ token->type = TOKEN_UNDEFINED;
+ token->keyword = KEYWORD_NONE;
+ vStringClear (token->string);
+
+getNextChar:
- while ((*l == ' '))
- l++;
- if (flags & (BIB_LABEL))
+ do
{
- if (*l != '{')
- goto no_tag;
- l++;
+ c = getcFromInputFile ();
+ }
+ while (c == '\t' || c == ' ' || c == '\n');
+
+ token->lineNumber = getInputLineNumber ();
+ token->filePosition = getInputFilePosition ();
+
+ token->type = (unsigned char) c;
+ switch (c)
+ {
+ case EOF: return false;
+
+ case '@':
+ /*
+ * All Bib entries start with an at symbol.
+ * Check if the next character is an alpha character
+ * else it is not a potential tex tag.
+ */
+ c = getcFromInputFile ();
+ if (! isalpha (c))
+ ungetcToInputFile (c);
+ else
+ {
+ vStringPut (token->string, '@');
+ parseIdentifier (token->string, c);
+ token->keyword = lookupCaseKeyword (vStringValue (token->string) + 1, Lang_bib);
+ if (isKeyword (token, KEYWORD_NONE))
+ token->type = TOKEN_IDENTIFIER;
+ else
+ token->type = TOKEN_KEYWORD;
+ }
+ break;
+ case '%':
+ skipToCharacterInInputFile ('\n'); /* % are single line comments */
+ goto getNextChar;
+ break;
+ default:
+ if (isIdentChar (c))
+ {
+ parseIdentifier (token->string, c);
+ token->type = TOKEN_IDENTIFIER;
+ }
+ break;
+ }
+ return true;
+}
+
+static void copyToken (tokenInfo *const dest, tokenInfo *const src)
+{
+ dest->lineNumber = src->lineNumber;
+ dest->filePosition = src->filePosition;
+ dest->type = src->type;
+ dest->keyword = src->keyword;
+ vStringCopy (dest->string, src->string);
+}
- do
+/*
+ * Scanning functions
+ */
+
+static bool parseTag (tokenInfo *const token, bibKind kind)
+{
+ tokenInfo * const name = newToken ();
+ vString * currentid;
+ bool eof = false;
+
+ currentid = vStringNew ();
+ /*
+ * Bib entries are of these formats:
+ * @article{identifier,
+ * author="John Doe"}
+ *
+ * When a keyword is found, loop through all words up to
+ * a comma brace for the tag name.
+ *
+ */
+ if (isType (token, TOKEN_KEYWORD))
+ {
+ copyToken (name, token);
+ if (!readToken (token))
{
- vStringPut(name, (int) *l);
- ++l;
- } while ((*l != '\0') && (*l != ',') && (*l != '}'));
- if (name->buffer[0] != ',')
- makeSimpleTag(name, kind);
+ eof = true;
+ goto out;
+ }
}
- else
+
+ if (isType (token, TOKEN_OPEN_CURLY))
{
- vStringPut(name, (int) *l);
- makeSimpleTag(name, kind);
+ if (!readToken (token))
+ {
+ eof = true;
+ goto out;
+ }
+ if (isType (token, TOKEN_IDENTIFIER)){
+ vStringCat (currentid, token->string);
+ vStringStripTrailing (currentid);
+ if (vStringLength (currentid) > 0)
+ {
+ vStringCopy (name->string, currentid);
+ makeBibTag (name, kind);
+ }
+ }
+ else
+ { // should find an identifier for bib item at first place
+ eof = true;
+ goto out;
+ }
}
+
-no_tag:
- vStringDelete(name);
+ out:
+ deleteToken (name);
+ vStringDelete (currentid);
+ return eof;
}
-static void findBibTags(void)
+static void parseBibFile (tokenInfo *const token)
{
- const char *line;
+ bool eof = false;
- while ((line = (const char*)readLineFromInputFile()) != NULL)
+ do
{
- const char *cp = line;
- /*int escaped = 0;*/
+ if (!readToken (token))
+ break;
- for (; *cp != '\0'; cp++)
+ if (isType (token, TOKEN_KEYWORD))
{
- if (*cp == '%')
- break;
- if (*cp == '@')
+ switch (token->keyword)
{
- cp++;
-
- if (getBibWord("article", &cp))
- {
- createBibTag(BIB_LABEL, K_ARTICLE, cp);
- continue;
- }else if (getBibWord("inbook", &cp))
- {
- createBibTag(BIB_LABEL, K_ARTICLE, cp);
- continue;
- }else if (getBibWord("incollection", &cp))
- {
- createBibTag(BIB_LABEL, K_ARTICLE, cp);
- continue;
- }else if (getBibWord("book", &cp))
- {
- createBibTag(BIB_LABEL, K_BOOK, cp);
- continue;
- }else if (getBibWord("booklet", &cp))
- {
- createBibTag(BIB_LABEL, K_BOOK, cp);
- continue;
- }else if (getBibWord("proceedings", &cp))
- {
- createBibTag(BIB_LABEL, K_BOOK, cp);
- continue;
- }else if (getBibWord("inproceedings", &cp))
- {
- createBibTag(BIB_LABEL, K_PAPER, cp);
- continue;
- }else if (getBibWord("conference", &cp))
- {
- createBibTag(BIB_LABEL, K_PAPER, cp);
- continue;
- }else if (getBibWord("phdthesis", &cp))
- {
- createBibTag(BIB_LABEL, K_THESIS, cp);
- continue;
- }else if (getBibWord("mastersthesis", &cp))
- {
- createBibTag(BIB_LABEL, K_THESIS, cp);
- continue;
- }else if (getBibWord("manual", &cp))
- {
- createBibTag(BIB_LABEL, K_OTHER, cp);
- continue;
- }else if (getBibWord("misc", &cp))
- {
- createBibTag(BIB_LABEL, K_OTHER, cp);
- continue;
- }else if (getBibWord("techreport", &cp))
- {
- createBibTag(BIB_LABEL, K_OTHER, cp);
- continue;
- }
+ case KEYWORD_article:
+ eof = parseTag (token, BIBTAG_ARTICLE);
+ break;
+ case KEYWORD_book:
+ eof = parseTag (token, BIBTAG_BOOK);
+ break;
+ case KEYWORD_booklet:
+ eof = parseTag (token, BIBTAG_BOOKLET);
+ break;
+ case KEYWORD_conference:
+ eof = parseTag (token, BIBTAG_CONFERENCE);
+ break;
+ case KEYWORD_inbook:
+ eof = parseTag (token, BIBTAG_INBOOK);
+ break;
+ case KEYWORD_incollection:
+ eof = parseTag (token, BIBTAG_INCOLLECTION);
+ break;
+ case KEYWORD_inproceedings:
+ eof = parseTag (token, BIBTAG_INPROCEEDINGS);
+ break;
+ case KEYWORD_manual:
+ eof = parseTag (token, BIBTAG_MANUAL);
+ break;
+ case KEYWORD_mastersthesis:
+ eof = parseTag (token, BIBTAG_MASTERSTHESIS);
+ break;
+ case KEYWORD_misc:
+ eof = parseTag (token, BIBTAG_MISC);
+ break;
+ case KEYWORD_phdthesis:
+ eof = parseTag (token, BIBTAG_PHDTHESIS);
+ break;
+ case KEYWORD_proceedings:
+ eof = parseTag (token, BIBTAG_PROCEEDINGS);
+ break;
+ case KEYWORD_string:
+ eof = parseTag (token, BIBTAG_STRING);
+ break;
+ case KEYWORD_techreport:
+ eof = parseTag (token, BIBTAG_TECHREPORT);
+ break;
+ case KEYWORD_unpublished:
+ eof = parseTag (token, BIBTAG_UNPUBLISHED);
+ break;
+ default:
+ break;
}
}
- }
+ if (eof)
+ break;
+ } while (true);
+}
+
+static void initialize (const langType language)
+{
+ Lang_bib = language;
+}
+
+static void findBibTags (void)
+{
+ tokenInfo *const token = newToken ();
+
+ parseBibFile (token);
+
+ deleteToken (token);
}
-extern parserDefinition* BibParser (void)
+/* Create parser definition structure */
+extern parserDefinition* BibtexParser (void)
{
+ Assert (ARRAY_SIZE (BibKinds) == BIBTAG_COUNT);
static const char *const extensions [] = { "bib", NULL };
- parserDefinition * def = parserNew ("Bib");
- def->kindTable = BibKinds;
- def->kindCount = ARRAY_SIZE (BibKinds);
+ parserDefinition *const def = parserNew ("BibTeX");
def->extensions = extensions;
- def->parser = findBibTags;
+ /*
+ * New definitions for parsing instead of regex
+ */
+ def->kindTable = BibKinds;
+ def->kindCount = ARRAY_SIZE (BibKinds);
+ def->parser = findBibTags;
+ def->initialize = initialize;
+ def->keywordTable = BibKeywordTable;
+ def->keywordCount = ARRAY_SIZE (BibKeywordTable);
return def;
}
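Illustrative walk-through (added in this mail, not part of the commit), following the entry format shown in the parser's own comment above:

	/*
	 * Hypothetical input:
	 *
	 *   @article{doe2019example,
	 *     author = "John Doe"
	 *   }
	 *
	 * readToken() sees '@' followed by an alphabetic character, collects
	 * "@article" and classifies it as TOKEN_KEYWORD with KEYWORD_article;
	 * parseBibFile() then dispatches to parseTag (token, BIBTAG_ARTICLE),
	 * which reads the '{' and the identifier "doe2019example" and calls
	 * makeBibTag(), emitting a tag of kind 'a' ("article") at that position.
	 * Lines introduced by '%' are skipped as comments in readToken().
	 */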
Modified: src/symbols.c
7 lines changed, 5 insertions(+), 2 deletions(-)
===================================================================
@@ -528,11 +528,14 @@ static void add_top_level_items(GeanyDocument *doc)
case GEANY_FILETYPES_BIBTEX:
{
tag_list_add_groups(tag_store,
- &(tv_iters.tag_function), _("Journal Articles"), ICON_NONE,
+ &(tv_iters.tag_function), _("Articles"), ICON_NONE,
+ &(tv_iters.tag_macro), _("Book Chapters"), ICON_NONE,
&(tv_iters.tag_class), _("Books & Conference Proceedings"), ICON_NONE,
&(tv_iters.tag_member), _("Conference Papers"), ICON_NONE,
&(tv_iters.tag_variable), _("Theses"), ICON_NONE,
- &(tv_iters.tag_struct), _("Other"), ICON_NONE,
+ &(tv_iters.tag_namespace), _("Strings"), ICON_NONE,
+ &(tv_iters.tag_externvar), _("Unpublished"), ICON_NONE,
+ &(tv_iters.tag_other), _("Other"), ICON_NONE,
NULL);
break;
}
Modified: src/tagmanager/tm_parser.c
20 lines changed, 15 insertions(+), 5 deletions(-)
===================================================================
@@ -125,11 +125,21 @@ static TMParserMapEntry map_LATEX[] = {
{'s', tm_tag_struct_t},
};
static TMParserMapEntry map_BIBTEX[] = {
- {'f', tm_tag_function_t},
- {'c', tm_tag_class_t},
- {'m', tm_tag_member_t},
- {'v', tm_tag_variable_t},
- {'s', tm_tag_struct_t},
+ {'a', tm_tag_function_t},
+ {'b', tm_tag_class_t},
+ {'B', tm_tag_class_t},
+ {'c', tm_tag_member_t},
+ {'i', tm_tag_macro_t},
+ {'I', tm_tag_macro_t},
+ {'j', tm_tag_member_t},
+ {'m', tm_tag_other_t},
+ {'M', tm_tag_variable_t},
+ {'n', tm_tag_other_t},
+ {'p', tm_tag_variable_t},
+ {'P', tm_tag_class_t},
+ {'s', tm_tag_namespace_t},
+ {'t', tm_tag_other_t},
+ {'u', tm_tag_externvar_t},
};
static TMParserMapEntry map_ASM[] = {
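How the pieces fit together (reading of the hunks above, added in this mail, not part of the commit): the single-letter kinds produced by the new parser are translated by this map into tag types, which in turn select the symbol-tree groups registered in src/symbols.c, for example:

	'a'      (article)                ->  tm_tag_function_t   ->  "Articles"
	'i', 'I' (inbook, incollection)   ->  tm_tag_macro_t      ->  "Book Chapters"
	's'      (string)                 ->  tm_tag_namespace_t  ->  "Strings"
	'u'      (unpublished)            ->  tm_tag_externvar_t  ->  "Unpublished"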
--------------
This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).