Branch: refs/heads/master
Author: Mirco Schoenfeld mirco.schoenfeld@tum.de
Committer: Mirco Schoenfeld mirco.schoenfeld@tum.de
Date: Wed, 18 Sep 2019 11:27:35 UTC
Commit: 67456e0cc3b801121c00ac4853d65d4dfb946e34
        https://github.com/geany/geany/commit/67456e0cc3b801121c00ac4853d65d4dfb946e34
Log Message:
-----------
Copied BibTeX-parser from https://github.com/universal-ctags/ctags/pull/2137
Modified Paths:
--------------
    ctags/main/parsers.h
    ctags/parsers/bibtex.c
    src/symbols.c
    src/tagmanager/tm_parser.c
Modified: ctags/main/parsers.h 2 lines changed, 1 insertions(+), 1 deletions(-) =================================================================== @@ -24,7 +24,7 @@ PhpParser, \ PythonParser, \ TexParser, \ - BibParser, \ + BibtexParser, \ AsmParser, \ ConfParser, \ SqlParser, \
Modified: ctags/parsers/bibtex.c 493 lines changed, 372 insertions(+), 121 deletions(-) =================================================================== @@ -12,169 +12,420 @@ */
/* -* INCLUDE FILES -*/ -#include "general.h" /* must always come first */ - -#include <ctype.h> + * INCLUDE FILES + */ +#include "general.h" /* must always come first */ +#include <ctype.h> /* to define isalpha () */ #include <string.h>
+#include "debug.h" +#include "entry.h" +#include "keyword.h" #include "parse.h" #include "read.h" -#include "vstring.h" #include "routines.h" +#include "vstring.h" + +/* + * MACROS + */ +#define isType(token,t) (bool) ((token)->type == (t)) +#define isKeyword(token,k) (bool) ((token)->keyword == (k)) +#define isIdentChar(c) \ + (isalpha (c) || isdigit (c) || (c) == '_' || (c) == '-' || (c) == '+') + +/* + * DATA DECLARATIONS + */
/* -* DATA DEFINITIONS -*/ + * Used to specify type of keyword. + */ +enum eKeywordId { + KEYWORD_article, + KEYWORD_book, + KEYWORD_booklet, + KEYWORD_conference, + KEYWORD_inbook, + KEYWORD_incollection, + KEYWORD_inproceedings, + KEYWORD_manual, + KEYWORD_mastersthesis, + KEYWORD_misc, + KEYWORD_phdthesis, + KEYWORD_proceedings, + KEYWORD_string, + KEYWORD_techreport, + KEYWORD_unpublished +}; +typedef int keywordId; /* to allow KEYWORD_NONE */ + +enum eTokenType { + /* 0..255 are the byte's value. Some are named for convenience */ + TOKEN_OPEN_CURLY = '{', + /* above is special types */ + TOKEN_UNDEFINED = 256, + TOKEN_KEYWORD, + TOKEN_IDENTIFIER +}; +typedef int tokenType; + +typedef struct sTokenInfo { + tokenType type; + keywordId keyword; + vString * string; + unsigned long lineNumber; + MIOPos filePosition; +} tokenInfo; + +/* + * DATA DEFINITIONS + */ + +static langType Lang_bib; + typedef enum { - K_ARTICLE, - K_BOOK, - K_PAPER, - K_THESIS, - K_OTHER -} BibKind; - -static kindDefinition BibKinds[] = { - { true, 'f', "function", "@article @inbook @incollection" }, - { true, 'c', "class", "@book @booklet @proceedings" }, - { true, 'm', "member", "@inproceedings @conference" }, - { true, 'v', "variable", "@phdthesis @mastersthesis" }, - { true, 's', "struct", "@manual @misc @techreport" } + BIBTAG_ARTICLE, + BIBTAG_BOOK, + BIBTAG_BOOKLET, + BIBTAG_CONFERENCE, + BIBTAG_INBOOK, + BIBTAG_INCOLLECTION, + BIBTAG_INPROCEEDINGS, + BIBTAG_MANUAL, + BIBTAG_MASTERSTHESIS, + BIBTAG_MISC, + BIBTAG_PHDTHESIS, + BIBTAG_PROCEEDINGS, + BIBTAG_STRING, + BIBTAG_TECHREPORT, + BIBTAG_UNPUBLISHED, + BIBTAG_COUNT +} bibKind; + +static kindDefinition BibKinds [] = { + { true, 'a', "article", "article" }, + { true, 'b', "book", "book" }, + { true, 'B', "booklet", "booklet" }, + { true, 'c', "conference", "conference" }, + { true, 'i', "inbook", "inbook" }, + { true, 'I', "incollection", "incollection" }, + { true, 'j', "inproceedings", "inproceedings" }, + { true, 'm', "manual", 
"manual" }, + { true, 'M', "mastersthesis", "mastersthesis" }, + { true, 'n', "misc", "misc" }, + { true, 'p', "phdthesis", "phdthesis" }, + { true, 'P', "proceedings", "proceedings" }, + { true, 's', "string", "string" }, + { true, 't', "techreport", "techreport" }, + { true, 'u', "unpublished", "unpublished" } };
+static const keywordTable BibKeywordTable [] = { + /* keyword keyword ID */ + { "article", KEYWORD_article }, + { "book", KEYWORD_book }, + { "booklet", KEYWORD_booklet }, + { "conference", KEYWORD_conference }, + { "inbook", KEYWORD_inbook }, + { "incollection", KEYWORD_incollection }, + { "inproceedings",KEYWORD_inproceedings }, + { "manual", KEYWORD_manual }, + { "mastersthesis",KEYWORD_mastersthesis }, + { "misc", KEYWORD_misc }, + { "phdthesis", KEYWORD_phdthesis }, + { "proceedings", KEYWORD_proceedings }, + { "string", KEYWORD_string }, + { "techreport", KEYWORD_techreport }, + { "unpublished", KEYWORD_unpublished } +}; + /* -* FUNCTION DEFINITIONS -*/ -#define BIB_LABEL (1<<2) + * FUNCTION DEFINITIONS + */
-static int getBibWord(const char * ref, const char **ptr) +static tokenInfo *newToken (void) { - const char *p = *ptr; + tokenInfo *const token = xMalloc (1, tokenInfo);
- while ((*ref != '\0') && (*p != '\0') && (tolower(*ref) == tolower(*p))) - ref++, p++; + token->type = TOKEN_UNDEFINED; + token->keyword = KEYWORD_NONE; + token->string = vStringNew (); + token->lineNumber = getInputLineNumber (); + token->filePosition = getInputFilePosition ();
+ return token; +}
- if (*ref) - return false; +static void deleteToken (tokenInfo *const token) +{ + vStringDelete (token->string); + eFree (token); +}
- *ptr = p; - return true; +/* + * Tag generation functions + */ +static void makeBibTag (tokenInfo *const token, bibKind kind) +{ + if (BibKinds [kind].enabled) + { + const char *const name = vStringValue (token->string); + tagEntryInfo e; + initTagEntry (&e, name, kind); + + e.lineNumber = token->lineNumber; + e.filePosition = token->filePosition; + + makeTagEntry (&e); + } }
-static void createBibTag(int flags, BibKind kind, const char * l) +/* + * Parsing functions + */ + +/* + * Read a C identifier beginning with "firstChar" and places it into + * "name". + */ +static void parseIdentifier (vString *const string, const int firstChar) { - vString *name = vStringNew (); + int c = firstChar; + Assert (isIdentChar (c)); + do + { + vStringPut (string, c); + c = getcFromInputFile (); + } while (c != EOF && isIdentChar (c)); + if (c != EOF) + ungetcToInputFile (c); /* unget non-identifier character */ +} + +static bool readToken (tokenInfo *const token) +{ + int c; + + token->type = TOKEN_UNDEFINED; + token->keyword = KEYWORD_NONE; + vStringClear (token->string); + +getNextChar:
- while ((*l == ' ')) - l++; - if (flags & (BIB_LABEL)) + do { - if (*l != '{') - goto no_tag; - l++; + c = getcFromInputFile (); + } + while (c == '\t' || c == ' ' || c == '\n'); + + token->lineNumber = getInputLineNumber (); + token->filePosition = getInputFilePosition (); + + token->type = (unsigned char) c; + switch (c) + { + case EOF: return false; + + case '@': + /* + * All Bib entries start with an at symbol. + * Check if the next character is an alpha character + * else it is not a potential tex tag. + */ + c = getcFromInputFile (); + if (! isalpha (c)) + ungetcToInputFile (c); + else + { + vStringPut (token->string, '@'); + parseIdentifier (token->string, c); + token->keyword = lookupCaseKeyword (vStringValue (token->string) + 1, Lang_bib); + if (isKeyword (token, KEYWORD_NONE)) + token->type = TOKEN_IDENTIFIER; + else + token->type = TOKEN_KEYWORD; + } + break; + case '%': + skipToCharacterInInputFile ('\n'); /* % are single line comments */ + goto getNextChar; + break; + default: + if (isIdentChar (c)) + { + parseIdentifier (token->string, c); + token->type = TOKEN_IDENTIFIER; + } + break; + } + return true; +} + +static void copyToken (tokenInfo *const dest, tokenInfo *const src) +{ + dest->lineNumber = src->lineNumber; + dest->filePosition = src->filePosition; + dest->type = src->type; + dest->keyword = src->keyword; + vStringCopy (dest->string, src->string); +}
- do +/* + * Scanning functions + */ + +static bool parseTag (tokenInfo *const token, bibKind kind) +{ + tokenInfo * const name = newToken (); + vString * currentid; + bool eof = false; + + currentid = vStringNew (); + /* + * Bib entries are of these formats: + * @article{identifier, + * author="John Doe"} + * + * When a keyword is found, loop through all words up to + * a comma brace for the tag name. + * + */ + if (isType (token, TOKEN_KEYWORD)) + { + copyToken (name, token); + if (!readToken (token)) { - vStringPut(name, (int) *l); - ++l; - } while ((*l != '\0') && (*l != ',') && (*l != '}')); - if (name->buffer[0] != ',') - makeSimpleTag(name, kind); + eof = true; + goto out; + } } - else + + if (isType (token, TOKEN_OPEN_CURLY)) { - vStringPut(name, (int) *l); - makeSimpleTag(name, kind); + if (!readToken (token)) + { + eof = true; + goto out; + } + if (isType (token, TOKEN_IDENTIFIER)){ + vStringCat (currentid, token->string); + vStringStripTrailing (currentid); + if (vStringLength (currentid) > 0) + { + vStringCopy (name->string, currentid); + makeBibTag (name, kind); + } + } + else + { // should find an identifier for bib item at first place + eof = true; + goto out; + } } +
-no_tag: - vStringDelete(name); + out: + deleteToken (name); + vStringDelete (currentid); + return eof; }
-static void findBibTags(void) +static void parseBibFile (tokenInfo *const token) { - const char *line; + bool eof = false;
- while ((line = (const char*)readLineFromInputFile()) != NULL) + do { - const char *cp = line; - /*int escaped = 0;*/ + if (!readToken (token)) + break;
- for (; *cp != '\0'; cp++) + if (isType (token, TOKEN_KEYWORD)) { - if (*cp == '%') - break; - if (*cp == '@') + switch (token->keyword) { - cp++; - - if (getBibWord("article", &cp)) - { - createBibTag(BIB_LABEL, K_ARTICLE, cp); - continue; - }else if (getBibWord("inbook", &cp)) - { - createBibTag(BIB_LABEL, K_ARTICLE, cp); - continue; - }else if (getBibWord("incollection", &cp)) - { - createBibTag(BIB_LABEL, K_ARTICLE, cp); - continue; - }else if (getBibWord("book", &cp)) - { - createBibTag(BIB_LABEL, K_BOOK, cp); - continue; - }else if (getBibWord("booklet", &cp)) - { - createBibTag(BIB_LABEL, K_BOOK, cp); - continue; - }else if (getBibWord("proceedings", &cp)) - { - createBibTag(BIB_LABEL, K_BOOK, cp); - continue; - }else if (getBibWord("inproceedings", &cp)) - { - createBibTag(BIB_LABEL, K_PAPER, cp); - continue; - }else if (getBibWord("conference", &cp)) - { - createBibTag(BIB_LABEL, K_PAPER, cp); - continue; - }else if (getBibWord("phdthesis", &cp)) - { - createBibTag(BIB_LABEL, K_THESIS, cp); - continue; - }else if (getBibWord("mastersthesis", &cp)) - { - createBibTag(BIB_LABEL, K_THESIS, cp); - continue; - }else if (getBibWord("manual", &cp)) - { - createBibTag(BIB_LABEL, K_OTHER, cp); - continue; - }else if (getBibWord("misc", &cp)) - { - createBibTag(BIB_LABEL, K_OTHER, cp); - continue; - }else if (getBibWord("techreport", &cp)) - { - createBibTag(BIB_LABEL, K_OTHER, cp); - continue; - } + case KEYWORD_article: + eof = parseTag (token, BIBTAG_ARTICLE); + break; + case KEYWORD_book: + eof = parseTag (token, BIBTAG_BOOK); + break; + case KEYWORD_booklet: + eof = parseTag (token, BIBTAG_BOOKLET); + break; + case KEYWORD_conference: + eof = parseTag (token, BIBTAG_CONFERENCE); + break; + case KEYWORD_inbook: + eof = parseTag (token, BIBTAG_INBOOK); + break; + case KEYWORD_incollection: + eof = parseTag (token, BIBTAG_INCOLLECTION); + break; + case KEYWORD_inproceedings: + eof = parseTag (token, BIBTAG_INPROCEEDINGS); + break; + case KEYWORD_manual: + eof = 
parseTag (token, BIBTAG_MANUAL); + break; + case KEYWORD_mastersthesis: + eof = parseTag (token, BIBTAG_MASTERSTHESIS); + break; + case KEYWORD_misc: + eof = parseTag (token, BIBTAG_MISC); + break; + case KEYWORD_phdthesis: + eof = parseTag (token, BIBTAG_PHDTHESIS); + break; + case KEYWORD_proceedings: + eof = parseTag (token, BIBTAG_PROCEEDINGS); + break; + case KEYWORD_string: + eof = parseTag (token, BIBTAG_STRING); + break; + case KEYWORD_techreport: + eof = parseTag (token, BIBTAG_TECHREPORT); + break; + case KEYWORD_unpublished: + eof = parseTag (token, BIBTAG_UNPUBLISHED); + break; + default: + break; } } - } + if (eof) + break; + } while (true); +} + +static void initialize (const langType language) +{ + Lang_bib = language; +} + +static void findBibTags (void) +{ + tokenInfo *const token = newToken (); + + parseBibFile (token); + + deleteToken (token); }
-extern parserDefinition* BibParser (void) +/* Create parser definition structure */ +extern parserDefinition* BibtexParser (void) { + Assert (ARRAY_SIZE (BibKinds) == BIBTAG_COUNT); static const char *const extensions [] = { "bib", NULL }; - parserDefinition * def = parserNew ("Bib"); - def->kindTable = BibKinds; - def->kindCount = ARRAY_SIZE (BibKinds); + parserDefinition *const def = parserNew ("BibTeX"); def->extensions = extensions; - def->parser = findBibTags; + /* + * New definitions for parsing instead of regex + */ + def->kindTable = BibKinds; + def->kindCount = ARRAY_SIZE (BibKinds); + def->parser = findBibTags; + def->initialize = initialize; + def->keywordTable = BibKeywordTable; + def->keywordCount = ARRAY_SIZE (BibKeywordTable); return def; }
Modified: src/symbols.c 7 lines changed, 5 insertions(+), 2 deletions(-) =================================================================== @@ -528,11 +528,14 @@ static void add_top_level_items(GeanyDocument *doc) case GEANY_FILETYPES_BIBTEX: { tag_list_add_groups(tag_store, - &(tv_iters.tag_function), _("Journal Articles"), ICON_NONE, + &(tv_iters.tag_function), _("Articles"), ICON_NONE, + &(tv_iters.tag_macro), _("Book Chapters"), ICON_NONE, &(tv_iters.tag_class), _("Books & Conference Proceedings"), ICON_NONE, &(tv_iters.tag_member), _("Conference Papers"), ICON_NONE, &(tv_iters.tag_variable), _("Theses"), ICON_NONE, - &(tv_iters.tag_struct), _("Other"), ICON_NONE, + &(tv_iters.tag_namespace), _("Strings"), ICON_NONE, + &(tv_iters.tag_externvar), _("Unpublished"), ICON_NONE, + &(tv_iters.tag_other), _("Other"), ICON_NONE, NULL); break; }
Modified: src/tagmanager/tm_parser.c 20 lines changed, 15 insertions(+), 5 deletions(-) =================================================================== @@ -125,11 +125,21 @@ static TMParserMapEntry map_LATEX[] = { {'s', tm_tag_struct_t}, }; static TMParserMapEntry map_BIBTEX[] = { - {'f', tm_tag_function_t}, - {'c', tm_tag_class_t}, - {'m', tm_tag_member_t}, - {'v', tm_tag_variable_t}, - {'s', tm_tag_struct_t}, + {'a', tm_tag_function_t}, + {'b', tm_tag_class_t}, + {'B', tm_tag_class_t}, + {'c', tm_tag_member_t}, + {'i', tm_tag_macro_t}, + {'I', tm_tag_macro_t}, + {'j', tm_tag_member_t}, + {'m', tm_tag_other_t}, + {'M', tm_tag_variable_t}, + {'n', tm_tag_other_t}, + {'p', tm_tag_variable_t}, + {'P', tm_tag_class_t}, + {'s', tm_tag_namespace_t}, + {'t', tm_tag_other_t}, + {'u', tm_tag_externvar_t}, };
static TMParserMapEntry map_ASM[] = {
--------------
This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).