Branch: refs/heads/master Author: Matthew Brush matt@geany.org Committer: GitHub noreply@github.com Date: Wed, 30 Oct 2019 00:12:02 UTC Commit: 8abe5342c528be02bb62b9927b8c25e8aa4e2a35 https://github.com/geany/geany/commit/8abe5342c528be02bb62b9927b8c25e8aa4e2a35
Log Message: ----------- Merge pull request #2212 from TwlyY29/bibtex-parser
Added a bibtex parser that extracts identifiers of entries in bib-files
Modified Paths: -------------- ctags/Makefile.am ctags/main/parsers.h ctags/parsers/bibtex.c data/Makefile.am data/filedefs/filetypes.bibtex data/filetype_extensions.conf src/filetypes.c src/filetypes.h src/symbols.c src/tagmanager/tm_parser.c src/tagmanager/tm_parser.h
Modified: ctags/Makefile.am 1 lines changed, 1 insertions(+), 0 deletions(-) =================================================================== @@ -14,6 +14,7 @@ parsers = \ parsers/asciidoc.c \ parsers/asm.c \ parsers/basic.c \ + parsers/bibtex.c \ parsers/c.c \ parsers/cobol.c \ parsers/iniconf.c \
Modified: ctags/main/parsers.h 3 lines changed, 2 insertions(+), 1 deletions(-) =================================================================== @@ -65,6 +65,7 @@ GoParser, \ JsonParser, \ ZephirParser, \ - PowerShellParser + PowerShellParser, \ + BibtexParser
#endif /* CTAGS_MAIN_PARSERS_H */
Modified: ctags/parsers/bibtex.c 431 lines changed, 431 insertions(+), 0 deletions(-) =================================================================== @@ -0,0 +1,431 @@ +/* + * Copyright (c) 2000-2001, Jérôme Plût + * Copyright (c) 2006, Enrico Tröger + * Copyright (c) 2019, Mirco Schönfeld + * + * This source code is released for free distribution under the terms of the + * GNU General Public License. + * + * This module contains functions for generating tags for source files + * for the BibTex formatting system. + * https://en.wikipedia.org/wiki/BibTeX + */ + +/* + * INCLUDE FILES + */ +#include "general.h" /* must always come first */ +#include <ctype.h> /* to define isalpha () */ +#include <string.h> + +#include "debug.h" +#include "entry.h" +#include "keyword.h" +#include "parse.h" +#include "read.h" +#include "routines.h" +#include "vstring.h" + +/* + * MACROS + */ +#define isType(token,t) (bool) ((token)->type == (t)) +#define isKeyword(token,k) (bool) ((token)->keyword == (k)) +#define isIdentChar(c) \ + (isalpha (c) || isdigit (c) || (c) == '_' || (c) == '-' || (c) == '+') + +/* + * DATA DECLARATIONS + */ + +/* + * Used to specify type of keyword. + */ +enum eKeywordId { + KEYWORD_article, + KEYWORD_book, + KEYWORD_booklet, + KEYWORD_conference, + KEYWORD_inbook, + KEYWORD_incollection, + KEYWORD_inproceedings, + KEYWORD_manual, + KEYWORD_mastersthesis, + KEYWORD_misc, + KEYWORD_phdthesis, + KEYWORD_proceedings, + KEYWORD_string, + KEYWORD_techreport, + KEYWORD_unpublished +}; +typedef int keywordId; /* to allow KEYWORD_NONE */ + +enum eTokenType { + /* 0..255 are the byte's value. 
Some are named for convenience */ + TOKEN_OPEN_CURLY = '{', + /* above is special types */ + TOKEN_UNDEFINED = 256, + TOKEN_KEYWORD, + TOKEN_IDENTIFIER +}; +typedef int tokenType; + +typedef struct sTokenInfo { + tokenType type; + keywordId keyword; + vString * string; + unsigned long lineNumber; + MIOPos filePosition; +} tokenInfo; + +/* + * DATA DEFINITIONS + */ + +static langType Lang_bib; + +typedef enum { + BIBTAG_ARTICLE, + BIBTAG_BOOK, + BIBTAG_BOOKLET, + BIBTAG_CONFERENCE, + BIBTAG_INBOOK, + BIBTAG_INCOLLECTION, + BIBTAG_INPROCEEDINGS, + BIBTAG_MANUAL, + BIBTAG_MASTERSTHESIS, + BIBTAG_MISC, + BIBTAG_PHDTHESIS, + BIBTAG_PROCEEDINGS, + BIBTAG_STRING, + BIBTAG_TECHREPORT, + BIBTAG_UNPUBLISHED, + BIBTAG_COUNT +} bibKind; + +static kindDefinition BibKinds [] = { + { true, 'a', "article", "article" }, + { true, 'b', "book", "book" }, + { true, 'B', "booklet", "booklet" }, + { true, 'c', "conference", "conference" }, + { true, 'i', "inbook", "inbook" }, + { true, 'I', "incollection", "incollection" }, + { true, 'j', "inproceedings", "inproceedings" }, + { true, 'm', "manual", "manual" }, + { true, 'M', "mastersthesis", "mastersthesis" }, + { true, 'n', "misc", "misc" }, + { true, 'p', "phdthesis", "phdthesis" }, + { true, 'P', "proceedings", "proceedings" }, + { true, 's', "string", "string" }, + { true, 't', "techreport", "techreport" }, + { true, 'u', "unpublished", "unpublished" } +}; + +static const keywordTable BibKeywordTable [] = { + /* keyword keyword ID */ + { "article", KEYWORD_article }, + { "book", KEYWORD_book }, + { "booklet", KEYWORD_booklet }, + { "conference", KEYWORD_conference }, + { "inbook", KEYWORD_inbook }, + { "incollection", KEYWORD_incollection }, + { "inproceedings",KEYWORD_inproceedings }, + { "manual", KEYWORD_manual }, + { "mastersthesis",KEYWORD_mastersthesis }, + { "misc", KEYWORD_misc }, + { "phdthesis", KEYWORD_phdthesis }, + { "proceedings", KEYWORD_proceedings }, + { "string", KEYWORD_string }, + { "techreport", 
KEYWORD_techreport }, + { "unpublished", KEYWORD_unpublished } +}; + +/* + * FUNCTION DEFINITIONS + */ + +static tokenInfo *newToken (void) +{ + tokenInfo *const token = xMalloc (1, tokenInfo); + + token->type = TOKEN_UNDEFINED; + token->keyword = KEYWORD_NONE; + token->string = vStringNew (); + token->lineNumber = getInputLineNumber (); + token->filePosition = getInputFilePosition (); + + return token; +} + +static void deleteToken (tokenInfo *const token) +{ + vStringDelete (token->string); + eFree (token); +} + +/* + * Tag generation functions + */ +static void makeBibTag (tokenInfo *const token, bibKind kind) +{ + if (BibKinds [kind].enabled) + { + const char *const name = vStringValue (token->string); + tagEntryInfo e; + initTagEntry (&e, name, kind); + + e.lineNumber = token->lineNumber; + e.filePosition = token->filePosition; + + makeTagEntry (&e); + } +} + +/* + * Parsing functions + */ + +/* + * Read a C identifier beginning with "firstChar" and places it into + * "name". + */ +static void parseIdentifier (vString *const string, const int firstChar) +{ + int c = firstChar; + Assert (isIdentChar (c)); + do + { + vStringPut (string, c); + c = getcFromInputFile (); + } while (c != EOF && isIdentChar (c)); + if (c != EOF) + ungetcToInputFile (c); /* unget non-identifier character */ +} + +static bool readToken (tokenInfo *const token) +{ + int c; + + token->type = TOKEN_UNDEFINED; + token->keyword = KEYWORD_NONE; + vStringClear (token->string); + +getNextChar: + + do + { + c = getcFromInputFile (); + } + while (c == '\t' || c == ' ' || c == '\n'); + + token->lineNumber = getInputLineNumber (); + token->filePosition = getInputFilePosition (); + + token->type = (unsigned char) c; + switch (c) + { + case EOF: return false; + + case '@': + /* + * All Bib entries start with an at symbol. + * Check if the next character is an alpha character + * else it is not a potential tex tag. + */ + c = getcFromInputFile (); + if (! 
isalpha (c)) + ungetcToInputFile (c); + else + { + vStringPut (token->string, '@'); + parseIdentifier (token->string, c); + token->keyword = lookupCaseKeyword (vStringValue (token->string) + 1, Lang_bib); + if (isKeyword (token, KEYWORD_NONE)) + token->type = TOKEN_IDENTIFIER; + else + token->type = TOKEN_KEYWORD; + } + break; + case '%': + skipToCharacterInInputFile ('\n'); /* % are single line comments */ + goto getNextChar; + break; + default: + if (isIdentChar (c)) + { + parseIdentifier (token->string, c); + token->type = TOKEN_IDENTIFIER; + } + break; + } + return true; +} + +static void copyToken (tokenInfo *const dest, tokenInfo *const src) +{ + dest->lineNumber = src->lineNumber; + dest->filePosition = src->filePosition; + dest->type = src->type; + dest->keyword = src->keyword; + vStringCopy (dest->string, src->string); +} + +/* + * Scanning functions + */ + +static bool parseTag (tokenInfo *const token, bibKind kind) +{ + tokenInfo * const name = newToken (); + vString * currentid; + bool eof = false; + + currentid = vStringNew (); + /* + * Bib entries are of these formats: + * @article{identifier, + * author="John Doe"} + * + * When a keyword is found, loop through all words up to + * a comma brace for the tag name. 
+ * + */ + if (isType (token, TOKEN_KEYWORD)) + { + copyToken (name, token); + if (!readToken (token)) + { + eof = true; + goto out; + } + } + + if (isType (token, TOKEN_OPEN_CURLY)) + { + if (!readToken (token)) + { + eof = true; + goto out; + } + if (isType (token, TOKEN_IDENTIFIER)){ + vStringCat (currentid, token->string); + vStringStripTrailing (currentid); + if (vStringLength (currentid) > 0) + { + vStringCopy (name->string, currentid); + makeBibTag (name, kind); + } + } + else + { // should find an identifier for bib item at first place + eof = true; + goto out; + } + } + + + out: + deleteToken (name); + vStringDelete (currentid); + return eof; +} + +static void parseBibFile (tokenInfo *const token) +{ + bool eof = false; + + do + { + if (!readToken (token)) + break; + + if (isType (token, TOKEN_KEYWORD)) + { + switch (token->keyword) + { + case KEYWORD_article: + eof = parseTag (token, BIBTAG_ARTICLE); + break; + case KEYWORD_book: + eof = parseTag (token, BIBTAG_BOOK); + break; + case KEYWORD_booklet: + eof = parseTag (token, BIBTAG_BOOKLET); + break; + case KEYWORD_conference: + eof = parseTag (token, BIBTAG_CONFERENCE); + break; + case KEYWORD_inbook: + eof = parseTag (token, BIBTAG_INBOOK); + break; + case KEYWORD_incollection: + eof = parseTag (token, BIBTAG_INCOLLECTION); + break; + case KEYWORD_inproceedings: + eof = parseTag (token, BIBTAG_INPROCEEDINGS); + break; + case KEYWORD_manual: + eof = parseTag (token, BIBTAG_MANUAL); + break; + case KEYWORD_mastersthesis: + eof = parseTag (token, BIBTAG_MASTERSTHESIS); + break; + case KEYWORD_misc: + eof = parseTag (token, BIBTAG_MISC); + break; + case KEYWORD_phdthesis: + eof = parseTag (token, BIBTAG_PHDTHESIS); + break; + case KEYWORD_proceedings: + eof = parseTag (token, BIBTAG_PROCEEDINGS); + break; + case KEYWORD_string: + eof = parseTag (token, BIBTAG_STRING); + break; + case KEYWORD_techreport: + eof = parseTag (token, BIBTAG_TECHREPORT); + break; + case KEYWORD_unpublished: + eof = parseTag 
(token, BIBTAG_UNPUBLISHED); + break; + default: + break; + } + } + if (eof) + break; + } while (true); +} + +static void initialize (const langType language) +{ + Lang_bib = language; +} + +static void findBibTags (void) +{ + tokenInfo *const token = newToken (); + + parseBibFile (token); + + deleteToken (token); +} + +/* Create parser definition structure */ +extern parserDefinition* BibtexParser (void) +{ + Assert (ARRAY_SIZE (BibKinds) == BIBTAG_COUNT); + static const char *const extensions [] = { "bib", NULL }; + parserDefinition *const def = parserNew ("BibTeX"); + def->extensions = extensions; + /* + * New definitions for parsing instead of regex + */ + def->kindTable = BibKinds; + def->kindCount = ARRAY_SIZE (BibKinds); + def->parser = findBibTags; + def->initialize = initialize; + def->keywordTable = BibKeywordTable; + def->keywordCount = ARRAY_SIZE (BibKeywordTable); + return def; +}
Modified: data/Makefile.am 1 lines changed, 1 insertions(+), 0 deletions(-) =================================================================== @@ -11,6 +11,7 @@ filetypes = \ filedefs/filetypes.asciidoc \ filedefs/filetypes.asm \ filedefs/filetypes.batch \ + filedefs/filetypes.bibtex \ filedefs/filetypes.c \ filedefs/filetypes.caml \ filedefs/filetypes.Clojure.conf \
Modified: data/filedefs/filetypes.bibtex 7 lines changed, 7 insertions(+), 0 deletions(-) =================================================================== @@ -0,0 +1,7 @@ +# For complete documentation of this file, please see Geany's main documentation + +[settings] +# highlights commented lines +lexer_filetype=LaTeX +# default extension used when saving files +extension=bib
Modified: data/filetype_extensions.conf 3 lines changed, 2 insertions(+), 1 deletions(-) =================================================================== @@ -10,6 +10,7 @@ Arduino=*.ino;*.pde; Asciidoc=*.asciidoc;*.adoc; ASM=*.asm;*.asm51;*.a51;*.s;*.S;*.sx; Batch=*.bat;*.cmd;*.nt; +BibTeX=*.bib; CAML=*.ml;*.mli; C=*.c;*.xpm; C++=*.cpp;*.cxx;*.c++;*.cc;*.h;*.hpp;*.hxx;*.h++;*.hh;*.C;*.H; @@ -43,7 +44,7 @@ Java=*.java;*.jsp; Javascript=*.js; JSON=*.json; Kotlin=*.kt;*.kts; -LaTeX=*.tex;*.sty;*.idx;*.ltx;*.latex;*.aux;*.bib; +LaTeX=*.tex;*.sty;*.idx;*.ltx;*.latex;*.aux; Lisp=*.lisp; Lua=*.lua; Make=*.mak;*.mk;GNUmakefile;makefile;Makefile;makefile.*;Makefile.*;
Modified: src/filetypes.c 1 lines changed, 1 insertions(+), 0 deletions(-) =================================================================== @@ -160,6 +160,7 @@ static void init_builtin_filetypes(void) FT_INIT( SQL, SQL, "SQL", NULL, FILE, MISC ); FT_INIT( COBOL, COBOL, "COBOL", NULL, SOURCE_FILE, COMPILED ); FT_INIT( LATEX, LATEX, "LaTeX", NULL, SOURCE_FILE, MARKUP ); + FT_INIT( BIBTEX, BIBTEX, "BibTeX", NULL, SOURCE_FILE, MARKUP ); FT_INIT( VHDL, VHDL, "VHDL", NULL, SOURCE_FILE, COMPILED ); FT_INIT( VERILOG, VERILOG, "Verilog", NULL, SOURCE_FILE, COMPILED ); FT_INIT( DIFF, DIFF, "Diff", NULL, FILE, MISC );
Modified: src/filetypes.h 1 lines changed, 1 insertions(+), 0 deletions(-) =================================================================== @@ -105,6 +105,7 @@ typedef enum GEANY_FILETYPES_COFFEESCRIPT, GEANY_FILETYPES_GO, GEANY_FILETYPES_ZEPHIR, + GEANY_FILETYPES_BIBTEX, /* ^ append items here */ GEANY_MAX_BUILT_IN_FILETYPES /* Don't use this, use filetypes_array->len instead */ }
Modified: src/symbols.c 14 lines changed, 14 insertions(+), 0 deletions(-) =================================================================== @@ -525,6 +525,20 @@ static void add_top_level_items(GeanyDocument *doc) NULL); break; } + case GEANY_FILETYPES_BIBTEX: + { + tag_list_add_groups(tag_store, + &(tv_iters.tag_function), _("Articles"), ICON_NONE, + &(tv_iters.tag_macro), _("Book Chapters"), ICON_NONE, + &(tv_iters.tag_class), _("Books & Conference Proceedings"), ICON_NONE, + &(tv_iters.tag_member), _("Conference Papers"), ICON_NONE, + &(tv_iters.tag_variable), _("Theses"), ICON_NONE, + &(tv_iters.tag_namespace), _("Strings"), ICON_NONE, + &(tv_iters.tag_externvar), _("Unpublished"), ICON_NONE, + &(tv_iters.tag_other), _("Other"), ICON_NONE, + NULL); + break; + } case GEANY_FILETYPES_MATLAB: { tag_list_add_groups(tag_store,
Modified: src/tagmanager/tm_parser.c 18 lines changed, 18 insertions(+), 0 deletions(-) =================================================================== @@ -124,6 +124,23 @@ static TMParserMapEntry map_LATEX[] = { {'n', tm_tag_namespace_t}, {'s', tm_tag_struct_t}, }; +static TMParserMapEntry map_BIBTEX[] = { + {'a', tm_tag_function_t}, + {'b', tm_tag_class_t}, + {'B', tm_tag_class_t}, + {'c', tm_tag_member_t}, + {'i', tm_tag_macro_t}, + {'I', tm_tag_macro_t}, + {'j', tm_tag_member_t}, + {'m', tm_tag_other_t}, + {'M', tm_tag_variable_t}, + {'n', tm_tag_other_t}, + {'p', tm_tag_variable_t}, + {'P', tm_tag_class_t}, + {'s', tm_tag_namespace_t}, + {'t', tm_tag_other_t}, + {'u', tm_tag_externvar_t}, +};
static TMParserMapEntry map_ASM[] = { {'d', tm_tag_macro_t}, @@ -531,6 +548,7 @@ static TMParserMap parser_map[] = { MAP_ENTRY(PHP), MAP_ENTRY(PYTHON), MAP_ENTRY(LATEX), + MAP_ENTRY(BIBTEX), MAP_ENTRY(ASM), MAP_ENTRY(CONF), MAP_ENTRY(SQL),
Modified: src/tagmanager/tm_parser.h 1 lines changed, 1 insertions(+), 0 deletions(-) =================================================================== @@ -109,6 +109,7 @@ enum TM_PARSER_JSON, TM_PARSER_ZEPHIR, TM_PARSER_POWERSHELL, + TM_PARSER_BIBTEX, TM_PARSER_COUNT };
-------------- This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).