[geany/geany] 8abe53: Merge pull request #2212 from TwlyY29/bibtex-parser
Matthew Brush
git-noreply at xxxxx
Wed Oct 30 00:12:02 UTC 2019
Branch: refs/heads/master
Author: Matthew Brush <matt at geany.org>
Committer: GitHub <noreply at github.com>
Date: Wed, 30 Oct 2019 00:12:02 UTC
Commit: 8abe5342c528be02bb62b9927b8c25e8aa4e2a35
https://github.com/geany/geany/commit/8abe5342c528be02bb62b9927b8c25e8aa4e2a35
Log Message:
-----------
Merge pull request #2212 from TwlyY29/bibtex-parser
Added a bibtex parser that extracts identifiers of entries in bib-fil…
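For illustration (a hypothetical entry, not part of this commit), the new parser reads a .bib file and emits each entry's identifier as a symbol, choosing the symbol kind from the entry type:

    @article{doe2019,
      author = "John Doe",
      title  = "An example article"
    }

Here "doe2019" would be tagged with the "article" kind.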
Modified Paths:
--------------
ctags/Makefile.am
ctags/main/parsers.h
ctags/parsers/bibtex.c
data/Makefile.am
data/filedefs/filetypes.bibtex
data/filetype_extensions.conf
src/filetypes.c
src/filetypes.h
src/symbols.c
src/tagmanager/tm_parser.c
src/tagmanager/tm_parser.h
Modified: ctags/Makefile.am
1 lines changed, 1 insertions(+), 0 deletions(-)
===================================================================
@@ -14,6 +14,7 @@ parsers = \
parsers/asciidoc.c \
parsers/asm.c \
parsers/basic.c \
+ parsers/bibtex.c \
parsers/c.c \
parsers/cobol.c \
parsers/iniconf.c \
Modified: ctags/main/parsers.h
3 lines changed, 2 insertions(+), 1 deletions(-)
===================================================================
@@ -65,6 +65,7 @@
GoParser, \
JsonParser, \
ZephirParser, \
- PowerShellParser
+ PowerShellParser, \
+ BibtexParser
#endif /* CTAGS_MAIN_PARSERS_H */
Modified: ctags/parsers/bibtex.c
431 lines changed, 431 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,431 @@
+/*
+ * Copyright (c) 2000-2001, Jérôme Plût
+ * Copyright (c) 2006, Enrico Tröger
+ * Copyright (c) 2019, Mirco Schönfeld
+ *
+ * This source code is released for free distribution under the terms of the
+ * GNU General Public License.
+ *
+ * This module contains functions for generating tags for source files
+ * for the BibTeX formatting system.
+ * https://en.wikipedia.org/wiki/BibTeX
+ */
+
+/*
+ * INCLUDE FILES
+ */
+#include "general.h" /* must always come first */
+#include <ctype.h> /* to define isalpha () */
+#include <string.h>
+
+#include "debug.h"
+#include "entry.h"
+#include "keyword.h"
+#include "parse.h"
+#include "read.h"
+#include "routines.h"
+#include "vstring.h"
+
+/*
+ * MACROS
+ */
+#define isType(token,t) (bool) ((token)->type == (t))
+#define isKeyword(token,k) (bool) ((token)->keyword == (k))
+#define isIdentChar(c) \
+ (isalpha (c) || isdigit (c) || (c) == '_' || (c) == '-' || (c) == '+')
+
+/*
+ * DATA DECLARATIONS
+ */
+
+/*
+ * Used to specify type of keyword.
+ */
+enum eKeywordId {
+ KEYWORD_article,
+ KEYWORD_book,
+ KEYWORD_booklet,
+ KEYWORD_conference,
+ KEYWORD_inbook,
+ KEYWORD_incollection,
+ KEYWORD_inproceedings,
+ KEYWORD_manual,
+ KEYWORD_mastersthesis,
+ KEYWORD_misc,
+ KEYWORD_phdthesis,
+ KEYWORD_proceedings,
+ KEYWORD_string,
+ KEYWORD_techreport,
+ KEYWORD_unpublished
+};
+typedef int keywordId; /* to allow KEYWORD_NONE */
+
+enum eTokenType {
+ /* 0..255 are the byte values; some are named for convenience */
+ TOKEN_OPEN_CURLY = '{',
+ /* the above are single-character token types */
+ TOKEN_UNDEFINED = 256,
+ TOKEN_KEYWORD,
+ TOKEN_IDENTIFIER
+};
+typedef int tokenType;
+
+typedef struct sTokenInfo {
+ tokenType type;
+ keywordId keyword;
+ vString * string;
+ unsigned long lineNumber;
+ MIOPos filePosition;
+} tokenInfo;
+
+/*
+ * DATA DEFINITIONS
+ */
+
+static langType Lang_bib;
+
+typedef enum {
+ BIBTAG_ARTICLE,
+ BIBTAG_BOOK,
+ BIBTAG_BOOKLET,
+ BIBTAG_CONFERENCE,
+ BIBTAG_INBOOK,
+ BIBTAG_INCOLLECTION,
+ BIBTAG_INPROCEEDINGS,
+ BIBTAG_MANUAL,
+ BIBTAG_MASTERSTHESIS,
+ BIBTAG_MISC,
+ BIBTAG_PHDTHESIS,
+ BIBTAG_PROCEEDINGS,
+ BIBTAG_STRING,
+ BIBTAG_TECHREPORT,
+ BIBTAG_UNPUBLISHED,
+ BIBTAG_COUNT
+} bibKind;
+
+static kindDefinition BibKinds [] = {
+ { true, 'a', "article", "article" },
+ { true, 'b', "book", "book" },
+ { true, 'B', "booklet", "booklet" },
+ { true, 'c', "conference", "conference" },
+ { true, 'i', "inbook", "inbook" },
+ { true, 'I', "incollection", "incollection" },
+ { true, 'j', "inproceedings", "inproceedings" },
+ { true, 'm', "manual", "manual" },
+ { true, 'M', "mastersthesis", "mastersthesis" },
+ { true, 'n', "misc", "misc" },
+ { true, 'p', "phdthesis", "phdthesis" },
+ { true, 'P', "proceedings", "proceedings" },
+ { true, 's', "string", "string" },
+ { true, 't', "techreport", "techreport" },
+ { true, 'u', "unpublished", "unpublished" }
+};
+
+static const keywordTable BibKeywordTable [] = {
+ /* keyword keyword ID */
+ { "article", KEYWORD_article },
+ { "book", KEYWORD_book },
+ { "booklet", KEYWORD_booklet },
+ { "conference", KEYWORD_conference },
+ { "inbook", KEYWORD_inbook },
+ { "incollection", KEYWORD_incollection },
+ { "inproceedings",KEYWORD_inproceedings },
+ { "manual", KEYWORD_manual },
+ { "mastersthesis",KEYWORD_mastersthesis },
+ { "misc", KEYWORD_misc },
+ { "phdthesis", KEYWORD_phdthesis },
+ { "proceedings", KEYWORD_proceedings },
+ { "string", KEYWORD_string },
+ { "techreport", KEYWORD_techreport },
+ { "unpublished", KEYWORD_unpublished }
+};
+
+/*
+ * FUNCTION DEFINITIONS
+ */
+
+static tokenInfo *newToken (void)
+{
+ tokenInfo *const token = xMalloc (1, tokenInfo);
+
+ token->type = TOKEN_UNDEFINED;
+ token->keyword = KEYWORD_NONE;
+ token->string = vStringNew ();
+ token->lineNumber = getInputLineNumber ();
+ token->filePosition = getInputFilePosition ();
+
+ return token;
+}
+
+static void deleteToken (tokenInfo *const token)
+{
+ vStringDelete (token->string);
+ eFree (token);
+}
+
+/*
+ * Tag generation functions
+ */
+static void makeBibTag (tokenInfo *const token, bibKind kind)
+{
+ if (BibKinds [kind].enabled)
+ {
+ const char *const name = vStringValue (token->string);
+ tagEntryInfo e;
+ initTagEntry (&e, name, kind);
+
+ e.lineNumber = token->lineNumber;
+ e.filePosition = token->filePosition;
+
+ makeTagEntry (&e);
+ }
+}
+
+/*
+ * Parsing functions
+ */
+
+/*
+ * Reads an identifier beginning with "firstChar" and places it into
+ * "string".
+ */
+static void parseIdentifier (vString *const string, const int firstChar)
+{
+ int c = firstChar;
+ Assert (isIdentChar (c));
+ do
+ {
+ vStringPut (string, c);
+ c = getcFromInputFile ();
+ } while (c != EOF && isIdentChar (c));
+ if (c != EOF)
+ ungetcToInputFile (c); /* unget non-identifier character */
+}
+
+static bool readToken (tokenInfo *const token)
+{
+ int c;
+
+ token->type = TOKEN_UNDEFINED;
+ token->keyword = KEYWORD_NONE;
+ vStringClear (token->string);
+
+getNextChar:
+
+ do
+ {
+ c = getcFromInputFile ();
+ }
+ while (c == '\t' || c == ' ' || c == '\n');
+
+ token->lineNumber = getInputLineNumber ();
+ token->filePosition = getInputFilePosition ();
+
+ token->type = (unsigned char) c;
+ switch (c)
+ {
+ case EOF: return false;
+
+ case '@':
+ /*
+ * All BibTeX entries start with an at symbol.
+ * Check whether the next character is alphabetic;
+ * otherwise this is not a potential BibTeX tag.
+ */
+ c = getcFromInputFile ();
+ if (! isalpha (c))
+ ungetcToInputFile (c);
+ else
+ {
+ vStringPut (token->string, '@');
+ parseIdentifier (token->string, c);
+ token->keyword = lookupCaseKeyword (vStringValue (token->string) + 1, Lang_bib);
+ if (isKeyword (token, KEYWORD_NONE))
+ token->type = TOKEN_IDENTIFIER;
+ else
+ token->type = TOKEN_KEYWORD;
+ }
+ break;
+ case '%':
+ skipToCharacterInInputFile ('\n'); /* '%' starts a single-line comment */
+ goto getNextChar;
+ break;
+ default:
+ if (isIdentChar (c))
+ {
+ parseIdentifier (token->string, c);
+ token->type = TOKEN_IDENTIFIER;
+ }
+ break;
+ }
+ return true;
+}
+
+static void copyToken (tokenInfo *const dest, tokenInfo *const src)
+{
+ dest->lineNumber = src->lineNumber;
+ dest->filePosition = src->filePosition;
+ dest->type = src->type;
+ dest->keyword = src->keyword;
+ vStringCopy (dest->string, src->string);
+}
+
+/*
+ * Scanning functions
+ */
+
+static bool parseTag (tokenInfo *const token, bibKind kind)
+{
+ tokenInfo * const name = newToken ();
+ vString * currentid;
+ bool eof = false;
+
+ currentid = vStringNew ();
+ /*
+ * Bib entries are of these formats:
+ * @article{identifier,
+ * author="John Doe"}
+ *
+ * When a keyword is found, the identifier following the opening
+ * brace is used as the tag name.
+ *
+ */
+ if (isType (token, TOKEN_KEYWORD))
+ {
+ copyToken (name, token);
+ if (!readToken (token))
+ {
+ eof = true;
+ goto out;
+ }
+ }
+
+ if (isType (token, TOKEN_OPEN_CURLY))
+ {
+ if (!readToken (token))
+ {
+ eof = true;
+ goto out;
+ }
+ if (isType (token, TOKEN_IDENTIFIER)){
+ vStringCat (currentid, token->string);
+ vStringStripTrailing (currentid);
+ if (vStringLength (currentid) > 0)
+ {
+ vStringCopy (name->string, currentid);
+ makeBibTag (name, kind);
+ }
+ }
+ else
+ { /* an identifier for the bib item should come first */
+ eof = true;
+ goto out;
+ }
+ }
+
+
+ out:
+ deleteToken (name);
+ vStringDelete (currentid);
+ return eof;
+}
+
+static void parseBibFile (tokenInfo *const token)
+{
+ bool eof = false;
+
+ do
+ {
+ if (!readToken (token))
+ break;
+
+ if (isType (token, TOKEN_KEYWORD))
+ {
+ switch (token->keyword)
+ {
+ case KEYWORD_article:
+ eof = parseTag (token, BIBTAG_ARTICLE);
+ break;
+ case KEYWORD_book:
+ eof = parseTag (token, BIBTAG_BOOK);
+ break;
+ case KEYWORD_booklet:
+ eof = parseTag (token, BIBTAG_BOOKLET);
+ break;
+ case KEYWORD_conference:
+ eof = parseTag (token, BIBTAG_CONFERENCE);
+ break;
+ case KEYWORD_inbook:
+ eof = parseTag (token, BIBTAG_INBOOK);
+ break;
+ case KEYWORD_incollection:
+ eof = parseTag (token, BIBTAG_INCOLLECTION);
+ break;
+ case KEYWORD_inproceedings:
+ eof = parseTag (token, BIBTAG_INPROCEEDINGS);
+ break;
+ case KEYWORD_manual:
+ eof = parseTag (token, BIBTAG_MANUAL);
+ break;
+ case KEYWORD_mastersthesis:
+ eof = parseTag (token, BIBTAG_MASTERSTHESIS);
+ break;
+ case KEYWORD_misc:
+ eof = parseTag (token, BIBTAG_MISC);
+ break;
+ case KEYWORD_phdthesis:
+ eof = parseTag (token, BIBTAG_PHDTHESIS);
+ break;
+ case KEYWORD_proceedings:
+ eof = parseTag (token, BIBTAG_PROCEEDINGS);
+ break;
+ case KEYWORD_string:
+ eof = parseTag (token, BIBTAG_STRING);
+ break;
+ case KEYWORD_techreport:
+ eof = parseTag (token, BIBTAG_TECHREPORT);
+ break;
+ case KEYWORD_unpublished:
+ eof = parseTag (token, BIBTAG_UNPUBLISHED);
+ break;
+ default:
+ break;
+ }
+ }
+ if (eof)
+ break;
+ } while (true);
+}
+
+static void initialize (const langType language)
+{
+ Lang_bib = language;
+}
+
+static void findBibTags (void)
+{
+ tokenInfo *const token = newToken ();
+
+ parseBibFile (token);
+
+ deleteToken (token);
+}
+
+/* Create parser definition structure */
+extern parserDefinition* BibtexParser (void)
+{
+ Assert (ARRAY_SIZE (BibKinds) == BIBTAG_COUNT);
+ static const char *const extensions [] = { "bib", NULL };
+ parserDefinition *const def = parserNew ("BibTeX");
+ def->extensions = extensions;
+ /*
+ * New definitions for parsing instead of regex
+ */
+ def->kindTable = BibKinds;
+ def->kindCount = ARRAY_SIZE (BibKinds);
+ def->parser = findBibTags;
+ def->initialize = initialize;
+ def->keywordTable = BibKeywordTable;
+ def->keywordCount = ARRAY_SIZE (BibKeywordTable);
+ return def;
+}
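A rough trace for the hypothetical doe2019 entry above (a sketch of the code in this file, not an additional change):

    readToken()   : "@article" -> TOKEN_KEYWORD (KEYWORD_article)
                    "{"        -> TOKEN_OPEN_CURLY
                    "doe2019"  -> TOKEN_IDENTIFIER
    parseBibFile(): KEYWORD_article dispatches to parseTag (token, BIBTAG_ARTICLE)
    parseTag()    : copies "doe2019" into the tag name and calls makeBibTag (),
                    keeping the line/position of the "@article" token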
Modified: data/Makefile.am
1 lines changed, 1 insertions(+), 0 deletions(-)
===================================================================
@@ -11,6 +11,7 @@ filetypes = \
filedefs/filetypes.asciidoc \
filedefs/filetypes.asm \
filedefs/filetypes.batch \
+ filedefs/filetypes.bibtex \
filedefs/filetypes.c \
filedefs/filetypes.caml \
filedefs/filetypes.Clojure.conf \
Modified: data/filedefs/filetypes.bibtex
7 lines changed, 7 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,7 @@
+# For complete documentation of this file, please see Geany's main documentation
+
+[settings]
+# use the LaTeX lexer so that comment lines are highlighted
+lexer_filetype=LaTeX
+# default extension used when saving files
+extension=bib
Modified: data/filetype_extensions.conf
3 lines changed, 2 insertions(+), 1 deletions(-)
===================================================================
@@ -10,6 +10,7 @@ Arduino=*.ino;*.pde;
Asciidoc=*.asciidoc;*.adoc;
ASM=*.asm;*.asm51;*.a51;*.s;*.S;*.sx;
Batch=*.bat;*.cmd;*.nt;
+BibTeX=*.bib;
CAML=*.ml;*.mli;
C=*.c;*.xpm;
C++=*.cpp;*.cxx;*.c++;*.cc;*.h;*.hpp;*.hxx;*.h++;*.hh;*.C;*.H;
@@ -43,7 +44,7 @@ Java=*.java;*.jsp;
Javascript=*.js;
JSON=*.json;
Kotlin=*.kt;*.kts;
-LaTeX=*.tex;*.sty;*.idx;*.ltx;*.latex;*.aux;*.bib;
+LaTeX=*.tex;*.sty;*.idx;*.ltx;*.latex;*.aux;
Lisp=*.lisp;
Lua=*.lua;
Make=*.mak;*.mk;GNUmakefile;makefile;Makefile;makefile.*;Makefile.*;
Modified: src/filetypes.c
1 lines changed, 1 insertions(+), 0 deletions(-)
===================================================================
@@ -160,6 +160,7 @@ static void init_builtin_filetypes(void)
FT_INIT( SQL, SQL, "SQL", NULL, FILE, MISC );
FT_INIT( COBOL, COBOL, "COBOL", NULL, SOURCE_FILE, COMPILED );
FT_INIT( LATEX, LATEX, "LaTeX", NULL, SOURCE_FILE, MARKUP );
+ FT_INIT( BIBTEX, BIBTEX, "BibTeX", NULL, SOURCE_FILE, MARKUP );
FT_INIT( VHDL, VHDL, "VHDL", NULL, SOURCE_FILE, COMPILED );
FT_INIT( VERILOG, VERILOG, "Verilog", NULL, SOURCE_FILE, COMPILED );
FT_INIT( DIFF, DIFF, "Diff", NULL, FILE, MISC );
Modified: src/filetypes.h
1 lines changed, 1 insertions(+), 0 deletions(-)
===================================================================
@@ -105,6 +105,7 @@ typedef enum
GEANY_FILETYPES_COFFEESCRIPT,
GEANY_FILETYPES_GO,
GEANY_FILETYPES_ZEPHIR,
+ GEANY_FILETYPES_BIBTEX,
/* ^ append items here */
GEANY_MAX_BUILT_IN_FILETYPES /* Don't use this, use filetypes_array->len instead */
}
Modified: src/symbols.c
14 lines changed, 14 insertions(+), 0 deletions(-)
===================================================================
@@ -525,6 +525,20 @@ static void add_top_level_items(GeanyDocument *doc)
NULL);
break;
}
+ case GEANY_FILETYPES_BIBTEX:
+ {
+ tag_list_add_groups(tag_store,
+ &(tv_iters.tag_function), _("Articles"), ICON_NONE,
+ &(tv_iters.tag_macro), _("Book Chapters"), ICON_NONE,
+ &(tv_iters.tag_class), _("Books & Conference Proceedings"), ICON_NONE,
+ &(tv_iters.tag_member), _("Conference Papers"), ICON_NONE,
+ &(tv_iters.tag_variable), _("Theses"), ICON_NONE,
+ &(tv_iters.tag_namespace), _("Strings"), ICON_NONE,
+ &(tv_iters.tag_externvar), _("Unpublished"), ICON_NONE,
+ &(tv_iters.tag_other), _("Other"), ICON_NONE,
+ NULL);
+ break;
+ }
case GEANY_FILETYPES_MATLAB:
{
tag_list_add_groups(tag_store,
Modified: src/tagmanager/tm_parser.c
18 lines changed, 18 insertions(+), 0 deletions(-)
===================================================================
@@ -124,6 +124,23 @@ static TMParserMapEntry map_LATEX[] = {
{'n', tm_tag_namespace_t},
{'s', tm_tag_struct_t},
};
+static TMParserMapEntry map_BIBTEX[] = {
+ {'a', tm_tag_function_t},
+ {'b', tm_tag_class_t},
+ {'B', tm_tag_class_t},
+ {'c', tm_tag_member_t},
+ {'i', tm_tag_macro_t},
+ {'I', tm_tag_macro_t},
+ {'j', tm_tag_member_t},
+ {'m', tm_tag_other_t},
+ {'M', tm_tag_variable_t},
+ {'n', tm_tag_other_t},
+ {'p', tm_tag_variable_t},
+ {'P', tm_tag_class_t},
+ {'s', tm_tag_namespace_t},
+ {'t', tm_tag_other_t},
+ {'u', tm_tag_externvar_t},
+};
static TMParserMapEntry map_ASM[] = {
{'d', tm_tag_macro_t},
@@ -531,6 +548,7 @@ static TMParserMap parser_map[] = {
MAP_ENTRY(PHP),
MAP_ENTRY(PYTHON),
MAP_ENTRY(LATEX),
+ MAP_ENTRY(BIBTEX),
MAP_ENTRY(ASM),
MAP_ENTRY(CONF),
MAP_ENTRY(SQL),
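Together with the symbols.c change above, these kind mappings decide where BibTeX entries appear in Geany's symbol tree. For the hypothetical doe2019 example (a sketch based on the mappings in this commit):

    @article -> ctags kind 'a' -> tm_tag_function_t (map_BIBTEX) -> "Articles" group (add_top_level_items)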
Modified: src/tagmanager/tm_parser.h
1 lines changed, 1 insertions(+), 0 deletions(-)
===================================================================
@@ -109,6 +109,7 @@ enum
TM_PARSER_JSON,
TM_PARSER_ZEPHIR,
TM_PARSER_POWERSHELL,
+ TM_PARSER_BIBTEX,
TM_PARSER_COUNT
};
--------------
This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).