Branch: refs/heads/master Author: Jiří Techet techet@gmail.com Committer: Jiří Techet techet@gmail.com Date: Tue, 12 Apr 2022 18:16:06 UTC Commit: 9bf6ac286dc6e99a0b75216f0c81c98283e0523d https://github.com/geany/geany/commit/9bf6ac286dc6e99a0b75216f0c81c98283e0523d
Log Message: ----------- Use the upstream Markdown parser
This is a new Markdown parser supporting scope generation.
Modified Paths: -------------- ctags/Makefile.am ctags/parsers/geany_markdown.c ctags/parsers/markdown.c ctags/parsers/markdown.h meson.build src/tagmanager/tm_parser.c tests/ctags/simple.md.tags
Modified: ctags/Makefile.am 3 lines changed, 2 insertions(+), 1 deletions(-) =================================================================== @@ -72,7 +72,8 @@ parsers = \ parsers/lua.c \ parsers/make.c \ parsers/make.h \ - parsers/geany_markdown.c \ + parsers/markdown.c \ + parsers/markdown.h \ parsers/geany_matlab.c \ parsers/nsis.c \ parsers/objc.c \
Modified: ctags/parsers/geany_markdown.c 103 lines changed, 0 insertions(+), 103 deletions(-) =================================================================== @@ -1,103 +0,0 @@ -/* -* -* Copyright (c) 2009, Jon Strait -* -* This source code is released for free distribution under the terms of the -* GNU General Public License. -* -* This module contains functions for generating tags for Markdown files. -*/ - -/* -* INCLUDE FILES -*/ -#include "general.h" /* must always come first */ - -#include <ctype.h> -#include <string.h> - -#include "parse.h" -#include "read.h" -#include "vstring.h" -#include "routines.h" -#include "entry.h" - -/* -* DATA DEFINITIONS -*/ - -static kindDefinition MarkdownKinds[] = { - { true, 'v', "variable", "sections" } -}; - -/* -* FUNCTION DEFINITIONS -*/ - -/* checks if str is all the same character */ -static bool issame(const char *str) -{ - char first = *str; - - while (*(++str)) - { - if (*str && *str != first) - return false; - } - return true; -} - -static void makeMarkdownTag (const vString* const name, bool name_before) -{ - tagEntryInfo e; - initTagEntry (&e, vStringValue(name), 0); - - if (name_before) - e.lineNumber--; /* we want the line before the underline chars */ - - makeTagEntry(&e); -} - - -static void findMarkdownTags (void) -{ - vString *name = vStringNew(); - const unsigned char *line; - - while ((line = readLineFromInputFile()) != NULL) - { - int name_len = vStringLength(name); - - /* underlines must be the same length or more */ - if (name_len > 0 && (line[0] == '=' || line[0] == '-') && issame((const char*) line)) - { - makeMarkdownTag(name, true); - } - else if (line[0] == '#') { - vStringClear(name); - vStringCatS(name, (const char *) line); - makeMarkdownTag(name, false); - } - else { - vStringClear (name); - if (! 
isspace(*line)) - vStringCatS(name, (const char*) line); - } - } - vStringDelete (name); -} - -extern parserDefinition* MarkdownParser (void) -{ - static const char *const patterns [] = { "*.md", NULL }; - static const char *const extensions [] = { "md", NULL }; - parserDefinition* const def = parserNew ("Markdown"); - - def->kindTable = MarkdownKinds; - def->kindCount = ARRAY_SIZE (MarkdownKinds); - def->patterns = patterns; - def->extensions = extensions; - def->parser = findMarkdownTags; - return def; -} -
Modified: ctags/parsers/markdown.c 420 lines changed, 420 insertions(+), 0 deletions(-) =================================================================== @@ -0,0 +1,420 @@ +/* + * + * Copyright (c) 2007-2011, Nick Treleaven + * Copyright (c) 2012, Lex Trotman + * Copyright (c) 2021, Jiri Techet + * + * This source code is released for free distribution under the terms of the + * GNU General Public License version 2 or (at your option) any later version. + * + * This module contains functions for generating tags for markdown files. + * + * This parser was based on the asciidoc parser. + * + * Extended syntax like footnotes is described in + * https://www.markdownguide.org/extended-syntax/ + */ + +/* + * INCLUDE FILES + */ +#include "general.h" /* must always come first */ + +#include <ctype.h> +#include <string.h> + +#include "debug.h" +#include "entry.h" +#include "parse.h" +#include "read.h" +#include "vstring.h" +#include "nestlevel.h" +#include "routines.h" +#include "promise.h" +#include "htable.h" + +#include "markdown.h" + +/* + * DATA DEFINITIONS + */ +typedef enum { + K_CHAPTER = 0, + K_SECTION, + K_SUBSECTION, + K_SUBSUBSECTION, + K_LEVEL4SECTION, + K_LEVEL5SECTION, + K_SECTION_COUNT, + K_FOOTNOTE = K_SECTION_COUNT, +} markdownKind; + +static kindDefinition MarkdownKinds[] = { + { true, 'c', "chapter", "chapters"}, + { true, 's', "section", "sections" }, + { true, 'S', "subsection", "level 2 sections" }, + { true, 't', "subsubsection", "level 3 sections" }, + { true, 'T', "l4subsection", "level 4 sections" }, + { true, 'u', "l5subsection", "level 5 sections" }, + { true, 'n', "footnote", "footnotes" }, +}; + +static fieldDefinition MarkdownFields [] = { + { + .enabled = false, + .name = "sectionMarker", + .description = "character used for declaring section(#, ##, =, or -)", + }, +}; + +typedef enum { + F_MARKER, +} markdownField; + +static NestingLevels *nestingLevels = NULL; + +/* +* FUNCTION DEFINITIONS +*/ + +static NestingLevel *getNestingLevel 
(const int kind, unsigned long adjustmentWhenPop) +{ + NestingLevel *nl; + tagEntryInfo *e; + unsigned long line = getInputLineNumber (); + + line = (line > adjustmentWhenPop)? (line - adjustmentWhenPop): 0; + + while (1) + { + nl = nestingLevelsGetCurrent (nestingLevels); + e = getEntryOfNestingLevel (nl); + if ((nl && (e == NULL)) || (e && (e->kindIndex >= kind))) + nestingLevelsPop (nestingLevels); + else + break; + } + return nl; +} + + +static int makeMarkdownTag (const vString* const name, const int kind, const bool twoLine) +{ + int r = CORK_NIL; + + if (vStringLength (name) > 0) + { + const NestingLevel *const nl = getNestingLevel (kind, twoLine? 2: 1); + tagEntryInfo *parent = getEntryOfNestingLevel (nl); + tagEntryInfo e; + + initTagEntry (&e, vStringValue (name), kind); + + if (twoLine) + { + /* we want the line before the '---' underline chars */ + const unsigned long line = getInputLineNumber (); + Assert (line > 0); + if (line > 0) + { + e.lineNumber--; + e.filePosition = getInputFilePositionForLine (line - 1); + } + } + + if (parent && (parent->kindIndex < kind)) + e.extensionFields.scopeIndex = nl->corkIndex; + + r = makeTagEntry (&e); + } + return r; +} + + +static int makeSectionMarkdownTag (const vString* const name, const int kind, const char *marker) +{ + int r = makeMarkdownTag (name, kind, marker[0] != '#'); + attachParserFieldToCorkEntry (r, MarkdownFields [F_MARKER].ftype, marker); + + nestingLevelsPush (nestingLevels, r); + return r; +} + + +static vString *getHeading (const int kind, const unsigned char *line, + const int lineLen, bool *delimited) +{ + int pos = 0; + int start = kind + 1; + int end = lineLen - 1; + vString *name = vStringNew (); + + Assert (kind >= 0 && kind < K_SECTION_COUNT); + Assert (lineLen > start); + + *delimited = false; + while (isspace (line[pos])) ++pos; + while (line[end] == line[pos] && end - 1 >= 0 && line[end - 1] != '\') + { + --end; + *delimited = true; + } + while (isspace (line[start])) ++start; + while 
(isspace (line[end])) --end; + + if (start <= end) + vStringNCatS (name, (const char*)(&(line[start])), end - start + 1); + + return name; +} + + +static int getFirstCharPos (const unsigned char *line, int lineLen, bool *indented) +{ + int indent = 0; + int i; + for (i = 0; i < lineLen && isspace (line[i]); i++) + indent += line[i] == '\t' ? 4 : 1; + *indented = indent >= 4; + return i; +} + + +static void getFootnoteMaybe (const char *line) +{ + const char *start = strstr (line, "[^"); + const char *end = start? strstr(start + 2, "]:"): NULL; + + if (! (start && end)) + return; + if (! (end > (start + 2))) + return; + + vString * footnote = vStringNewNInit (start + 2, end - (start + 2)); + const NestingLevel *const nl = nestingLevelsGetCurrent (nestingLevels); + tagEntryInfo e; + + initTagEntry (&e, vStringValue (footnote), K_FOOTNOTE); + if (nl) + e.extensionFields.scopeIndex = nl->corkIndex; + makeTagEntry (&e); + + vStringDelete (footnote); +} + +static bool extractLanguageForCodeBlock (const char *langMarker, + vString *codeLang) +{ + subparser *s; + bool r = false; + + foreachSubparser (s, false) + { + markdownSubparser *m = (markdownSubparser *)s; + enterSubparser(s); + if (m->extractLanguageForCodeBlock) + r = m->extractLanguageForCodeBlock (m, langMarker, codeLang); + leaveSubparser(); + if (r) + break; + } + + return r; +} + +static void findMarkdownTags (void) +{ + vString *prevLine = vStringNew (); + vString *codeLang = vStringNew (); + const unsigned char *line; + char inCodeChar = 0; + long startSourceLineNumber = 0; + long startLineNumber = 0; + bool inPreambule = false; + bool inComment = false; + + subparser *sub = getSubparserRunningBaseparser(); + if (sub) + chooseExclusiveSubparser (sub, NULL); + + nestingLevels = nestingLevelsNew (0); + + while ((line = readLineFromInputFile ()) != NULL) + { + int lineLen = strlen ((const char*) line); + bool lineProcessed = false; + bool indented; + int pos = getFirstCharPos (line, lineLen, &indented); + const 
int lineNum = getInputLineNumber (); + + if (lineNum == 1 || inPreambule) + { + if (line[pos] == '-' && line[pos + 1] == '-' && line[pos + 2] == '-') + { + if (inPreambule) + { + long endLineNumber = lineNum; + if (startLineNumber < endLineNumber) + makePromise ("FrontMatter", startLineNumber, 0, + endLineNumber, 0, startSourceLineNumber); + } + else + startSourceLineNumber = startLineNumber = lineNum; + inPreambule = !inPreambule; + } + } + + if (inPreambule) + continue; + + /* fenced code block */ + if (line[pos] == '`' || line[pos] == '~') + { + char c = line[pos]; + char otherC = c == '`' ? '~' : '`'; + int nSame; + for (nSame = 1; line[nSame] == line[pos]; ++nSame); + + if (inCodeChar != otherC && nSame >= 3) + { + inCodeChar = inCodeChar ? 0 : c; + if (inCodeChar == c && strstr ((const char *)(line + pos + nSame), "```") != NULL) + inCodeChar = 0; + else if (inCodeChar) + { + const char *langMarker = (const char *)(line + pos + nSame); + startLineNumber = startSourceLineNumber = lineNum + 1; + + vStringClear (codeLang); + if (! extractLanguageForCodeBlock (langMarker, codeLang)) + { + vStringCopyS (codeLang, langMarker); + vStringStripLeading (codeLang); + vStringStripTrailing (codeLang); + } + } + else + { + long endLineNumber = lineNum; + if (vStringLength (codeLang) > 0 + && startLineNumber < endLineNumber) + makePromise (vStringValue (codeLang), startLineNumber, 0, + endLineNumber, 0, startSourceLineNumber); + } + + lineProcessed = true; + } + } + /* XML comment start */ + else if (lineLen >= pos + 4 && line[pos] == '<' && line[pos + 1] == '!' 
&& + line[pos + 2] == '-' && line[pos + 3] == '-') + { + if (strstr ((const char *)(line + pos + 4), "-->") == NULL) + inComment = true; + lineProcessed = true; + } + /* XML comment end */ + else if (inComment && strstr ((const char *)(line + pos), "-->")) + { + inComment = false; + lineProcessed = true; + } + + /* code block or comment */ + if (inCodeChar || inComment) + lineProcessed = true; + + /* code block using indent */ + else if (indented) + lineProcessed = true; + + /* if it's a title underline, or a delimited block marking character */ + else if (line[pos] == '=' || line[pos] == '-' || line[pos] == '#' || line[pos] == '>') + { + int nSame; + for (nSame = 1; line[nSame] == line[pos]; ++nSame); + + /* quote */ + if (line[pos] == '>') + ; /* just to make sure lineProcessed = true so it won't be in a heading */ + /* is it a two line title */ + else if (line[pos] == '=' || line[pos] == '-') + { + char marker[2] = { line[pos], '\0' }; + int kind = line[pos] == '=' ? K_CHAPTER : K_SECTION; + bool whitespaceTerminated = true; + + for (int i = pos + nSame; i < lineLen; i++) + { + if (!isspace (line[i])) + { + whitespaceTerminated = false; + break; + } + } + + vStringStripLeading (prevLine); + vStringStripTrailing (prevLine); + if (whitespaceTerminated && vStringLength (prevLine) > 0) + makeSectionMarkdownTag (prevLine, kind, marker); + } + /* otherwise is it a one line title */ + else if (line[pos] == '#' && nSame <= K_SECTION_COUNT && isspace (line[nSame])) + { + int kind = nSame - 1; + bool delimited = false; + vString *name = getHeading (kind, line, lineLen, &delimited); + if (vStringLength (name) > 0) + makeSectionMarkdownTag (name, kind, delimited ? 
"##" : "#"); + vStringDelete (name); + } + + lineProcessed = true; + } + + vStringClear (prevLine); + if (!lineProcessed) + { + getFootnoteMaybe ((const char *)line); + vStringCatS (prevLine, (const char*) line); + } + } + vStringDelete (prevLine); + vStringDelete (codeLang); + { + unsigned int line = (unsigned int)getInputLineNumber (); + nestingLevelsFree (nestingLevels); + } +} + +extern parserDefinition* MarkdownParser (void) +{ + parserDefinition* const def = parserNew ("Markdown"); + static const char *const extensions [] = { "md", "markdown", NULL }; + + def->enabled = true; + def->extensions = extensions; + def->useCork = CORK_QUEUE; + def->kindTable = MarkdownKinds; + def->kindCount = ARRAY_SIZE (MarkdownKinds); + def->fieldTable = MarkdownFields; + def->fieldCount = ARRAY_SIZE (MarkdownFields); + def->defaultScopeSeparator = """"; + def->parser = findMarkdownTags; + + /* + * This setting (useMemoryStreamInput) is for running + * Yaml parser from YamlFrontMatter as subparser. + * YamlFrontMatter is run from FrontMatter as a gust parser. + * FrontMatter is run from Markdown as a guest parser. + * This stacked structure hits the limitation of the main + * part: subparser's requirement for memory based input stream + * is not propagated to the main part. + * + * TODO: instead of setting useMemoryStreamInput here, we + * should remove the limitation. + */ + def->useMemoryStreamInput = true; + + return def; +}
Modified: ctags/parsers/markdown.h 29 lines changed, 29 insertions(+), 0 deletions(-) =================================================================== @@ -0,0 +1,29 @@ +/* +* Copyright (c) 2022, Masatake YAMATO +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +* +* The interface for subparsers of Markdown +*/ +#ifndef CTAGS_PARSER_MARKDOWN_H +#define CTAGS_PARSER_MARKDOWN_H + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include "subparser.h" +#include "vstring.h" + +typedef struct sMarkdownSubparser markdownSubparser; + +struct sMarkdownSubparser { + subparser subparser; + bool (* extractLanguageForCodeBlock) (markdownSubparser *s, + const char *langMarker, + vString *langName); +}; + +#endif
Modified: meson.build 3 lines changed, 2 insertions(+), 1 deletions(-) =================================================================== @@ -636,7 +636,6 @@ ctags = static_library('ctags', 'ctags/parsers/geany_fortran.c', 'ctags/parsers/geany_lcpp.c', 'ctags/parsers/geany_lcpp.h', - 'ctags/parsers/geany_markdown.c', 'ctags/parsers/geany_matlab.c', 'ctags/parsers/geany_tcl.c', 'ctags/parsers/geany_tex.c', @@ -653,6 +652,8 @@ ctags = static_library('ctags', 'ctags/parsers/lua.c', 'ctags/parsers/make.c', 'ctags/parsers/make.h', + 'ctags/parsers/markdown.c', + 'ctags/parsers/markdown.h', 'ctags/parsers/nsis.c', 'ctags/parsers/objc.c', 'ctags/parsers/pascal.c',
Modified: src/tagmanager/tm_parser.c 20 lines changed, 17 insertions(+), 3 deletions(-) =================================================================== @@ -694,10 +694,21 @@ static TMParserMapGroup group_NSIS[] = { };
static TMParserMapEntry map_MARKDOWN[] = { - {'v', tm_tag_variable_t}, + {'c', tm_tag_namespace_t}, //chapter + {'s', tm_tag_member_t}, //section + {'S', tm_tag_macro_t}, //subsection + {'t', tm_tag_variable_t}, //subsubsection + {'T', tm_tag_struct_t}, //l4subsection + {'u', tm_tag_union_t}, //l5subsection + {'n', tm_tag_undef_t}, //footnote }; static TMParserMapGroup group_MARKDOWN[] = { - {_("Variables"), TM_ICON_VAR, tm_tag_variable_t}, + {_("Chapters"), TM_ICON_NONE, tm_tag_namespace_t}, + {_("Sections"), TM_ICON_NONE, tm_tag_member_t}, + {_("Subsections"), TM_ICON_NONE, tm_tag_macro_t}, + {_("Subsubsections"), TM_ICON_NONE, tm_tag_variable_t}, + {_("Level 4 sections"), TM_ICON_NONE, tm_tag_struct_t}, + {_("Level 5 sections"), TM_ICON_NONE, tm_tag_union_t}, };
static TMParserMapEntry map_TXT2TAGS[] = { @@ -1435,6 +1446,7 @@ const gchar *tm_parser_scope_separator(TMParserType lang) case TM_PARSER_ZEPHIR: return "::";
+ case TM_PARSER_MARKDOWN: case TM_PARSER_TXT2TAGS: return "\"\"";
@@ -1455,10 +1467,11 @@ const gchar *tm_parser_scope_separator_printable(TMParserType lang) { switch (lang) { - case TM_PARSER_TXT2TAGS: case TM_PARSER_ASCIIDOC: case TM_PARSER_CONF: + case TM_PARSER_MARKDOWN: case TM_PARSER_REST: + case TM_PARSER_TXT2TAGS: return " > ";
default: @@ -1485,6 +1498,7 @@ gboolean tm_parser_has_full_scope(TMParserType lang) case TM_PARSER_JAVASCRIPT: case TM_PARSER_JSON: case TM_PARSER_LUA: + case TM_PARSER_MARKDOWN: case TM_PARSER_PHP: case TM_PARSER_POWERSHELL: case TM_PARSER_PYTHON:
Modified: tests/ctags/simple.md.tags 54 lines changed, 27 insertions(+), 27 deletions(-) =================================================================== @@ -1,28 +1,28 @@ # format=tagmanager -# a�16384�0 -# g #�16384�0 -# h ##�16384�0 -## b�16384�0 -## i #�16384�0 -## j ##�16384�0 -## k ###�16384�0 -### c�16384�0 -### l #�16384�0 -### m ##�16384�0 -### n ###�16384�0 -### o ###�16384�0 -#### d�16384�0 -#### p #�16384�0 -#### q #####�16384�0 -##### e�16384�0 -##### r #�16384�0 -##### s ######�16384�0 -###### f�16384�0 -###### t #�16384�0 -###### u #######�16384�0 -A�16384�0 -B�16384�0 -C�16384�0 -D�16384�0 -E�16384�0 -F�16384�0 +A�256�0 +B�256�0 +C�256�0 +D�64�C�0 +E�64�C�0 +F�64�C�0 +a�256�0 +b�64�a�0 +c�65536�a""b�0 +d�16384�a""b""c�0 +e�2048�a""b""c""d�0 +f�8192�a""b""c""d""e�0 +g�256�0 +h�256�0 +i�64�h�0 +j�64�h�0 +k�64�h�0 +l�65536�h""k�0 +m�65536�h""k�0 +n�65536�h""k�0 +o�65536�h""k�0 +p�16384�h""k""o�0 +q�16384�h""k""o�0 +r�2048�h""k""o""q�0 +s�2048�h""k""o""q�0 +t�8192�h""k""o""q""s�0 +u�8192�h""k""o""q""s�0
-------------- This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).