[geany/geany] 34d0f2: Merge pull request #3161 from techee/markdown_sync
Jiří Techet
git-noreply at geany.org
Thu May 12 22:56:19 UTC 2022
Branch: refs/heads/master
Author: Jiří Techet <techet at gmail.com>
Committer: GitHub <noreply at github.com>
Date: Thu, 12 May 2022 22:56:19 UTC
Commit: 34d0f2e14149faa4ac7efb655e998848d9be5b77
https://github.com/geany/geany/commit/34d0f2e14149faa4ac7efb655e998848d9be5b77
Log Message:
-----------
Merge pull request #3161 from techee/markdown_sync
Use the upstream Markdown parser
Modified Paths:
--------------
ctags/Makefile.am
ctags/parsers/geany_markdown.c
ctags/parsers/markdown.c
ctags/parsers/markdown.h
meson.build
src/tagmanager/tm_parser.c
tests/ctags/simple.md.tags
Modified: ctags/Makefile.am
3 lines changed, 2 insertions(+), 1 deletions(-)
===================================================================
@@ -72,7 +72,8 @@ parsers = \
parsers/lua.c \
parsers/make.c \
parsers/make.h \
- parsers/geany_markdown.c \
+ parsers/markdown.c \
+ parsers/markdown.h \
parsers/geany_matlab.c \
parsers/nsis.c \
parsers/objc.c \
Modified: ctags/parsers/geany_markdown.c
103 lines changed, 0 insertions(+), 103 deletions(-)
===================================================================
@@ -1,103 +0,0 @@
-/*
-*
-* Copyright (c) 2009, Jon Strait
-*
-* This source code is released for free distribution under the terms of the
-* GNU General Public License.
-*
-* This module contains functions for generating tags for Markdown files.
-*/
-
-/*
-* INCLUDE FILES
-*/
-#include "general.h" /* must always come first */
-
-#include <ctype.h>
-#include <string.h>
-
-#include "parse.h"
-#include "read.h"
-#include "vstring.h"
-#include "routines.h"
-#include "entry.h"
-
-/*
-* DATA DEFINITIONS
-*/
-
-static kindDefinition MarkdownKinds[] = {
- { true, 'v', "variable", "sections" }
-};
-
-/*
-* FUNCTION DEFINITIONS
-*/
-
-/* checks if str is all the same character */
-static bool issame(const char *str)
-{
- char first = *str;
-
- while (*(++str))
- {
- if (*str && *str != first)
- return false;
- }
- return true;
-}
-
-static void makeMarkdownTag (const vString* const name, bool name_before)
-{
- tagEntryInfo e;
- initTagEntry (&e, vStringValue(name), 0);
-
- if (name_before)
- e.lineNumber--; /* we want the line before the underline chars */
-
- makeTagEntry(&e);
-}
-
-
-static void findMarkdownTags (void)
-{
- vString *name = vStringNew();
- const unsigned char *line;
-
- while ((line = readLineFromInputFile()) != NULL)
- {
- int name_len = vStringLength(name);
-
- /* underlines must be the same length or more */
- if (name_len > 0 && (line[0] == '=' || line[0] == '-') && issame((const char*) line))
- {
- makeMarkdownTag(name, true);
- }
- else if (line[0] == '#') {
- vStringClear(name);
- vStringCatS(name, (const char *) line);
- makeMarkdownTag(name, false);
- }
- else {
- vStringClear (name);
- if (! isspace(*line))
- vStringCatS(name, (const char*) line);
- }
- }
- vStringDelete (name);
-}
-
-extern parserDefinition* MarkdownParser (void)
-{
- static const char *const patterns [] = { "*.md", NULL };
- static const char *const extensions [] = { "md", NULL };
- parserDefinition* const def = parserNew ("Markdown");
-
- def->kindTable = MarkdownKinds;
- def->kindCount = ARRAY_SIZE (MarkdownKinds);
- def->patterns = patterns;
- def->extensions = extensions;
- def->parser = findMarkdownTags;
- return def;
-}
-
Modified: ctags/parsers/markdown.c
420 lines changed, 420 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,420 @@
+/*
+ *
+ * Copyright (c) 2007-2011, Nick Treleaven
+ * Copyright (c) 2012, Lex Trotman
+ * Copyright (c) 2021, Jiri Techet
+ *
+ * This source code is released for free distribution under the terms of the
+ * GNU General Public License version 2 or (at your option) any later version.
+ *
+ * This module contains functions for generating tags for markdown files.
+ *
+ * This parser was based on the asciidoc parser.
+ *
+ * Extended syntax like footnotes is described in
+ * https://www.markdownguide.org/extended-syntax/
+ */
+
+/*
+ * INCLUDE FILES
+ */
+#include "general.h" /* must always come first */
+
+#include <ctype.h>
+#include <string.h>
+
+#include "debug.h"
+#include "entry.h"
+#include "parse.h"
+#include "read.h"
+#include "vstring.h"
+#include "nestlevel.h"
+#include "routines.h"
+#include "promise.h"
+#include "htable.h"
+
+#include "markdown.h"
+
+/*
+ * DATA DEFINITIONS
+ */
+typedef enum { /* Kind indexes handed to initTagEntry(); the order is
+                * assumed to match the rows of MarkdownKinds[]. For "#"
+                * headings the parser uses (number of '#') - 1 as the kind. */
+ K_CHAPTER = 0, /* "#" heading, or a line underlined with '=' */
+ K_SECTION, /* "##" heading, or a line underlined with '-' */
+ K_SUBSECTION, /* "###" heading */
+ K_SUBSUBSECTION, /* "####" heading */
+ K_LEVEL4SECTION, /* "#####" heading */
+ K_LEVEL5SECTION, /* "######" heading */
+ K_SECTION_COUNT, /* number of heading kinds; not a kind of its own */
+ K_FOOTNOTE = K_SECTION_COUNT, /* first non-heading kind, shares that value */
+} markdownKind;
+
+static kindDefinition MarkdownKinds[] = { /* One row per markdownKind value, in
+                                           * enum order. Columns are presumably
+                                           * {enabled, letter, name, description}
+                                           * -- confirm against kindDefinition. */
+ { true, 'c', "chapter", "chapters"}, /* K_CHAPTER */
+ { true, 's', "section", "sections" }, /* K_SECTION */
+ { true, 'S', "subsection", "level 2 sections" }, /* K_SUBSECTION */
+ { true, 't', "subsubsection", "level 3 sections" }, /* K_SUBSUBSECTION */
+ { true, 'T', "l4subsection", "level 4 sections" }, /* K_LEVEL4SECTION */
+ { true, 'u', "l5subsection", "level 5 sections" }, /* K_LEVEL5SECTION */
+ { true, 'n', "footnote", "footnotes" }, /* K_FOOTNOTE */
+};
+
+static fieldDefinition MarkdownFields [] = { /* Parser-specific tag fields,
+                                              * indexed by markdownField. */
+ {
+ .enabled = false, /* the field starts out disabled */
+ .name = "sectionMarker",
+ .description = "character used for declaring section(#, ##, =, or -)",
+ },
+};
+
+typedef enum { /* Indexes into MarkdownFields[]. */
+ F_MARKER, /* the "sectionMarker" field */
+} markdownField;
+
+static NestingLevels *nestingLevels = NULL; /* stack of the headings that are
+                                             * currently open; created and freed
+                                             * by findMarkdownTags() on each run */
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/*
+ * Unwinds the stack of open headings until its top entry is a heading that is
+ * strictly shallower than `kind`, and returns that level (NULL when the stack
+ * runs empty). Levels without a tag entry are discarded as well.
+ *
+ * adjustmentWhenPop is the number of lines to step back from the current
+ * input line; the resulting line number is computed but not consumed here.
+ */
+static NestingLevel *getNestingLevel (const int kind, unsigned long adjustmentWhenPop)
+{
+	NestingLevel *current;
+	unsigned long line = getInputLineNumber ();
+
+	if (line > adjustmentWhenPop)
+		line -= adjustmentWhenPop;
+	else
+		line = 0;
+
+	for (;;)
+	{
+		tagEntryInfo *entry;
+
+		current = nestingLevelsGetCurrent (nestingLevels);
+		entry = getEntryOfNestingLevel (current);
+
+		const bool withoutEntry = (current && (entry == NULL));
+		const bool notShallower = (entry && (entry->kindIndex >= kind));
+		if (!withoutEntry && !notShallower)
+			break;
+
+		nestingLevelsPop (nestingLevels);
+	}
+
+	return current;
+}
+
+
+/*
+ * Emits one tag of the given kind for `name` and returns its cork index, or
+ * CORK_NIL when `name` is empty.
+ *
+ * twoLine is true for underlined headings: the tag is then attributed to the
+ * line above the current one (the title), not to the underline itself.
+ * The tag is scoped to the enclosing heading when that heading is shallower
+ * than `kind`.
+ */
+static int makeMarkdownTag (const vString* const name, const int kind, const bool twoLine)
+{
+	if (! (vStringLength (name) > 0))
+		return CORK_NIL;
+
+	const NestingLevel *const enclosing = getNestingLevel (kind, twoLine? 2: 1);
+	tagEntryInfo *parent = getEntryOfNestingLevel (enclosing);
+	tagEntryInfo tag;
+
+	initTagEntry (&tag, vStringValue (name), kind);
+
+	if (twoLine)
+	{
+		/* the title sits on the line before the '---' underline chars */
+		const unsigned long underlineLine = getInputLineNumber ();
+		Assert (underlineLine > 0);
+		if (underlineLine > 0)
+		{
+			tag.lineNumber--;
+			tag.filePosition = getInputFilePositionForLine (underlineLine - 1);
+		}
+	}
+
+	if (parent && (parent->kindIndex < kind))
+		tag.extensionFields.scopeIndex = enclosing->corkIndex;
+
+	return makeTagEntry (&tag);
+}
+
+
+/*
+ * Emits a heading tag, records `marker` in its "sectionMarker" field and
+ * pushes the new tag onto the stack of open headings.
+ * A marker that does not start with '#' denotes an underlined (two-line)
+ * heading. Returns the cork index of the tag.
+ */
+static int makeSectionMarkdownTag (const vString* const name, const int kind, const char *marker)
+{
+	const bool underlined = (marker[0] != '#');
+	const int corkIndex = makeMarkdownTag (name, kind, underlined);
+
+	attachParserFieldToCorkEntry (corkIndex, MarkdownFields [F_MARKER].ftype, marker);
+	nestingLevelsPush (nestingLevels, corkIndex);
+
+	return corkIndex;
+}
+
+
+/*
+ * Extracts the title text of a one-line heading such as "## Title ##".
+ *
+ * kind      - heading kind chosen by the caller (only sanity-checked here)
+ * line      - NUL-terminated input line
+ * lineLen   - length of line, excluding the terminator
+ * delimited - set to true when a closing run of marker characters was
+ *             stripped from the end of the line, false otherwise
+ *
+ * Returns a newly allocated vString (possibly empty); the caller deletes it.
+ *
+ * The title starts after the leading whitespace, the opening run of '#' and
+ * the whitespace following it, so a heading indented by a few spaces yields
+ * the same name as an unindented one. A closing marker preceded by a
+ * backslash is kept as part of the title.
+ */
+static vString *getHeading (const int kind, const unsigned char *line,
+	const int lineLen, bool *delimited)
+{
+	int pos = 0;
+	int start;
+	int end = lineLen - 1;
+	vString *name = vStringNew ();
+
+	Assert (kind >= 0 && kind < K_SECTION_COUNT);
+
+	*delimited = false;
+
+	/* locate the first marker character, then the first title character */
+	while (isspace (line[pos])) ++pos;
+	start = pos;
+	while (line[start] == '#') ++start;
+	while (isspace (line[start])) ++start;
+
+	/* drop the closing marker run; never scan back past the title start */
+	while (end >= start && end > 0
+		   && line[end] == line[pos] && line[end - 1] != '\\')
+	{
+		--end;
+		*delimited = true;
+	}
+	/* drop trailing whitespace, again bounded by the title start */
+	while (end >= start && isspace (line[end])) --end;
+
+	if (start <= end)
+		vStringNCatS (name, (const char*)(&(line[start])), end - start + 1);
+
+	return name;
+}
+
+
+/*
+ * Returns the index of the first non-whitespace character of `line`
+ * (lineLen when there is none).
+ * *indented is set to true when the leading whitespace is at least four
+ * columns wide, counting each tab as four columns and any other whitespace
+ * character as one.
+ */
+static int getFirstCharPos (const unsigned char *line, int lineLen, bool *indented)
+{
+	int width = 0;
+	int pos = 0;
+
+	while (pos < lineLen && isspace (line[pos]))
+	{
+		if (line[pos] == '\t')
+			width += 4;
+		else
+			width += 1;
+		++pos;
+	}
+
+	*indented = (width >= 4);
+	return pos;
+}
+
+
+/*
+ * Emits a footnote tag when `line` contains a footnote definition of the form
+ * "[^label]:" with a non-empty label. The tag is named after the label and
+ * scoped to the heading on top of the nesting stack, if there is one.
+ */
+static void getFootnoteMaybe (const char *line)
+{
+	const char *opening = strstr (line, "[^");
+	if (opening == NULL)
+		return;
+
+	const char *label = opening + 2;
+	const char *closing = strstr (label, "]:");
+	if (closing == NULL || closing <= label)
+		return;
+
+	vString *labelText = vStringNewNInit (label, closing - label);
+	const NestingLevel *const enclosing = nestingLevelsGetCurrent (nestingLevels);
+	tagEntryInfo tag;
+
+	initTagEntry (&tag, vStringValue (labelText), K_FOOTNOTE);
+	if (enclosing)
+		tag.extensionFields.scopeIndex = enclosing->corkIndex;
+	makeTagEntry (&tag);
+
+	vStringDelete (labelText);
+}
+/*
+ * Offers the text that follows a code fence (langMarker) to the subparsers
+ * visited by foreachSubparser(). Each subparser that provides an
+ * extractLanguageForCodeBlock callback is called with langMarker and
+ * codeLang, bracketed by enterSubparser()/leaveSubparser().
+ *
+ * Returns true as soon as one callback returns true, false when none does
+ * (or when no subparser provides the callback). The callback presumably
+ * stores the language name in codeLang -- confirm against the subparsers.
+ */
+static bool extractLanguageForCodeBlock (const char *langMarker,
+ vString *codeLang)
+{
+ subparser *s;
+ bool r = false;
+
+ foreachSubparser (s, false)
+ {
+ markdownSubparser *m = (markdownSubparser *)s; /* view the generic subparser
+                                                  * as a Markdown subparser */
+ enterSubparser(s);
+ if (m->extractLanguageForCodeBlock) /* the callback is optional */
+ r = m->extractLanguageForCodeBlock (m, langMarker, codeLang);
+ leaveSubparser();
+ if (r) /* the first subparser reporting success wins */
+ break;
+ }
+
+ return r;
+}
+
+/*
+ * Parser entry point. Reads the input line by line and
+ *  - hands a "---" ... "---" block starting on the first line to the
+ *    "FrontMatter" guest parser,
+ *  - hands the body of a fenced code block (``` or ~~~) to the guest parser
+ *    named after the fence, or named by a subparser,
+ *  - skips XML comments, indented code blocks and block quotes,
+ *  - emits a heading tag for "#"-style headings and for lines underlined
+ *    with '=' or '-',
+ *  - looks for footnote definitions in every remaining line.
+ *
+ * Runs of marker characters are measured from the first non-blank character
+ * of the line (index pos), so fences and headings preceded by up to three
+ * spaces are measured correctly and nothing is read past the end of the line.
+ */
+static void findMarkdownTags (void)
+{
+	vString *prevLine = vStringNew ();	/* candidate title for an underline on the next line */
+	vString *codeLang = vStringNew ();	/* language of the fenced block being read */
+	const unsigned char *line;
+	char inCodeChar = 0;			/* fence character of the open block, 0 when outside */
+	long startSourceLineNumber = 0;
+	long startLineNumber = 0;
+	bool inPreambule = false;
+	bool inComment = false;
+
+	subparser *sub = getSubparserRunningBaseparser();
+	if (sub)
+		chooseExclusiveSubparser (sub, NULL);
+
+	nestingLevels = nestingLevelsNew (0);
+
+	while ((line = readLineFromInputFile ()) != NULL)
+	{
+		int lineLen = strlen ((const char*) line);
+		bool lineProcessed = false;
+		bool indented;
+		int pos = getFirstCharPos (line, lineLen, &indented);
+		const int lineNum = getInputLineNumber ();
+
+		/* front matter: opened by "---" on line 1, closed by the next "---" */
+		if (lineNum == 1 || inPreambule)
+		{
+			if (line[pos] == '-' && line[pos + 1] == '-' && line[pos + 2] == '-')
+			{
+				if (inPreambule)
+				{
+					long endLineNumber = lineNum;
+					if (startLineNumber < endLineNumber)
+						makePromise ("FrontMatter", startLineNumber, 0,
+									 endLineNumber, 0, startSourceLineNumber);
+				}
+				else
+					startSourceLineNumber = startLineNumber = lineNum;
+				inPreambule = !inPreambule;
+			}
+		}
+
+		if (inPreambule)
+			continue;
+
+		/* fenced code block */
+		if (line[pos] == '`' || line[pos] == '~')
+		{
+			char c = line[pos];
+			char otherC = c == '`' ? '~' : '`';
+			int nSame;
+			/* length of the fence run starting at pos; stops at the NUL at the latest */
+			for (nSame = 1; line[pos + nSame] == line[pos]; ++nSame);
+
+			if (inCodeChar != otherC && nSame >= 3)
+			{
+				inCodeChar = inCodeChar ? 0 : c;
+				/* a fence opened and closed on the same line is not a block */
+				if (inCodeChar == c && strstr ((const char *)(line + pos + nSame), "```") != NULL)
+					inCodeChar = 0;
+				else if (inCodeChar)
+				{
+					/* opening fence: remember where the body starts and its language */
+					const char *langMarker = (const char *)(line + pos + nSame);
+					startLineNumber = startSourceLineNumber = lineNum + 1;
+
+					vStringClear (codeLang);
+					if (! extractLanguageForCodeBlock (langMarker, codeLang))
+					{
+						vStringCopyS (codeLang, langMarker);
+						vStringStripLeading (codeLang);
+						vStringStripTrailing (codeLang);
+					}
+				}
+				else
+				{
+					/* closing fence: schedule the guest parser for the body */
+					long endLineNumber = lineNum;
+					if (vStringLength (codeLang) > 0
+						&& startLineNumber < endLineNumber)
+						makePromise (vStringValue (codeLang), startLineNumber, 0,
+									 endLineNumber, 0, startSourceLineNumber);
+				}
+
+				lineProcessed = true;
+			}
+		}
+		/* XML comment start */
+		else if (lineLen >= pos + 4 && line[pos] == '<' && line[pos + 1] == '!' &&
+				 line[pos + 2] == '-' && line[pos + 3] == '-')
+		{
+			if (strstr ((const char *)(line + pos + 4), "-->") == NULL)
+				inComment = true;
+			lineProcessed = true;
+		}
+		/* XML comment end */
+		else if (inComment && strstr ((const char *)(line + pos), "-->"))
+		{
+			inComment = false;
+			lineProcessed = true;
+		}
+
+		/* code block or comment */
+		if (inCodeChar || inComment)
+			lineProcessed = true;
+
+		/* code block using indent */
+		else if (indented)
+			lineProcessed = true;
+
+		/* if it's a title underline, or a delimited block marking character */
+		else if (line[pos] == '=' || line[pos] == '-' || line[pos] == '#' || line[pos] == '>')
+		{
+			int nSame;
+			/* length of the marker run starting at pos */
+			for (nSame = 1; line[pos + nSame] == line[pos]; ++nSame);
+
+			/* quote */
+			if (line[pos] == '>')
+				;	/* just to make sure lineProcessed = true so it won't be in a heading */
+			/* is it a two line title */
+			else if (line[pos] == '=' || line[pos] == '-')
+			{
+				char marker[2] = { line[pos], '\0' };
+				int kind = line[pos] == '=' ? K_CHAPTER : K_SECTION;
+				bool whitespaceTerminated = true;
+
+				/* only whitespace may follow the underline run */
+				for (int i = pos + nSame; i < lineLen; i++)
+				{
+					if (!isspace (line[i]))
+					{
+						whitespaceTerminated = false;
+						break;
+					}
+				}
+
+				vStringStripLeading (prevLine);
+				vStringStripTrailing (prevLine);
+				if (whitespaceTerminated && vStringLength (prevLine) > 0)
+					makeSectionMarkdownTag (prevLine, kind, marker);
+			}
+			/* otherwise is it a one line title */
+			else if (line[pos] == '#' && nSame <= K_SECTION_COUNT && isspace (line[pos + nSame]))
+			{
+				int kind = nSame - 1;
+				bool delimited = false;
+				vString *name = getHeading (kind, line, lineLen, &delimited);
+				if (vStringLength (name) > 0)
+					makeSectionMarkdownTag (name, kind, delimited ? "##" : "#");
+				vStringDelete (name);
+			}
+
+			lineProcessed = true;
+		}
+
+		/* only an ordinary text line can be the title of a following underline */
+		vStringClear (prevLine);
+		if (!lineProcessed)
+		{
+			getFootnoteMaybe ((const char *)line);
+			vStringCatS (prevLine, (const char*) line);
+		}
+	}
+	vStringDelete (prevLine);
+	vStringDelete (codeLang);
+	nestingLevelsFree (nestingLevels);
+}
+
+/*
+ * Creates and returns the parser definition for the "Markdown" language:
+ * file extensions, kind and field tables, scope separator and entry point.
+ */
+extern parserDefinition* MarkdownParser (void)
+{
+	static const char *const extensions [] = { "md", "markdown", NULL };
+	parserDefinition* const def = parserNew ("Markdown");
+
+	/* what the parser emits */
+	def->kindTable = MarkdownKinds;
+	def->kindCount = ARRAY_SIZE (MarkdownKinds);
+	def->fieldTable = MarkdownFields;
+	def->fieldCount = ARRAY_SIZE (MarkdownFields);
+	def->defaultScopeSeparator = "\"\"";
+
+	/* how the parser is selected and run */
+	def->enabled = true;
+	def->extensions = extensions;
+	def->parser = findMarkdownTags;
+	def->useCork = CORK_QUEUE;
+
+	/*
+	 * useMemoryStreamInput is set so that the Yaml parser can run as a
+	 * subparser of YamlFrontMatter: YamlFrontMatter is run from FrontMatter
+	 * as a guest parser, and FrontMatter is run from Markdown as a guest
+	 * parser. This stacked structure hits a limitation of the main part:
+	 * a subparser's requirement for a memory based input stream is not
+	 * propagated to the main part.
+	 *
+	 * TODO: instead of setting useMemoryStreamInput here, the limitation
+	 * should be removed.
+	 */
+	def->useMemoryStreamInput = true;
+
+	return def;
+}
Modified: ctags/parsers/markdown.h
29 lines changed, 29 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,29 @@
+/*
+* Copyright (c) 2022, Masatake YAMATO
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License version 2 or (at your option) any later version.
+*
+* The interface for subparsers of Markdown
+*/
+#ifndef CTAGS_PARSER_MARKDOWN_H
+#define CTAGS_PARSER_MARKDOWN_H
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include "subparser.h"
+#include "vstring.h"
+
+typedef struct sMarkdownSubparser markdownSubparser; /* interface a subparser of
+                                                      * Markdown exposes to the
+                                                      * Markdown parser */
+
+struct sMarkdownSubparser {
+ subparser subparser; /* generic subparser part; the Markdown parser casts a
+                       * subparser pointer to markdownSubparser, which relies
+                       * on this being the first member */
+ bool (* extractLanguageForCodeBlock) (markdownSubparser *s, /* optional (may be
+                                        * NULL): called with the text following a
+                                        * code fence; a true return is treated as
+                                        * "language found", presumably with its
+                                        * name stored in langName */
+ const char *langMarker,
+ vString *langName);
+};
+
+#endif
Modified: meson.build
3 lines changed, 2 insertions(+), 1 deletions(-)
===================================================================
@@ -636,7 +636,6 @@ ctags = static_library('ctags',
'ctags/parsers/geany_fortran.c',
'ctags/parsers/geany_lcpp.c',
'ctags/parsers/geany_lcpp.h',
- 'ctags/parsers/geany_markdown.c',
'ctags/parsers/geany_matlab.c',
'ctags/parsers/geany_tex.c',
'ctags/parsers/go.c',
@@ -651,6 +650,8 @@ ctags = static_library('ctags',
'ctags/parsers/lua.c',
'ctags/parsers/make.c',
'ctags/parsers/make.h',
+ 'ctags/parsers/markdown.c',
+ 'ctags/parsers/markdown.h',
'ctags/parsers/nsis.c',
'ctags/parsers/objc.c',
'ctags/parsers/pascal.c',
Modified: src/tagmanager/tm_parser.c
20 lines changed, 17 insertions(+), 3 deletions(-)
===================================================================
@@ -709,10 +709,21 @@ static TMParserMapGroup group_NSIS[] = {
};
static TMParserMapEntry map_MARKDOWN[] = {
- {'v', tm_tag_variable_t},
+ {'c', tm_tag_namespace_t}, //chapter
+ {'s', tm_tag_member_t}, //section
+ {'S', tm_tag_macro_t}, //subsection
+ {'t', tm_tag_variable_t}, //subsubsection
+ {'T', tm_tag_struct_t}, //l4subsection
+ {'u', tm_tag_union_t}, //l5subsection
+ {'n', tm_tag_undef_t}, //footnote
};
static TMParserMapGroup group_MARKDOWN[] = {
- {_("Variables"), TM_ICON_VAR, tm_tag_variable_t},
+ {_("Chapters"), TM_ICON_NONE, tm_tag_namespace_t},
+ {_("Sections"), TM_ICON_NONE, tm_tag_member_t},
+ {_("Subsections"), TM_ICON_NONE, tm_tag_macro_t},
+ {_("Subsubsections"), TM_ICON_NONE, tm_tag_variable_t},
+ {_("Level 4 sections"), TM_ICON_NONE, tm_tag_struct_t},
+ {_("Level 5 sections"), TM_ICON_NONE, tm_tag_union_t},
};
static TMParserMapEntry map_TXT2TAGS[] = {
@@ -1461,6 +1472,7 @@ const gchar *tm_parser_scope_separator(TMParserType lang)
case TM_PARSER_ZEPHIR:
return "::";
+ case TM_PARSER_MARKDOWN:
case TM_PARSER_TXT2TAGS:
return "\"\"";
@@ -1481,10 +1493,11 @@ const gchar *tm_parser_scope_separator_printable(TMParserType lang)
{
switch (lang)
{
- case TM_PARSER_TXT2TAGS:
case TM_PARSER_ASCIIDOC:
case TM_PARSER_CONF:
+ case TM_PARSER_MARKDOWN:
case TM_PARSER_REST:
+ case TM_PARSER_TXT2TAGS:
return " > ";
default:
@@ -1511,6 +1524,7 @@ gboolean tm_parser_has_full_scope(TMParserType lang)
case TM_PARSER_JAVASCRIPT:
case TM_PARSER_JSON:
case TM_PARSER_LUA:
+ case TM_PARSER_MARKDOWN:
case TM_PARSER_PHP:
case TM_PARSER_POWERSHELL:
case TM_PARSER_PYTHON:
Modified: tests/ctags/simple.md.tags
54 lines changed, 27 insertions(+), 27 deletions(-)
===================================================================
@@ -1,28 +1,28 @@
# format=tagmanager
-# a�16384�0
-# g #�16384�0
-# h ##�16384�0
-## b�16384�0
-## i #�16384�0
-## j ##�16384�0
-## k ###�16384�0
-### c�16384�0
-### l #�16384�0
-### m ##�16384�0
-### n ###�16384�0
-### o ###�16384�0
-#### d�16384�0
-#### p #�16384�0
-#### q #####�16384�0
-##### e�16384�0
-##### r #�16384�0
-##### s ######�16384�0
-###### f�16384�0
-###### t #�16384�0
-###### u #######�16384�0
-A�16384�0
-B�16384�0
-C�16384�0
-D�16384�0
-E�16384�0
-F�16384�0
+A�256�0
+B�256�0
+C�256�0
+D�64�C�0
+E�64�C�0
+F�64�C�0
+a�256�0
+b�64�a�0
+c�65536�a""b�0
+d�16384�a""b""c�0
+e�2048�a""b""c""d�0
+f�8192�a""b""c""d""e�0
+g�256�0
+h�256�0
+i�64�h�0
+j�64�h�0
+k�64�h�0
+l�65536�h""k�0
+m�65536�h""k�0
+n�65536�h""k�0
+o�65536�h""k�0
+p�16384�h""k""o�0
+q�16384�h""k""o�0
+r�2048�h""k""o""q�0
+s�2048�h""k""o""q�0
+t�8192�h""k""o""q""s�0
+u�8192�h""k""o""q""s�0
--------------
This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).
More information about the Commits
mailing list