[geany/geany] 4eb741: Merge pull request #3035 from techee/different_parsers
Jiří Techet
git-noreply at xxxxx
Mon Mar 14 19:19:19 UTC 2022
Branch: refs/heads/master
Author: Jiří Techet <techet at gmail.com>
Committer: GitHub <noreply at github.com>
Date: Mon, 14 Mar 2022 19:19:19 UTC
Commit: 4eb741ec374630c200107faac52b4502fdb0bd62
https://github.com/geany/geany/commit/4eb741ec374630c200107faac52b4502fdb0bd62
Log Message:
-----------
Merge pull request #3035 from techee/different_parsers
Use some parsers from uctags with better implementation
Modified Paths:
--------------
ctags/Makefile.am
ctags/parsers/geany_r.c
ctags/parsers/geany_sh.c
ctags/parsers/geany_verilog.c
ctags/parsers/r.c
ctags/parsers/r.h
ctags/parsers/sh.c
ctags/parsers/verilog.c
src/tagmanager/tm_parser.c
tests/ctags/bug1111214-j-chan.v.tags
tests/ctags/traffic_signal.v.tags
Modified: ctags/Makefile.am
7 lines changed, 4 insertions(+), 3 deletions(-)
===================================================================
@@ -82,16 +82,17 @@ parsers = \
parsers/php.c \
parsers/powershell.c \
parsers/geany_python.c \
- parsers/geany_r.c \
+ parsers/r.c \
+ parsers/r.h \
parsers/rst.c \
parsers/ruby.c \
parsers/rust.c \
- parsers/geany_sh.c \
+ parsers/sh.c \
parsers/sql.c \
parsers/geany_tcl.c \
parsers/geany_tex.c \
parsers/txt2tags.c \
- parsers/geany_verilog.c \
+ parsers/verilog.c \
parsers/geany_vhdl.c
# skip cmd.c and mini-geany.c which define main()
Modified: ctags/parsers/geany_r.c
177 lines changed, 0 insertions(+), 177 deletions(-)
===================================================================
@@ -1,177 +0,0 @@
-/*
-* Copyright (c) 2003-2004, Ascher Stefan <stievie at utanet.at>
-*
-* This source code is released for free distribution under the terms of the
-* GNU General Public License version 2 or (at your option) any later version.
-*
-* This module contains functions for generating tags for R language files.
-* R is a programming language for statistical computing.
-* R is GPL Software, get it from http://www.r-project.org/
-*/
-
-/*
-* INCLUDE FILES
-*/
-#include "general.h" /* must always come first */
-
-#include <string.h>
-#include <ctype.h> /* to define isalpha(), isalnum(), isspace() */
-
-#include "debug.h"
-#include "entry.h"
-#include "parse.h"
-#include "read.h"
-#include "vstring.h"
-#include "routines.h"
-
-
-#define SKIPSPACE(ch) while (isspace((int)*ch)) \
- ch++
-
-typedef enum {
- K_FUNCTION,
- K_LIBRARY,
- K_SOURCE,
- KIND_COUNT
-} rKind;
-
-static kindDefinition RKinds [KIND_COUNT] = {
- {true, 'f', "function", "functions"},
- {true, 'l', "library", "libraries"},
- {true, 's', "source", "sources"},
-};
-
-static void makeRTag (const vString * const name, rKind kind)
-{
- tagEntryInfo e;
- initTagEntry(&e, vStringValue(name), kind);
-
- Assert (kind < KIND_COUNT);
-
- makeTagEntry (&e);
-}
-
-static void createRTags (void)
-{
- vString *vLine = vStringNew ();
- vString *name = vStringNew ();
- int ikind;
- const unsigned char *line;
-
- while ((line = readLineFromInputFile ()) != NULL)
- {
- const unsigned char *cp = (const unsigned char *) line;
-
- vStringClear (name);
- while ((*cp != '\0') && (*cp != '#'))
- {
- /* iterate to the end of line or to a comment */
- ikind = -1;
- switch (*cp) {
- case 'l':
- case 's':
- if (strncasecmp((const char*)cp, "library", (size_t)7) == 0) {
- /* load a library: library(tools) */
- cp += 7;
- SKIPSPACE(cp);
- if (*cp == '(')
- ikind = K_LIBRARY;
- else
- cp -= 7;
- } else if (strncasecmp((const char*)cp, "source", (size_t)6) == 0) {
- /* load a source file: source("myfile.r") */
- cp += 6;
- SKIPSPACE(cp);
- if (*cp == '(')
- ikind = K_SOURCE;
- else
- cp -= 6;
- }
- if (ikind != -1) {
- cp++;
-
- vStringClear(name);
- while ((!isspace((int)*cp)) && *cp != '\0' && *cp != ')') {
- vStringPut(name, (int)*cp);
- cp++;
- }
-
- /* if the string really exists, make a tag of it */
- if (vStringLength(name) > 0)
- makeRTag(name, ikind);
-
- /* prepare for the next iteration */
- vStringClear(name);
- } else {
- vStringPut(name, (int)*cp);
- cp++;
- }
- break;
- case '<':
- cp++;
- if (*cp == '-') {
- /* assignment: ident <- someval */
- cp++;
- SKIPSPACE(cp);
-
- if (*cp == '\0') {
- /* not in this line, read next */
- /* sometimes functions are declared this way:
- ident <-
- function(...)
- {
- ...
- }
- I don't know if there is a reason to write the function keyword
- in a new line
- */
- if ((line = readLineFromInputFile()) != NULL) {
- cp = (const unsigned char*)line;
- SKIPSPACE(cp);
- }
- }
-
- if (strncasecmp((const char*)cp, "function", (size_t)8) == 0) {
- /* it's a function: ident <- function(args) */
- cp += 8;
- /* if the string really exists, make a tag of it */
- if (vStringLength(name) > 0)
- makeRTag(name, K_FUNCTION);
-
- /* prepare for the next iteration */
- vStringClear(name);
- break;
- }
- }
- /* fall through */
- case ' ':
- case '\x009':
- /* skip whitespace */
- cp++;
- break;
- default:
- /* collect all characters that could be a part of an identifier */
- vStringPut(name, (int)*cp);
- cp++;
- break;
- }
- }
- }
-
- vStringDelete (name);
- vStringDelete (vLine);
-}
-
-extern parserDefinition *RParser (void)
-{
- /* *.r: R files
- * *.s;*.q: S files
- */
- static const char *const extensions [] = { "r", "s", "q", NULL };
- parserDefinition *const def = parserNew ("R");
- def->kindTable = RKinds;
- def->kindCount = ARRAY_SIZE (RKinds);
- def->extensions = extensions;
- def->parser = createRTags;
- return def;
-}
Modified: ctags/parsers/geany_sh.c
110 lines changed, 0 insertions(+), 110 deletions(-)
===================================================================
@@ -1,110 +0,0 @@
-/*
-* Copyright (c) 2000-2002, Darren Hiebert
-*
-* This source code is released for free distribution under the terms of the
-* GNU General Public License version 2 or (at your option) any later version.
-*
-* This module contains functions for generating tags for scripts for the
-* Bourne shell (and its derivatives, the Korn and Z shells).
-*/
-
-/*
-* INCLUDE FILES
-*/
-#include "general.h" /* must always come first */
-
-#include <string.h>
-
-#include "parse.h"
-#include "read.h"
-#include "routines.h"
-#include "vstring.h"
-#include "xtag.h"
-
-/*
-* DATA DEFINITIONS
-*/
-typedef enum {
- K_FUNCTION
-} shKind;
-
-static kindDefinition ShKinds [] = {
- { true, 'f', "function", "functions"}
-};
-
-/*
-* FUNCTION DEFINITIONS
-*/
-
-/* Reject any tag "main" from a file named "configure". These appear in
- * here-documents in GNU autoconf scripts and will add a haystack to the
- * needle.
- */
-static bool hackReject (const vString* const tagName)
-{
- const char *const scriptName = baseFilename (getInputFileName ());
- bool result = (bool) (strcmp (scriptName, "configure") == 0 &&
- strcmp (vStringValue (tagName), "main") == 0);
- return result;
-}
-
-static void findShTags (void)
-{
- vString *name = vStringNew ();
- const unsigned char *line;
-
- while ((line = readLineFromInputFile ()) != NULL)
- {
- const unsigned char* cp = line;
- bool functionFound = false;
-
- if (line [0] == '#')
- continue;
-
- while (isspace (*cp))
- cp++;
- if (strncmp ((const char*) cp, "function", (size_t) 8) == 0 &&
- isspace ((int) cp [8]))
- {
- functionFound = true;
- cp += 8;
- if (! isspace ((int) *cp))
- continue;
- while (isspace ((int) *cp))
- ++cp;
- }
- if (! (isalnum ((int) *cp) || *cp == '_'))
- continue;
- while (isalnum ((int) *cp) || *cp == '_')
- {
- vStringPut (name, (int) *cp);
- ++cp;
- }
- while (isspace ((int) *cp))
- ++cp;
- if (*cp++ == '(')
- {
- while (isspace ((int) *cp))
- ++cp;
- if (*cp == ')' && ! hackReject (name))
- functionFound = true;
- }
- if (functionFound)
- makeSimpleTag (name, K_FUNCTION);
- vStringClear (name);
- }
- vStringDelete (name);
-}
-
-extern parserDefinition* ShParser (void)
-{
- static const char *const extensions [] = {
- "sh", "SH", "bsh", "bash", "ksh", "zsh", "ash", NULL
- };
- parserDefinition* def = parserNew ("Sh");
- def->kindTable = ShKinds;
- def->kindCount = ARRAY_SIZE (ShKinds);
- def->extensions = extensions;
- def->parser = findShTags;
- return def;
-}
Modified: ctags/parsers/geany_verilog.c
332 lines changed, 0 insertions(+), 332 deletions(-)
===================================================================
@@ -1,332 +0,0 @@
-/*
-* Copyright (c) 2003, Darren Hiebert
-*
-* This source code is released for free distribution under the terms of the
-* GNU General Public License version 2 or (at your option) any later version.
-*
-* This module contains functions for generating tags for the Verilog HDL
-* (Hardware Description Language).
-*
-* Language definition documents:
-* http://www.eg.bucknell.edu/~cs320/verilog/verilog-manual.html
-* http://www.sutherland-hdl.com/on-line_ref_guide/vlog_ref_top.html
-* http://www.verilog.com/VerilogBNF.html
-* http://eesun.free.fr/DOC/VERILOG/verilog_manual1.html
-*/
-
-/*
- * INCLUDE FILES
- */
-#include "general.h" /* must always come first */
-
-#include <string.h>
-#include <setjmp.h>
-
-#include "debug.h"
-#include "keyword.h"
-#include "parse.h"
-#include "read.h"
-#include "vstring.h"
-#include "geany_lcpp.h"
-#include "routines.h"
-#include "xtag.h"
-
-/*
- * DATA DECLARATIONS
- */
-typedef enum eException { ExceptionNone, ExceptionEOF } exception_t;
-
-typedef enum {
- K_UNDEFINED = -1,
- K_CONSTANT,
- K_EVENT,
- K_FUNCTION,
- K_MODULE,
- K_NET,
- K_PORT,
- K_REGISTER,
- K_TASK
-} verilogKind;
-
-/*
- * DATA DEFINITIONS
- */
-static int Ungetc;
-static int Lang_verilog;
-static jmp_buf Exception;
-
-static kindDefinition VerilogKinds [] = {
- { true, 'c', "constant", "constants (define, parameter, specparam)" },
- { true, 'e', "event", "events" },
- { true, 'f', "function", "functions" },
- { true, 'm', "module", "modules" },
- { true, 'n', "net", "net data types" },
- { true, 'p', "port", "ports" },
- { true, 'r', "register", "register data types" },
- { true, 't', "task", "tasks" }
-};
-
-static keywordTable VerilogKeywordTable [] = {
- { "`define", K_CONSTANT },
- { "event", K_EVENT },
- { "function", K_FUNCTION },
- { "inout", K_PORT },
- { "input", K_PORT },
- { "integer", K_REGISTER },
- { "module", K_MODULE },
- { "output", K_PORT },
- { "parameter", K_CONSTANT },
- { "real", K_REGISTER },
- { "realtime", K_REGISTER },
- { "reg", K_REGISTER },
- { "specparam", K_CONSTANT },
- { "supply0", K_NET },
- { "supply1", K_NET },
- { "task", K_TASK },
- { "time", K_REGISTER },
- { "tri0", K_NET },
- { "tri1", K_NET },
- { "triand", K_NET },
- { "tri", K_NET },
- { "trior", K_NET },
- { "trireg", K_NET },
- { "wand", K_NET },
- { "wire", K_NET },
- { "wor", K_NET }
-};
-
-/*
- * FUNCTION DEFINITIONS
- */
-
-static void initialize (const langType language)
-{
- size_t i;
- const size_t count = ARRAY_SIZE (VerilogKeywordTable);
- Lang_verilog = language;
- for (i = 0 ; i < count ; ++i)
- {
- const keywordTable* const p = &VerilogKeywordTable [i];
- addKeyword (p->name, language, (int) p->id);
- }
-}
-
-static void vUngetc (int c)
-{
- Assert (Ungetc == '\0');
- Ungetc = c;
-}
-
-static int vGetc (void)
-{
- int c;
- if (Ungetc == '\0')
- c = getcFromInputFile ();
- else
- {
- c = Ungetc;
- Ungetc = '\0';
- }
- if (c == '/')
- {
- int c2 = getcFromInputFile ();
- if (c2 == EOF)
- longjmp (Exception, (int) ExceptionEOF);
- else if (c2 == '/') /* strip comment until end-of-line */
- {
- do
- c = getcFromInputFile ();
- while (c != '\n' && c != EOF);
- }
- else if (c2 == '*') /* strip block comment */
- {
- c = lcppSkipOverCComment();
- }
- else
- {
- ungetcToInputFile (c2);
- }
- }
- else if (c == '"') /* strip string contents */
- {
- int c2;
- do
- c2 = getcFromInputFile ();
- while (c2 != '"' && c2 != EOF);
- c = '@';
- }
- if (c == EOF)
- longjmp (Exception, (int) ExceptionEOF);
- return c;
-}
-
-static bool isIdentifierCharacter (const int c)
-{
- return (bool)(isalnum (c) || c == '_' || c == '`');
-}
-
-static int skipWhite (int c)
-{
- while (isspace (c))
- c = vGetc ();
- return c;
-}
-
-static int skipPastMatch (const char *const pair)
-{
- const int begin = pair [0], end = pair [1];
- int matchLevel = 1;
- int c;
- do
- {
- c = vGetc ();
- if (c == begin)
- ++matchLevel;
- else if (c == end)
- --matchLevel;
- }
- while (matchLevel > 0);
- return vGetc ();
-}
-
-static bool readIdentifier (vString *const name, int c)
-{
- vStringClear (name);
- if (isIdentifierCharacter (c))
- {
- while (isIdentifierCharacter (c))
- {
- vStringPut (name, c);
- c = vGetc ();
- }
- vUngetc (c);
- }
- return (bool)(name->length > 0);
-}
-
-static void tagNameList (const verilogKind kind, int c)
-{
- vString *name = vStringNew ();
- bool repeat;
- Assert (isIdentifierCharacter (c));
- do
- {
- repeat = false;
- if (isIdentifierCharacter (c))
- {
- readIdentifier (name, c);
- makeSimpleTag (name, kind);
- }
- else
- break;
- c = skipWhite (vGetc ());
- if (c == '[')
- c = skipPastMatch ("[]");
- c = skipWhite (c);
- if (c == '=')
- {
- c = skipWhite (vGetc ());
- if (c == '{')
- skipPastMatch ("{}");
- else
- {
- do
- c = vGetc ();
- while (c != ',' && c != ';');
- }
- }
- if (c == ',')
- {
- c = skipWhite (vGetc ());
- repeat = true;
- }
- else
- repeat = false;
- } while (repeat);
- vStringDelete (name);
- vUngetc (c);
-}
-
-static void findTag (vString *const name)
-{
- const verilogKind kind = (verilogKind) lookupKeyword (vStringValue (name), Lang_verilog);
- if (kind == K_CONSTANT && vStringChar (name, 0) == '`')
- {
- /* Bug #961001: Verilog compiler directives are line-based. */
- int c = skipWhite (vGetc ());
- readIdentifier (name, c);
- makeSimpleTag (name, kind);
- /* Skip the rest of the line. */
- do {
- c = vGetc();
- } while (c != '\n');
- vUngetc (c);
- }
- else if (kind != K_UNDEFINED)
- {
- int c = skipWhite (vGetc ());
-
- /* Many keywords can have bit width.
- * reg [3:0] net_name;
- * inout [(`DBUSWIDTH-1):0] databus;
- */
- if (c == '(')
- c = skipPastMatch ("()");
- c = skipWhite (c);
- if (c == '[')
- c = skipPastMatch ("[]");
- c = skipWhite (c);
- if (c == '#')
- {
- c = vGetc ();
- if (c == '(')
- c = skipPastMatch ("()");
- }
- c = skipWhite (c);
- if (isIdentifierCharacter (c))
- tagNameList (kind, c);
- }
-}
-
-static void findVerilogTags (void)
-{
- vString *const name = vStringNew ();
- volatile bool newStatement = true;
- volatile int c = '\0';
- exception_t exception = (exception_t) setjmp (Exception);
-
- if (exception == ExceptionNone) while (c != EOF)
- {
- c = vGetc ();
- switch (c)
- {
- case ';':
- case '\n':
- newStatement = true;
- break;
-
- case ' ':
- case '\t':
- break;
-
- default:
- if (newStatement && readIdentifier (name, c))
- findTag (name);
- newStatement = false;
- break;
- }
- }
- vStringDelete (name);
-}
-
-extern parserDefinition* VerilogParser (void)
-{
- static const char *const extensions [] = { "v", NULL };
- parserDefinition* def = parserNew ("Verilog");
- def->kindTable = VerilogKinds;
- def->kindCount = ARRAY_SIZE (VerilogKinds);
- def->extensions = extensions;
- def->parser = findVerilogTags;
- def->initialize = initialize;
- return def;
-}
Modified: ctags/parsers/r.c
1470 lines changed, 1470 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,1470 @@
+/*
+* Copyright (c) 2003-2004, Ascher Stefan <stievie at utanet.at>
+* Copyright (c) 2020, Masatake YAMATO <yamato at redhat.com>
+* Copyright (c) 2020, Red Hat, Inc.
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License version 2 or (at your option) any later version.
+*
+* This module contains functions for generating tags for R language files.
+* R is a programming language for statistical computing.
+* R is GPL Software, get it from http://www.r-project.org/
+*
+* The language references are available at
+* https://cran.r-project.org/manuals.html, and
+* https://cran.r-project.org/doc/manuals/r-release/R-lang.html
+*
+* The base library (including library and source functions) release is at
+* https://stat.ethz.ch/R-manual/R-devel/library/base/html/00Index.html
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include "debug.h"
+#include "entry.h"
+#include "keyword.h"
+#include "parse.h"
+#include "read.h"
+#include "selectors.h"
+#include "tokeninfo.h"
+#include "trace.h"
+#include "vstring.h"
+#include "subparser.h"
+#include "r.h"
+
+#include <string.h>
+#include <ctype.h> /* to define isalpha(), isalnum(), isspace() */
+
+
+/*
+* MACROS
+*
+* Tracing helpers. With DEBUG defined, each macro expands to a TRACE_* call
+* that prints the token text (a newline token is printed as "\n"), the
+* token type name from tokenTypeStr (), and the name of the parent cork
+* entry (empty when the parent index is CORK_NIL).
+* R_TRACE_ENTER () takes no arguments: it reads the identifiers `token' and
+* `parent' from the scope where it is expanded.
+* Without DEBUG every macro expands to an empty do/while statement.
+* NOTE(review): the non-DEBUG expansions already end in ';', so a call site
+* written as `R_TRACE_ENTER ();' yields an extra empty statement.
+*/
+#ifdef DEBUG
+#define R_TRACE_TOKEN_TEXT(TXT,T,Q) TRACE_PRINT("<%s> token: %s (%s), parent: %s", \
+ (TXT), \
+ tokenIsTypeVal(T, '\n')? "\\n": tokenString(T), \
+ tokenTypeStr(T->type), \
+ (Q) == CORK_NIL? "": getEntryInCorkQueue(Q)->name)
+#define R_TRACE_TOKEN(T,Q) TRACE_PRINT("token: %s (%s), parent: %s", \
+ tokenIsTypeVal((T), '\n')? "\\n": tokenString(T), \
+ tokenTypeStr((T)->type), \
+ (Q) == CORK_NIL? "": getEntryInCorkQueue(Q)->name)
+
+#define R_TRACE_ENTER() TRACE_ENTER_TEXT("token: %s (%s), parent: %s", \
+ tokenIsTypeVal(token, '\n')? "\\n": tokenString(token), \
+ tokenTypeStr(token->type), \
+ parent == CORK_NIL? "": getEntryInCorkQueue(parent)->name)
+#define R_TRACE_LEAVE() TRACE_LEAVE()
+#else
+#define R_TRACE_TOKEN_TEXT(TXT,T,Q) do {} while (0);
+#define R_TRACE_TOKEN(T,Q) do {} while (0);
+#define R_TRACE_ENTER() do {} while (0);
+#define R_TRACE_LEAVE() do {} while (0);
+#endif
+
+
+/*
+* DATA DEFINITIONS
+*
+* rKind: tag kinds emitted by this parser. The values K_FUNCTION through
+* K_NAMEATTR are used as indices into RKinds and kindExtraInfo, so the
+* enumerator order has to match the initializer order of RKinds.
+* K_UNDEFINED (-1) is not a valid index; KIND_COUNT is the array size.
+*/
+typedef enum {
+ K_UNDEFINED = -1,
+ K_FUNCTION,
+ K_LIBRARY,
+ K_SOURCE,
+ K_GLOBALVAR,
+ K_FUNCVAR,
+ K_PARAM,
+ K_VECTOR,
+ K_LIST,
+ K_DATAFRAME,
+ K_NAMEATTR,
+ KIND_COUNT
+} rKind;
+/* Role indices for the "library" kind; the order matches the entries of
+ * RLibraryRoles.
+ */
+typedef enum {
+ R_LIBRARY_ATTACHED_BY_LIBRARY,
+ R_LIBRARY_ATTACHED_BY_REQUIRE,
+} rLibraryRole;
+/* Role index for the "source" kind; matches the single entry of RSourceRoles. */
+typedef enum {
+ R_SOURCE_LOADED_BY_SOURCE,
+} rSourceRole;
+/* Roles attached to the "library" kind in RKinds via ATTACH_ROLES. */
+static roleDefinition RLibraryRoles [] = {
+ { true, "library", "library attached by library function" },
+ { true, "require", "library attached by require function" },
+};
+/* Roles attached to the "source" kind in RKinds via ATTACH_ROLES.
+ * NOTE(review): the description string below spells "function" as
+ * "fucntion"; it is a runtime string and is left as committed here.
+ */
+static roleDefinition RSourceRoles [] = {
+ { true, "source", "source loaded by source fucntion" },
+};
+/* Kind table, indexed by rKind. "library" and "source" are reference-only
+ * kinds that carry the role tables above. "parameter" is the only entry
+ * whose first initializer is false (presumably the kind's default-enabled
+ * flag -- confirm against kindDefinition).
+ * NOTE(review): the "nameattr" description spells "attributes" as
+ * "attribtes"; it is a runtime string and is left as committed here.
+ */
+static kindDefinition RKinds[KIND_COUNT] = {
+ {true, 'f', "function", "functions"},
+ {true, 'l', "library", "libraries",
+ .referenceOnly = true, ATTACH_ROLES (RLibraryRoles) },
+ {true, 's', "source", "sources",
+ .referenceOnly = true, ATTACH_ROLES (RSourceRoles) },
+ {true, 'g', "globalVar", "global variables having values other than function()"},
+ {true, 'v', "functionVar", "function variables having values other than function()"},
+ {false,'z', "parameter", "function parameters inside function definitions" },
+ {true, 'c', "vector", "vectors explicitly created with `c()'" },
+ {true, 'L', "list", "lists explicitly created with `list()'" },
+ {true, 'd', "dataframe", "data frame explicitly created with `data.frame()'" },
+ {true, 'n', "nameattr", "names attribtes in vectors, lists, or dataframes" },
+};
+/* Extra per-kind data that kindDefinition has no slot for:
+ *   anon_prefix - name prefix for anonymous tags of the kind (its use is
+ *                 not visible in this part of the file);
+ *   ctor        - name of the R function that constructs a value of the
+ *                 kind; compared against the "constructor" parser field.
+ */
+struct sKindExtraInfo {
+ const char *anon_prefix;
+ const char *ctor;
+};
+/* Indexed by rKind. Kinds without a designated initializer are
+ * zero-initialized, so both members are NULL for them.
+ */
+static struct sKindExtraInfo kindExtraInfo[KIND_COUNT] = {
+ [K_FUNCTION] = {
+ "anonFunc",
+ "function",
+ },
+ [K_VECTOR] = {
+ "anonVec",
+ "c",
+ },
+ [K_LIST] = {
+ "anonList",
+ "list",
+ },
+ [K_DATAFRAME] = {
+ "anonDataFrame",
+ "data.frame",
+ },
+};
+/* Indices into RFields; the order matches the initializers below. */
+typedef enum {
+ F_ASSIGNMENT_OPERATOR,
+ F_CONSTRUCTOR,
+} rField;
+/* Parser-specific tag fields. "assignmentop" is disabled by default and
+ * "constructor" is enabled by default. The code in this file reads the
+ * `ftype' member of each entry, which is not initialized here (presumably
+ * filled in when the fields are registered -- confirm).
+ */
+static fieldDefinition RFields [] = {
+ {
+ .name = "assignmentop",
+ .description = "operator for assignment",
+ .enabled = false,
+ },
+ {
+ .name = "constructor",
+ .description = "function used for making value assigned to the nameattr tag",
+ .enabled = true,
+ }
+};
+/* Keyword ids are plain ints so that KEYWORD_NONE fits in the same type. */
+typedef int keywordId; /* to allow KEYWORD_NONE */
+/* Maps reserved words and a few well-known function names to KEYWORD_R_*
+ * ids (declared outside this file, presumably in r.h). Several spellings
+ * share one id: every NA_* variant maps to KEYWORD_R_NA, and "require"
+ * maps to KEYWORD_R_LIBRARY just like "library".
+ */
+static const keywordTable RKeywordTable [] = {
+ { "c", KEYWORD_R_C },
+ { "list", KEYWORD_R_LIST },
+ { "data.frame",KEYWORD_R_DATAFRAME },
+ { "function", KEYWORD_R_FUNCTION },
+ { "if", KEYWORD_R_IF },
+ { "else", KEYWORD_R_ELSE },
+ { "for", KEYWORD_R_FOR },
+ { "while", KEYWORD_R_WHILE },
+ { "repeat", KEYWORD_R_REPEAT },
+ { "in", KEYWORD_R_IN },
+ { "next", KEYWORD_R_NEXT },
+ { "break", KEYWORD_R_BREAK },
+ { "TRUE", KEYWORD_R_TRUE, },
+ { "FALSE", KEYWORD_R_FALSE, },
+ { "NULL", KEYWORD_R_NULL, },
+ { "Inf", KEYWORD_R_INF, },
+ { "NaN", KEYWORD_R_NAN, },
+ { "NA", KEYWORD_R_NA, },
+ { "NA_integer_", KEYWORD_R_NA, },
+ { "NA_real_", KEYWORD_R_NA, },
+ { "NA_complex_", KEYWORD_R_NA, },
+ { "NA_character_", KEYWORD_R_NA, },
+ { "source", KEYWORD_R_SOURCE },
+ { "library", KEYWORD_R_LIBRARY },
+ { "require", KEYWORD_R_LIBRARY },
+};
+
+#ifdef DEBUG
+static const char *tokenTypeStr(enum RTokenType e);
+#endif
+/* Opening/closing token types that form bracket pairs; handed to the
+ * generic token layer through rTokenInfoClass.pairs.
+ */
+static struct tokenTypePair typePairs [] = {
+ { '{', '}' },
+ { '[', ']' },
+ { '(', ')' },
+};
+/* R-specific token: the generic tokenInfo comes first so that a tokenInfo
+ * pointer can be cast to rToken (see the R () macro). The remaining members
+ * drive signature collection in readToken () and are reset by clearToken ().
+ */
+typedef struct sRToken {
+ tokenInfo base;
+ int scopeIndex; /* scope given to parameter tags made while reading a signature */
+ int parenDepth; /* '(' / ')' nesting counted while a signature is being collected */
+ vString *signature; /* non-NULL while a signature is being collected */
+ int kindIndexForParams; /* Used only when gathering parameters */
+} rToken;
+/* Downcast from tokenInfo * to rToken *. */
+#define R(TOKEN) ((rToken *)TOKEN)
+/* NOTE(review): blackHoleIndex is not referenced in the part of the file
+ * visible here; its role has to be confirmed where it is used.
+ */
+static int blackHoleIndex;
+
+static langType Lang_R; /* compared with tagEntryInfo.langType to tell R tags from other parsers' tags */
+/* Token class describing rToken to the generic token layer: which type
+ * values stand for undefined/keyword/EOF tokens, how many bytes an rToken
+ * needs beyond a plain tokenInfo (extraSpace), the bracket pairs, and the
+ * read/clear callbacks defined in this file. No init or copy callback is
+ * installed.
+ */
+static void readToken (tokenInfo *const token, void *data);
+static void clearToken (tokenInfo *token);
+static struct tokenInfoClass rTokenInfoClass = {
+ .nPreAlloc = 4,
+ .typeForUndefined = TOKEN_R_UNDEFINED,
+ .keywordNone = KEYWORD_NONE,
+ .typeForKeyword = TOKEN_R_KEYWORD,
+ .typeForEOF = TOKEN_R_EOF,
+ .extraSpace = sizeof (rToken) - sizeof (tokenInfo),
+ .pairs = typePairs,
+ .pairCount = ARRAY_SIZE (typePairs),
+ .init = NULL,
+ .read = readToken,
+ .clear = clearToken,
+ .copy = NULL,
+};
+
+
+/*
+ * FUNCTION PROTOTYPES
+ */
+static bool parseStatement (tokenInfo *const token, int parent, bool in_arglist, bool in_continuous_pair);
+static void parsePair (tokenInfo *const token, int parent, tokenInfo *const funcall);
+
+static int notifyReadRightSideSymbol (tokenInfo *const symbol,
+ const char *const assignmentOperator,
+ int parent,
+ tokenInfo *const token);
+static int makeSimpleSubparserTag (int langType, tokenInfo *const token, int parent,
+ bool in_func, int kindInR, const char *assignmentOperator);
+static bool askSubparserTagAcceptancy (tagEntryInfo *pe);
+static bool askSubparserTagHasFunctionAlikeKind (tagEntryInfo *e);
+static int notifyReadFuncall (tokenInfo *const func, tokenInfo *const token, int parent);
+
+/*
+* FUNCTION DEFINITIONS
+*
+* hasKindsOrCtors: report whether tag entry E counts as one of the R kinds
+* listed in KINDS (COUNT elements).
+*
+* - E was made by the R parser: true when E's kindIndex equals a listed
+*   kind.
+* - E was made by another parser: true when K_FUNCTION is listed and
+*   askSubparserTagHasFunctionAlikeKind (E) agrees.
+* - In both cases, if the above did not match: true when E carries a
+*   "constructor" parser field whose value equals the ctor name that
+*   kindExtraInfo records for a listed kind.
+*
+* Each element of KINDS is used to index kindExtraInfo, so callers must
+* pass values in the range [0, KIND_COUNT).
+*/
+static bool hasKindsOrCtors (tagEntryInfo * e, int kinds[], size_t count)
+{
+ if (e->langType == Lang_R)
+ {
+ for (size_t i = 0; i < count; i++)
+ {
+ if (e->kindIndex == kinds[i])
+ return true;
+ }
+ }
+ else
+ {
+ bool function = false;
+ for (size_t i = 0; i < count; i++)
+ {
+ if (K_FUNCTION == kinds[i])
+ {
+ function = true;
+ break;
+ }
+ }
+ if (function && askSubparserTagHasFunctionAlikeKind (e))
+ return true;
+ }
+
+ const char *tmp = getParserFieldValueForType (e,
+ RFields [F_CONSTRUCTOR].ftype);
+ if (tmp == NULL)
+ return false; /* no "constructor" field: nothing left to match on */
+
+ for (size_t i = 0; i < count; i++)
+ {
+ const char * ctor = kindExtraInfo [kinds[i]].ctor;
+ if (ctor && strcmp (tmp, ctor) == 0)
+ return true;
+ }
+
+ return false;
+}
+/* Starting at cork index SCOPE, follow extensionFields.scopeIndex upwards
+ * and return the first index whose entry does NOT satisfy
+ * hasKindsOrCtors () for KINDS/COUNT. Returns CORK_NIL as soon as
+ * getEntryInCorkQueue () yields no entry (presumably when the chain reaches
+ * the top level).
+ */
+static int searchScopeOtherThan (int scope, int kinds[], size_t count)
+{
+ do
+ {
+ tagEntryInfo * e = getEntryInCorkQueue (scope);
+ if (!e)
+ return CORK_NIL;
+
+ if (!hasKindsOrCtors (e, kinds, count))
+ return scope;
+
+ scope = e->extensionFields.scopeIndex;
+ }
+ while (1);
+}
+/* Make an R tag named after TOKEN with kind KIND in scope PARENT, unless a
+ * tag that makes it redundant is already known. Returns the new cork index,
+ * or CORK_NIL when no tag was made.
+ *
+ * ASSIGNMENTOP:
+ *   - three characters long ("<<-" or "->>"): nothing is made if a
+ *     function, variable or parameter of that name is found by
+ *     anyKindsEntryInScopeRecursive () from PARENT; otherwise the tag is
+ *     made with no scope (PARENT is replaced by CORK_NIL);
+ *   - non-empty: stored in the "assignmentop" field of the new tag;
+ *   - empty string: the new tag is marked with XTAG_ANONYMOUS;
+ *   - NULL: neither of the above.
+ *
+ * Duplicate suppression depends on the parent entry:
+ *   - function-like parent: any same-named entry in the scope suppresses
+ *     the tag, and K_GLOBALVAR is turned into K_FUNCVAR;
+ *   - vector/list/dataframe parent with KIND == K_GLOBALVAR: the tag is
+ *     re-parented to the nearest enclosing scope that is not such a
+ *     container, and becomes K_FUNCVAR when such a scope exists;
+ *   - any other parent: only a same-named entry of the same kind
+ *     suppresses the tag.
+ */
+static int makeSimpleRTagR (tokenInfo *const token, int parent, int kind,
+ const char * assignmentOp)
+{
+ if (assignmentOp && (strlen (assignmentOp) == 3))
+ {
+ /* <<- or ->> is used here. */
+ if (anyKindsEntryInScopeRecursive (parent, tokenString (token),
+ (int[]){K_FUNCTION,
+ K_GLOBALVAR,
+ K_FUNCVAR,
+ K_PARAM}, 4) != CORK_NIL)
+ return CORK_NIL;
+
+ parent = CORK_NIL;
+ }
+
+ /* If the tag (T) to be created is defined in a scope and
+ the scope already has another tag having the same name
+ as T, T should not be created. */
+ tagEntryInfo *pe = getEntryInCorkQueue (parent);
+ int cousin = CORK_NIL;
+ if (pe && ((pe->langType == Lang_R && pe->kindIndex == K_FUNCTION)
+ || (pe->langType != Lang_R && askSubparserTagHasFunctionAlikeKind (pe))))
+ {
+ cousin = anyEntryInScope (parent, tokenString (token));
+ if (kind == K_GLOBALVAR)
+ kind = K_FUNCVAR;
+ }
+ else if (pe && (kind == K_GLOBALVAR)
+ && hasKindsOrCtors (pe, (int[]){K_VECTOR, K_LIST, K_DATAFRAME}, 3))
+ {
+ parent = searchScopeOtherThan (pe->extensionFields.scopeIndex,
+ (int[]){K_VECTOR, K_LIST, K_DATAFRAME}, 3);
+ if (parent == CORK_NIL)
+ cousin = anyKindEntryInScope (parent, tokenString (token), K_GLOBALVAR);
+ else
+ {
+ cousin = anyKindEntryInScope (parent, tokenString (token), K_FUNCVAR);
+ kind = K_FUNCVAR;
+ }
+ }
+ else if (pe)
+ {
+ /* The condition for tagging is a bit relaxed here.
+ Even if the same name tag is created in the scope, a name
+ is tagged if kinds are different. */
+ cousin = anyKindEntryInScope (parent, tokenString (token), kind);
+ }
+ if (cousin != CORK_NIL)
+ return CORK_NIL;
+
+ int corkIndex = makeSimpleTag (token->string, kind);
+ tagEntryInfo *tag = getEntryInCorkQueue (corkIndex);
+ if (tag)
+ {
+ tag->extensionFields.scopeIndex = parent;
+ if (assignmentOp)
+ {
+ if (strlen (assignmentOp) > 0)
+ attachParserField (tag, true,
+ RFields [F_ASSIGNMENT_OPERATOR].ftype,
+ assignmentOp);
+ else
+ markTagExtraBit (tag, XTAG_ANONYMOUS);
+ }
+ registerEntry (corkIndex);
+ }
+ return corkIndex;
+}
+/* Make a tag for TOKEN under PARENT, choosing who creates it:
+ *   - makeSimpleRTagR () when there is no parent entry, the parent is an R
+ *     tag, or the parent's parser does not accept the tag
+ *     (askSubparserTagAcceptancy);
+ *   - makeSimpleSubparserTag () otherwise; IN_FUNC is only forwarded there.
+ *
+ * When the parent is a vector/list/dataframe (by kind or by constructor
+ * field) and ASSIGNMENTOP is "=", the tag kind becomes K_NAMEATTR.
+ * The constructor name is looked up from the KIND passed in, before that
+ * rewrite, and is attached as the "constructor" field to nameattr tags and
+ * to tags made by a subparser. KIND is used to index kindExtraInfo, so it
+ * must be in [0, KIND_COUNT).
+ *
+ * Returns the cork index reported by the chosen maker.
+ */
+static int makeSimpleRTag (tokenInfo *const token, int parent, bool in_func, int kind,
+ const char * assignmentOp)
+{
+ int r;
+ const char *ctor = kindExtraInfo [kind].ctor;
+ tagEntryInfo *pe = (parent == CORK_NIL)? NULL: getEntryInCorkQueue (parent);
+
+ /* makeTagWithTranslation method for subparsers
+ called from makeSimpleSubparserTag expects
+ kind should be resolved. */
+ if (pe && hasKindsOrCtors (pe, (int[]){K_VECTOR, K_LIST, K_DATAFRAME}, 3))
+ {
+ if (assignmentOp
+ && strcmp (assignmentOp, "=") == 0)
+ kind = K_NAMEATTR;
+ }
+
+ bool foreign_tag = false;
+ if (pe == NULL || pe->langType == Lang_R ||
+ !askSubparserTagAcceptancy (pe))
+ r = makeSimpleRTagR (token, parent, kind, assignmentOp);
+ else
+ {
+ foreign_tag = true;
+ r = makeSimpleSubparserTag (pe->langType, token, parent, in_func,
+ kind, assignmentOp);
+ }
+
+ if ((kind == K_NAMEATTR || foreign_tag) && ctor)
+ {
+ tagEntryInfo *e = getEntryInCorkQueue (r);
+ if (e)
+ attachParserField (e, true,
+ RFields [F_CONSTRUCTOR].ftype,
+ ctor);
+ }
+
+ return r;
+}
+/* `clear' callback of rTokenInfoClass: put the R-specific members of TOKEN
+ * back to their idle values and free a signature buffer if one is attached.
+ */
+static void clearToken (tokenInfo *token)
+{
+ R (token)->parenDepth = 0;
+ R (token)->scopeIndex = CORK_NIL;
+ R (token)->kindIndexForParams = KIND_GHOST_INDEX;
+ if (R (token)->signature)
+ {
+ vStringDelete (R (token)->signature);
+ R (token)->signature = NULL;
+ }
+}
+/* Read the remainder of a quoted token. On entry TOKEN's string already
+ * holds the opening quote character (', " or `); input is appended up to
+ * and including the first unescaped occurrence of that same character.
+ * A backslash toggles the escape state, so "\\" is an escaped backslash and
+ * does not escape what follows. Hitting EOF returns with the token left
+ * unterminated. DATA is unused.
+ */
+static void readString (tokenInfo *const token, void *data)
+{
+ int c;
+ bool escaped = false;
+
+ int c0 = tokenString(token)[0]; /* the opening quote decides which quote closes */
+
+ while (1)
+ {
+ c = getcFromInputFile ();
+ switch (c)
+ {
+ case EOF:
+ return;
+ case '\'':
+ case '"':
+ case '`':
+ tokenPutc (token, c);
+ if (!escaped && c == c0)
+ return;
+ escaped = false;
+ break;
+ case '\\':
+ tokenPutc (token, c);
+ escaped = !escaped;
+ break;
+ default:
+ tokenPutc (token, c);
+ escaped = false;
+ break;
+ }
+ }
+}
+/* Append the rest of a numeric constant to TOKEN. Any run of hex digits,
+ * '.', 'E', '+', '-', 'L', 'x', 'p' and 'i' is accepted without checking
+ * that the characters form a well-formed number; the first other character
+ * is pushed back. DATA is unused.
+ * NOTE(review): the loop condition only ends on a 0 byte; EOF is non-zero,
+ * so it takes the else branch and is handed to ungetcToInputFile ().
+ */
+static void readNumber (tokenInfo *const token, void *data)
+{
+ int c;
+
+ /* 10.3.1 Constants
+ *
+ * Valid numeric constants: 1 10 0.1 .2 1e-7 1.2e+7
+ * Valid integer constants: 1L, 0x10L, 1000000L, 1e6L
+ * Valid numeric constants: 1.1L, 1e-3L, 0x1.1p-2
+ * Valid complex constants: 2i 4.1i 1e-2i
+ */
+ while ((c = getcFromInputFile ()))
+ {
+ if (isxdigit (c) || c == '.' || c == 'E'
+ || c == '+' || c == '-'
+ || c == 'L' || c == 'x' || c == 'p'
+ || c == 'i')
+ tokenPutc (token, c);
+ else
+ {
+ ungetcToInputFile (c);
+ break;
+ }
+ }
+}
+/* Append the rest of an identifier to TOKEN: alphanumerics, '.' and '_'
+ * are consumed, and the first other character is pushed back. The caller
+ * has already stored the leading character(s). DATA is unused.
+ */
+static void readSymbol (tokenInfo *const token, void *data)
+{
+ int c;
+ while ((c = getcFromInputFile ()))
+ {
+ if (isalnum (c) || c == '.' || c == '_')
+ tokenPutc (token, c);
+ else
+ {
+ ungetcToInputFile (c);
+ break;
+ }
+ }
+}
+/* Look STRING up, case-sensitively, in the keyword table and return its
+ * keyword id as reported by lookupCaseKeyword ().
+ * NOTE(review): the language is taken from getInputLanguage () on the first
+ * call and cached in a function-static, so every later call reuses it.
+ */
+static keywordId resolveKeyword (vString *string)
+{
+ char *s = vStringValue (string);
+ static langType lang = LANG_AUTO;
+
+ if (lang == LANG_AUTO)
+ lang = getInputLanguage ();
+
+ return lookupCaseKeyword (s, lang);
+}
+/* Tell whether the next symbol appended to SIGNATURE would be a parameter
+ * name: true when the text collected so far ends with '(' or when its last
+ * non-space character is ','.
+ */
+static bool signatureExpectingParameter (vString *signature)
+{
+ if (vStringLast (signature) == '(')
+ return true;
+
+ for (size_t i = vStringLength (signature); i > 0; i--) /* scan backwards over trailing spaces */
+ {
+ char c = vStringChar (signature, i - 1);
+ if (c == ' ')
+ continue;
+ else if (c == ',')
+ return true;
+ break;
+ }
+ return false;
+}
+/* `read' callback of rTokenInfoClass: scan the next R token into TOKEN.
+ *
+ * Spaces, tabs and form feeds are skipped; a newline is a token of its own
+ * (type '\n'), and a '#' comment is consumed and reported as the newline
+ * that ends it (or as EOF). Single-character punctuation uses the
+ * character itself as the token type; multi-character operators, strings,
+ * numbers, symbols and keywords get TOKEN_R_* types. A symbol whose text
+ * is found by resolveKeyword () is turned into TOKEN_R_KEYWORD.
+ *
+ * After the token is classified, and only while a signature buffer is
+ * attached to TOKEN, the token text is appended to that buffer and
+ * parameter tags are made for symbols found at parenthesis depth 1.
+ *
+ * DATA is only passed through to the read* helpers.
+ */
+static void readToken (tokenInfo *const token, void *data)
+{
+ int c, c0;
+
+ token->type = TOKEN_R_UNDEFINED;
+ token->keyword = KEYWORD_NONE;
+ vStringClear (token->string);
+
+ do
+ c = getcFromInputFile ();
+ while (c == ' ' || c== '\t' || c == '\f');
+
+ token->lineNumber = getInputLineNumber ();
+ token->filePosition = getInputFilePosition ();
+
+ switch (c)
+ {
+ case EOF:
+ token->type = TOKEN_R_EOF;
+ break;
+ case '#': /* comment: consume up to the end of the line */
+ while (1)
+ {
+ c = getcFromInputFile ();
+ if (c == EOF)
+ {
+ token->type = TOKEN_R_EOF;
+ break;
+ }
+ else if (c == '\n')
+ {
+ token->type = c; /* the comment is reported as the newline ending it */
+ tokenPutc (token, c);
+ break;
+ }
+ }
+ break;
+ case '\n':
+ case ';':
+ token->type = c;
+ tokenPutc (token, c);
+ break;
+ case '\'':
+ case '"':
+ case '`':
+ token->type = TOKEN_R_STRING;
+ tokenPutc (token, c);
+ readString (token, data);
+ break;
+ case '+':
+ case '/':
+ case '^':
+ case '~':
+ token->type = TOKEN_R_OPERATOR;
+ tokenPutc (token, c);
+ break;
+ case ':': /* ":" is an operator; "::" and ":::" are scope tokens */
+ token->type = TOKEN_R_OPERATOR;
+ tokenPutc (token, c);
+ c = getcFromInputFile ();
+ if (c == ':')
+ {
+ tokenPutc (token, c);
+ token->type = TOKEN_R_SCOPE;
+ c = getcFromInputFile ();
+ if (c == ':')
+ tokenPutc (token, c);
+ else
+ ungetcToInputFile (c);
+ }
+ else
+ ungetcToInputFile (c);
+ break;
+ case '&':
+ case '|':
+ case '*': /* single or doubled form of the same character */
+ token->type = TOKEN_R_OPERATOR;
+ tokenPutc (token, c);
+ c0 = getcFromInputFile ();
+ if (c == c0)
+ tokenPutc (token, c0);
+ else
+ ungetcToInputFile (c0);
+ break;
+ case '=': /* "==" is an operator; a lone "=" gets type '=' */
+ token->type = TOKEN_R_OPERATOR;
+ tokenPutc (token, c);
+ c = getcFromInputFile ();
+ if (c == '=')
+ tokenPutc (token, c);
+ else
+ {
+ token->type = '=';
+ ungetcToInputFile (c);
+ }
+ break;
+ case '-': /* "-" operator, or right assignment "->" / "->>" */
+ token->type = TOKEN_R_OPERATOR;
+ tokenPutc (token, c);
+ c = getcFromInputFile ();
+ if (c == '>')
+ {
+ token->type = TOKEN_R_RASSIGN;
+ tokenPutc (token, c);
+ c = getcFromInputFile ();
+ if (c == '>')
+ tokenPutc (token, c);
+ else
+ ungetcToInputFile (c);
+ }
+ else
+ ungetcToInputFile (c);
+ break;
+ case '>':
+ token->type = TOKEN_R_OPERATOR;
+ tokenPutc (token, c);
+ c = getcFromInputFile ();
+ if (c == '=')
+ tokenPutc (token, c);
+ else
+ ungetcToInputFile (c);
+ break;
+ case '<': /* "<", "<=", or left assignment "<-" / "<<-" */
+ token->type = TOKEN_R_OPERATOR;
+ tokenPutc (token, c);
+ c = getcFromInputFile ();
+
+ /* <<- */
+ if (c == '<')
+ {
+ tokenPutc (token, c);
+ c = getcFromInputFile ();
+ }
+
+ if (c == '-')
+ {
+ token->type = TOKEN_R_LASSIGN;
+ tokenPutc (token, c);
+ }
+ else if (c == '=')
+ tokenPutc (token, c);
+ else
+ ungetcToInputFile (c);
+ break;
+ case '%': /* %...% operator: read through the closing '%' or to EOF */
+ token->type = TOKEN_R_OPERATOR;
+ tokenPutc (token, c);
+ do
+ {
+ c = getcFromInputFile ();
+ if (c == EOF)
+ break;
+
+ tokenPutc (token, c);
+ if (c == '%')
+ break;
+ }
+ while (1);
+ break;
+ case '!':
+ token->type = TOKEN_R_OPERATOR;
+ tokenPutc (token, c);
+ c = getcFromInputFile ();
+ if (c == '=')
+ tokenPutc (token, c);
+ else
+ ungetcToInputFile (c);
+ break;
+ case '{':
+ case '}':
+ case '(':
+ case ')':
+ case '[':
+ case ']':
+ case ',':
+ case '$':
+ case '@':
+ token->type = c;
+ tokenPutc (token, c);
+ break;
+ case '.': /* ".5" number, ".name" symbol, "..." dots, or "..N" */
+ tokenPutc (token, c);
+ c = getcFromInputFile ();
+ /* NOTE(review): when the character after a lone '.' matches none of the branches below, it is consumed and never pushed back. */
+ if (isdigit(c))
+ {
+ token->type = TOKEN_R_NUMBER;
+ tokenPutc (token, c);
+ readNumber(token, data);
+ }
+ else if (isalpha (c) || c == '_')
+ {
+ token->type = TOKEN_R_SYMBOL;
+ tokenPutc (token, c);
+ readSymbol (token, data);
+
+ token->keyword = resolveKeyword (token->string);
+ if (token->keyword != KEYWORD_NONE)
+ token->type = TOKEN_R_KEYWORD;
+ }
+ else if (c == '.')
+ {
+ token->type = TOKEN_R_DOTS;
+ tokenPutc (token, c);
+
+ c = getcFromInputFile ();
+ if (c == '.')
+ tokenPutc (token, c);
+ else if (isdigit(c))
+ {
+ token->type = TOKEN_R_DOTS_N;
+ do
+ {
+ tokenPutc (token, c);
+ c = getcFromInputFile ();
+ }
+ while (isdigit(c));
+ ungetcToInputFile (c);
+ }
+ else if (isalpha (c) || c == '_')
+ {
+ token->type = TOKEN_R_SYMBOL;
+ tokenPutc (token, c);
+ readSymbol (token, data);
+
+ token->keyword = resolveKeyword (token->string);
+ if (token->keyword != KEYWORD_NONE)
+ token->type = TOKEN_R_KEYWORD;
+ }
+ else
+ {
+ token->type = TOKEN_R_UNDEFINED;
+ ungetcToInputFile (c);
+ }
+ }
+ break;
+ default:
+ tokenPutc (token, c);
+ if (isdigit (c))
+ {
+ token->type = TOKEN_R_NUMBER;
+ readNumber(token, data);
+ }
+ else if (isalpha (c))
+ {
+ token->type = TOKEN_R_SYMBOL;
+ readSymbol (token, data);
+
+ token->keyword = resolveKeyword (token->string);
+ if (token->keyword != KEYWORD_NONE)
+ token->type = TOKEN_R_KEYWORD;
+ }
+ else
+ token->type = TOKEN_R_UNDEFINED;
+ break;
+ }
+
+ /* Handle parameters in a signature */
+ if (R(token)->signature && !tokenIsType(token, R_EOF) && !tokenIsTypeVal(token, '\n'))
+ {
+ vString *signature = R (token)->signature;
+
+ if (tokenIsTypeVal (token, '('))
+ R (token)->parenDepth++;
+ else if (tokenIsTypeVal (token, ')'))
+ R (token)->parenDepth--;
+
+ if (R (token)->kindIndexForParams != KIND_GHOST_INDEX
+ && R (token)->parenDepth == 1 && tokenIsType (token, R_SYMBOL)
+ && signatureExpectingParameter (signature))
+ makeSimpleRTag (token, R (token)->scopeIndex, false,
+ R (token)->kindIndexForParams, NULL);
+
+ if (vStringLast (signature) != '(' &&
+ !tokenIsTypeVal (token, ',') &&
+ !tokenIsTypeVal (token, ')'))
+ vStringPut (signature, ' '); /* separate tokens, but not after '(' nor before ',' or ')' */
+ vStringCat (signature, token->string);
+ }
+}
+
+#define newRToken rNewToken
+extern tokenInfo *rNewToken (void) /* Return a new token created from
+ * rTokenInfoClass. newRToken is the alias used inside this file. */
+{
+ return newToken (&rTokenInfoClass);
+}
+
+#define tokenReadNoNewline rTokenReadNoNewline
+extern void rTokenReadNoNewline (tokenInfo *const token) /* Read into TOKEN
+ * repeatedly until the token just read is not a newline token. */
+{
+ while (1)
+ {
+ tokenRead(token);
+ if (!tokenIsTypeVal (token, '\n'))
+ break; /* first non-newline token stays in TOKEN */
+ }
+}
+
+static void setupCollectingSignature (tokenInfo *const token,
+ vString *signature,
+ int kindIndexForParams,
+ int corkIndex) /* Arm signature collection on TOKEN.
+ * Stores SIGNATURE as the buffer to collect into, KINDINDEXFORPARAMS as the
+ * kind used for parameter tags, CORKINDEX as the scope index for those
+ * tags, and starts the parenthesis depth at 1. */
+{
+ R (token)->signature = signature;
+ R (token)->kindIndexForParams = kindIndexForParams;
+ R (token)->scopeIndex = corkIndex;
+ R (token)->parenDepth = 1;
+}
+
+extern void rSetupCollectingSignature (tokenInfo *const token,
+ vString *signature) /* Exported variant of setupCollectingSignature.
+ * It passes KIND_GHOST_INDEX as the parameter kind and CORK_NIL as the
+ * scope index, so only SIGNATURE is supplied by the caller. */
+{
+ setupCollectingSignature (token, signature,
+ KIND_GHOST_INDEX, CORK_NIL);
+}
+
+static void teardownCollectingSignature (tokenInfo *const token) /* Disarm
+ * signature collection on TOKEN by resetting the four fields written by
+ * setupCollectingSignature. The signature buffer itself is not freed here. */
+{
+ R (token)->parenDepth = 0;
+ R (token)->scopeIndex = CORK_NIL;
+ R (token)->kindIndexForParams = KIND_GHOST_INDEX;
+ R (token)->signature = NULL;
+}
+
+extern void rTeardownCollectingSignature (tokenInfo *const token) /* Exported
+ * wrapper around teardownCollectingSignature. */
+{
+ teardownCollectingSignature (token);
+}
+
+static int getKindForToken (tokenInfo *const token) /* Choose the tag kind
+ * for the value introduced by TOKEN: the function, c, list and data-frame
+ * keywords map to K_FUNCTION, K_VECTOR, K_LIST and K_DATAFRAME; any other
+ * token yields K_GLOBALVAR. */
+{
+ if (tokenIsKeyword (token, R_FUNCTION))
+ return K_FUNCTION;
+ else if (tokenIsKeyword (token, R_C))
+ return K_VECTOR;
+ else if (tokenIsKeyword (token, R_LIST))
+ return K_LIST;
+ else if (tokenIsKeyword (token, R_DATAFRAME))
+ return K_DATAFRAME;
+ return K_GLOBALVAR;
+}
+
+static bool findNonPlaceholder (int corkIndex, tagEntryInfo *entry, void *data) /* Callback
+ * handed to foreachEntriesInScope. DATA points to a bool that is set to true
+ * when ENTRY is not a placeholder. CORKINDEX is unused. */
+{
+ bool *any_non_placehoders = data;
+ if (!entry->placeholder)
+ {
+ *any_non_placehoders = true;
+ return false; /* presumably tells the iterator to stop; confirm in entry.c */
+ }
+ return true;
+}
+
+static void parseRightSide (tokenInfo *const token, tokenInfo *const symbol, int parent) /* Parse
+ * the right-hand side of an assignment and tag SYMBOL.
+ *
+ * On entry the text of TOKEN is taken as the assignment operator; it is
+ * copied before TOKEN is overwritten by the next read. The anonymous
+ * function/constructor path in parseStatement clears that text first, so the
+ * operator is the empty string there. SYMBOL is the name being defined and
+ * PARENT is the cork index of the enclosing scope.
+ *
+ * A tag for SYMBOL is made by a subparser or, failing that, by
+ * makeSimpleRTag. For a function the parameter list is collected as the
+ * signature; for c/list/data.frame the argument list is parsed with the new
+ * tag as scope. The rest is then parsed as a statement and the tag gets its
+ * end line. */
+{
+ R_TRACE_ENTER();
+
+ char *const assignment_operator = eStrdup (tokenString (token));
+ vString *signature = NULL;
+
+ tokenReadNoNewline (token);
+
+ int kind = getKindForToken (token); /* decided by the first token of the right side */
+
+ /* Call sub parsers */
+ int corkIndex = notifyReadRightSideSymbol (symbol,
+ assignment_operator,
+ parent,
+ token);
+ if (corkIndex == CORK_NIL)
+ {
+ /* No subparser handled the symbol */
+ corkIndex = makeSimpleRTag (symbol, parent, kind == K_FUNCTION,
+ kind,
+ assignment_operator);
+ }
+
+ if (kind == K_FUNCTION)
+ {
+ /* parse signature */
+ tokenReadNoNewline (token);
+ if (tokenIsTypeVal (token, '('))
+ {
+ if (corkIndex == CORK_NIL)
+ tokenSkipOverPair (token); /* no tag to attach a signature to */
+ else
+ {
+ signature = vStringNewInit("(");
+ setupCollectingSignature (token, signature, K_PARAM, corkIndex);
+ tokenSkipOverPair (token);
+ teardownCollectingSignature (token);
+ }
+ tokenReadNoNewline (token);
+ }
+ parent = (corkIndex == CORK_NIL
+ ? blackHoleIndex
+ : corkIndex); /* untagged function: body is scoped under the black hole entry */
+ }
+ else if (kind == K_VECTOR || kind == K_LIST || kind == K_DATAFRAME)
+ {
+ tokenRead (token);
+ parsePair (token, corkIndex, NULL);
+ tokenRead (token);
+ parent = corkIndex;
+ }
+
+ R_TRACE_TOKEN_TEXT("body", token, parent);
+
+ parseStatement (token, parent, false, false);
+
+ tagEntryInfo *tag = getEntryInCorkQueue (corkIndex);
+ if (tag)
+ {
+ tag->extensionFields.endLine = token->lineNumber;
+ if (signature)
+ {
+ tag->extensionFields.signature = vStringDeleteUnwrap(signature); /* tag takes over the buffer */
+ signature = NULL;
+ }
+ /* If a vector has no named attribute and it has no lval,
+ * we don't make a tag for the vector. */
+ if ((kind == K_VECTOR || kind == K_LIST || kind == K_DATAFRAME)
+ && *assignment_operator == '\0')
+ {
+ bool any_non_placehoders = false;
+ foreachEntriesInScope (corkIndex, NULL,
+ findNonPlaceholder, &any_non_placehoders);
+ if (!any_non_placehoders)
+ tag->placeholder = 1;
+ }
+ }
+
+ vStringDelete (signature); /* NULL is acceptable. */
+ eFree (assignment_operator);
+ R_TRACE_LEAVE();
+}
+
+/* Parse arguments for library and source.
+ *
+ * FUNCALL is the token naming the called function. The first argument is
+ * read ahead with a private token. If it is a symbol or a string and is
+ * followed by ',' or ')', a reference tag is made for it: kind K_LIBRARY or
+ * K_SOURCE, with the role picked from FUNCALL (the literal name "library"
+ * selects R_LIBRARY_ATTACHED_BY_LIBRARY, any other R_LIBRARY keyword
+ * R_LIBRARY_ATTACHED_BY_REQUIRE). Anything else read ahead is pushed back
+ * with tokenUnread.
+ *
+ * Returns false when the argument list is finished here (closing ')' seen
+ * or EOF reached; on EOF the EOF token is copied into TOKEN). Returns true
+ * when the caller still has to parse the remaining arguments. */
+static bool preParseExternalEntitiy (tokenInfo *const token, tokenInfo *const funcall)
+{
+ TRACE_ENTER();
+
+ bool r = true;
+ tokenInfo *prefetch_token = newRToken ();
+
+ tokenReadNoNewline (prefetch_token);
+ if (tokenIsType (prefetch_token, R_SYMBOL)
+ || tokenIsType (prefetch_token, R_STRING))
+ {
+ tokenInfo *const loaded_obj_token = newTokenByCopying (prefetch_token);
+ tokenReadNoNewline (prefetch_token);
+ if (tokenIsTypeVal (prefetch_token, ')')
+ || tokenIsTypeVal (prefetch_token, ','))
+ {
+ if (tokenIsTypeVal (prefetch_token, ')'))
+ r = false;
+
+ makeSimpleRefTag (loaded_obj_token->string,
+ (tokenIsKeyword (funcall, R_LIBRARY)
+ ? K_LIBRARY
+ : K_SOURCE),
+ (tokenIsKeyword (funcall, R_LIBRARY)
+ ? (strcmp (tokenString(funcall), "library") == 0
+ ? R_LIBRARY_ATTACHED_BY_LIBRARY
+ : R_LIBRARY_ATTACHED_BY_REQUIRE)
+ : R_SOURCE_LOADED_BY_SOURCE));
+ tokenDelete (loaded_obj_token);
+ }
+ else if (tokenIsEOF (prefetch_token))
+ {
+ tokenCopy (token, prefetch_token);
+ tokenDelete (loaded_obj_token);
+ r = false;
+ }
+ else
+ {
+ /* Not a plain first argument: push both tokens back, last read first. */
+ tokenUnread (prefetch_token);
+ tokenUnread (loaded_obj_token);
+ tokenDelete (loaded_obj_token);
+ }
+ }
+ else if (tokenIsEOF (prefetch_token))
+ {
+ tokenCopy (token, prefetch_token);
+ r = false;
+ }
+ else
+ tokenUnread (prefetch_token);
+
+ tokenDelete (prefetch_token);
+
+ TRACE_LEAVE_TEXT(r
+ ? "unread tokens and request parsing again to the upper context"
+ : "parse all arguments");
+ return r;
+}
+
+static bool preParseLoopCounter(tokenInfo *const token, int parent) /* Handle the
+ * first token inside the parentheses of a for loop. If it is a symbol, a
+ * K_GLOBALVAR tag scoped under PARENT is made for it. Returns false when
+ * that token is EOF or ')', true when the caller should keep parsing the
+ * rest of the pair. */
+{
+ bool r = true;
+ TRACE_ENTER();
+
+ tokenReadNoNewline (token);
+ if (tokenIsType (token, R_SYMBOL))
+ makeSimpleRTag (token, parent, false, K_GLOBALVAR, NULL);
+
+ if (tokenIsEOF (token)
+ || tokenIsTypeVal (token, ')'))
+ r = false;
+
+ TRACE_LEAVE_TEXT(r
+ ? "unread tokens and request parsing again to the upper context"
+ : "parse all arguments");
+ return r;
+}
+
+
+/* If funcall is non-NULL, this pair represents the argument list for the function
+ * call for FUNCALL.
+ *
+ * TOKEN holds the opening token of the pair on entry. Statements are parsed
+ * with PARENT as scope until parseStatement stops on EOF or on any closing
+ * ')', '}' or ']' (the closer is not matched against the opener). For
+ * library/source calls, for loops, and calls claimed by a subparser, a
+ * pre-parse step may finish the whole pair, in which case this function
+ * returns early. */
+static void parsePair (tokenInfo *const token, int parent, tokenInfo *const funcall)
+{
+ R_TRACE_ENTER();
+
+ bool in_continuous_pair = tokenIsTypeVal (token, '(')
+ || tokenIsTypeVal (token, '['); /* false for a brace block */
+ bool is_funcall = funcall && tokenIsTypeVal (token, '(');
+ bool done = false;
+
+ if (is_funcall)
+ {
+ if (tokenIsKeyword (funcall, R_LIBRARY) ||
+ tokenIsKeyword (funcall, R_SOURCE))
+ done = !preParseExternalEntitiy (token, funcall);
+ else if (tokenIsKeyword (funcall, R_FOR))
+ done = !preParseLoopCounter (token, parent);
+ else if (notifyReadFuncall (funcall, token, parent) != CORK_NIL)
+ done = true; /* a subparser handled the call */
+ }
+
+ if (done)
+ {
+ R_TRACE_LEAVE();
+ return;
+ }
+
+ do
+ {
+ tokenRead (token);
+ R_TRACE_TOKEN_TEXT("inside pair", token, parent);
+ parseStatement (token, parent, (funcall != NULL), in_continuous_pair);
+ }
+ while (! (tokenIsEOF (token)
+ || tokenIsTypeVal (token, ')')
+ || tokenIsTypeVal (token, '}')
+ || tokenIsTypeVal (token, ']')));
+ R_TRACE_LEAVE();
+}
+
+static bool isAtConstructorInvocation (void) /* Peek at the next token and
+ * report whether it is '('. The peeked token is pushed back with
+ * tokenUnread, so the input position is left as it was. */
+{
+ bool r = false;
+
+ tokenInfo *const token = newRToken ();
+ tokenRead (token);
+ if (tokenIsTypeVal (token, '('))
+ r = true;
+ tokenUnread (token);
+ tokenDelete (token);
+ return r;
+}
+
+static bool parseStatement (tokenInfo *const token, int parent,
+ bool in_arglist, bool in_continuous_pair) /* Parse one statement.
+ *
+ * TOKEN holds the first token of the statement on entry and the token that
+ * ended it on return (EOF, ';', newline, a closing bracket, or whatever
+ * parseRightSide stopped on). PARENT is the cork index used as scope for
+ * tags made here.
+ *
+ * IN_ARGLIST: when true, "name = value" is treated as a named argument and
+ * the name is not tagged. IN_CONTINUOUS_PAIR: when true, newlines between
+ * a leading symbol and the following token are skipped.
+ *
+ * Returns true when rTokenInfoClass.read_counter changed during the call
+ * (presumably meaning at least one token was read; confirm in tokeninfo). */
+{
+ R_TRACE_ENTER();
+ int last_count = rTokenInfoClass.read_counter;
+
+ do
+ {
+ if (tokenIsEOF (token))
+ break;
+ else if (tokenIsTypeVal (token, ';'))
+ {
+ R_TRACE_TOKEN_TEXT ("break with ;", token, parent);
+ break;
+ }
+ else if (tokenIsTypeVal (token, '\n'))
+ {
+ R_TRACE_TOKEN_TEXT ("break with \\n", token, parent);
+ break;
+ }
+ else if ((tokenIsKeyword (token, R_FUNCTION)
+ || ((tokenIsKeyword (token, R_C)
+ || tokenIsKeyword (token, R_LIST)
+ || tokenIsKeyword (token, R_DATAFRAME))
+ && isAtConstructorInvocation ())))
+ {
+ /* This statement doesn't start with a symbol.
+ * This function is not assigned to any symbol. */
+ tokenInfo *const anonfunc = newTokenByCopying (token);
+ int kind = getKindForToken (token);
+ anonGenerate (anonfunc->string,
+ kindExtraInfo [kind].anon_prefix, kind);
+ tokenUnread (token); /* parseRightSide reads the keyword again */
+ vStringClear (token->string); /* parseRightSide then sees an empty assignment operator */
+ parseRightSide (token, anonfunc, parent);
+ tokenDelete (anonfunc);
+ }
+ else if (tokenIsType (token, R_SYMBOL)
+ || tokenIsType (token, R_STRING)
+ || tokenIsType (token, R_KEYWORD))
+ {
+ tokenInfo *const symbol = newTokenByCopying (token);
+
+ if (in_continuous_pair)
+ tokenReadNoNewline (token);
+ else
+ tokenRead (token);
+
+ if (tokenIsType (token, R_LASSIGN))
+ {
+ /* Assignment */
+ parseRightSide (token, symbol, parent);
+ R_TRACE_TOKEN_TEXT ("break with right side", token, parent);
+ tokenDelete(symbol);
+ break;
+ }
+ else if (tokenIsTypeVal (token, '='))
+ {
+ /* Assignment */
+ if (in_arglist)
+ {
+ /* Ignore the left side symbol. */
+ tokenRead (token);
+ R_TRACE_TOKEN_TEXT("(in arg list) after = body", token, parent);
+ }
+ else
+ {
+ parseRightSide (token, symbol, parent);
+ R_TRACE_TOKEN_TEXT ("break with right side", token, parent);
+ tokenDelete(symbol);
+ break;
+ }
+ }
+ else if (tokenIsTypeVal (token, '('))
+ {
+ /* function call */
+ parsePair (token, parent, symbol);
+ tokenRead (token);
+ R_TRACE_TOKEN_TEXT("after arglist", token, parent);
+ }
+ else if (tokenIsTypeVal (token, '$')
+ || tokenIsTypeVal (token, '@')
+ || tokenIsType (token, R_SCOPE))
+ {
+ tokenReadNoNewline (token); /* Skip the next identifier */
+ tokenRead (token);
+ R_TRACE_TOKEN_TEXT("after $", token, parent);
+ }
+ else
+ R_TRACE_TOKEN_TEXT("else after symbol", token, parent);
+ tokenDelete(symbol); /* reached by every branch above that did not break */
+ }
+ else if (tokenIsType (token, R_RASSIGN))
+ {
+ /* Right assignment: the name being defined follows the operator. */
+ char *const assignment_operator = eStrdup (tokenString (token));
+ tokenReadNoNewline (token);
+ if (tokenIsType (token, R_SYMBOL)
+ || tokenIsType (token, R_STRING))
+ {
+ makeSimpleRTag (token, parent, false,
+ K_GLOBALVAR, assignment_operator);
+ tokenRead (token);
+ }
+ eFree (assignment_operator);
+ R_TRACE_TOKEN_TEXT("after ->", token, parent);
+ }
+ else if (tokenIsType (token, R_OPERATOR))
+ {
+ tokenReadNoNewline (token); /* a newline after an operator does not end the statement */
+ R_TRACE_TOKEN_TEXT("after operator", token, parent);
+ }
+ else if (tokenIsTypeVal (token, '(')
+ || tokenIsTypeVal (token, '{')
+ || tokenIsTypeVal (token, '['))
+ {
+ parsePair (token, parent, NULL);
+ tokenRead (token);
+ R_TRACE_TOKEN_TEXT("after pair", token, parent);
+ }
+ else if (tokenIsTypeVal (token, ')')
+ || tokenIsTypeVal (token, '}')
+ || tokenIsTypeVal (token, ']'))
+ {
+ R_TRACE_TOKEN_TEXT ("break with close", token, parent);
+ break;
+ }
+ else if (tokenIsTypeVal (token, '$')
+ || tokenIsTypeVal (token, '@')
+ || tokenIsType (token, R_SCOPE))
+ {
+ tokenReadNoNewline (token); /* Skip the next identifier */
+ tokenRead (token);
+ R_TRACE_TOKEN_TEXT("after $", token, parent);
+ }
+ else
+ {
+ tokenRead (token);
+ R_TRACE_TOKEN_TEXT("else", token, parent);
+ }
+ }
+ while (!tokenIsEOF (token));
+
+ R_TRACE_LEAVE();
+
+ return (last_count != rTokenInfoClass.read_counter);
+}
+
+extern bool rParseStatement (tokenInfo *const token, int parentIndex, bool in_arglist) /* Exported
+ * entry to parseStatement. Makes R the current language for the duration
+ * of the call and always passes in_continuous_pair as true. Returns the
+ * value of parseStatement. */
+{
+ pushLanguage (Lang_R);
+ bool r = parseStatement (token, parentIndex, in_arglist, true);
+ popLanguage ();
+ return r;
+}
+
+static int notifyReadRightSideSymbol (tokenInfo *const symbol,
+ const char *const assignmentOperator,
+ int parent,
+ tokenInfo *const token) /* Offer the assignment to each subparser
+ * that provides a readRightSideSymbol callback, in iteration order. Returns
+ * the first result that is not CORK_NIL, or CORK_NIL if none claimed it. */
+{
+ subparser *sub;
+ int q = CORK_NIL;
+
+ foreachSubparser (sub, false)
+ {
+ rSubparser *rsub = (rSubparser *)sub;
+ if (rsub->readRightSideSymbol)
+ {
+ enterSubparser (sub);
+ q = rsub->readRightSideSymbol (rsub, symbol, assignmentOperator, parent, token);
+ leaveSubparser ();
+ if (q != CORK_NIL)
+ break;
+ }
+ }
+
+ return q;
+}
+
+static int makeSimpleSubparserTag (int langType,
+ tokenInfo *const token, int parent,
+ bool in_func, int kindInR,
+ const char *assignmentOperator) /* Ask the subparser registered for
+ * LANGTYPE to make the tag through its makeTagWithTranslation callback.
+ * Returns the callback result, or CORK_NIL when there is no such subparser
+ * or it has no callback. */
+{
+ int q = CORK_NIL;
+ subparser *sub = getLanguageSubparser (langType, false);
+ if (sub)
+ {
+ rSubparser *rsub = (rSubparser *)sub;
+ if (rsub->makeTagWithTranslation)
+ {
+ enterSubparser (sub);
+ q = rsub->makeTagWithTranslation (rsub,
+ token, parent,
+ in_func, kindInR,
+ assignmentOperator);
+ leaveSubparser ();
+ }
+ }
+ return q;
+}
+
+/* Ask the subparser that owns PE (looked up by pe->langType) whether it
+ * accepts the tag, through its askTagAcceptancy callback.
+ *
+ * Returns the callback result. Returns false when no subparser is found for
+ * the language or when the subparser has no askTagAcceptancy callback. */
+static bool askSubparserTagAcceptancy (tagEntryInfo *pe)
+{
+ bool q = false;
+ subparser *sub = getLanguageSubparser (pe->langType, false);
+ /* Guard against a missing subparser, as makeSimpleSubparserTag does. */
+ if (sub)
+ {
+ rSubparser *rsub = (rSubparser *)sub;
+ if (rsub->askTagAcceptancy)
+ {
+ enterSubparser (sub);
+ q = rsub->askTagAcceptancy (rsub, pe);
+ leaveSubparser ();
+ }
+ }
+ return q;
+}
+
+static bool askSubparserTagHasFunctionAlikeKind (tagEntryInfo *e) /* Ask the
+ * subparser owning E (looked up by e->langType while R is the current
+ * language) whether the kind of E is function-like. Returns the result of
+ * the hasFunctionAlikeKind callback, or false if the callback is not set.
+ * A subparser is required to exist (asserted). */
+{
+ bool q = false;
+ pushLanguage (Lang_R);
+ subparser *sub = getLanguageSubparser (e->langType, false);
+ Assert (sub);
+ popLanguage ();
+ rSubparser *rsub = (rSubparser *)sub;
+ if (rsub->hasFunctionAlikeKind)
+ {
+ enterSubparser (sub);
+ q = rsub->hasFunctionAlikeKind (rsub, e);
+ leaveSubparser ();
+ }
+ return q;
+}
+
+static int notifyReadFuncall (tokenInfo *const func,
+ tokenInfo *const token,
+ int parent) /* Offer the function call named by FUNC to each subparser
+ * that provides a readFuncall callback. Returns the first result that is
+ * not CORK_NIL, or CORK_NIL if no subparser handled the call. */
+{
+ int q = CORK_NIL;
+ subparser *sub;
+ foreachSubparser (sub, false)
+ {
+ rSubparser *rsub = (rSubparser *)sub;
+ if (rsub->readFuncall)
+ {
+ enterSubparser (sub);
+ q = rsub->readFuncall (rsub, func, token, parent);
+ leaveSubparser ();
+ if (q != CORK_NIL)
+ break;
+ }
+ }
+ return q;
+}
+
+/* Parser entry point: tag the whole input file.
+ *
+ * A placeholder entry (the "black hole") is created first; parseRightSide
+ * uses it as the scope for the body of a function that got no tag. The
+ * input is then parsed statement by statement at top level (scope
+ * CORK_NIL) until EOF, and finally every entry scoped under the black hole
+ * is marked as a placeholder. */
+static void findRTags (void)
+{
+ tokenInfo *const token = newRToken ();
+
+ blackHoleIndex = makePlaceholder ("**BLACK-HOLE/DON'T TAG ME**");
+ registerEntry (blackHoleIndex);
+
+ TRACE_PRINT ("install blackhole: %d", blackHoleIndex);
+
+ do
+ {
+ tokenRead(token);
+ R_TRACE_TOKEN(token, CORK_NIL);
+ parseStatement (token, CORK_NIL, false, false);
+ }
+ while (!tokenIsEOF (token));
+
+ /* The format needs a conversion for the index, as in the install trace. */
+ TRACE_PRINT ("run blackhole: %d", blackHoleIndex);
+ markAllEntriesInScopeAsPlaceholder (blackHoleIndex);
+
+ tokenDelete (token);
+}
+
+static void initializeRParser (const langType language) /* Remember the language
+ * id assigned to this parser in Lang_R. Installed as def->initialize by
+ * RParser. */
+{
+ Lang_R = language;
+}
+
+extern parserDefinition *RParser (void) /* Build and return the parser
+ * definition for the language named "R": file extensions, kind, field and
+ * keyword tables, the findRTags entry point, the selector list and the
+ * initializer. */
+{
+ static const char *const extensions[] = { "r", "R", "s", "q", NULL };
+ parserDefinition *const def = parserNew ("R");
+ static selectLanguage selectors[] = { selectByArrowOfR,
+ NULL };
+
+ def->extensions = extensions;
+ def->kindTable = RKinds;
+ def->kindCount = ARRAY_SIZE(RKinds);
+ def->fieldTable = RFields;
+ def->fieldCount = ARRAY_SIZE (RFields);
+ def->keywordTable = RKeywordTable;
+ def->keywordCount = ARRAY_SIZE(RKeywordTable);
+ def->useCork = CORK_QUEUE | CORK_SYMTAB; /* the parser relies on cork indices and scope lookups */
+ def->parser = findRTags;
+ def->selectLanguage = selectors;
+ def->initialize = initializeRParser;
+
+ return def;
+}
+
+/* Return a newly allocated copy of STR with its surrounding quotes removed.
+ *
+ * A leading single quote, double quote or backquote is dropped, and so is
+ * the final character when it is the same quote character. A string that
+ * does not start with a quote is copied unchanged.
+ *
+ * Returns NULL when STR is empty, or when it starts with a quote but is
+ * shorter than three characters (nothing between the quotes). The caller
+ * owns the returned vString. */
+extern vString *rExtractNameFromString (vString* str)
+{
+ int offset = 0;
+
+ if (vStringLength (str) == 0)
+ return NULL;
+
+ char b = vStringChar (str, 0);
+ if (b == '\'' || b == '"' || b == '`')
+ offset = 1;
+
+ if (offset && vStringLength (str) < 3)
+ return NULL;
+
+ vString *n = vStringNewInit (vStringValue (str) + offset);
+ /* Strip the last character only when it closes an opening quote. Without
+ * the offset test an unquoted name ending in its own first character
+ * (for example "aba") would lose that character. */
+ if (offset && vStringChar (n, vStringLength (n) - 1) == b)
+ vStringChop (n);
+
+ return n;
+}
+
+#ifdef DEBUG
+static const char *tokenTypeStr(enum RTokenType e) /* Debug helper: return a
+ * printable name for token type E. Named types map to fixed strings. Any
+ * other value is rendered into a static buffer: the character itself when
+ * printable, the two characters backslash-n for a newline, otherwise an
+ * empty string. The static buffer is overwritten by the next such call. */
+{ /* Generated by misc/enumstr.sh with cmdline:
+ parsers/r.c RTokenType tokenTypeStr TOKEN_R_ --use-lower-bits-as-is */
+ switch (e)
+ {
+ case TOKEN_R_EOF: return "EOF";
+ case TOKEN_R_UNDEFINED: return "UNDEFINED";
+ case TOKEN_R_KEYWORD: return "KEYWORD";
+ case TOKEN_R_NEWLINE: return "NEWLINE";
+ case TOKEN_R_NUMBER: return "NUMBER";
+ case TOKEN_R_SYMBOL: return "SYMBOL";
+ case TOKEN_R_STRING: return "STRING";
+ case TOKEN_R_OPERATOR: return "OPERATOR";
+ case TOKEN_R_DOTS: return "DOTS";
+ case TOKEN_R_DOTS_N: return "DOTS_N";
+ case TOKEN_R_LASSIGN: return "LASSIGN";
+ case TOKEN_R_RASSIGN: return "RASSIGN";
+ case TOKEN_R_SCOPE: return "SCOPE";
+ default: break;
+ }
+ static char buf[3]; /* room for the two-character newline escape plus NUL */
+ if (isprint (e))
+ {
+ buf[0] = e;
+ buf[1] = '\0';
+ }
+ else if (e == '\n')
+ {
+ buf[0] = '\\';
+ buf[1] = 'n';
+ buf[2] = '\0';
+ }
+ else
+ {
+ buf[0] = '\0';
+ }
+ return buf;
+}
+#endif
Modified: ctags/parsers/r.h
111 lines changed, 111 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,111 @@
+/*
+* Copyright (c) 2003-2004, Ascher Stefan <stievie at utanet.at>
+* Copyright (c) 2020, Masatake YAMATO
+* Copyright (c) 2020, Red Hat, Inc.
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License version 2 or (at your option) any later version.
+*/
+
+#ifndef CTAGS_PARSER_R_H
+#define CTAGS_PARSER_R_H
+
+/*
+* INCLUDE FILES
+*/
+
+#include "general.h" /* must always come first */
+
+#include "subparser.h"
+#include "tokeninfo.h"
+#include "entry.h"
+
+
+/*
+* DATA DECLARATIONS
+*/
+
+typedef struct sRSubparser rSubparser;
+
+enum RTokenType { /* Token types of the R tokenizer. Values below 256 are
+ * used directly for single-character tokens; named types start at 256. */
+ /* 0..255 are the byte's values */
+ TOKEN_R_EOF = 256,
+ TOKEN_R_UNDEFINED,
+ TOKEN_R_KEYWORD,
+ TOKEN_R_NEWLINE,
+ TOKEN_R_NUMBER, /* 1, 1L */
+ TOKEN_R_SYMBOL, /* [0-9a-zA-Z._] */
+ TOKEN_R_STRING,
+ TOKEN_R_OPERATOR, /* - + ! ~ ? : * / ^ %...%, <, > ==
+ * >=, <=, &, &&, |, || */
+ TOKEN_R_DOTS, /* ... */
+ TOKEN_R_DOTS_N, /* ..1, ..2, etc */
+ TOKEN_R_LASSIGN, /* <-, <<- */
+ TOKEN_R_RASSIGN, /* ->, ->> */
+ TOKEN_R_SCOPE, /* ::, ::: */
+};
+
+enum eRKeywordId /* Identifiers for the words the R parser treats as
+ * keywords. The spelling that maps to each id is defined by the keyword
+ * table in r.c, not by this header. */
+{
+ KEYWORD_R_C,
+ KEYWORD_R_DATAFRAME,
+ KEYWORD_R_FUNCTION,
+ KEYWORD_R_IF,
+ KEYWORD_R_ELSE,
+ KEYWORD_R_FOR,
+ KEYWORD_R_WHILE,
+ KEYWORD_R_REPEAT,
+ KEYWORD_R_IN,
+ KEYWORD_R_NEXT,
+ KEYWORD_R_BREAK,
+ KEYWORD_R_TRUE,
+ KEYWORD_R_FALSE,
+ KEYWORD_R_NULL,
+ KEYWORD_R_INF,
+ KEYWORD_R_LIST,
+ KEYWORD_R_NAN,
+ KEYWORD_R_NA,
+ KEYWORD_R_SOURCE,
+ KEYWORD_R_LIBRARY,
+};
+
+struct sRSubparser { /* Interface a subparser of R implements. Every callback
+ * is optional: the R parser tests each pointer before calling it. */
+ subparser subparser; /* first member, so a subparser pointer can be cast to rSubparser */
+ int (* readRightSideSymbol) (rSubparser *s,
+ tokenInfo *const symbol,
+ const char *const assignmentOperator,
+ int parent,
+ tokenInfo *const token); /* offered each assignment; a result other than CORK_NIL claims it */
+ int (* makeTagWithTranslation) (rSubparser *s,
+ tokenInfo *const token,
+ int parent,
+ bool in_func,
+ int kindInR,
+ const char *const assignmentOperator); /* makes a tag on behalf of the R parser; returns its cork index */
+ bool (* askTagAcceptancy) (rSubparser *s, tagEntryInfo *pe);
+ bool (* hasFunctionAlikeKind) (rSubparser *s, tagEntryInfo *pe);
+ int (* readFuncall) (rSubparser *s,
+ tokenInfo *const func,
+ tokenInfo *const token,
+ int parent); /* offered each function call; a result other than CORK_NIL means the argument list was handled */
+};
+
+extern void rSetupCollectingSignature (tokenInfo *const token,
+ vString *signature);
+extern void rTeardownCollectingSignature (tokenInfo *const token);
+
+/*
+ * FUNCTION PROTOTYPES
+ */
+
+extern tokenInfo *rNewToken (void);
+
+extern void rTokenReadNoNewline (tokenInfo *const token);
+
+/* This function returns true if a new token is read.
+ * EOF is an exception: if EOF is read, this function returns false. */
+extern bool rParseStatement (tokenInfo *const token, int parentIndex, bool inArgList);
+
+extern vString *rExtractNameFromString (vString* str);
+
+#endif /* CTAGS_PARSER_R_H */
Modified: ctags/parsers/sh.c
490 lines changed, 490 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,490 @@
+/*
+* Copyright (c) 2000-2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License version 2 or (at your option) any later version.
+*
+* This module contains functions for generating tags for scripts for the
+* Bourne shell (and its derivatives, the Korn and Z shells).
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+
+#include "entry.h"
+#include "kind.h"
+#include "parse.h"
+#include "read.h"
+#include "promise.h"
+#include "routines.h"
+#include "vstring.h"
+#include "xtag.h"
+
+/*
+* DATA DEFINITIONS
+*/
+typedef enum { /* Tag kinds of the Sh parser; non-negative values index ShKinds[]. */
+ K_NOTHING = -1, /* place holder. Never appears on tags file. */
+ K_ALIAS,
+ K_FUNCTION,
+ K_SOURCE,
+ K_HEREDOCLABEL,
+} shKind;
+
+typedef enum { /* Roles of the script kind; indexes ShScriptRoles[]. */
+ R_SCRIPT_LOADED,
+} shScriptRole;
+
+static roleDefinition ShScriptRoles [] = {
+ { true, "loaded", "loaded" },
+};
+
+typedef enum { /* Roles of the heredoc kind; indexes ShHeredocRoles[]. */
+ R_HEREDOC_ENDMARKER,
+} shHeredocRole;
+
+static roleDefinition ShHeredocRoles [] = {
+ { true, "endmarker", "end marker" },
+};
+
+static kindDefinition ShKinds [] = { /* order must match shKind */
+ { true, 'a', "alias", "aliases"},
+ { true, 'f', "function", "functions"},
+ { true, 's', "script", "script files",
+ .referenceOnly = true, ATTACH_ROLES (ShScriptRoles) },
+ { true, 'h', "heredoc", "label for here document",
+ .referenceOnly = false, ATTACH_ROLES (ShHeredocRoles) },
+};
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+static bool isFileChar (int c) /* True when C is accepted as part of a file
+ * name: an alphanumeric or one of _ - / . + ^ % @ ~ */
+{
+ return (isalnum (c)
+ || c == '_' || c == '-'
+ || c == '/' || c == '.'
+ || c == '+' || c == '^'
+ || c == '%' || c == '@'
+ || c == '~');
+}
+
+static bool isIdentChar (int c) /* True when C is an alphanumeric, an
+ * underscore or a hyphen. Used for alias names and unquoted heredoc
+ * delimiters. */
+{
+ return (isalnum (c) || c == '_' || c == '-');
+}
+
+/* bash allows all kinds of crazy stuff as the identifier after 'function'.
+ * Returns true for every character value above 1 except DEL, space, tab,
+ * newline, carriage return, the quoting characters, and the shell
+ * metacharacters listed below. */
+static bool isBashFunctionChar (int c)
+{
+ return (c > 1 /* NUL and SOH are disallowed */ && c != 0x7f &&
+ /* blanks are disallowed, but VT and FF (and CR to some extent, but
+ * let's not fall into the pit of craziness) */
+ c != ' ' && c != '\t' && c != '\n' && c != '\r' &&
+ c != '"' && c != '\'' && c != '$' && c != '`' && c != '\\' &&
+ c != '&' && c != ';' &&
+ c != '(' && c != ')' &&
+ c != '<' && c != '>');
+}
+
+static const unsigned char *skipDoubleString (const unsigned char *cp) /* CP points
+ * at an opening double quote. Returns a pointer to the closing double
+ * quote, or to the terminating NUL if the string is not closed on this
+ * line. A double quote directly preceded by a backslash does not close
+ * the string. */
+{
+ const unsigned char* prev = cp;
+ cp++;
+ while ((*cp != '"' || *prev == '\\') && *cp != '\0')
+ {
+ prev = cp;
+ cp++;
+ }
+ return cp;
+}
+
+static const unsigned char *skipSingleString (const unsigned char *cp) /* CP points
+ * at an opening single quote. Returns a pointer to the next single quote,
+ * or to the terminating NUL if there is none. No escapes are recognised. */
+{
+ cp++;
+ while (*cp != '\'' && *cp != '\0')
+ cp++;
+ return cp;
+}
+
+static bool isEnvCommand (const vString *cmd) /* True when the base file name
+ * of CMD is exactly "env". */
+{
+ const char *lc = vStringValue(cmd);
+ const char * tmp = baseFilename (lc);
+
+ return (strcmp(tmp, "env") == 0);
+}
+
+/* Parse an output redirection ("> file" or ">> file") starting at CP.
+ *
+ * Leading whitespace is skipped, then one or two '>' characters, then
+ * optional whitespace, and finally the run of isFileChar() characters that
+ * names the destination file. That name replaces the content of DESTFILE.
+ *
+ * Returns the number of bytes consumed counted from CP, or 0 (leaving
+ * DESTFILE untouched) when no redirection followed by a file name starts
+ * at CP. */
+static int readDestfileName (const unsigned char *cp, vString *destfile)
+{
+ const unsigned char *origin = cp;
+
+ while (isspace ((int) *cp))
+ ++cp;
+
+ /* >... */
+ if (*cp != '>')
+ return 0;
+ ++cp;
+
+ /* >>... : the second '>' of an append redirection is optional */
+ if (*cp == '>')
+ ++cp;
+
+ while (isspace ((int) *cp))
+ ++cp;
+
+ if (!isFileChar ((int) *cp))
+ return 0;
+
+ vStringClear(destfile);
+ do {
+ vStringPut (destfile, (int) *cp);
+ ++cp;
+ } while (isFileChar ((int) *cp));
+
+ if (vStringLength(destfile) > 0)
+ return cp - origin;
+
+ return 0;
+}
+
+struct hereDocParsingState { /* Per-line state used by findShTags to decide
+ * whether the body of a here document should be parsed as another language. */
+ vString *args[2]; /* first two names seen on the current line that produced no tag */
+ vString *destfile; /* target of an output redirection seen on the line */
+ langType sublang; /* language chosen for the heredoc body, or LANG_IGNORE */
+ unsigned long startLine; /* line after the one where sublang was chosen */
+
+ int corkIndex; /* tag made for the heredoc label, or CORK_NIL */
+};
+
+static void hdocStateInit (struct hereDocParsingState *hstate) /* Allocate the
+ * three string buffers and set corkIndex and sublang to their "none"
+ * values. startLine is not set here. */
+{
+ hstate->args[0] = vStringNew ();
+ hstate->args[1] = vStringNew ();
+ hstate->destfile = vStringNew ();
+
+ hstate->corkIndex = CORK_NIL;
+ hstate->sublang = LANG_IGNORE;
+}
+
+static void hdocStateClear (struct hereDocParsingState *hstate) /* Empty the
+ * three string buffers. findShTags calls this before scanning each line
+ * that is not inside a here document. Other fields are left untouched. */
+{
+ vStringClear (hstate->args[0]);
+ vStringClear (hstate->args[1]);
+ vStringClear (hstate->destfile);
+}
+
+static void hdocStateFini (struct hereDocParsingState *hstate) /* Free the
+ * three string buffers allocated by hdocStateInit. */
+{
+ vStringDelete (hstate->args[0]);
+ vStringDelete (hstate->args[1]);
+ vStringDelete (hstate->destfile);
+}
+
+static void hdocStateUpdateArgs (struct hereDocParsingState *hstate,
+ vString *name) /* Record NAME in the first empty slot of args[].
+ * Once both slots are filled, further names are ignored. */
+{
+ if (vStringIsEmpty(hstate->args[0]))
+ vStringCopy(hstate->args[0], name);
+ else if (vStringIsEmpty(hstate->args[1]))
+ vStringCopy(hstate->args[1], name);
+}
+
+static void hdocStateMakePromiseMaybe (struct hereDocParsingState *hstate) /* If a
+ * language was chosen for the here document, call makePromise with that
+ * language name, startLine as the start line and the current input line as
+ * the end line (both offsets 0). sublang is reset to LANG_IGNORE either way. */
+{
+ if (hstate->sublang != LANG_IGNORE)
+ makePromise (getLanguageName(hstate->sublang),
+ hstate->startLine, 0,
+ getInputLineNumber(), 0,
+ 0);
+ hstate->sublang = LANG_IGNORE;
+}
+
+static void hdocStateRecordStartlineFromDestfileMaybe (struct hereDocParsingState *hstate) /* Unless
+ * a language is already chosen, pick one from the redirection target file
+ * name. On success startLine becomes the line after the current one. The
+ * destfile buffer is emptied whenever the lookup is attempted. */
+{
+ const char *f = vStringValue(hstate->destfile);
+
+ if (hstate->sublang != LANG_IGNORE)
+ return; /* a language chosen earlier takes precedence */
+
+ hstate->sublang = getLanguageForFilename (f, 0);
+ if (hstate->sublang != LANG_IGNORE)
+ hstate->startLine = getInputLineNumber () + 1;
+ vStringClear (hstate->destfile);
+}
+
+static void hdocStateRecordStatelineMaybe (struct hereDocParsingState *hstate) /* Called
+ * when a heredoc operator is found. Picks the language for the heredoc body
+ * from the command on the line: args[0], or args[1] when args[0] is "env".
+ * On success startLine becomes the line after the current one. Afterwards,
+ * if a redirection target was recorded, the file-name based lookup is tried
+ * as well; it only has an effect when no language was chosen from the
+ * command. */
+{
+ if (!vStringIsEmpty(hstate->args[0]))
+ {
+ const char *cmd;
+
+ cmd = vStringValue(hstate->args[0]);
+ if (isEnvCommand (hstate->args[0]))
+ {
+ cmd = NULL;
+ if (!vStringIsEmpty(hstate->args[1]))
+ cmd = vStringValue(hstate->args[1]);
+ }
+
+ if (cmd)
+ {
+ hstate->sublang = getLanguageForCommand (cmd, 0);
+ if (hstate->sublang != LANG_IGNORE)
+ hstate->startLine = getInputLineNumber () + 1;
+ }
+ }
+
+ if (vStringLength(hstate->destfile) > 0)
+ hdocStateRecordStartlineFromDestfileMaybe (hstate);
+}
+
+static int hdocStateReadDestfileName (struct hereDocParsingState *hstate,
+ const unsigned char* cp,
+ const vString *const hereDocDelimiter) /* Try to read an output
+ * redirection at CP into hstate->destfile. If one is found and a heredoc
+ * delimiter is already known for this line, the language lookup by file
+ * name is done immediately. Returns the value of readDestfileName. */
+{
+ int d = readDestfileName (cp, hstate->destfile);
+
+ if (d > 0 && hereDocDelimiter)
+ hdocStateRecordStartlineFromDestfileMaybe (hstate);
+
+ return d;
+}
+
+static void hdocStateUpdateTag (struct hereDocParsingState *hstate, unsigned long endLine) /* Set
+ * the end line of the pending heredoc label tag to ENDLINE and forget the
+ * tag. Does nothing when no entry is found for corkIndex. */
+{
+ tagEntryInfo *tag = getEntryInCorkQueue (hstate->corkIndex);
+ if (tag)
+ {
+ tag->extensionFields.endLine = endLine;
+ hstate->corkIndex = CORK_NIL;
+ }
+}
+
+static void findShTags (void) /* Parser entry point: scan the input line by
+ * line and make tags for functions, aliases, sourced files and heredoc
+ * labels.
+ *
+ * While a heredoc delimiter is pending, lines are only compared against the
+ * delimiter. Otherwise each line is scanned left to right: strings are
+ * skipped, a '#' ends the line, a heredoc operator records the delimiter,
+ * and words are inspected for "function", "alias", "." and "source"
+ * prefixes or a trailing "()". */
+{
+ vString *name = vStringNew ();
+ const unsigned char *line;
+ vString *hereDocDelimiter = NULL; /* non-NULL while inside (or about to enter) a here document */
+ bool hereDocIndented = false;
+ bool (* check_char)(int); /* character class for the name being read */
+
+ struct hereDocParsingState hstate;
+ hdocStateInit (&hstate);
+
+ while ((line = readLineFromInputFile ()) != NULL)
+ {
+ const unsigned char* cp = line;
+ shKind found_kind = K_NOTHING;
+
+ if (hereDocDelimiter)
+ {
+ if (hereDocIndented)
+ {
+ while (*cp == '\t')
+ cp++;
+ }
+ /* The delimiter must be followed by end of line or whitespace. */
+ if ((strncmp ((const char *) cp, vStringValue (hereDocDelimiter), vStringLength (hereDocDelimiter)) == 0)
+ && ((*(cp + vStringLength (hereDocDelimiter)) == '\0')
+ || isspace (*(cp + vStringLength (hereDocDelimiter)) )))
+ {
+ hdocStateUpdateTag (&hstate, getInputLineNumber ());
+ hdocStateMakePromiseMaybe (&hstate);
+
+ if (!vStringIsEmpty(hereDocDelimiter))
+ makeSimpleRefTag(hereDocDelimiter, K_HEREDOCLABEL, R_HEREDOC_ENDMARKER);
+ vStringDelete (hereDocDelimiter);
+ hereDocDelimiter = NULL;
+ }
+ continue; /* heredoc content is never scanned for tags */
+ }
+
+ hdocStateClear (&hstate);
+ while (*cp != '\0')
+ {
+ /* jump over whitespace */
+ while (isspace ((int)*cp))
+ cp++;
+
+ /* jump over strings */
+ if (*cp == '"')
+ cp = skipDoubleString (cp);
+ else if (*cp == '\'')
+ cp = skipSingleString (cp);
+ /* jump over comments */
+ else if (*cp == '#')
+ break;
+ /* jump over here-documents */
+ else if (cp[0] == '<' && cp[1] == '<')
+ {
+ const unsigned char *start, *end;
+ bool trimEscapeSequences = false;
+ bool quoted = false;
+ cp += 2;
+ /* an optional "-" strips leading tabulations from the heredoc lines */
+ if (*cp != '-')
+ hereDocIndented = false;
+ else
+ {
+ hereDocIndented = true;
+ cp++;
+ }
+ while (isspace (*cp))
+ cp++;
+ start = end = cp;
+ /* the delimiter can be surrounded by quotes */
+ if (*cp == '"')
+ {
+ start++;
+ end = cp = skipDoubleString (cp);
+ /* we need not to worry about variable substitution, they
+ * don't happen in heredoc delimiter definition */
+ trimEscapeSequences = true;
+ quoted = true;
+ }
+ else if (*cp == '\'')
+ {
+ start++;
+ end = cp = skipSingleString (cp);
+ quoted = true;
+ }
+ else
+ {
+ while (isIdentChar ((int) *cp))
+ cp++;
+ end = cp;
+ }
+ if (end > start || quoted)
+ {
+ /* The input may be broken as a shell script but we need to avoid
+ memory leaking. */
+ if (hereDocDelimiter)
+ vStringClear(hereDocDelimiter);
+ else
+ hereDocDelimiter = vStringNew ();
+ for (; end > start; start++)
+ {
+ if (trimEscapeSequences && *start == '\\')
+ start++; /* keep the escaped character, drop the backslash */
+ vStringPut (hereDocDelimiter, *start);
+ }
+ if (vStringLength(hereDocDelimiter) > 0)
+ hstate.corkIndex = makeSimpleTag(hereDocDelimiter, K_HEREDOCLABEL);
+
+ hdocStateRecordStatelineMaybe(&hstate);
+ }
+ }
+
+ check_char = isBashFunctionChar;
+
+ if (strncmp ((const char*) cp, "function", (size_t) 8) == 0 &&
+ isspace ((int) cp [8]))
+ {
+ found_kind = K_FUNCTION;
+ cp += 8;
+ }
+ else if (strncmp ((const char*) cp, "alias", (size_t) 5) == 0 &&
+ isspace ((int) cp [5]))
+ {
+ check_char = isIdentChar;
+ found_kind = K_ALIAS;
+ cp += 5;
+ }
+ else if (cp [0] == '.'
+ && isspace((int) cp [1]))
+ {
+ found_kind = K_SOURCE;
+ ++cp;
+ check_char = isFileChar;
+ }
+ else if (strncmp ((const char*) cp, "source", (size_t) 6) == 0
+ && isspace((int) cp [6]))
+ {
+ found_kind = K_SOURCE;
+ cp += 6;
+ check_char = isFileChar;
+ }
+
+ if (found_kind != K_NOTHING)
+ while (isspace ((int) *cp))
+ ++cp;
+
+ // Get the name of the function, alias or file to be read by source
+ if (! check_char ((int) *cp))
+ {
+ found_kind = K_NOTHING;
+
+ int d = hdocStateReadDestfileName (&hstate, cp,
+ hereDocDelimiter);
+ if (d > 0)
+ cp += d;
+ else if (*cp != '\0')
+ ++cp; /* not a redirection: step over one byte and rescan */
+ continue;
+ }
+ while (check_char ((int) *cp))
+ {
+ vStringPut (name, (int) *cp);
+ ++cp;
+ }
+
+ while (isspace ((int) *cp))
+ ++cp;
+
+ /* A name followed by "()" defines a function even without the keyword. */
+ if ((found_kind != K_SOURCE)
+ && *cp == '(')
+ {
+ ++cp;
+ while (isspace ((int) *cp))
+ ++cp;
+ if (*cp == ')')
+ {
+ found_kind = K_FUNCTION;
+ ++cp;
+ }
+ }
+
+ if (found_kind != K_NOTHING)
+ {
+ if (found_kind == K_SOURCE)
+ makeSimpleRefTag (name, K_SOURCE, R_SCRIPT_LOADED);
+ else
+ makeSimpleTag (name, found_kind);
+ found_kind = K_NOTHING;
+ }
+ else if (!hereDocDelimiter)
+ hdocStateUpdateArgs (&hstate, name); /* remember the word as a possible command name */
+ vStringClear (name);
+ }
+ }
+ hdocStateFini (&hstate);
+ vStringDelete (name);
+ if (hereDocDelimiter)
+ vStringDelete (hereDocDelimiter);
+}
+
+extern parserDefinition* ShParser (void) /* Build and return the parser
+ * definition for the language named "Sh": kinds, file extensions, aliases
+ * and the findShTags entry point. The cork queue is enabled. */
+{
+ static const char *const extensions [] = {
+ "sh", "SH", "bsh", "bash", "ksh", "zsh", "ash", NULL
+ };
+ static const char *const aliases [] = {
+ "sh", "bash", "ksh", "zsh", "ash",
+ /* major mode name in emacs */
+ "shell-script",
+ NULL
+ };
+ parserDefinition* def = parserNew ("Sh");
+ def->kindTable = ShKinds;
+ def->kindCount = ARRAY_SIZE (ShKinds);
+ def->extensions = extensions;
+ def->aliases = aliases;
+ def->parser = findShTags;
+ def->useCork = CORK_QUEUE; /* findShTags updates end lines through cork indices */
+ return def;
+}
Modified: ctags/parsers/verilog.c
2024 lines changed, 2024 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,2024 @@
+/*
+ * Copyright (c) 2003, Darren Hiebert
+ * Copyright (c) 2017, Vitor Antunes
+ * Copyright (c) 2020, Hiroo Hayashi
+ *
+ * This source code is released for free distribution under the terms of the
+ * GNU General Public License version 2 or (at your option) any later version.
+ *
+ * This module contains functions for generating tags for the Verilog or
+ * SystemVerilog HDL (Hardware Description Language).
+ *
+ * References:
+ * IEEE Std 1800-2017, SystemVerilog Language Reference Manual
+ * https://ieeexplore.ieee.org/document/8299595
+ * SystemVerilog IEEE Std 1800-2012 Grammar
+ * https://insights.sigasi.com/tech/systemverilog.ebnf/
+ * Verilog Formal Syntax Specification
+ * http://www.verilog.com/VerilogBNF.html
+ */
+
+/*
+ * INCLUDE FILES
+ */
+#include "general.h" /* must always come first */
+
+#include <string.h>
+
+#include "debug.h"
+#include "entry.h"
+#include "keyword.h"
+#include "options.h"
+#include "parse.h"
+#include "read.h"
+#include "routines.h"
+#include "xtag.h"
+#include "ptrarray.h"
+
+/*
+ * MACROS
+ */
+ #define NUMBER_LANGUAGES 2 /* Indicates number of defined indexes */
+ /* Indices into keywordAssoc.isValid[], one per supported language */
+ #define IDX_SYSTEMVERILOG 0
+ #define IDX_VERILOG 1
+
+/*
+ * DATA DECLARATIONS
+ */
+
+/* A callback function searching a symbol from the cork symbol table assumes
+ * this kind definitions are shared in Verilog and SystemVerilog parsers.
+ * If you will separate the definitions for the parsers, you must revise the
+ * code related to the symbol table. */
+ /* Kinds of tokens and tags.  The first group is private to the parser and
+  * counts up from -16; the group starting at K_CONSTANT (0) is used to index
+  * the kind tables, so its order must match VerilogKinds[] and
+  * SystemVerilogKinds[]. */
+ typedef enum {
+ /* parser private items */
+ K_IGNORE = -16, /* Verilog/SystemVerilog keywords to be ignored */
+ K_DEFINE,
+ K_DIRECTIVE,
+ K_END,
+ K_END_DE, /* End of Design Elements */
+ K_IDENTIFIER,
+ K_LOCALPARAM,
+ K_PARAMETER,
+ K_IMPORT,
+ K_WITH,
+
+ K_UNDEFINED = KEYWORD_NONE,
+ /* the following items are also used as indices for VerilogKinds[] and SystemVerilogKinds[] */
+ K_CONSTANT= 0,
+ K_EVENT,
+ K_FUNCTION,
+ K_MODULE,
+ K_NET,
+ K_PORT,
+ K_REGISTER,
+ K_TASK,
+ K_BLOCK,
+ K_INSTANCE,
+ K_ASSERTION,
+ K_CLASS,
+ K_COVERGROUP,
+ K_ENUM,
+ K_INTERFACE,
+ K_MODPORT,
+ K_PACKAGE,
+ K_PROGRAM,
+ K_PROTOTYPE,
+ K_PROPERTY,
+ K_STRUCT,
+ K_TYPEDEF,
+ K_CHECKER,
+ K_CLOCKING,
+ K_SEQUENCE,
+ K_MEMBER,
+ K_IFCLASS, /* interface class */
+ K_CONSTRAINT,
+ K_NETTYPE,
+ } verilogKind;
+
+ /* One row of KeywordTable: a keyword, the kind it is registered with, and one
+  * flag per language (indexed by IDX_SYSTEMVERILOG / IDX_VERILOG) telling
+  * whether the keyword is registered for that language. */
+ typedef struct {
+ const char *keyword;
+ verilogKind kind;
+ short isValid [NUMBER_LANGUAGES];
+ } keywordAssoc;
+
+ /* State of a token.  The same structure is used for the entries of the
+  * context stack (see currentContext), which are linked through `scope`. */
+ typedef struct sTokenInfo {
+ verilogKind kind; /* kind of the token; K_UNDEFINED until classified */
+ vString* name; /* the name of the token */
+ unsigned long lineNumber; /* line number where token was found */
+ MIOPos filePosition; /* file position where token was found */
+ struct sTokenInfo* scope; /* context of keyword; the entry below this one on the context stack, or NULL */
+ int nestLevel; /* Current nest level */
+ verilogKind lastKind; /* Kind of last found tag */
+ vString* blockName; /* Current block name */
+ vString* inheritance; /* Class inheritance */
+ bool prototype; /* Is only a prototype */
+ bool classScope; /* Context is local to the current sub-context */
+ bool parameter; /* parameter which can be overridden */
+ bool hasParamList; /* module definition has a parameter port list */
+ } tokenInfo;
+
+ /* Indices into the parser field table (see fieldTable) */
+ typedef enum {
+ F_PARAMETER,
+ } verilogField;
+
+/*
+ * DATA DEFINITIONS
+ */
+ static int Ungetc; /* one-character pushback buffer filled by vUngetc(); '\0' means empty */
+ static int Lang_verilog; /* language id recorded by initializeVerilog() */
+ static int Lang_systemverilog; /* language id recorded by initializeSystemVerilog() */
+
+ /* Tag kinds of the Verilog parser.  Entries are indexed by verilogKind, so the
+  * order must follow the enum from K_CONSTANT to K_INSTANCE. */
+ static kindDefinition VerilogKinds [] = {
+ { true, 'c', "constant", "constants (define, parameter, specparam)" },
+ { true, 'e', "event", "events" },
+ { true, 'f', "function", "functions" },
+ { true, 'm', "module", "modules" },
+ { true, 'n', "net", "net data types" },
+ { true, 'p', "port", "ports" },
+ { true, 'r', "register", "variable data types" },
+ { true, 't', "task", "tasks" },
+ { true, 'b', "block", "blocks (begin, fork)" },
+ { true, 'i', "instance", "instances of module" },
+ };
+
+ /* Tag kinds of the SystemVerilog parser.  Entries are indexed by verilogKind,
+  * so the order must follow the enum from K_CONSTANT to K_NETTYPE. */
+ static kindDefinition SystemVerilogKinds [] = {
+ { true, 'c', "constant", "constants (define, parameter, specparam, enum values)" },
+ { true, 'e', "event", "events" },
+ { true, 'f', "function", "functions" },
+ { true, 'm', "module", "modules" },
+ { true, 'n', "net", "net data types" },
+ { true, 'p', "port", "ports" },
+ { true, 'r', "register", "variable data types" },
+ { true, 't', "task", "tasks" },
+ { true, 'b', "block", "blocks (begin, fork)" },
+ { true, 'i', "instance", "instances of module or interface" },
+ { true, 'A', "assert", "assertions (assert, assume, cover, restrict)" },
+ { true, 'C', "class", "classes" },
+ { true, 'V', "covergroup","covergroups" },
+ { true, 'E', "enum", "enumerators" },
+ { true, 'I', "interface", "interfaces" },
+ { true, 'M', "modport", "modports" },
+ { true, 'K', "package", "packages" },
+ { true, 'P', "program", "programs" },
+ { false,'Q', "prototype", "prototypes (extern, pure)" },
+ { true, 'R', "property", "properties" },
+ { true, 'S', "struct", "structs and unions" },
+ { true, 'T', "typedef", "type declarations" },
+ { true, 'H', "checker", "checkers" },
+ { true, 'L', "clocking", "clocking" },
+ { true, 'q', "sequence", "sequences" },
+ { true, 'w', "member", "struct and union members" },
+ { true, 'l', "ifclass", "interface class" },
+ { true, 'O', "constraint","constraints" },
+ { true, 'N', "nettype", "nettype declarations" },
+ };
+
+ /* Keywords that are registered with a specific kind.  buildKeywordHash() adds
+  * a row to a language only when the flag in that language's column is set;
+  * the first column is SystemVerilog, the second one Verilog. */
+ static const keywordAssoc KeywordTable [] = {
+ /* SystemVerilog */
+ /* | Verilog */
+ /* keyword keyword ID | | */
+ { "`define", K_DEFINE, { 1, 1 } },
+ { "begin", K_BLOCK, { 1, 1 } },
+ { "end", K_END, { 1, 1 } },
+ { "endfunction", K_END_DE, { 1, 1 } },
+ { "endmodule", K_END_DE, { 1, 1 } },
+ { "endtask", K_END_DE, { 1, 1 } },
+ { "event", K_EVENT, { 1, 1 } },
+ { "fork", K_BLOCK, { 1, 1 } },
+ { "function", K_FUNCTION, { 1, 1 } },
+ { "genvar", K_REGISTER, { 1, 1 } },
+ { "inout", K_PORT, { 1, 1 } },
+ { "input", K_PORT, { 1, 1 } },
+ { "integer", K_REGISTER, { 1, 1 } },
+ { "join", K_END, { 1, 1 } },
+ { "localparam", K_LOCALPARAM, { 1, 1 } },
+ { "module", K_MODULE, { 1, 1 } },
+ { "output", K_PORT, { 1, 1 } },
+ { "parameter", K_PARAMETER, { 1, 1 } },
+ { "real", K_REGISTER, { 1, 1 } },
+ { "realtime", K_REGISTER, { 1, 1 } },
+ { "reg", K_REGISTER, { 1, 1 } },
+ { "signed", K_IGNORE, { 1, 1 } },
+ { "specparam", K_CONSTANT, { 1, 1 } },
+ { "supply0", K_NET, { 1, 1 } },
+ { "supply1", K_NET, { 1, 1 } },
+ { "task", K_TASK, { 1, 1 } },
+ { "time", K_REGISTER, { 1, 1 } },
+ { "tri", K_NET, { 1, 1 } },
+ { "triand", K_NET, { 1, 1 } },
+ { "trior", K_NET, { 1, 1 } },
+ { "trireg", K_NET, { 1, 1 } },
+ { "tri0", K_NET, { 1, 1 } },
+ { "tri1", K_NET, { 1, 1 } },
+ { "uwire", K_NET, { 1, 1 } },
+ { "wand", K_NET, { 1, 1 } },
+ { "wire", K_NET, { 1, 1 } },
+ { "wor", K_NET, { 1, 1 } },
+ { "assert", K_ASSERTION, { 1, 0 } },
+ { "assume", K_ASSERTION, { 1, 0 } },
+ { "bit", K_REGISTER, { 1, 0 } },
+ { "byte", K_REGISTER, { 1, 0 } },
+ { "chandle", K_REGISTER, { 1, 0 } },
+ { "checker", K_CHECKER, { 1, 0 } },
+ { "class", K_CLASS, { 1, 0 } },
+ { "constraint", K_CONSTRAINT, { 1, 0 } },
+ { "cover", K_ASSERTION, { 1, 0 } },
+ { "clocking", K_CLOCKING, { 1, 0 } },
+ { "covergroup", K_COVERGROUP, { 1, 0 } },
+ { "endchecker", K_END_DE, { 1, 0 } },
+ { "endclass", K_END_DE, { 1, 0 } },
+ { "endclocking", K_END_DE, { 1, 0 } },
+ { "endgroup", K_END_DE, { 1, 0 } },
+ { "endinterface", K_END_DE, { 1, 0 } },
+ { "endpackage", K_END_DE, { 1, 0 } },
+ { "endprogram", K_END_DE, { 1, 0 } },
+ { "endproperty", K_END_DE, { 1, 0 } },
+ { "endsequence", K_END_DE, { 1, 0 } },
+ { "enum", K_ENUM, { 1, 0 } },
+ { "extern", K_PROTOTYPE, { 1, 0 } },
+ { "import", K_IMPORT, { 1, 0 } },
+ { "int", K_REGISTER, { 1, 0 } },
+ { "interconnect", K_NET, { 1, 0 } },
+ { "interface", K_INTERFACE, { 1, 0 } },
+ { "join_any", K_END, { 1, 0 } },
+ { "join_none", K_END, { 1, 0 } },
+ { "logic", K_REGISTER, { 1, 0 } },
+ { "longint", K_REGISTER, { 1, 0 } },
+ { "modport", K_MODPORT, { 1, 0 } },
+ { "package", K_PACKAGE, { 1, 0 } },
+ { "program", K_PROGRAM, { 1, 0 } },
+ { "property", K_PROPERTY, { 1, 0 } },
+ { "pure", K_PROTOTYPE, { 1, 0 } },
+ { "ref", K_PORT, { 1, 0 } },
+ { "restrict", K_ASSERTION, { 1, 0 } },
+ { "sequence", K_SEQUENCE, { 1, 0 } },
+ { "shortint", K_REGISTER, { 1, 0 } },
+ { "shortreal", K_REGISTER, { 1, 0 } },
+ { "string", K_REGISTER, { 1, 0 } },
+ { "struct", K_STRUCT, { 1, 0 } },
+ { "type", K_REGISTER, { 1, 0 } },
+ { "typedef", K_TYPEDEF, { 1, 0 } },
+ { "union", K_STRUCT, { 1, 0 } },
+ { "var", K_REGISTER, { 1, 0 } },
+ { "void", K_REGISTER, { 1, 0 } },
+ { "with", K_WITH, { 1, 0 } },
+ { "nettype", K_NETTYPE, { 1, 0 } },
+ // { "virtual", K_PROTOTYPE, { 1, 0 } }, // do not add for now
+ };
+
+ static tokenInfo *currentContext = NULL; /* top of the context (scope) stack */
+ static ptrArray *tagContents; /* tokens that createTag() tags inside a newly created container context */
+ static fieldDefinition *fieldTable = NULL; /* parser field table, indexed by verilogField */
+
+ // IEEE Std 1364-2005 LRM, Appendix B "List of Keywords"
+ // Registered with K_IGNORE after KeywordTable has been loaded (see
+ // initializeVerilog()); presumably addingUnlessExisting keeps the kind of
+ // keywords that KeywordTable already registered -- TODO confirm.
+ // "unsigned" used to be spelled "unsigned1" here (a footnote marker picked up
+ // from the LRM), which left the real keyword unregistered.
+ static const struct keywordGroup verilogKeywords = {
+     .value = K_IGNORE,
+     .addingUnlessExisting = true,
+     .keywords = {
+         "always", "and", "assign", "automatic", "begin", "buf", "bufif0",
+         "bufif1", "case", "casex", "casez", "cell", "cmos", "config",
+         "deassign", "default", "defparam", "design", "disable", "edge",
+         "else", "end", "endcase", "endconfig", "endfunction", "endgenerate",
+         "endmodule", "endprimitive", "endspecify", "endtable", "endtask",
+         "event", "for", "force", "forever", "fork", "function", "generate",
+         "genvar", "highz0", "highz1", "if", "ifnone", "incdir", "include",
+         "initial", "inout", "input", "instance", "integer", "join", "large",
+         "liblist", "library", "localparam", "macromodule", "medium", "module",
+         "nand", "negedge", "nmos", "nor", "noshowcancelled", "not", "notif0",
+         "notif1", "or", "output", "parameter", "pmos", "posedge", "primitive",
+         "pull0", "pull1", "pulldown", "pullup", "pulsestyle_onevent",
+         "pulsestyle_ondetect", "rcmos", "real", "realtime", "reg", "release",
+         "repeat", "rnmos", "rpmos", "rtran", "rtranif0", "rtranif1",
+         "scalared", "showcancelled", "signed", "small", "specify",
+         "specparam", "strong0", "strong1", "supply0", "supply1", "table",
+         "task", "time", "tran", "tranif0", "tranif1", "tri", "tri0", "tri1",
+         "triand", "trior", "trireg", "unsigned", "use", "uwire", "vectored",
+         "wait", "wand", "weak0", "weak1", "while", "wire", "wor", "xnor", "xor",
+         NULL
+     },
+ };
+ // IEEE Std 1800-2017 LRM, Annex B "Keywords"
+ // Registered with K_IGNORE by initializeSystemVerilog() after KeywordTable
+ // has been loaded.
+ const static struct keywordGroup systemVerilogKeywords = {
+ .value = K_IGNORE,
+ .addingUnlessExisting = true,
+ .keywords = {
+ "accept_on", "alias", "always", "always_comb", "always_ff",
+ "always_latch", "and", "assert", "assign", "assume", "automatic",
+ "before", "begin", "bind", "bins", "binsof", "bit", "break", "buf",
+ "bufif0", "bufif1", "byte", "case", "casex", "casez", "cell",
+ "chandle", "checker", "class", "clocking", "cmos", "config", "const",
+ "constraint", "context", "continue", "cover", "covergroup",
+ "coverpoint", "cross", "deassign", "default", "defparam", "design",
+ "disable", "dist", "do", "edge", "else", "end", "endcase",
+ "endchecker", "endclass", "endclocking", "endconfig", "endfunction",
+ "endgenerate", "endgroup", "endinterface", "endmodule", "endpackage",
+ "endprimitive", "endprogram", "endproperty", "endspecify",
+ "endsequence", "endtable", "endtask", "enum", "event", "eventually",
+ "expect", "export", "extends", "extern", "final", "first_match",
+ "for", "force", "foreach", "forever", "fork", "forkjoin", "function",
+ "generate", "genvar", "global", "highz0", "highz1", "if", "iff",
+ "ifnone", "ignore_bins", "illegal_bins", "implements", "implies",
+ "import", "incdir", "include", "initial", "inout", "input", "inside",
+ "instance", "int", "integer", "interconnect", "interface",
+ "intersect", "join", "join_any", "join_none", "large", "let",
+ "liblist", "library", "local", "localparam", "logic", "longint",
+ "macromodule", "matches", "medium", "modport", "module", "nand",
+ "negedge", "nettype", "new", "nexttime", "nmos", "nor",
+ "noshowcancelled", "not", "notif0", "notif1", "null", "or", "output",
+ "package", "packed", "parameter", "pmos", "posedge", "primitive",
+ "priority", "program", "property", "protected", "pull0", "pull1",
+ "pulldown", "pullup", "pulsestyle_ondetect", "pulsestyle_onevent",
+ "pure", "rand", "randc", "randcase", "randsequence", "rcmos", "real",
+ "realtime", "ref", "reg", "reject_on", "release", "repeat",
+ "restrict", "return", "rnmos", "rpmos", "rtran", "rtranif0",
+ "rtranif1", "s_always", "s_eventually", "s_nexttime", "s_until",
+ "s_until_with", "scalared", "sequence", "shortint", "shortreal",
+ "showcancelled", "signed", "small", "soft", "solve", "specify",
+ "specparam", "static", "string", "strong", "strong0", "strong1",
+ "struct", "super", "supply0", "supply1", "sync_accept_on",
+ "sync_reject_on", "table", "tagged", "task", "this", "throughout",
+ "time", "timeprecision", "timeunit", "tran", "tranif0", "tranif1",
+ "tri", "tri0", "tri1", "triand", "trior", "trireg", "type", "typedef",
+ "union", "unique", "unique0", "unsigned", "until", "until_with",
+ "untyped", "use", "uwire", "var", "vectored", "virtual", "void",
+ "wait", "wait_order", "wand", "weak", "weak0", "weak1", "while",
+ "wildcard", "wire", "with", "within", "wor", "xnor", "xor",
+ NULL
+ },
+ };
+
+ // IEEE Std 1364-2005 LRM, "19. Compiler directives"
+ // Registered with K_DIRECTIVE by initializeVerilog().  "`define" is also in
+ // KeywordTable, where it is given K_DEFINE.
+ const static struct keywordGroup verilogDirectives = {
+ .value = K_DIRECTIVE,
+ .addingUnlessExisting = true,
+ .keywords = {
+ "`begin_keywords", "`celldefine", "`default_nettype", "`define",
+ "`else", "`elsif", "`end_keywords", "`endcelldefine", "`endif",
+ "`ifdef", "`ifndef", "`include", "`line", "`nounconnected_drive",
+ "`pragma", "`resetall", "`timescale", "`unconnected_drive", "`undef",
+ NULL
+ },
+ };
+
+ // IEEE Std 1800-2017 LRM, "22. Compiler directives"
+ // Registered with K_DIRECTIVE by initializeSystemVerilog().  "`define" is
+ // also in KeywordTable, where it is given K_DEFINE.
+ const static struct keywordGroup systemVerilogDirectives = {
+ .value = K_DIRECTIVE,
+ .addingUnlessExisting = true,
+ .keywords = {
+ "`__LINE__", "`begin_keywords", "`celldefine", "`default_nettype",
+ "`define", "`else", "`elsif", "`end_keywords", "`endcelldefine",
+ "`endif", "`ifdef", "`ifndef", "`include", "`line",
+ "`nounconnected_drive", "`pragma", "`resetall", "`timescale",
+ "`unconnected_drive", "`undef", "`undefineall",
+ NULL
+ },
+ };
+
+ // .enabled field cannot be shared by two languages
+ // Parser-specific fields of the Verilog parser; the entry order follows
+ // verilogField.
+ static fieldDefinition VerilogFields[] = {
+ { .name = "parameter",
+ .description = "parameter whose value can be overridden.",
+ .enabled = false,
+ .dataType = FIELDTYPE_BOOL },
+ };
+
+ // Parser-specific fields of the SystemVerilog parser; the entry order follows
+ // verilogField.
+ static fieldDefinition SystemVerilogFields[] = {
+ { .name = "parameter",
+ .description = "parameter whose value can be overridden.",
+ .enabled = false,
+ .dataType = FIELDTYPE_BOOL },
+ };
+
+/*
+ * PROTOTYPE DEFINITIONS
+ */
+
+ /* Forward declarations of functions that are called before they are defined */
+ static bool isIdentifier (tokenInfo* token);
+ static int processDefine (tokenInfo *const token, int c);
+ static int processType (tokenInfo* token, int c, verilogKind* kind, bool* with);
+ static int pushEnumNames (tokenInfo* token, int c);
+ static int pushMembers (tokenInfo* token, int c);
+ static int readWordToken (tokenInfo *const token, int c);
+ static int readWordTokenNoSkip (tokenInfo *const token, int c);
+ static int skipBlockName (tokenInfo *const token, int c);
+ static int skipClockEvent (tokenInfo* token, int c);
+ static int skipDelay (tokenInfo* token, int c);
+ static int tagIdentifierList (tokenInfo *const token, int c, verilogKind kind, bool mayPortDecl);
+ static int tagNameList (tokenInfo* token, int c, verilogKind kind);
+
+/*
+ * FUNCTION DEFINITIONS
+ */
+
+ /* Tell whether a tag of the given kind opens a context for the tags that
+  * follow it.  Returns true for container kinds, false otherwise. */
+ static short isContainer (verilogKind kind)
+ {
+     short container = false;
+
+     switch (kind)
+     {
+         /* contexts that stay open */
+         case K_BLOCK:
+         case K_CHECKER:
+         case K_CLASS:
+         case K_CLOCKING:
+         case K_COVERGROUP:
+         case K_FUNCTION:
+         case K_IFCLASS:
+         case K_INTERFACE:
+         case K_MODULE:
+         case K_PACKAGE:
+         case K_PROGRAM:
+         case K_PROPERTY:
+         case K_SEQUENCE:
+         case K_TASK:
+         /* contexts that isTempContext() reports as temporary */
+         case K_ENUM:
+         case K_NETTYPE:
+         case K_STRUCT:
+         case K_TYPEDEF:
+             container = true;
+             break;
+         default:
+             break;
+     }
+     return container;
+ }
+
+ /* Tell whether the context described by `token` is a temporary one, that is
+  * a typedef, nettype, enum or struct context. */
+ static short isTempContext (tokenInfo const* token)
+ {
+     const verilogKind kind = token->kind;
+
+     if (kind == K_TYPEDEF || kind == K_NETTYPE)
+         return true;
+     if (kind == K_ENUM || kind == K_STRUCT)
+         return true;
+     return false;
+ }
+
+ /* Reset `token` to its initial state: empty strings, cleared flags and no
+  * scope, positioned at the current input line and file position. */
+ static void clearToken (tokenInfo *token)
+ {
+     /* strings */
+     vStringClear (token->name);
+     vStringClear (token->blockName);
+     vStringClear (token->inheritance);
+
+     /* classification */
+     token->kind = K_UNDEFINED; // to be set by updateKind()
+     token->lastKind = K_UNDEFINED;
+
+     /* position in the input */
+     token->lineNumber = getInputLineNumber ();
+     token->filePosition = getInputFilePosition ();
+
+     /* context bookkeeping */
+     token->scope = NULL;
+     token->nestLevel = 0;
+
+     /* flags */
+     token->prototype = false;
+     token->classScope = false;
+     token->parameter = false;
+     token->hasParamList = false;
+ }
+
+ /* Allocate a token together with its three strings and return it in the
+  * cleared state.  Release it with deleteToken(). */
+ static tokenInfo *newToken (void)
+ {
+     tokenInfo *const fresh = xMalloc (1, tokenInfo);
+
+     fresh->inheritance = vStringNew ();
+     fresh->blockName = vStringNew ();
+     fresh->name = vStringNew ();
+     clearToken (fresh);
+     return fresh;
+ }
+
+ /* Return a newly allocated copy of `token`.  The plain members are copied as
+  * they are; the three strings are duplicated so that the copy does not share
+  * them with the original. */
+ static tokenInfo *dupToken (tokenInfo *token)
+ {
+     tokenInfo *copy = newToken ();
+     /* remember the strings allocated for the copy before they are overwritten */
+     vString *const ownName = copy->name;
+     vString *const ownBlockName = copy->blockName;
+     vString *const ownInheritance = copy->inheritance;
+
+     *copy = *token;
+
+     /* put the copy's own strings back and fill them */
+     copy->name = ownName;
+     vStringCopy (copy->name, token->name);
+     copy->blockName = ownBlockName;
+     vStringCopy (copy->blockName, token->blockName);
+     copy->inheritance = ownInheritance;
+     vStringCopy (copy->inheritance, token->inheritance);
+     return copy;
+ }
+
+ /* Free `token` and the strings it owns.  A NULL token is ignored. */
+ static void deleteToken (tokenInfo * const token)
+ {
+     if (token == NULL)
+         return;
+
+     vStringDelete (token->name);
+     vStringDelete (token->blockName);
+     vStringDelete (token->inheritance);
+     eFree (token);
+ }
+
+ /* Push `tokenPush` on top of the stack whose current top is `token` and
+  * return the new top. */
+ static tokenInfo *pushToken (tokenInfo * const token, tokenInfo * const tokenPush)
+ {
+     tokenInfo *const top = tokenPush;
+
+     top->scope = token;
+     return top;
+ }
+
+ /* Remove and free the top entry `token` of a stack.  Returns the entry that
+  * was below it, or NULL when `token` is NULL. */
+ static tokenInfo *popToken (tokenInfo * const token)
+ {
+     tokenInfo *below = NULL;
+
+     if (token != NULL)
+     {
+         below = token->scope;
+         deleteToken (token);
+     }
+     return below;
+ }
+
+ /* Free `token` and every entry linked below it through `scope`. */
+ static void pruneTokens (tokenInfo * token)
+ {
+     while (token != NULL)
+         token = popToken (token);
+ }
+
+ /* Exchange the complete contents of two tokens, string pointers included. */
+ static void swapToken (tokenInfo *t0, tokenInfo *t1)
+ {
+     const tokenInfo saved = *t1;
+
+     *t1 = *t0;
+     *t0 = saved;
+ }
+
+ /* Return the name of `kind` from the kind table of the input language.
+  * `kind` is used as an index and is not range-checked. */
+ static const char *getNameForKind (const verilogKind kind)
+ {
+     /* anything that is not SystemVerilog is taken to be Verilog */
+     const kindDefinition *const kinds =
+         isInputLanguage (Lang_systemverilog) ? SystemVerilogKinds : VerilogKinds;
+
+     return kinds[kind].name;
+ }
+
+ /* Return the `enabled` flag of `kind` from the kind table of the input
+  * language.  `kind` is used as an index and is not range-checked. */
+ static char kindEnabled (const verilogKind kind)
+ {
+     /* anything that is not SystemVerilog is taken to be Verilog */
+     const kindDefinition *const kinds =
+         isInputLanguage (Lang_systemverilog) ? SystemVerilogKinds : VerilogKinds;
+
+     return kinds[kind].enabled;
+ }
+
+ /* Register with `language` every KeywordTable row whose isValid[idx] flag is
+  * set.  `idx` is IDX_SYSTEMVERILOG or IDX_VERILOG. */
+ static void buildKeywordHash (const langType language, unsigned int idx)
+ {
+     const keywordAssoc *const end = KeywordTable + ARRAY_SIZE (KeywordTable);
+
+     for (const keywordAssoc *row = KeywordTable; row != end; ++row)
+     {
+         if (row->isValid [idx])
+             addKeyword (row->keyword, language, (int) row->kind);
+     }
+ }
+
+ /* Parser initialization for Verilog: record the language id, create the
+  * shared tagContents array if it does not exist yet and register the
+  * keywords.  KeywordTable is registered before the keyword groups. */
+ static void initializeVerilog (const langType language)
+ {
+     if (tagContents == NULL)
+         tagContents = ptrArrayNew ((ptrArrayDeleteFunc)deleteToken);
+
+     Lang_verilog = language;
+     buildKeywordHash (language, IDX_VERILOG);
+     addKeywordGroup (&verilogKeywords, language);
+     addKeywordGroup (&verilogDirectives, language);
+ }
+
+ /* Parser initialization for SystemVerilog: record the language id, create the
+  * shared tagContents array if it does not exist yet and register the
+  * keywords.  KeywordTable is registered before the keyword groups. */
+ static void initializeSystemVerilog (const langType language)
+ {
+     if (tagContents == NULL)
+         tagContents = ptrArrayNew ((ptrArrayDeleteFunc)deleteToken);
+
+     Lang_systemverilog = language;
+     buildKeywordHash (language, IDX_SYSTEMVERILOG);
+     addKeywordGroup (&systemVerilogKeywords, language);
+     addKeywordGroup (&systemVerilogDirectives, language);
+ }
+
+ /* Push `c` back so that the next _vGetc() returns it.  Only one character
+  * can be pending; the assertion checks that the buffer is empty.  Because
+  * '\0' marks the empty buffer, pushing back '\0' has no effect. */
+ static void vUngetc (int c)
+ {
+ Assert (Ungetc == '\0');
+ Ungetc = c;
+ }
+
+ /* Skip the rest of a block comment whose opening has already been read.
+  * Returns SPACE, which stands in for the comment, or EOF when the input ends
+  * before the comment is closed.
+  *
+  * Mostly copied from cppSkipOverCComment() in cpreprocessor.c.
+  * cppSkipOverCComment() uses the internal ungetc buffer of CPreProcessor,
+  * whereas the Verilog parser reads with getcFromInputFile(), which has an
+  * ungetc buffer of its own.  Mixing the two would cause trouble. */
+ static int verilogSkipOverCComment (void)
+ {
+     int c = getcFromInputFile ();
+
+     while (c != EOF)
+     {
+         const bool afterStar = (c == '*');
+
+         c = getcFromInputFile ();
+         if (afterStar && c == '/')
+             return SPACE; /* replace comment with space */
+     }
+     return EOF;
+ }
+
+ /* Read the next character of the input with comments removed.
+  *
+  * A character pushed back with vUngetc() is returned first.  A "//" comment
+  * is consumed and the character that ends it ('\n' or EOF) is returned; a
+  * block comment is replaced by what verilogSkipOverCComment() returns.  A '/'
+  * that is the very last character of the input is reported as EOF.
+  *
+  * inSkipPastMatch: when true, a whole string literal is consumed and the
+  *                  single character '@' is returned in its place, so that
+  *                  brackets inside the string are not seen by
+  *                  skipPastMatch().
+  *
+  * Returns the character read, or EOF. */
+ static int _vGetc (bool inSkipPastMatch)
+ {
+     int c;
+     if (Ungetc == '\0')
+         c = getcFromInputFile ();
+     else
+     {
+         c = Ungetc;
+         Ungetc = '\0';
+     }
+     if (c == '/')
+     {
+         int c2 = getcFromInputFile ();
+         if (c2 == EOF)
+             return EOF;
+         else if (c2 == '/') /* strip comment until end-of-line */
+         {
+             do
+                 c = getcFromInputFile ();
+             while (c != '\n' && c != EOF);
+         }
+         else if (c2 == '*') /* strip block comment */
+             c = verilogSkipOverCComment ();
+         else
+             ungetcToInputFile (c2);
+     }
+     // replace a string with "@" only in skipPastMatch()
+     // because the string may contain parens, etc.
+     else if (inSkipPastMatch && c == '"')
+     {
+         /* A backslash escapes the character after it, so that \" does not
+          * terminate the string. */
+         bool escaped = false;
+         for (;;)
+         {
+             const int c2 = getcFromInputFile ();
+             if (c2 == EOF)
+                 break;
+             if (escaped)
+                 escaped = false;
+             else if (c2 == '\\')
+                 escaped = true;
+             else if (c2 == '"')
+                 break;
+         }
+         c = '@';
+     }
+     return c;
+ }
+
+ /* Read the next character with comments removed; string literals are passed
+  * through unchanged (see _vGetc()). */
+ static int vGetc (void)
+ {
+ return _vGetc (false);
+ }
+
+ // Can `c` be the first character of a word token? [a-zA-Z_`]
+ static bool isWordToken (const int c)
+ {
+     if (c == '_' || c == '`')
+         return true;
+     return isalpha (c) != 0;
+ }
+
+ // Can `c` appear inside a word token? [a-zA-Z0-9_`$]
+ static bool isIdentifierCharacter (const int c)
+ {
+     switch (c)
+     {
+         case '_':
+         case '`':
+         case '$':
+             return true;
+         default:
+             return isalnum (c) != 0;
+     }
+ }
+
+ /* Starting with `c`, read on while the character is white space.  Returns
+  * the first character that is not white space, which may be EOF. */
+ static int skipWhite (int c)
+ {
+     for (; isspace (c); c = vGetc ())
+         ;
+     return c;
+ }
+
+ /* Skip up to the bracket that closes an already opened one.
+  *
+  * pair: two characters, the opening bracket followed by the closing one.
+  *
+  * Nested pairs are counted.  Input is read with _vGetc (true), so string
+  * literals are not searched for brackets.  Returns the first non-blank
+  * character after the closing bracket, or after the point where the input
+  * ended. */
+ static int skipPastMatch (const char *const pair)
+ {
+     const int opener = pair [0];
+     const int closer = pair [1];
+     int depth = 1;
+
+     for (;;)
+     {
+         const int c = _vGetc (true);
+
+         if (c == EOF)
+             break;
+         if (c == opener)
+             ++depth;
+         else if (c == closer && --depth <= 0)
+             break;
+     }
+     return skipWhite (vGetc ());
+ }
+
+ /* Skip consecutive "[...]" groups, the first of which starts at `c`.  Returns
+  * the first non-blank character that does not open another group. */
+ static int skipDimension (int c)
+ {
+     for (;;)
+     {
+         if (c != '[')
+             return c;
+         c = skipPastMatch ("[]");
+     }
+ }
+
+ /* Starting with `c`, read on until a semicolon or the end of the input.
+  * Returns ';' or EOF. */
+ static int skipToSemiColon (int c)
+ {
+     for (;;)
+     {
+         if (c == ';' || c == EOF)
+             return c;
+         c = vGetc ();
+     }
+ }
+
+ /* Skip the string literal that starts at `c` and return the first non-blank
+  * character after it.
+  *
+  * When `c` is a double quote the input is consumed up to the closing quote;
+  * a quote preceded by a backslash (\") is part of the string and does not
+  * end it.  When `c` is anything else only that one character is skipped.
+  * Returns EOF when the input ends first. */
+ static int skipString (int c)
+ {
+     if (c == '"')
+     {
+         bool escaped = false;
+         for (;;)
+         {
+             c = vGetc ();
+             if (c == EOF)
+                 break;
+             if (escaped)
+                 escaped = false;
+             else if (c == '\\')
+                 escaped = true;
+             else if (c == '"')
+                 break;
+         }
+     }
+     c = skipWhite (vGetc ());
+     return c;
+ }
+
+ /* Skip an expression that starts at `c`.  Bracketed groups and string
+  * literals are skipped as a whole.  Returns the character that ends the
+  * expression: ',', ';', ')', '}', ']' or EOF. */
+ static int skipExpression (int c)
+ {
+     for (;;)
+     {
+         switch (c)
+         {
+             case ',':
+             case ';':
+             case ')':
+             case '}':
+             case ']':
+             case EOF:
+                 return c;
+             case '(':
+                 c = skipPastMatch ("()");
+                 break;
+             case '{':
+                 c = skipPastMatch ("{}");
+                 break;
+             case '[':
+                 c = skipPastMatch ("[]");
+                 break;
+             case '"':
+                 c = skipString (c);
+                 break;
+             default:
+                 c = skipWhite (vGetc ());
+                 break;
+         }
+     }
+ }
+
+ // Skip to the end of the line. A newline that directly follows a
+ // backslash ( \ ) does not end the line.
+ // Should be used after readWordTokenNoSkip() for compiler directives
+ static int skipToNewLine (int c)
+ {
+     bool afterBackslash = false;
+
+     while (c != EOF && (afterBackslash || c != '\n'))
+     {
+         afterBackslash = (c == '\\');
+         c = vGetc ();
+     }
+     return c; // '\n' or EOF
+ }
+
+ /* Handle one or more back-to-back words that start with a backquote.
+  *
+  * c:     current character; nothing is done unless it is '`'.
+  * token: receives the word when it is neither a known directive nor
+  *        `define, i.e. when it is taken to be a macro use.
+  *
+  * Known directives are skipped up to the end of their line and `define is
+  * handed to processDefine(); in both cases the loop goes on with the next
+  * word.  A macro use ends the loop: the word is moved into `token` and an
+  * argument list "(...)" that follows it is skipped.
+  *
+  * Returns the character following what was consumed. */
+ static int skipMacro (int c, tokenInfo *token)
+ {
+ tokenInfo *localToken = newToken (); // don't update token outside
+ while (c == '`') // to support back-to-back compiler directives
+ {
+ c = readWordTokenNoSkip (localToken, c);
+ /* Skip compiler directive other than `define */
+ if (localToken->kind == K_DIRECTIVE)
+ {
+ c = skipToNewLine (c);
+ c = skipWhite (c);
+ }
+ /* Skip `define */
+ else if (localToken->kind == K_DEFINE)
+ {
+ c = skipWhite (c);
+ c = processDefine (localToken, c);
+ }
+ /* return macro expansion */
+ else
+ {
+ /* after the swap localToken holds the caller's former contents,
+  * which are freed below */
+ swapToken (token, localToken);
+ c = skipWhite (c);
+ if (c == '(')
+ c = skipPastMatch ("()");
+ break;
+ }
+ }
+ deleteToken (localToken);
+ return c;
+ }
+
+ /* Set token->kind from the token's name: the kind registered for a keyword of
+  * the input language, K_IDENTIFIER for any other name that isIdentifier()
+  * accepts, and the unchanged lookup result otherwise. */
+ static void _updateKind (tokenInfo *const token)
+ {
+     const verilogKind found =
+         (verilogKind) lookupKeyword (vStringValue (token->name), getInputLanguage () );
+
+     /* isIdentifier() reads token->kind, so it has to run before the assignment */
+     if (found == K_UNDEFINED && isIdentifier (token))
+         token->kind = K_IDENTIFIER;
+     else
+         token->kind = found;
+ }
+
+ /* Read an identifier, keyword, number, compiler directive, or macro
+  * identifier that starts with `c` into `token` and classify it.
+  *
+  * skip: when true, white space after the word is skipped as well.
+  *
+  * Returns the character after the word, or the first non-blank character
+  * after it when `skip` is set. */
+ static int _readWordToken (tokenInfo *const token, int c, bool skip)
+ {
+     Assert (isWordToken (c));
+
+     clearToken (token);
+     for (;;)
+     {
+         vStringPut (token->name, c);
+         c = vGetc ();
+         if (!isIdentifierCharacter (c))
+             break;
+     }
+     _updateKind (token);
+
+     return skip ? skipWhite (c) : c;
+ }
+
+ // read a word token starting with "c".
+ // returns the first character of the next token.
+ static int readWordToken (tokenInfo *const token, int c)
+ {
+ return _readWordToken (token, c, true);
+ }
+
+ // read a word token starting with "c".
+ // returns the character that follows the token read.
+ // for compiler directives. Since they are line-based, skipWhite() cannot be used.
+ static int readWordTokenNoSkip (tokenInfo *const token, int c)
+ {
+ return _readWordToken (token, c, false);
+ }
+
+ /* Check whether the name of a not yet classified token is an identifier:
+  * simple_identifier ::= [ a-zA-Z_ ] { [ a-zA-Z0-9_$ ] }
+  * Characters after the first one are checked with isIdentifierCharacter().
+  * Always false when token->kind is not K_UNDEFINED. */
+ static bool isIdentifier (tokenInfo* token)
+ {
+     if (token->kind != K_UNDEFINED)
+         return false;
+
+     for (int i = 0; i < vStringLength (token->name); i++)
+     {
+         const int c = vStringChar (token->name, i);
+         /* a leading backquote is a directive or macro, not an identifier */
+         const bool accepted = (i == 0)
+             ? (c != '`' && isWordToken (c))
+             : isIdentifierCharacter (c);
+
+         if (!accepted)
+             return false;
+     }
+     return true;
+ }
+
+ /* Push a new context of the given kind on the context stack.
+  *
+  * The context is named "<current context>.<name>", or just "<name>" when the
+  * current context has kind K_UNDEFINED.
+  *
+  * kind: kind stored in the new context.
+  * name: unqualified name of the new context; it is copied. */
+ static void createContext (verilogKind kind, vString* const name)
+ {
+     /* newToken() is dereferenced without a check throughout this file, so the
+      * result is not tested here either */
+     tokenInfo *const scope = newToken ();
+
+     scope->kind = kind;
+
+     /* Determine full context name */
+     if (currentContext->kind != K_UNDEFINED)
+     {
+         vStringCopy (scope->name, currentContext->name);
+         vStringPut (scope->name, '.');
+     }
+     vStringCat (scope->name, name);
+
+     /* Create context */
+     currentContext = pushToken (currentContext, scope);
+     verbose ("Created new context %s (kind %d)\n", vStringValue (currentContext->name), currentContext->kind);
+ }
+
+ /* Pop the current context off the context stack and free it; the context
+  * below it becomes the current one. */
+ static void dropContext (void)
+ {
+     verbose ("Dropping context %s\n", vStringValue (currentContext->name));
+     currentContext = popToken (currentContext);
+ }
+
+ /* Drop context, but only if an end token is found
+  *
+  * token: the end keyword that was just read.
+  * c:     the character that follows it.
+  *
+  * The current context is dropped when the keyword is "end" followed by the
+  * kind name of the context.  "endgroup" for a covergroup and "endclass" for
+  * an interface class are handled separately because they are not built that
+  * way.  A ": name" after the keyword is skipped.
+  *
+  * Returns the character following what was consumed. */
+ static int dropEndContext (tokenInfo *const token, int c)
+ {
+ verbose ("current context %s; context kind %0d; nest level %0d\n", vStringValue (currentContext->name), currentContext->kind, currentContext->nestLevel);
+ if ((currentContext->kind == K_COVERGROUP && strcmp (vStringValue (token->name), "endgroup") == 0)
+ || (currentContext->kind == K_IFCLASS && strcmp (vStringValue (token->name), "endclass") == 0))
+ {
+ dropContext ();
+ c = skipBlockName (token ,c);
+ }
+ else if (currentContext->kind != K_UNDEFINED)
+ {
+ vString *endTokenName = vStringNewInit ("end");
+ vStringCatS (endTokenName, getNameForKind (currentContext->kind));
+ if (strcmp (vStringValue (token->name), vStringValue (endTokenName)) == 0)
+ {
+ dropContext ();
+ c = skipBlockName (token ,c);
+ /* currentContext is now the context that enclosed the dropped one */
+ if (currentContext->classScope)
+ {
+ verbose ("Dropping local context %s\n", vStringValue (currentContext->name));
+ currentContext = popToken (currentContext);
+ }
+ }
+ vStringDelete (endTokenName);
+ }
+ else
+ verbose ("Unexpected current context %s\n", vStringValue (currentContext->name));
+ return c;
+ }
+
+
+ /* Emit a tag for `token`.
+  *
+  * token: supplies name, position and inheritance of the tag.
+  * kind:  kind of the tag.  K_LOCALPARAM and K_PARAMETER are emitted as
+  *        K_CONSTANT, and every kind is emitted as K_PROTOTYPE while the
+  *        current context is flagged as a prototype.
+  *
+  * When the current context is a real one (its kind is not K_UNDEFINED) the
+  * tag is placed in that scope and, if qualified tags are enabled, a second
+  * tag named "<context>.<name>" is emitted.  When `kind` is a container kind
+  * a new context is created and the tokens collected in tagContents are
+  * tagged inside it by calling this function again for each of them.
+  *
+  * NOTE(review): the early return for a disabled kind also skips creating
+  * the context and clearing token->inheritance -- confirm this is intended. */
+ static void createTag (tokenInfo *const token, verilogKind kind)
+ {
+ tagEntryInfo tag;
+
+ if (kind == K_LOCALPARAM)
+ kind = K_CONSTANT;
+ else if (kind == K_PARAMETER)
+ {
+ kind = K_CONSTANT;
+ // See LRM 2017 6.20.1 Parameter declaration syntax
+ if (currentContext->kind != K_CLASS && currentContext->kind != K_PACKAGE && !currentContext->hasParamList)
+ token->parameter = true;
+ }
+ Assert (kind >= 0 && kind != K_UNDEFINED && kind != K_IDENTIFIER);
+ Assert (vStringLength (token->name) > 0);
+
+ /* check if a container before kind is modified by prototype */
+ /* BTW should we create a context for a prototype? */
+ bool container = isContainer (kind);
+
+ /* Determine if kind is prototype */
+ if (currentContext->prototype)
+ kind = K_PROTOTYPE;
+
+ /* Do nothing if tag kind is disabled */
+ if (! kindEnabled (kind))
+ {
+ verbose ("kind disabled\n");
+ return;
+ }
+
+ /* Create tag */
+ initTagEntry (&tag, vStringValue (token->name), kind);
+ tag.lineNumber = token->lineNumber;
+ tag.filePosition = token->filePosition;
+
+ verbose ("Adding tag %s (kind %d)", vStringValue (token->name), kind);
+ if (currentContext->kind != K_UNDEFINED)
+ {
+ verbose (" to context %s\n", vStringValue (currentContext->name));
+ currentContext->lastKind = kind;
+ tag.extensionFields.scopeKindIndex = currentContext->kind;
+ tag.extensionFields.scopeName = vStringValue (currentContext->name);
+ }
+ verbose ("\n");
+ if (vStringLength (token->inheritance) > 0)
+ {
+ tag.extensionFields.inheritance = vStringValue (token->inheritance);
+ verbose ("Class %s extends %s\n", vStringValue (token->name), tag.extensionFields.inheritance);
+ }
+
+ if (token->parameter)
+ attachParserField (&tag, false, fieldTable [F_PARAMETER].ftype, "");
+
+ makeTagEntry (&tag);
+
+ /* Emit the same entry a second time under its qualified name */
+ if (isXtagEnabled (XTAG_QUALIFIED_TAGS) && currentContext->kind != K_UNDEFINED)
+ {
+ vString *const scopedName = vStringNew ();
+
+ vStringCopy (scopedName, currentContext->name);
+ vStringPut (scopedName, '.');
+ vStringCat (scopedName, token->name);
+ tag.name = vStringValue (scopedName);
+
+ markTagExtraBit (&tag, XTAG_QUALIFIED_TAGS);
+ makeTagEntry (&tag);
+
+ vStringDelete (scopedName);
+ }
+
+ /* Push token as context if it is a container */
+ if (container)
+ {
+ /* kind may have been changed to K_PROTOTYPE above */
+ createContext (kind, token->name);
+
+ /* Put found contents in context */
+ verbose ("Putting tagContents: %d element(s)\n",
+ ptrArrayCount (tagContents));
+ for (unsigned int i = 0; i < ptrArrayCount (tagContents); i++)
+ {
+ tokenInfo *content = ptrArrayItem (tagContents, i);
+ createTag (content, content->kind);
+ }
+
+ /* Drop temporary contexts */
+ if (isTempContext (currentContext))
+ dropContext ();
+ }
+
+ /* Clear no longer required inheritance information */
+ vStringClear (token->inheritance);
+ }
+
+ /* Skip an optional ": name" that follows a keyword.  When `c` is not ':'
+  * nothing is read.  The name, if there is one, is read into `token`.
+  * Returns the character following what was consumed. */
+ static int skipBlockName (tokenInfo *const token, int c)
+ {
+     if (c != ':')
+         return c;
+
+     c = skipWhite (vGetc ());
+     return isWordToken (c) ? readWordToken (token, c) : c;
+ }
+
+ // begin, fork
+ // Tags the block when it is named (": name") and increments the nest level
+ // of the context that is current afterwards.
+ static int processBlock (tokenInfo *const token, int c)
+ {
+     const bool hasColon = (c == ':'); // an optional block identifier follows
+
+     if (hasColon)
+         c = skipWhite (vGetc ());
+     if (hasColon && isWordToken (c))
+     {
+         c = readWordToken (token, c);
+         verbose ("Found block: %s\n", vStringValue (token->name));
+         createTag (token, K_BLOCK);
+         verbose ("Current context %s\n", vStringValue (currentContext->name));
+     }
+     currentContext->nestLevel++; // increment after creating a context
+     return c;
+ }
+
+ // end, join, join_any, join_none
+ // Decrements the nest level of the current context and drops the context
+ // when it is a block whose nest level has reached zero.
+ static int processEnd (tokenInfo *const token, int c)
+ {
+     tokenInfo *const context = currentContext;
+
+     if (context->nestLevel > 0) // for sanity check
+         --context->nestLevel;
+     if (context->nestLevel == 0 && context->kind == K_BLOCK)
+         dropContext ();
+
+     return skipBlockName (token, c);
+ }
+
+ /* Tag the identifiers of a parenthesized port list as ports.  Nothing is
+  * read when `c` is not '('.  `mayPortDecl` is passed on to
+  * tagIdentifierList().  Returns the first non-blank character after the
+  * closing parenthesis, or the unexpected character found in its place. */
+ static int processPortList (tokenInfo *token, int c, bool mayPortDecl)
+ {
+     if (c != '(')
+         return c;
+
+     c = skipWhite (vGetc ()); // skip '('
+     c = tagIdentifierList (token, c, K_PORT, mayPortDecl);
+     if (c != ')') // sanity check
+     {
+         verbose ("Unexpected input: %c\n", c);
+         return c;
+     }
+     return skipWhite (vGetc ());
+ }
+
+static int skipParameterAssignment (int c)
+{
+ if (c == '#')
+ {
+ c = skipWhite (vGetc ());
+ if (c ==@@ Diff output truncated at 100000 characters. @@
--------------
This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).
More information about the Commits
mailing list