[geany/geany] 5d5d05: Use uctags R parser
Jiří Techet
git-noreply at xxxxx
Mon Mar 14 19:05:25 UTC 2022
Branch: refs/heads/master
Author: Jiří Techet <techet at gmail.com>
Committer: Jiří Techet <techet at gmail.com>
Date: Mon, 14 Mar 2022 19:05:25 UTC
Commit: 5d5d05d63e8e55cacf60f6d95bda65c4027027eb
https://github.com/geany/geany/commit/5d5d05d63e8e55cacf60f6d95bda65c4027027eb
Log Message:
-----------
Use uctags R parser
The uctags R parser is token-based and seems to be much more advanced than
our ad-hoc parser.
Modified Paths:
--------------
ctags/Makefile.am
ctags/parsers/geany_r.c
ctags/parsers/r.c
ctags/parsers/r.h
Modified: ctags/Makefile.am
3 lines changed, 2 insertions(+), 1 deletions(-)
===================================================================
@@ -82,7 +82,8 @@ parsers = \
parsers/php.c \
parsers/powershell.c \
parsers/geany_python.c \
- parsers/geany_r.c \
+ parsers/r.c \
+ parsers/r.h \
parsers/rst.c \
parsers/ruby.c \
parsers/rust.c \
Modified: ctags/parsers/geany_r.c
177 lines changed, 0 insertions(+), 177 deletions(-)
===================================================================
@@ -1,177 +0,0 @@
-/*
-* Copyright (c) 2003-2004, Ascher Stefan <stievie at utanet.at>
-*
-* This source code is released for free distribution under the terms of the
-* GNU General Public License version 2 or (at your option) any later version.
-*
-* This module contains functions for generating tags for R language files.
-* R is a programming language for statistical computing.
-* R is GPL Software, get it from http://www.r-project.org/
-*/
-
-/*
-* INCLUDE FILES
-*/
-#include "general.h" /* must always come first */
-
-#include <string.h>
-#include <ctype.h> /* to define isalpha(), isalnum(), isspace() */
-
-#include "debug.h"
-#include "entry.h"
-#include "parse.h"
-#include "read.h"
-#include "vstring.h"
-#include "routines.h"
-
-
-#define SKIPSPACE(ch) while (isspace((int)*ch)) \
- ch++
-
-typedef enum {
- K_FUNCTION,
- K_LIBRARY,
- K_SOURCE,
- KIND_COUNT
-} rKind;
-
-static kindDefinition RKinds [KIND_COUNT] = {
- {true, 'f', "function", "functions"},
- {true, 'l', "library", "libraries"},
- {true, 's', "source", "sources"},
-};
-
-static void makeRTag (const vString * const name, rKind kind)
-{
- tagEntryInfo e;
- initTagEntry(&e, vStringValue(name), kind);
-
- Assert (kind < KIND_COUNT);
-
- makeTagEntry (&e);
-}
-
-static void createRTags (void)
-{
- vString *vLine = vStringNew ();
- vString *name = vStringNew ();
- int ikind;
- const unsigned char *line;
-
- while ((line = readLineFromInputFile ()) != NULL)
- {
- const unsigned char *cp = (const unsigned char *) line;
-
- vStringClear (name);
- while ((*cp != '\0') && (*cp != '#'))
- {
- /* iterate to the end of line or to a comment */
- ikind = -1;
- switch (*cp) {
- case 'l':
- case 's':
- if (strncasecmp((const char*)cp, "library", (size_t)7) == 0) {
- /* load a library: library(tools) */
- cp += 7;
- SKIPSPACE(cp);
- if (*cp == '(')
- ikind = K_LIBRARY;
- else
- cp -= 7;
- } else if (strncasecmp((const char*)cp, "source", (size_t)6) == 0) {
- /* load a source file: source("myfile.r") */
- cp += 6;
- SKIPSPACE(cp);
- if (*cp == '(')
- ikind = K_SOURCE;
- else
- cp -= 6;
- }
- if (ikind != -1) {
- cp++;
-
- vStringClear(name);
- while ((!isspace((int)*cp)) && *cp != '\0' && *cp != ')') {
- vStringPut(name, (int)*cp);
- cp++;
- }
-
- /* if the string really exists, make a tag of it */
- if (vStringLength(name) > 0)
- makeRTag(name, ikind);
-
- /* prepare for the next iteration */
- vStringClear(name);
- } else {
- vStringPut(name, (int)*cp);
- cp++;
- }
- break;
- case '<':
- cp++;
- if (*cp == '-') {
- /* assignment: ident <- someval */
- cp++;
- SKIPSPACE(cp);
-
- if (*cp == '\0') {
- /* not in this line, read next */
- /* sometimes functions are declared this way:
- ident <-
- function(...)
- {
- ...
- }
- I don't know if there is a reason to write the function keyword
- in a new line
- */
- if ((line = readLineFromInputFile()) != NULL) {
- cp = (const unsigned char*)line;
- SKIPSPACE(cp);
- }
- }
-
- if (strncasecmp((const char*)cp, "function", (size_t)8) == 0) {
- /* it's a function: ident <- function(args) */
- cp += 8;
- /* if the string really exists, make a tag of it */
- if (vStringLength(name) > 0)
- makeRTag(name, K_FUNCTION);
-
- /* prepare for the next iteration */
- vStringClear(name);
- break;
- }
- }
- /* fall through */
- case ' ':
- case '\x009':
- /* skip whitespace */
- cp++;
- break;
- default:
- /* collect all characters that could be a part of an identifier */
- vStringPut(name, (int)*cp);
- cp++;
- break;
- }
- }
- }
-
- vStringDelete (name);
- vStringDelete (vLine);
-}
-
-extern parserDefinition *RParser (void)
-{
- /* *.r: R files
- * *.s;*.q: S files
- */
- static const char *const extensions [] = { "r", "s", "q", NULL };
- parserDefinition *const def = parserNew ("R");
- def->kindTable = RKinds;
- def->kindCount = ARRAY_SIZE (RKinds);
- def->extensions = extensions;
- def->parser = createRTags;
- return def;
-}
Modified: ctags/parsers/r.c
1470 lines changed, 1470 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,1470 @@
+/*
+* Copyright (c) 2003-2004, Ascher Stefan <stievie at utanet.at>
+* Copyright (c) 2020, Masatake YAMATO <yamato at redhat.com>
+* Copyright (c) 2020, Red Hat, Inc.
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License version 2 or (at your option) any later version.
+*
+* This module contains functions for generating tags for R language files.
+* R is a programming language for statistical computing.
+* R is GPL Software, get it from http://www.r-project.org/
+*
+* The language references are available at
+* https://cran.r-project.org/manuals.html, and
+* https://cran.r-project.org/doc/manuals/r-release/R-lang.html
+*
+* The base library (including library and source functions) release is at
+* https://stat.ethz.ch/R-manual/R-devel/library/base/html/00Index.html
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include "debug.h"
+#include "entry.h"
+#include "keyword.h"
+#include "parse.h"
+#include "read.h"
+#include "selectors.h"
+#include "tokeninfo.h"
+#include "trace.h"
+#include "vstring.h"
+#include "subparser.h"
+#include "r.h"
+
+#include <string.h>
+#include <ctype.h> /* to define isalpha(), isalnum(), isspace() */
+
+
+/*
+* MACROS
+*/
+#ifdef DEBUG
+#define R_TRACE_TOKEN_TEXT(TXT,T,Q) TRACE_PRINT("<%s> token: %s (%s), parent: %s", \
+ (TXT), \
+ tokenIsTypeVal(T, '\n')? "\\n": tokenString(T), \
+ tokenTypeStr(T->type), \
+ (Q) == CORK_NIL? "": getEntryInCorkQueue(Q)->name)
+#define R_TRACE_TOKEN(T,Q) TRACE_PRINT("token: %s (%s), parent: %s", \
+ tokenIsTypeVal((T), '\n')? "\\n": tokenString(T), \
+ tokenTypeStr((T)->type), \
+ (Q) == CORK_NIL? "": getEntryInCorkQueue(Q)->name)
+
+#define R_TRACE_ENTER() TRACE_ENTER_TEXT("token: %s (%s), parent: %s", \
+ tokenIsTypeVal(token, '\n')? "\\n": tokenString(token), \
+ tokenTypeStr(token->type), \
+ parent == CORK_NIL? "": getEntryInCorkQueue(parent)->name)
+#define R_TRACE_LEAVE() TRACE_LEAVE()
+#else
+#define R_TRACE_TOKEN_TEXT(TXT,T,Q) do {} while (0);
+#define R_TRACE_TOKEN(T,Q) do {} while (0);
+#define R_TRACE_ENTER() do {} while (0);
+#define R_TRACE_LEAVE() do {} while (0);
+#endif
+
+
+/*
+* DATA DEFINITIONS
+*/
+typedef enum {
+ K_UNDEFINED = -1,
+ K_FUNCTION,
+ K_LIBRARY,
+ K_SOURCE,
+ K_GLOBALVAR,
+ K_FUNCVAR,
+ K_PARAM,
+ K_VECTOR,
+ K_LIST,
+ K_DATAFRAME,
+ K_NAMEATTR,
+ KIND_COUNT
+} rKind;
+
+typedef enum {
+ R_LIBRARY_ATTACHED_BY_LIBRARY,
+ R_LIBRARY_ATTACHED_BY_REQUIRE,
+} rLibraryRole;
+
+typedef enum {
+ R_SOURCE_LOADED_BY_SOURCE,
+} rSourceRole;
+
+static roleDefinition RLibraryRoles [] = {
+ { true, "library", "library attached by library function" },
+ { true, "require", "library attached by require function" },
+};
+
+static roleDefinition RSourceRoles [] = {
+ { true, "source", "source loaded by source fucntion" },
+};
+
+static kindDefinition RKinds[KIND_COUNT] = {
+ {true, 'f', "function", "functions"},
+ {true, 'l', "library", "libraries",
+ .referenceOnly = true, ATTACH_ROLES (RLibraryRoles) },
+ {true, 's', "source", "sources",
+ .referenceOnly = true, ATTACH_ROLES (RSourceRoles) },
+ {true, 'g', "globalVar", "global variables having values other than function()"},
+ {true, 'v', "functionVar", "function variables having values other than function()"},
+ {false,'z', "parameter", "function parameters inside function definitions" },
+ {true, 'c', "vector", "vectors explicitly created with `c()'" },
+ {true, 'L', "list", "lists explicitly created with `list()'" },
+ {true, 'd', "dataframe", "data frame explicitly created with `data.frame()'" },
+ {true, 'n', "nameattr", "names attribtes in vectors, lists, or dataframes" },
+};
+
+struct sKindExtraInfo {
+ const char *anon_prefix;
+ const char *ctor;
+};
+
+static struct sKindExtraInfo kindExtraInfo[KIND_COUNT] = {
+ [K_FUNCTION] = {
+ "anonFunc",
+ "function",
+ },
+ [K_VECTOR] = {
+ "anonVec",
+ "c",
+ },
+ [K_LIST] = {
+ "anonList",
+ "list",
+ },
+ [K_DATAFRAME] = {
+ "anonDataFrame",
+ "data.frame",
+ },
+};
+
+typedef enum {
+ F_ASSIGNMENT_OPERATOR,
+ F_CONSTRUCTOR,
+} rField;
+
+static fieldDefinition RFields [] = {
+ {
+ .name = "assignmentop",
+ .description = "operator for assignment",
+ .enabled = false,
+ },
+ {
+ .name = "constructor",
+ .description = "function used for making value assigned to the nameattr tag",
+ .enabled = true,
+ }
+};
+
+typedef int keywordId; /* to allow KEYWORD_NONE */
+
+static const keywordTable RKeywordTable [] = {
+ { "c", KEYWORD_R_C },
+ { "list", KEYWORD_R_LIST },
+ { "data.frame",KEYWORD_R_DATAFRAME },
+ { "function", KEYWORD_R_FUNCTION },
+ { "if", KEYWORD_R_IF },
+ { "else", KEYWORD_R_ELSE },
+ { "for", KEYWORD_R_FOR },
+ { "while", KEYWORD_R_WHILE },
+ { "repeat", KEYWORD_R_REPEAT },
+ { "in", KEYWORD_R_IN },
+ { "next", KEYWORD_R_NEXT },
+ { "break", KEYWORD_R_BREAK },
+ { "TRUE", KEYWORD_R_TRUE, },
+ { "FALSE", KEYWORD_R_FALSE, },
+ { "NULL", KEYWORD_R_NULL, },
+ { "Inf", KEYWORD_R_INF, },
+ { "NaN", KEYWORD_R_NAN, },
+ { "NA", KEYWORD_R_NA, },
+ { "NA_integer_", KEYWORD_R_NA, },
+ { "NA_real_", KEYWORD_R_NA, },
+ { "NA_complex_", KEYWORD_R_NA, },
+ { "NA_character_", KEYWORD_R_NA, },
+ { "source", KEYWORD_R_SOURCE },
+ { "library", KEYWORD_R_LIBRARY },
+ { "require", KEYWORD_R_LIBRARY },
+};
+
+#ifdef DEBUG
+static const char *tokenTypeStr(enum RTokenType e);
+#endif
+
+static struct tokenTypePair typePairs [] = {
+ { '{', '}' },
+ { '[', ']' },
+ { '(', ')' },
+};
+
+typedef struct sRToken {
+ tokenInfo base;
+ int scopeIndex;
+ int parenDepth;
+ vString *signature;
+ int kindIndexForParams; /* Used only when gathering parameters */
+} rToken;
+
+#define R(TOKEN) ((rToken *)TOKEN)
+
+static int blackHoleIndex;
+
+static langType Lang_R;
+
+static void readToken (tokenInfo *const token, void *data);
+static void clearToken (tokenInfo *token);
+static struct tokenInfoClass rTokenInfoClass = {
+ .nPreAlloc = 4,
+ .typeForUndefined = TOKEN_R_UNDEFINED,
+ .keywordNone = KEYWORD_NONE,
+ .typeForKeyword = TOKEN_R_KEYWORD,
+ .typeForEOF = TOKEN_R_EOF,
+ .extraSpace = sizeof (rToken) - sizeof (tokenInfo),
+ .pairs = typePairs,
+ .pairCount = ARRAY_SIZE (typePairs),
+ .init = NULL,
+ .read = readToken,
+ .clear = clearToken,
+ .copy = NULL,
+};
+
+
+/*
+ * FUNCTION PROTOTYPES
+ */
+static bool parseStatement (tokenInfo *const token, int parent, bool in_arglist, bool in_continuous_pair);
+static void parsePair (tokenInfo *const token, int parent, tokenInfo *const funcall);
+
+static int notifyReadRightSideSymbol (tokenInfo *const symbol,
+ const char *const assignmentOperator,
+ int parent,
+ tokenInfo *const token);
+static int makeSimpleSubparserTag (int langType, tokenInfo *const token, int parent,
+ bool in_func, int kindInR, const char *assignmentOperator);
+static bool askSubparserTagAcceptancy (tagEntryInfo *pe);
+static bool askSubparserTagHasFunctionAlikeKind (tagEntryInfo *e);
+static int notifyReadFuncall (tokenInfo *const func, tokenInfo *const token, int parent);
+
+/*
+* FUNCTION DEFINITIONS
+*/
+static bool hasKindsOrCtors (tagEntryInfo * e, int kinds[], size_t count)
+{
+ if (e->langType == Lang_R)
+ {
+ for (size_t i = 0; i < count; i++)
+ {
+ if (e->kindIndex == kinds[i])
+ return true;
+ }
+ }
+ else
+ {
+ bool function = false;
+ for (size_t i = 0; i < count; i++)
+ {
+ if (K_FUNCTION == kinds[i])
+ {
+ function = true;
+ break;
+ }
+ }
+ if (function && askSubparserTagHasFunctionAlikeKind (e))
+ return true;
+ }
+
+ const char *tmp = getParserFieldValueForType (e,
+ RFields [F_CONSTRUCTOR].ftype);
+ if (tmp == NULL)
+ return false;
+
+ for (size_t i = 0; i < count; i++)
+ {
+ const char * ctor = kindExtraInfo [kinds[i]].ctor;
+ if (ctor && strcmp (tmp, ctor) == 0)
+ return true;
+ }
+
+ return false;
+}
+
+static int searchScopeOtherThan (int scope, int kinds[], size_t count)
+{
+ do
+ {
+ tagEntryInfo * e = getEntryInCorkQueue (scope);
+ if (!e)
+ return CORK_NIL;
+
+ if (!hasKindsOrCtors (e, kinds, count))
+ return scope;
+
+ scope = e->extensionFields.scopeIndex;
+ }
+ while (1);
+}
+
+static int makeSimpleRTagR (tokenInfo *const token, int parent, int kind,
+ const char * assignmentOp)
+{
+ if (assignmentOp && (strlen (assignmentOp) == 3))
+ {
+ /* <<- or ->> is used here. */
+ if (anyKindsEntryInScopeRecursive (parent, tokenString (token),
+ (int[]){K_FUNCTION,
+ K_GLOBALVAR,
+ K_FUNCVAR,
+ K_PARAM}, 4) != CORK_NIL)
+ return CORK_NIL;
+
+ parent = CORK_NIL;
+ }
+
+ /* If the tag (T) to be created is defined in a scope and
+ the scope already has another tag having the same name
+ as T, T should not be created. */
+ tagEntryInfo *pe = getEntryInCorkQueue (parent);
+ int cousin = CORK_NIL;
+ if (pe && ((pe->langType == Lang_R && pe->kindIndex == K_FUNCTION)
+ || (pe->langType != Lang_R && askSubparserTagHasFunctionAlikeKind (pe))))
+ {
+ cousin = anyEntryInScope (parent, tokenString (token));
+ if (kind == K_GLOBALVAR)
+ kind = K_FUNCVAR;
+ }
+ else if (pe && (kind == K_GLOBALVAR)
+ && hasKindsOrCtors (pe, (int[]){K_VECTOR, K_LIST, K_DATAFRAME}, 3))
+ {
+ parent = searchScopeOtherThan (pe->extensionFields.scopeIndex,
+ (int[]){K_VECTOR, K_LIST, K_DATAFRAME}, 3);
+ if (parent == CORK_NIL)
+ cousin = anyKindEntryInScope (parent, tokenString (token), K_GLOBALVAR);
+ else
+ {
+ cousin = anyKindEntryInScope (parent, tokenString (token), K_FUNCVAR);
+ kind = K_FUNCVAR;
+ }
+ }
+ else if (pe)
+ {
+ /* The condition for tagging is a bit relaxed here.
+ Even if the same name tag is created in the scope, a name
+ is tagged if kinds are different. */
+ cousin = anyKindEntryInScope (parent, tokenString (token), kind);
+ }
+ if (cousin != CORK_NIL)
+ return CORK_NIL;
+
+ int corkIndex = makeSimpleTag (token->string, kind);
+ tagEntryInfo *tag = getEntryInCorkQueue (corkIndex);
+ if (tag)
+ {
+ tag->extensionFields.scopeIndex = parent;
+ if (assignmentOp)
+ {
+ if (strlen (assignmentOp) > 0)
+ attachParserField (tag, true,
+ RFields [F_ASSIGNMENT_OPERATOR].ftype,
+ assignmentOp);
+ else
+ markTagExtraBit (tag, XTAG_ANONYMOUS);
+ }
+ registerEntry (corkIndex);
+ }
+ return corkIndex;
+}
+
+static int makeSimpleRTag (tokenInfo *const token, int parent, bool in_func, int kind,
+ const char * assignmentOp)
+{
+ int r;
+ const char *ctor = kindExtraInfo [kind].ctor;
+ tagEntryInfo *pe = (parent == CORK_NIL)? NULL: getEntryInCorkQueue (parent);
+
+ /* makeTagWithTranslation method for subparsers
+ called from makeSimpleSubparserTag expects
+ kind should be resolved. */
+ if (pe && hasKindsOrCtors (pe, (int[]){K_VECTOR, K_LIST, K_DATAFRAME}, 3))
+ {
+ if (assignmentOp
+ && strcmp (assignmentOp, "=") == 0)
+ kind = K_NAMEATTR;
+ }
+
+ bool foreign_tag = false;
+ if (pe == NULL || pe->langType == Lang_R ||
+ !askSubparserTagAcceptancy (pe))
+ r = makeSimpleRTagR (token, parent, kind, assignmentOp);
+ else
+ {
+ foreign_tag = true;
+ r = makeSimpleSubparserTag (pe->langType, token, parent, in_func,
+ kind, assignmentOp);
+ }
+
+ if ((kind == K_NAMEATTR || foreign_tag) && ctor)
+ {
+ tagEntryInfo *e = getEntryInCorkQueue (r);
+ if (e)
+ attachParserField (e, true,
+ RFields [F_CONSTRUCTOR].ftype,
+ ctor);
+ }
+
+ return r;
+}
+
+static void clearToken (tokenInfo *token)
+{
+ R (token)->parenDepth = 0;
+ R (token)->scopeIndex = CORK_NIL;
+ R (token)->kindIndexForParams = KIND_GHOST_INDEX;
+ if (R (token)->signature)
+ {
+ vStringDelete (R (token)->signature);
+ R (token)->signature = NULL;
+ }
+}
+
+static void readString (tokenInfo *const token, void *data)
+{
+ int c;
+ bool escaped = false;
+
+ int c0 = tokenString(token)[0];
+
+ while (1)
+ {
+ c = getcFromInputFile ();
+ switch (c)
+ {
+ case EOF:
+ return;
+ case '\'':
+ case '"':
+ case '`':
+ tokenPutc (token, c);
+ if (!escaped && c == c0)
+ return;
+ escaped = false;
+ break;
+ case '\\':
+ tokenPutc (token, c);
+ escaped = !escaped;
+ break;
+ default:
+ tokenPutc (token, c);
+ escaped = false;
+ break;
+ }
+ }
+}
+
+static void readNumber (tokenInfo *const token, void *data)
+{
+ int c;
+
+ /* 10.3.1 Constants
+ *
+ * Valid numeric constants: 1 10 0.1 .2 1e-7 1.2e+7
+ * Valid integer constants: 1L, 0x10L, 1000000L, 1e6L
+ * Valid numeric constants: 1.1L, 1e-3L, 0x1.1p-2
+ * Valid complex constants: 2i 4.1i 1e-2i
+ */
+ while ((c = getcFromInputFile ()))
+ {
+ if (isxdigit (c) || c == '.' || c == 'E'
+ || c == '+' || c == '-'
+ || c == 'L' || c == 'x' || c == 'p'
+ || c == 'i')
+ tokenPutc (token, c);
+ else
+ {
+ ungetcToInputFile (c);
+ break;
+ }
+ }
+}
+
+static void readSymbol (tokenInfo *const token, void *data)
+{
+ int c;
+ while ((c = getcFromInputFile ()))
+ {
+ if (isalnum (c) || c == '.' || c == '_')
+ tokenPutc (token, c);
+ else
+ {
+ ungetcToInputFile (c);
+ break;
+ }
+ }
+}
+
+static keywordId resolveKeyword (vString *string)
+{
+ char *s = vStringValue (string);
+ static langType lang = LANG_AUTO;
+
+ if (lang == LANG_AUTO)
+ lang = getInputLanguage ();
+
+ return lookupCaseKeyword (s, lang);
+}
+
+static bool signatureExpectingParameter (vString *signature)
+{
+ if (vStringLast (signature) == '(')
+ return true;
+
+ for (size_t i = vStringLength (signature); i > 0; i--)
+ {
+ char c = vStringChar (signature, i - 1);
+ if (c == ' ')
+ continue;
+ else if (c == ',')
+ return true;
+ break;
+ }
+ return false;
+}
+
+static void readToken (tokenInfo *const token, void *data)
+{
+ int c, c0;
+
+ token->type = TOKEN_R_UNDEFINED;
+ token->keyword = KEYWORD_NONE;
+ vStringClear (token->string);
+
+ do
+ c = getcFromInputFile ();
+ while (c == ' ' || c== '\t' || c == '\f');
+
+ token->lineNumber = getInputLineNumber ();
+ token->filePosition = getInputFilePosition ();
+
+ switch (c)
+ {
+ case EOF:
+ token->type = TOKEN_R_EOF;
+ break;
+ case '#':
+ while (1)
+ {
+ c = getcFromInputFile ();
+ if (c == EOF)
+ {
+ token->type = TOKEN_R_EOF;
+ break;
+ }
+ else if (c == '\n')
+ {
+ token->type = c;
+ tokenPutc (token, c);
+ break;
+ }
+ }
+ break;
+ case '\n':
+ case ';':
+ token->type = c;
+ tokenPutc (token, c);
+ break;
+ case '\'':
+ case '"':
+ case '`':
+ token->type = TOKEN_R_STRING;
+ tokenPutc (token, c);
+ readString (token, data);
+ break;
+ case '+':
+ case '/':
+ case '^':
+ case '~':
+ token->type = TOKEN_R_OPERATOR;
+ tokenPutc (token, c);
+ break;
+ case ':':
+ token->type = TOKEN_R_OPERATOR;
+ tokenPutc (token, c);
+ c = getcFromInputFile ();
+ if (c == ':')
+ {
+ tokenPutc (token, c);
+ token->type = TOKEN_R_SCOPE;
+ c = getcFromInputFile ();
+ if (c == ':')
+ tokenPutc (token, c);
+ else
+ ungetcToInputFile (c);
+ }
+ else
+ ungetcToInputFile (c);
+ break;
+ case '&':
+ case '|':
+ case '*':
+ token->type = TOKEN_R_OPERATOR;
+ tokenPutc (token, c);
+ c0 = getcFromInputFile ();
+ if (c == c0)
+ tokenPutc (token, c0);
+ else
+ ungetcToInputFile (c0);
+ break;
+ case '=':
+ token->type = TOKEN_R_OPERATOR;
+ tokenPutc (token, c);
+ c = getcFromInputFile ();
+ if (c == '=')
+ tokenPutc (token, c);
+ else
+ {
+ token->type = '=';
+ ungetcToInputFile (c);
+ }
+ break;
+ case '-':
+ token->type = TOKEN_R_OPERATOR;
+ tokenPutc (token, c);
+ c = getcFromInputFile ();
+ if (c == '>')
+ {
+ token->type = TOKEN_R_RASSIGN;
+ tokenPutc (token, c);
+ c = getcFromInputFile ();
+ if (c == '>')
+ tokenPutc (token, c);
+ else
+ ungetcToInputFile (c);
+ }
+ else
+ ungetcToInputFile (c);
+ break;
+ case '>':
+ token->type = TOKEN_R_OPERATOR;
+ tokenPutc (token, c);
+ c = getcFromInputFile ();
+ if (c == '=')
+ tokenPutc (token, c);
+ else
+ ungetcToInputFile (c);
+ break;
+ case '<':
+ token->type = TOKEN_R_OPERATOR;
+ tokenPutc (token, c);
+ c = getcFromInputFile ();
+
+ /* <<- */
+ if (c == '<')
+ {
+ tokenPutc (token, c);
+ c = getcFromInputFile ();
+ }
+
+ if (c == '-')
+ {
+ token->type = TOKEN_R_LASSIGN;
+ tokenPutc (token, c);
+ }
+ else if (c == '=')
+ tokenPutc (token, c);
+ else
+ ungetcToInputFile (c);
+ break;
+ case '%':
+ token->type = TOKEN_R_OPERATOR;
+ tokenPutc (token, c);
+ do
+ {
+ c = getcFromInputFile ();
+ if (c == EOF)
+ break;
+
+ tokenPutc (token, c);
+ if (c == '%')
+ break;
+ }
+ while (1);
+ break;
+ case '!':
+ token->type = TOKEN_R_OPERATOR;
+ tokenPutc (token, c);
+ c = getcFromInputFile ();
+ if (c == '=')
+ tokenPutc (token, c);
+ else
+ ungetcToInputFile (c);
+ break;
+ case '{':
+ case '}':
+ case '(':
+ case ')':
+ case '[':
+ case ']':
+ case ',':
+ case '$':
+ case '@':
+ token->type = c;
+ tokenPutc (token, c);
+ break;
+ case '.':
+ tokenPutc (token, c);
+ c = getcFromInputFile ();
+ if (isdigit(c))
+ {
+ token->type = TOKEN_R_NUMBER;
+ tokenPutc (token, c);
+ readNumber(token, data);
+ }
+ else if (isalpha (c) || c == '_')
+ {
+ token->type = TOKEN_R_SYMBOL;
+ tokenPutc (token, c);
+ readSymbol (token, data);
+
+ token->keyword = resolveKeyword (token->string);
+ if (token->keyword != KEYWORD_NONE)
+ token->type = TOKEN_R_KEYWORD;
+ }
+ else if (c == '.')
+ {
+ token->type = TOKEN_R_DOTS;
+ tokenPutc (token, c);
+
+ c = getcFromInputFile ();
+ if (c == '.')
+ tokenPutc (token, c);
+ else if (isdigit(c))
+ {
+ token->type = TOKEN_R_DOTS_N;
+ do
+ {
+ tokenPutc (token, c);
+ c = getcFromInputFile ();
+ }
+ while (isdigit(c));
+ ungetcToInputFile (c);
+ }
+ else if (isalpha (c) || c == '_')
+ {
+ token->type = TOKEN_R_SYMBOL;
+ tokenPutc (token, c);
+ readSymbol (token, data);
+
+ token->keyword = resolveKeyword (token->string);
+ if (token->keyword != KEYWORD_NONE)
+ token->type = TOKEN_R_KEYWORD;
+ }
+ else
+ {
+ token->type = TOKEN_R_UNDEFINED;
+ ungetcToInputFile (c);
+ }
+ }
+ break;
+ default:
+ tokenPutc (token, c);
+ if (isdigit (c))
+ {
+ token->type = TOKEN_R_NUMBER;
+ readNumber(token, data);
+ }
+ else if (isalpha (c))
+ {
+ token->type = TOKEN_R_SYMBOL;
+ readSymbol (token, data);
+
+ token->keyword = resolveKeyword (token->string);
+ if (token->keyword != KEYWORD_NONE)
+ token->type = TOKEN_R_KEYWORD;
+ }
+ else
+ token->type = TOKEN_R_UNDEFINED;
+ break;
+ }
+
+ /* Handle parameters in a signature */
+ if (R(token)->signature && !tokenIsType(token, R_EOF) && !tokenIsTypeVal(token, '\n'))
+ {
+ vString *signature = R (token)->signature;
+
+ if (tokenIsTypeVal (token, '('))
+ R (token)->parenDepth++;
+ else if (tokenIsTypeVal (token, ')'))
+ R (token)->parenDepth--;
+
+ if (R (token)->kindIndexForParams != KIND_GHOST_INDEX
+ && R (token)->parenDepth == 1 && tokenIsType (token, R_SYMBOL)
+ && signatureExpectingParameter (signature))
+ makeSimpleRTag (token, R (token)->scopeIndex, false,
+ R (token)->kindIndexForParams, NULL);
+
+ if (vStringLast (signature) != '(' &&
+ !tokenIsTypeVal (token, ',') &&
+ !tokenIsTypeVal (token, ')'))
+ vStringPut (signature, ' ');
+ vStringCat (signature, token->string);
+ }
+}
+
+#define newRToken rNewToken
+extern tokenInfo *rNewToken (void)
+{
+ return newToken (&rTokenInfoClass);
+}
+
+#define tokenReadNoNewline rTokenReadNoNewline
+extern void rTokenReadNoNewline (tokenInfo *const token)
+{
+ while (1)
+ {
+ tokenRead(token);
+ if (!tokenIsTypeVal (token, '\n'))
+ break;
+ }
+}
+
+static void setupCollectingSignature (tokenInfo *const token,
+ vString *signature,
+ int kindIndexForParams,
+ int corkIndex)
+{
+ R (token)->signature = signature;
+ R (token)->kindIndexForParams = kindIndexForParams;
+ R (token)->scopeIndex = corkIndex;
+ R (token)->parenDepth = 1;
+}
+
+extern void rSetupCollectingSignature (tokenInfo *const token,
+ vString *signature)
+{
+ setupCollectingSignature (token, signature,
+ KIND_GHOST_INDEX, CORK_NIL);
+}
+
+static void teardownCollectingSignature (tokenInfo *const token)
+{
+ R (token)->parenDepth = 0;
+ R (token)->scopeIndex = CORK_NIL;
+ R (token)->kindIndexForParams = KIND_GHOST_INDEX;
+ R (token)->signature = NULL;
+}
+
+extern void rTeardownCollectingSignature (tokenInfo *const token)
+{
+ teardownCollectingSignature (token);
+}
+
+static int getKindForToken (tokenInfo *const token)
+{
+ if (tokenIsKeyword (token, R_FUNCTION))
+ return K_FUNCTION;
+ else if (tokenIsKeyword (token, R_C))
+ return K_VECTOR;
+ else if (tokenIsKeyword (token, R_LIST))
+ return K_LIST;
+ else if (tokenIsKeyword (token, R_DATAFRAME))
+ return K_DATAFRAME;
+ return K_GLOBALVAR;
+}
+
+static bool findNonPlaceholder (int corkIndex, tagEntryInfo *entry, void *data)
+{
+ bool *any_non_placehoders = data;
+ if (!entry->placeholder)
+ {
+ *any_non_placehoders = true;
+ return false;
+ }
+ return true;
+}
+
+static void parseRightSide (tokenInfo *const token, tokenInfo *const symbol, int parent)
+{
+ R_TRACE_ENTER();
+
+ char *const assignment_operator = eStrdup (tokenString (token));
+ vString *signature = NULL;
+
+ tokenReadNoNewline (token);
+
+ int kind = getKindForToken (token);
+
+ /* Call sub parsers */
+ int corkIndex = notifyReadRightSideSymbol (symbol,
+ assignment_operator,
+ parent,
+ token);
+ if (corkIndex == CORK_NIL)
+ {
+ /* No subparser handle the symbol */
+ corkIndex = makeSimpleRTag (symbol, parent, kind == K_FUNCTION,
+ kind,
+ assignment_operator);
+ }
+
+ if (kind == K_FUNCTION)
+ {
+ /* parse signature */
+ tokenReadNoNewline (token);
+ if (tokenIsTypeVal (token, '('))
+ {
+ if (corkIndex == CORK_NIL)
+ tokenSkipOverPair (token);
+ else
+ {
+ signature = vStringNewInit("(");
+ setupCollectingSignature (token, signature, K_PARAM, corkIndex);
+ tokenSkipOverPair (token);
+ teardownCollectingSignature (token);
+ }
+ tokenReadNoNewline (token);
+ }
+ parent = (corkIndex == CORK_NIL
+ ? blackHoleIndex
+ : corkIndex);
+ }
+ else if (kind == K_VECTOR || kind == K_LIST || kind == K_DATAFRAME)
+ {
+ tokenRead (token);
+ parsePair (token, corkIndex, NULL);
+ tokenRead (token);
+ parent = corkIndex;
+ }
+
+ R_TRACE_TOKEN_TEXT("body", token, parent);
+
+ parseStatement (token, parent, false, false);
+
+ tagEntryInfo *tag = getEntryInCorkQueue (corkIndex);
+ if (tag)
+ {
+ tag->extensionFields.endLine = token->lineNumber;
+ if (signature)
+ {
+ tag->extensionFields.signature = vStringDeleteUnwrap(signature);
+ signature = NULL;
+ }
+ /* If a vector has no named attribte and it has no lval,
+ * we don't make a tag for the vector. */
+ if ((kind == K_VECTOR || kind == K_LIST || kind == K_DATAFRAME)
+ && *assignment_operator == '\0')
+ {
+ bool any_non_placehoders = false;
+ foreachEntriesInScope (corkIndex, NULL,
+ findNonPlaceholder, &any_non_placehoders);
+ if (!any_non_placehoders)
+ tag->placeholder = 1;
+ }
+ }
+
+ vStringDelete (signature); /* NULL is acceptable. */
+ eFree (assignment_operator);
+ R_TRACE_LEAVE();
+}
+
+/* Parse arguments for library and source. */
+static bool preParseExternalEntitiy (tokenInfo *const token, tokenInfo *const funcall)
+{
+ TRACE_ENTER();
+
+ bool r = true;
+ tokenInfo *prefetch_token = newRToken ();
+
+ tokenReadNoNewline (prefetch_token);
+ if (tokenIsType (prefetch_token, R_SYMBOL)
+ || tokenIsType (prefetch_token, R_STRING))
+ {
+ tokenInfo *const loaded_obj_token = newTokenByCopying (prefetch_token);
+ tokenReadNoNewline (prefetch_token);
+ if (tokenIsTypeVal (prefetch_token, ')')
+ || tokenIsTypeVal (prefetch_token, ','))
+ {
+ if (tokenIsTypeVal (prefetch_token, ')'))
+ r = false;
+
+ makeSimpleRefTag (loaded_obj_token->string,
+ (tokenIsKeyword (funcall, R_LIBRARY)
+ ? K_LIBRARY
+ : K_SOURCE),
+ (tokenIsKeyword (funcall, R_LIBRARY)
+ ? (strcmp (tokenString(funcall), "library") == 0
+ ? R_LIBRARY_ATTACHED_BY_LIBRARY
+ : R_LIBRARY_ATTACHED_BY_REQUIRE)
+ : R_SOURCE_LOADED_BY_SOURCE));
+ tokenDelete (loaded_obj_token);
+ }
+ else if (tokenIsEOF (prefetch_token))
+ {
+ tokenCopy (token, prefetch_token);
+ tokenDelete (loaded_obj_token);
+ r = false;
+ }
+ else
+ {
+ tokenUnread (prefetch_token);
+ tokenUnread (loaded_obj_token);
+ tokenDelete (loaded_obj_token);
+ }
+ }
+ else if (tokenIsEOF (prefetch_token))
+ {
+ tokenCopy (token, prefetch_token);
+ r = false;
+ }
+ else
+ tokenUnread (prefetch_token);
+
+ tokenDelete (prefetch_token);
+
+ TRACE_LEAVE_TEXT(r
+ ? "unread tokens and request parsing again to the upper context"
+ : "parse all arguments");
+ return r;
+}
+
+static bool preParseLoopCounter(tokenInfo *const token, int parent)
+{
+ bool r = true;
+ TRACE_ENTER();
+
+ tokenReadNoNewline (token);
+ if (tokenIsType (token, R_SYMBOL))
+ makeSimpleRTag (token, parent, false, K_GLOBALVAR, NULL);
+
+ if (tokenIsEOF (token)
+ || tokenIsTypeVal (token, ')'))
+ r = false;
+
+ TRACE_LEAVE_TEXT(r
+ ? "unread tokens and request parsing again to the upper context"
+ : "parse all arguments");
+ return r;
+}
+
+
+/* If funcall is non-NULL, this pair represents the argument list for the function
+ * call for FUNCALL. */
+static void parsePair (tokenInfo *const token, int parent, tokenInfo *const funcall)
+{
+ R_TRACE_ENTER();
+
+ bool in_continuous_pair = tokenIsTypeVal (token, '(')
+ || tokenIsTypeVal (token, '[');
+ bool is_funcall = funcall && tokenIsTypeVal (token, '(');
+ bool done = false;
+
+ if (is_funcall)
+ {
+ if (tokenIsKeyword (funcall, R_LIBRARY) ||
+ tokenIsKeyword (funcall, R_SOURCE))
+ done = !preParseExternalEntitiy (token, funcall);
+ else if (tokenIsKeyword (funcall, R_FOR))
+ done = !preParseLoopCounter (token, parent);
+ else if (notifyReadFuncall (funcall, token, parent) != CORK_NIL)
+ done = true;
+ }
+
+ if (done)
+ {
+ R_TRACE_LEAVE();
+ return;
+ }
+
+ do
+ {
+ tokenRead (token);
+ R_TRACE_TOKEN_TEXT("inside pair", token, parent);
+ parseStatement (token, parent, (funcall != NULL), in_continuous_pair);
+ }
+ while (! (tokenIsEOF (token)
+ || tokenIsTypeVal (token, ')')
+ || tokenIsTypeVal (token, '}')
+ || tokenIsTypeVal (token, ']')));
+ R_TRACE_LEAVE();
+}
+
+static bool isAtConstructorInvocation (void)
+{
+ bool r = false;
+
+ tokenInfo *const token = newRToken ();
+ tokenRead (token);
+ if (tokenIsTypeVal (token, '('))
+ r = true;
+ tokenUnread (token);
+ tokenDelete (token);
+ return r;
+}
+
+static bool parseStatement (tokenInfo *const token, int parent,
+ bool in_arglist, bool in_continuous_pair)
+{
+ R_TRACE_ENTER();
+ int last_count = rTokenInfoClass.read_counter;
+
+ do
+ {
+ if (tokenIsEOF (token))
+ break;
+ else if (tokenIsTypeVal (token, ';'))
+ {
+ R_TRACE_TOKEN_TEXT ("break with ;", token, parent);
+ break;
+ }
+ else if (tokenIsTypeVal (token, '\n'))
+ {
+ R_TRACE_TOKEN_TEXT ("break with \\n", token, parent);
+ break;
+ }
+ else if ((tokenIsKeyword (token, R_FUNCTION)
+ || ((tokenIsKeyword (token, R_C)
+ || tokenIsKeyword (token, R_LIST)
+ || tokenIsKeyword (token, R_DATAFRAME))
+ && isAtConstructorInvocation ())))
+ {
+ /* This statement doesn't start with a symbol.
+ * This function is not assigned to any symbol. */
+ tokenInfo *const anonfunc = newTokenByCopying (token);
+ int kind = getKindForToken (token);
+ anonGenerate (anonfunc->string,
+ kindExtraInfo [kind].anon_prefix, kind);
+ tokenUnread (token);
+ vStringClear (token->string);
+ parseRightSide (token, anonfunc, parent);
+ tokenDelete (anonfunc);
+ }
+ else if (tokenIsType (token, R_SYMBOL)
+ || tokenIsType (token, R_STRING)
+ || tokenIsType (token, R_KEYWORD))
+ {
+ tokenInfo *const symbol = newTokenByCopying (token);
+
+ if (in_continuous_pair)
+ tokenReadNoNewline (token);
+ else
+ tokenRead (token);
+
+ if (tokenIsType (token, R_LASSIGN))
+ {
+ /* Assignment */
+ parseRightSide (token, symbol, parent);
+ R_TRACE_TOKEN_TEXT ("break with right side", token, parent);
+ tokenDelete(symbol);
+ break;
+ }
+ else if (tokenIsTypeVal (token, '='))
+ {
+ /* Assignment */
+ if (in_arglist)
+ {
+ /* Ignore the left side symbol. */
+ tokenRead (token);
+ R_TRACE_TOKEN_TEXT("(in arg list) after = body", token, parent);
+ }
+ else
+ {
+ parseRightSide (token, symbol, parent);
+ R_TRACE_TOKEN_TEXT ("break with right side", token, parent);
+ tokenDelete(symbol);
+ break;
+ }
+ }
+ else if (tokenIsTypeVal (token, '('))
+ {
+ /* function call */
+ parsePair (token, parent, symbol);
+ tokenRead (token);
+ R_TRACE_TOKEN_TEXT("after arglist", token, parent);
+ }
+ else if (tokenIsTypeVal (token, '$')
+ || tokenIsTypeVal (token, '@')
+ || tokenIsType (token, R_SCOPE))
+ {
+ tokenReadNoNewline (token); /* Skip the next identifier */
+ tokenRead (token);
+ R_TRACE_TOKEN_TEXT("after $", token, parent);
+ }
+ else
+ R_TRACE_TOKEN_TEXT("else after symbol", token, parent);
+ tokenDelete(symbol);
+ }
+ else if (tokenIsType (token, R_RASSIGN))
+ {
+ char *const assignment_operator = eStrdup (tokenString (token));
+ tokenReadNoNewline (token);
+ if (tokenIsType (token, R_SYMBOL)
+ || tokenIsType (token, R_STRING))
+ {
+ makeSimpleRTag (token, parent, false,
+ K_GLOBALVAR, assignment_operator);
+ tokenRead (token);
+ }
+ eFree (assignment_operator);
+ R_TRACE_TOKEN_TEXT("after ->", token, parent);
+ }
+ else if (tokenIsType (token, R_OPERATOR))
+ {
+ tokenReadNoNewline (token);
+ R_TRACE_TOKEN_TEXT("after operator", token, parent);
+ }
+ else if (tokenIsTypeVal (token, '(')
+ || tokenIsTypeVal (token, '{')
+ || tokenIsTypeVal (token, '['))
+ {
+ parsePair (token, parent, NULL);
+ tokenRead (token);
+ R_TRACE_TOKEN_TEXT("after pair", token, parent);
+ }
+ else if (tokenIsTypeVal (token, ')')
+ || tokenIsTypeVal (token, '}')
+ || tokenIsTypeVal (token, ']'))
+ {
+ R_TRACE_TOKEN_TEXT ("break with close", token, parent);
+ break;
+ }
+ else if (tokenIsTypeVal (token, '$')
+ || tokenIsTypeVal (token, '@')
+ || tokenIsType (token, R_SCOPE))
+ {
+ tokenReadNoNewline (token); /* Skip the next identifier */
+ tokenRead (token);
+ R_TRACE_TOKEN_TEXT("after $", token, parent);
+ }
+ else
+ {
+ tokenRead (token);
+ R_TRACE_TOKEN_TEXT("else", token, parent);
+ }
+ }
+ while (!tokenIsEOF (token));
+
+ R_TRACE_LEAVE();
+
+ return (last_count != rTokenInfoClass.read_counter);
+}
+
+extern bool rParseStatement (tokenInfo *const token, int parentIndex, bool in_arglist)
+{
+ pushLanguage (Lang_R);
+ bool r = parseStatement (token, parentIndex, in_arglist, true);
+ popLanguage ();
+ return r;
+}
+
+static int notifyReadRightSideSymbol (tokenInfo *const symbol,
+ const char *const assignmentOperator,
+ int parent,
+ tokenInfo *const token)
+{
+ subparser *sub;
+ int q = CORK_NIL;
+
+ foreachSubparser (sub, false)
+ {
+ rSubparser *rsub = (rSubparser *)sub;
+ if (rsub->readRightSideSymbol)
+ {
+ enterSubparser (sub);
+ q = rsub->readRightSideSymbol (rsub, symbol, assignmentOperator, parent, token);
+ leaveSubparser ();
+ if (q != CORK_NIL)
+ break;
+ }
+ }
+
+ return q;
+}
+
+static int makeSimpleSubparserTag (int langType,
+ tokenInfo *const token, int parent,
+ bool in_func, int kindInR,
+ const char *assignmentOperator)
+{
+ int q = CORK_NIL;
+ subparser *sub = getLanguageSubparser (langType, false);
+ if (sub)
+ {
+ rSubparser *rsub = (rSubparser *)sub;
+ if (rsub->makeTagWithTranslation)
+ {
+ enterSubparser (sub);
+ q = rsub->makeTagWithTranslation (rsub,
+ token, parent,
+ in_func, kindInR,
+ assignmentOperator);
+ leaveSubparser ();
+ }
+ }
+ return q;
+}
+
+static bool askSubparserTagAcceptancy (tagEntryInfo *pe)
+{
+ bool q = false;
+ subparser *sub = getLanguageSubparser (pe->langType, false);
+ {
+ rSubparser *rsub = (rSubparser *)sub;
+ if (rsub->askTagAcceptancy)
+ {
+ enterSubparser (sub);
+ q = rsub->askTagAcceptancy (rsub, pe);
+ leaveSubparser ();
+ }
+ }
+ return q;
+}
+
+static bool askSubparserTagHasFunctionAlikeKind (tagEntryInfo *e)
+{
+ bool q = false;
+ pushLanguage (Lang_R);
+ subparser *sub = getLanguageSubparser (e->langType, false);
+ Assert (sub);
+ popLanguage ();
+ rSubparser *rsub = (rSubparser *)sub;
+ if (rsub->hasFunctionAlikeKind)
+ {
+ enterSubparser (sub);
+ q = rsub->hasFunctionAlikeKind (rsub, e);
+ leaveSubparser ();
+ }
+ return q;
+}
+
+static int notifyReadFuncall (tokenInfo *const func,
+ tokenInfo *const token,
+ int parent)
+{
+ int q = CORK_NIL;
+ subparser *sub;
+ foreachSubparser (sub, false)
+ {
+ rSubparser *rsub = (rSubparser *)sub;
+ if (rsub->readFuncall)
+ {
+ enterSubparser (sub);
+ q = rsub->readFuncall (rsub, func, token, parent);
+ leaveSubparser ();
+ if (q != CORK_NIL)
+ break;
+ }
+ }
+ return q;
+}
+
+static void findRTags (void)
+{
+ tokenInfo *const token = newRToken ();
+
+ blackHoleIndex = makePlaceholder ("**BLACK-HOLE/DON'T TAG ME**");
+ registerEntry (blackHoleIndex);
+
+ TRACE_PRINT ("install blackhole: %d", blackHoleIndex);
+
+ do
+ {
+ tokenRead(token);
+ R_TRACE_TOKEN(token, CORK_NIL);
+ parseStatement (token, CORK_NIL, false, false);
+ }
+ while (!tokenIsEOF (token));
+
+ TRACE_PRINT ("run blackhole", blackHoleIndex);
+ markAllEntriesInScopeAsPlaceholder (blackHoleIndex);
+
+ tokenDelete (token);
+}
+
+static void initializeRParser (const langType language)
+{
+ Lang_R = language;
+}
+
+extern parserDefinition *RParser (void)
+{
+ static const char *const extensions[] = { "r", "R", "s", "q", NULL };
+ parserDefinition *const def = parserNew ("R");
+ static selectLanguage selectors[] = { selectByArrowOfR,
+ NULL };
+
+ def->extensions = extensions;
+ def->kindTable = RKinds;
+ def->kindCount = ARRAY_SIZE(RKinds);
+ def->fieldTable = RFields;
+ def->fieldCount = ARRAY_SIZE (RFields);
+ def->keywordTable = RKeywordTable;
+ def->keywordCount = ARRAY_SIZE(RKeywordTable);
+ def->useCork = CORK_QUEUE | CORK_SYMTAB;
+ def->parser = findRTags;
+ def->selectLanguage = selectors;
+ def->initialize = initializeRParser;
+
+ return def;
+}
+
+extern vString *rExtractNameFromString (vString* str)
+{
+ int offset = 0;
+
+ if (vStringLength (str) == 0)
+ return NULL;
+
+ char b = vStringChar (str, 0);
+ if (b == '\'' || b == '"' || b == '`')
+ offset = 1;
+
+ if (offset && vStringLength (str) < 3)
+ return NULL;
+
+ vString *n = vStringNewInit (vStringValue (str) + offset);
+ if (vStringChar (n, vStringLength (n) - 1) == b)
+ vStringChop (n);
+
+ return n;
+}
+
+#ifdef DEBUG
+static const char *tokenTypeStr(enum RTokenType e)
+{ /* Generated by misc/enumstr.sh with cmdline:
+ parsers/r.c RTokenType tokenTypeStr TOKEN_R_ --use-lower-bits-as-is */
+ switch (e)
+ {
+ case TOKEN_R_EOF: return "EOF";
+ case TOKEN_R_UNDEFINED: return "UNDEFINED";
+ case TOKEN_R_KEYWORD: return "KEYWORD";
+ case TOKEN_R_NEWLINE: return "NEWLINE";
+ case TOKEN_R_NUMBER: return "NUMBER";
+ case TOKEN_R_SYMBOL: return "SYMBOL";
+ case TOKEN_R_STRING: return "STRING";
+ case TOKEN_R_OPERATOR: return "OPERATOR";
+ case TOKEN_R_DOTS: return "DOTS";
+ case TOKEN_R_DOTS_N: return "DOTS_N";
+ case TOKEN_R_LASSIGN: return "LASSIGN";
+ case TOKEN_R_RASSIGN: return "RASSIGN";
+ case TOKEN_R_SCOPE: return "SCOPE";
+ default: break;
+ }
+ static char buf[3];
+ if (isprint (e))
+ {
+ buf[0] = e;
+ buf[1] = '\0';
+ }
+ else if (e == '\n')
+ {
+ buf[0] = '\\';
+ buf[1] = 'n';
+ buf[2] = '\0';
+ }
+ else
+ {
+ buf[0] = '\0';
+ }
+ return buf;
+}
+#endif
Modified: ctags/parsers/r.h
111 lines changed, 111 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,111 @@
+/*
+* Copyright (c) 2003-2004, Ascher Stefan <stievie at utanet.at>
+* Copyright (c) 2020, Masatake YAMATO
+* Copyright (c) 2020, Red Hat, Inc.
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License version 2 or (at your option) any later version.
+*/
+
+#ifndef CTAGS_PARSER_R_H
+#define CTAGS_PARSER_R_H
+
+/*
+* INCLUDE FILES
+*/
+
+#include "general.h" /* must always come first */
+
+#include "subparser.h"
+#include "tokeninfo.h"
+#include "entry.h"
+
+
+/*
+* DATA DECLARATIONS
+*/
+
+typedef struct sRSubparser rSubparser;
+
+enum RTokenType {
+ /* 0..255 are the byte's values */
+ TOKEN_R_EOF = 256,
+ TOKEN_R_UNDEFINED,
+ TOKEN_R_KEYWORD,
+ TOKEN_R_NEWLINE,
+ TOKEN_R_NUMBER, /* 1, 1L */
+ TOKEN_R_SYMBOL, /* [0-9a-zA-Z._] */
+ TOKEN_R_STRING,
+ TOKEN_R_OPERATOR, /* - + ! ~ ? : * / ^ %...%, <, > ==
+ * >=, <=, &, &&, |, || */
+ TOKEN_R_DOTS, /* ... */
+ TOKEN_R_DOTS_N, /* ..1, ..2, etc */
+ TOKEN_R_LASSIGN, /* <-, <<- */
+ TOKEN_R_RASSIGN, /* ->, ->> */
+ TOKEN_R_SCOPE, /* ::, ::: */
+};
+
+enum eRKeywordId
+{
+ KEYWORD_R_C,
+ KEYWORD_R_DATAFRAME,
+ KEYWORD_R_FUNCTION,
+ KEYWORD_R_IF,
+ KEYWORD_R_ELSE,
+ KEYWORD_R_FOR,
+ KEYWORD_R_WHILE,
+ KEYWORD_R_REPEAT,
+ KEYWORD_R_IN,
+ KEYWORD_R_NEXT,
+ KEYWORD_R_BREAK,
+ KEYWORD_R_TRUE,
+ KEYWORD_R_FALSE,
+ KEYWORD_R_NULL,
+ KEYWORD_R_INF,
+ KEYWORD_R_LIST,
+ KEYWORD_R_NAN,
+ KEYWORD_R_NA,
+ KEYWORD_R_SOURCE,
+ KEYWORD_R_LIBRARY,
+};
+
+struct sRSubparser {
+ subparser subparser;
+ int (* readRightSideSymbol) (rSubparser *s,
+ tokenInfo *const symbol,
+ const char *const assignmentOperator,
+ int parent,
+ tokenInfo *const token);
+ int (* makeTagWithTranslation) (rSubparser *s,
+ tokenInfo *const token,
+ int parent,
+ bool in_func,
+ int kindInR,
+ const char *const assignmentOperator);
+ bool (* askTagAcceptancy) (rSubparser *s, tagEntryInfo *pe);
+ bool (* hasFunctionAlikeKind) (rSubparser *s, tagEntryInfo *pe);
+ int (* readFuncall) (rSubparser *s,
+ tokenInfo *const func,
+ tokenInfo *const token,
+ int parent);
+};
+
+extern void rSetupCollectingSignature (tokenInfo *const token,
+ vString *signature);
+extern void rTeardownCollectingSignature (tokenInfo *const token);
+
+/*
+ * FUNCTION PROTOTYPES
+ */
+
+extern tokenInfo *rNewToken (void);
+
+extern void rTokenReadNoNewline (tokenInfo *const token);
+
+/* This function returns true if a new token is read.
+ * EOF is exception. If EOF is read, this function returns FALSE. */
+extern bool rParseStatement (tokenInfo *const token, int parentIndex, bool inArgList);
+
+extern vString *rExtractNameFromString (vString* str);
+
+#endif /* CTAGS_PARSER_TEX_H */
--------------
This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).
More information about the Commits
mailing list