[geany/geany] 90c609: Create julia lexer and tags parser (#2584)
getzze
git-noreply at xxxxx
Tue Jul 20 04:05:49 UTC 2021
Branch: refs/heads/master
Author: getzze <getzze at gmail.com>
Committer: GitHub <noreply at github.com>
Date: Tue, 20 Jul 2021 04:05:49 UTC
Commit: 90c6096ed6ea167f9100ce8f74229a3f47acc29a
https://github.com/geany/geany/commit/90c6096ed6ea167f9100ce8f74229a3f47acc29a
Log Message:
-----------
Create julia lexer and tags parser (#2584)
* Create julia parser, ctags and lexilla
* add ctags test file
Modified Paths:
--------------
ctags/Makefile.am
ctags/parsers/geany_julia.c
data/Makefile.am
data/filedefs/filetypes.common
data/filedefs/filetypes.julia
data/filetype_extensions.conf
scintilla/Makefile.am
scintilla/include/SciLexer.h
scintilla/include/Scintilla.iface
scintilla/julia_lexilla_v5.patch
scintilla/lexers/LexJulia.cxx
scintilla/src/Catalogue.cxx
src/filetypes.c
src/filetypes.h
src/highlighting.c
src/highlightingmappings.h
src/symbols.c
src/tagmanager/tm_parser.c
src/tagmanager/tm_parser.h
src/tagmanager/tm_parsers.h
tests/ctags/julia-corner_cases.jl
Modified: ctags/Makefile.am
1 lines changed, 1 insertions(+), 0 deletions(-)
===================================================================
@@ -31,6 +31,7 @@ parsers = \
parsers/geany_html.c \
parsers/geany_jscript.c \
parsers/geany_json.c \
+ parsers/geany_julia.c \
parsers/geany_lcpp.c \
parsers/geany_lcpp.h \
parsers/geany_lua.c \
Modified: ctags/parsers/geany_julia.c
1554 lines changed, 1554 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,1554 @@
+/*
+* Copyright (c) 2020-2021, getzze <getzze at gmail.com>
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License version 2 or (at your option) any later version.
+*
+* This module contains functions for generating tags for Julia files.
+*
+* Documented 'kinds':
+* https://docs.julialang.org/en/v1/manual/documentation/#Syntax-Guide
+* Language parser in Scheme:
+* https://github.com/JuliaLang/julia/blob/master/src/julia-parser.scm
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+
+#include "keyword.h"
+#include "parse.h"
+#include "entry.h"
+#include "options.h"
+#include "read.h"
+#include "routines.h"
+#include "vstring.h"
+#include "xtag.h"
+
+/*
+* MACROS
+*/
+#define MAX_STRING_LENGTH 256 /* advanceAndStoreChar() stops appending to token_str at this length */
+
+/*
+* DATA DEFINITIONS
+*/
+typedef enum { /* Tag kinds; K_CONSTANT..K_UNKNOWN follow the order of the
+ * entries in JuliaKinds[] */
+ K_CONSTANT,
+ K_FUNCTION,
+ K_FIELD,
+ K_MACRO,
+ K_MODULE,
+ K_STRUCT,
+ K_TYPE,
+ K_UNKNOWN,
+ K_NONE /* no JuliaKinds[] entry: addTag()/addReferenceTag() emit nothing for
+ * this kind, and as a parent kind it means "no scope" */
+} JuliaKind;
+
+typedef enum { /* Roles of a K_MODULE reference tag; same order as JuliaModuleRoles[] */
+ JULIA_MODULE_IMPORTED,
+ JULIA_MODULE_USED,
+ JULIA_MODULE_NAMESPACE,
+} juliaModuleRole;
+
+typedef enum { /* Roles of a K_UNKNOWN reference tag; same order as JuliaUnknownRoles[] */
+ JULIA_UNKNOWN_IMPORTED,
+ JULIA_UNKNOWN_USED,
+} juliaUnknownRole;
+
+/*
+* using X X = (kind:module, role:used)
+*
+* using X: a, b X = (kind:module, role:namespace)
+* a, b = (kind:unknown, role:used, scope:module:X)
+*
+* import X X = (kind:module, role:imported)
+*
+* import X.a, Y.b X, Y = (kind:module, role:namespace)
+* a, b = (kind:unknown, role:imported, scope:module:X)
+*
+* import X: a, b Same as the above one
+*/
+static roleDefinition JuliaModuleRoles [] = { /* entries in juliaModuleRole order */
+ { true, "imported", "loaded by \"import\"" },
+ { true, "used", "loaded by \"using\"" },
+ { true, "namespace", "only some symbols in it are imported" },
+};
+
+static roleDefinition JuliaUnknownRoles [] = { /* entries in juliaUnknownRole order */
+ { true, "imported", "loaded by \"import\"" },
+ { true, "used", "loaded by \"using\""},
+};
+
+static kindDefinition JuliaKinds [] = { /* entries in JuliaKind order; K_NONE has no entry */
+ { true, 'c', "constant", "Constants" },
+ { true, 'f', "function", "Functions" },
+ { true, 'g', "field", "Fields" },
+ { true, 'm', "macro", "Macros" },
+ { true, 'n', "module", "Modules",
+ ATTACH_ROLES(JuliaModuleRoles) },
+ { true, 's', "struct", "Structures" },
+ { true, 't', "type", "Types" },
+ { true, 'x', "unknown", "name defined in other modules", /* reference-only kind, see below */
+ .referenceOnly = true, ATTACH_ROLES(JuliaUnknownRoles) },
+};
+
+typedef enum { /* Token codes stored in lexerState::cur_token.
+ * advanceToken() also stores the raw character code there for characters
+ * it does not recognise (and '\'' for a transpose).
+ * NOTE(review): these enumerators are small integers and so share their
+ * range with low character codes - confirm no such character can reach
+ * the raw-character path. */
+ TOKEN_NONE=0, /* none */
+ TOKEN_WHITESPACE,
+ TOKEN_PAREN_BLOCK,
+ TOKEN_BRACKET_BLOCK,
+ TOKEN_CURLY_BLOCK,
+ TOKEN_OPEN_BLOCK,
+ TOKEN_CLOSE_BLOCK,
+ TOKEN_TYPE_ANNOTATION,
+ TOKEN_TYPE_WHERE,
+ TOKEN_CONST,
+ TOKEN_STRING, /* = 10 */
+ TOKEN_COMMAND,
+ TOKEN_MACROCALL,
+ TOKEN_IDENTIFIER,
+ TOKEN_MODULE,
+ TOKEN_MACRO,
+ TOKEN_FUNCTION,
+ TOKEN_STRUCT,
+ TOKEN_ENUM, /* no keyword in JuliaKeywordTable maps to this value */
+ TOKEN_TYPE,
+ TOKEN_IMPORT, /* = 20 */
+ TOKEN_USING,
+ TOKEN_EXPORT,
+ TOKEN_NEWLINE,
+ TOKEN_SEMICOLON,
+ TOKEN_COMPOSER_KWD, /* KEYWORD only */
+ TOKEN_EOF,
+ TOKEN_COUNT
+} tokenType;
+
+static const keywordTable JuliaKeywordTable [] = { /* keyword text -> tokenType value */
+ /* TODO: Sort by keys. */
+ /* First word of a two-word declaration: parseIdentifier() scans the
+ * following identifier and looks that one up instead. */
+ { "mutable", TOKEN_COMPOSER_KWD },
+ { "primitive", TOKEN_COMPOSER_KWD },
+ { "abstract", TOKEN_COMPOSER_KWD },
+
+ /* Keywords counted as block openers by skipUntil() */
+ { "if", TOKEN_OPEN_BLOCK },
+ { "for", TOKEN_OPEN_BLOCK },
+ { "while", TOKEN_OPEN_BLOCK },
+ { "try", TOKEN_OPEN_BLOCK },
+ { "do", TOKEN_OPEN_BLOCK },
+ { "begin", TOKEN_OPEN_BLOCK },
+ { "let", TOKEN_OPEN_BLOCK },
+ { "quote", TOKEN_OPEN_BLOCK },
+
+ { "module", TOKEN_MODULE },
+ { "baremodule",TOKEN_MODULE },
+
+ { "using", TOKEN_USING },
+ { "import", TOKEN_IMPORT },
+
+ { "export", TOKEN_EXPORT },
+ { "const", TOKEN_CONST },
+ { "macro", TOKEN_MACRO },
+ { "function", TOKEN_FUNCTION },
+ { "struct", TOKEN_STRUCT },
+ { "type", TOKEN_TYPE },
+ { "where", TOKEN_TYPE_WHERE },
+ { "end", TOKEN_CLOSE_BLOCK }, /* counted as a block closer by skipUntil() */
+};
+
+typedef struct { /* Complete state of the hand-written Julia lexer */
+ /* Characters */
+ int prev_c; /* character before cur_c; shifted by advanceChar() */
+ int cur_c; /* character the lexer is currently looking at */
+ int next_c; /* one-character look-ahead read by advanceChar() */
+
+ /* Tokens */
+ bool first_token; /* updated by advanceToken(): true after a newline,
+ * a semicolon, TOKEN_NONE or a leading macro call */
+ int cur_token; /* tokenType value or a raw character code */
+ vString* token_str; /* text collected for the current token */
+ unsigned long line; /* getInputLineNumber() sampled by advanceToken() */
+ MIOPos pos; /* getInputFilePosition() sampled by advanceToken() */
+} lexerState;
+
+/*
+* FUNCTION PROTOTYPES
+*/
+
+static void parseExpr (lexerState *lexer, bool delim, int kind, vString *scope); /* called by parseFunction() and parseModule() ahead of its definition */
+
+static void scanParenBlock (lexerState *lexer); /* called by scanStringOrCommand() ahead of its definition */
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Returns non-zero when the string `what` ends with the suffix `withwhat`,
+ * and zero otherwise. An empty suffix matches every string. */
+static int endswith(const char* what, const char* withwhat)
+{
+    /* strlen() yields size_t; keeping that type avoids narrowing to int */
+    const size_t what_len = strlen(what);
+    const size_t suffix_len = strlen(withwhat);
+
+    if (suffix_len > what_len)
+    {
+        return 0;
+    }
+
+    return strcmp(withwhat, what + (what_len - suffix_len)) == 0;
+}
+
+/* Cuts the scope string back to `old_len` characters, undoing whatever
+ * addToScope() appended after that length was recorded. */
+static void resetScope (vString *scope, size_t old_len)
+{
+    vStringTruncate (scope, old_len);
+}
+
+/* Appends `name` to `scope`. A '.' separator is written first whenever the
+ * scope already contains something. */
+static void addToScope (vString *scope, vString *name)
+{
+    const bool need_separator = vStringLength(scope) > 0;
+
+    if (need_separator)
+    {
+        vStringPut(scope, '.');
+    }
+    vStringCat(scope, name);
+}
+
+/* Slides the (prev_c, cur_c, next_c) window one character forward and reads
+ * the new look-ahead character from the input file. */
+static void advanceChar (lexerState *lexer)
+{
+    const int old_cur = lexer->cur_c;
+
+    lexer->cur_c = lexer->next_c;
+    lexer->prev_c = old_cur;
+    lexer->next_c = getcFromInputFile();
+}
+
+/* Advances the lexer by `n` characters via advanceChar().
+ * A zero or negative `n` is a no-op; the previous `while (n--)` form kept
+ * reading for a negative count. */
+static void advanceNChar (lexerState *lexer, int n)
+{
+    for (int i = 0; i < n; i++)
+    {
+        advanceChar(lexer);
+    }
+}
+
+/* Appends the current character to lexerState::token_str and then reads the
+ * next character from the file.
+ * Nothing is appended once token_str already holds MAX_STRING_LENGTH
+ * characters, nor when the current character is EOF: the end-of-input marker
+ * is not token text, and storing it would add a stray 0xFF byte to, for
+ * instance, an unterminated string. */
+static void advanceAndStoreChar (lexerState *lexer)
+{
+    if (lexer->cur_c != EOF &&
+        vStringLength(lexer->token_str) < MAX_STRING_LENGTH)
+    {
+        vStringPut(lexer->token_str, (char) lexer->cur_c);
+    }
+    advanceChar(lexer);
+}
+
+/* True for a space or a tab. Carriage return and line feed also count, but
+ * only when `newline` is true. */
+static bool isWhitespace (int c, bool newline)
+{
+    switch (c)
+    {
+        case ' ':
+        case '\t':
+            return true;
+        case '\r':
+        case '\n':
+            return newline;
+        default:
+            return false;
+    }
+}
+
+/* True when `c` lies in the 7-bit range 0..0x7F; negative values are
+ * rejected. */
+static bool isAscii (int c)
+{
+    return !(c < 0 || c >= 0x80);
+}
+
+/* True when `c` is one of the single characters treated as an operator:
+ * % ^ & | * - + ~ < > , / ? = :  */
+static bool isOperator (int c)
+{
+    switch (c)
+    {
+        case '%': case '^': case '&': case '|':
+        case '*': case '-': case '+': case '~':
+        case '<': case '>': case ',': case '/':
+        case '?': case '=': case ':':
+            return true;
+        default:
+            return false;
+    }
+}
+
+/* True when `c` may start an identifier: an ASCII letter, '_', or any value
+ * of 0xC0 and above. Unicode letters are not told apart from Unicode
+ * operators. */
+static bool isIdentifierFirstCharacter (int c)
+{
+    if (c >= 0xC0)
+    {
+        return true;
+    }
+    if (!isAscii(c))
+    {
+        return false;
+    }
+    return c == '_' || isalpha (c);
+}
+
+/* True when `c` may continue an identifier: anything accepted by
+ * isIdentifierFirstCharacter(), an ASCII digit, '!', or any value of 0x80 and
+ * above. Unicode letters are not told apart from Unicode operators. */
+static bool isIdentifierCharacter (int c)
+{
+    if (c >= 0x80)
+    {
+        return true;
+    }
+    if (isIdentifierFirstCharacter(c))
+    {
+        return true;
+    }
+    return isAscii(c) && (c == '!' || isdigit(c));
+}
+
+/* Advances past consecutive whitespace characters. `newline` is handed to
+ * isWhitespace() and decides whether line breaks are skipped too. */
+static void skipWhitespace (lexerState *lexer, bool newline)
+{
+    for (;;)
+    {
+        if (!isWhitespace(lexer->cur_c, newline))
+        {
+            return;
+        }
+        advanceChar(lexer);
+    }
+}
+
+/* Decides whether a quote following character `c` is a transpose operator.
+ * That is only allowed after an identifier, a number, a parenthesised
+ * expression or an index, so `c` must be an identifier character, ')' or
+ * ']'. */
+static bool isTranspose (int c)
+{
+    if (c == ')' || c == ']')
+    {
+        return true;
+    }
+    return isIdentifierCharacter(c);
+}
+
+
+/*
+ * Lexer functions
+ * */
+
+/* True when the lexer sits on the start of a type declaration or
+ * inheritance operator: "<:" or ">:" not preceded by '.', or "::". */
+static bool isTypeDecl (lexerState *lexer)
+{
+    const int prev = lexer->prev_c;
+    const int cur = lexer->cur_c;
+    const int next = lexer->next_c;
+
+    const bool subtype_op = (cur == '<' || cur == '>') && next == ':' && prev != '.';
+    const bool annotation = (cur == ':' && next == ':');
+
+    return subtype_op || annotation;
+}
+
+/* True when the current character is a line feed. */
+static bool isNewLine (lexerState *lexer)
+{
+    return lexer->cur_c == '\n';
+}
+
+/* Consumes the current character when it is a line feed.
+ * Returns true if a line feed was consumed, false (lexer untouched)
+ * otherwise. */
+static bool skipNewLine (lexerState *lexer)
+{
+    if (!isNewLine(lexer))
+    {
+        return false;
+    }
+    advanceChar(lexer);
+    return true;
+}
+
+/* Skips one comment; the current character is expected to be '#'.
+ *  - "# ..."   line comment: skipped up to, but not including, the '\n'.
+ *  - "#= =#"   block comment: skipped including the final "=#"; block
+ *              comments nest.
+ * Skipping also stops at EOF. */
+static void skipComment (lexerState *lexer)
+{
+    int depth;
+
+    if (lexer->next_c != '=')
+    {
+        /* line comment: drop the '#', then the rest of the line */
+        advanceChar(lexer);
+        while (lexer->cur_c != EOF && lexer->cur_c != '\n')
+        {
+            advanceChar(lexer);
+        }
+        return;
+    }
+
+    /* block comment: step over the opening "#=" */
+    advanceNChar(lexer, 2);
+    depth = 1;
+    while (depth > 0 && lexer->cur_c != EOF)
+    {
+        const bool closes = (lexer->cur_c == '=' && lexer->next_c == '#');
+        const bool opens = (lexer->cur_c == '#' && lexer->next_c == '=');
+
+        if (closes)
+        {
+            depth--;
+            advanceNChar(lexer, 2);
+        }
+        else if (opens)
+        {
+            depth++;
+            advanceNChar(lexer, 2);
+        }
+        else
+        {
+            advanceChar(lexer);
+        }
+    }
+}
+
+/* Collects an identifier into token_str. The current character is always
+ * stored; after that, characters are stored for as long as
+ * isIdentifierCharacter() accepts them. When `clear` is true, token_str is
+ * emptied first; otherwise the identifier is appended to it. */
+static void scanIdentifier (lexerState *lexer, bool clear)
+{
+    if (clear)
+    {
+        vStringClear(lexer->token_str);
+    }
+
+    advanceAndStoreChar(lexer);
+    while (lexer->cur_c != EOF && isIdentifierCharacter(lexer->cur_c))
+    {
+        advanceAndStoreChar(lexer);
+    }
+}
+
+/* Scan a quote-like expression delimited by the character `c`
+ * (scanString() passes '"', scanCommand() passes '`').
+ * Allow for the triple-character variant and interpolation with `$(`...`)`.
+ * These can last past the end of the line, so be careful
+ * not to store too much of them (see MAX_STRING_LENGTH).
+ * The current character is assumed to be the opening delimiter; the scanned
+ * text is appended to token_str (the caller clears it when needed).
+ * Scanning also stops at EOF. */
+static void scanStringOrCommand (lexerState *lexer, int c)
+{
+ bool istriple = false;
+
+ /* Pass the first "quote"-character */
+ advanceAndStoreChar(lexer);
+
+ /* Check for triple "quote"-character */
+ if (lexer->cur_c == c && lexer->next_c == c)
+ {
+ istriple = true;
+ advanceAndStoreChar(lexer);
+ advanceAndStoreChar(lexer);
+
+ /* Cancel up to 2 "quote"-characters after opening the triple */
+ if (lexer->cur_c == c)
+ {
+ advanceAndStoreChar(lexer);
+ if (lexer->cur_c == c)
+ {
+ advanceAndStoreChar(lexer);
+ }
+ }
+ }
+
+ while (lexer->cur_c != EOF && lexer->cur_c != c)
+ {
+ /* Check for interpolation before checking for end of "quote" */
+ if (lexer->cur_c == '$' && lexer->next_c == '(')
+ {
+ advanceAndStoreChar(lexer);
+ scanParenBlock(lexer);
+ /* continue to avoid advance character again. Correct bug
+ * with "quote"-character just after closing parenthesis */
+ continue;
+ }
+
+ /* A backslash escaping the delimiter or another backslash is stored
+ * together with the character it escapes */
+ if (lexer->cur_c == '\\' &&
+ (lexer->next_c == c || lexer->next_c == '\\'))
+ {
+ advanceAndStoreChar(lexer);
+ }
+ advanceAndStoreChar(lexer);
+
+ /* Cancel up to 2 "quote"-characters if triple string */
+ if (istriple && lexer->cur_c == c)
+ {
+ advanceAndStoreChar(lexer);
+ if (lexer->cur_c == c)
+ {
+ advanceAndStoreChar(lexer);
+ }
+ }
+ }
+ /* Pass the last "quote"-character */
+ advanceAndStoreChar(lexer);
+}
+
+
+/* Scans a command literal delimited by single or triple backticks. */
+static void scanCommand (lexerState *lexer)
+{
+    const int delimiter = '`';
+
+    scanStringOrCommand(lexer, delimiter);
+}
+
+/* Scans a string literal delimited by single or triple double quotes. */
+static void scanString (lexerState *lexer)
+{
+    const int delimiter = '"';
+
+    scanStringOrCommand(lexer, delimiter);
+}
+
+
+/* This deals with character literals: 'n', '\n', '\uFFFF';
+ * and matrix transpose: A'.
+ * We'll use this approximate regexp for the literals:
+ * \' [^'] \' or \' \\ [^']+ \' or \' \\ \' \'
+ * Either way, we'll treat this token as a string, so it gets preserved.
+ * The current character is assumed to be the quote; text is appended to
+ * token_str.
+ * Returns false when the quote(s) were consumed as transpose operators
+ * (decided from prev_c via isTranspose()), true when they were handled as a
+ * (possibly malformed) character literal. */
+static bool scanCharacterOrTranspose (lexerState *lexer)
+{
+ if (isTranspose(lexer->prev_c))
+ {
+ /* deal with untranspose/transpose sequence */
+ while (lexer->cur_c != EOF && lexer->cur_c == '\'')
+ {
+ advanceAndStoreChar(lexer);
+ }
+ return false;
+ }
+
+ //vStringClear(lexer->token_str);
+ advanceAndStoreChar(lexer);
+
+ if (lexer->cur_c == '\\')
+ {
+ advanceAndStoreChar(lexer);
+ /* The \' \\ \' \' (literally '\'') case */
+ if (lexer->cur_c == '\'' && lexer->next_c == '\'')
+ {
+ advanceAndStoreChar(lexer);
+ advanceAndStoreChar(lexer);
+ }
+ /* The \' \\ [^']+ \' case.
+ * NOTE(review): this loop stops ON the closing quote without
+ * consuming it, unlike the other branches which consume it -
+ * confirm that callers rely on this. */
+ else
+ {
+ while (lexer->cur_c != EOF && lexer->cur_c != '\'')
+ {
+ advanceAndStoreChar(lexer);
+ }
+ }
+ }
+ /* The \' [^'] \' and \' \' \' cases */
+ else if (lexer->next_c == '\'')
+ {
+ advanceAndStoreChar(lexer);
+ advanceAndStoreChar(lexer);
+ }
+ /* Otherwise it is malformed */
+ return true;
+}
+
+/* Parse a block with opening and closing character.
+ * `open` / `close` are the delimiter characters; nesting of the same pair is
+ * tracked with a level counter. Comments are skipped, and strings and
+ * character literals are scanned as units. Runs of spaces/tabs are stored
+ * as a single space. When `convert_newline` is true, each line feed is
+ * stored as a space instead of literally.
+ * The block text is appended to token_str. Note that the spaces written
+ * with vStringPut() here are not subject to the MAX_STRING_LENGTH check done
+ * by advanceAndStoreChar(). Scanning also stops at EOF. */
+static void scanBlock (lexerState *lexer, int open, int close, bool convert_newline)
+{
+ /* Assume the current char is `open` */
+ int level = 1;
+
+ /* Pass the first opening */
+ advanceAndStoreChar(lexer);
+
+ while (lexer->cur_c != EOF && level > 0)
+ {
+ /* Parse everything */
+ if (lexer->cur_c == ' ' || lexer->cur_c == '\t')
+ {
+ skipWhitespace(lexer, false);
+ vStringPut(lexer->token_str, ' ');
+ }
+ if (lexer->cur_c == '#')
+ {
+ skipComment(lexer);
+ }
+ else if (lexer->cur_c == '\"')
+ {
+ scanString(lexer);
+ }
+ else if (lexer->cur_c == '\'')
+ {
+ scanCharacterOrTranspose(lexer);
+ }
+
+ /* Parse opening/closing */
+ if (lexer->cur_c == open)
+ {
+ level++;
+ }
+ else if (lexer->cur_c == close)
+ {
+ level--;
+ }
+
+ /* Consume the character examined above (a delimiter or anything else) */
+ if (convert_newline && skipNewLine(lexer))
+ {
+ vStringPut(lexer->token_str, ' ');
+ }
+ else
+ {
+ advanceAndStoreChar(lexer);
+ }
+
+ }
+ /* Lexer position is just after `close` */
+}
+
+
+/* Scans a parenthesised block, for example a function argument list.
+ * Line feeds inside the block are stored as spaces. */
+static void scanParenBlock (lexerState *lexer)
+{
+    const bool newline_as_space = true;
+
+    scanBlock(lexer, '(', ')', newline_as_space);
+}
+
+/* Scans an indexing block enclosed in square brackets; line feeds are kept
+ * as they are.
+ * Some keywords have a special meaning in this environment:
+ * end, begin, for and if */
+static void scanIndexBlock (lexerState *lexer)
+{
+    const bool newline_as_space = false;
+
+    scanBlock(lexer, '[', ']', newline_as_space);
+}
+
+/* Scans a block enclosed in curly brackets (type parametrization).
+ * Line feeds inside the block are stored as spaces. */
+static void scanCurlyBlock (lexerState *lexer)
+{
+    const bool newline_as_space = true;
+
+    scanBlock(lexer, '{', '}', newline_as_space);
+}
+
+/* Scans a type annotation such as `::Type` or `::Type{T}` and appends it to
+ * token_str. The lexer must sit on '<', '>' or ':' followed by ':'. */
+static void scanTypeAnnotation (lexerState *lexer)
+{
+    /* store the two-character operator */
+    for (int stored = 0; stored < 2; stored++)
+    {
+        advanceAndStoreChar(lexer);
+    }
+
+    /* the type name may be separated by whitespace, line breaks included */
+    skipWhitespace(lexer, true);
+    scanIdentifier(lexer, false);
+
+    if (lexer->cur_c != '{')
+    {
+        return;
+    }
+    scanCurlyBlock(lexer);
+}
+
+/* Scan type annotation like
+ * `where Int<:T<:Real`, `where S<:Array{Real}` or `where {S, T}`
+ * Called by parseIdentifier() right after the `where` keyword has been
+ * stored in token_str; everything scanned here is appended to token_str.
+ * Scanning stops at the first character that is none of: '{', an identifier
+ * start, a type operator, '#', a space or a tab (or at EOF).
+ */
+static void scanTypeWhere (lexerState *lexer)
+{
+ /* assume that current token is 'where'
+ * allow line continuation */
+ vStringPut(lexer->token_str, ' ');
+ skipWhitespace(lexer, true);
+
+ while (lexer->cur_c != EOF)
+ {
+
+ if (lexer->cur_c == '{')
+ {
+ scanCurlyBlock(lexer);
+ }
+ else if (isIdentifierFirstCharacter(lexer->cur_c))
+ {
+ scanIdentifier(lexer, false);
+ /* a chained `where`: treat it like the first one */
+ if (endswith(vStringValue(lexer->token_str), "where"))
+ {
+ /* allow line continuation */
+ vStringPut(lexer->token_str, ' ');
+ skipWhitespace(lexer, true);
+ }
+ }
+ else if (isTypeDecl(lexer))
+ {
+ scanTypeAnnotation(lexer);
+ //skipWhitespace(lexer, false);
+ }
+ else if (lexer->cur_c == '#')
+ {
+ skipComment(lexer);
+ /* allow line continuation */
+ if (endswith(vStringValue(lexer->token_str), "where "))
+ {
+ skipWhitespace(lexer, true);
+ }
+ }
+ else if (isWhitespace(lexer->cur_c, false))
+ {
+ while (isWhitespace(lexer->cur_c, false))
+ {
+ advanceChar(lexer);
+ }
+ /* Add a space, if it is not a trailing space */
+ if (!(isNewLine(lexer)))
+ {
+ vStringPut(lexer->token_str, ' ');
+ }
+ }
+ else
+ {
+ break;
+ }
+ }
+}
+
+
+/* Scans an identifier into token_str and classifies it.
+ * When the word is a composer keyword (TOKEN_COMPOSER_KWD), the following
+ * word is scanned and classified in its place. For `where`, the type
+ * constraint that follows is scanned into token_str as well.
+ * Returns the resulting token, which is also stored in cur_token: the keyword
+ * token for the recognised keywords, TOKEN_IDENTIFIER for anything else. */
+static int parseIdentifier (lexerState *lexer)
+{
+    const langType julia = getInputLanguage ();
+    int keyword;
+
+    scanIdentifier(lexer, true);
+    keyword = lookupKeyword (vStringValue(lexer->token_str), julia);
+
+    /* First part of a composed identifier */
+    if (keyword == TOKEN_COMPOSER_KWD)
+    {
+        skipWhitespace(lexer, false);
+        scanIdentifier(lexer, true);
+        keyword = lookupKeyword (vStringValue(lexer->token_str), julia);
+    }
+
+    switch (keyword)
+    {
+        case TOKEN_TYPE_WHERE:
+            scanTypeWhere(lexer);
+            /* fall through */
+        case TOKEN_OPEN_BLOCK:
+        case TOKEN_MODULE:
+        case TOKEN_IMPORT:
+        case TOKEN_USING:
+        case TOKEN_EXPORT:
+        case TOKEN_CONST:
+        case TOKEN_MACRO:
+        case TOKEN_FUNCTION:
+        case TOKEN_STRUCT:
+        case TOKEN_TYPE:
+        case TOKEN_CLOSE_BLOCK:
+            lexer->cur_token = keyword;
+            break;
+        default:
+            lexer->cur_token = TOKEN_IDENTIFIER;
+            break;
+    }
+    return lexer->cur_token;
+}
+
+
+/* Advances the parser one token, optionally skipping whitespace
+ * (otherwise it is concatenated and returned as a single whitespace token).
+ * Whitespace is needed to properly render function signatures. Unrecognized
+ * token starts are stored literally, e.g. token may equal to a character '#'.
+ *
+ * skip_whitespace: when false and spaces, tabs or comments were skipped,
+ * TOKEN_WHITESPACE is returned before the next real token is scanned.
+ * propagate_first: when true, lexerState::first_token is left untouched;
+ * otherwise it is recomputed from the token that was current on entry.
+ *
+ * Returns the new token, which is also stored in lexerState::cur_token: a
+ * tokenType value, or the raw character code for unrecognised characters.
+ * lexerState::line and lexerState::pos are sampled just before the token is
+ * scanned. */
+static int advanceToken (lexerState *lexer, bool skip_whitespace, bool propagate_first)
+{
+ bool have_whitespace = false;
+ bool newline = false; /* never reassigned: line breaks are not skipped below */
+ lexer->line = getInputLineNumber();
+ lexer->pos = getInputFilePosition();
+
+ /* the next token is the first token of the line */
+ if (!propagate_first)
+ {
+ if (lexer->cur_token == TOKEN_NEWLINE ||
+ lexer->cur_token == TOKEN_SEMICOLON ||
+ lexer->cur_token == TOKEN_NONE ||
+ (lexer->first_token && lexer->cur_token == TOKEN_MACROCALL))
+ {
+ lexer->first_token = true;
+ }
+ else
+ {
+ lexer->first_token = false;
+ }
+ }
+
+ while (lexer->cur_c != EOF)
+ {
+ /* skip whitespaces but not newlines */
+ if (isWhitespace(lexer->cur_c, newline))
+ {
+ skipWhitespace(lexer, newline);
+ have_whitespace = true;
+ }
+ else if (lexer->cur_c == '#')
+ {
+ /* comments count as whitespace */
+ skipComment(lexer);
+ have_whitespace = true;
+ }
+ else
+ {
+ if (have_whitespace && !skip_whitespace)
+ {
+ return lexer->cur_token = TOKEN_WHITESPACE;
+ }
+ break;
+ }
+ }
+ /* re-sample the location now that the token start has been reached */
+ lexer->line = getInputLineNumber();
+ lexer->pos = getInputFilePosition();
+ /* every branch below returns, so this loop body runs at most once */
+ while (lexer->cur_c != EOF)
+ {
+ if (lexer->cur_c == '"')
+ {
+ vStringClear(lexer->token_str);
+ scanString(lexer);
+ return lexer->cur_token = TOKEN_STRING;
+ }
+ else if (lexer->cur_c == '\'')
+ {
+ vStringClear(lexer->token_str);
+ if (scanCharacterOrTranspose(lexer))
+ {
+ return lexer->cur_token = TOKEN_STRING;
+ }
+ else
+ {
+ return lexer->cur_token = '\'';
+ }
+ }
+ else if (lexer->cur_c == '`')
+ {
+ vStringClear(lexer->token_str);
+ scanCommand(lexer);
+ return lexer->cur_token = TOKEN_COMMAND;
+ }
+ else if (isIdentifierFirstCharacter(lexer->cur_c))
+ {
+ return parseIdentifier(lexer);
+ }
+ else if (lexer->cur_c == '@')
+ {
+ /* macro call: '@', one unconditional character, then identifier characters */
+ vStringClear(lexer->token_str);
+ advanceAndStoreChar(lexer);
+ do
+ {
+ advanceAndStoreChar(lexer);
+ } while(lexer->cur_c != EOF && isIdentifierCharacter(lexer->cur_c));
+ return lexer->cur_token = TOKEN_MACROCALL;
+ }
+ else if (lexer->cur_c == '(')
+ {
+ vStringClear(lexer->token_str);
+ scanParenBlock(lexer);
+ return lexer->cur_token = TOKEN_PAREN_BLOCK;
+ }
+ else if (lexer->cur_c == '[')
+ {
+ vStringClear(lexer->token_str);
+ scanIndexBlock(lexer);
+ return lexer->cur_token = TOKEN_BRACKET_BLOCK;
+ }
+ else if (lexer->cur_c == '{')
+ {
+ vStringClear(lexer->token_str);
+ scanCurlyBlock(lexer);
+ return lexer->cur_token = TOKEN_CURLY_BLOCK;
+ }
+ else if (isTypeDecl(lexer))
+ {
+ vStringClear(lexer->token_str);
+ scanTypeAnnotation(lexer);
+ return lexer->cur_token = TOKEN_TYPE_ANNOTATION;
+ }
+ else if (skipNewLine(lexer))
+ {
+ /* allow line continuation: after an operator character the
+ * previous token is returned again instead of TOKEN_NEWLINE */
+ if (isOperator(lexer->cur_token))
+ {
+ return lexer->cur_token;
+ }
+ return lexer->cur_token = TOKEN_NEWLINE;
+ }
+ else if (lexer->cur_c == ';')
+ {
+ advanceChar(lexer);
+ return lexer->cur_token = TOKEN_SEMICOLON;
+ }
+ else
+ {
+ /* anything else: the character code itself becomes the token */
+ int c = lexer->cur_c;
+ advanceChar(lexer);
+ return lexer->cur_token = c;
+ }
+ }
+ return lexer->cur_token = TOKEN_EOF;
+}
+
+/* Prepares `lexer` for a new input file: primes the character window,
+ * allocates token_str, skips a leading "#!" line and reads the first token.
+ * The lexer must later be released with deInitLexer(). */
+static void initLexer (lexerState *lexer)
+{
+    /* Give the character window defined values before priming it, so that
+     * advanceChar() never copies indeterminate fields. After the two
+     * advances below prev_c is '\0', as before. */
+    lexer->prev_c = '\0';
+    lexer->cur_c = '\0';
+    lexer->next_c = '\0';
+    advanceNChar(lexer, 2);
+
+    lexer->token_str = vStringNew();
+    lexer->first_token = true;
+    lexer->cur_token = TOKEN_NONE;
+    lexer->prev_c = '\0';
+
+    /* a "#!" at the very start of the file is skipped like a line comment */
+    if (lexer->cur_c == '#' && lexer->next_c == '!')
+    {
+        skipComment(lexer);
+    }
+    advanceToken(lexer, true, false);
+}
+
+/* Releases the token buffer allocated by initLexer() and clears the
+ * pointer. */
+static void deInitLexer (lexerState *lexer)
+{
+    vString *const buffer = lexer->token_str;
+
+    lexer->token_str = NULL;
+    vStringDelete(buffer);
+}
+
+#if 0
+/* Debug aid (compiled out): prints the lexer location, current token,
+ * current character and token text.
+ * The conversion specifiers match the field types (line is unsigned long,
+ * cur_token is int), and the token text is passed as a "%s" argument rather
+ * than as the format string, so a '%' in the source cannot be interpreted
+ * as a conversion. */
+static void debugLexer (lexerState *lexer)
+{
+    printf("Current lexer state: line %lu, token (%d), cur char `%c`, token str:\n\t`%s`\n",
+        lexer->line, lexer->cur_token, lexer->cur_c,
+        vStringValue(lexer->token_str));
+}
+#endif
+
+/* Emits a definition tag named `ident` of the given `kind`.
+ * `arg_list` becomes the tag signature (may be NULL); `line` and `pos` locate
+ * the tag. When `parent_kind` is not K_NONE, the tag is scoped under `scope`
+ * with that kind. `type` is currently not recorded.
+ * Nothing is emitted when `kind` is K_NONE. */
+static void addTag (vString* ident, const char* type, const char* arg_list, int kind, unsigned long line, MIOPos pos, vString *scope, int parent_kind)
+{
+    tagEntryInfo entry;
+
+    if (kind == K_NONE)
+    {
+        return;
+    }
+
+    initTagEntry(&entry, vStringValue(ident), kind);
+    entry.lineNumber = line;
+    entry.filePosition = pos;
+    entry.sourceFileName = getInputFileName();
+    entry.extensionFields.signature = arg_list;
+    /* entry.extensionFields.varType = type; */ /* Needs a workaround */
+
+    if (parent_kind != K_NONE)
+    {
+        entry.extensionFields.scopeKindIndex = parent_kind;
+        entry.extensionFields.scopeName = vStringValue(scope);
+    }
+
+    makeTagEntry(&entry);
+}
+
+/* Emits a reference tag named `ident` with the given `kind` and `role`,
+ * located at `line` / `pos`. When `parent_kind` is not K_NONE, the tag is
+ * scoped under `scope` with that kind.
+ * Nothing is emitted when `kind` is K_NONE. */
+static void addReferenceTag (vString* ident, int kind, int role, unsigned long line, MIOPos pos, vString* scope, int parent_kind)
+{
+    tagEntryInfo entry;
+
+    if (kind == K_NONE)
+    {
+        return;
+    }
+
+    initRefTagEntry(&entry, vStringValue(ident), kind, role);
+    entry.lineNumber = line;
+    entry.filePosition = pos;
+
+    if (parent_kind != K_NONE)
+    {
+        entry.extensionFields.scopeKindIndex = parent_kind;
+        entry.extensionFields.scopeName = vStringValue(scope);
+    }
+
+    makeTagEntry(&entry);
+}
+
+/* Skips tokens until one of `goal_tokens` is met outside of any nested
+ * TOKEN_OPEN_BLOCK / TOKEN_CLOSE_BLOCK pair; the goal token itself is
+ * consumed, leaving the lexer on the token after it.
+ * With `num_goal_tokens` == 0, skipping stops as soon as the nesting depth is
+ * back to zero, without consuming the token that got it there.
+ * Skipping always stops at TOKEN_EOF. */
+static void skipUntil (lexerState *lexer, int goal_tokens[], int num_goal_tokens)
+{
+    int depth = 0;
+
+    while (lexer->cur_token != TOKEN_EOF)
+    {
+        /* goal tokens only count outside nested blocks */
+        if (depth == 0)
+        {
+            bool reached_goal = false;
+
+            for (int i = 0; i < num_goal_tokens && !reached_goal; i++)
+            {
+                reached_goal = (lexer->cur_token == goal_tokens[i]);
+            }
+            if (reached_goal)
+            {
+                /* step past the goal token */
+                advanceToken(lexer, true, false);
+                return;
+            }
+        }
+
+        /* keep track of nested blocks */
+        if (lexer->cur_token == TOKEN_OPEN_BLOCK)
+        {
+            depth++;
+        }
+        else if (lexer->cur_token == TOKEN_CLOSE_BLOCK)
+        {
+            depth--;
+        }
+
+        /* Checked after the depth update so that a starting token which
+         * opens a block is accounted for first */
+        if (num_goal_tokens == 0 && depth == 0)
+        {
+            return;
+        }
+
+        advanceToken(lexer, true, false);
+    }
+}
+
+/* Skips to just past the `end` keyword (TOKEN_CLOSE_BLOCK) that closes the
+ * current block, honouring nested blocks. */
+static void skipUntilEnd (lexerState *lexer)
+{
+    int closing_token = TOKEN_CLOSE_BLOCK;
+
+    skipUntil(lexer, &closing_token, 1);
+}
+
+/* Skips the body of a short-form function, starting right after the '='.
+ * Tokens are consumed until a newline token or EOF is current. A block
+ * keyword met on the way has its whole block skipped up to its `end`.
+ * Line continuation after an operator is handled by advanceToken(). */
+static void skipBody (lexerState *lexer)
+{
+    for (;;)
+    {
+        if (lexer->cur_token == TOKEN_EOF || lexer->cur_token == TOKEN_NEWLINE)
+        {
+            break;
+        }
+
+        advanceToken(lexer, true, false);
+        if (lexer->cur_token != TOKEN_OPEN_BLOCK)
+        {
+            continue;
+        }
+
+        /* step over the keyword, then over its block; skipUntilEnd() leaves
+         * the token after `end` selected */
+        advanceToken(lexer, true, false);
+        skipUntilEnd(lexer);
+    }
+}
+
+/* Short function format:
+ * <ident> ( [<args>] ) [::<type>] [<where>] = [begin] <body> [end]
+ *
+ * Called with the identifier in token_str. A K_FUNCTION tag is emitted only
+ * when the identifier is followed by an argument list and then by a single
+ * '=' (not `==` or `=>`). The signature is the argument list plus any
+ * return-type annotation and `where` clause. `scope` / `parent_kind` are
+ * passed through to addTag().
+ * */
+static void parseShortFunction (lexerState *lexer, vString *scope, int parent_kind)
+{
+ /* assume the current char is just after identifier */
+ vString *name;
+ vString *arg_list;
+ unsigned long line;
+ MIOPos pos;
+
+ /* should be an open parenthesis after identifier
+ * with potentially parametric type */
+ skipWhitespace(lexer, false);
+ if (lexer->cur_c == '{')
+ {
+ /* appended to token_str, so the parameters become part of the name */
+ scanCurlyBlock(lexer);
+ skipWhitespace(lexer, false);
+ }
+
+ if (lexer->cur_c != '(')
+ {
+ /* not a function definition: move on; nothing allocated yet */
+ advanceToken(lexer, true, false);
+ return;
+ }
+
+ name = vStringNewCopy(lexer->token_str);
+ line = lexer->line;
+ pos = lexer->pos;
+
+ /* scan argument list */
+ advanceToken(lexer, true, false);
+ arg_list = vStringNewCopy(lexer->token_str);
+
+ /* scan potential type casting */
+ advanceToken(lexer, true, false);
+ if (lexer->cur_token == TOKEN_TYPE_ANNOTATION)
+ {
+ vStringCat(arg_list, lexer->token_str);
+ advanceToken(lexer, true, false);
+ }
+ /* scan potential type union with 'where' */
+ if (lexer->cur_token == TOKEN_TYPE_WHERE)
+ {
+ vStringPut(arg_list, ' ');
+ vStringCat(arg_list, lexer->token_str);
+ advanceToken(lexer, true, false);
+ }
+
+ /* scan equal sign, ignore `==` and `=>` */
+ if (!(lexer->cur_token == '=' &&
+ lexer->cur_c != '=' &&
+ lexer->cur_c != '>'))
+ {
+ /* a call, not a definition: no tag; the current token is left as is */
+ vStringDelete(name);
+ vStringDelete(arg_list);
+ return;
+ }
+
+ addTag(name, NULL, vStringValue(arg_list), K_FUNCTION, line, pos, scope, parent_kind);
+
+ /* scan until end of function definition */
+ skipBody(lexer);
+
+ /* Should end on a new line, parse next token */
+ advanceToken(lexer, true, false);
+ lexer->first_token = true;
+
+ vStringDelete(name);
+ vStringDelete(arg_list);
+}
+
+/* Function format:
+ * function <ident> ( [<args>] ) [::<type>] [<where>] [<body>] end
+ *
+ * Called with the `function` keyword as current token. For a qualified name
+ * `Mod.name`, the tag is scoped under `Mod` with kind K_MODULE; otherwise it
+ * is scoped under `scope` / `parent_kind`. With an argument list, the body
+ * is parsed by parseExpr() with the function name appended to `scope`.
+ * NOTE(review): `scope` is extended here but not restored in this function -
+ * presumably the caller resets it; confirm.
+ * */
+static void parseFunction (lexerState *lexer, vString *scope, int parent_kind)
+{
+ vString *name;
+ vString *arg_list;
+ vString *local_scope;
+ int local_parent_kind;
+ unsigned long line;
+ MIOPos pos;
+
+ advanceToken(lexer, true, false);
+ if (lexer->cur_token != TOKEN_IDENTIFIER)
+ {
+ return;
+ }
+ else if (lexer->cur_c == '.')
+ {
+ /* qualified name: the identifier just read is the module */
+ local_scope = vStringNewCopy(lexer->token_str);
+ local_parent_kind = K_MODULE;
+ advanceChar(lexer);
+ advanceToken(lexer, true, false);
+ }
+ else
+ {
+ local_scope = vStringNewCopy(scope);
+ local_parent_kind = parent_kind;
+ }
+
+ /* Scan for parametric type constructor */
+ skipWhitespace(lexer, false);
+ if (lexer->cur_c == '{')
+ {
+ /* appended to token_str, so the parameters become part of the name */
+ scanCurlyBlock(lexer);
+ skipWhitespace(lexer, false);
+ }
+
+ name = vStringNewCopy(lexer->token_str);
+ arg_list = vStringNew();
+ line = lexer->line;
+ pos = lexer->pos;
+
+ advanceToken(lexer, true, false);
+ if (lexer->cur_token == TOKEN_PAREN_BLOCK)
+ {
+ vStringCopy(arg_list, lexer->token_str);
+
+ /* scan potential type casting */
+ advanceToken(lexer, true, false);
+ if (lexer->cur_token == TOKEN_TYPE_ANNOTATION)
+ {
+ vStringCat(arg_list, lexer->token_str);
+ advanceToken(lexer, true, false);
+ }
+ /* scan potential type union with 'where' */
+ if (lexer->cur_token == TOKEN_TYPE_WHERE)
+ {
+ vStringPut(arg_list, ' ');
+ vStringCat(arg_list, lexer->token_str);
+ advanceToken(lexer, true, false);
+ }
+
+ addTag(name, NULL, vStringValue(arg_list), K_FUNCTION, line, pos, local_scope, local_parent_kind);
+ addToScope(scope, name);
+ parseExpr(lexer, true, K_FUNCTION, scope);
+ }
+ else if (lexer->cur_token == TOKEN_CLOSE_BLOCK)
+ {
+ /* Function without method */
+ addTag(name, NULL, NULL, K_FUNCTION, line, pos, local_scope, local_parent_kind);
+ /* Go to the closing 'end' keyword */
+ skipUntilEnd(lexer);
+ }
+ /* any other token: no tag is emitted */
+
+ vStringDelete(name);
+ vStringDelete(arg_list);
+ vStringDelete(local_scope);
+}
+
+/* Macro format:
+ * "macro" <ident>()
+ *
+ * Called with the `macro` keyword as current token. A K_MACRO tag is emitted
+ * when the name is followed by an argument list, which becomes the signature.
+ * The definition is then skipped up to its closing `end`. Nothing happens
+ * when the keyword is not followed by an identifier.
+ */
+static void parseMacro (lexerState *lexer, vString *scope, int parent_kind)
+{
+    advanceToken(lexer, true, false);
+    if (lexer->cur_token == TOKEN_IDENTIFIER)
+    {
+        vString *macro_name = vStringNewCopy(lexer->token_str);
+        const unsigned long name_line = lexer->line;
+        const MIOPos name_pos = lexer->pos;
+
+        advanceToken(lexer, true, false);
+        if (lexer->cur_token == TOKEN_PAREN_BLOCK)
+        {
+            addTag(macro_name, NULL, vStringValue(lexer->token_str), K_MACRO, name_line, name_pos, scope, parent_kind);
+        }
+
+        skipUntilEnd(lexer);
+        vStringDelete(macro_name);
+    }
+}
+
+/* Const format:
+ * "const" <ident>
+ *
+ * Called with the `const` keyword as current token. Emits a K_CONSTANT tag
+ * for the identifier. A directly following type annotation is passed as the
+ * tag signature and consumed. Nothing happens when the keyword is not
+ * followed by an identifier.
+ *
+ * The tag location is captured while the identifier is the current token,
+ * as parseMacro() and parseFunction() do; reading lexer->line / lexer->pos
+ * after advancing would report the location of the following token.
+ */
+static void parseConst (lexerState *lexer, vString *scope, int parent_kind)
+{
+    vString *name;
+    unsigned long line;
+    MIOPos pos;
+
+    advanceToken(lexer, true, false);
+    if (lexer->cur_token != TOKEN_IDENTIFIER)
+    {
+        return;
+    }
+
+    name = vStringNewCopy(lexer->token_str);
+    line = lexer->line;
+    pos = lexer->pos;
+
+    advanceToken(lexer, true, false);
+    if (lexer->cur_token == TOKEN_TYPE_ANNOTATION)
+    {
+        addTag(name, "const", vStringValue(lexer->token_str), K_CONSTANT, line, pos, scope, parent_kind);
+        advanceToken(lexer, true, false);
+    }
+    else
+    {
+        addTag(name, "const", NULL, K_CONSTANT, line, pos, scope, parent_kind);
+    }
+
+    vStringDelete(name);
+}
+
+/* Type format:
+ * [ "abstract" | "primitive" ] "type" <ident>
+ *
+ * Called with the `type` keyword as current token. Emits a K_TYPE tag for
+ * the identifier and skips to past the closing `end`. Nothing happens when
+ * the keyword is not followed by an identifier.
+ */
+static void parseType (lexerState *lexer, vString *scope, int parent_kind)
+{
+    advanceToken(lexer, true, false);
+    if (lexer->cur_token == TOKEN_IDENTIFIER)
+    {
+        addTag(lexer->token_str, NULL, NULL, K_TYPE, lexer->line, lexer->pos, scope, parent_kind);
+        skipUntilEnd(lexer);
+    }
+}
+
+/* Module format:
+ * [ "baremodule" | "module" ] <ident>
+ *
+ * Called with the module keyword as current token. Emits a K_MODULE tag for
+ * the identifier, appends the module name to `scope` and parses the module
+ * body with parseExpr(). Nothing happens when the keyword is not followed by
+ * an identifier.
+ */
+static void parseModule (lexerState *lexer, vString *scope, int parent_kind)
+{
+    advanceToken(lexer, true, false);
+    if (lexer->cur_token == TOKEN_IDENTIFIER)
+    {
+        addTag(lexer->token_str, NULL, NULL, K_MODULE, lexer->line, lexer->pos, scope, parent_kind);
+        addToScope(scope, lexer->token_str);
+        advanceToken(lexer, true, false);
+        parseExpr(lexer, true, K_MODULE, scope);
+    }
+}
+
+/*
+ * Handles one comma separated entity of an import/using expression, written
+ * either as "Module" or as "Module.symbol". On entry the lexer is at the end
+ * of "Module"; on return it is at the end of the entity.
+ *
+ *  - "Module": a module reference with role "imported" or "used", depending
+ *    on `token_type`.
+ *  - "import Module.symbol": a module reference with role "namespace", then
+ *    a K_UNKNOWN reference for the symbol, scoped under the module.
+ *  - "using Module.symbol": invalid, so the lexer is advanced over it and
+ *    nothing is tagged.
+ */
+static void parseImportEntity (lexerState *lexer, vString *scope, int token_type, int parent_kind)
+{
+    const bool is_import = (token_type == TOKEN_IMPORT);
+    vString *module_name;
+
+    if (lexer->cur_c != '.')
+    {
+        const int module_role = is_import ? JULIA_MODULE_IMPORTED : JULIA_MODULE_USED;
+
+        addReferenceTag(lexer->token_str, K_MODULE, module_role, lexer->line, lexer->pos, scope, parent_kind);
+        return;
+    }
+
+    if (!is_import)
+    {
+        /* using Module.symbol: skip it without tagging */
+        advanceChar(lexer);
+        advanceToken(lexer, true, false);
+        return;
+    }
+
+    module_name = vStringNewCopy(lexer->token_str);
+    addReferenceTag(module_name, K_MODULE, JULIA_MODULE_NAMESPACE, lexer->line, lexer->pos, scope, parent_kind);
+    advanceChar(lexer);
+    advanceToken(lexer, true, false);
+    addReferenceTag(lexer->token_str, K_UNKNOWN, JULIA_UNKNOWN_IMPORTED, lexer->line, lexer->pos, module_name, K_MODULE);
+    vStringDelete(module_name);
+}
+
+/* Handles import/using expressions that contain a colon, such as:
+ *   import Module: symbol1, symbol2
+ *   using Module: symbol1, symbol2
+ * On entry the lexer is at the end of "Module"; on return it is on the token
+ * after the expression. The module is tagged with role "namespace". Every
+ * listed symbol (identifier or macro call) becomes a K_UNKNOWN reference
+ * scoped under the module, with role "imported" or "used" depending on
+ * `token_type`. One newline is tolerated after the colon and after each
+ * comma. */
+static void parseColonImportExpr (lexerState *lexer, vString *scope, int token_type, int parent_kind)
+{
+    const int symbol_role = (token_type == TOKEN_IMPORT) ? JULIA_UNKNOWN_IMPORTED : JULIA_UNKNOWN_USED;
+    vString *module_name = vStringNewCopy(lexer->token_str);
+
+    addReferenceTag(module_name, K_MODULE, JULIA_MODULE_NAMESPACE, lexer->line, lexer->pos, scope, parent_kind);
+
+    /* step over the ':' and fetch the first symbol */
+    advanceChar(lexer);
+    advanceToken(lexer, true, false);
+    if (lexer->cur_token == TOKEN_NEWLINE)
+    {
+        advanceToken(lexer, true, false);
+    }
+
+    while (lexer->cur_token == TOKEN_IDENTIFIER || lexer->cur_token == TOKEN_MACROCALL)
+    {
+        bool more_symbols;
+
+        addReferenceTag(lexer->token_str, K_UNKNOWN, symbol_role, lexer->line, lexer->pos, module_name, K_MODULE);
+
+        more_symbols = (lexer->cur_c == ',');
+        if (more_symbols)
+        {
+            advanceChar(lexer);
+        }
+        advanceToken(lexer, true, false);
+        if (more_symbols && lexer->cur_token == TOKEN_NEWLINE)
+        {
+            advanceToken(lexer, true, false);
+        }
+    }
+
+    vStringDelete(module_name);
+}
+
+/* Import format:
+ * [ "import" | "using" ] <ident> [: <name>]
+ *
+ * token_type is TOKEN_IMPORT or TOKEN_USING and selects the reference roles.
+ * The colon form is delegated to parseColonImportExpr(); every other form is
+ * a comma-separated list of entities handled by parseImportEntity().
+ */
+static void parseImport (lexerState *lexer, vString *scope, int token_type, int parent_kind)
+{
+	/* capture the imported name */
+	advanceToken(lexer, true, false);
+
+	/* import Mod1: symbol1, symbol2 */
+	/* using Mod1: symbol1, symbol2 */
+	if (lexer->cur_c == ':')
+	{
+		parseColonImportExpr(lexer, scope, token_type, parent_kind);
+		return;
+	}
+
+	/* All other situations, like import/using Mod1, Mod2.symbol1, Mod3... */
+	while (lexer->cur_token == TOKEN_IDENTIFIER || lexer->cur_token == TOKEN_MACROCALL)
+	{
+		parseImportEntity(lexer, scope, token_type, parent_kind);
+
+		if (lexer->cur_c != ',')
+		{
+			advanceToken(lexer, true, false);
+			continue;
+		}
+
+		/* Skip the ',' and allow the list to continue on the following line. */
+		advanceChar(lexer);
+		advanceToken(lexer, true, false);
+		if (lexer->cur_token == TOKEN_NEWLINE)
+		{
+			advanceToken(lexer, true, false);
+		}
+	}
+}
+
+/* Structs format:
+ * "struct" <ident>[{<param>}] [<:<type>]; <fields> <inner constructor> end
+ *
+ * Emits a K_STRUCT tag for the struct (with the curly-brace parametrization
+ * as its signature when present), then a K_FIELD tag for each identifier that
+ * starts a line inside the body. Inner constructors, either in short form
+ * "<name>(...) = ..." or introduced by "function", are handed to
+ * parseShortFunction()/parseFunction() with K_STRUCT as parent kind.
+ * Returns without tagging anything if no identifier follows "struct".
+ * */
+static void parseStruct (lexerState *lexer, vString *scope, int parent_kind)
+{
+ vString *name;
+ vString *field;
+ size_t old_scope_len;
+ unsigned long line;
+ MIOPos pos;
+
+ advanceToken(lexer, true, false);
+ if (lexer->cur_token != TOKEN_IDENTIFIER)
+ {
+ /* nothing has been allocated yet, so a plain return is safe */
+ return;
+ }
+
+ /* remember the struct name and where it was found before reading further */
+ name = vStringNewCopy(lexer->token_str);
+ field = vStringNew();
+ line = lexer->line;
+ pos = lexer->pos;
+
+ /* scan parametrization */
+ advanceToken(lexer, true, false);
+ if (lexer->cur_token == TOKEN_CURLY_BLOCK)
+ {
+ addTag(name, NULL, vStringValue(lexer->token_str), K_STRUCT, line, pos, scope, parent_kind);
+ advanceToken(lexer, true, false);
+ }
+ else
+ {
+ addTag(name, NULL, NULL, K_STRUCT, line, pos, scope, parent_kind);
+ }
+ /* everything tagged from here on is scoped to the struct */
+ addToScope(scope, name);
+
+ /* skip inheritance */
+ if (lexer->cur_token == TOKEN_TYPE_ANNOTATION)
+ {
+ advanceToken(lexer, true, false);
+ }
+
+ /* keep the struct scope in memory to reset it after parsing constructors */
+ old_scope_len = vStringLength(scope);
+ /* Parse fields and inner constructors */
+ while (lexer->cur_token != TOKEN_EOF && lexer->cur_token != TOKEN_CLOSE_BLOCK)
+ {
+ /* only an identifier that is the first token of its line can be a field or constructor */
+ if (lexer->cur_token == TOKEN_IDENTIFIER && lexer->first_token)
+ {
+ if (strcmp(vStringValue(lexer->token_str), vStringValue(name)) == 0)
+ {
+ /* inner constructor */
+ parseShortFunction(lexer, scope, K_STRUCT);
+ /* NOTE(review): this path skips the resetScope() at the end of the loop body */
+ continue;
+ }
+
+ vStringCopy(field, lexer->token_str);
+
+ /* parse type annotation */
+ advanceToken(lexer, true, false);
+ if (lexer->cur_token == TOKEN_TYPE_ANNOTATION)
+ {
+ /* the annotation text is stored as the field's signature */
+ addTag(field, NULL, vStringValue(lexer->token_str), K_FIELD, lexer->line, lexer->pos, scope, K_STRUCT);
+ advanceToken(lexer, true, false);
+ }
+ else
+ {
+ addTag(field, NULL, NULL, K_FIELD, lexer->line, lexer->pos, scope, K_STRUCT);
+ }
+ }
+ else if (lexer->cur_token == TOKEN_FUNCTION)
+ {
+ /* inner constructor */
+ parseFunction(lexer, scope, K_STRUCT);
+ }
+ else
+ {
+ /* Get next token */
+ advanceToken(lexer, true, false);
+ }
+ /* drop anything a nested parser appended to the scope */
+ resetScope(scope, old_scope_len);
+ }
+
+ vStringDelete(name);
+ vStringDelete(field);
+}
+
+
+/* Main dispatch loop of the parser.
+ *
+ * Reads tokens until end of file and hands each construct (const, function,
+ * macro, module, struct, type, import, using, short function definition) to
+ * its dedicated parser.
+ *
+ * lexer: lexer state, advanced as tokens are consumed
+ * delim: when true, stop as soon as the block nesting level, which starts at
+ *        1 and is tracked through TOKEN_OPEN_BLOCK/TOKEN_CLOSE_BLOCK, drops
+ *        to zero or below
+ * kind:  kind of the enclosing entity, passed on as the parent kind
+ * scope: current scope string; it is cut back to the length it had at the
+ *        start of each iteration once that iteration's construct is parsed
+ */
+static void parseExpr (lexerState *lexer, bool delim, int kind, vString *scope)
+{
+	int level = 1;
+	size_t old_scope_len;
+	vString *local_scope = NULL;
+
+	while (lexer->cur_token != TOKEN_EOF)
+	{
+		old_scope_len = vStringLength(scope);
+		/* Advance token and update if this is a new line */
+		while (lexer->cur_token == TOKEN_NEWLINE ||
+		       lexer->cur_token == TOKEN_SEMICOLON ||
+		       lexer->cur_token == TOKEN_NONE )
+		{
+			advanceToken(lexer, true, false);
+		}
+
+		/* Make sure every case advances the token
+		 * otherwise we can be stuck in infinite loop */
+		switch (lexer->cur_token)
+		{
+			case TOKEN_CONST:
+				parseConst(lexer, scope, kind);
+				break;
+			case TOKEN_FUNCTION:
+				parseFunction(lexer, scope, kind);
+				break;
+			case TOKEN_MACRO:
+				parseMacro(lexer, scope, kind);
+				break;
+			case TOKEN_MODULE:
+				parseModule(lexer, scope, kind);
+				break;
+			case TOKEN_STRUCT:
+				parseStruct(lexer, scope, kind);
+				break;
+			case TOKEN_TYPE:
+				parseType(lexer, scope, kind);
+				break;
+			case TOKEN_IMPORT:
+				parseImport(lexer, scope, TOKEN_IMPORT, kind);
+				break;
+			case TOKEN_USING:
+				parseImport(lexer, scope, TOKEN_USING, kind);
+				/* Must not fall through: parseImport() already left the lexer
+				 * on the token after the statement, and the identifier case
+				 * below would consume or mis-tag that token. */
+				break;
+			case TOKEN_IDENTIFIER:
+				if (lexer->first_token && lexer->cur_c == '.')
+				{
+					/* "Module.name(...) = ...": use the module as scope */
+					if (local_scope == NULL)
+					{
+						local_scope = vStringNew();
+					}
+					vStringCopy(local_scope, lexer->token_str);
+					advanceChar(lexer);
+					/* next token, but keep the first_token value */
+					advanceToken(lexer, true, true);
+					skipWhitespace(lexer, false);
+					if (lexer->cur_c == '(')
+					{
+						parseShortFunction(lexer, local_scope, K_MODULE);
+					}
+				}
+				else
+				{
+					skipWhitespace(lexer, false);
+					if (lexer->first_token && (lexer->cur_c == '(' || lexer->cur_c == '{'))
+					{
+						parseShortFunction(lexer, scope, kind);
+					}
+					else
+					{
+						advanceToken(lexer, true, false);
+					}
+				}
+				break;
+			case TOKEN_OPEN_BLOCK:
+				level++;
+				advanceToken(lexer, true, false);
+				break;
+			case TOKEN_CLOSE_BLOCK:
+				level--;
+				advanceToken(lexer, true, false);
+				break;
+			default:
+				advanceToken(lexer, true, false);
+				break;
+		}
+		resetScope(scope, old_scope_len);
+		if (delim && level <= 0)
+		{
+			break;
+		}
+	}
+	/* local_scope is still NULL when no "Module.name" line was seen */
+	vStringDelete(local_scope);
+}
+
+/* Parser callback: set up a lexer, parse the input at top level starting
+ * from an empty scope, then release the scope string and the lexer. */
+static void findJuliaTags (void)
+{
+	lexerState lexer;
+	vString *const root_scope = vStringNew();
+
+	initLexer(&lexer);
+
+	/* top level: no parent kind, and no closing delimiter ends the parse */
+	parseExpr(&lexer, false, K_NONE, root_scope);
+
+	vStringDelete(root_scope);
+	deInitLexer(&lexer);
+}
+
+/* Build the parser definition for Julia: the kind table, the "jl" file
+ * extension, the keyword table and findJuliaTags() as the parse callback. */
+extern parserDefinition* JuliaParser (void)
+{
+	static const char *const julia_extensions [] = { "jl", NULL };
+	parserDefinition *const parser = parserNew ("Julia");
+
+	parser->extensions   = julia_extensions;
+	parser->parser       = findJuliaTags;
+
+	parser->kindTable    = JuliaKinds;
+	parser->kindCount    = ARRAY_SIZE (JuliaKinds);
+
+	parser->keywordTable = JuliaKeywordTable;
+	parser->keywordCount = ARRAY_SIZE (JuliaKeywordTable);
+
+	return parser;
+}
Modified: data/Makefile.am
1 lines changed, 1 insertions(+), 0 deletions(-)
===================================================================
@@ -44,6 +44,7 @@ filetypes_dist = \
filedefs/filetypes.html \
filedefs/filetypes.java \
filedefs/filetypes.javascript \
+ filedefs/filetypes.julia \
filedefs/filetypes.JSON.conf \
filedefs/filetypes.latex \
filedefs/filetypes.lisp \
Modified: data/filedefs/filetypes.common
3 lines changed, 3 insertions(+), 0 deletions(-)
===================================================================
@@ -132,11 +132,13 @@ comment_doc_keyword_error=comment_doc,italic
number=0x007f00
number_1=number
number_2=number_1
+number_3=0x808000
type=0x0000d0;;true;false
class=type
function=0x000080
parameter=function
+annotation=0x8080ff;;true;false
keyword=0x00007f;;true;false
keyword_1=keyword
@@ -163,6 +165,7 @@ preprocessor=0x007f7f
regex=number_1
operator=0x301010
decorator=string_1,bold
+macro=preprocessor,bold
other=0x404080
tag=type
Modified: data/filedefs/filetypes.julia
81 lines changed, 81 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,81 @@
+# filetypes.julia
+#
+# For complete documentation of this file, please see Geany's main documentation
+#
+# Keywords from the Pygments lexer (http://pygments.org/)
+# and from vim parser (https://github.com/JuliaEditorSupport/julia-vim/)
+#
+
+[styling]
+default=default
+comment=comment
+number=number_3
+keyword1=keyword_1
+keyword2=keyword_2
+keyword3=number_3
+keyword4=identifier_1
+char=string_1
+operator=operator
+bracket=operator
+identifier=identifier_1
+string=string_2
+symbol=string_1
+macro=macro
+stringinterp=default
+docstring=string_2
+stringliteral=string_2,bold
+command=default,italic
+commandliteral=default,italic,bold
+typeoperator=annotation
+typeannotation=keyword_2
+lexerror=error
+
+[keywords]
+# all items must be in one line
+# primary should contain at least the reserved keyword (for, if, begin, end, ...)
+primary=baremodule begin break catch const continue do else elseif end export finally for function global if import let local macro module quote return struct try using while abstract mutable primitive type where in isa as
+secondary=Main Base Core Any AbstractArray AbstractRange LinRange OrdinalRange AbstractUnitRange UnitRange StepRange StepRangeLen BitArray CartesianIndices DenseArray Array LinearIndices PermutedDimsArray SubArray AbstractChannel Channel AbstractChar Char AbstractDict Dict IdDict WeakKeyDict AbstractDisplay TextDisplay AbstractSet BitSet Set AbstractString String SubString SubstitutionString Cstring Cwstring Enum Exception ArgumentError AssertionError BoundsError CapturedException CompositeException DimensionMismatch DivideError DomainError EOFError ErrorException InexactError InterruptException InvalidStateException KeyError MethodError MissingException OutOfMemoryError OverflowError ProcessFailedException ReadOnlyMemoryError SegmentationFault StackOverflowError StringIndexError SystemError TaskFailedException TypeError UndefKeywordError UndefRefError UndefVarError ExponentialBackOff Expr GlobalRef HTML IO IOStream IndexStyle IndexCartesian IndexLinear LineNumberNode MIME Method MethodSummary Missing Module NamedTuple Nothing Number Complex Real AbstractFloat BigFloat Float16 Float32 Float64 AbstractIrrational Irrational Integer Bool Signed BigInt Int Int128 Int16 Int32 Int64 Int8 Unsigned UInt UInt128 UInt16 UInt32 UInt64 UInt8 Rational Pair QuoteNode RawFD Ref Ptr Regex RegexMatch RoundingMode Some Symbol Task Text Timer Tuple Type DataType Union UnionAll TypeVar UndefInitializer Val Vararg VecElement VersionNumber WeakRef AbstractVector DenseVector StridedVector AbstractMatrix DenseMatrix StridedMatrix AbstractVecOrMat DenseVecOrMat StridedVecOrMat
+tertiary=true false missing Inf NaN pi stdin stdout stderr devnull nothing undef ARGS ENV ENDIAN_BOM LOAD_PATH VERSION PROGRAM_FILE DEPOT_PATH
+functions=
+
+[lexer_properties]
+# Fold multiline triple-doublequote strings, usually used to document a function or type above the definition.
+fold.julia.docstring=1
+
+# Set this property to 0 to disable syntax based folding.
+fold.julia.syntax.based=1
+
+# This option enables highlighting of the type identifier after `::`.
+lexer.julia.highlight.typeannotation=0
+
+# This option enables highlighting of syntax errors in character or number definitions.
+lexer.julia.highlight.lexerror=0
+
+[settings]
+lexer_filetype=Julia
+tag_parser=Julia
+
+# default extension used when saving files
+extension=jl
+
+# MIME type
+mime_type=text/x-julia
+
+# single comments, like # in this file
+comment_single=#
+# multiline comments
+comment_open=#=
+comment_close==#
+
+[indentation]
+width=4
+# 0 is spaces, 1 is tabs, 2 is tab & spaces
+type=0
+
+
+[build-menu]
+# %f will be replaced by the complete filename
+# %e will be replaced by the filename without extension
+# (use only one of it at one time)
+compiler=
+run_cmd=julia "%f"
Modified: data/filetype_extensions.conf
1 lines changed, 1 insertions(+), 0 deletions(-)
===================================================================
@@ -43,6 +43,7 @@ HTML=*.htm;*.html;*.shtml;*.hta;*.htd;*.htt;*.cfm;*.tpl;
Java=*.java;*.jsp;
Javascript=*.js;
JSON=*.json;
+Julia=*.jl;
Kotlin=*.kt;*.kts;
LaTeX=*.tex;*.sty;*.idx;*.ltx;*.latex;*.aux;
Lisp=*.lisp;
Modified: scintilla/Makefile.am
1 lines changed, 1 insertions(+), 0 deletions(-)
===================================================================
@@ -25,6 +25,7 @@ lexers/LexForth.cxx \
lexers/LexFortran.cxx \
lexers/LexHTML.cxx \
lexers/LexHaskell.cxx \
+lexers/LexJulia.cxx \
lexers/LexLaTeX.cxx \
lexers/LexLisp.cxx \
lexers/LexLua.cxx \
Modified: scintilla/include/SciLexer.h
23 lines changed, 23 insertions(+), 0 deletions(-)
===================================================================
@@ -144,6 +144,7 @@
#define SCLEX_DATAFLEX 129
#define SCLEX_HOLLYWOOD 130
#define SCLEX_RAKU 131
+#define SCLEX_JULIA 133
#define SCLEX_LPEG 999
#define SCLEX_AUTOMATIC 1000
#define SCE_P_DEFAULT 0
@@ -932,6 +933,28 @@
#define SCE_ERLANG_MODULES 23
#define SCE_ERLANG_MODULES_ATT 24
#define SCE_ERLANG_UNKNOWN 31
+#define SCE_JULIA_DEFAULT 0
+#define SCE_JULIA_COMMENT 1
+#define SCE_JULIA_NUMBER 2
+#define SCE_JULIA_KEYWORD1 3
+#define SCE_JULIA_KEYWORD2 4
+#define SCE_JULIA_KEYWORD3 5
+#define SCE_JULIA_CHAR 6
+#define SCE_JULIA_OPERATOR 7
+#define SCE_JULIA_BRACKET 8
+#define SCE_JULIA_IDENTIFIER 9
+#define SCE_JULIA_STRING 10
+#define SCE_JULIA_SYMBOL 11
+#define SCE_JULIA_MACRO 12
+#define SCE_JULIA_STRINGINTERP 13
+#define SCE_JULIA_DOCSTRING 14
+#define SCE_JULIA_STRINGLITERAL 15
+#define SCE_JULIA_COMMAND 16
+#define SCE_JULIA_COMMANDLITERAL 17
+#define SCE_JULIA_TYPEANNOT 18
+#define SCE_JULIA_LEXERROR 19
+#define SCE_JULIA_KEYWORD4 20
+#define SCE_JULIA_TYPEOPERATOR 21
#define SCE_MSSQL_DEFAULT 0
#define SCE_MSSQL_COMMENT 1
#define SCE_MSSQL_LINE_COMMENT 2
Modified: scintilla/include/Scintilla.iface
25 lines changed, 25 insertions(+), 0 deletions(-)
===================================================================
@@ -3255,6 +3255,7 @@ val SCLEX_X12=128
val SCLEX_DATAFLEX=129
val SCLEX_HOLLYWOOD=130
val SCLEX_RAKU=131
+val SCLEX_JULIA=133
val SCLEX_LPEG=999
# When a lexer specifies its language as SCLEX_AUTOMATIC it receives a
@@ -4162,6 +4163,30 @@ val SCE_ERLANG_MODULES_ATT=24
val SCE_ERLANG_UNKNOWN=31
# Lexical states for SCLEX_OCTAVE are identical to MatLab
lex Octave=SCLEX_OCTAVE SCE_MATLAB_
+# Lexical states for SCLEX_JULIA
+lex Julia=SCLEX_JULIA SCE_JULIA_
+val SCE_JULIA_DEFAULT=0
+val SCE_JULIA_COMMENT=1
+val SCE_JULIA_NUMBER=2
+val SCE_JULIA_KEYWORD1=3
+val SCE_JULIA_KEYWORD2=4
+val SCE_JULIA_KEYWORD3=5
+val SCE_JULIA_CHAR=6
+val SCE_JULIA_OPERATOR=7
+val SCE_JULIA_BRACKET=8
+val SCE_JULIA_IDENTIFIER=9
+val SCE_JULIA_STRING=10
+val SCE_JULIA_SYMBOL=11
+val SCE_JULIA_MACRO=12
+val SCE_JULIA_STRINGINTERP=13
+val SCE_JULIA_DOCSTRING=14
+val SCE_JULIA_STRINGLITERAL=15
+val SCE_JULIA_COMMAND=16
+val SCE_JULIA_COMMANDLITERAL=17
+val SCE_JULIA_TYPEANNOT=18
+val SCE_JULIA_LEXERROR=19
+val SCE_JULIA_KEYWORD4=20
+val SCE_JULIA_TYPEOPERATOR=21
# Lexical states for SCLEX_MSSQL
lex MSSQL=SCLEX_MSSQL SCE_MSSQL_
val SCE_MSSQL_DEFAULT=0
Modified: scintilla/julia_lexilla_v5.patch
36 lines changed, 36 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,36 @@
+diff --git a/lexilla/lexers/LexJulia.cxx b/geany/scintilla/lexers/LexJulia.cxx
+index 6730074..ccf947d 100644
+--- a/lexilla/lexers/LexJulia.cxx
++++ b/geany/scintilla/lexers/LexJulia.cxx
+@@ -39,7 +39,8 @@
+ #include "DefaultLexer.h"
+
+ using namespace Scintilla;
+-using namespace Lexilla;
++// Geany still uses Scintilla v3.5
++//using namespace Lexilla;
+
+ static const int MAX_JULIA_IDENT_CHARS = 1023;
+
+@@ -138,7 +139,9 @@ public:
+ delete this;
+ }
+ int SCI_METHOD Version() const override {
+- return lvRelease5;
++ // Geany still uses Scintilla v3.5
++ //return lvRelease5;
++ return lvIdentity;
+ }
+ const char * SCI_METHOD PropertyNames() override {
+ return osJulia.PropertyNames();
+@@ -163,7 +166,9 @@ public:
+ return 0;
+ }
+
+- static ILexer5 *LexerFactoryJulia() {
++ // Geany still uses Scintilla v3.5
++ //static ILexer5 *LexerFactoryJulia() {
++ static ILexer *LexerFactoryJulia() {
+ return new LexerJulia();
+ }
+ };
Modified: scintilla/lexers/LexJulia.cxx
1269 lines changed, 1269 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,1269 @@
+// Scintilla source code edit control
+// Encoding: UTF-8
+/** @file LexJulia.cxx
+ ** Lexer for Julia.
+ ** Reusing code from LexMatlab, LexPython and LexRust
+ **
+ ** Written by Bertrand Lacoste
+ **
+ **/
+// Copyright 1998-2001 by Neil Hodgson <neilh at scintilla.org>
+// The License.txt file describes the conditions under which this software may be distributed.
+
+#include <cstdlib>
+#include <cassert>
+#include <cstring>
+
+#include <string>
+#include <string_view>
+#include <vector>
+#include <map>
+#include <algorithm>
+#include <functional>
+
+#include "ILexer.h"
+#include "Scintilla.h"
+#include "SciLexer.h"
+
+#include "StringCopy.h"
+#include "PropSetSimple.h"
+#include "StringCopy.h"
+#include "WordList.h"
+#include "LexAccessor.h"
+#include "Accessor.h"
+#include "StyleContext.h"
+#include "CharacterSet.h"
+#include "CharacterCategory.h"
+#include "LexerModule.h"
+#include "OptionSet.h"
+#include "DefaultLexer.h"
+
+using namespace Scintilla;
+// Geany still uses Scintilla v3.5
+//using namespace Lexilla;
+
+// NOTE(review): presumably the maximum identifier length the lexer buffers;
+// the code that uses it is outside this chunk, so confirm before relying on it.
+static const int MAX_JULIA_IDENT_CHARS = 1023;
+
+// User-configurable switches of LexerJulia. Each field is bound to a property
+// name by OptionSetJulia; the constructor supplies the defaults.
+struct OptionsJulia {
+	bool fold;
+	bool foldComment;
+	bool foldCompact;
+	bool foldDocstring;
+	bool foldSyntaxBased;
+	bool highlightTypeannotation;
+	bool highlightLexerror;
+	// Folding is on by default (except compact folding); the two optional
+	// highlighting features start disabled.
+	OptionsJulia() :
+		fold(true),
+		foldComment(true),
+		foldCompact(false),
+		foldDocstring(true),
+		foldSyntaxBased(true),
+		highlightTypeannotation(false),
+		highlightLexerror(false) {
+	}
+};
+
+// Descriptions of the keyword lists, in the same order as the list indices
+// handled by LexerJulia::WordListSet(). The array ends with a null entry.
+const char * const juliaWordLists[] = {
+	"Primary keywords and identifiers",
+	"Built in types",
+	"Other keywords",
+	"Built in functions",
+	nullptr,
+};
+
+// Maps property names onto the fields of OptionsJulia and registers the
+// descriptions of the keyword lists. The generic fold properties are defined
+// without a description; the Julia-specific ones carry one.
+struct OptionSetJulia : public OptionSet<OptionsJulia> {
+ OptionSetJulia() {
+ // generic folding properties
+ DefineProperty("fold", &OptionsJulia::fold);
+
+ DefineProperty("fold.compact", &OptionsJulia::foldCompact);
+
+ DefineProperty("fold.comment", &OptionsJulia::foldComment);
+
+ // Julia-specific folding properties
+ DefineProperty("fold.julia.docstring", &OptionsJulia::foldDocstring,
+ "Fold multiline triple-doublequote strings, usually used to document a function or type above the definition.");
+
+ DefineProperty("fold.julia.syntax.based", &OptionsJulia::foldSyntaxBased,
+ "Set this property to 0 to disable syntax based folding.");
+
+ // optional highlighting features
+ DefineProperty("lexer.julia.highlight.typeannotation", &OptionsJulia::highlightTypeannotation,
+ "This option enables highlighting of the type identifier after `::`.");
+
+ DefineProperty("lexer.julia.highlight.lexerror", &OptionsJulia::highlightLexerror,
+ "This option enables highlighting of syntax error int character or number definition.");
+
+ DefineWordListSets(juliaWordLists);
+ }
+};
+
+// Description table of the lexer's styles, passed to the DefaultLexer
+// constructor by LexerJulia. Each row is: style number, SCE_JULIA_* name,
+// space-separated tags, human-readable description. The numbers match the
+// SCE_JULIA_* values.
+LexicalClass juliaLexicalClasses[] = {
+ // Lexer Julia SCLEX_JULIA SCE_JULIA_:
+ 0, "SCE_JULIA_DEFAULT", "default", "White space",
+ 1, "SCE_JULIA_COMMENT", "comment", "Comment",
+ 2, "SCE_JULIA_NUMBER", "literal numeric", "Number",
+ 3, "SCE_JULIA_KEYWORD1", "keyword", "Reserved keywords",
+ 4, "SCE_JULIA_KEYWORD2", "identifier", "Builtin type names",
+ 5, "SCE_JULIA_KEYWORD3", "identifier", "Constants",
+ 6, "SCE_JULIA_CHAR", "literal string character", "Single quoted string",
+ 7, "SCE_JULIA_OPERATOR", "operator", "Operator",
+ 8, "SCE_JULIA_BRACKET", "bracket operator", "Bracket operator",
+ 9, "SCE_JULIA_IDENTIFIER", "identifier", "Identifier",
+ 10, "SCE_JULIA_STRING", "literal string", "Double quoted String",
+ 11, "SCE_JULIA_SYMBOL", "literal string symbol", "Symbol",
+ 12, "SCE_JULIA_MACRO", "macro preprocessor", "Macro",
+ 13, "SCE_JULIA_STRINGINTERP", "literal string interpolated", "String interpolation",
+ 14, "SCE_JULIA_DOCSTRING", "literal string documentation", "Docstring",
+ 15, "SCE_JULIA_STRINGLITERAL", "literal string", "String literal prefix",
+ 16, "SCE_JULIA_COMMAND", "literal string command", "Command",
+ 17, "SCE_JULIA_COMMANDLITERAL", "literal string command", "Command literal prefix",
+ 18, "SCE_JULIA_TYPEANNOT", "identifier type", "Type annotation identifier",
+ 19, "SCE_JULIA_LEXERROR", "lexer error", "Lexing error",
+ 20, "SCE_JULIA_KEYWORD4", "identifier", "Builtin function names",
+ 21, "SCE_JULIA_TYPEOPERATOR", "operator type", "Type annotation operator",
+};
+
+// Lexer object for Julia. It owns the four keyword lists and the option
+// values, forwards the property and word-list queries to OptionSetJulia, and
+// declares Lex() and Fold(), which are defined outside the class body.
+class LexerJulia : public DefaultLexer {
+ // word lists 0..3, filled in by WordListSet()
+ WordList keywords;
+ WordList identifiers2;
+ WordList identifiers3;
+ WordList identifiers4;
+ // current option values and the property-name mapping that updates them
+ OptionsJulia options;
+ OptionSetJulia osJulia;
+public:
+ explicit LexerJulia() :
+ DefaultLexer("julia", SCLEX_JULIA, juliaLexicalClasses, ELEMENTS(juliaLexicalClasses)) {
+ }
+ virtual ~LexerJulia() {
+ }
+ // The lexer destroys itself when released.
+ void SCI_METHOD Release() override {
+ delete this;
+ }
+ int SCI_METHOD Version() const override {
+ // Geany still uses Scintilla v3.5
+ //return lvRelease5;
+ return lvIdentity;
+ }
+ // Property metadata queries are answered by the option set.
+ const char * SCI_METHOD PropertyNames() override {
+ return osJulia.PropertyNames();
+ }
+ int SCI_METHOD PropertyType(const char *name) override {
+ return osJulia.PropertyType(name);
+ }
+ const char * SCI_METHOD DescribeProperty(const char *name) override {
+ return osJulia.DescribeProperty(name);
+ }
+ Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
+ const char * SCI_METHOD PropertyGet(const char *key) override {
+ return osJulia.PropertyGet(key);
+ }
+ const char * SCI_METHOD DescribeWordListSets() override {
+ return osJulia.DescribeWordListSets();
+ }
+ Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
+ void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
+ void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
+ // No private calls are supported; always returns a null pointer.
+ void * SCI_METHOD PrivateCall(int, void *) override {
+ return 0;
+ }
+
+ // Factory function: returns a newly allocated LexerJulia.
+ // Geany still uses Scintilla v3.5
+ //static ILexer5 *LexerFactoryJulia() {
+ static ILexer *LexerFactoryJulia() {
+ return new LexerJulia();
+ }
+};
+
+// Apply a property to the option values.
+// Returns 0 when the option set reports that the value changed, -1 otherwise.
+Sci_Position SCI_METHOD LexerJulia::PropertySet(const char *key, const char *val) {
+	const bool changed = osJulia.PropertySet(&options, key, val);
+	return changed ? 0 : -1;
+}
+
+// Replace keyword list number n (0..3) with the words in wl.
+// Returns 0 when the list content changed, -1 when n is out of range or the
+// new content is the same as the old.
+Sci_Position SCI_METHOD LexerJulia::WordListSet(int n, const char *wl) {
+	WordList *const lists[] = { &keywords, &identifiers2, &identifiers3, &identifiers4 };
+	const int listCount = static_cast<int>(sizeof(lists) / sizeof(lists[0]));
+
+	if (n < 0 || n >= listCount) {
+		return -1;
+	}
+
+	// Parse the new words separately so an unchanged list can be detected.
+	WordList *const target = lists[n];
+	WordList candidate;
+	candidate.Set(wl);
+	if (*target != candidate) {
+		target->Set(wl);
+		return 0;
+	}
+	return -1;
+}
+
+// True for the ASCII characters this lexer treats as operator characters.
+static inline bool IsJuliaOperator(int ch) {
+	switch (ch) {
+	case '%': case '^': case '&': case '*':
+	case '-': case '+': case '=': case '|':
+	case '<': case '>': case '/': case '~':
+	case '\\':
+		return true;
+	default:
+		return false;
+	}
+}
+
+// True for the non-ASCII code points accepted as unary operators.
+static inline bool IsJuliaUnaryOperator (int ch) {
+	switch (ch) {
+	case 0x00ac: // ¬
+	case 0x221a: // √
+	case 0x221b: // ∛
+	case 0x221c: // ∜
+	case 0x22c6: // ⋆
+	case 0x00b1: // ±
+	case 0x2213: // ∓
+		return true;
+	default:
+		return false;
+	}
+}
+
+// True for round, curly and square brackets, opening or closing.
+static inline bool IsJuliaParen (int ch) {
+	switch (ch) {
+	case '(': case ')':
+	case '{': case '}':
+	case '[': case ']':
+		return true;
+	default:
+		return false;
+	}
+}
+
+// Unicode parsing from Julia source code:
+// https://github.com/JuliaLang/julia/blob/master/src/flisp/julia_extensions.c
+// keep the same function name to be easy to find again
+//
+// Returns non-zero when the code point wc may start an identifier: letters
+// and letter numbers, currency symbols, "other symbols" minus a few excluded
+// ranges, a whitelist of math symbols, and the extra ranges listed below.
+static int is_wc_cat_id_start(uint32_t wc) {
+ const CharacterCategory cat = CategoriseCharacter((int) wc);
+
+ return (cat == ccLu || cat == ccLl ||
+ cat == ccLt || cat == ccLm ||
+ cat == ccLo || cat == ccNl ||
+ cat == ccSc || // allow currency symbols
+ // other symbols, but not arrows or replacement characters
+ (cat == ccSo && !(wc >= 0x2190 && wc <= 0x21FF) &&
+ wc != 0xfffc && wc != 0xfffd &&
+ wc != 0x233f && // notslash
+ wc != 0x00a6) || // broken bar
+
+ // math symbol (category Sm) whitelist
+ (wc >= 0x2140 && wc <= 0x2a1c &&
+ ((wc >= 0x2140 && wc <= 0x2144) || // ⅀, ⅁, ⅂, ⅃, ⅄
+ wc == 0x223f || wc == 0x22be || wc == 0x22bf || // ∿, ⊾, ⊿
+ wc == 0x22a4 || wc == 0x22a5 || // ⊤ ⊥
+
+ (wc >= 0x2202 && wc <= 0x2233 &&
+ (wc == 0x2202 || wc == 0x2205 || wc == 0x2206 || // ∂, ∅, ∆
+ wc == 0x2207 || wc == 0x220e || wc == 0x220f || // ∇, ∎, ∏
+ wc == 0x2210 || wc == 0x2211 || // ∐, ∑
+ wc == 0x221e || wc == 0x221f || // ∞, ∟
+ wc >= 0x222b)) || // ∫, ∬, ∭, ∮, ∯, ∰, ∱, ∲, ∳
+
+ (wc >= 0x22c0 && wc <= 0x22c3) || // N-ary big ops: ⋀, ⋁, ⋂, ⋃
+ (wc >= 0x25F8 && wc <= 0x25ff) || // ◸, ◹, ◺, ◻, ◼, ◽, ◾, ◿
+
+ (wc >= 0x266f &&
+ (wc == 0x266f || wc == 0x27d8 || wc == 0x27d9 || // ♯, ⟘, ⟙
+ (wc >= 0x27c0 && wc <= 0x27c1) || // ⟀, ⟁
+ (wc >= 0x29b0 && wc <= 0x29b4) || // ⦰, ⦱, ⦲, ⦳, ⦴
+ (wc >= 0x2a00 && wc <= 0x2a06) || // ⨀, ⨁, ⨂, ⨃, ⨄, ⨅, ⨆
+ (wc >= 0x2a09 && wc <= 0x2a16) || // ⨉, ⨊, ⨋, ⨌, ⨍, ⨎, ⨏, ⨐, ⨑, ⨒, ⨓, ⨔, ⨕, ⨖
+ wc == 0x2a1b || wc == 0x2a1c)))) || // ⨛, ⨜
+
+ (wc >= 0x1d6c1 && // variants of \nabla and \partial
+ (wc == 0x1d6c1 || wc == 0x1d6db ||
+ wc == 0x1d6fb || wc == 0x1d715 ||
+ wc == 0x1d735 || wc == 0x1d74f ||
+ wc == 0x1d76f || wc == 0x1d789 ||
+ wc == 0x1d7a9 || wc == 0x1d7c3)) ||
+
+ // super- and subscript +-=()
+ (wc >= 0x207a && wc <= 0x207e) ||
+ (wc >= 0x208a && wc <= 0x208e) ||
+
+ // angle symbols
+ (wc >= 0x2220 && wc <= 0x2222) || // ∠, ∡, ∢
+ (wc >= 0x299b && wc <= 0x29af) || // ⦛, ⦜, ⦝, ⦞, ⦟, ⦠, ⦡, ⦢, ⦣, ⦤, ⦥, ⦦, ⦧, ⦨, ⦩, ⦪, ⦫, ⦬, ⦭, ⦮, ⦯
+
+ // Other_ID_Start
+ wc == 0x2118 || wc == 0x212E || // ℘, ℮
+ (wc >= 0x309B && wc <= 0x309C) || // katakana-hiragana sound marks
+
+ // bold-digits and double-struck digits
+ (wc >= 0x1D7CE && wc <= 0x1D7E1)); // 𝟎 through 𝟗 (inclusive), 𝟘 through 𝟡 (inclusive)
+}
+
+// True when ch may begin an identifier: an ASCII letter or underscore, or a
+// code point in [0xA1, 0x10ffff] accepted by is_wc_cat_id_start().
+static inline bool IsIdentifierFirstCharacter (int ch) {
+	if (IsASCII(ch)) {
+		return (bool) (isalpha(ch) || ch == '_');
+	}
+
+	const bool inUnicodeRange = (ch >= 0xA1 && ch <= 0x10ffff);
+	return inUnicodeRange && is_wc_cat_id_start((uint32_t) ch);
+}
+
+// True when ch may appear inside an identifier. ASCII allows alphanumerics,
+// '_' and '!'. Beyond ASCII it allows every identifier-start code point plus
+// marks, digits, connector punctuation, modifier symbols, other numbers and
+// the prime characters.
+static inline bool IsIdentifierCharacter (int ch) {
+	if (IsASCII(ch)) {
+		return (bool) (isalnum(ch) || ch == '_' || ch == '!');
+	}
+	if (ch < 0xA1 || ch > 0x10ffff) {
+		return false;
+	}
+	if (is_wc_cat_id_start((uint32_t) ch)) {
+		return true;
+	}
+
+	switch (CategoriseCharacter(ch)) {
+	case ccMn: case ccMc: case ccNd: case ccPc:
+	case ccSk: case ccMe: case ccNo:
+		return true;
+	default:
+		break;
+	}
+
+	// primes (single, double, triple, their reverses, and quadruple)
+	return (ch >= 0x2032 && ch <= 0x2037) || (ch == 0x2057);
+}
+
+// keep the same function name to be easy to find again
+// Code points that jl_op_suffix_char() accepts as operator suffixes, in
+// addition to combining marks: superscripts, subscripts, modifier letters
+// and primes. opsuffs_len is the number of entries.
+static const uint32_t opsuffs[] = {
+ 0x00b2, // ²
+ 0x00b3, // ³
+ 0x00b9, // ¹
+ 0x02b0, // ʰ
+ 0x02b2, // ʲ
+ 0x02b3, // ʳ
+ 0x02b7, // ʷ
+ 0x02b8, // ʸ
+ 0x02e1, // ˡ
+ 0x02e2, // ˢ
+ 0x02e3, // ˣ
+ 0x1d2c, // ᴬ
+ 0x1d2e, // ᴮ
+ 0x1d30, // ᴰ
+ 0x1d31, // ᴱ
+ 0x1d33, // ᴳ
+ 0x1d34, // ᴴ
+ 0x1d35, // ᴵ
+ 0x1d36, // ᴶ
+ 0x1d37, // ᴷ
+ 0x1d38, // ᴸ
+ 0x1d39, // ᴹ
+ 0x1d3a, // ᴺ
+ 0x1d3c, // ᴼ
+ 0x1d3e, // ᴾ
+ 0x1d3f, // ᴿ
+ 0x1d40, // ᵀ
+ 0x1d41, // ᵁ
+ 0x1d42, // ᵂ
+ 0x1d43, // ᵃ
+ 0x1d47, // ᵇ
+ 0x1d48, // ᵈ
+ 0x1d49, // ᵉ
+ 0x1d4d, // ᵍ
+ 0x1d4f, // ᵏ
+ 0x1d50, // ᵐ
+ 0x1d52, // ᵒ
+ 0x1d56, // ᵖ
+ 0x1d57, // ᵗ
+ 0x1d58, // ᵘ
+ 0x1d5b, // ᵛ
+ 0x1d5d, // ᵝ
+ 0x1d5e, // ᵞ
+ 0x1d5f, // ᵟ
+ 0x1d60, // ᵠ
+ 0x1d61, // ᵡ
+ 0x1d62, // ᵢ
+ 0x1d63, // ᵣ
+ 0x1d64, // ᵤ
+ 0x1d65, // ᵥ
+ 0x1d66, // ᵦ
+ 0x1d67, // ᵧ
+ 0x1d68, // ᵨ
+ 0x1d69, // ᵩ
+ 0x1d6a, // ᵪ
+ 0x1d9c, // ᶜ
+ 0x1da0, // ᶠ
+ 0x1da5, // ᶥ
+ 0x1da6, // ᶦ
+ 0x1dab, // ᶫ
+ 0x1db0, // ᶰ
+ 0x1db8, // ᶸ
+ 0x1dbb, // ᶻ
+ 0x1dbf, // ᶿ
+ 0x2032, // ′
+ 0x2033, // ″
+ 0x2034, // ‴
+ 0x2035, // ‵
+ 0x2036, // ‶
+ 0x2037, // ‷
+ 0x2057, // ⁗
+ 0x2070, // ⁰
+ 0x2071, // ⁱ
+ 0x2074, // ⁴
+ 0x2075, // ⁵
+ 0x2076, // ⁶
+ 0x2077, // ⁷
+ 0x2078, // ⁸
+ 0x2079, // ⁹
+ 0x207a, // ⁺
+ 0x207b, // ⁻
+ 0x207c, // ⁼
+ 0x207d, // ⁽
+ 0x207e, // ⁾
+ 0x207f, // ⁿ
+ 0x2080, // ₀
+ 0x2081, // ₁
+ 0x2082, // ₂
+ 0x2083, // ₃
+ 0x2084, // ₄
+ 0x2085, // ₅
+ 0x2086, // ₆
+ 0x2087, // ₇
+ 0x2088, // ₈
+ 0x2089, // ₉
+ 0x208a, // ₊
+ 0x208b, // ₋
+ 0x208c, // ₌
+ 0x208d, // ₍
+ 0x208e, // ₎
+ 0x2090, // ₐ
+ 0x2091, // ₑ
+ 0x2092, // ₒ
+ 0x2093, // ₓ
+ 0x2095, // ₕ
+ 0x2096, // ₖ
+ 0x2097, // ₗ
+ 0x2098, // ₘ
+ 0x2099, // ₙ
+ 0x209a, // ₚ
+ 0x209b, // ₛ
+ 0x209c, // ₜ
+ 0x2c7c, // ⱼ
+ 0x2c7d, // ⱽ
+ 0xa71b, // ꜛ
+ 0xa71c, // ꜜ
+ 0xa71d // ꜝ
+};
+static const size_t opsuffs_len = sizeof(opsuffs) / (sizeof(uint32_t));
+
+// keep the same function name to be easy to find again
+// True when wc can follow an operator as a suffix: a combining mark
+// (categories Mn, Mc, Me) or one of the code points listed in opsuffs.
+// Code points outside [0xA1, 0x10ffff] are never suffixes.
+static bool jl_op_suffix_char(uint32_t wc) {
+	if (wc < 0xA1 || wc > 0x10ffff) {
+		return false;
+	}
+
+	switch (CategoriseCharacter((int) wc)) {
+	case ccMn: case ccMc: case ccMe:
+		return true;
+	default:
+		break;
+	}
+
+	const uint32_t *const tableEnd = opsuffs + opsuffs_len;
+	for (const uint32_t *entry = opsuffs; entry != tableEnd; ++entry) {
+		if (*entry == wc) {
+			return true;
+		}
+	}
+	return false;
+}
+
+// keep the same function name to be easy to find again
+// True when wc can never be part of an identifier: separators and control
+// characters, ASCII/Latin-1 punctuation other than connectors, the backtick,
+// and several families of brackets.
+static bool never_id_char(uint32_t wc) {
+	const CharacterCategory cat = CategoriseCharacter((int) wc);
+
+	// spaces and control characters
+	if (cat >= ccZs && cat <= ccCs) {
+		return true;
+	}
+	// ASCII and Latin1 non-connector punctuation
+	if (wc < 0xff && cat >= ccPd && cat <= ccPo) {
+		return true;
+	}
+	if (wc == '`') {
+		return true;
+	}
+	// mathematical brackets
+	if (wc >= 0x27e6 && wc <= 0x27ef) {
+		return true;
+	}
+	// angle, corner, and lenticular brackets
+	if (wc >= 0x3008 && wc <= 0x3011) {
+		return true;
+	}
+	// tortoise shell, square, and more lenticular brackets
+	if (wc >= 0x3014 && wc <= 0x301b) {
+		return true;
+	}
+	// fullwidth parens
+	if (wc == 0xff08 || wc == 0xff09) {
+		return true;
+	}
+	// fullwidth square brackets
+	return wc == 0xff3b || wc == 0xff3d;
+}
+
+
+// True when ch may start an operator. ASCII: the operator characters plus
+// ! ? : ; , and the dot. Non-ASCII: never an identifier-start character;
+// otherwise a known unary operator or any code point that never_id_char()
+// does not reject.
+static bool IsOperatorFirstCharacter (int ch) {
+	if (IsASCII(ch)) {
+		return IsJuliaOperator(ch) ||
+			ch == '!' || ch == '?' ||
+			ch == ':' || ch == ';' ||
+			ch == ',' || ch == '.';
+	}
+	if (is_wc_cat_id_start((uint32_t) ch)) {
+		return false;
+	}
+	return IsJuliaUnaryOperator(ch) || !never_id_char((uint32_t) ch);
+}
+
+// True when ch may appear in an operator: anything that can start one, or a
+// non-ASCII operator suffix character.
+static bool IsOperatorCharacter (int ch) {
+	if (IsOperatorFirstCharacter(ch)) {
+		return true;
+	}
+	return !IsASCII(ch) && jl_op_suffix_char((uint32_t) ch);
+}
+
+// True when str is exactly "begin" or "end".
+static bool CheckBoundsIndexing(char *str) {
+	return strcmp("begin", str) == 0 || strcmp("end", str) == 0;
+}
+
+// Fold-level change caused by a keyword: +1 for a block-opening keyword,
+// -1 for "end", 0 for anything else.
+static int CheckKeywordFoldPoint(char *str) {
+	static const char *const blockOpeners[] = {
+		"if", "for", "while", "try", "do", "begin", "let",
+		"baremodule", "quote", "module", "struct", "type",
+		"macro", "function",
+	};
+
+	for (const char *opener : blockOpeners) {
+		if (strcmp(opener, str) == 0) {
+			return 1;
+		}
+	}
+	return (strcmp("end", str) == 0) ? -1 : 0;
+}
+
+// True when ch introduces the exponent of a number written in the given
+// base: e, E or f for base 10, p or P for base 16.
+static bool IsNumberExpon(int ch, int base) {
+	switch (base) {
+	case 10:
+		return ch == 'e' || ch == 'E' || ch == 'f';
+	case 16:
+		return ch == 'p' || ch == 'P';
+	default:
+		return false;
+	}
+}
+
+/* Moves forward while the NEXT character is a digit of the given base (or
+ * '_' when allow_sep is set). Returns true if at least one such character
+ * was consumed. */
+static bool ScanDigits(StyleContext& sc, int base, bool allow_sep) {
+	bool consumedAny = false;
+	while (IsADigit(sc.chNext, base) || (allow_sep && sc.chNext == '_')) {
+		sc.Forward();
+		consumedAny = true;
+	}
+	return consumedAny;
+}
+
+// Steps past the current character, then consumes up to max hexadecimal
+// digits. Returns true (error) when the first character after the step is
+// not a hexadecimal digit, false otherwise.
+static inline bool ScanNHexas(StyleContext &sc, int max) {
+	sc.Forward();
+	if (!IsADigit(sc.ch, 16)) {
+		return true;
+	}
+
+	for (int consumed = 0; consumed < max && IsADigit(sc.ch, 16); ++consumed) {
+		sc.Forward();
+	}
+	return false;
+}
+
+// Continues the styling of a character literal from the current position.
+// Handles the ''' literal, backslash escapes (single-letter, \x, \u, \U and
+// octal) and the closing quote. When lexerror is true, malformed literals
+// are restyled as SCE_JULIA_LEXERROR; when it is false no error checks on
+// the literal's length or content are made.
+static void resumeCharacter(StyleContext &sc, bool lexerror) {
+ bool error = false;
+
+ // ''' case
+ if (sc.chPrev == '\'' && sc.ch == '\'' && sc.chNext == '\'') {
+ sc.Forward();
+ sc.ForwardSetState(SCE_JULIA_DEFAULT);
+ return;
+ } else if (lexerror && sc.chPrev == '\'' && sc.ch == '\'') {
+ // empty literal '' is reported as an error
+ sc.ChangeState(SCE_JULIA_LEXERROR);
+ sc.ForwardSetState(SCE_JULIA_DEFAULT);
+
+ // Escape characters
+ } else if (sc.ch == '\\') {
+ sc.Forward();
+ if (sc.ch == '\'' || sc.ch == '\\' ) {
+ sc.Forward();
+ } else if (sc.ch == 'n' || sc.ch == 't' || sc.ch == 'a' ||
+ sc.ch == 'b' || sc.ch == 'e' || sc.ch == 'f' ||
+ sc.ch == 'r' || sc.ch == 'v' ) {
+ sc.Forward();
+ } else if (sc.ch == 'x') {
+ // \x, \u and \U take at most 2, 4 and 8 hexadecimal digits
+ error |= ScanNHexas(sc, 2);
+ } else if (sc.ch == 'u') {
+ error |= ScanNHexas(sc, 4);
+ } else if (sc.ch == 'U') {
+ error |= ScanNHexas(sc, 8);
+ } else if (IsADigit(sc.ch, 8)) {
+ // octal escape: at most three digits in total
+ int n = 1;
+ int max = 3;
+ sc.Forward();
+ while (IsADigit(sc.ch, 8) && n < max) {
+ sc.Forward();
+ n++;
+ }
+ }
+
+ if (lexerror) {
+ if (sc.ch != '\'') {
+ // extra characters after the escape: skip to the quote or end of line
+ error = true;
+ while (sc.ch != '\'' &&
+ sc.ch != '\r' &&
+ sc.ch != '\n') {
+ sc.Forward();
+ }
+ }
+
+ if (error) {
+ sc.ChangeState(SCE_JULIA_LEXERROR);
+ sc.ForwardSetState(SCE_JULIA_DEFAULT);
+ }
+ }
+ } else if (lexerror) {
+ if (sc.ch < 0x20 || sc.ch > 0x10ffff) {
+ error = true;
+ } else {
+ // single character
+ sc.Forward();
+
+ if (sc.ch != '\'') {
+ // more than one character: skip to the quote or end of line
+ error = true;
+ while (sc.ch != '\'' &&
+ sc.ch != '\r' &&
+ sc.ch != '\n') {
+ sc.Forward();
+ }
+ }
+ }
+
+ if (error) {
+ sc.ChangeState(SCE_JULIA_LEXERROR);
+ sc.ForwardSetState(SCE_JULIA_DEFAULT);
+ }
+ }
+
+ // closing quote
+ if (sc.ch == '\'') {
+ if (sc.chNext == '\'') {
+ sc.Forward();
+ } else {
+ sc.ForwardSetState(SCE_JULIA_DEFAULT);
+ }
+ }
+}
+
+/* True when the current character sits directly between two single quotes. */
+static inline bool IsACharacter(StyleContext &sc) {
+    const bool quote_before = (sc.chPrev == '\'');
+    const bool quote_after = (sc.chNext == '\'');
+    return quote_before && quote_after;
+}
+
+/*
+ * Advances `sc` to the `)` that closes a parenthesised interpolation.
+ * Parentheses are counted from the first `(` met; those found inside a
+ * double-quoted string, or written as a character literal ('(' or ')'),
+ * are not counted. Returns with `sc` on the closing `)`, or at the end of
+ * the range when no closing parenthesis is found.
+ */
+static void ScanParenInterpolation(StyleContext &sc) {
+    // TODO: no syntax highlighting inside a string interpolation
+    // TODO: check corner cases for nested string interpolation
+    // TODO: check corner cases with Command inside interpolation
+
+    int depth = 0;              // nesting level of counted parentheses
+    bool inside_string = false; // parentheses are ignored while this is set
+
+    for (; sc.More(); sc.Forward()) {
+        if (sc.ch == '\"' && sc.chPrev != '\\') {
+            // An unescaped double quote opens or closes a nested string
+            inside_string = !inside_string;
+            continue;
+        }
+        if (inside_string || IsACharacter(sc)) {
+            continue;
+        }
+
+        if (sc.ch == '(') {
+            depth++;
+        } else if (sc.ch == ')' && depth > 0) {
+            depth--;
+            if (depth == 0) {
+                // Exit interpolation
+                return;
+            }
+        }
+    }
+}
+/*
+ * Enter the number state and work out the numeric base from the prefix.
+ * `base` becomes 16, 8 or 2 for the `0x`, `0o` and `0b` prefixes and 10 otherwise.
+ * `with_dot` is set when the number starts with `.` or when `0x` is directly
+ * followed by `.`.
+ */
+static void initNumber (StyleContext &sc, int &base, bool &with_dot) {
+    base = 10;
+    with_dot = false;
+    sc.SetState(SCE_JULIA_NUMBER);
+
+    if (sc.ch == '.') {
+        with_dot = true;
+        return;
+    }
+    if (sc.ch != '0') {
+        return;
+    }
+
+    // A leading zero may be followed by a base letter
+    switch (sc.chNext) {
+    case 'x':
+        sc.Forward();
+        base = 16;
+        if (sc.chNext == '.') {
+            sc.Forward();
+            with_dot = true;
+        }
+        break;
+    case 'o':
+        sc.Forward();
+        base = 8;
+        break;
+    case 'b':
+        sc.Forward();
+        base = 2;
+        break;
+    default:
+        break;
+    }
+}
+
+/*
+ * Resume parsing a String or Command, bounded by the `quote` character (\" or \`)
+ * The `triple` argument specifies if it is a triple-quote String or Command.
+ * Interpolation is detected (with `$`), and parsed if `allow_interp` is true.
+ * When `full_highlight` is true the `$` itself is included in the
+ * SCE_JULIA_STRINGINTERP style and `$identifier` interpolations are styled too;
+ * when it is false only `$(...)` is styled, starting after the `$`.
+ * On the closing quote(s) the state is set to SCE_JULIA_DEFAULT.
+ */
+static void resumeStringLike(StyleContext &sc, int quote, bool triple, bool allow_interp, bool full_highlight) {
+ // Style to restore once an interpolation has been consumed
+ int stylePrev = sc.state;
+ bool checkcurrent = false;
+
+ // Escape characters
+ // (an escaped quote, backslash or `$` is stepped over so that it is not interpreted)
+ if (sc.ch == '\\') {
+ if (sc.chNext == quote || sc.chNext == '\\' || sc.chNext == '$') {
+ sc.Forward();
+ }
+ } else if (allow_interp && sc.ch == '$') {
+ // If the interpolation is only of a variable, do not change state
+ if (sc.chNext == '(') {
+ if (full_highlight) {
+ sc.SetState(SCE_JULIA_STRINGINTERP);
+ } else {
+ sc.ForwardSetState(SCE_JULIA_STRINGINTERP);
+ }
+ // Advance to the matching `)`, then switch back to the string style after it
+ ScanParenInterpolation(sc);
+ sc.ForwardSetState(stylePrev);
+
+ checkcurrent = true;
+
+ } else if (full_highlight && IsIdentifierFirstCharacter(sc.chNext)) {
+ sc.SetState(SCE_JULIA_STRINGINTERP);
+ // Step over `$` and the first identifier character, then the rest of the identifier
+ sc.Forward();
+ sc.Forward();
+ for (; sc.More(); sc.Forward()) {
+ if (! IsIdentifierCharacter(sc.ch)) {
+ break;
+ }
+ }
+ sc.SetState(stylePrev);
+
+ checkcurrent = true;
+ }
+
+ if (checkcurrent) {
+ // Check that the current character is not a special char,
+ // otherwise it will be skipped
+ resumeStringLike(sc, quote, triple, allow_interp, full_highlight);
+ }
+
+ } else if (sc.ch == quote) {
+ if (triple) {
+ // A single quote character does not end a triple-quoted literal
+ if (sc.chNext == quote && sc.GetRelativeCharacter(2) == quote) {
+ // Move to the end of the triple quotes
+ Sci_PositionU nextIndex = sc.currentPos + 2;
+ while (nextIndex > sc.currentPos && sc.More()) {
+ sc.Forward();
+ }
+ sc.ForwardSetState(SCE_JULIA_DEFAULT);
+ }
+ } else {
+ sc.ForwardSetState(SCE_JULIA_DEFAULT);
+ }
+ }
+}
+
+/* Resume a backtick-delimited command; interpolations are fully highlighted. */
+static void resumeCommand(StyleContext &sc, bool triple, bool allow_interp) {
+    const int backtick = '`';
+    resumeStringLike(sc, backtick, triple, allow_interp, true);
+}
+
+/* Resume a double-quoted string; interpolations are fully highlighted. */
+static void resumeString(StyleContext &sc, bool triple, bool allow_interp) {
+    const int double_quote = '"';
+    resumeStringLike(sc, double_quote, triple, allow_interp, true);
+}
+
+/*
+ * Continue lexing a number written in `base`.
+ * `with_dot` records whether a decimal dot has already been consumed and is
+ * updated here. When `lexerror` is true, a decimal digit that is not valid
+ * in `base` is restyled as SCE_JULIA_LEXERROR.
+ */
+static void resumeNumber (StyleContext &sc, int base, bool &with_dot, bool lexerror) {
+    // Exponent marker: consume an optional sign and the exponent digits, then stop
+    if (IsNumberExpon(sc.ch, base)) {
+        const bool exponent_follows =
+            IsADigit(sc.chNext) || sc.chNext == '+' || sc.chNext == '-';
+        if (exponent_follows) {
+            sc.Forward();
+            // Capture all digits
+            ScanDigits(sc, 10, false);
+            sc.Forward();
+        }
+        sc.SetState(SCE_JULIA_DEFAULT);
+        return;
+    }
+
+    // Interval operator `..`
+    if (sc.ch == '.' && sc.chNext == '.') {
+        sc.SetState(SCE_JULIA_OPERATOR);
+        sc.Forward();
+        sc.ForwardSetState(SCE_JULIA_DEFAULT);
+        return;
+    }
+
+    // First decimal dot of the number
+    if (sc.ch == '.' && !with_dot) {
+        with_dot = true;
+        ScanDigits(sc, base, true);
+        return;
+    }
+
+    // Regular digit or `_` separator
+    if (IsADigit(sc.ch, base) || sc.ch == '_') {
+        ScanDigits(sc, base, true);
+        return;
+    }
+
+    // Decimal digit that does not belong to the current base
+    if (IsADigit(sc.ch) && !IsADigit(sc.ch, base)) {
+        if (lexerror) {
+            sc.ChangeState(SCE_JULIA_LEXERROR);
+        }
+        ScanDigits(sc, 10, false);
+        sc.ForwardSetState(SCE_JULIA_DEFAULT);
+        return;
+    }
+
+    // Anything else ends the number
+    sc.SetState(SCE_JULIA_DEFAULT);
+}
+
+/*
+ * Decide whether the operator being lexed ends at the current character.
+ * The state is left untouched while the operator continues.
+ */
+static void resumeOperator (StyleContext &sc) {
+    // `::`, `<:` or `>:` met while in the operator state.
+    // `->:` and `=>:` are excluded (case `:a=>:b`).
+    const bool type_operator_here = sc.chNext == ':' &&
+        (sc.ch == ':' || sc.ch == '<' ||
+         (sc.ch == '>' && sc.chPrev != '-' && sc.chPrev != '='));
+    if (type_operator_here) {
+        sc.Forward();
+        sc.ForwardSetState(SCE_JULIA_DEFAULT);
+        return;
+    }
+
+    // Case `foo(:baz,:baz)` or `:one+:two`: a `:` (and likewise a `'`) is
+    // handed back to the default state, which decides what it starts.
+    bool operator_ends = (sc.ch == ':') || (sc.ch == '\'');
+    if (!operator_ends) {
+        operator_ends = (sc.ch == '.' && sc.chPrev != '.') ||
+            IsIdentifierFirstCharacter(sc.ch) ||
+            (!(sc.chPrev == '.' && IsOperatorFirstCharacter(sc.ch)) &&
+             !IsOperatorCharacter(sc.ch));
+    }
+    if (operator_ends) {
+        sc.SetState(SCE_JULIA_DEFAULT);
+    }
+}
+
+/*
+ * Styles the range starting at `startPos` and spanning `length` characters,
+ * beginning in style `initStyle`.
+ *
+ * For every character the loop first checks whether the current state ends,
+ * then, when in the default state, whether a new state starts.
+ *
+ * Multi-line information is kept in the per-line state so that lexing and
+ * folding can resume from any line:
+ *   bit  0      `'` may mean transpose
+ *   bit  1      inside a triple double-quote string
+ *   bit  2      inside a triple backtick command
+ *   bit  3      inside a prefixed (raw) string or command
+ *   bits 4-7    bracket nesting counter
+ *   bits 8-11   parenthesis/bracket nesting counter
+ *   bits 12-15  nested block comment counter
+ */
+void SCI_METHOD LexerJulia::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
+    PropSetSimple props;
+    Accessor styler(pAccess, &props);
+
+    Sci_Position pos = startPos;
+    styler.StartAt(pos);
+    styler.StartSegment(pos);
+
+    // use the line state of each line to store block/multiline states
+    Sci_Position curLine = styler.GetLine(startPos);
+    // Default is false for everything and 0 counters.
+    int lineState = (curLine > 0) ? styler.GetLineState(curLine-1) : 0;
+
+    bool transpose = (lineState >> 0) & 0x01;                // 1 bit to know if ' is allowed to mean transpose
+    bool istripledocstring = (lineState >> 1) & 0x01;        // 1 bit to know if we are in a triple doublequotes string
+    bool triple_backtick = (lineState >> 2) & 0x01;          // 1 bit to know if we are in a triple backtick command
+    bool israwstring = (lineState >> 3) & 0x01;              // 1 bit to know if we are in a raw string
+    int indexing_level = (int)((lineState >> 4) & 0x0F);     // 4 bits of bracket nesting counter
+    int list_comprehension = (int)((lineState >> 8) & 0x0F); // 4 bits of parenthesis nesting counter
+    int commentDepth = (int)((lineState >> 12) & 0x0F);      // 4 bits of nested comment counter
+
+    // base for parsing number
+    int base = 10;
+    // number has a float dot ?
+    bool with_dot = false;
+
+    StyleContext sc(startPos, length, initStyle, styler);
+
+    for (; sc.More(); sc.Forward()) {
+
+        //// check for end of states
+        switch (sc.state) {
+            case SCE_JULIA_BRACKET:
+                sc.SetState(SCE_JULIA_DEFAULT);
+                break;
+            case SCE_JULIA_OPERATOR:
+                resumeOperator(sc);
+                break;
+            case SCE_JULIA_TYPEOPERATOR:
+                sc.SetState(SCE_JULIA_DEFAULT);
+                break;
+            case SCE_JULIA_TYPEANNOT:
+                if (! IsIdentifierCharacter(sc.ch)) {
+                    sc.SetState(SCE_JULIA_DEFAULT);
+                }
+                break;
+            case SCE_JULIA_IDENTIFIER:
+                // String literal
+                if (sc.ch == '\"') {
+                    // If the string literal has a prefix, interpolation is disabled
+                    israwstring = true;
+                    sc.ChangeState(SCE_JULIA_STRINGLITERAL);
+                    sc.SetState(SCE_JULIA_DEFAULT);
+
+                } else if (sc.ch == '`') {
+                    // If the string literal has a prefix, interpolation is disabled
+                    israwstring = true;
+                    sc.ChangeState(SCE_JULIA_COMMANDLITERAL);
+                    sc.SetState(SCE_JULIA_DEFAULT);
+
+                // Continue if the character is an identifier character
+                } else if (! IsIdentifierCharacter(sc.ch)) {
+                    char s[MAX_JULIA_IDENT_CHARS + 1];
+                    sc.GetCurrent(s, sizeof(s));
+
+                    // Treat the keywords differently if we are indexing or not
+                    if ( indexing_level > 0 && CheckBoundsIndexing(s)) {
+                        // Inside [], (), `begin` and `end` are numbers not block keywords
+                        sc.ChangeState(SCE_JULIA_NUMBER);
+                        transpose = false;
+
+                    } else {
+                        if (keywords.InList(s)) {
+                            sc.ChangeState(SCE_JULIA_KEYWORD1);
+                            transpose = false;
+                        } else if (identifiers2.InList(s)) {
+                            sc.ChangeState(SCE_JULIA_KEYWORD2);
+                            transpose = false;
+                        } else if (identifiers3.InList(s)) {
+                            sc.ChangeState(SCE_JULIA_KEYWORD3);
+                            transpose = false;
+                        } else if (identifiers4.InList(s)) {
+                            sc.ChangeState(SCE_JULIA_KEYWORD4);
+                            // These identifiers can be used for variable names also,
+                            // so transpose is not forbidden.
+                            //transpose = false;
+                        }
+                    }
+                    sc.SetState(SCE_JULIA_DEFAULT);
+
+                    // TODO: recognize begin-end blocks inside list comprehension
+                    // b = [(begin n%2; n*2 end) for n in 1:10]
+                    // TODO: recognize better comprehension for-if to avoid problem with code-folding
+                    // c = [(if isempty(a); missing else first(b) end) for (a, b) in zip(l1, l2)]
+                }
+                break;
+            case SCE_JULIA_NUMBER:
+                resumeNumber(sc, base, with_dot, options.highlightLexerror);
+                break;
+            case SCE_JULIA_CHAR:
+                resumeCharacter(sc, options.highlightLexerror);
+                break;
+            case SCE_JULIA_DOCSTRING:
+                resumeString(sc, true, !israwstring);
+                if (sc.state == SCE_JULIA_DEFAULT && israwstring) {
+                    israwstring = false;
+                }
+                break;
+            case SCE_JULIA_STRING:
+                resumeString(sc, false, !israwstring);
+                if (sc.state == SCE_JULIA_DEFAULT && israwstring) {
+                    israwstring = false;
+                }
+                break;
+            case SCE_JULIA_COMMAND:
+                resumeCommand(sc, triple_backtick, !israwstring);
+                // A prefixed command sets the raw flag just like a prefixed string;
+                // clear it once the command is closed, otherwise interpolation
+                // stays disabled in the next ordinary string or command.
+                if (sc.state == SCE_JULIA_DEFAULT && israwstring) {
+                    israwstring = false;
+                }
+                break;
+            case SCE_JULIA_MACRO:
+                if (IsASpace(sc.ch) || ! IsIdentifierCharacter(sc.ch)) {
+                    sc.SetState(SCE_JULIA_DEFAULT);
+                }
+                break;
+            case SCE_JULIA_SYMBOL:
+                if (! IsIdentifierCharacter(sc.ch)) {
+                    sc.SetState(SCE_JULIA_DEFAULT);
+                }
+                break;
+            case SCE_JULIA_COMMENT:
+                if( commentDepth > 0 ) {
+                    // end or start of a nested a block comment
+                    if ( sc.ch == '=' && sc.chNext == '#') {
+                        commentDepth --;
+                        sc.Forward();
+
+                        if (commentDepth == 0) {
+                            sc.ForwardSetState(SCE_JULIA_DEFAULT);
+                        }
+                    } else if( sc.ch == '#' && sc.chNext == '=') {
+                        commentDepth ++;
+                        sc.Forward();
+                    }
+                } else {
+                    // single line comment
+                    if (sc.atLineEnd || sc.ch == '\r' || sc.ch == '\n') {
+                        sc.SetState(SCE_JULIA_DEFAULT);
+                        transpose = false;
+                    }
+                }
+                break;
+        }
+
+        // check start of a new state
+        if (sc.state == SCE_JULIA_DEFAULT) {
+            if (sc.ch == '#') {
+                sc.SetState(SCE_JULIA_COMMENT);
+                // increment depth if we are a block comment
+                if(sc.chNext == '=') {
+                    commentDepth ++;
+                    sc.Forward();
+                }
+            } else if (sc.ch == '!') {
+                sc.SetState(SCE_JULIA_OPERATOR);
+            } else if (sc.ch == '\'') {
+                if (transpose) {
+                    sc.SetState(SCE_JULIA_OPERATOR);
+                } else {
+                    sc.SetState(SCE_JULIA_CHAR);
+                }
+            } else if (sc.ch == '\"') {
+                istripledocstring = (sc.chNext == '\"' && sc.GetRelativeCharacter(2) == '\"');
+                if (istripledocstring) {
+                    sc.SetState(SCE_JULIA_DOCSTRING);
+                    // Move to the end of the triple quotes
+                    Sci_PositionU nextIndex = sc.currentPos + 2;
+                    while (nextIndex > sc.currentPos && sc.More()) {
+                        sc.Forward();
+                    }
+                } else {
+                    sc.SetState(SCE_JULIA_STRING);
+                }
+            } else if (sc.ch == '`') {
+                triple_backtick = (sc.chNext == '`' && sc.GetRelativeCharacter(2) == '`');
+                sc.SetState(SCE_JULIA_COMMAND);
+                if (triple_backtick) {
+                    // Move to the end of the triple backticks
+                    Sci_PositionU nextIndex = sc.currentPos + 2;
+                    while (nextIndex > sc.currentPos && sc.More()) {
+                        sc.Forward();
+                    }
+                }
+            } else if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
+                initNumber(sc, base, with_dot);
+            } else if (IsIdentifierFirstCharacter(sc.ch)) {
+                sc.SetState(SCE_JULIA_IDENTIFIER);
+                transpose = true;
+            } else if (sc.ch == '@') {
+                sc.SetState(SCE_JULIA_MACRO);
+                transpose = false;
+
+            // Several parsing of operators, should keep the order of `if` blocks
+            } else if ((sc.ch == ':' || sc.ch == '<' || sc.ch == '>') && sc.chNext == ':') {
+                sc.SetState(SCE_JULIA_TYPEOPERATOR);
+                sc.Forward();
+                // Highlight the next identifier, if option is set
+                if (options.highlightTypeannotation &&
+                    IsIdentifierFirstCharacter(sc.chNext)) {
+                    sc.ForwardSetState(SCE_JULIA_TYPEANNOT);
+                }
+            } else if (sc.ch == ':') {
+                // TODO: improve detection of range
+                // should be solved with begin-end parsing
+                // `push!(arr, s1 :s2)` and `a[begin :end]
+                if (IsIdentifierFirstCharacter(sc.chNext) &&
+                    ! IsIdentifierCharacter(sc.chPrev) &&
+                    sc.chPrev != ')' && sc.chPrev != ']' ) {
+                    sc.SetState(SCE_JULIA_SYMBOL);
+                } else {
+                    sc.SetState(SCE_JULIA_OPERATOR);
+                }
+            } else if (IsJuliaParen(sc.ch)) {
+                if (sc.ch == '[') {
+                    list_comprehension ++;
+                    indexing_level ++;
+                } else if (sc.ch == ']' && (indexing_level > 0)) {
+                    // A stray `)` may already have used up this bracket's share of
+                    // the counter. Never let it go negative: a negative value
+                    // would be stored as 15 in the 4-bit line state field.
+                    if (list_comprehension > 0) {
+                        list_comprehension --;
+                    }
+                    indexing_level --;
+                } else if (sc.ch == '(') {
+                    list_comprehension ++;
+                } else if (sc.ch == ')' && (list_comprehension > 0)) {
+                    list_comprehension --;
+                }
+
+                if (sc.ch == ')' || sc.ch == ']' || sc.ch == '}') {
+                    transpose = true;
+                } else {
+                    transpose = false;
+                }
+                sc.SetState(SCE_JULIA_BRACKET);
+            } else if (IsOperatorFirstCharacter(sc.ch)) {
+                transpose = false;
+                sc.SetState(SCE_JULIA_OPERATOR);
+            } else {
+                transpose = false;
+            }
+        }
+
+        // update the line information (used for line-by-line lexing and folding)
+        if (sc.atLineEnd) {
+            // set the line state to the current state
+            curLine = styler.GetLine(sc.currentPos);
+
+            lineState = ((transpose ? 1 : 0) << 0) |
+                        ((istripledocstring ? 1 : 0) << 1) |
+                        ((triple_backtick ? 1 : 0) << 2) |
+                        ((israwstring ? 1 : 0) << 3) |
+                        ((indexing_level & 0x0F) << 4) |
+                        ((list_comprehension & 0x0F) << 8) |
+                        ((commentDepth & 0x0F) << 12);
+            styler.SetLineState(curLine, lineState);
+        }
+    }
+    sc.Complete();
+}
+
+/*
+ * Computes the fold level of every line in the range starting at `startPos`
+ * and spanning `length` characters. Does nothing when `options.fold` is off.
+ *
+ * Fold points come from, depending on the options:
+ *   - block comments `#= ... =#` (options.foldComment)
+ *   - brackets, parentheses and block keywords (options.foldSyntaxBased)
+ *   - docstrings (options.foldDocstring)
+ *
+ * Each line level stores the level at the start of the line in its low bits
+ * and the level for the next line shifted left by 16.
+ */
+void SCI_METHOD LexerJulia::Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
+
+ if (!options.fold)
+ return;
+
+ LexAccessor styler(pAccess);
+
+ Sci_PositionU endPos = startPos + length;
+ int visibleChars = 0;
+ Sci_Position lineCurrent = styler.GetLine(startPos);
+ int levelCurrent = SC_FOLDLEVELBASE;
+ int lineState = 0;
+ if (lineCurrent > 0) {
+ // Resume from the "next level" and line state recorded for the previous line
+ levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
+ lineState = styler.GetLineState(lineCurrent-1);
+ }
+
+ // level of nested brackets
+ int indexing_level = (int)((lineState >> 4) & 0x0F); // 4 bits of bracket nesting counter
+ // level of nested parenthesis or brackets
+ int list_comprehension = (int)((lineState >> 8) & 0x0F); // 4 bits of parenthesis nesting counter
+ //int commentDepth = (int)((lineState >> 12) & 0x0F); // 4 bits of nested comment counter
+
+ Sci_PositionU lineStartNext = styler.LineStart(lineCurrent+1);
+ int levelNext = levelCurrent;
+ char chNext = styler[startPos];
+ // NOTE(review): when startPos is 0, `startPos - 1` is not a valid position;
+ // confirm that StyleAt() tolerates it.
+ int stylePrev = styler.StyleAt(startPos - 1);
+ int styleNext = styler.StyleAt(startPos);
+ int style = initStyle;
+ // Buffer collecting the characters of the keyword currently being read
+ char word[100];
+ int wordlen = 0;
+ for (Sci_PositionU i = startPos; i < endPos; i++) {
+ char ch = chNext;
+ chNext = styler.SafeGetCharAt(i + 1);
+ style = styleNext;
+ styleNext = styler.StyleAt(i + 1);
+ bool atEOL = i == (lineStartNext-1);
+
+ // a start/end of comment block
+ if (options.foldComment && style == SCE_JULIA_COMMENT) {
+ // start of block comment
+ if (ch == '#' && chNext == '=') {
+ levelNext ++;
+ }
+ // end of block comment
+ if (ch == '=' && chNext == '#' && levelNext > 0) {
+ levelNext --;
+ }
+ }
+
+ // Syntax based folding, accounts for list comprehension
+ if (options.foldSyntaxBased) {
+ // list comprehension allow `for`, `if` and `begin` without `end`
+ if (style == SCE_JULIA_BRACKET) {
+ if (ch == '[') {
+ list_comprehension ++;
+ indexing_level ++;
+ levelNext ++;
+ } else if (ch == ']') {
+ list_comprehension --;
+ indexing_level --;
+ levelNext --;
+ } else if (ch == '(') {
+ list_comprehension ++;
+ levelNext ++;
+ } else if (ch == ')') {
+ list_comprehension --;
+ levelNext --;
+ }
+ // check non-negative
+ if (indexing_level < 0) {
+ indexing_level = 0;
+ }
+ if (list_comprehension < 0) {
+ list_comprehension = 0;
+ }
+ }
+
+ // keyword
+ if (style == SCE_JULIA_KEYWORD1) {
+ word[wordlen++] = static_cast<char>(ch);
+ if (wordlen == 100) { // prevent overflow
+ word[0] = '\0';
+ wordlen = 1;
+ }
+ // End of the keyword: terminate the buffer and look it up
+ if (styleNext != SCE_JULIA_KEYWORD1) {
+ word[wordlen] = '\0';
+ wordlen = 0;
+ // Keywords inside brackets or parentheses do not change the fold level
+ if (list_comprehension <= 0 && indexing_level <= 0) {
+ levelNext += CheckKeywordFoldPoint(word);
+ }
+ }
+ }
+ }
+
+ // Docstring
+ // (one level deeper from its first character to its last one)
+ if (options.foldDocstring) {
+ if (stylePrev != SCE_JULIA_DOCSTRING && style == SCE_JULIA_DOCSTRING) {
+ levelNext ++;
+ } else if (style == SCE_JULIA_DOCSTRING && styleNext != SCE_JULIA_DOCSTRING) {
+ levelNext --;
+ }
+ }
+
+ // check non-negative level
+ if (levelNext < 0) {
+ levelNext = 0;
+ }
+
+ if (!IsASpace(ch)) {
+ visibleChars++;
+ }
+ stylePrev = style;
+
+ // At the end of a line, or of the range, store the level of the current line
+ if (atEOL || (i == endPos-1)) {
+ int levelUse = levelCurrent;
+ int lev = levelUse | levelNext << 16;
+ if (visibleChars == 0 && options.foldCompact) {
+ lev |= SC_FOLDLEVELWHITEFLAG;
+ }
+ // The line is a fold header when the level increases after it
+ if (levelUse < levelNext) {
+ lev |= SC_FOLDLEVELHEADERFLAG;
+ }
+ if (lev != styler.LevelAt(lineCurrent)) {
+ styler.SetLevel(lineCurrent, lev);
+ }
+ lineCurrent++;
+ lineStartNext = styler.LineStart(lineCurrent+1);
+ levelCurrent = levelNext;
+ if (atEOL && (i == static_cast<Sci_PositionU>(styler.Length() - 1))) {
+ // There is an empty line at end of file so give it same level and empty
+ styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
+ }
+ visibleChars = 0;
+ }
+ }
+}
+
+// Lexer module for Julia: ties the SCLEX_JULIA identifier and the "julia" name
+// to the LexerJulia factory function and the juliaWordLists word lists.
+LexerModule lmJulia(SCLEX_JULIA, LexerJulia::LexerFactoryJulia, "julia", juliaWordLists);
Modified: scintilla/src/Catalogue.cxx
1 lines changed, 1 insertions(+), 0 deletions(-)
===================================================================
@@ -80,6 +80,7 @@ int Scintilla_LinkLexers() {
LINK_LEXER(lmFreeBasic);
LINK_LEXER(lmHaskell);
LINK_LEXER(lmHTML);
+ LINK_LEXER(lmJulia);
LINK_LEXER(lmLatex);
LINK_LEXER(lmLISP);
LINK_LEXER(lmLua);
Modified: src/filetypes.c
1 lines changed, 1 insertions(+), 0 deletions(-)
===================================================================
@@ -187,6 +187,7 @@ static void init_builtin_filetypes(void)
FT_INIT( GO, GO, "Go", NULL, SOURCE_FILE, COMPILED );
FT_INIT( ZEPHIR, ZEPHIR, "Zephir", NULL, SOURCE_FILE, COMPILED );
FT_INIT( SMALLTALK, NONE, "Smalltalk", NULL, SOURCE_FILE, SCRIPT );
+ FT_INIT( JULIA, JULIA, "Julia", NULL, SOURCE_FILE, SCRIPT );
}
Modified: src/filetypes.h
1 lines changed, 1 insertions(+), 0 deletions(-)
===================================================================
@@ -107,6 +107,7 @@ typedef enum
GEANY_FILETYPES_ZEPHIR,
GEANY_FILETYPES_BIBTEX,
GEANY_FILETYPES_SMALLTALK,
+ GEANY_FILETYPES_JULIA,
/* ^ append items here */
GEANY_MAX_BUILT_IN_FILETYPES /* Don't use this, use filetypes_array->len instead */
}
Modified: src/highlighting.c
12 lines changed, 12 insertions(+), 0 deletions(-)
===================================================================
@@ -1026,6 +1026,7 @@ void highlighting_init_styles(guint filetype_idx, GKeyFile *config, GKeyFile *co
init_styleset_case(HTML);
init_styleset_case(JAVA);
init_styleset_case(JS);
+ init_styleset_case(JULIA);
init_styleset_case(LATEX);
init_styleset_case(LUA);
init_styleset_case(MAKE);
@@ -1115,6 +1116,7 @@ void highlighting_set_styles(ScintillaObject *sci, GeanyFiletype *ft)
styleset_case(HTML@@ Diff output truncated at 100000 characters. @@
--------------
This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).
More information about the Commits
mailing list