Branch: refs/heads/master Author: Jiří Techet techet@gmail.com Committer: GitHub noreply@github.com Date: Sun, 19 Dec 2021 00:04:54 UTC Commit: 5ea3e3ec77eaa87ff709f003743dbb80285e3b6f https://github.com/geany/geany/commit/5ea3e3ec77eaa87ff709f003743dbb80285e3b6f
Log Message: ----------- Use cxx parser from uctags (#3032)
* Add "l" prefix to functions in lcpp.c/h
This is to avoid a clash with cpreprocessor.c/h used by the new cxx parser. Merging lcpp.c/h with cpreprocessor.c/h would be difficult (at least for now) because of the differences in c.c, so keep them separate.
* Rename C/C++ parsers to "Old"
As a result, when we copy the new cxx parser, we don't have clashes of these symbols from the two different parsers.
* Add the new cxx parser
This patch only makes the parser compile, it doesn't enable it yet.
* Enable the new cxx parser
There are several things needed for this:
1. The new preprocessor has to be defined as a separate parser.
2. Tags from the new c/c++ parsers and the preprocessor parser have to be mapped to Geany types. We still need to keep the old mappings because some parsers like Ferite or GLSL still use the old C parser.
3. Anonymous tags have a different name so we have to reflect this in tm_tag_is_anon().
* Update C/C++ unit tests
The changes are mostly these:
1. Spaces in function argument list
before: (int var1, int var2, ...) vs. now: (int var1,int var2,...)
2. Anonymous tags
anon_struct_1 anon_union_2 anon_typedef_3
vs
__anon1 __anon2 __anon3
3. Improved parsing of the new parser
* Eliminate console warning for cxx 'using' tags
Fix from
https://github.com/universal-ctags/ctags/commit/fb305d8814c4dc53a94fcbc5f0c0...
* Update update-ctags.py to also update the cxx parser
* Pass our ignore.tags file with ignored symbols to cxx preprocessor
The syntax is slightly different from the previous syntax and is described here:
https://docs.ctags.io/en/latest/parser-cxx.html
Basic usage should be the same; uctags just doesn't support Geany's wildcard ignores like G_GNUC_*. On the other hand, the new parser is much more resilient to macros, so there should be much less need for manual ignores.
The original code is still kept for parsers from c.c that still use the old preprocessor.
* Update documentation regarding ignore.tags
Modified Paths: -------------- ctags/Makefile.am ctags/parsers/cpreprocessor.c ctags/parsers/cpreprocessor.h ctags/parsers/cxx/cxx.c ctags/parsers/cxx/cxx_debug.c ctags/parsers/cxx/cxx_debug.h ctags/parsers/cxx/cxx_debug_type.c ctags/parsers/cxx/cxx_keyword.c ctags/parsers/cxx/cxx_keyword.h ctags/parsers/cxx/cxx_parser.c ctags/parsers/cxx/cxx_parser.h ctags/parsers/cxx/cxx_parser_block.c ctags/parsers/cxx/cxx_parser_function.c ctags/parsers/cxx/cxx_parser_internal.h ctags/parsers/cxx/cxx_parser_lambda.c ctags/parsers/cxx/cxx_parser_namespace.c ctags/parsers/cxx/cxx_parser_template.c ctags/parsers/cxx/cxx_parser_tokenizer.c ctags/parsers/cxx/cxx_parser_typedef.c ctags/parsers/cxx/cxx_parser_using.c ctags/parsers/cxx/cxx_parser_variable.c ctags/parsers/cxx/cxx_qtmoc.c ctags/parsers/cxx/cxx_scope.c ctags/parsers/cxx/cxx_scope.h ctags/parsers/cxx/cxx_subparser.c ctags/parsers/cxx/cxx_subparser.h ctags/parsers/cxx/cxx_subparser_internal.h ctags/parsers/cxx/cxx_tag.c ctags/parsers/cxx/cxx_tag.h ctags/parsers/cxx/cxx_token.c ctags/parsers/cxx/cxx_token.h ctags/parsers/cxx/cxx_token_chain.c ctags/parsers/cxx/cxx_token_chain.h ctags/parsers/geany_c.c ctags/parsers/geany_lcpp.c ctags/parsers/geany_lcpp.h ctags/parsers/geany_verilog.c doc/geany.txt scripts/update-ctags.py src/symbols.c src/tagmanager/tm_ctags.c src/tagmanager/tm_ctags.h src/tagmanager/tm_parser.c src/tagmanager/tm_parser.h src/tagmanager/tm_parsers.h src/tagmanager/tm_tag.c tests/ctags/backslashes.c.tags tests/ctags/bit_field.c.tags tests/ctags/bug1201689.c.tags tests/ctags/bug1466117.c.tags tests/ctags/bug1491666.c.tags tests/ctags/bug1563476.cpp.tags tests/ctags/bug1575055.cpp.tags tests/ctags/bug1585745.cpp.tags tests/ctags/bug1764143.h.tags tests/ctags/bug1770479.cpp.tags tests/ctags/bug1773926.cpp.tags tests/ctags/bug1799340.cpp.tags tests/ctags/bug1907083.cpp.tags tests/ctags/bug1924919.cpp.tags tests/ctags/bug507864.c.tags tests/ctags/bug556645.c.tags tests/ctags/bug556646.c.tags 
tests/ctags/bug639639.h.tags tests/ctags/bug639644.hpp.tags tests/ctags/c-digraphs.c.tags tests/ctags/c-trigraphs.c.tags tests/ctags/cpp_destructor.cpp.tags tests/ctags/cxx11-final.cpp.tags tests/ctags/cxx11-noexcept.cpp.tags tests/ctags/cxx11-override.cpp.tags tests/ctags/cxx11enum.cpp.tags tests/ctags/cxx14-combined.cpp.tags tests/ctags/extern_variable.h.tags tests/ctags/func_typedef.h.tags tests/ctags/local.c.tags tests/ctags/macros.c.tags tests/ctags/namespace.cpp.tags tests/ctags/process_order.c.tags tests/ctags/prototype.h.tags tests/ctags/signature.cpp.tags tests/ctags/static_array.c.tags tests/ctags/var-and-return-type.cpp.tags
Modified: ctags/Makefile.am 32 lines changed, 32 insertions(+), 0 deletions(-) =================================================================== @@ -11,6 +11,38 @@ AM_CFLAGS = \ noinst_LTLIBRARIES = libctags.la
parsers = \ + parsers/cxx/cxx.c \ + parsers/cxx/cxx_debug.c \ + parsers/cxx/cxx_debug.h \ + parsers/cxx/cxx_debug_type.c \ + parsers/cxx/cxx_keyword.c \ + parsers/cxx/cxx_keyword.h \ + parsers/cxx/cxx_parser_block.c \ + parsers/cxx/cxx_parser.c \ + parsers/cxx/cxx_parser_function.c \ + parsers/cxx/cxx_parser.h \ + parsers/cxx/cxx_parser_internal.h \ + parsers/cxx/cxx_parser_lambda.c \ + parsers/cxx/cxx_parser_namespace.c \ + parsers/cxx/cxx_parser_template.c \ + parsers/cxx/cxx_parser_tokenizer.c \ + parsers/cxx/cxx_parser_typedef.c \ + parsers/cxx/cxx_parser_using.c \ + parsers/cxx/cxx_parser_variable.c \ + parsers/cxx/cxx_qtmoc.c \ + parsers/cxx/cxx_scope.c \ + parsers/cxx/cxx_scope.h \ + parsers/cxx/cxx_subparser.c \ + parsers/cxx/cxx_subparser.h \ + parsers/cxx/cxx_subparser_internal.h \ + parsers/cxx/cxx_tag.c \ + parsers/cxx/cxx_tag.h \ + parsers/cxx/cxx_token.c \ + parsers/cxx/cxx_token_chain.c \ + parsers/cxx/cxx_token_chain.h \ + parsers/cxx/cxx_token.h \ + parsers/cpreprocessor.c \ + parsers/cpreprocessor.h \ parsers/abaqus.c \ parsers/abc.c \ parsers/asciidoc.c \
Modified: ctags/parsers/cpreprocessor.c 2297 lines changed, 2297 insertions(+), 0 deletions(-) =================================================================== @@ -0,0 +1,2297 @@ +/* +* Copyright (c) 1996-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +* +* This module contains the high level input read functions (preprocessor +* directives are handled within this level). +*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include <string.h> + +#include "debug.h" +#include "entry.h" +#include "htable.h" +#include "cpreprocessor.h" +#include "kind.h" +#include "options.h" +#include "read.h" +#include "vstring.h" +#include "param.h" +#include "parse.h" +#include "xtag.h" + +#include "cxx/cxx_debug.h" + +/* +* MACROS +*/ +#define stringMatch(s1,s2) (strcmp (s1,s2) == 0) +#define isspacetab(c) ((c) == SPACE || (c) == TAB) + +/* +* DATA DECLARATIONS +*/ +typedef enum { COMMENT_NONE, COMMENT_C, COMMENT_CPLUS, COMMENT_D } Comment; + +enum eCppLimits { + MaxCppNestingLevel = 20, + MaxDirectiveName = 10 +}; + +/* Defines the one nesting level of a preprocessor conditional. 
+ */ +typedef struct sConditionalInfo { + bool ignoreAllBranches; /* ignoring parent conditional branch */ + bool singleBranch; /* choose only one branch */ + bool branchChosen; /* branch already selected */ + bool ignoring; /* current ignore state */ + int enterExternalParserBlockNestLevel; /* the parser state when entering this conditional: used only by cxx */ +} conditionalInfo; + +enum eState { + DRCTV_NONE, /* no known directive - ignore to end of line */ + DRCTV_DEFINE, /* "#define" encountered */ + DRCTV_HASH, /* initial '#' read; determine directive */ + DRCTV_IF, /* "#if" or "#ifdef" encountered */ + DRCTV_PRAGMA, /* #pragma encountered */ + DRCTV_UNDEF, /* "#undef" encountered */ + DRCTV_INCLUDE, /* "#include" encountered */ +}; + +/* Defines the current state of the pre-processor. + */ +typedef struct sCppState { + langType lang; + langType clientLang; + + int * ungetBuffer; /* memory buffer for unget characters */ + int ungetBufferSize; /* the current unget buffer size */ + int * ungetPointer; /* the current unget char: points in the middle of the buffer */ + int ungetDataSize; /* the number of valid unget characters in the buffer */ + + /* the contents of the last SYMBOL_CHAR or SYMBOL_STRING */ + vString * charOrStringContents; + + bool resolveRequired; /* must resolve if/else/elif/endif branch */ + bool hasAtLiteralStrings; /* supports @"c:" strings */ + bool hasCxxRawLiteralStrings; /* supports R"xxx(...)xxx" strings */ + bool hasSingleQuoteLiteralNumbers; /* supports vera number literals: + 'h..., 'o..., 'd..., and 'b... 
*/ + + bool useClientLangDefineMacroKindIndex; + int defineMacroKindIndex; + int macroUndefRoleIndex; + + bool useClientLangMacroParamKindIndex; + int macroParamKindIndex; + + bool useClientLangHeaderKindIndex; + int headerKindIndex; + int headerSystemRoleIndex; + int headerLocalRoleIndex; + + int macrodefFieldIndex; + + struct sDirective { + enum eState state; /* current directive being processed */ + bool accept; /* is a directive syntactically permitted? */ + vString * name; /* macro name */ + unsigned int nestLevel; /* level 0 is not used */ + conditionalInfo ifdef [MaxCppNestingLevel]; + } directive; + + cppMacroInfo * macroInUse; + hashTable * fileMacroTable; + +} cppState; + + +typedef enum { + CPREPRO_MACRO_KIND_UNDEF_ROLE, +} cPreProMacroRole; + +static roleDefinition CPREPROMacroRoles [] = { + RoleTemplateUndef, +}; + + +typedef enum { + CPREPRO_HEADER_KIND_SYSTEM_ROLE, + CPREPRO_HEADER_KIND_LOCAL_ROLE, +} cPreProHeaderRole; + +static roleDefinition CPREPROHeaderRoles [] = { + RoleTemplateSystem, + RoleTemplateLocal, +}; + + +typedef enum { + CPREPRO_MACRO, CPREPRO_HEADER, CPREPRO_PARAM, +} cPreProkind; + +static kindDefinition CPreProKinds [] = { + { true, 'd', "macro", "macro definitions", + .referenceOnly = false, ATTACH_ROLES(CPREPROMacroRoles)}, + { true, 'h', "header", "included header files", + .referenceOnly = true, ATTACH_ROLES(CPREPROHeaderRoles)}, + { false, 'D', "parameter", "macro parameters", }, +}; + +typedef enum { + F_MACRODEF, + COUNT_FIELD +} cPreProField; + +static fieldDefinition CPreProFields[COUNT_FIELD] = { + { .name = "macrodef", + .description = "macro definition", + .enabled = false }, +}; + +/* +* DATA DEFINITIONS +*/ + +static bool doesExaminCodeWithInIf0Branch; +static bool doesExpandMacros; + +/* +* CXX parser state. This is stored at the beginning of a conditional. +* If at the exit of the conditional the state is changed then we assume +* that no further branches should be followed. 
+*/ +static int externalParserBlockNestLevel; + + +/* Use brace formatting to detect end of block. + */ +static bool BraceFormat = false; + +void cppPushExternalParserBlock(void) +{ + externalParserBlockNestLevel++; +} + +void cppPopExternalParserBlock(void) +{ + externalParserBlockNestLevel--; +} + + +static cppState Cpp = { + .lang = LANG_IGNORE, + .clientLang = LANG_IGNORE, + .ungetBuffer = NULL, + .ungetBufferSize = 0, + .ungetPointer = NULL, + .ungetDataSize = 0, + .charOrStringContents = NULL, + .resolveRequired = false, + .hasAtLiteralStrings = false, + .hasCxxRawLiteralStrings = false, + .hasSingleQuoteLiteralNumbers = false, + .useClientLangDefineMacroKindIndex = false, + .defineMacroKindIndex = CPREPRO_MACRO, + .macroUndefRoleIndex = CPREPRO_MACRO_KIND_UNDEF_ROLE, + .useClientLangMacroParamKindIndex = false, + .macroParamKindIndex = CPREPRO_PARAM, + .useClientLangHeaderKindIndex = false, + .headerKindIndex = CPREPRO_HEADER, + .headerSystemRoleIndex = CPREPRO_HEADER_KIND_SYSTEM_ROLE, + .headerLocalRoleIndex = CPREPRO_HEADER_KIND_LOCAL_ROLE, + .macrodefFieldIndex = FIELD_UNKNOWN, + .directive = { + .state = DRCTV_NONE, + .accept = false, + .name = NULL, + .nestLevel = 0, + .ifdef = { + { + .ignoreAllBranches = false, + .singleBranch = false, + .branchChosen = false, + .ignoring = false, + } + } + } /* directive */ +}; + +/* +* FUNCTION DECLARATIONS +*/ + +static hashTable *makeMacroTable (void); +static cppMacroInfo * saveMacro(hashTable *table, const char * macro); + +/* +* FUNCTION DEFINITIONS +*/ + +extern bool cppIsBraceFormat (void) +{ + return BraceFormat; +} + +extern unsigned int cppGetDirectiveNestLevel (void) +{ + return Cpp.directive.nestLevel; +} + +static void cppInitCommon(langType clientLang, + const bool state, const bool hasAtLiteralStrings, + const bool hasCxxRawLiteralStrings, + const bool hasSingleQuoteLiteralNumbers, + int defineMacroKindIndex, + int macroUndefRoleIndex, + int macroParamKindIndex, + int headerKindIndex, + int 
headerSystemRoleIndex, int headerLocalRoleIndex, + int macrodefFieldIndex) +{ + BraceFormat = state; + + CXX_DEBUG_PRINT("cppInit: brace format is %d",BraceFormat); + + externalParserBlockNestLevel = 0; + + if (Cpp.lang == LANG_IGNORE) + { + langType t; + + t = getNamedLanguage ("CPreProcessor", 0); + initializeParser (t); + } + + Cpp.clientLang = clientLang; + Cpp.ungetBuffer = NULL; + Cpp.ungetPointer = NULL; + + CXX_DEBUG_ASSERT(!Cpp.charOrStringContents,"This string should be null when CPP is not initialized"); + Cpp.charOrStringContents = vStringNew(); + + Cpp.resolveRequired = false; + Cpp.hasAtLiteralStrings = hasAtLiteralStrings; + Cpp.hasCxxRawLiteralStrings = hasCxxRawLiteralStrings; + Cpp.hasSingleQuoteLiteralNumbers = hasSingleQuoteLiteralNumbers; + + if (defineMacroKindIndex != KIND_GHOST_INDEX) + { + Cpp.defineMacroKindIndex = defineMacroKindIndex; + Cpp.useClientLangDefineMacroKindIndex = true; + + Cpp.macroUndefRoleIndex = macroUndefRoleIndex; + Cpp.macrodefFieldIndex = macrodefFieldIndex; + } + else + { + Cpp.defineMacroKindIndex = CPREPRO_MACRO; + Cpp.useClientLangDefineMacroKindIndex = false; + + Cpp.macroUndefRoleIndex = CPREPRO_MACRO_KIND_UNDEF_ROLE; + Cpp.macrodefFieldIndex = CPreProFields [F_MACRODEF].ftype; + } + + if (macroParamKindIndex != KIND_GHOST_INDEX) + { + Cpp.macroParamKindIndex = macroParamKindIndex; + Cpp.useClientLangMacroParamKindIndex = true; + } + else + { + Cpp.macroParamKindIndex = CPREPRO_PARAM; + Cpp.useClientLangMacroParamKindIndex = false; + } + + if (headerKindIndex != KIND_GHOST_INDEX) + { + Cpp.headerKindIndex = headerKindIndex; + Cpp.useClientLangHeaderKindIndex = true; + + Cpp.headerSystemRoleIndex = headerSystemRoleIndex; + Cpp.headerLocalRoleIndex = headerLocalRoleIndex; + } + else + { + Cpp.headerKindIndex = CPREPRO_HEADER; + Cpp.useClientLangHeaderKindIndex = false; + + Cpp.headerSystemRoleIndex = CPREPRO_HEADER_KIND_SYSTEM_ROLE; + Cpp.headerLocalRoleIndex = CPREPRO_HEADER_KIND_LOCAL_ROLE; + } + + 
Cpp.directive.state = DRCTV_NONE; + Cpp.directive.accept = true; + Cpp.directive.nestLevel = 0; + + Cpp.directive.ifdef [0].ignoreAllBranches = false; + Cpp.directive.ifdef [0].singleBranch = false; + Cpp.directive.ifdef [0].branchChosen = false; + Cpp.directive.ifdef [0].ignoring = false; + + Cpp.directive.name = vStringNewOrClear (Cpp.directive.name); + + Cpp.macroInUse = NULL; + Cpp.fileMacroTable = + (doesExpandMacros + && isFieldEnabled (FIELD_SIGNATURE) + && isFieldEnabled (Cpp.macrodefFieldIndex) + && (getLanguageCorkUsage ((clientLang == LANG_IGNORE) + ? Cpp.lang + : clientLang) & CORK_SYMTAB)) + ? makeMacroTable () + : NULL; +} + +extern void cppInit (const bool state, const bool hasAtLiteralStrings, + const bool hasCxxRawLiteralStrings, + const bool hasSingleQuoteLiteralNumbers, + int defineMacroKindIndex, + int macroUndefRoleIndex, + int macroParamKindIndex, + int headerKindIndex, + int headerSystemRoleIndex, int headerLocalRoleIndex, + int macrodefFieldIndex) +{ + langType client = getInputLanguage (); + + cppInitCommon (client, state, hasAtLiteralStrings, + hasCxxRawLiteralStrings, hasSingleQuoteLiteralNumbers, + defineMacroKindIndex, macroUndefRoleIndex, macroParamKindIndex, + headerKindIndex, headerSystemRoleIndex, headerLocalRoleIndex, + macrodefFieldIndex); +} + +static void cppClearMacroInUse (cppMacroInfo **pM) +{ + for (cppMacroInfo *p = *pM; p; p = p->next) + { + CXX_DEBUG_PRINT("Macro <%p> clear useCount: %d -> 0", p, p->useCount); + p->useCount = 0; + } + *pM = NULL; +} + +extern void cppTerminate (void) +{ + if (Cpp.directive.name != NULL) + { + vStringDelete (Cpp.directive.name); + Cpp.directive.name = NULL; + } + + if(Cpp.ungetBuffer) + { + eFree(Cpp.ungetBuffer); + Cpp.ungetBuffer = NULL; + } + + if(Cpp.charOrStringContents) + { + vStringDelete(Cpp.charOrStringContents); + Cpp.charOrStringContents = NULL; + } + + Cpp.clientLang = LANG_IGNORE; + + cppClearMacroInUse (&Cpp.macroInUse); + + if (Cpp.fileMacroTable) + { + hashTableDelete 
(Cpp.fileMacroTable); + Cpp.fileMacroTable = NULL; + } +} + +extern void cppBeginStatement (void) +{ + Cpp.resolveRequired = true; +} + +extern void cppEndStatement (void) +{ + Cpp.resolveRequired = false; +} + +/* +* Scanning functions +* +* This section handles preprocessor directives. It strips out all +* directives and may emit a tag for #define directives. +*/ + +/* This puts a character back into the input queue for the input File. */ +extern void cppUngetc (const int c) +{ + if(!Cpp.ungetPointer) + { + // no unget data + if(!Cpp.ungetBuffer) + { + Cpp.ungetBuffer = (int *)eMalloc(8 * sizeof(int)); + Cpp.ungetBufferSize = 8; + } + Assert(Cpp.ungetBufferSize > 0); + Cpp.ungetPointer = Cpp.ungetBuffer + Cpp.ungetBufferSize - 1; + *(Cpp.ungetPointer) = c; + Cpp.ungetDataSize = 1; + return; + } + + // Already have some unget data in the buffer. Must prepend. + Assert(Cpp.ungetBuffer); + Assert(Cpp.ungetBufferSize > 0); + Assert(Cpp.ungetDataSize > 0); + Assert(Cpp.ungetPointer >= Cpp.ungetBuffer); + + if(Cpp.ungetPointer == Cpp.ungetBuffer) + { + Cpp.ungetBufferSize += 8; + int * tmp = (int *)eMalloc(Cpp.ungetBufferSize * sizeof(int)); + memcpy(tmp+8,Cpp.ungetPointer,Cpp.ungetDataSize * sizeof(int)); + eFree(Cpp.ungetBuffer); + Cpp.ungetBuffer = tmp; + Cpp.ungetPointer = tmp + 7; + } else { + Cpp.ungetPointer--; + } + + *(Cpp.ungetPointer) = c; + Cpp.ungetDataSize++; +} + +int cppUngetBufferSize() +{ + return Cpp.ungetBufferSize; +} + +/* This puts an entire string back into the input queue for the input File. 
*/ +void cppUngetString(const char * string,int len) +{ + if(!string) + return; + if(len < 1) + return; + + if(!Cpp.ungetPointer) + { + // no unget data + if(!Cpp.ungetBuffer) + { + Cpp.ungetBufferSize = 8 + len; + Cpp.ungetBuffer = (int *)eMalloc(Cpp.ungetBufferSize * sizeof(int)); + } else if(Cpp.ungetBufferSize < len) + { + Cpp.ungetBufferSize = 8 + len; + Cpp.ungetBuffer = (int *)eRealloc(Cpp.ungetBuffer,Cpp.ungetBufferSize * sizeof(int)); + } + Cpp.ungetPointer = Cpp.ungetBuffer + Cpp.ungetBufferSize - len; + } else { + // Already have some unget data in the buffer. Must prepend. + Assert(Cpp.ungetBuffer); + Assert(Cpp.ungetBufferSize > 0); + Assert(Cpp.ungetDataSize > 0); + Assert(Cpp.ungetPointer >= Cpp.ungetBuffer); + + if(Cpp.ungetBufferSize < (Cpp.ungetDataSize + len)) + { + Cpp.ungetBufferSize = 8 + len + Cpp.ungetDataSize; + int * tmp = (int *)eMalloc(Cpp.ungetBufferSize * sizeof(int)); + memcpy(tmp + 8 + len,Cpp.ungetPointer,Cpp.ungetDataSize * sizeof(int)); + eFree(Cpp.ungetBuffer); + Cpp.ungetBuffer = tmp; + Cpp.ungetPointer = tmp + 8; + } else { + Cpp.ungetPointer -= len; + Assert(Cpp.ungetPointer >= Cpp.ungetBuffer); + } + } + + int * p = Cpp.ungetPointer; + const char * s = string; + const char * e = string + len; + + while(s < e) + *p++ = *s++; + + Cpp.ungetDataSize += len; +} + +extern void cppUngetStringBuiltByMacro(const char * string,int len, cppMacroInfo *macro) +{ + if (macro->useCount == 0) + { + cppMacroInfo *m = Cpp.macroInUse; + Cpp.macroInUse = macro; + macro->next = m; + } + macro->useCount++; + + CXX_DEBUG_PRINT("Macro <%p> increment useCount: %d->%d", macro, + (macro->useCount - 1), macro->useCount); + + cppUngetString (string, len); +} + +static int cppGetcFromUngetBufferOrFile(void) +{ + if(Cpp.ungetPointer) + { + Assert(Cpp.ungetBuffer); + Assert(Cpp.ungetBufferSize > 0); + Assert(Cpp.ungetDataSize > 0); + + int c = *(Cpp.ungetPointer); + Cpp.ungetDataSize--; + if(Cpp.ungetDataSize > 0) + Cpp.ungetPointer++; + else + 
Cpp.ungetPointer = NULL; + return c; + } + + if (Cpp.macroInUse) + cppClearMacroInUse (&Cpp.macroInUse); + return getcFromInputFile(); +} + + +/* Reads a directive, whose first character is given by "c", into "name". + */ +static bool readDirective (int c, char *const name, unsigned int maxLength) +{ + unsigned int i; + + for (i = 0 ; i < maxLength - 1 ; ++i) + { + if (i > 0) + { + c = cppGetcFromUngetBufferOrFile (); + if (c == EOF || ! isalpha (c)) + { + cppUngetc (c); + break; + } + } + name [i] = c; + } + name [i] = '\0'; /* null terminate */ + + return (bool) isspacetab (c); +} + +/* Reads an identifier, whose first character is given by "c", into "tag", + * together with the file location and corresponding line number. + */ +static void readIdentifier (int c, vString *const name) +{ + vStringClear (name); + do + { + vStringPut (name, c); + c = cppGetcFromUngetBufferOrFile (); + } while (c != EOF && cppIsident (c)); + cppUngetc (c); +} + +static void readFilename (int c, vString *const name) +{ + int c_end = (c == '<')? '>': '"'; + + vStringClear (name); + + while (c = cppGetcFromUngetBufferOrFile (), (c != EOF && c != c_end && c != '\n')) + vStringPut (name, c); +} + +static conditionalInfo *currentConditional (void) +{ + return &Cpp.directive.ifdef [Cpp.directive.nestLevel]; +} + +static bool isIgnore (void) +{ + return Cpp.directive.ifdef [Cpp.directive.nestLevel].ignoring; +} + +static bool setIgnore (const bool ignore) +{ + return Cpp.directive.ifdef [Cpp.directive.nestLevel].ignoring = ignore; +} + +static bool isIgnoreBranch (void) +{ + conditionalInfo *const ifdef = currentConditional (); + + /* Force a single branch if an incomplete statement is discovered + * en route. This may have allowed earlier branches containing complete + * statements to be followed, but we must follow no further branches. 
+ */ + + /* + * CXX: Force a single branch if the external parser (cxx) block nest level at the beginning + * of this conditional is not equal to the current block nest level (at exit of the first branch). + * + * Follow both branches example: (same state at enter and exit) + * + * #if something + * xxxxx; + * #else + * yyyy; + * #endif + * + * Follow single branch example: (different block level at enter and exit) + * + * if { + * #if something + * } else x; + * #else + * } + * #endif + */ + + if ( + (Cpp.resolveRequired || (ifdef->enterExternalParserBlockNestLevel != externalParserBlockNestLevel)) && + (!BraceFormat) + ) + { + CXX_DEBUG_PRINT("Choosing single branch"); + ifdef->singleBranch = true; + } + + /* We will ignore this branch in the following cases: + * + * 1. We are ignoring all branches (conditional was within an ignored + * branch of the parent conditional) + * 2. A branch has already been chosen and either of: + * a. A statement was incomplete upon entering the conditional + * b. A statement is incomplete upon encountering a branch + */ + return (bool) (ifdef->ignoreAllBranches || + (ifdef->branchChosen && ifdef->singleBranch)); +} + +static void chooseBranch (void) +{ + if (! BraceFormat) + { + conditionalInfo *const ifdef = currentConditional (); + + ifdef->branchChosen = (bool) (ifdef->singleBranch || + Cpp.resolveRequired); + } +} + +/* Pushes one nesting level for an #if directive, indicating whether or not + * the branch should be ignored and whether a branch has already been chosen. + */ +static bool pushConditional (const bool firstBranchChosen) +{ + const bool ignoreAllBranches = isIgnore (); /* current ignore */ + bool ignoreBranch = false; + + if (Cpp.directive.nestLevel < (unsigned int) MaxCppNestingLevel - 1) + { + conditionalInfo *ifdef; + + ++Cpp.directive.nestLevel; + ifdef = currentConditional (); + + /* We take a snapshot of whether there is an incomplete statement in + * progress upon encountering the preprocessor conditional. 
If so, + * then we will flag that only a single branch of the conditional + * should be followed. + */ + ifdef->ignoreAllBranches = ignoreAllBranches; + ifdef->singleBranch = Cpp.resolveRequired; + ifdef->branchChosen = firstBranchChosen; + ifdef->ignoring = (bool) (ignoreAllBranches || ( + ! firstBranchChosen && ! BraceFormat && + (ifdef->singleBranch || !doesExaminCodeWithInIf0Branch))); + ifdef->enterExternalParserBlockNestLevel = externalParserBlockNestLevel; + ignoreBranch = ifdef->ignoring; + } + return ignoreBranch; +} + +/* Pops one nesting level for an #endif directive. + */ +static bool popConditional (void) +{ + if (Cpp.directive.nestLevel > 0) + --Cpp.directive.nestLevel; + + return isIgnore (); +} + +static bool doesCPreProRunAsStandaloneParser (int kind) +{ + if (kind == CPREPRO_HEADER) + return !Cpp.useClientLangDefineMacroKindIndex; + else if (kind == CPREPRO_MACRO) + return !Cpp.useClientLangHeaderKindIndex; + else if (kind == CPREPRO_PARAM) + return !Cpp.useClientLangMacroParamKindIndex; + else + { + AssertNotReached(); + return true; + } +} + +static int makeDefineTag (const char *const name, const char* const signature, bool undef) +{ + bool standing_alone = doesCPreProRunAsStandaloneParser(CPREPRO_MACRO); + langType lang = standing_alone ? Cpp.lang: Cpp.clientLang; + const bool isFileScope = (bool) (! isInputHeaderFile ()); + + if (!isLanguageEnabled (lang)) + return CORK_NIL; + + Assert (Cpp.defineMacroKindIndex != KIND_GHOST_INDEX); + + if (isFileScope && !isXtagEnabled(XTAG_FILE_SCOPE)) + return CORK_NIL; + + if (undef && (Cpp.macroUndefRoleIndex == ROLE_DEFINITION_INDEX)) + return CORK_NIL; + + if (! 
isLanguageKindEnabled (lang, + Cpp.defineMacroKindIndex)) + return CORK_NIL; + + if ( + /* condition for definition tag */ + (!undef) + || /* condition for reference tag */ + (undef && isXtagEnabled(XTAG_REFERENCE_TAGS) && + isLanguageRoleEnabled(lang, Cpp.defineMacroKindIndex, + Cpp.macroUndefRoleIndex))) + { + tagEntryInfo e; + int r; + + if (standing_alone) + pushLanguage (Cpp.lang); + + if (undef) + initRefTagEntry (&e, name, Cpp.defineMacroKindIndex, + Cpp.macroUndefRoleIndex); + else + initTagEntry (&e, name, Cpp.defineMacroKindIndex); + e.isFileScope = isFileScope; + if (isFileScope) + markTagExtraBit (&e, XTAG_FILE_SCOPE); + e.truncateLineAfterTag = true; + e.extensionFields.signature = signature; + + r = makeTagEntry (&e); + + if (standing_alone) + popLanguage (); + + return r; + } + return CORK_NIL; +} + +static void makeIncludeTag (const char *const name, bool systemHeader) +{ + bool standing_alone = doesCPreProRunAsStandaloneParser(CPREPRO_HEADER); + langType lang = standing_alone ? Cpp.lang: Cpp.clientLang; + tagEntryInfo e; + int role_index; + + if (!isLanguageEnabled (lang)) + return; + + Assert (Cpp.headerKindIndex != KIND_GHOST_INDEX); + + role_index = systemHeader? 
Cpp.headerSystemRoleIndex: Cpp.headerLocalRoleIndex; + if (role_index == ROLE_DEFINITION_INDEX) + return; + + if (!isXtagEnabled (XTAG_REFERENCE_TAGS)) + return; + + if (!isLanguageKindEnabled(lang, Cpp.headerKindIndex)) + return; + + if (isLanguageRoleEnabled(lang, Cpp.headerKindIndex, role_index)) + { + if (doesCPreProRunAsStandaloneParser (CPREPRO_HEADER)) + pushLanguage (Cpp.lang); + + initRefTagEntry (&e, name, Cpp.headerKindIndex, role_index); + e.isFileScope = false; + e.truncateLineAfterTag = true; + makeTagEntry (&e); + + if (doesCPreProRunAsStandaloneParser (CPREPRO_HEADER)) + popLanguage (); + } +} + +static void makeParamTag (vString *name, short nth, bool placeholder) +{ + bool standing_alone = doesCPreProRunAsStandaloneParser(CPREPRO_MACRO); + langType lang = standing_alone ? Cpp.lang: Cpp.clientLang; + + Assert (Cpp.macroParamKindIndex != KIND_GHOST_INDEX); + + int r; + pushLanguage (lang); + r = makeSimpleTag (name, Cpp.macroParamKindIndex); + popLanguage (); + + tagEntryInfo *e = getEntryInCorkQueue (r); + if (e) + { + e->extensionFields.nth = nth; + if (placeholder) + e->placeholder = 1; + } +} + +static void regenreateSignatureFromParameters (vString * buffer, int from, int to) +{ + vStringPut(buffer, '('); + for (int pindex = from; pindex < to; pindex++) + { + tagEntryInfo *e = getEntryInCorkQueue (pindex); + if (e && !isTagExtra (e)) + { + vStringCatS (buffer, e->name); + vStringPut (buffer, ','); + } + } + if (vStringLast (buffer) == ',') + vStringChop (buffer); + vStringPut (buffer, ')'); +} + +static void patchScopeFieldOfParameters(int from, int to, int parentIndex) +{ + for (int pindex = from; pindex < to; pindex++) + { + tagEntryInfo *e = getEntryInCorkQueue (pindex); + if (e) + e->extensionFields.scopeIndex = parentIndex; + } +} + +static int directiveDefine (const int c, bool undef) +{ + // FIXME: We could possibly handle the macros here! 
+ // However we'd need a separate hash table for macros of the current file + // to avoid breaking the "global" ones. + + int r = CORK_NIL; + + if (cppIsident1 (c)) + { + readIdentifier (c, Cpp.directive.name); + if (! isIgnore ()) + { + unsigned long lineNumber = getInputLineNumber (); + MIOPos filePosition = getInputFilePosition (); + int p = cppGetcFromUngetBufferOrFile (); + short nth = 0; + + if (p == '(') + { + vString *param = vStringNew (); + int param_start = (int)countEntryInCorkQueue(); + do { + p = cppGetcFromUngetBufferOrFile (); + if (isalnum(p) || p == '_' || p == '$' + /* Handle variadic macros like (a,...) */ + || p == '.') + { + vStringPut (param, p); + continue; + } + + if (vStringLength (param) > 0) + { + makeParamTag (param, nth++, vStringChar(param, 0) == '.'); + vStringClear (param); + } + if (p == '\\') + cppGetcFromUngetBufferOrFile (); /* Throw away the next char */ + } while (p != ')' && p != EOF); + vStringDelete (param); + + int param_end = (int)countEntryInCorkQueue(); + if (p == ')') + { + vString *signature = vStringNew (); + regenreateSignatureFromParameters (signature, param_start, param_end); + r = makeDefineTag (vStringValue (Cpp.directive.name), vStringValue (signature), undef); + vStringDelete (signature); + } + else + r = makeDefineTag (vStringValue (Cpp.directive.name), NULL, undef); + + tagEntryInfo *e = getEntryInCorkQueue (r); + if (e) + { + e->lineNumber = lineNumber; + e->filePosition = filePosition; + patchScopeFieldOfParameters (param_start, param_end, r); + } + } + else + { + cppUngetc (p); + r = makeDefineTag (vStringValue (Cpp.directive.name), NULL, undef); + } + } + } + Cpp.directive.state = DRCTV_NONE; + + if (r != CORK_NIL && Cpp.fileMacroTable) + registerEntry (r); + return r; +} + +static void directiveUndef (const int c) +{ + if (isXtagEnabled (XTAG_REFERENCE_TAGS)) + { + directiveDefine (c, true); + } + else + { + Cpp.directive.state = DRCTV_NONE; + } +} + +static void directivePragma (int c) +{ + if 
(cppIsident1 (c)) + { + readIdentifier (c, Cpp.directive.name); + if (stringMatch (vStringValue (Cpp.directive.name), "weak")) + { + /* generate macro tag for weak name */ + do + { + c = cppGetcFromUngetBufferOrFile (); + } while (c == SPACE); + if (cppIsident1 (c)) + { + readIdentifier (c, Cpp.directive.name); + makeDefineTag (vStringValue (Cpp.directive.name), NULL, false); + } + } + } + Cpp.directive.state = DRCTV_NONE; +} + +static bool directiveIf (const int c) +{ + DebugStatement ( const bool ignore0 = isIgnore (); ) + const bool ignore = pushConditional ((bool) (c != '0')); + + Cpp.directive.state = DRCTV_NONE; + DebugStatement ( debugCppNest (true, Cpp.directive.nestLevel); + if (ignore != ignore0) debugCppIgnore (ignore); ) + + return ignore; +} + + +static void directiveInclude (const int c) +{ + if (c == '<' || c == '"') + { + readFilename (c, Cpp.directive.name); + if ((! isIgnore ()) && vStringLength (Cpp.directive.name)) + makeIncludeTag (vStringValue (Cpp.directive.name), + c == '<'); + } + Cpp.directive.state = DRCTV_NONE; +} + +static bool directiveHash (const int c) +{ + bool ignore = false; + char directive [MaxDirectiveName]; + DebugStatement ( const bool ignore0 = isIgnore (); ) + + readDirective (c, directive, MaxDirectiveName); + if (stringMatch (directive, "define")) + Cpp.directive.state = DRCTV_DEFINE; + else if (stringMatch (directive, "include")) + Cpp.directive.state = DRCTV_INCLUDE; + else if (stringMatch (directive, "undef")) + Cpp.directive.state = DRCTV_UNDEF; + else if (strncmp (directive, "if", (size_t) 2) == 0) + Cpp.directive.state = DRCTV_IF; + else if (stringMatch (directive, "elif") || + stringMatch (directive, "else")) + { + ignore = setIgnore (isIgnoreBranch ()); + CXX_DEBUG_PRINT("Found #elif or #else: ignore is %d",ignore); + if (! 
ignore && stringMatch (directive, "else")) + chooseBranch (); + Cpp.directive.state = DRCTV_NONE; + DebugStatement ( if (ignore != ignore0) debugCppIgnore (ignore); ) + } + else if (stringMatch (directive, "endif")) + { + DebugStatement ( debugCppNest (false, Cpp.directive.nestLevel); ) + ignore = popConditional (); + Cpp.directive.state = DRCTV_NONE; + DebugStatement ( if (ignore != ignore0) debugCppIgnore (ignore); ) + } + else if (stringMatch (directive, "pragma")) + Cpp.directive.state = DRCTV_PRAGMA; + else + Cpp.directive.state = DRCTV_NONE; + + return ignore; +} + +/* Handles a pre-processor directive whose first character is given by "c". + */ +static bool handleDirective (const int c, int *macroCorkIndex) +{ + bool ignore = isIgnore (); + + switch (Cpp.directive.state) + { + case DRCTV_NONE: ignore = isIgnore (); break; + case DRCTV_DEFINE: + *macroCorkIndex = directiveDefine (c, false); + break; + case DRCTV_HASH: ignore = directiveHash (c); break; + case DRCTV_IF: ignore = directiveIf (c); break; + case DRCTV_PRAGMA: directivePragma (c); break; + case DRCTV_UNDEF: directiveUndef (c); break; + case DRCTV_INCLUDE: directiveInclude (c); break; + } + return ignore; +} + +/* Called upon reading of a slash ('/') characters, determines whether a + * comment is encountered, and its type. + */ +static Comment isComment (void) +{ + Comment comment; + const int next = cppGetcFromUngetBufferOrFile (); + + if (next == '*') + comment = COMMENT_C; + else if (next == '/') + comment = COMMENT_CPLUS; + else if (next == '+') + comment = COMMENT_D; + else + { + cppUngetc (next); + comment = COMMENT_NONE; + } + return comment; +} + +/* Skips over a C style comment. According to ANSI specification a comment + * is treated as white space, so we perform this substitution. 
+ */ +static int cppSkipOverCComment (void) +{ + int c = cppGetcFromUngetBufferOrFile (); + + while (c != EOF) + { + if (c != '*') + c = cppGetcFromUngetBufferOrFile (); + else + { + const int next = cppGetcFromUngetBufferOrFile (); + + if (next != '/') + c = next; + else + { + c = SPACE; /* replace comment with space */ + break; + } + } + } + return c; +} + +/* Skips over a C++ style comment. + */ +static int skipOverCplusComment (void) +{ + int c; + + while ((c = cppGetcFromUngetBufferOrFile ()) != EOF) + { + if (c == BACKSLASH) + cppGetcFromUngetBufferOrFile (); /* throw away next character, too */ + else if (c == NEWLINE) + break; + } + return c; +} + +/* Skips over a D style comment. + * Really we should match nested /+ comments. At least they're less common. + */ +static int skipOverDComment (void) +{ + int c = cppGetcFromUngetBufferOrFile (); + + while (c != EOF) + { + if (c != '+') + c = cppGetcFromUngetBufferOrFile (); + else + { + const int next = cppGetcFromUngetBufferOrFile (); + + if (next != '/') + c = next; + else + { + c = SPACE; /* replace comment with space */ + break; + } + } + } + return c; +} + +const vString * cppGetLastCharOrStringContents (void) +{ + CXX_DEBUG_ASSERT(Cpp.charOrStringContents,"Shouldn't be called when CPP is not initialized"); + return Cpp.charOrStringContents; +} + +/* Skips to the end of a string, returning a special character to + * symbolically represent a generic string. + */ +static int skipToEndOfString (bool ignoreBackslash) +{ + int c; + + vStringClear(Cpp.charOrStringContents); + + while ((c = cppGetcFromUngetBufferOrFile ()) != EOF) + { + if (c == BACKSLASH && ! 
ignoreBackslash) + { + vStringPutWithLimit (Cpp.charOrStringContents, c, 1024); + c = cppGetcFromUngetBufferOrFile (); /* throw away next character, too */ + if (c != EOF) + vStringPutWithLimit (Cpp.charOrStringContents, c, 1024); + } + else if (c == DOUBLE_QUOTE) + break; + else + vStringPutWithLimit (Cpp.charOrStringContents, c, 1024); + } + return STRING_SYMBOL; /* symbolic representation of string */ +} + +static int isCxxRawLiteralDelimiterChar (int c) +{ + return (c != ' ' && c != '\f' && c != '\n' && c != '\r' && c != '\t' && c != '\v' && + c != '(' && c != ')' && c != '\\'); +} + +static int skipToEndOfCxxRawLiteralString (void) +{ + int c = cppGetcFromUngetBufferOrFile (); + + if (c != '(' && ! isCxxRawLiteralDelimiterChar (c)) + { + cppUngetc (c); + c = skipToEndOfString (false); + } + else + { + char delim[16]; + unsigned int delimLen = 0; + bool collectDelim = true; + + do + { + if (collectDelim) + { + if (isCxxRawLiteralDelimiterChar (c) && + delimLen < (sizeof delim / sizeof *delim)) + delim[delimLen++] = c; + else + collectDelim = false; + } + else if (c == ')') + { + unsigned int i = 0; + + while ((c = cppGetcFromUngetBufferOrFile ()) != EOF && i < delimLen && delim[i] == c) + i++; + if (i == delimLen && c == DOUBLE_QUOTE) + break; + else + cppUngetc (c); + } + } + while ((c = cppGetcFromUngetBufferOrFile ()) != EOF); + c = STRING_SYMBOL; + } + return c; +} + +/* Skips to the end of the three (possibly four) 'c' sequence, returning a + * special character to symbolically represent a generic character. + * Also detects Vera numbers that include a base specifier (ie. 'b1010). 
+ */ +static int skipToEndOfChar () +{ + int c; + int count = 0, veraBase = '\0'; + + vStringClear(Cpp.charOrStringContents); + + while ((c = cppGetcFromUngetBufferOrFile ()) != EOF) + { + ++count; + if (c == BACKSLASH) + { + vStringPutWithLimit (Cpp.charOrStringContents, c, 10); + c = cppGetcFromUngetBufferOrFile (); /* throw away next character, too */ + if (c != EOF) + vStringPutWithLimit (Cpp.charOrStringContents, c, 10); + } + else if (c == SINGLE_QUOTE) + break; + else if (c == NEWLINE) + { + cppUngetc (c); + break; + } + else if (Cpp.hasSingleQuoteLiteralNumbers) + { + if (count == 1 && strchr ("DHOB", toupper (c)) != NULL) + { + veraBase = c; + vStringPutWithLimit (Cpp.charOrStringContents, c, 10); + } + else if (veraBase != '\0' && ! isalnum (c)) + { + cppUngetc (c); + break; + } + else + vStringPutWithLimit (Cpp.charOrStringContents, c, 10); + } + else + vStringPutWithLimit (Cpp.charOrStringContents, c, 10); + } + return CHAR_SYMBOL; /* symbolic representation of character */ +} + +static void attachFields (int macroCorkIndex, unsigned long endLine, const char *macrodef) +{ + tagEntryInfo *tag = getEntryInCorkQueue (macroCorkIndex); + if (!tag) + return; + + tag->extensionFields.endLine = endLine; + if (macrodef) + attachParserFieldToCorkEntry (macroCorkIndex, Cpp.macrodefFieldIndex, macrodef); +} + + +/* This function returns the next character, stripping out comments, + * C pre-processor directives, and the contents of single and double + * quoted strings. In short, strip anything which places a burden upon + * the tokenizer. + */ +extern int cppGetc (void) +{ + bool directive = false; + bool ignore = false; + int c; + int macroCorkIndex = CORK_NIL; + vString *macrodef = NULL; + + + do { +start_loop: + c = cppGetcFromUngetBufferOrFile (); +process: + switch (c) + { + case EOF: + ignore = false; + directive = false; + if (macroCorkIndex != CORK_NIL) + { + attachFields (macroCorkIndex, + getInputLineNumber(), + macrodef? 
vStringValue (macrodef): NULL); + macroCorkIndex = CORK_NIL; + } + break; + + case TAB: + case SPACE: + if (macrodef && vStringLength (macrodef) > 0 + && vStringLast (macrodef) != ' ') + vStringPut (macrodef, ' '); + break; /* ignore most white space */ + + case NEWLINE: + if (directive && ! ignore) + { + directive = false; + if (macroCorkIndex != CORK_NIL) + { + attachFields (macroCorkIndex, + getInputLineNumber(), + macrodef? vStringValue (macrodef): NULL); + macroCorkIndex = CORK_NIL; + } + } + Cpp.directive.accept = true; + break; + + case DOUBLE_QUOTE: + if (Cpp.directive.state == DRCTV_INCLUDE) + goto enter; + else + { + Cpp.directive.accept = false; + c = skipToEndOfString (false); + } + + if (macrodef) + { + /* We record the contents of string literal. + * + */ + vStringPut (macrodef, '"'); + vStringCat (macrodef, Cpp.charOrStringContents); + vStringPut (macrodef, '"'); + } + + break; + + case '#': + if (Cpp.directive.accept) + { + directive = true; + Cpp.directive.state = DRCTV_HASH; + Cpp.directive.accept = false; + } + if (macrodef) + vStringPut (macrodef, '#'); + break; + + case SINGLE_QUOTE: + Cpp.directive.accept = false; + c = skipToEndOfChar (); + + /* We assume none may want to know the content of the + * literal; just put ''. 
*/ + if (macrodef) + vStringCatS (macrodef, "''"); + + break; + + case '/': + { + const Comment comment = isComment (); + + if (comment == COMMENT_C) + c = cppSkipOverCComment (); + else if (comment == COMMENT_CPLUS) + { + c = skipOverCplusComment (); + if (c == NEWLINE) + cppUngetc (c); + } + else if (comment == COMMENT_D) + c = skipOverDComment (); + else + { + Cpp.directive.accept = false; + if (macrodef) + vStringPut (macrodef, '/'); + } + break; + } + + case BACKSLASH: + { + int next = cppGetcFromUngetBufferOrFile (); + + if (next == NEWLINE) + goto start_loop; + else + { + cppUngetc (next); + if (macrodef) + vStringPut (macrodef, '\\'); + } + break; + } + + case '?': + { + int next = cppGetcFromUngetBufferOrFile (); + if (next != '?') + { + cppUngetc (next); + if (macrodef) + vStringPut (macrodef, '?'); + } + else + { + next = cppGetcFromUngetBufferOrFile (); + switch (next) + { + case '(': c = '['; break; + case ')': c = ']'; break; + case '<': c = '{'; break; + case '>': c = '}'; break; + case '/': c = BACKSLASH; goto process; + case '!': c = '|'; break; + case SINGLE_QUOTE: c = '^'; break; + case '-': c = '~'; break; + case '=': c = '#'; goto process; + default: + cppUngetc ('?'); + cppUngetc (next); + break; + } + if (macrodef) + vStringPut (macrodef, c); + } + } break; + + /* digraphs: + * input: <: :> <% %> %: %:%: + * output: [ ] { } # ## + */ + case '<': + { + /* + Quoted from http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2011/n3237.html: + ------ + if the next three characters are <:: and the + subsequent character is neither : nor >, the < is + treated as a preprocessor token by itself (and not as + the first character of the alternative token */ + int next[3]; + next[0] = cppGetcFromUngetBufferOrFile (); + switch (next[0]) + { + case ':': + next[1] = cppGetcFromUngetBufferOrFile (); + if (next[1] == ':') + { + next[2] = cppGetcFromUngetBufferOrFile (); + if (! 
(next[2] == ':' || next[2] == '>')) + { + cppUngetc (next[2]); + cppUngetc (next[1]); + cppUngetc (next[0]); + c = '<'; + } + else + { + cppUngetc (next[2]); + cppUngetc (next[1]); + c = '['; + } + } + else + { + cppUngetc (next[1]); + c = '['; + } + break; + case '%': c = '{'; break; + default: cppUngetc (next[0]); + } + + if (macrodef) + vStringPut (macrodef, c); + + goto enter; + } + case ':': + { + int next = cppGetcFromUngetBufferOrFile (); + if (next == '>') + c = ']'; + else + cppUngetc (next); + + if (macrodef) + vStringPut (macrodef, c); + + goto enter; + } + case '%': + { + int next = cppGetcFromUngetBufferOrFile (); + switch (next) + { + case '>': c = '}'; break; + case ':': c = '#'; goto process; + default: cppUngetc (next); + } + + if (macrodef) + vStringPut (macrodef, c); + + goto enter; + } + + default: + if (c == '@' && Cpp.hasAtLiteralStrings) + { + int next = cppGetcFromUngetBufferOrFile (); + if (next == DOUBLE_QUOTE) + { + Cpp.directive.accept = false; + c = skipToEndOfString (true); + if (macrodef) + vStringCatS (macrodef, "@\"\""); + break; + } + else + { + cppUngetc (next); + if (macrodef) + vStringPut (macrodef, '@'); + } + } + else if (c == 'R' && Cpp.hasCxxRawLiteralStrings) + { + /* OMG!11 HACK!!11 Get the previous character. + * + * We need to know whether the previous character was an identifier or not, + * because "R" has to be on its own, not part of an identifier. This allows + * for constructs like: + * + * #define FOUR "4" + * const char *p = FOUR"5"; + * + * which is not a raw literal, but a preprocessor concatenation. + * + * FIXME: handle + * + * const char *p = R\ + * "xxx(raw)xxx"; + * + * which is perfectly valid (yet probably very unlikely). */ + int prev = getNthPrevCFromInputFile (1, '\0'); + int prev2 = getNthPrevCFromInputFile (2, '\0'); + int prev3 = getNthPrevCFromInputFile (3, '\0'); + + if (! cppIsident (prev) || + (! cppIsident (prev2) && (prev == 'L' || prev == 'u' || prev == 'U')) || + (! 
cppIsident (prev3) && (prev2 == 'u' && prev == '8'))) + { + int next = cppGetcFromUngetBufferOrFile (); + if (next != DOUBLE_QUOTE) + { + cppUngetc (next); + if (macrodef) + vStringPut (macrodef, 'R'); + } + else + { + Cpp.directive.accept = false; + c = skipToEndOfCxxRawLiteralString (); + + /* We assume none may want to know the content of the + * literal; just put "". */ + if (macrodef) + vStringCatS (macrodef, "\"\""); + + break; + } + } + else + { + if (macrodef) + vStringPut (macrodef, 'R'); + } + } + else if(isxdigit(c)) + { + /* Check for digit separator. If we find it we just skip it */ + int next = cppGetcFromUngetBufferOrFile(); + if(next != SINGLE_QUOTE) + cppUngetc(next); + if (macrodef) + vStringPut (macrodef, c); + + } + else + { + if (macrodef) + vStringPut (macrodef, c); + } + enter: + Cpp.directive.accept = false; + if (directive) + { + ignore = handleDirective (c, &macroCorkIndex); + if (Cpp.macrodefFieldIndex != FIELD_UNKNOWN + && macroCorkIndex != CORK_NIL + && macrodef == NULL) + macrodef = vStringNew (); + } + break; + } + } while (directive || ignore); + + if (macrodef) + vStringDelete (macrodef); + + DebugStatement ( debugPutc (DEBUG_CPP, c); ) + DebugStatement ( if (c == NEWLINE) + debugPrintf (DEBUG_CPP, "%6ld: ", getInputLineNumber () + 1); ) + + return c; +} + +static void findCppTags (void) +{ + cppInitCommon (Cpp.lang, 0, false, false, false, + KIND_GHOST_INDEX, 0, KIND_GHOST_INDEX, + KIND_GHOST_INDEX, 0, 0, + FIELD_UNKNOWN); + + findRegexTagsMainloop (cppGetc); + + cppTerminate (); +} + + +/* + * Token ignore processing + */ + +static hashTable * cmdlineMacroTable; + + +static bool buildMacroInfoFromTagEntry (int corkIndex, + tagEntryInfo * entry, + void * data) +{ + cppMacroInfo **info = data; + + if (entry->langType == Cpp.clientLang + && entry->kindIndex == Cpp.defineMacroKindIndex + && isRoleAssigned (entry, ROLE_DEFINITION_INDEX)) + { + vString *macrodef = vStringNewInit (entry->name); + if (entry->extensionFields.signature) + 
vStringCatS (macrodef, entry->extensionFields.signature); + vStringPut (macrodef, '='); + + const char *val = getParserFieldValueForType (entry, Cpp.macrodefFieldIndex); + if (val) + vStringCatS (macrodef, val); + + *info = saveMacro (Cpp.fileMacroTable, vStringValue (macrodef)); + vStringDelete (macrodef); + + return false; + } + return true; +} + +extern cppMacroInfo * cppFindMacroFromSymtab (const char *const name) +{ + cppMacroInfo *info = NULL; + foreachEntriesInScope (CORK_NIL, name, buildMacroInfoFromTagEntry, &info); + + return info; +} + +/* Determines whether or not "name" should be ignored, per the ignore list. + */ +extern cppMacroInfo * cppFindMacro (const char *const name) +{ + cppMacroInfo *info; + + if (cmdlineMacroTable) + { + info = (cppMacroInfo *)hashTableGetItem (cmdlineMacroTable,(char *)name); + if (info) + return info; + } + + if (Cpp.fileMacroTable) + { + info = (cppMacroInfo *)hashTableGetItem (Cpp.fileMacroTable,(char *)name); + if (info) + return info; + + info = cppFindMacroFromSymtab(name); + if (info) + return info; + } + return NULL; +} + +extern vString * cppBuildMacroReplacement( + const cppMacroInfo * macro, + const char ** parameters, /* may be NULL */ + int parameterCount + ) +{ + if(!macro) + return NULL; + + if(!macro->replacements) + return NULL; + + vString * ret = vStringNew(); + + cppMacroReplacementPartInfo * r = macro->replacements; + + while(r) + { + if(r->parameterIndex < 0) + { + if(r->constant) + vStringCat(ret,r->constant); + } else { + if(parameters && (r->parameterIndex < parameterCount)) + { + if(r->flags & CPP_MACRO_REPLACEMENT_FLAG_STRINGIFY) + vStringPut(ret,'"'); + + vStringCatS(ret,parameters[r->parameterIndex]); + if(r->flags & CPP_MACRO_REPLACEMENT_FLAG_VARARGS) + { + int idx = r->parameterIndex + 1; + while(idx < parameterCount) + { + vStringPut(ret,','); + vStringCatS(ret,parameters[idx]); + idx++; + } + } + + if(r->flags & CPP_MACRO_REPLACEMENT_FLAG_STRINGIFY) + vStringPut(ret,'"'); + } + } + + r = 
r->next; + } + + return ret; +} + + +static void saveIgnoreToken(const char * ignoreToken) +{ + if(!ignoreToken) + return; + + Assert (cmdlineMacroTable); + + const char * c = ignoreToken; + char cc = *c; + + const char * tokenBegin = c; + const char * tokenEnd = NULL; + const char * replacement = NULL; + bool ignoreFollowingParenthesis = false; + + while(cc) + { + if(cc == '=') + { + if(!tokenEnd) + tokenEnd = c; + c++; + if(*c) + replacement = c; + break; + } + + if(cc == '+') + { + if(!tokenEnd) + tokenEnd = c; + ignoreFollowingParenthesis = true; + } + + c++; + cc = *c; + } + + if(!tokenEnd) + tokenEnd = c; + + if(tokenEnd <= tokenBegin) + return; + + cppMacroInfo * info = (cppMacroInfo *)eMalloc(sizeof(cppMacroInfo)); + + info->hasParameterList = ignoreFollowingParenthesis; + if(replacement) + { + cppMacroReplacementPartInfo * rep = \ + (cppMacroReplacementPartInfo *)eMalloc(sizeof(cppMacroReplacementPartInfo)); + rep->parameterIndex = -1; + rep->flags = 0; + rep->constant = vStringNewInit(replacement); + rep->next = NULL; + info->replacements = rep; + } else { + info->replacements = NULL; + } + info->useCount = 0; + info->next = NULL; + + hashTablePutItem(cmdlineMacroTable,eStrndup(tokenBegin,tokenEnd - tokenBegin),info); + + verbose (" ignore token: %s\n", ignoreToken); +} + +static cppMacroInfo * saveMacro(hashTable *table, const char * macro) +{ + CXX_DEBUG_ENTER_TEXT("Save macro %s",macro); + + if(!macro) + return NULL; + + Assert (table); + + const char * c = macro; + + // skip initial spaces + while(*c && isspacetab(*c)) + c++; + + if(!*c) + { + CXX_DEBUG_LEAVE_TEXT("Bad empty macro definition"); + return NULL; + } + + if(!(isalpha(*c) || (*c == '_' || (*c == '$') ))) + { + CXX_DEBUG_LEAVE_TEXT("Macro does not start with an alphanumeric character"); + return NULL; // must be a sequence of letters and digits + } + + const char * identifierBegin = c; + + while(*c && (isalnum(*c) || (*c == '_') || (*c == '$') )) + c++; + + const char * identifierEnd = c; + 
+ CXX_DEBUG_PRINT("Macro identifier '%.*s'",identifierEnd - identifierBegin,identifierBegin); + +#define MAX_PARAMS 16 + + const char * paramBegin[MAX_PARAMS]; + const char * paramEnd[MAX_PARAMS]; + + int iParamCount = 0; + + while(*c && isspacetab(*c)) + c++; + + cppMacroInfo * info = (cppMacroInfo *)eMalloc(sizeof(cppMacroInfo)); + info->useCount = 0; + info->next = NULL; + + if(*c == '(') + { + // parameter list + CXX_DEBUG_PRINT("Macro has a parameter list"); + + info->hasParameterList = true; + + c++; + while(*c) + { + while(*c && isspacetab(*c)) + c++; + + if(*c && (*c != ',') && (*c != ')')) + { + paramBegin[iParamCount] = c; + c++; + while(*c && (*c != ',') && (*c != ')') && (!isspacetab(*c))) + c++; + paramEnd[iParamCount] = c; + + CXX_DEBUG_PRINT( + "Macro parameter %d '%.*s'", + iParamCount, + paramEnd[iParamCount] - paramBegin[iParamCount], + paramBegin[iParamCount] + ); + + iParamCount++; + if(iParamCount >= MAX_PARAMS) + break; + } + + while(*c && isspacetab(*c)) + c++; + + if(*c == ')') + break; + + if(*c == ',') + c++; + } + + while(*c && (*c != ')')) + c++; + + if(*c == ')') + c++; + + CXX_DEBUG_PRINT("Got %d parameters",iParamCount); + + } else { + info->hasParameterList = false; + } + + while(*c && isspacetab(*c)) + c++; + + info->replacements = NULL; + + + if(*c == '=') + { + CXX_DEBUG_PRINT("Macro has a replacement part"); + + // have replacement part + c++; + + cppMacroReplacementPartInfo * lastReplacement = NULL; + int nextParameterReplacementFlags = 0; + +#define ADD_REPLACEMENT_NEW_PART(part) \ + do { \ + if(lastReplacement) \ + lastReplacement->next = part; \ + else \ + info->replacements = part; \ + lastReplacement = part; \ + } while(0) + +#define ADD_CONSTANT_REPLACEMENT_NEW_PART(start,len) \ + do { \ + cppMacroReplacementPartInfo * rep = \ + (cppMacroReplacementPartInfo *)eMalloc(sizeof(cppMacroReplacementPartInfo)); \ + rep->parameterIndex = -1; \ + rep->flags = 0; \ + rep->constant = vStringNew(); \ + 
vStringNCatS(rep->constant,start,len); \ + rep->next = NULL; \ + CXX_DEBUG_PRINT("Constant replacement part: '%s'",vStringValue(rep->constant)); \ + ADD_REPLACEMENT_NEW_PART(rep); \ + } while(0) + +#define ADD_CONSTANT_REPLACEMENT(start,len) \ + do { \ + if(lastReplacement && (lastReplacement->parameterIndex == -1)) \ + { \ + vStringNCatS(lastReplacement->constant,start,len); \ + CXX_DEBUG_PRINT( \ + "Constant replacement part changed: '%s'", \ + vStringValue(lastReplacement->constant) \ + ); \ + } else { \ + ADD_CONSTANT_REPLACEMENT_NEW_PART(start,len); \ + } \ + } while(0) + + // parse replacements + const char * begin = c; + + while(*c) + { + if(isalpha(*c) || (*c == '_')) + { + if(c > begin) + ADD_CONSTANT_REPLACEMENT(begin,c - begin); + + const char * tokenBegin = c; + + while(*c && (isalnum(*c) || (*c == '_'))) + c++; + + // check if it is a parameter + int tokenLen = c - tokenBegin; + + CXX_DEBUG_PRINT("Check token '%.*s'",tokenLen,tokenBegin); + + bool bIsVarArg = (tokenLen == 11) && (strncmp(tokenBegin,"__VA_ARGS__",11) == 0); + + int i = 0; + for(;i<iParamCount;i++) + { + int paramLen = paramEnd[i] - paramBegin[i]; + + if( + ( + bIsVarArg && + (paramLen == 3) && + (strncmp(paramBegin[i],"...",3) == 0) + ) || ( + (!bIsVarArg) && + (paramLen == tokenLen) && + (strncmp(paramBegin[i],tokenBegin,paramLen) == 0) + ) + ) + { + // parameter! + cppMacroReplacementPartInfo * rep = \ + (cppMacroReplacementPartInfo *)eMalloc(sizeof(cppMacroReplacementPartInfo)); + rep->parameterIndex = i; + rep->flags = nextParameterReplacementFlags | + (bIsVarArg ? 
CPP_MACRO_REPLACEMENT_FLAG_VARARGS : 0); + rep->constant = NULL; + rep->next = NULL; + + nextParameterReplacementFlags = 0; + + CXX_DEBUG_PRINT("Parameter replacement part: %d (vararg %d)",i,bIsVarArg); + + ADD_REPLACEMENT_NEW_PART(rep); + break; + } + } + + if(i >= iParamCount) + { + // no parameter found + ADD_CONSTANT_REPLACEMENT(tokenBegin,tokenLen); + } + + begin = c; + continue; + } + + if((*c == '"') || (*c == '\'')) + { + // skip string/char constant + char term = *c; + c++; + while(*c) + { + if(*c == '\\') + { + c++; + if(*c) + c++; + } else if(*c == term) + { + c++; + break; + } + c++; + } + continue; + } + + if(*c == '#') + { + // check for token paste/stringification + if(c > begin) + ADD_CONSTANT_REPLACEMENT(begin,c - begin); + + c++; + if(*c == '#') + { + // token paste + CXX_DEBUG_PRINT("Found token paste operator"); + while(*c == '#') + c++; + + // we just skip this part and the following spaces + while(*c && isspacetab(*c)) + c++; + + if(lastReplacement && (lastReplacement->parameterIndex == -1)) + { + // trim spaces from the last replacement constant! 
+ vStringStripTrailing(lastReplacement->constant); + CXX_DEBUG_PRINT( + "Last replacement truncated to '%s'", + vStringValue(lastReplacement->constant) + ); + } + } else { + // stringification + CXX_DEBUG_PRINT("Found stringification operator"); + nextParameterReplacementFlags |= CPP_MACRO_REPLACEMENT_FLAG_STRINGIFY; + } + + begin = c; + continue; + } + + c++; + } + + if(c > begin) + ADD_CONSTANT_REPLACEMENT(begin,c - begin); + } + + hashTablePutItem(table,eStrndup(identifierBegin,identifierEnd - identifierBegin),info); + CXX_DEBUG_LEAVE(); + + return info; +} + +static void freeMacroInfo(cppMacroInfo * info) +{ + if(!info) + return; + cppMacroReplacementPartInfo * pPart = info->replacements; + while(pPart) + { + if(pPart->constant) + vStringDelete(pPart->constant); + cppMacroReplacementPartInfo * pPartToDelete = pPart; + pPart = pPart->next; + eFree(pPartToDelete); + } + eFree(info); +} + +static hashTable *makeMacroTable (void) +{ + return hashTableNew( + 1024, + hashCstrhash, + hashCstreq, + eFree, + (void (*)(void *))freeMacroInfo + ); +} + +static void initializeCpp (const langType language) +{ + Cpp.lang = language; +} + +static void finalizeCpp (const langType language, bool initialized) +{ + if (cmdlineMacroTable) + { + hashTableDelete (cmdlineMacroTable); + cmdlineMacroTable = NULL; + } +} + +static void CpreProExpandMacrosInInput (const langType language CTAGS_ATTR_UNUSED, const char *name, const char *arg) +{ + doesExpandMacros = paramParserBool (arg, doesExpandMacros, + name, "parameter"); +} + +static void CpreProInstallIgnoreToken (const langType language CTAGS_ATTR_UNUSED, const char *optname CTAGS_ATTR_UNUSED, const char *arg) +{ + if (arg == NULL || arg[0] == '\0') + { + if (cmdlineMacroTable) + { + hashTableDelete(cmdlineMacroTable); + cmdlineMacroTable = NULL; + } + verbose (" clearing list\n"); + } else { + if (!cmdlineMacroTable) + cmdlineMacroTable = makeMacroTable (); + saveIgnoreToken(arg); + } +} + +static void CpreProInstallMacroToken 
(const langType language CTAGS_ATTR_UNUSED, const char *optname CTAGS_ATTR_UNUSED, const char *arg) +{ + if (arg == NULL || arg[0] == '\0') + { + if (cmdlineMacroTable) + { + hashTableDelete(cmdlineMacroTable); + cmdlineMacroTable = NULL; + } + verbose (" clearing list\n"); + } else { + if (!cmdlineMacroTable) + cmdlineMacroTable = makeMacroTable (); + saveMacro(cmdlineMacroTable, arg); + } +} + +static void CpreProSetIf0 (const langType language CTAGS_ATTR_UNUSED, const char *name, const char *arg) +{ + doesExaminCodeWithInIf0Branch = paramParserBool (arg, doesExaminCodeWithInIf0Branch, + name, "parameter"); +} + +static parameterHandlerTable CpreProParameterHandlerTable [] = { + { .name = "if0", + .desc = "examine code within \"#if 0\" branch (true or [false])", + .handleParameter = CpreProSetIf0, + }, + { .name = "ignore", + .desc = "a token to be specially handled", + .handleParameter = CpreProInstallIgnoreToken, + }, + { .name = "define", + .desc = "define replacement for an identifier (name(params,...)=definition)", + .handleParameter = CpreProInstallMacroToken, + }, + { .name = "_expand", + .desc = "expand macros if their definitions are in the current C/C++/CUDA input file (true or [false])", + .handleParameter = CpreProExpandMacrosInInput, + } +}; + +extern parserDefinition* CPreProParser (void) +{ + parserDefinition* const def = parserNew ("CPreProcessor"); + def->kindTable = CPreProKinds; + def->kindCount = ARRAY_SIZE (CPreProKinds); + def->initialize = initializeCpp; + def->parser = findCppTags; + def->finalize = finalizeCpp; + + def->fieldTable = CPreProFields; + def->fieldCount = ARRAY_SIZE (CPreProFields); + + def->parameterHandlerTable = CpreProParameterHandlerTable; + def->parameterHandlerCount = ARRAY_SIZE(CpreProParameterHandlerTable); + + def->useCork = CORK_QUEUE | CORK_SYMTAB; + return def; +}
Modified: ctags/parsers/cpreprocessor.h 137 lines changed, 137 insertions(+), 0 deletions(-) =================================================================== @@ -0,0 +1,137 @@ +/* +* Copyright (c) 1998-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +* +* External interface to get.c +*/ +#ifndef CTAGS_MAIN_GET_H +#define CTAGS_MAIN_GET_H + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ +#include "types.h" +#include "vstring.h" + +/* +* MACROS +*/ + +/* + * cppIs... macros are for the value returned from cppGetc(). Don't + * use "char" value. Don't pass a value stored to C-string + * (char*... or char[]) or vString. + * + * cppGetc() can return the value out of range of unsigned char. + * cppGetc calls skipToEndOfString() and skipToEndOfChar() internally. + * They return STRING_SYMBOL (== 338) and CHAR_SYMBOL (== 322), + * respectively. (cppGetc() can return EOF (== -1). However, it is not an issue + * here.) + * + * is...() macros/functions defined in ctype.h can handle the value of + * an unsigned char or EOF; we cannot pass STRING_SYMBOL or CHAR_SYMBOL + * returned from cppGetc(). + * + * Depending on the platform, isalpha(338) returns a different value. + * On Fedora 22, it returns 0. On Windows 2010, it returns 1. + * + * So, we need cppIs... macros. + * cppIs... macros take STRING_SYMBOL and CHAR_SYMBOL into account. */ + +#define cppIsascii(c) ((c >= 0) && (c < 0x80)) +/* isascii is not portable enough. */ + +/* Is the character valid as a character of a C identifier? + * VMS allows '$' in identifiers. + */ +#define cppIsalnum(c) (cppIsascii(c) && isalnum(c)) +#define cppIsident(c) (cppIsalnum(c) \ + || (c) == '_' || (c) == '$') + +/* Is the character valid as the first character of a C identifier? + * C++ allows '~' in destructors. + * VMS allows '$' in identifiers. 
+ */ +#define cppIsalpha(c) (cppIsascii(c) && isalpha(c)) +#define cppIsident1(c) (cppIsalpha(c) \ + || (c) == '_' || (c) == '~' || (c) == '$') + +#define cppIsspace(c) (cppIsascii(c) && isspace(c)) +#define cppIsdigit(c) (cppIsascii(c) && isdigit(c)) + + +#define RoleTemplateUndef { true, "undef", "undefined" } + +#define RoleTemplateSystem { true, "system", "system header" } +#define RoleTemplateLocal { true, "local", "local header" } + +/* +* FUNCTION PROTOTYPES +*/ +extern bool cppIsBraceFormat (void); +extern unsigned int cppGetDirectiveNestLevel (void); + +/* Don't forget to set useCork in your parser definition. + * The corkQueue is needed to capture macro parameters. + */ +extern void cppInit (const bool state, + const bool hasAtLiteralStrings, + const bool hasCxxRawLiteralStrings, + const bool hasSingleQuoteLiteralNumbers, + int defineMacroKindIndex, + int macroUndefRoleIndex, + int headerKindIndex, + int headerSystemRoleIndex, int headerLocalRoleIndex, + int macroParamKindIndex, + int macrodefFieldIndex); + +extern void cppTerminate (void); +extern void cppBeginStatement (void); +extern void cppEndStatement (void); +extern void cppUngetc (const int c); +extern int cppUngetBufferSize(); +extern void cppUngetString(const char * string,int len); +extern int cppGetc (void); +extern const vString * cppGetLastCharOrStringContents (void); + +/* Notify the external parser state for the purpose of conditional + * branch choice. The CXX parser stores the block level here. 
*/ +extern void cppPushExternalParserBlock(void); +extern void cppPopExternalParserBlock(void); + +#define CPP_MACRO_REPLACEMENT_FLAG_VARARGS 1 +#define CPP_MACRO_REPLACEMENT_FLAG_STRINGIFY 2 + +typedef struct sCppMacroReplacementPartInfo { + int parameterIndex; /* -1 if this part is a constant */ + int flags; + vString * constant; /* not NULL only if parameterIndex == -1 */ + struct sCppMacroReplacementPartInfo * next; +} cppMacroReplacementPartInfo; + +typedef struct sCppMacroInfo { + bool hasParameterList; /* true if the macro has a trailing () */ + cppMacroReplacementPartInfo * replacements; + int useCount; + struct sCppMacroInfo * next; +} cppMacroInfo; + +extern cppMacroInfo * cppFindMacro (const char *const name); +extern void cppUngetStringBuiltByMacro (const char * string,int len, cppMacroInfo *macro); + +/* +* Build a replacement string for the specified macro. +* If the macro has parameters, they will be used. +* Parameters not found in the list will be assumed to be empty. +* May return NULL or equivalently an empty replacement string. +*/ +extern vString * cppBuildMacroReplacement( + const cppMacroInfo * macro, + const char ** parameters, /* may be NULL */ + int parameterCount + ); + +#endif /* CTAGS_MAIN_GET_H */
Modified: ctags/parsers/cxx/cxx.c 163 lines changed, 163 insertions(+), 0 deletions(-) =================================================================== @@ -0,0 +1,163 @@ +/* +* Copyright (c) 2016, Szymon Tomasz Stefanek +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +* +* This module contains functions for parsing and scanning C++ source files +*/ + +#include "general.h" + +#include "cxx_debug.h" +#include "cxx_keyword.h" +#include "cxx_token.h" +#include "cxx_token_chain.h" +#include "cxx_parser.h" +#include "cxx_scope.h" +#include "cxx_tag.h" + +#include "dependency.h" +#include "selectors.h" + +// +// ---------------------------------------------------------------------------- +// Assumptions. +// ---------------------------------------------------------------------------- +// +// - Parsing C/C++ is hard. Parsing C/C++ correctly without includes and +// without a complete preprocessor is close to impossible in the general +// case. Also ctags is not a compiler. This means that our parser must be +// a "guessing" parser. It's hopeless to try to decode the syntax of the +// language down to the last bit. +// +// - The input may contain syntax errors. This is because we don't have a full +// preprocessor and also because ctags is often used "online" in editors, +// while the user is typing. ctags should be tolerant and try to do its best +// even with syntax errors but: +// - Syntax errors that break the scope hierarchy should be detected and tag +// emission should probably be stopped. Correct tags in a broken hierarchy +// are useless (well, unless the hierarchy itself is ignored by the ctags +// user). +// - CTags should try to avoid emitting tags which involve syntax errors +// +// - There will always be pathologic cases. Don't cry, live with it. 
+// +// ---------------------------------------------------------------------------- +// TODO LIST +// ---------------------------------------------------------------------------- +// +// - In case of simple syntax error try to recover: +// Skip to the next ; without entering or exiting scopes. +// If this can be done then recovery is feasible. +// - Extension of each block/scope. +// - Unnamed blocks/scopes? +// - Handle syntax errors: +// - If a special switch is used then stop on detecting a syntax error +// (this is useful for code editors that frequently update tags for +// single files) +// - If the switch is not used then do NOT emit tags for a file on a syntax +// error [but do not stop execution of the whole program and continue on +// other files] +// For this purpose: +// - Do not emit tags until the end of the file, if scopes do not match we +// either screwed up something or the programmer did +// Maybe the cork api can be used for this? +// +// Handle variable declarations inside things like while() foreach() FOR() etc.. +// +// - Friend classes. +// - Template parameters as field +// - Template specialisations (another field?) 
+// - Forward declarations might become tags
+
+// Builds the parser definition for plain C sources ("c" extension).
+parserDefinition * CParser (void)
+{
+	static const char * const aszExtensions [] = { "c", NULL };
+	static selectLanguage aSelectors [] = { selectByObjectiveCKeywords, NULL };
+
+	parserDefinition * pDef = parserNew("C");
+
+	pDef->extensions = aszExtensions;
+	pDef->selectLanguage = aSelectors;
+	pDef->kindTable = cxxTagGetCKindDefinitions();
+	pDef->kindCount = cxxTagGetCKindDefinitionCount();
+	pDef->fieldTable = cxxTagGetCFieldDefinitionifiers();
+	pDef->fieldCount = cxxTagGetCFieldDefinitionifierCount();
+	pDef->parser2 = cxxCParserMain;
+	pDef->initialize = cxxCParserInitialize;
+	pDef->finalize = cxxParserCleanup;
+
+	// Corking holds the tag output back until the end of the file
+	pDef->useCork = CORK_QUEUE|CORK_SYMTAB;
+
+	return pDef;
+}
+
+// Builds the parser definition for C++ sources and headers.
+// It declares a DEPTYPE_KIND_OWNER dependency on the "C" parser.
+parserDefinition * CppParser (void)
+{
+	static const char * const aszExtensions [] =
+	{
+		"c++", "cc", "cp", "cpp", "cxx",
+		"h", "h++", "hh", "hp", "hpp", "hxx", "inl",
+#ifndef CASE_INSENSITIVE_FILENAMES
+		"C", "H", "CPP", "CXX",
+#endif
+		NULL
+	};
+	static parserDependency aDependencies [] = {
+		{ DEPTYPE_KIND_OWNER, "C" },
+	};
+	static selectLanguage aSelectors [] = { selectByObjectiveCKeywords, NULL };
+
+	parserDefinition * pDef = parserNew("C++");
+
+	pDef->extensions = aszExtensions;
+	pDef->selectLanguage = aSelectors;
+	pDef->dependencies = aDependencies;
+	pDef->dependencyCount = ARRAY_SIZE (aDependencies);
+	pDef->kindTable = cxxTagGetCPPKindDefinitions();
+	pDef->kindCount = cxxTagGetCPPKindDefinitionCount();
+	pDef->fieldTable = cxxTagGetCPPFieldDefinitionifiers();
+	pDef->fieldCount = cxxTagGetCPPFieldDefinitionifierCount();
+	pDef->parser2 = cxxCppParserMain;
+	pDef->initialize = cxxCppParserInitialize;
+	pDef->finalize = cxxParserCleanup;
+
+	// Corking holds the tag output back until the end of the file
+	pDef->useCork = CORK_QUEUE|CORK_SYMTAB;
+
+	return pDef;
+}
+
+// Builds the parser definition for CUDA sources ("cu" and "cuh" extensions).
+// Like C++ it depends on the "C" parser, but it installs no language selector.
+parserDefinition * CUDAParser (void)
+{
+	static const char * const aszExtensions [] = { "cu", "cuh", NULL };
+	static parserDependency aDependencies [] = {
+		{ DEPTYPE_KIND_OWNER, "C" },
+	};
+
+	parserDefinition * pDef = parserNew("CUDA");
+
+	pDef->extensions = aszExtensions;
+	pDef->selectLanguage = NULL;
+	pDef->dependencies = aDependencies;
+	pDef->dependencyCount = ARRAY_SIZE (aDependencies);
+	pDef->kindTable = cxxTagGetCUDAKindDefinitions();
+	pDef->kindCount = cxxTagGetCUDAKindDefinitionCount();
+	pDef->fieldTable = cxxTagGetCUDAFieldDefinitionifiers();
+	pDef->fieldCount = cxxTagGetCUDAFieldDefinitionifierCount();
+	pDef->parser2 = cxxCUDAParserMain;
+	pDef->initialize = cxxCUDAParserInitialize;
+	pDef->finalize = cxxParserCleanup;
+
+	// Corking holds the tag output back until the end of the file
+	pDef->useCork = CORK_QUEUE|CORK_SYMTAB;
+
+	return pDef;
+}
Modified: ctags/parsers/cxx/cxx_debug.c 203 lines changed, 203 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,203 @@
+/*
+*   Copyright (c) 2016, Szymon Tomasz Stefanek
+*
+*   This source code is released for free distribution under the terms of the
+*   GNU General Public License version 2 or (at your option) any later version.
+*
+*   This module contains functions for parsing and scanning C++ source files
+*/
+
+#include "cxx_debug.h"
+
+#ifdef CXX_DO_DEBUGGING
+
+#include "trashbox.h"
+#include "cxx_parser_internal.h"
+#include "cxx_scope.h"
+
+static void cxxDebugDumpToken0 (CXXToken *pToken,
+	struct circularRefChecker *pTokenChecker,
+	struct circularRefChecker *pChainChecker,
+	bool top_level);
+
+// Writes a description of pChain to stderr and recurses into its head token.
+// A nonzero result of circularRefCheckerCheck() on pChainChecker is printed as
+// a "*C#<n>" back reference instead of descending into the chain again.
+// top_level selects the "<chain " prefix; a NULL chain is reported as "NULL".
+static void cxxDebugDumpChain0 (CXXTokenChain *pChain,
+	struct circularRefChecker *pTokenChecker,
+	struct circularRefChecker *pChainChecker,
+	bool top_level)
+{
+	int backref;
+
+	if (top_level)
+	{
+		debugIndent ();
+		fprintf (stderr, "<chain ");
+	}
+
+	if (pChain == NULL)
+	{
+		// Checked for top level calls too: pChain is dereferenced below
+		fprintf (stderr, top_level ? "NULL>\n" : "NULL\n");
+		return;
+	}
+
+	if (!top_level)
+		fprintf (stderr, "<");
+
+	backref = circularRefCheckerCheck (pChainChecker, pChain);
+	if (backref)
+	{
+		fprintf (stderr, "*C#%d>\n", backref);
+		return;
+	}
+
+	backref = circularRefCheckerGetCurrent (pChainChecker);
+
+	// %p expects a pointer to void
+	fprintf (stderr, "[%d %p&C#%d]\n", pChain->iCount, (void *)pChain, backref);
+
+	debugInc();
+	debugIndent ();
+	cxxDebugDumpToken0 (pChain->pHead, pTokenChecker, pChainChecker, false);
+	debugDec();
+
+	debugIndent ();
+	fprintf (stderr, ">\n");
+}
+
+// Writes a description of pToken to stderr, followed by its nested chain and
+// its next and previous tokens.
+// A nonzero result of circularRefCheckerCheck() on pTokenChecker is printed as
+// a "*T#<n>" back reference instead of descending into the token again.
+static void cxxDebugDumpToken0 (CXXToken *pToken,
+	struct circularRefChecker *pTokenChecker,
+	struct circularRefChecker *pChainChecker,
+	bool top_level)
+{
+	int backref;
+
+	if (top_level)
+	{
+		debugIndent ();
+		fprintf (stderr, "<token ");
+	}
+
+	if (pToken == NULL)
+	{
+		// Checked for top level calls too: pToken is dereferenced below
+		fprintf (stderr, top_level ? "NULL>\n" : "NULL\n");
+		return;
+	}
+
+	if (!top_level)
+		fprintf (stderr, "<");
+
+	backref = circularRefCheckerCheck (pTokenChecker, pToken);
+	if (backref)
+	{
+		fprintf (stderr, "*T#%d>\n", backref);
+		return;
+	}
+
+	backref = circularRefCheckerGetCurrent (pTokenChecker);
+
+	fprintf (stderr, "\"%s\": [%s %p &T#%d]\n",
+		vStringValue (pToken->pszWord),
+		cxxDebugTypeDecode (pToken->eType), (void *)pToken, backref);
+
+	// The recursive calls hand on both checkers unchanged, so that chains keep
+	// being tracked by pChainChecker and tokens by pTokenChecker.
+	debugIndent ();
+	fprintf (stderr, " chain: ");
+	debugInc();
+	cxxDebugDumpChain0 (pToken->pChain, pTokenChecker, pChainChecker, false);
+	debugDec();
+
+	debugIndent ();
+	fprintf (stderr, " next: ");
+	debugInc();
+	cxxDebugDumpToken0 (pToken->pNext, pTokenChecker, pChainChecker, false);
+	debugDec();
+
+	debugIndent ();
+	fprintf (stderr, " prev: ");
+	debugInc();
+	cxxDebugDumpToken0 (pToken->pPrev, pTokenChecker, pChainChecker, false);
+	debugDec();
+
+	debugIndent ();
+	fprintf (stderr, ">\n");
+}
+
+typedef void (* cxxDebugDumpCommonFunc)(void *,
+	struct circularRefChecker *,
+	struct circularRefChecker *,
+	bool);
+
+// Runs func on data as a top level dump. The two checkers are created on first
+// use, registered with DEFAULT_TRASH_BOX together with their destroy function,
+// and cleared again after every dump.
+void cxxDebugDumpCommon (void *data, cxxDebugDumpCommonFunc func)
+{
+	static struct circularRefChecker *pTokenChecker;
+	static struct circularRefChecker *pChainChecker;
+
+	if (!pTokenChecker)
+	{
+		pTokenChecker = circularRefCheckerNew();
+		DEFAULT_TRASH_BOX(pTokenChecker, (TrashBoxDestroyItemProc)circularRefCheckerDestroy);
+	}
+
+	if (!pChainChecker)
+	{
+		pChainChecker = circularRefCheckerNew();
+		DEFAULT_TRASH_BOX(pChainChecker, (TrashBoxDestroyItemProc)circularRefCheckerDestroy);
+	}
+
+	func(data, pTokenChecker, pChainChecker, true);
+
+	circularRefCheckClear (pTokenChecker);
+	circularRefCheckClear (pChainChecker);
+}
+
+// Dumps pToken and everything reachable from it to stderr.
+void cxxDebugDumpToken (CXXToken *pToken)
+{
+	cxxDebugDumpCommon (pToken, (cxxDebugDumpCommonFunc)cxxDebugDumpToken0);
+}
+
+// Dumps pChain and everything reachable from it to stderr.
+void cxxDebugDumpChain (CXXTokenChain *pChain)
+{
+	cxxDebugDumpCommon (pChain, (cxxDebugDumpCommonFunc)cxxDebugDumpChain0);
+}
+
+// Returns the lower case name of a scope type, or NULL when scope is not
+// below CXXScopeTypeLAST or lies beyond the end of the name table.
+const char* cxxDebugScopeDecode(enum CXXScopeType scope)
+{
+	static const char * const table[] = {
+		[CXXScopeTypeFunction] = "function",
+		[CXXScopeTypeNamespace] = "namespace",
+		[CXXScopeTypeClass] = "class",
+		[CXXScopeTypeEnum] = "enum",
+		[CXXScopeTypeUnion] = "union",
+		[CXXScopeTypeStruct] = "struct",
+		[CXXScopeTypeVariable] = "variable",
+		[CXXScopeTypePrototype] = "prototype",
+		[CXXScopeTypeTypedef] = "typedef",
+	};
+
+	// The unsigned comparison also rejects negative values; the second test
+	// keeps the index inside the table even if the enum grows past it.
+	if ((unsigned int) scope < (unsigned int) CXXScopeTypeLAST
+		&& (unsigned int) scope < sizeof(table) / sizeof(table[0]))
+		return table[scope];
+
+	return NULL;
+}
+
+#endif
Modified: ctags/parsers/cxx/cxx_debug.h 69 lines changed, 69 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,69 @@
+#ifndef ctags_cxx_debug_h_
+#define ctags_cxx_debug_h_
+/*
+*   Copyright (c) 2016, Szymon Tomasz Stefanek
+*
+*   This source code is released for free distribution under the terms of the
+*   GNU General Public License version 2 or (at your option) any later version.
+*
+*   This module contains functions for parsing and scanning C++ source files
+*/
+
+#include "general.h"
+#include "debug.h"
+#include "trace.h"
+#include "cxx_token.h"
+#include "cxx_scope.h"
+
+// The cxx debugging helpers are enabled only when DO_TRACING is defined
+#if defined(DO_TRACING)
+	#define CXX_DO_DEBUGGING
+#endif
+
+#ifdef CXX_DO_DEBUGGING
+
+// Decoders and dumpers, available in debugging builds only
+const char* cxxDebugTypeDecode(enum CXXTokenType);
+const char* cxxDebugScopeDecode(enum CXXScopeType);
+void cxxDebugDumpToken (CXXToken *pToken);
+void cxxDebugDumpChain (CXXTokenChain *pChain);
+
+// In debugging builds the CXX_DEBUG_* macros forward to the TRACE_* macros
+#define CXX_DEBUG_ENTER() TRACE_ENTER()
+#define CXX_DEBUG_LEAVE() TRACE_LEAVE()
+
+#define CXX_DEBUG_ENTER_TEXT(szFormat,...) \
+	TRACE_ENTER_TEXT(szFormat,## __VA_ARGS__)
+
+#define CXX_DEBUG_LEAVE_TEXT(szFormat,...) \
+	TRACE_LEAVE_TEXT(szFormat,## __VA_ARGS__)
+
+#define CXX_DEBUG_PRINT(szFormat,...) \
+	TRACE_PRINT(szFormat,## __VA_ARGS__)
+
+#define CXX_DEBUG_ASSERT(bCondition,szFormat,...) \
+	TRACE_ASSERT(bCondition,szFormat,## __VA_ARGS__)
+
+#define CXX_DEBUG_TOKEN(pToken) cxxDebugDumpToken(pToken)
+#define CXX_DEBUG_CHAIN(pChain) cxxDebugDumpChain(pChain)
+
+#else //!CXX_DO_DEBUGGING
+
+// Otherwise every macro is an empty statement that never touches its arguments
+#define CXX_DEBUG_ENTER() do { } while(0)
+#define CXX_DEBUG_LEAVE() do { } while(0)
+
+#define CXX_DEBUG_ENTER_TEXT(szFormat,...) do { } while(0)
+#define CXX_DEBUG_LEAVE_TEXT(szFormat,...) do { } while(0)
+
+#define CXX_DEBUG_PRINT(szFormat,...) do { } while(0)
+
+#define CXX_DEBUG_ASSERT(bCondition,szFormat,...) do { } while(0)
+
+#define CXX_DEBUG_TOKEN(pToken) do { } while(0)
+#define CXX_DEBUG_CHAIN(pChain) do { } while(0)
+
+#endif //!CXX_DO_DEBUGGING
+
+
+#endif //!ctags_cxx_debug_h_
Modified: ctags/parsers/cxx/cxx_debug_type.c 76 lines changed, 76 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,76 @@
+/* Automatically generated by misc/gencxxtypedumper.sh */
+
+#include "cxx_token.h"
+#include "cxx_debug.h"
+
+#ifdef CXX_DO_DEBUGGING
+// Appends str to buf, preceded by a blank when something was appended before.
+// Always returns true so that the caller can record that buf has content.
+static bool append(vString *buf, const char *str, bool appended)
+{
+	if (appended)
+		vStringPut(buf, ' ');
+	vStringCatS (buf, str);
+	return true;
+}
+
+// Returns a blank separated list with the names of the CXXTokenType bits set
+// in eType, or "REALLY-UNKNOWN" when none of the bits listed below is set.
+// The buffer is static and reset on every call (presumably the result is only
+// valid until the next call - confirm with vStringNewOrClearWithAutoRelease).
+// NOTE(review): the banner says this file is generated, keep the generator in sync.
+const char * cxxDebugTypeDecode (enum CXXTokenType eType)
+{
+	static const struct {
+		enum CXXTokenType eBit;
+		const char * szName;
+	} aNames[] = {
+		{ CXXTokenTypeEOF, "EOF" },
+		{ CXXTokenTypeIdentifier, "Identifier" },
+		{ CXXTokenTypeKeyword, "Keyword" },
+		{ CXXTokenTypeNumber, "Number" },
+		{ CXXTokenTypeSingleColon, "SingleColon" },
+		{ CXXTokenTypeMultipleColons, "MultipleColons" },
+		{ CXXTokenTypeSemicolon, "Semicolon" },
+		{ CXXTokenTypeComma, "Comma" },
+		{ CXXTokenTypeAssignment, "Assignment" },
+		{ CXXTokenTypeOperator, "Operator" },
+		{ CXXTokenTypeUnknown, "Unknown" },
+		{ CXXTokenTypeDotOperator, "DotOperator" },
+		{ CXXTokenTypePointerOperator, "PointerOperator" },
+		{ CXXTokenTypeStringConstant, "StringConstant" },
+		{ CXXTokenTypeStar, "Star" },
+		{ CXXTokenTypeAnd, "And" },
+		{ CXXTokenTypeMultipleAnds, "MultipleAnds" },
+		{ CXXTokenTypeCharacterConstant, "CharacterConstant" },
+		{ CXXTokenTypeMultipleDots, "MultipleDots" },
+		{ CXXTokenTypeOpeningBracket, "OpeningBracket" },
+		{ CXXTokenTypeOpeningParenthesis, "OpeningParenthesis" },
+		{ CXXTokenTypeOpeningSquareParenthesis, "OpeningSquareParenthesis" },
+		{ CXXTokenTypeSmallerThanSign, "SmallerThanSign" },
+		{ CXXTokenTypeClosingBracket, "ClosingBracket" },
+		{ CXXTokenTypeClosingParenthesis, "ClosingParenthesis" },
+		{ CXXTokenTypeClosingSquareParenthesis, "ClosingSquareParenthesis" },
+		{ CXXTokenTypeGreaterThanSign, "GreaterThanSign" },
+		{ CXXTokenTypeBracketChain, "BracketChain" },
+		{ CXXTokenTypeParenthesisChain, "ParenthesisChain" },
+		{ CXXTokenTypeSquareParenthesisChain, "SquareParenthesisChain" },
+		{ CXXTokenTypeAngleBracketChain, "AngleBracketChain" },
+	};
+	static vString *buf;
+	bool bAppended = false;
+	unsigned int i;
+
+	buf = vStringNewOrClearWithAutoRelease (buf);
+
+	for (i = 0; i < sizeof(aNames) / sizeof(aNames[0]); i++)
+	{
+		if (eType & aNames[i].eBit)
+			bAppended = append (buf, aNames[i].szName, bAppended);
+	}
+
+	if (vStringLength(buf) == 0)
+		vStringCatS(buf, "REALLY-UNKNOWN");
+	return vStringValue (buf);
+}
+#endif
Modified: ctags/parsers/cxx/cxx_keyword.c 270 lines changed, 270 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,270 @@
+/*
+*   Copyright (c) 2016, Szymon Tomasz Stefanek
+*
+*   This source code is released for free distribution under the terms of the
+*   GNU General Public License version 2 or (at your option) any later version.
+*
+*   This module contains functions for parsing and scanning C++ source files
+*/
+
+#include "cxx_keyword.h"
+#include "cxx_parser_internal.h"
+#include "cxx_debug.h"
+
+#include "keyword.h"
+
+enum CXXKeywordFlag
+{
+	// Keywords that in most cases are parts of the name of a type.
+	// Examples: int, void, const, float, stuff like that
+	CXXKeywordFlagMayBePartOfTypeName = 1,
+	// struct, class, union, enum, typename
+	CXXKeywordIsTypeRefMarker = (1 << 1),
+	// Stuff that often appears together with a type name
+	// (for example a function return type or a variable type)
+	// but is not part of the type itself.
+	// Examples: virtual, inline, friend, static
+	CXXKeywordExcludeFromTypeNames = (1 << 2),
+	// true, false, nullptr
+	CXXKeywordIsConstant = (1 << 3),
+	// certain keywords are disabled "on-the-fly" to better
+	// handle C / C++ guessing errors (public, protected, private, namespace etc..)
+	CXXKeywordIsDisabled = (1 << 4),
+	// Similar to MayBePartOfTypeName but includes more keywords that are NOT part
+	// of the type itself. Keywords that do NOT have this flag simply cannot appear
+	// in a variable declaration.
+	// Examples: __global__, __host__, restrict, register...
+	CXXKeywordMayAppearInVariableDeclaration = (1 << 5)
+};
+
+// One table row: keyword spelling, CXXLanguage* mask, CXXKeywordFlag mask
+typedef struct _CXXKeywordDescriptor
+{
+	const char * szName;
+	unsigned int uLanguages;
+	unsigned int uFlags;
+} CXXKeywordDescriptor;
+
+// Shorthand for keywords enabled in every language; used by the table only
+#define CXX_KW_ALL_LANGS (CXXLanguageC | CXXLanguageCPP | CXXLanguageCUDA)
+
+// This array is indexed by the CXXKeyword enum (cxx_keyword.h): the rows
+// must stay in the same order as the enumerators.
+static CXXKeywordDescriptor g_aCXXKeywordTable[] = {
+	{ "__attribute__", CXX_KW_ALL_LANGS, CXXKeywordMayAppearInVariableDeclaration },
+	{ "__constant__", CXXLanguageCUDA, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordExcludeFromTypeNames },
+	{ "__declspec", CXX_KW_ALL_LANGS, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordExcludeFromTypeNames },
+	{ "__device__", CXXLanguageCUDA, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordExcludeFromTypeNames },
+	{ "__fastcall", CXXLanguageCPP, 0 },
+	{ "__forceinline", CXX_KW_ALL_LANGS, CXXKeywordExcludeFromTypeNames },
+	{ "__forceinline__", CXXLanguageCUDA, CXXKeywordExcludeFromTypeNames },
+	{ "__global__", CXXLanguageCUDA, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordExcludeFromTypeNames },
+	{ "__host__", CXXLanguageCUDA, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordExcludeFromTypeNames },
+	{ "__inline", CXX_KW_ALL_LANGS, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordExcludeFromTypeNames },
+	{ "__inline__", CXX_KW_ALL_LANGS, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordExcludeFromTypeNames },
+	{ "__managed__", CXXLanguageCUDA, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordExcludeFromTypeNames },
+	{ "__noinline__", CXXLanguageCUDA, CXXKeywordExcludeFromTypeNames },
+	{ "__restrict", CXX_KW_ALL_LANGS, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordExcludeFromTypeNames },
+	{ "__restrict__", CXX_KW_ALL_LANGS, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordExcludeFromTypeNames },
+	{ "__shared__", CXXLanguageCUDA, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordExcludeFromTypeNames },
+	{ "__stdcall", CXX_KW_ALL_LANGS, 0 },
+	{ "__thiscall", CXXLanguageCPP, 0 },
+	{ "alignas", CXXLanguageCPP, CXXKeywordMayAppearInVariableDeclaration },
+	{ "alignof", CXXLanguageCPP, 0 },
+	//{ 1, "and", 0 },
+	//{ 1, "and_eq", 0 },
+	{ "asm", CXX_KW_ALL_LANGS, 0 },
+	{ "auto", CXXLanguageCPP, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName },
+	//{ 1, "bitand", 0 },
+	//{ 1, "bitor", 0 },
+	{ "bool", CXX_KW_ALL_LANGS, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName },
+	{ "break", CXX_KW_ALL_LANGS, 0 },
+	{ "case", CXX_KW_ALL_LANGS, 0 },
+	{ "catch", CXXLanguageCPP, 0 },
+	{ "char", CXX_KW_ALL_LANGS, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName },
+	{ "char16_t", CXXLanguageCPP, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName },
+	{ "char32_t", CXXLanguageCPP, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName },
+	{ "class", CXXLanguageCPP, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName | CXXKeywordIsTypeRefMarker },
+	//{ 0, "compl", 0 },
+	{ "concept", CXXLanguageCPP, 0 },
+	{ "const", CXX_KW_ALL_LANGS, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName },
+	{ "constexpr", CXXLanguageCPP, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordExcludeFromTypeNames },
+	{ "const_cast", CXXLanguageCPP, 0 },
+	{ "continue", CXX_KW_ALL_LANGS, 0 },
+	{ "decltype", CXXLanguageCPP, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName },
+	{ "default", CXX_KW_ALL_LANGS, 0 },
+	{ "delete", CXXLanguageCPP, 0 },
+	{ "do", CXX_KW_ALL_LANGS, 0 },
+	{ "double", CXX_KW_ALL_LANGS, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName },
+	{ "dynamic_cast", CXXLanguageCPP, 0 },
+	{ "else", CXX_KW_ALL_LANGS, 0 },
+	{ "enum", CXX_KW_ALL_LANGS, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName | CXXKeywordIsTypeRefMarker },
+	{ "explicit", CXXLanguageCPP, 0 },
+	{ "export", CXXLanguageCPP, 0 },
+	{ "extern", CXX_KW_ALL_LANGS, 0 },
+	{ "false", CXX_KW_ALL_LANGS, CXXKeywordIsConstant },
+	// this is a keyword only in special contexts (we have a switch to enable/disable it)
+	{ "final", CXXLanguageCPP, 0 },
+	{ "float", CXX_KW_ALL_LANGS, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName },
+	{ "for", CXX_KW_ALL_LANGS, 0 },
+	{ "friend", CXXLanguageCPP, CXXKeywordExcludeFromTypeNames },
+	{ "goto", CXX_KW_ALL_LANGS, 0 },
+	{ "if", CXX_KW_ALL_LANGS, 0 },
+	{ "inline", CXX_KW_ALL_LANGS, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordExcludeFromTypeNames },
+	{ "int", CXX_KW_ALL_LANGS, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName },
+	{ "long", CXX_KW_ALL_LANGS, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName },
+	{ "mutable", CXXLanguageCPP, CXXKeywordMayAppearInVariableDeclaration },
+	{ "namespace", CXXLanguageCPP, 0 },
+	{ "new", CXXLanguageCPP, 0 },
+	{ "noexcept", CXXLanguageCPP, 0 },
+	//{ 0, "not", 0 },
+	//{ 0, "not_eq", 0 },
+	{ "nullptr", CXXLanguageCPP, CXXKeywordIsConstant },
+	{ "operator", CXXLanguageCPP, 0 },
+	//{ 0, "or", 0 },
+	//{ 0, "or_eq", 0 },
+	// override is a keyword only after function declarators,
+	// it's easier handling it as identifier
+	//{ 0, "override", 0 },
+	{ "private", CXXLanguageCPP, 0 },
+	{ "protected", CXXLanguageCPP, 0 },
+	{ "public", CXXLanguageCPP, 0 },
+	{ "register", CXX_KW_ALL_LANGS, CXXKeywordMayAppearInVariableDeclaration },
+	{ "reinterpret_cast", CXXLanguageCPP, 0 },
+	{ "requires", CXXLanguageCPP, 0 },
+	{ "restrict", CXX_KW_ALL_LANGS, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordExcludeFromTypeNames },
+	{ "return", CXX_KW_ALL_LANGS, 0 },
+	{ "short", CXX_KW_ALL_LANGS, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName },
+	{ "signed", CXX_KW_ALL_LANGS, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName },
+	{ "sizeof", CXX_KW_ALL_LANGS, 0 },
+	{ "static", CXX_KW_ALL_LANGS, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordExcludeFromTypeNames },
+	{ "static_assert", CXXLanguageCPP, 0 },
+	{ "static_cast", CXXLanguageCPP, 0 },
+	{ "struct", CXX_KW_ALL_LANGS, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName | CXXKeywordIsTypeRefMarker },
+	{ "switch", CXX_KW_ALL_LANGS, 0 },
+	{ "template", CXXLanguageCPP, 0 },
+	{ "this", CXXLanguageCPP, 0 },
+	{ "thread_local", CXXLanguageCPP, CXXKeywordMayAppearInVariableDeclaration },
+	{ "throw", CXXLanguageCPP, 0 },
+	{ "true", CXX_KW_ALL_LANGS, CXXKeywordIsConstant },
+	{ "try", CXXLanguageCPP, 0 },
+	{ "typedef", CXX_KW_ALL_LANGS, 0 },
+	{ "typeid", CXXLanguageCPP, 0 },
+	{ "typename", CXXLanguageCPP, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName | CXXKeywordIsTypeRefMarker },
+	{ "union", CXX_KW_ALL_LANGS, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName | CXXKeywordIsTypeRefMarker },
+	{ "unsigned", CXX_KW_ALL_LANGS, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName },
+	{ "using", CXXLanguageCPP, 0 },
+	{ "virtual", CXXLanguageCPP, CXXKeywordExcludeFromTypeNames },
+	{ "void", CXX_KW_ALL_LANGS, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName },
+	{ "volatile", CXX_KW_ALL_LANGS, CXXKeywordMayAppearInVariableDeclaration },
+	{ "wchar_t", CXXLanguageCPP, CXXKeywordMayAppearInVariableDeclaration | CXXKeywordFlagMayBePartOfTypeName },
+	{ "while", CXX_KW_ALL_LANGS, 0 },
+	//{ 0, "xor", 0 },
+	//{ 0, 1, "xor_eq", 0 }
+};
+
+#undef CXX_KW_ALL_LANGS
+
+// Tells whether the table row of eKeywordId has (any bit of) uFlag set
+static bool cxxKeywordHasFlag(CXXKeyword eKeywordId, unsigned int uFlag)
+{
+	return g_aCXXKeywordTable[eKeywordId].uFlags & uFlag;
+}
+
+// Sets or clears CXXKeywordIsDisabled in the table row of eKeywordId
+static void cxxKeywordSetDisabled(CXXKeyword eKeywordId, bool bDisabled)
+{
+	if(bDisabled)
+		g_aCXXKeywordTable[eKeywordId].uFlags |= CXXKeywordIsDisabled;
+	else
+		g_aCXXKeywordTable[eKeywordId].uFlags &= ~CXXKeywordIsDisabled;
+}
+
+const char * cxxKeywordName(CXXKeyword eKeywordId)
+{
+	return g_aCXXKeywordTable[eKeywordId].szName;
+}
+
+bool cxxKeywordMayBePartOfTypeName(CXXKeyword eKeywordId)
+{
+	return cxxKeywordHasFlag(eKeywordId, CXXKeywordFlagMayBePartOfTypeName);
+}
+
+bool cxxKeywordMayAppearInVariableDeclaration(CXXKeyword eKeywordId)
+{
+	return cxxKeywordHasFlag(eKeywordId, CXXKeywordMayAppearInVariableDeclaration);
+}
+
+bool cxxKeywordIsTypeRefMarker(CXXKeyword eKeywordId)
+{
+	return cxxKeywordHasFlag(eKeywordId, CXXKeywordIsTypeRefMarker);
+}
+
+bool cxxKeywordIsConstant(CXXKeyword eKeywordId)
+{
+	return cxxKeywordHasFlag(eKeywordId, CXXKeywordIsConstant);
+}
+
+// True when the language mask of the keyword is exactly CXXLanguageCPP
+bool cxxKeywordIsCPPSpecific(CXXKeyword eKeywordId)
+{
+	return g_aCXXKeywordTable[eKeywordId].uLanguages == CXXLanguageCPP;
+}
+
+bool cxxKeywordExcludeFromTypeNames(CXXKeyword eKeywordId)
+{
+	return cxxKeywordHasFlag(eKeywordId, CXXKeywordExcludeFromTypeNames);
+}
+
+bool cxxKeywordIsDisabled(CXXKeyword eKeywordId)
+{
+	return cxxKeywordHasFlag(eKeywordId, CXXKeywordIsDisabled);
+}
+
+// Enables or disables the public, protected and private keywords together.
+// Returns the state found on entry, judged from the public keyword alone.
+bool cxxKeywordEnablePublicProtectedPrivate(bool bEnableIt)
+{
+	static const CXXKeyword aAccessKeywords[] = {
+		CXXKeywordPUBLIC, CXXKeywordPROTECTED, CXXKeywordPRIVATE
+	};
+	const bool bWasEnabled = !cxxKeywordHasFlag(CXXKeywordPUBLIC, CXXKeywordIsDisabled);
+	size_t i;
+
+	if(bWasEnabled == bEnableIt)
+		return bWasEnabled;
+
+	if(bEnableIt)
+	{
+		CXX_DEBUG_PRINT("Enabling public/protected/private keywords");
+	} else {
+		CXX_DEBUG_PRINT("Disabling public/protected/private keywords");
+	}
+
+	for(i = 0;i < sizeof(aAccessKeywords) / sizeof(aAccessKeywords[0]);i++)
+		cxxKeywordSetDisabled(aAccessKeywords[i], !bEnableIt);
+
+	return bWasEnabled;
+}
+
+// Enables or disables the context dependent "final" keyword
+void cxxKeywordEnableFinal(bool bEnableIt)
+{
+	cxxKeywordSetDisabled(CXXKeywordFINAL, !bEnableIt);
+}
+
+
+// Registers with addKeyword() every table row whose language mask intersects
+// uLanguage; the row index is passed as the keyword id.
+void cxxBuildKeywordHash(const langType eLangType,unsigned int uLanguage)
+{
+	const size_t uCount = sizeof(g_aCXXKeywordTable) / sizeof(g_aCXXKeywordTable[0]);
+	size_t u;
+
+	for(u = 0;u < uCount;u++)
+	{
+		if(!(g_aCXXKeywordTable[u].uLanguages & uLanguage))
+			continue;
+		addKeyword(g_aCXXKeywordTable[u].szName,eLangType,u);
+	}
+}
Modified: ctags/parsers/cxx/cxx_keyword.h 178 lines changed, 178 insertions(+), 0 deletions(-) =================================================================== @@ -0,0 +1,178 @@ +#ifndef ctags_cxx_keyword_h_ +#define ctags_cxx_keyword_h_ +/* +* Copyright (c) 2016, Szymon Tomasz Stefanek +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +* +* This module contains functions for parsing and scanning C++ source files +*/ + +#include "general.h" +#include "parse.h" + +// WARNING: There is a table in cxx_keyword.c that must match order in this enum +typedef enum _CXXKeyword +{ + CXXKeyword__ATTRIBUTE__, // GCC + CXXKeyword__CONSTANT__, // CUDA + CXXKeyword__DECLSPEC, // Microsoft C/C++ + CXXKeyword__DEVICE__, // CUDA + CXXKeyword__FASTCALL, // Microsoft C/C++ + CXXKeyword__FORCEINLINE, // Microsoft C/C++ + CXXKeyword__FORCEINLINE__, // CUDA + CXXKeyword__GLOBAL__, // CUDA + CXXKeyword__HOST__, // CUDA + CXXKeyword__INLINE, // Microsoft C/C++ + CXXKeyword__INLINE__, // GCC + CXXKeyword__MANAGED__, // CUDA + CXXKeyword__NOINLINE__, // CUDA + CXXKeyword__RESTRICT, // Microsoft C/C++ + CXXKeyword__RESTRICT__, // CUDA + CXXKeyword__SHARED__, // CUDA + CXXKeyword__STDCALL, // Microsoft C/C++ + CXXKeyword__THISCALL, // Microsoft C/C++ + CXXKeywordALIGNAS, // (since C++11) + CXXKeywordALIGNOF, // (since C++11) + //CXXKeywordAND, + //CXXKeywordAND_EQ, + CXXKeywordASM, + CXXKeywordAUTO, + //CXXKeywordBITAND, + //CXXKeywordBITOR, + CXXKeywordBOOL, + CXXKeywordBREAK, + CXXKeywordCASE, + CXXKeywordCATCH, + CXXKeywordCHAR, + CXXKeywordCHAR16_T, // (since C++11) + CXXKeywordCHAR32_T, // (since C++11) + CXXKeywordCLASS, + //CXXKeywordCOMPL, + CXXKeywordCONCEPT, // Concepts TS + CXXKeywordCONST, + CXXKeywordCONSTEXPR, // (since C++11) + CXXKeywordCONST_CAST, + CXXKeywordCONTINUE, + CXXKeywordDECLTYPE, // (since C++11) + CXXKeywordDEFAULT, + CXXKeywordDELETE, + CXXKeywordDO, + 
CXXKeywordDOUBLE, + CXXKeywordDYNAMIC_CAST, + CXXKeywordELSE, + CXXKeywordENUM, + CXXKeywordEXPLICIT, + CXXKeywordEXPORT, + CXXKeywordEXTERN, + CXXKeywordFALSE, + CXXKeywordFINAL, // not really a keyword, has meanings in some specific contexts + CXXKeywordFLOAT, + CXXKeywordFOR, + CXXKeywordFRIEND, + CXXKeywordGOTO, + CXXKeywordIF, + CXXKeywordINLINE, + CXXKeywordINT, + CXXKeywordLONG, + CXXKeywordMUTABLE, + CXXKeywordNAMESPACE, + CXXKeywordNEW, + CXXKeywordNOEXCEPT, // (since C++11) + //CXXKeywordNOT, + //CXXKeywordNOT_EQ, + CXXKeywordNULLPTR, // (since C++11) + CXXKeywordOPERATOR, + //CXXKeywordOR, + //CXXKeywordOR_EQ, + //CXXKeywordOVERRIDE, // not really a keyword, has meanings in some specific contexts + CXXKeywordPRIVATE, + CXXKeywordPROTECTED, + CXXKeywordPUBLIC, + CXXKeywordREGISTER, + CXXKeywordREINTERPRET_CAST, + CXXKeywordREQUIRES, // (Concepts TS) + CXXKeywordRESTRICT, // C99 extension + CXXKeywordRETURN, + CXXKeywordSHORT, + CXXKeywordSIGNED, + CXXKeywordSIZEOF, + CXXKeywordSTATIC, + CXXKeywordSTATIC_ASSERT, // (since C++11) + CXXKeywordSTATIC_CAST, + CXXKeywordSTRUCT, + CXXKeywordSWITCH, + CXXKeywordTEMPLATE, + CXXKeywordTHIS, + CXXKeywordTHREAD_LOCAL, // (since C++11) + CXXKeywordTHROW, + CXXKeywordTRUE, + CXXKeywordTRY, + CXXKeywordTYPEDEF, + CXXKeywordTYPEID, + CXXKeywordTYPENAME, + CXXKeywordUNION, + CXXKeywordUNSIGNED, + CXXKeywordUSING, + CXXKeywordVIRTUAL, + CXXKeywordVOID, + CXXKeywordVOLATILE, + CXXKeywordWCHAR_T, + CXXKeywordWHILE, + //CXXKeywordXOR, + //CXXKeywordXOR_EQ, + // WARNING: There is a table in cxx_keyword.c that must match order in this enumeration +} CXXKeyword; + +bool cxxKeywordIsConstant(CXXKeyword eKeywordId); +bool cxxKeywordMayBePartOfTypeName(CXXKeyword eKeywordId); +bool cxxKeywordIsTypeRefMarker(CXXKeyword eKeywordId); +bool cxxKeywordExcludeFromTypeNames(CXXKeyword eKeywordId); +bool cxxKeywordMayAppearInVariableDeclaration(CXXKeyword eKeywordId); +bool cxxKeywordIsCPPSpecific(CXXKeyword eKeywordId); + + +const char * 
cxxKeywordName(CXXKeyword eKeywordId); + +// uLanguage is really CXXLanguage, but we keep it as unsigned int to avoid +// problems with header inclusions. It works anyway. +void cxxBuildKeywordHash(const langType eLangType,unsigned int uLanguage); + +// Keyword enabled/disabled state management. +// +// public, protected, private, class, namespace... keywords are C++ only. +// However when parsing .h files we don't know if they belong to a C program or C++ +// one and thus for safety we parse them as C++. If our guess is wrong then the parser +// may become confused and in some cases even bail out. +// +// For this reason we enable/disable the processing of certain keyword sets +// in certain contexts. + + +// +// "public,protected,private" keywords +// +// In header files we disable processing of such keywords until we either figure +// out that the file really contains C++ or we start parsing a struct/union. +// +// This flag is meaningful only when parsing a .h file as C++ since in C +// public/protected/private are never keywords and we assume that .cpp files +// have C++ content (so public/protected/private are always keywords). +// +// This function returns the previous state of the public/protected/private keywords +// enabled flag so it can be easily restored. +bool cxxKeywordEnablePublicProtectedPrivate(bool bEnableIt); + +// +// "final" keyword +// +// This is actually special at C++ level: it's a keyword only within a specific part +// of a class declaration. In other contexts it's not a keyword. +void cxxKeywordEnableFinal(bool bEnableIt); + +// Is the specific keyword currently disabled? +bool cxxKeywordIsDisabled(CXXKeyword eKeywordId); + + +#endif //!ctags_cxx_keyword_h_ \ No newline at end of file
Modified: ctags/parsers/cxx/cxx_parser.c 2023 lines changed, 2023 insertions(+), 0 deletions(-) =================================================================== @@ -0,0 +1,2023 @@ +/* +* Copyright (c) 2016, Szymon Tomasz Stefanek +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +* +* This module contains functions for parsing and scanning C++ source files +*/ +#include "cxx_parser.h" +#include "cxx_parser_internal.h" + +#include "cxx_debug.h" +#include "cxx_keyword.h" +#include "cxx_token.h" +#include "cxx_token_chain.h" +#include "cxx_scope.h" +#include "cxx_tag.h" +#include "cxx_subparser_internal.h" + +#include "parse.h" +#include "vstring.h" +#include "../cpreprocessor.h" +#include "debug.h" +#include "keyword.h" +#include "read.h" +#include "ptrarray.h" +#include "trashbox.h" + +#include <string.h> + +// +// The global parser state +// +CXXParserState g_cxx; + +// +// This is set to false once the parser is run at least one time. +// Used by cleanup routines. +// +bool g_bFirstRun = true; + +// +// Reset parser state: +// - Clear the token chain +// - Reset "seen" keywords +// +void cxxParserNewStatement(void) +{ + cxxTokenChainClear(g_cxx.pTokenChain); + if(g_cxx.pTemplateTokenChain) + { + cxxTokenChainDestroy(g_cxx.pTemplateTokenChain); + g_cxx.pTemplateTokenChain = NULL; + g_cxx.oTemplateParameters.uCount = 0; + } else { + // we don't car@@ Diff output truncated at 100000 characters. @@
-------------- This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).