Branch: refs/heads/master Author: Jiří Techet techet@gmail.com Committer: Jiří Techet techet@gmail.com Date: Tue, 21 Mar 2023 14:46:46 UTC Commit: dea43baf477ab71650cdfc54fa976714def9c170 https://github.com/geany/geany/commit/dea43baf477ab71650cdfc54fa976714def9c1...
Log Message: ----------- ctags: Add quick path for looking up too long strings in the keyword table
Parser code like
vString *str = vStringNew(); while (someCondition) { int c = getcFromInputFile(); vStringPut(str, c); if (lookupCaseKeyword (vStringValue(str), some_lang)) { do_stuff(); vStringClear(str); } }
is prone to quadratic complexity because when someCondition isn't satisfied in the parsed file, the string str grows by one in each iteration and in each iteration lookupCaseKeyword() has to go through strlen(str) characters to compute the hash.
Since we know the maximum length of the strings inside the keyword hash table, we can store this value and if the queried string is longer than this value, we can be sure it isn't present in the hash table and return quickly without having to compute the full hash of the string.
Modified Paths: -------------- ctags/main/keyword.c
Modified: ctags/main/keyword.c 29 lines changed, 25 insertions(+), 4 deletions(-) =================================================================== @@ -36,6 +36,7 @@ typedef struct sHashEntry { */ static const unsigned int TableSize = 2039; /* prime */ static hashEntry **HashTable = NULL; +static unsigned int MaxEntryLen = 0;
/* * FUNCTION DEFINITIONS @@ -70,7 +71,8 @@ static hashEntry *getHashTableEntry (unsigned long hashedValue) return entry; }
-static unsigned int hashValue (const char *const string, langType language) +static unsigned int hashValue (const char *const string, langType language, + unsigned int maxLen, bool *maxLenReached) { const signed char *p; unsigned int h = 5381; @@ -79,11 +81,19 @@ static unsigned int hashValue (const char *const string, langType language)
/* "djb" hash as used in g_str_hash() in glib */ for (p = (const signed char *)string; *p != '\0'; p++) + { h = (h << 5) + h + tolower (*p); + if (p - (const signed char *)string > maxLen) + { + *maxLenReached = true; + return 0; + } + }
/* consider language as an extra "character" and add it to the hash */ h = (h << 5) + h + language;
+ *maxLenReached = false; return h; }
@@ -107,8 +117,13 @@ static hashEntry *newEntry ( */ extern void addKeyword (const char *const string, langType language, int value) { - const unsigned int index = hashValue (string, language) % TableSize; + bool dummy; + const unsigned int index = hashValue (string, language, 1000, &dummy) % TableSize; hashEntry *entry = getHashTableEntry (index); + size_t len = strlen (string); + + if (len > MaxEntryLen) + MaxEntryLen = len;
if (entry == NULL) { @@ -139,10 +154,16 @@ extern void addKeyword (const char *const string, langType language, int value)
static int lookupKeywordFull (const char *const string, bool caseSensitive, langType language) { - const unsigned int index = hashValue (string, language) % TableSize; - hashEntry *entry = getHashTableEntry (index); + bool maxLenReached; + const unsigned int index = hashValue (string, language, MaxEntryLen, &maxLenReached) % TableSize; + hashEntry *entry; int result = KEYWORD_NONE;
+ if (maxLenReached) + return KEYWORD_NONE; + + entry = getHashTableEntry (index); + while (entry != NULL) { if (language == entry->language &&
-------------- This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).