Branch: refs/heads/master Author: Jiří Techet techet@gmail.com Committer: Jiří Techet techet@gmail.com Date: Sat, 18 Oct 2014 19:40:10 UTC Commit: d686674ecae5e6c5dc889741d6a96c783b5678e4 https://github.com/geany/geany/commit/d686674ecae5e6c5dc889741d6a96c783b5678...
Log Message: ----------- On single file update only merge the file's tags into workspace tags
Since both the file tags and workspace tags are sorted, it is unnecessary to completely resort the tags for the workspace - instead, remove the tags belonging to the file from the workspace tags and merge the updated file's tags into the workspace tags.
The merge is optimized for merging small number of tags into a large array. For instance, the total number of tags in linux kernel is about 2300000 while the average number of tags per file is about 65 (35000 source files). Most of the time merge won't be performed so we can avoid expensive string comparisons and try to make bigger jumps to see if some merge is needed or not (more details in the source file comments).
Modified Paths: -------------- tagmanager/src/tm_source_file.c tagmanager/src/tm_tag.c tagmanager/src/tm_tag.h tagmanager/src/tm_workspace.c tagmanager/src/tm_workspace.h
Modified: tagmanager/src/tm_source_file.c 16 lines changed, 14 insertions(+), 2 deletions(-) =================================================================== @@ -350,6 +350,12 @@ void tm_source_file_update(TMSourceFile *source_file, gboolean update_workspace) g_message("Source file updating based on source file %s", source_file->file_name); #endif
+ if (update_workspace) + { + /* tm_source_file_parse() deletes the tag objects - remove the tags from + * workspace while they exist and can be scanned */ + tm_workspace_remove_file_tags(source_file); + } tm_source_file_parse(source_file); tm_tags_sort(source_file->tags_array, NULL, FALSE); if (update_workspace) @@ -357,7 +363,7 @@ void tm_source_file_update(TMSourceFile *source_file, gboolean update_workspace) #ifdef TM_DEBUG g_message("Updating workspace from source file"); #endif - tm_workspace_update(); + tm_workspace_merge_file_tags(source_file); } #ifdef TM_DEBUG else @@ -375,6 +381,12 @@ void tm_source_file_buffer_update(TMSourceFile *source_file, guchar* text_buf, g_message("Buffer updating based on source file %s", source_file->file_name); #endif
+ if (update_workspace) + { + /* tm_source_file_parse() deletes the tag objects - remove the tags from + * workspace while they exist and can be scanned */ + tm_workspace_remove_file_tags(source_file); + } tm_source_file_buffer_parse (source_file, text_buf, buf_size); tm_tags_sort(source_file->tags_array, NULL, FALSE); if (update_workspace) @@ -382,7 +394,7 @@ void tm_source_file_buffer_update(TMSourceFile *source_file, guchar* text_buf, #ifdef TM_DEBUG g_message("Updating workspace from buffer.."); #endif - tm_workspace_update(); + tm_workspace_merge_file_tags(source_file); } #ifdef TM_DEBUG else
Modified: tagmanager/src/tm_tag.c 113 lines changed, 113 insertions(+), 0 deletions(-) =================================================================== @@ -835,6 +835,119 @@ gboolean tm_tags_sort(GPtrArray *tags_array, TMTagAttrType *sort_attributes, gbo return TRUE; }
+GPtrArray *tm_tags_remove_file_tags(TMSourceFile *source_file, GPtrArray *tags_array) +{ + gint i; + for (i = 0; i < tags_array->len; i++) + { + TMTag *tag = tags_array->pdata[i]; + + if (tag != NULL && tag->atts.entry.file == source_file) + tags_array->pdata[i] = NULL; + } + tm_tags_prune(tags_array); +} + +/* Optimized merge sort for merging sorted values from one array to another + * where one of the arrays is much smaller than the other. + * The merge complexity depends mostly on the size of the small array + * and is almost independent of the size of the big array. + * In addition, get rid of the duplicates (if both big_array and small_array are duplicate-free). */ +static GPtrArray *merge_big_small(GPtrArray *big_array, GPtrArray *small_array) { + gint i1 = 0; /* index to big_array */ + gint i2 = 0; /* index to small_array */ + /* on average, we are merging a value from small_array every + * len(big_array) / len(small_array) values - good approximation for fast jump + * step size */ + gint initial_step = big_array->len / small_array->len; + initial_step = initial_step > 4 ? initial_step : 1; + gint step = initial_step; + GPtrArray *res_array = g_ptr_array_sized_new(big_array->len + small_array->len); +#ifdef TM_DEBUG + gint cmpnum = 0; +#endif + + while (i1 < big_array->len && i2 < small_array->len) + { + gint cmpval; + gpointer val1 = big_array->pdata[i1]; + gpointer val2 = small_array->pdata[i2]; + + if (step > 4) /* fast path start */ + { + gint j1 = (i1 + step < big_array->len) ? i1 + step : big_array->len - 1; + + val1 = big_array->pdata[j1]; +#ifdef TM_DEBUG + cmpnum++; +#endif + /* if the value in big_array after making the big step is still smaller + * than the value in small_array, we can copy all the values inbetween + * into the result without making expensive string comparisons */ + if (tm_tag_compare(&val1, &val2) < 0) + { + while (i1 <= j1) + { + val1 = big_array->pdata[i1]; + g_ptr_array_add(res_array, val1); + i1++; + } + continue; + } + else + { + /* lower the step and try again */ + step /= 2; + continue; + } + } /* fast path end */ + +#ifdef TM_DEBUG + cmpnum++; +#endif + cmpval = tm_tag_compare(&val1, &val2); + if (cmpval < 0) + { + g_ptr_array_add(res_array, val1); + i1++; + } + else + { + g_ptr_array_add(res_array, val2); + i2++; + /* value from small_array gets merged - reset the step size */ + step = initial_step; + if (cmpval == 0) + i1++; /* remove the duplicate, keep just the newly merged value */ + } + } + + /* end of one of the arrays reached - copy the rest from the other array */ + while (i1 < big_array->len) + g_ptr_array_add(res_array, big_array->pdata[i1++]); + while (i2 < small_array->len) + g_ptr_array_add(res_array, small_array->pdata[i2++]); + +#ifdef TM_DEBUG + printf("cmpnums: %d\n", cmpnum); + printf("total tags: %d\n", big_array->len); + printf("merged tags: %d\n\n", small_array->len); +#endif + + return res_array; +} + +GPtrArray *tm_tags_merge_big_small(GPtrArray *big_array, GPtrArray *small_array, TMTagAttrType *sort_attributes) +{ + GPtrArray *res_array; + + s_sort_attrs = sort_attributes; + s_partial = FALSE; + res_array = merge_big_small(big_array, small_array); + s_sort_attrs = NULL; + return res_array; +} + gboolean tm_tags_custom_sort(GPtrArray *tags_array, TMTagCompareFunc compare_func, gboolean dedup) { if ((!tags_array) || (!tags_array->len))
Modified: tagmanager/src/tm_tag.h 4 lines changed, 4 insertions(+), 0 deletions(-) =================================================================== @@ -242,6 +242,10 @@ int tm_tag_compare(const void *ptr1, const void *ptr2); gboolean tm_tags_merge(GPtrArray *tags_array, gsize orig_len, TMTagAttrType *sort_attributes, gboolean dedup);
+GPtrArray *tm_tags_remove_file_tags(TMSourceFile *source_file, GPtrArray *tags_array); + +GPtrArray *tm_tags_merge_big_small(GPtrArray *big_array, GPtrArray *small_array, TMTagAttrType *sort_attributes); + /* Sort an array of tags on the specified attribuites using the inbuilt comparison function.
Modified: tagmanager/src/tm_workspace.c 28 lines changed, 26 insertions(+), 2 deletions(-) =================================================================== @@ -106,11 +106,11 @@ gboolean tm_workspace_remove_source_file(TMSourceFile *source_file, gboolean do_ { if (theWorkspace->source_files->pdata[i] == source_file) { + if (update) + tm_workspace_remove_file_tags(source_file); if (do_free) tm_source_file_free(source_file); g_ptr_array_remove_index_fast(theWorkspace->source_files, i); - if (update) - tm_workspace_update(); return TRUE; } } @@ -489,6 +489,30 @@ void tm_workspace_recreate_tags_array(void) tm_tags_sort(theWorkspace->tags_array, sort_attrs, TRUE); }
+void tm_workspace_remove_file_tags(TMSourceFile *source_file) +{ + if (theWorkspace->tags_array != NULL) + tm_tags_remove_file_tags(source_file, theWorkspace->tags_array); +} + +void tm_workspace_merge_file_tags(TMSourceFile *source_file) +{ + TMTagAttrType sort_attrs[] = { tm_tag_attr_name_t, tm_tag_attr_file_t, + tm_tag_attr_scope_t, tm_tag_attr_type_t, tm_tag_attr_arglist_t, 0}; + + if (theWorkspace->tags_array == NULL) + theWorkspace->tags_array = g_ptr_array_new(); + + if (source_file->tags_array != NULL) + { + GPtrArray *new_tags = tm_tags_merge_big_small(theWorkspace->tags_array, + source_file->tags_array, sort_attrs); + /* tags owned by TMSourceFile - free just the pointer array */ + g_ptr_array_free(theWorkspace->tags_array, TRUE); + theWorkspace->tags_array = new_tags; + } +} + void tm_workspace_update(void) { #ifdef TM_DEBUG
Modified: tagmanager/src/tm_workspace.h 3 lines changed, 3 insertions(+), 0 deletions(-) =================================================================== @@ -159,6 +159,9 @@ const GPtrArray *tm_workspace_get_parents(const gchar *name); */ void tm_workspace_free(void);
+void tm_workspace_merge_file_tags(TMSourceFile *source_file); + +void tm_workspace_remove_file_tags(TMSourceFile *source_file);
#endif /* GEANY_PRIVATE */
-------------- This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).