[geany/geany] d68667: On single file update only merge the file's tags into workspace tags

Jiří Techet git-noreply at xxxxx
Sat Nov 8 18:57:47 UTC 2014


Branch:      refs/heads/master
Author:      Jiří Techet <techet at gmail.com>
Committer:   Jiří Techet <techet at gmail.com>
Date:        Sat, 18 Oct 2014 19:40:10 UTC
Commit:      d686674ecae5e6c5dc889741d6a96c783b5678e4
             https://github.com/geany/geany/commit/d686674ecae5e6c5dc889741d6a96c783b5678e4

Log Message:
-----------
On single file update only merge the file's tags into workspace tags

Since both the file tags and workspace tags are sorted, it is unnecessary
to completely resort the tags for the workspace - instead, remove the
tags belonging to the file from the workspace tags and merge the updated
file's tags into the workspace tags.

The merge is optimized for merging small number of tags into a large array.
For instance, the total number of tags in linux kernel is about 2300000
while the average number of tags per file is about 65 (35000 source files).
Most of the time merge won't be performed so we can avoid expensive
string comparisons and try to make bigger jumps to see if some merge is
needed or not (more details in the source file comments).


Modified Paths:
--------------
    tagmanager/src/tm_source_file.c
    tagmanager/src/tm_tag.c
    tagmanager/src/tm_tag.h
    tagmanager/src/tm_workspace.c
    tagmanager/src/tm_workspace.h

Modified: tagmanager/src/tm_source_file.c
16 lines changed, 14 insertions(+), 2 deletions(-)
===================================================================
@@ -350,6 +350,12 @@ void tm_source_file_update(TMSourceFile *source_file, gboolean update_workspace)
 	g_message("Source file updating based on source file %s", source_file->file_name);
 #endif
 
+	if (update_workspace)
+	{
+		/* tm_source_file_parse() deletes the tag objects - remove the tags from
+		 * workspace while they exist and can be scanned */
+		tm_workspace_remove_file_tags(source_file);
+	}
 	tm_source_file_parse(source_file);
 	tm_tags_sort(source_file->tags_array, NULL, FALSE);
 	if (update_workspace)
@@ -357,7 +363,7 @@ void tm_source_file_update(TMSourceFile *source_file, gboolean update_workspace)
 #ifdef TM_DEBUG
 		g_message("Updating workspace from source file");
 #endif
-		tm_workspace_update();
+		tm_workspace_merge_file_tags(source_file);
 	}
 #ifdef TM_DEBUG
 	else
@@ -375,6 +381,12 @@ void tm_source_file_buffer_update(TMSourceFile *source_file, guchar* text_buf,
 	g_message("Buffer updating based on source file %s", source_file->file_name);
 #endif
 
+	if (update_workspace)
+	{
+		/* tm_source_file_parse() deletes the tag objects - remove the tags from
+		 * workspace while they exist and can be scanned */
+		tm_workspace_remove_file_tags(source_file);
+	}
 	tm_source_file_buffer_parse (source_file, text_buf, buf_size);
 	tm_tags_sort(source_file->tags_array, NULL, FALSE);
 	if (update_workspace)
@@ -382,7 +394,7 @@ void tm_source_file_buffer_update(TMSourceFile *source_file, guchar* text_buf,
 #ifdef TM_DEBUG
 		g_message("Updating workspace from buffer..");
 #endif
-		tm_workspace_update();
+		tm_workspace_merge_file_tags(source_file);
 	}
 #ifdef TM_DEBUG
 	else


Modified: tagmanager/src/tm_tag.c
113 lines changed, 113 insertions(+), 0 deletions(-)
===================================================================
@@ -835,6 +835,119 @@ gboolean tm_tags_sort(GPtrArray *tags_array, TMTagAttrType *sort_attributes, gbo
 	return TRUE;
 }
 
+GPtrArray *tm_tags_remove_file_tags(TMSourceFile *source_file, GPtrArray *tags_array)
+{
+	gint i;
+	for (i = 0; i < tags_array->len; i++)
+	{
+		TMTag *tag = tags_array->pdata[i];
+		
+		if (tag != NULL && tag->atts.entry.file == source_file)
+			tags_array->pdata[i] = NULL;
+	}
+	tm_tags_prune(tags_array);
+}
+
+/* Optimized merge sort for merging sorted values from one array to another
+ * where one of the arrays is much smaller than the other.
+ * The merge complexity depends mostly on the size of the small array
+ * and is almost independent of the size of the big array.
+ * In addition, get rid of the duplicates (if both big_array and small_array are duplicate-free). */
+static GPtrArray *merge_big_small(GPtrArray *big_array, GPtrArray *small_array) {
+	gint i1 = 0;  /* index to big_array */
+	gint i2 = 0;  /* index to small_array */
+	/* on average, we are merging a value from small_array every 
+	 * len(big_array) / len(small_array) values - good approximation for fast jump
+	 * step size */
+	gint initial_step = big_array->len / small_array->len;
+	initial_step = initial_step > 4 ? initial_step : 1;
+	gint step = initial_step;
+	GPtrArray *res_array = g_ptr_array_sized_new(big_array->len + small_array->len);
+#ifdef TM_DEBUG
+	gint cmpnum = 0;
+#endif
+
+	while (i1 < big_array->len && i2 < small_array->len)
+	{
+		gint cmpval;
+		gpointer val1 = big_array->pdata[i1];
+		gpointer val2 = small_array->pdata[i2];
+
+		if (step > 4)  /* fast path start */
+		{
+			gint j1 = (i1 + step < big_array->len) ? i1 + step : big_array->len - 1;
+			
+			val1 = big_array->pdata[j1];
+#ifdef TM_DEBUG
+			cmpnum++;
+#endif
+			/* if the value in big_array after making the big step is still smaller
+			 * than the value in small_array, we can copy all the values inbetween
+			 * into the result without making expensive string comparisons */
+			if (tm_tag_compare(&val1, &val2) < 0)
+			{
+				while (i1 <= j1) 
+				{
+					val1 = big_array->pdata[i1];
+					g_ptr_array_add(res_array, val1);
+					i1++;
+				}
+				continue;
+			}
+			else 
+			{
+				/* lower the step and try again */
+				step /= 2;
+				continue;
+			}
+		}  /* fast path end */
+		
+#ifdef TM_DEBUG
+		cmpnum++;
+#endif
+		cmpval = tm_tag_compare(&val1, &val2);
+		if (cmpval < 0)
+		{
+			g_ptr_array_add(res_array, val1);
+			i1++;
+		}
+		else
+		{
+			g_ptr_array_add(res_array, val2);
+			i2++;
+			/* value from small_array gets merged - reset the step size */
+			step = initial_step;
+			if (cmpval == 0)
+				i1++;  /* remove the duplicate, keep just the newly merged value */
+		}
+	}
+	
+	/* end of one of the arrays reached - copy the rest from the other array */
+	while (i1 < big_array->len)
+		g_ptr_array_add(res_array, big_array->pdata[i1++]);
+	while (i2 < small_array->len)
+		g_ptr_array_add(res_array, small_array->pdata[i2++]);
+		
+#ifdef TM_DEBUG
+	printf("cmpnums: %d\n", cmpnum);
+	printf("total tags: %d\n", big_array->len);
+	printf("merged tags: %d\n\n", small_array->len);
+#endif
+
+	return res_array;
+}
+
+GPtrArray *tm_tags_merge_big_small(GPtrArray *big_array, GPtrArray *small_array, TMTagAttrType *sort_attributes)
+{
+	GPtrArray *res_array;
+	
+	s_sort_attrs = sort_attributes;
+	s_partial = FALSE;
+	res_array = merge_big_small(big_array, small_array);
+	s_sort_attrs = NULL;
+	return res_array;
+}
+
 gboolean tm_tags_custom_sort(GPtrArray *tags_array, TMTagCompareFunc compare_func, gboolean dedup)
 {
 	if ((!tags_array) || (!tags_array->len))


Modified: tagmanager/src/tm_tag.h
4 lines changed, 4 insertions(+), 0 deletions(-)
===================================================================
@@ -242,6 +242,10 @@ int tm_tag_compare(const void *ptr1, const void *ptr2);
 gboolean tm_tags_merge(GPtrArray *tags_array, gsize orig_len,
 	TMTagAttrType *sort_attributes, gboolean dedup);
 
+GPtrArray *tm_tags_remove_file_tags(TMSourceFile *source_file, GPtrArray *tags_array);
+
+GPtrArray *tm_tags_merge_big_small(GPtrArray *big_array, GPtrArray *small_array, TMTagAttrType *sort_attributes);
+
 /*
  Sort an array of tags on the specified attribuites using the inbuilt comparison
  function.


Modified: tagmanager/src/tm_workspace.c
28 lines changed, 26 insertions(+), 2 deletions(-)
===================================================================
@@ -106,11 +106,11 @@ gboolean tm_workspace_remove_source_file(TMSourceFile *source_file, gboolean do_
 	{
 		if (theWorkspace->source_files->pdata[i] == source_file)
 		{
+			if (update)
+				tm_workspace_remove_file_tags(source_file);
 			if (do_free)
 				tm_source_file_free(source_file);
 			g_ptr_array_remove_index_fast(theWorkspace->source_files, i);
-			if (update)
-				tm_workspace_update();
 			return TRUE;
 		}
 	}
@@ -489,6 +489,30 @@ void tm_workspace_recreate_tags_array(void)
 	tm_tags_sort(theWorkspace->tags_array, sort_attrs, TRUE);
 }
 
+void tm_workspace_remove_file_tags(TMSourceFile *source_file)
+{
+	if (theWorkspace->tags_array != NULL)
+		tm_tags_remove_file_tags(source_file, theWorkspace->tags_array);
+}
+
+void tm_workspace_merge_file_tags(TMSourceFile *source_file)
+{
+	TMTagAttrType sort_attrs[] = { tm_tag_attr_name_t, tm_tag_attr_file_t,
+		tm_tag_attr_scope_t, tm_tag_attr_type_t, tm_tag_attr_arglist_t, 0};
+
+	if (theWorkspace->tags_array == NULL)
+		theWorkspace->tags_array = g_ptr_array_new();
+		
+	if (source_file->tags_array != NULL)
+	{
+		GPtrArray *new_tags = tm_tags_merge_big_small(theWorkspace->tags_array, 
+			source_file->tags_array, sort_attrs);
+		/* tags owned by TMSourceFile - free just the pointer array */
+		g_ptr_array_free(theWorkspace->tags_array, TRUE);
+		theWorkspace->tags_array = new_tags;
+	}
+}
+
 void tm_workspace_update(void)
 {
 #ifdef TM_DEBUG


Modified: tagmanager/src/tm_workspace.h
3 lines changed, 3 insertions(+), 0 deletions(-)
===================================================================
@@ -159,6 +159,9 @@ const GPtrArray *tm_workspace_get_parents(const gchar *name);
 */
 void tm_workspace_free(void);
 
+void tm_workspace_merge_file_tags(TMSourceFile *source_file);
+
+void tm_workspace_remove_file_tags(TMSourceFile *source_file);
 
 #endif /* GEANY_PRIVATE */
 



--------------
This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).


More information about the Commits mailing list