[geany/geany] 3cf016: Store "equal" tags into binary trees instead of lists in Symbol tree

Jiří Techet git-noreply at xxxxx
Fri Jul 22 21:21:09 UTC 2016


Branch:      refs/heads/master
Author:      Jiří Techet <techet at gmail.com>
Committer:   Jiří Techet <techet at gmail.com>
Date:        Fri, 22 Jul 2016 21:21:09 UTC
Commit:      3cf0161527db804f0393a6f49e67dee97ef7f46e
             https://github.com/geany/geany/commit/3cf0161527db804f0393a6f49e67dee97ef7f46e

Log Message:
-----------
Store "equal" tags into binary trees instead of lists in Symbol tree

At the moment tags with identical names are stored into a linked list in
tags_table and parents_table. This however leads to quadratic complexity
when looking up the nearest parent or tag in tree because the whole list
has to be traversed.

Use binary trees indexed by line number instead of lists so the lookup can
be performed in log(N) time and the overall complexity is N*log(N) instead
of N^2.

The GTree API is a little stupid because during the search it doesn't give
access to the value and it doesn't tell when a leaf node was reached. For
this reason the lookup has to be made in two steps - first, the best line
number is found (returned in user_data) and then a normal search for the
found line number is made to get the value stored in the tree.

This patch fixes the problem described in #577 when e.g. a big json export
file contains many identically named tags.


Modified Paths:
--------------
    src/symbols.c

Modified: src/symbols.c
239 lines changed, 141 insertions(+), 98 deletions(-)
===================================================================
@@ -64,6 +64,14 @@
 #include <stdlib.h>
 
 
+typedef struct
+{
+	gint found_line; /* return: the nearest line found */
+	gint line;       /* input: the line to look for */
+	gboolean lower   /* input: search only for lines with lower number than @line */;
+} TreeSearchData;
+
+
 static GPtrArray *top_level_iter_names = NULL;
 
 enum
@@ -1097,120 +1105,181 @@ static gboolean tree_store_remove_row(GtkTreeStore *store, GtkTreeIter *iter)
 }
 
 
-/* adds a new element in the parent table if it's key is known.
- * duplicates are kept */
+static gint tree_search_func(gconstpointer key, gpointer user_data)
+{
+	TreeSearchData *data = user_data;
+	gint parent_line = GPOINTER_TO_INT(key);
+	gboolean new_nearest;
+
+	if (data->found_line == -1)
+		data->found_line = parent_line; /* initial value */
+
+	new_nearest = ABS(data->line - parent_line) < ABS(data->line - data->found_line);
+
+	if (parent_line > data->line)
+	{
+		if (new_nearest && !data->lower)
+			data->found_line = parent_line;
+		return -1;
+	}
+
+	if (new_nearest)
+		data->found_line = parent_line;
+
+	if (parent_line < data->line)
+		return 1;
+
+	return 0;
+}
+
+
+static gint tree_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
+{
+	return GPOINTER_TO_INT(a) - GPOINTER_TO_INT(b);
+}
+
+
+static void parents_table_tree_value_free(gpointer data)
+{
+	g_slice_free(GtkTreeIter, data);
+}
+
+
+/* adds a new element in the parent table if its key is known. */
 static void update_parents_table(GHashTable *table, const TMTag *tag, const gchar *parent_name,
 		const GtkTreeIter *iter)
 {
-	GList **list;
-	if (g_hash_table_lookup_extended(table, tag->name, NULL, (gpointer *) &list) &&
+	GTree *tree;
+	if (g_hash_table_lookup_extended(table, tag->name, NULL, (gpointer *) &tree) &&
 		! utils_str_equal(parent_name, tag->name) /* prevent Foo::Foo from making parent = child */)
 	{
-		if (! list)
+		if (!tree)
 		{
-			list = g_slice_alloc(sizeof *list);
-			*list = NULL;
-			g_hash_table_insert(table, tag->name, list);
+			tree = g_tree_new_full(tree_cmp, NULL, NULL, parents_table_tree_value_free);
+			g_hash_table_insert(table, tag->name, tree);
 		}
-		*list = g_list_prepend(*list, g_slice_dup(GtkTreeIter, iter));
+
+		g_tree_insert(tree, GINT_TO_POINTER(tag->line), g_slice_dup(GtkTreeIter, iter));
 	}
 }
 
 
-static void free_iter_slice_list(gpointer data)
+static GtkTreeIter *parents_table_lookup(GHashTable *table, const gchar *name, guint line)
 {
-	GList **list = data;
+	GtkTreeIter *parent_search = NULL;
+	GTree *tree;
 
-	if (list)
+	tree = g_hash_table_lookup(table, name);
+	if (tree)
 	{
-		GList *node;
-		foreach_list(node, *list)
-			g_slice_free(GtkTreeIter, node->data);
-		g_list_free(*list);
-		g_slice_free1(sizeof *list, list);
+		TreeSearchData user_data = {-1, line, TRUE};
+
+		/* search parent candidates for the one with the nearest
+		 * line number which is lower than the tag's line number */
+		g_tree_search(tree, (GCompareFunc)tree_search_func, &user_data);
+		parent_search = g_tree_lookup(tree, GINT_TO_POINTER(user_data.found_line));
 	}
+
+	return parent_search;
+}
+
+
+static void parents_table_value_free(gpointer data)
+{
+	GTree *tree = data;
+	if (tree)
+		g_tree_destroy(tree);
 }
 
 
 /* inserts a @data in @table on key @tag.
  * previous data is not overwritten if the key is duplicated, but rather the
  * two values are kept in a list
  *
- * table is: GHashTable<TMTag, GList<GList<TMTag>>> */
+ * table is: GHashTable<TMTag, GTree<line_num, GList<GList<TMTag>>>> */
 static void tags_table_insert(GHashTable *table, TMTag *tag, GList *data)
 {
-	GList *list = g_hash_table_lookup(table, tag);
+	GTree *tree = g_hash_table_lookup(table, tag);
+	if (!tree)
+	{
+		tree = g_tree_new_full(tree_cmp, NULL, NULL, NULL);
+		g_hash_table_insert(table, tag, tree);
+	}
+	GList *list = g_tree_lookup(tree, GINT_TO_POINTER(tag->line));
 	list = g_list_prepend(list, data);
-	g_hash_table_insert(table, tag, list);
+	g_tree_insert(tree, GINT_TO_POINTER(tag->line), list);
 }
 
 
-/* looks up the entry in @table that better matches @tag.
- * if there are more than one candidate, the one that has closest line position to @tag is chosen */
+/* looks up the entry in @table that best matches @tag.
+ * if there is more than one candidate, the one that has closest line position to @tag is chosen */
 static GList *tags_table_lookup(GHashTable *table, TMTag *tag)
 {
-	GList *data = NULL;
-	GList *node = g_hash_table_lookup(table, tag);
-	if (node)
-	{
-		glong delta;
-		data = node->data;
-
-#define TAG_DELTA(a, b) ABS((glong) TM_TAG(a)->line - (glong) TM_TAG(b)->line)
-
-		delta = TAG_DELTA(((GList *) node->data)->data, tag);
-		for (node = node->next; node; node = node->next)
-		{
-			glong d = TAG_DELTA(((GList *) node->data)->data, tag);
+	TreeSearchData user_data = {-1, tag->line, FALSE};
+	GTree *tree = g_hash_table_lookup(table, tag);
 
-			if (d < delta)
-			{
-				data = node->data;
-				delta = d;
-			}
-		}
-
-#undef TAG_DELTA
+	if (tree)
+	{
+		GList *list;
 
+		g_tree_search(tree, (GCompareFunc)tree_search_func, &user_data);
+		list = g_tree_lookup(tree, GINT_TO_POINTER(user_data.found_line));
+		/* return the first value in the list - we don't care which of the
+		 * tags with identical names defined on the same line we get */
+		if (list)
+			return list->data;
 	}
-	return data;
+	return NULL;
 }
 
 
 /* removes the element at @tag from @table.
  * @tag must be the exact pointer used at insertion time */
 static void tags_table_remove(GHashTable *table, TMTag *tag)
 {
-	GList *list = g_hash_table_lookup(table, tag);
-	if (list)
+	GTree *tree = g_hash_table_lookup(table, tag);
+	if (tree)
 	{
-		GList *node;
-		foreach_list(node, list)
+		GList *list = g_tree_lookup(tree, GINT_TO_POINTER(tag->line));
+		if (list)
 		{
-			if (((GList *) node->data)->data == tag)
-				break;
+			GList *node;
+			/* should always be the first element as we returned the first one in
+			 * tags_table_lookup() */
+			foreach_list(node, list)
+			{
+				if (((GList *) node->data)->data == tag)
+					break;
+			}
+			list = g_list_delete_link(list, node);
+			if (!list)
+				g_tree_remove(tree, GINT_TO_POINTER(tag->line));
+			else
+				g_tree_insert(tree, GINT_TO_POINTER(tag->line), list);
 		}
-		list = g_list_delete_link(list, node);
-		if (list)
-			g_hash_table_insert(table, tag, list);
-		else
-			g_hash_table_remove(table, tag);
 	}
 }
 
 
-static void tags_table_destroy(GHashTable *table)
+static gboolean tags_table_tree_value_free(gpointer key, gpointer value, gpointer data)
 {
-	/* free any leftover elements.  note that we can't register a value_free_func when
-	 * creating the hash table because we only want to free it when destroying the table,
-	 * not when inserting a duplicate (we handle this manually) */
-	GHashTableIter iter;
-	gpointer value;
+	GList *list = value;
+	g_list_free(list);
+	return FALSE;
+}
+
 
-	g_hash_table_iter_init(&iter, table);
-	while (g_hash_table_iter_next(&iter, NULL, &value))
-		g_list_free(value);
-	g_hash_table_destroy(table);
+static void tags_table_value_free(gpointer data)
+{
+	GTree *tree = data;
+	if (tree)
+	{
+		/* free any leftover elements.  note that we can't register a value_free_func when
+		 * creating the tree because we only want to free it when destroying the tree,
+		 * not when inserting a duplicate (we handle this manually) */
+		g_tree_foreach(tree, tags_table_tree_value_free, NULL);
+		g_tree_destroy(tree);
+	}
 }
 
 
@@ -1243,10 +1312,11 @@ static void update_tree_tags(GeanyDocument *doc, GList **tags)
 	GList *item;
 
 	/* Build hash tables holding tags and parents */
-	/* parent table holds "tag-name":GtkTreeIter */
-	parents_table = g_hash_table_new_full(g_str_hash, g_str_equal, NULL, free_iter_slice_list);
-	/* tags table is another representation of the @tags list, TMTag:GList<TMTag> */
-	tags_table = g_hash_table_new_full(tag_hash, tag_equal, NULL, NULL);
+	/* parent table is GHashTable<tag_name, GTree<line_num, GtkTreeIter>> */
+	parents_table = g_hash_table_new_full(g_str_hash, g_str_equal, NULL, parents_table_value_free);
+	/* tags table is another representation of the @tags list,
+	 * GHashTable<TMTag, GTree<line_num, GList<GList<TMTag>>>> */
+	tags_table = g_hash_table_new_full(tag_hash, tag_equal, NULL, tags_table_value_free);
 	foreach_list(item, *tags)
 	{
 		TMTag *tag = item->data;
@@ -1339,34 +1409,7 @@ static void update_tree_tags(GeanyDocument *doc, GList **tags)
 			parent_name = get_parent_name(tag, doc->file_type->id);
 			if (parent_name)
 			{
-				GList **candidates;
-				GtkTreeIter *parent_search = NULL;
-
-				/* walk parent candidates to find the better one.
-				 * if there are more than one, take the one that has the closest line number
-				 * after the tag we're searching the parent for */
-				candidates = g_hash_table_lookup(parents_table, parent_name);
-				if (candidates)
-				{
-					GList *node;
-					glong delta = G_MAXLONG;
-					foreach_list(node, *candidates)
-					{
-						TMTag *parent_tag;
-						glong  d;
-
-						gtk_tree_model_get(GTK_TREE_MODEL(store), node->data,
-								SYMBOLS_COLUMN_TAG, &parent_tag, -1);
-
-						d = tag->line - parent_tag->line;
-						if (! parent_search || (d >= 0 && d < delta))
-						{
-							delta = d;
-							parent_search = node->data;
-						}
-						tm_tag_unref(parent_tag);
-					}
-				}
+				GtkTreeIter *parent_search = parents_table_lookup(parents_table, parent_name, tag->line);
 
 				if (parent_search)
 					parent = parent_search;
@@ -1399,7 +1442,7 @@ static void update_tree_tags(GeanyDocument *doc, GList **tags)
 	}
 
 	g_hash_table_destroy(parents_table);
-	tags_table_destroy(tags_table);
+	g_hash_table_destroy(tags_table);
 }
 
 



--------------
This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).


More information about the Commits mailing list