/pidgin/main: 3118fb87573e: Trie: implement purple_trie_multi_find

Tomasz Wasilczyk twasilczyk at pidgin.im
Mon Apr 7 17:01:09 EDT 2014


Changeset: 3118fb87573e290cc42698e518a5a9a6c3453a8e
Author:	 Tomasz Wasilczyk <twasilczyk at pidgin.im>
Date:	 2014-04-07 23:01 +0200
Branch:	 default
URL: https://hg.pidgin.im/pidgin/main/rev/3118fb87573e

Description:

Trie: implement purple_trie_multi_find

diffstat:

 libpurple/tests/test_trie.c |  49 ++++++++++++++++++++++++++++++++
 libpurple/trie.c            |  68 +++++++++++++++++++++++++++++++++++++++++++++
 libpurple/trie.h            |  22 +++++++++++++-
 3 files changed, 138 insertions(+), 1 deletions(-)

diffs (183 lines):

diff --git a/libpurple/tests/test_trie.c b/libpurple/tests/test_trie.c
--- a/libpurple/tests/test_trie.c
+++ b/libpurple/tests/test_trie.c
@@ -293,6 +293,54 @@ START_TEST(test_trie_find_noreset)
 }
 END_TEST
 
+START_TEST(test_trie_multi_find)
+{
+	PurpleTrie *trie1, *trie2, *trie3;
+	GSList *tries = NULL;
+	const gchar *in;
+	int out;
+
+	trie1 = purple_trie_new();
+	trie2 = purple_trie_new();
+	trie3 = purple_trie_new();
+	purple_trie_set_reset_on_match(trie1, FALSE);
+	purple_trie_set_reset_on_match(trie2, TRUE);
+	purple_trie_set_reset_on_match(trie3, FALSE);
+
+	/* appending is not efficient, but we have only 3 elements */
+	tries = g_slist_append(tries, trie1);
+	tries = g_slist_append(tries, trie2);
+	tries = g_slist_append(tries, trie3);
+
+	purple_trie_add(trie1, "test", (gpointer)0x10011);
+	purple_trie_add(trie1, "trie1", (gpointer)0x10012);
+	purple_trie_add(trie1, "Alice", (gpointer)0x10013);
+
+	purple_trie_add(trie2, "test", (gpointer)0x10021);
+	purple_trie_add(trie2, "trie2", (gpointer)0x10022);
+	purple_trie_add(trie2, "example", (gpointer)0x10023);
+	purple_trie_add(trie2, "Ali", (gpointer)0x10024);
+
+	/* "tester" without last (accepting) letter of "test" */
+	purple_trie_add(trie3, "teser", (gpointer)0x10031);
+	purple_trie_add(trie3, "trie3", (gpointer)0x10032);
+	purple_trie_add(trie3, "tester", (gpointer)0x10033);
+	purple_trie_add(trie3, "example", (gpointer)0x10034);
+	purple_trie_add(trie3, "Al", (gpointer)0x10035);
+
+	in = "test tester trie trie1 trie2 trie3 example Alice";
+
+	out = purple_trie_multi_find(tries, in,
+		test_trie_find_cb, (gpointer)0x10);
+
+	assert_int_equal(9, out);
+	assert_int_equal(2 * 0x11 + 0x33 + 0x12 + 0x22 +
+		0x32 + 0x23 + 0x35 + 0x13, find_sum);
+
+	g_slist_free_full(tries, g_object_unref);
+}
+END_TEST
+
 Suite *
 purple_trie_suite(void)
 {
@@ -308,6 +356,7 @@ purple_trie_suite(void)
 	tcase_add_test(tc, test_trie_find);
 	tcase_add_test(tc, test_trie_find_reset);
 	tcase_add_test(tc, test_trie_find_noreset);
+	tcase_add_test(tc, test_trie_multi_find);
 
 	suite_add_tcase(s, tc);
 
diff --git a/libpurple/trie.c b/libpurple/trie.c
--- a/libpurple/trie.c
+++ b/libpurple/trie.c
@@ -621,6 +621,74 @@ purple_trie_find(PurpleTrie *trie, const
 	return found_count;
 }
 
+gulong
+purple_trie_multi_find(const GSList *tries, const gchar *src,
+	PurpleTrieFindCb find_cb, gpointer user_data)
+{
+	guint tries_count, m_idx;
+	PurpleTrieMachine *machines;
+	gulong found_count = 0;
+	gsize i;
+
+	if (src == NULL)
+		return 0;
+
+	tries_count = g_slist_length((GSList*)tries);
+	if (tries_count == 0)
+		return 0;
+
+	/* Initialize all machines. */
+	machines = g_new(PurpleTrieMachine, tries_count);
+	for (i = 0; i < tries_count; i++, tries = tries->next) {
+		PurpleTrie *trie = tries->data;
+		PurpleTriePrivate *priv = PURPLE_TRIE_GET_PRIVATE(trie);
+
+		if (priv == NULL) {
+			g_warn_if_reached();
+			g_free(machines);
+			return 0;
+		}
+
+		purple_trie_states_build(trie);
+
+		machines[i].state = priv->root_state;
+		machines[i].root_state = priv->root_state;
+		machines[i].reset_on_match = priv->reset_on_match;
+		machines[i].find_cb = find_cb;
+		machines[i].user_data = user_data;
+	}
+
+	i = 0;
+	while (src[i] != '\0') {
+		guchar character = src[i++];
+		gboolean was_found = FALSE;
+
+		/* Advance every machine and possibly perform a replacement. */
+		for (m_idx = 0; m_idx < tries_count; m_idx++) {
+			purple_trie_advance(&machines[m_idx], character);
+			if (was_found)
+				continue;
+			was_found =
+				purple_trie_find_do_discovery(&machines[m_idx]);
+			if (was_found)
+				found_count++;
+		}
+
+		/* If we replaced a word, reset _all_ machines */
+		if (was_found) {
+			for (m_idx = 0; m_idx < tries_count; m_idx++) {
+				if (!machines[m_idx].reset_on_match)
+					continue;
+				machines[m_idx].state =
+					machines[m_idx].root_state;
+			}
+		}
+	}
+
+	g_free(machines);
+	return found_count;
+}
+
 
 /*******************************************************************************
  * Records
diff --git a/libpurple/trie.h b/libpurple/trie.h
--- a/libpurple/trie.h
+++ b/libpurple/trie.h
@@ -266,7 +266,7 @@ purple_trie_multi_replace(const GSList *
  * purple_trie_find:
  * @trie: the trie.
  * @src: the source string.
- * @find_cb: the callback for found entries (may be %NULL).
+ * @find_cb: the callback for the found entries (may be %NULL).
  * @user_data: custom data to be passed to @find_cb.
  *
  * Processes @src string and finds all occuriences of words added to @trie.
@@ -281,6 +281,26 @@ gulong
 purple_trie_find(PurpleTrie *trie, const gchar *src,
 	PurpleTrieFindCb find_cb, gpointer user_data);
 
+/**
+ * purple_trie_multi_find:
+ * @tries: the list of tries.
+ * @src: the source string.
+ * @find_cb: the callback for the found entries (may be %NULL).
+ * @user_data: custom data to be passed to @find_cb.
+ *
+ * Processes @src and replaces all occuriences of words added to tries in
+ * list @tries. Entries added to tries on the beginning of the list have higher
+ * priority, than ones added further.
+ *
+ * Different #GSList's can be combined to possess common parts, so you can create
+ * a "tree of tries".
+ *
+ * Returns: the number of found words.
+ */
+gulong
+purple_trie_multi_find(const GSList *tries, const gchar *src,
+	PurpleTrieFindCb find_cb, gpointer user_data);
+
 G_END_DECLS
 
 #endif /* PURPLE_MEMORY_POOL_H */



More information about the Commits mailing list