/pidgin/main: f1310093e434: Implement and test multi-trie

Tomasz Wasilczyk twasilczyk at pidgin.im
Wed Mar 26 14:39:08 EDT 2014


Changeset: f1310093e4344e1c8da0f60c324e7c4f97d9f5ed
Author:	 Tomasz Wasilczyk <twasilczyk at pidgin.im>
Date:	 2014-03-26 19:38 +0100
Branch:	 default
URL: https://hg.pidgin.im/pidgin/main/rev/f1310093e434

Description:

Implement and test multi-trie

diffstat:

 libpurple/tests/test_trie.c |   60 ++++++++++++-
 libpurple/trie.c            |  186 ++++++++++++++++++++++++++++++++++---------
 libpurple/trie.h            |   22 ++++-
 3 files changed, 219 insertions(+), 49 deletions(-)

diffs (truncated from 380 to 300 lines):

diff --git a/libpurple/tests/test_trie.c b/libpurple/tests/test_trie.c
--- a/libpurple/tests/test_trie.c
+++ b/libpurple/tests/test_trie.c
@@ -7,7 +7,7 @@ static gboolean
 test_trie_replace_cb(GString *out, const gchar *word, gpointer word_data,
 	gpointer user_data)
 {
-	/* the "test" word */
+	/* the "test" word for the test_trie_replace test */
 	if ((int)word_data == 0x1001)
 		return FALSE;
 
@@ -35,10 +35,10 @@ START_TEST(test_trie_replace)
 	in = "Alice is testing her trie implementation, "
 		"but she's far away from making test tree overtested";
 
-	out = purple_trie_replace(trie, in, test_trie_replace_cb, (gpointer)7);
+	out = purple_trie_replace(trie, in, test_trie_replace_cb, (gpointer)1);
 
-	assert_string_equal("Alice is [7:1002] her [7:1004] [7:1006]ation,"
-		" but she's far away from making test [7:1005] [7:1003]", out);
+	assert_string_equal("Alice is [1:1002] her [1:1004] [1:1006]ation,"
+		" but she's far away from making test [1:1005] [1:1003]", out);
 
 	g_object_unref(trie);
 	g_free(out);
@@ -57,9 +57,9 @@ START_TEST(test_trie_replace_whole)
 
 	in = "test";
 
-	out = purple_trie_replace(trie, in, test_trie_replace_cb, (gpointer)5);
+	out = purple_trie_replace(trie, in, test_trie_replace_cb, (gpointer)2);
 
-	assert_string_equal("[5:2002]", out);
+	assert_string_equal("[2:2002]", out);
 
 	g_object_unref(trie);
 	g_free(out);
@@ -100,7 +100,7 @@ START_TEST(test_trie_replace_empty)
 
 	in = "";
 
-	out = purple_trie_replace(trie, in, test_trie_replace_cb, (gpointer)2);
+	out = purple_trie_replace(trie, in, test_trie_replace_cb, (gpointer)4);
 
 	assert_string_equal("", out);
 
@@ -109,6 +109,51 @@ START_TEST(test_trie_replace_empty)
 }
 END_TEST
 
+START_TEST(test_trie_multi_replace)
+{
+	PurpleTrie *trie1, *trie2, *trie3;
+	GSList *tries = NULL;
+	const gchar *in;
+	gchar *out;
+
+	trie1 = purple_trie_new();
+	trie2 = purple_trie_new();
+	trie3 = purple_trie_new();
+
+	/* appending is not efficient, but we have only 3 elements */
+	tries = g_slist_append(tries, trie1);
+	tries = g_slist_append(tries, trie2);
+	tries = g_slist_append(tries, trie3);
+
+	purple_trie_add(trie1, "test", (gpointer)0x5011);
+	purple_trie_add(trie1, "trie1", (gpointer)0x5012);
+	purple_trie_add(trie1, "Alice", (gpointer)0x5013);
+
+	purple_trie_add(trie2, "test", (gpointer)0x5021);
+	purple_trie_add(trie2, "trie2", (gpointer)0x5022);
+	purple_trie_add(trie2, "example", (gpointer)0x5023);
+	purple_trie_add(trie2, "Ali", (gpointer)0x5024);
+
+	/* "tester" without last (accepting) letter of "test" */
+	purple_trie_add(trie3, "teser", (gpointer)0x5031);
+	purple_trie_add(trie3, "trie3", (gpointer)0x5032);
+	purple_trie_add(trie3, "tester", (gpointer)0x5033);
+	purple_trie_add(trie3, "example", (gpointer)0x5034);
+	purple_trie_add(trie3, "Al", (gpointer)0x5035);
+
+	in = "test tester trie trie1 trie2 trie3 example Alice";
+
+	out = purple_trie_multi_replace(tries, in,
+		test_trie_replace_cb, (gpointer)5);
+
+	assert_string_equal("[5:5011] [5:5011]er trie [5:5012] [5:5022] "
+		"[5:5032] [5:5023] [5:5035]ice", out);
+
+	g_slist_free_full(tries, g_object_unref);
+	g_free(out);
+}
+END_TEST
+
 Suite *
 purple_trie_suite(void)
 {
@@ -119,6 +164,7 @@ purple_trie_suite(void)
 	tcase_add_test(tc, test_trie_replace_whole);
 	tcase_add_test(tc, test_trie_replace_inner);
 	tcase_add_test(tc, test_trie_replace_empty);
+	tcase_add_test(tc, test_trie_multi_replace);
 
 	suite_add_tcase(s, tc);
 
diff --git a/libpurple/trie.c b/libpurple/trie.c
--- a/libpurple/trie.c
+++ b/libpurple/trie.c
@@ -74,6 +74,17 @@ struct _PurpleTrieState
 	PurpleTrieRecord *found_word;
 };
 
+typedef struct
+{
+	PurpleTrieState *state;
+
+	PurpleTrieState *root_state;
+	gboolean reset_on_match;
+
+	PurpleTrieReplaceCb replace_cb;
+	gpointer user_data;
+} PurpleTrieMachine;
+
 /*******************************************************************************
  * Records list
  ******************************************************************************/
@@ -354,14 +365,63 @@ purple_trie_states_build(PurpleTrie *tri
  * Searching
  ******************************************************************************/
 
+static void
+purple_trie_replace_advance(PurpleTrieMachine *m, const guchar character)
+{
+	/* change state after processing a character */
+	while (TRUE) {
+		/* Perfect fit - next character is the same, as the child of the
+		 * prefix we reached so far. */
+		if (m->state->children && m->state->children[character]) {
+			m->state = m->state->children[character];
+			break;
+		}
+
+		/* We reached root, that's a pity. */
+		if (m->state == m->root_state)
+			break;
+
+		/* Let's try a bit shorter suffix. */
+		m->state = m->state->longest_suffix;
+	}
+}
+
+static gboolean
+purple_trie_replace_do_replacement(PurpleTrieMachine *m, GString *out)
+{
+	gboolean was_replaced = FALSE;
+	gsize str_old_len;
+
+	/* if we reached a "found" state, let's process it */
+	if (!m->state->found_word)
+		return FALSE;
+
+	/* let's get back to the beginning of the word */
+	g_assert(out->len >= m->state->found_word->word_len - 1);
+	str_old_len = out->len;
+	out->len -= m->state->found_word->word_len - 1;
+
+	was_replaced = m->replace_cb(out, m->state->found_word->word,
+		m->state->found_word->data, m->user_data);
+
+	/* output was untouched, revert to the previous position */
+	if (!was_replaced)
+		out->len = str_old_len;
+
+	if (was_replaced || m->reset_on_match)
+		m->state = m->root_state;
+
+	return was_replaced;
+}
+
 gchar *
 purple_trie_replace(PurpleTrie *trie, const gchar *src,
 	PurpleTrieReplaceCb replace_cb, gpointer user_data)
 {
 	PurpleTriePrivate *priv = PURPLE_TRIE_GET_PRIVATE(trie);
-	PurpleTrieState *state;
-	gsize i = 0;
+	PurpleTrieMachine machine;
 	GString *out;
+	gsize i;
 
 	if (src == NULL)
 		return NULL;
@@ -371,49 +431,20 @@ purple_trie_replace(PurpleTrie *trie, co
 
 	purple_trie_states_build(trie);
 
+	machine.state = priv->root_state;
+	machine.root_state = priv->root_state;
+	machine.reset_on_match = priv->reset_on_match;
+	machine.replace_cb = replace_cb;
+	machine.user_data = user_data;
+
 	out = g_string_new(NULL);
-	state = priv->root_state;
+	i = 0;
 	while (src[i] != '\0') {
 		guchar character = src[i++];
-		gboolean was_replaced = FALSE;
+		gboolean was_replaced;
 
-		/* change state after processing a character */
-		while (TRUE) {
-			/* Perfect fit - next character is the same, as the
-			 * child of the prefix we reached so far. */
-			if (state->children && state->children[character]) {
-				state = state->children[character];
-				break;
-			}
-
-			/* We reached root, that's a pity. */
-			if (state == priv->root_state)
-				break;
-
-			/* Let's try a bit shorter suffix. */
-			state = state->longest_suffix;
-		}
-
-		/* if we reached a "found" state, let's process it */
-		if (state->found_word) {
-			gsize str_old_len;
-
-			/* let's get back to the beginning of the word */
-			g_assert(out->len >= state->found_word->word_len - 1);
-			str_old_len = out->len;
-			out->len -= state->found_word->word_len - 1;
-
-			was_replaced = replace_cb(out, state->found_word->word,
-				state->found_word->data, user_data);
-
-			/* output string was untouched, rollback to the
-			 * previous position*/
-			if (!was_replaced)
-				out->len = str_old_len;
-
-			if (was_replaced || priv->reset_on_match)
-				state = priv->root_state;
-		}
+		purple_trie_replace_advance(&machine, character);
+		was_replaced = purple_trie_replace_do_replacement(&machine, out);
 
 		/* We skipped a character without finding any records,
 		 * let's just copy it to the output. */
@@ -424,6 +455,78 @@ purple_trie_replace(PurpleTrie *trie, co
 	return g_string_free(out, FALSE);
 }
 
+gchar *
+purple_trie_multi_replace(const GSList *tries, const gchar *src,
+	PurpleTrieReplaceCb replace_cb, gpointer user_data)
+{
+	guint tries_count, m_idx;
+	PurpleTrieMachine *machines;
+	GString *out;
+	gsize i;
+
+	if (src == NULL)
+		return NULL;
+
+	g_return_val_if_fail(replace_cb != NULL, g_strdup(src));
+
+	tries_count = g_slist_length((GSList*)tries);
+	if (tries_count == 0)
+		return g_strdup(src);
+
+	/* Initialize all machines. */
+	machines = g_new(PurpleTrieMachine, tries_count);
+	for (i = 0; i < tries_count; i++, tries = tries->next) {
+		PurpleTrie *trie = tries->data;
+		PurpleTriePrivate *priv = PURPLE_TRIE_GET_PRIVATE(trie);
+
+		if (priv == NULL) {
+			g_warn_if_reached();
+			g_free(machines);
+			return NULL;
+		}
+
+		purple_trie_states_build(trie);
+
+		machines[i].state = priv->root_state;
+		machines[i].root_state = priv->root_state;
+		machines[i].reset_on_match = priv->reset_on_match;
+		machines[i].replace_cb = replace_cb;
+		machines[i].user_data = user_data;
+	}
+
+	out = g_string_new(NULL);
+	i = 0;
+	while (src[i] != '\0') {



More information about the Commits mailing list