/pidgin/main: f1310093e434: Implement and test multi-trie
Tomasz Wasilczyk
twasilczyk at pidgin.im
Wed Mar 26 14:39:08 EDT 2014
Changeset: f1310093e4344e1c8da0f60c324e7c4f97d9f5ed
Author: Tomasz Wasilczyk <twasilczyk at pidgin.im>
Date: 2014-03-26 19:38 +0100
Branch: default
URL: https://hg.pidgin.im/pidgin/main/rev/f1310093e434
Description:
Implement and test multi-trie
diffstat:
libpurple/tests/test_trie.c | 60 ++++++++++++-
libpurple/trie.c | 186 ++++++++++++++++++++++++++++++++++---------
libpurple/trie.h | 22 ++++-
3 files changed, 219 insertions(+), 49 deletions(-)
diffs (truncated from 380 to 300 lines):
diff --git a/libpurple/tests/test_trie.c b/libpurple/tests/test_trie.c
--- a/libpurple/tests/test_trie.c
+++ b/libpurple/tests/test_trie.c
@@ -7,7 +7,7 @@ static gboolean
test_trie_replace_cb(GString *out, const gchar *word, gpointer word_data,
gpointer user_data)
{
- /* the "test" word */
+ /* the "test" word for the test_trie_replace test */
if ((int)word_data == 0x1001)
return FALSE;
@@ -35,10 +35,10 @@ START_TEST(test_trie_replace)
in = "Alice is testing her trie implementation, "
"but she's far away from making test tree overtested";
- out = purple_trie_replace(trie, in, test_trie_replace_cb, (gpointer)7);
+ out = purple_trie_replace(trie, in, test_trie_replace_cb, (gpointer)1);
- assert_string_equal("Alice is [7:1002] her [7:1004] [7:1006]ation,"
- " but she's far away from making test [7:1005] [7:1003]", out);
+ assert_string_equal("Alice is [1:1002] her [1:1004] [1:1006]ation,"
+ " but she's far away from making test [1:1005] [1:1003]", out);
g_object_unref(trie);
g_free(out);
@@ -57,9 +57,9 @@ START_TEST(test_trie_replace_whole)
in = "test";
- out = purple_trie_replace(trie, in, test_trie_replace_cb, (gpointer)5);
+ out = purple_trie_replace(trie, in, test_trie_replace_cb, (gpointer)2);
- assert_string_equal("[5:2002]", out);
+ assert_string_equal("[2:2002]", out);
g_object_unref(trie);
g_free(out);
@@ -100,7 +100,7 @@ START_TEST(test_trie_replace_empty)
in = "";
- out = purple_trie_replace(trie, in, test_trie_replace_cb, (gpointer)2);
+ out = purple_trie_replace(trie, in, test_trie_replace_cb, (gpointer)4);
assert_string_equal("", out);
@@ -109,6 +109,51 @@ START_TEST(test_trie_replace_empty)
}
END_TEST
+START_TEST(test_trie_multi_replace)
+{
+ PurpleTrie *trie1, *trie2, *trie3;
+ GSList *tries = NULL;
+ const gchar *in;
+ gchar *out;
+
+ trie1 = purple_trie_new();
+ trie2 = purple_trie_new();
+ trie3 = purple_trie_new();
+
+ /* appending is not efficient, but we have only 3 elements */
+ tries = g_slist_append(tries, trie1);
+ tries = g_slist_append(tries, trie2);
+ tries = g_slist_append(tries, trie3);
+
+ purple_trie_add(trie1, "test", (gpointer)0x5011);
+ purple_trie_add(trie1, "trie1", (gpointer)0x5012);
+ purple_trie_add(trie1, "Alice", (gpointer)0x5013);
+
+ purple_trie_add(trie2, "test", (gpointer)0x5021);
+ purple_trie_add(trie2, "trie2", (gpointer)0x5022);
+ purple_trie_add(trie2, "example", (gpointer)0x5023);
+ purple_trie_add(trie2, "Ali", (gpointer)0x5024);
+
+ /* "tester" without last (accepting) letter of "test" */
+ purple_trie_add(trie3, "teser", (gpointer)0x5031);
+ purple_trie_add(trie3, "trie3", (gpointer)0x5032);
+ purple_trie_add(trie3, "tester", (gpointer)0x5033);
+ purple_trie_add(trie3, "example", (gpointer)0x5034);
+ purple_trie_add(trie3, "Al", (gpointer)0x5035);
+
+ in = "test tester trie trie1 trie2 trie3 example Alice";
+
+ out = purple_trie_multi_replace(tries, in,
+ test_trie_replace_cb, (gpointer)5);
+
+ assert_string_equal("[5:5011] [5:5011]er trie [5:5012] [5:5022] "
+ "[5:5032] [5:5023] [5:5035]ice", out);
+
+ g_slist_free_full(tries, g_object_unref);
+ g_free(out);
+}
+END_TEST
+
Suite *
purple_trie_suite(void)
{
@@ -119,6 +164,7 @@ purple_trie_suite(void)
tcase_add_test(tc, test_trie_replace_whole);
tcase_add_test(tc, test_trie_replace_inner);
tcase_add_test(tc, test_trie_replace_empty);
+ tcase_add_test(tc, test_trie_multi_replace);
suite_add_tcase(s, tc);
diff --git a/libpurple/trie.c b/libpurple/trie.c
--- a/libpurple/trie.c
+++ b/libpurple/trie.c
@@ -74,6 +74,17 @@ struct _PurpleTrieState
PurpleTrieRecord *found_word;
};
+typedef struct
+{
+ PurpleTrieState *state;
+
+ PurpleTrieState *root_state;
+ gboolean reset_on_match;
+
+ PurpleTrieReplaceCb replace_cb;
+ gpointer user_data;
+} PurpleTrieMachine;
+
/*******************************************************************************
* Records list
******************************************************************************/
@@ -354,14 +365,63 @@ purple_trie_states_build(PurpleTrie *tri
* Searching
******************************************************************************/
+static void
+purple_trie_replace_advance(PurpleTrieMachine *m, const guchar character)
+{
+ /* change state after processing a character */
+ while (TRUE) {
+ /* Perfect fit - next character is the same, as the child of the
+ * prefix we reached so far. */
+ if (m->state->children && m->state->children[character]) {
+ m->state = m->state->children[character];
+ break;
+ }
+
+ /* We reached root, that's a pity. */
+ if (m->state == m->root_state)
+ break;
+
+ /* Let's try a bit shorter suffix. */
+ m->state = m->state->longest_suffix;
+ }
+}
+
+static gboolean
+purple_trie_replace_do_replacement(PurpleTrieMachine *m, GString *out)
+{
+ gboolean was_replaced = FALSE;
+ gsize str_old_len;
+
+ /* if we reached a "found" state, let's process it */
+ if (!m->state->found_word)
+ return FALSE;
+
+ /* let's get back to the beginning of the word */
+ g_assert(out->len >= m->state->found_word->word_len - 1);
+ str_old_len = out->len;
+ out->len -= m->state->found_word->word_len - 1;
+
+ was_replaced = m->replace_cb(out, m->state->found_word->word,
+ m->state->found_word->data, m->user_data);
+
+ /* output was untouched, revert to the previous position */
+ if (!was_replaced)
+ out->len = str_old_len;
+
+ if (was_replaced || m->reset_on_match)
+ m->state = m->root_state;
+
+ return was_replaced;
+}
+
gchar *
purple_trie_replace(PurpleTrie *trie, const gchar *src,
PurpleTrieReplaceCb replace_cb, gpointer user_data)
{
PurpleTriePrivate *priv = PURPLE_TRIE_GET_PRIVATE(trie);
- PurpleTrieState *state;
- gsize i = 0;
+ PurpleTrieMachine machine;
GString *out;
+ gsize i;
if (src == NULL)
return NULL;
@@ -371,49 +431,20 @@ purple_trie_replace(PurpleTrie *trie, co
purple_trie_states_build(trie);
+ machine.state = priv->root_state;
+ machine.root_state = priv->root_state;
+ machine.reset_on_match = priv->reset_on_match;
+ machine.replace_cb = replace_cb;
+ machine.user_data = user_data;
+
out = g_string_new(NULL);
- state = priv->root_state;
+ i = 0;
while (src[i] != '\0') {
guchar character = src[i++];
- gboolean was_replaced = FALSE;
+ gboolean was_replaced;
- /* change state after processing a character */
- while (TRUE) {
- /* Perfect fit - next character is the same, as the
- * child of the prefix we reached so far. */
- if (state->children && state->children[character]) {
- state = state->children[character];
- break;
- }
-
- /* We reached root, that's a pity. */
- if (state == priv->root_state)
- break;
-
- /* Let's try a bit shorter suffix. */
- state = state->longest_suffix;
- }
-
- /* if we reached a "found" state, let's process it */
- if (state->found_word) {
- gsize str_old_len;
-
- /* let's get back to the beginning of the word */
- g_assert(out->len >= state->found_word->word_len - 1);
- str_old_len = out->len;
- out->len -= state->found_word->word_len - 1;
-
- was_replaced = replace_cb(out, state->found_word->word,
- state->found_word->data, user_data);
-
- /* output string was untouched, rollback to the
- * previous position*/
- if (!was_replaced)
- out->len = str_old_len;
-
- if (was_replaced || priv->reset_on_match)
- state = priv->root_state;
- }
+ purple_trie_replace_advance(&machine, character);
+ was_replaced = purple_trie_replace_do_replacement(&machine, out);
/* We skipped a character without finding any records,
* let's just copy it to the output. */
@@ -424,6 +455,78 @@ purple_trie_replace(PurpleTrie *trie, co
return g_string_free(out, FALSE);
}
+gchar *
+purple_trie_multi_replace(const GSList *tries, const gchar *src,
+ PurpleTrieReplaceCb replace_cb, gpointer user_data)
+{
+ guint tries_count, m_idx;
+ PurpleTrieMachine *machines;
+ GString *out;
+ gsize i;
+
+ if (src == NULL)
+ return NULL;
+
+ g_return_val_if_fail(replace_cb != NULL, g_strdup(src));
+
+ tries_count = g_slist_length((GSList*)tries);
+ if (tries_count == 0)
+ return g_strdup(src);
+
+ /* Initialize all machines. */
+ machines = g_new(PurpleTrieMachine, tries_count);
+ for (i = 0; i < tries_count; i++, tries = tries->next) {
+ PurpleTrie *trie = tries->data;
+ PurpleTriePrivate *priv = PURPLE_TRIE_GET_PRIVATE(trie);
+
+ if (priv == NULL) {
+ g_warn_if_reached();
+ g_free(machines);
+ return NULL;
+ }
+
+ purple_trie_states_build(trie);
+
+ machines[i].state = priv->root_state;
+ machines[i].root_state = priv->root_state;
+ machines[i].reset_on_match = priv->reset_on_match;
+ machines[i].replace_cb = replace_cb;
+ machines[i].user_data = user_data;
+ }
+
+ out = g_string_new(NULL);
+ i = 0;
+ while (src[i] != '\0') {
More information about the Commits
mailing list