/pidgin/main: 799b62769bd3: Trie: implement search-and-replace (...

Tomasz Wasilczyk twasilczyk at pidgin.im
Tue Mar 25 20:03:02 EDT 2014


Changeset: 799b62769bd36315f00d50b01f2eef18ba580d79
Author:	 Tomasz Wasilczyk <twasilczyk at pidgin.im>
Date:	 2014-03-26 01:02 +0100
Branch:	 default
URL: https://hg.pidgin.im/pidgin/main/rev/799b62769bd3

Description:

Trie: implement search-and-replace (not yet tested)

diffstat:

 libpurple/trie.c |  87 ++++++++++++++++++++++++++++++++++++++++++++++++-------
 libpurple/trie.h |  32 ++++++++++++++++++++
 2 files changed, 108 insertions(+), 11 deletions(-)

diffs (214 lines):

diff --git a/libpurple/trie.c b/libpurple/trie.c
--- a/libpurple/trie.c
+++ b/libpurple/trie.c
@@ -22,6 +22,8 @@
 
 #include "trie.h"
 
+#include <string.h>
+
 #include "debug.h"
 #include "memorypool.h"
 
@@ -49,6 +51,7 @@ typedef struct
 struct _PurpleTrieRecord
 {
 	const gchar *word;
+	guint word_len;
 	gpointer data;
 };
 
@@ -171,7 +174,7 @@ purple_trie_states_cleanup(PurpleTrie *t
 
 /* Allocates a state and binds it to the parent. */
 static PurpleTrieState *
-purple_trie_state_new(PurpleTrie *trie, PurpleTrieState *parent, guchar letter)
+purple_trie_state_new(PurpleTrie *trie, PurpleTrieState *parent, guchar character)
 {
 	PurpleTriePrivate *priv = PURPLE_TRIE_GET_PRIVATE(trie);
 	PurpleTrieState *state;
@@ -200,7 +203,7 @@ purple_trie_state_new(PurpleTrie *trie, 
 		return NULL;
 	}
 
-	parent->children[letter] = state;
+	parent->children[character] = state;
 
 	return state;
 }
@@ -241,12 +244,12 @@ purple_trie_states_build(PurpleTrie *tri
 	for (cur_len = 0; reclist != NULL; cur_len++) {
 		for (it = reclist; it; it = it->next) {
 			PurpleTrieRecord *rec = it->rec;
-			guchar letter = rec->word[cur_len];
+			guchar character = rec->word[cur_len];
 			PurpleTrieState *prefix = it->extra_data;
 			PurpleTrieState *lon_suf_parent;
 
 			/* the whole word is already added to the trie */
-			if (letter == '\0') {
+			if (character == '\0') {
 				if (prefix->found_word == NULL)
 					prefix->found_word = rec;
 				else {
@@ -263,15 +266,15 @@ purple_trie_states_build(PurpleTrie *tri
 
 			/* word's prefix is already in the trie, added by the
 			 * other word */
-			if (prefix->children && prefix->children[letter]) {
-				it->extra_data = prefix->children[letter];
+			if (prefix->children && prefix->children[character]) {
+				it->extra_data = prefix->children[character];
 				continue;
 			}
 
 			/* We need to create a new branch of trie.
-			 * prefix is now of length increased by one letter.
+			 * prefix is now of length increased by one character.
 			 */
-			prefix = purple_trie_state_new(trie, prefix, letter);
+			prefix = purple_trie_state_new(trie, prefix, character);
 			if (!prefix) {
 				g_warn_if_reached();
 				g_object_unref(reclist_mpool);
@@ -287,12 +290,13 @@ purple_trie_states_build(PurpleTrie *tri
 			lon_suf_parent = prefix->parent->longest_suffix;
 			while (lon_suf_parent) {
 				if (lon_suf_parent->children &&
-					lon_suf_parent->children[letter])
+					lon_suf_parent->children[character])
 				{
-					prefix->longest_suffix =
-						lon_suf_parent->children[letter];
+					prefix->longest_suffix = lon_suf_parent->
+						children[character];
 					break;
 				}
+				lon_suf_parent = lon_suf_parent->longest_suffix;
 			}
 			if (prefix->longest_suffix == NULL)
 				prefix->longest_suffix = root;
@@ -305,6 +309,66 @@ purple_trie_states_build(PurpleTrie *tri
 }
 
 /*******************************************************************************
+ * Searching
+ ******************************************************************************/
+
+gchar *
+purple_trie_replace(PurpleTrie *trie, const gchar *src,
+	PurpleTrieReplaceCb replace_cb, gpointer user_data)
+{
+	PurpleTriePrivate *priv = PURPLE_TRIE_GET_PRIVATE(trie);
+	PurpleTrieState *state;
+	gsize i = 0;
+	GString *out;
+
+	if (src == NULL)
+		return NULL;
+
+	g_return_val_if_fail(replace_cb != NULL, g_strdup(src));
+	g_return_val_if_fail(priv != NULL, NULL);
+
+	out = g_string_new(NULL);
+	state = priv->root_state;
+	while (src[i] != '\0') {
+		guchar character = src[i];
+
+		/* change state after processing a character */
+		while (TRUE) {
+			/* Perfect fit - next character is the same, as the
+			 * child of the prefix we reached so far. */
+			if (state->children && state->children[character]) {
+				state = state->children[character];
+				break;
+			}
+
+			/* We reached root, that's a pity. */
+			if (state == priv->root_state)
+				break;
+
+			state = state->longest_suffix;
+		}
+
+		/* if we reached a "found" state, let's process it */
+		if (state->found_word) {
+			gboolean was_replaced;
+
+			was_replaced = replace_cb(out, state->found_word->word,
+				state->found_word->data, user_data);
+
+			if (was_replaced || priv->reset_on_match) {
+				i += state->found_word->word_len;
+				state = priv->root_state;
+			} else
+				i++;
+		} else
+			i++;
+	}
+
+	return g_string_free(out, FALSE);
+}
+
+
+/*******************************************************************************
  * Records
  ******************************************************************************/
 
@@ -325,6 +389,7 @@ purple_trie_add(PurpleTrie *trie, const 
 	rec = purple_memory_pool_alloc(priv->records_obj_mempool,
 		sizeof(PurpleTrieRecord), sizeof(gpointer));
 	rec->word = purple_memory_pool_strdup(priv->records_str_mempool, word);
+	rec->word_len = strlen(word);
 	rec->data = data;
 
 	priv->records = purple_record_list_prepend(priv->records_obj_mempool,
diff --git a/libpurple/trie.h b/libpurple/trie.h
--- a/libpurple/trie.h
+++ b/libpurple/trie.h
@@ -57,6 +57,22 @@ struct _PurpleTrieClass
 	void (*purple_reserved4)(void);
 };
 
+/**
+ * PurpleTrieReplaceCb:
+ * @out: Currently built output string, append replacement to it.
+ * @word: Found word.
+ * @word_data: A user data bound with this word, when added with
+ *             purple_trie_add().
+ * @user_data: A user supplied data passed when calling purple_trie_replace().
+ *
+ * A funtion called after every found matching substring to be replaced.
+ *
+ * Returns: %TRUE, if the word was replaced, %FALSE otherwise. In the second
+ *          case you might possibly want to leave @out untouched.
+ */
+typedef gboolean (*PurpleTrieReplaceCb)(GString *out, const gchar *word,
+	gpointer word_data, gpointer user_data);
+
 G_BEGIN_DECLS
 
 GType
@@ -105,6 +121,22 @@ purple_trie_set_reset_on_match(PurpleTri
 void
 purple_trie_add(PurpleTrie *trie, const gchar *word, gpointer data);
 
+/**
+ * purple_trie_replace:
+ * @trie: The trie.
+ * @src: The source string.
+ * @replace_cb: The replacement function.
+ * @user_data: Custom data to be passed to @replace_cb.
+ *
+ * Looks for all occuriences of words added to @trie, in @src string.
+ * It's O(strlen(src)), if replace_cb runs in O(strlen(word)).
+ *
+ * Returns: resulting string. Must be g_free'd when you are done using it.
+ */
+gchar *
+purple_trie_replace(PurpleTrie *trie, const gchar *src,
+	PurpleTrieReplaceCb replace_cb, gpointer user_data);
+
 G_END_DECLS
 
 #endif /* PURPLE_MEMORY_POOL_H */



More information about the Commits mailing list