/pidgin/main: 8c63d334ad44: Comments: PurpleTrie
Tomasz Wasilczyk
twasilczyk at pidgin.im
Sat Apr 5 10:31:30 EDT 2014
Changeset: 8c63d334ad4456c80d2bf3db4863d4577bc02cd5
Author: Tomasz Wasilczyk <twasilczyk at pidgin.im>
Date: 2014-04-05 16:31 +0200
Branch: default
URL: https://hg.pidgin.im/pidgin/main/rev/8c63d334ad44
Description:
Comments: PurpleTrie
diffstat:
doc/reference/libpurple/libpurple-docs.xml | 1 +
libpurple/memorypool.h | 3 +-
libpurple/trie.c | 5 +-
libpurple/trie.h | 158 +++++++++++++++++++++-------
4 files changed, 126 insertions(+), 41 deletions(-)
diffs (truncated from 322 to 300 lines):
diff --git a/doc/reference/libpurple/libpurple-docs.xml b/doc/reference/libpurple/libpurple-docs.xml
--- a/doc/reference/libpurple/libpurple-docs.xml
+++ b/doc/reference/libpurple/libpurple-docs.xml
@@ -86,6 +86,7 @@
<xi:include href="xml/theme.xml" />
<xi:include href="xml/theme-loader.xml" />
<xi:include href="xml/theme-manager.xml" />
+ <xi:include href="xml/trie.xml" />
<xi:include href="xml/sound-theme.xml" />
<xi:include href="xml/sound-theme-loader.xml" />
<xi:include href="xml/upnp.xml" />
diff --git a/libpurple/memorypool.h b/libpurple/memorypool.h
--- a/libpurple/memorypool.h
+++ b/libpurple/memorypool.h
@@ -24,8 +24,9 @@
#define PURPLE_MEMORY_POOL_H
/**
* SECTION:memorypool
+ * @include:memorypool.h
* @section_id: libpurple-memorypool
- * @short_description: <filename>memorypool.h</filename>
+ * @short_description: a container for a large number of small chunks of memory
* @title: Memory pools
*
* A #PurpleMemoryPool allows allocating many small objects within a single
diff --git a/libpurple/trie.c b/libpurple/trie.c
--- a/libpurple/trie.c
+++ b/libpurple/trie.c
@@ -809,7 +809,10 @@ purple_trie_class_init(PurpleTrieClass *
properties[PROP_RESET_ON_MATCH] = g_param_spec_boolean("reset-on-match",
"Reset on match", "Determines, if the search state machine "
"should be reset to the initial state on every match. This "
- "ensures, that every match is distinct from each other.", TRUE,
+ "ensures, that every match is distinct from each other. "
+ "Please note, that it's not well-defined for a replace "
+ "operation, so it's better to leave this value default, unless "
+ "you perform only find operations.", TRUE,
G_PARAM_READWRITE | G_PARAM_CONSTRUCT | G_PARAM_STATIC_STRINGS);
g_object_class_install_properties(obj_class, PROP_LAST, properties);
diff --git a/libpurple/trie.h b/libpurple/trie.h
--- a/libpurple/trie.h
+++ b/libpurple/trie.h
@@ -22,6 +22,32 @@
#ifndef PURPLE_TRIE_H
#define PURPLE_TRIE_H
+/**
+ * SECTION:trie
+ * @include:trie.h
+ * @section_id: libpurple-trie
+ * @short_description: a structure for linear-time text searching
+ * @title: Tries
+ *
+ * A #PurpleTrie is a structure for quick searching of multiple phrases within
+ * a text. It's intended for repeated searches of the same set of patterns
+ * within multiple source texts (or a single, big one).
+ *
+ * It's preparation time is <literal>O(p)</literal>, where <literal>p</literal>
+ * is the total length of searched phrases. In current implementation, the
+ * internal structure is invalidated after every modification of the
+ * #PurpleTrie's contents, so it's not efficient to do alternating modifications
+ * and searches. Search time does not depend on patterns being stored within
+ * a trie and is always <literal>O(n)</literal>, where <literal>n</literal> is
+ * the size of a text.
+ *
+ * Its main drawback is a significant memory usage - every internal trie node
+ * needs about 1kB of memory on 32-bit machine and 2kB on 64-bit. Fortunately,
+ * the trie grows slower when more words (with common prefixes) are added.
+ * We could avoid invalidating the whole tree when altering it, but it would
+ * require figuring out, how to update <literal>longest_suffix</literal> fields
+ * in satisfying time.
+ */
#include <glib-object.h>
@@ -40,12 +66,22 @@
typedef struct _PurpleTrie PurpleTrie;
typedef struct _PurpleTrieClass PurpleTrieClass;
+/**
+ * PurpleTrie:
+ *
+ * The trie object instance.
+ */
struct _PurpleTrie
{
/*< private >*/
GObject parent_instance;
};
+/**
+ * PurpleTrieClass:
+ *
+ * Base class for #PurpleTrie objects.
+ */
struct _PurpleTrieClass
{
/*< private >*/
@@ -59,25 +95,52 @@ struct _PurpleTrieClass
/**
* PurpleTrieReplaceCb:
- * @out: Currently built output string, append replacement to it.
- * @word: Found word.
- * @word_data: A user data bound with this word, when added with
- * purple_trie_add().
- * @user_data: A user supplied data passed when calling purple_trie_replace().
+ * @out: currently built output string, append replacement to it.
+ * @word: found word.
+ * @word_data: the user data bound with this word, when added with
+ * #purple_trie_add.
+ * @user_data: the user supplied data passed when calling #purple_trie_replace.
*
- * A funtion called after every found matching substring to be replaced.
+ * A funtion called on every matching substring to be replaced.
*
- * Returns: %TRUE, if the word was replaced, %FALSE otherwise. In the second
- * case you must not modify @out.
+ * If you decide to replace the word, append your text to @out and return %TRUE.
+ * Otherwise, you must not touch @out. In both cases, you must not do any
+ * operations on @out other than appending text to it.
+ *
+ * Returns: %TRUE if the word was replaced, %FALSE otherwise.
*/
typedef gboolean (*PurpleTrieReplaceCb)(GString *out, const gchar *word,
gpointer word_data, gpointer user_data);
+/**
+ * PurpleTrieFindCb:
+ * @word: found word.
+ * @word_data: the user data bound with this word, when added with
+ * #purple_trie_add.
+ * @user_data: the user data passed when calling #purple_trie_find.
+ *
+ * A function called on every matching substring.
+ *
+ * You can decide to count the match or not (for the total number of found
+ * words, that is returned by #purple_trie_find). In both cases you can
+ * obviously do some processing outside the #PurpleTrie object.
+ *
+ * If you decide to count the word and #PurpleTrie:reset-on-match property
+ * is set, no overlapping words will be found - the processing will skip after
+ * the end of this word.
+ *
+ * Returns: %TRUE if the word should be counter, %FALSE otherwise.
+ */
typedef gboolean (*PurpleTrieFindCb)(const gchar *word, gpointer word_data,
gpointer user_data);
G_BEGIN_DECLS
+/**
+ * purple_trie_get_type:
+ *
+ * Returns: the #GType for a #PurpleTrie.
+ */
GType
purple_trie_get_type(void);
@@ -86,14 +149,14 @@ purple_trie_get_type(void);
*
* Creates a new trie.
*
- * Returns: The new #PurpleTrie.
+ * Returns: the new #PurpleTrie.
*/
PurpleTrie *
purple_trie_new(void);
/**
* purple_trie_get_reset_on_match:
- * @trie: The trie.
+ * @trie: the trie.
*
* Checks, if the trie will reset its internal state after every match.
*
@@ -104,60 +167,76 @@ purple_trie_get_reset_on_match(PurpleTri
/**
* purple_trie_set_reset_on_match:
- * @trie: The trie.
+ * @trie: the trie.
* @reset: %TRUE, if trie should reset, %FALSE otherwise.
*
* Enables or disables a feature of resetting trie's state after every match.
* When enabled, it will not search for overlapping matches.
+ *
+ * It's well defined for #purple_trie_find, but not for replace operations.
+ * Thus, for the latter, it's better to stay with this option enabled, because
+ * its behavior may be changed in future.
*/
void
purple_trie_set_reset_on_match(PurpleTrie *trie, gboolean reset);
/**
* purple_trie_add:
- * @trie: The trie.
- * @word: The word.
- * @data: The word-related data (may be %NULL).
+ * @trie: the trie.
+ * @word: the word.
+ * @data: the word-related data (may be %NULL).
*
- * Adds a word to the trie.
+ * Adds a word to the trie. Current implementation doesn't allow for duplicates,
+ * so please avoid adding those.
*
- * Returns: %TRUE if succeeded, %FALSE otherwise (possibly on duplicate)
+ * Please note, that altering a trie invalidates its internal structure, so by
+ * the occasion of next search, it will be rebuilt. It's done in
+ * <literal>O(n)</literal>, where n is the total length of strings
+ * in #PurpleTrie.
+ *
+ * Returns: %TRUE if succeeded, %FALSE otherwise.
*/
gboolean
purple_trie_add(PurpleTrie *trie, const gchar *word, gpointer data);
/**
* purple_trie_remove:
- * @trie: The trie.
- * @word: The word.
+ * @trie: the trie.
+ * @word: the word.
*
* Removes a word from the trie. Depending on used memory pool, this may not
* free allocated memory (that will be freed when destroying the whole
- * collection), so use it wisely.
+ * collection), so use it wisely. See #purple_memory_pool_free.
+ *
+ * Please note, that altering a trie invalidates its internal structure.
+ * See #purple_trie_add.
*/
void
purple_trie_remove(PurpleTrie *trie, const gchar *word);
/**
* purple_trie_get_size:
- * @trie: The trie.
+ * @trie: the trie.
*
- * Returns: The number of elements stored in this trie.
+ * Returns the number of elements contained in the #PurpleTrie.
+ *
+ * Returns: the number of stored words in @trie.
*/
guint
purple_trie_get_size(PurpleTrie *trie);
/**
* purple_trie_replace:
- * @trie: The trie.
- * @src: The source string.
- * @replace_cb: The replacement function.
- * @user_data: Custom data to be passed to @replace_cb.
+ * @trie: the trie.
+ * @src: the source string.
+ * @replace_cb: the replacement function.
+ * @user_data: custom data to be passed to @replace_cb.
*
* Processes @src string and replaces all occuriences of words added to @trie.
- * It's O(strlen(src)), if replace_cb runs in O(strlen(word)).
+ * It's <literal>O(strlen(src))</literal>, if @replace_cb runs in
+ * <literal>O(strlen(word))</literal> and #PurpleTrie:reset-on-match is set.
*
- * Returns: resulting string. Must be g_free'd when you are done using it.
+ * Returns: resulting string. Must be #g_free'd when you are done using it.
*/
gchar *
purple_trie_replace(PurpleTrie *trie, const gchar *src,
@@ -165,19 +244,19 @@ purple_trie_replace(PurpleTrie *trie, co
/**
* purple_trie_multi_replace:
- * @tries: The list of tries.
- * @src: The source string.
- * @replace_cb: The replacement function.
- * @user_data: Custom data to be passed to @replace_cb.
+ * @tries: the list of tries.
+ * @src: the source string.
+ * @replace_cb: the replacement function.
+ * @user_data: custom data to be passed to @replace_cb.
*
* Processes @src and replaces all occuriences of words added to tries in list
* @tries. Entries added to tries on the beginning of the list have higher
* priority, than ones added further.
*
- * Differend GSList's can be combined to possess common parts, so you can create
+ * Different #GSList's can be combined to possess common parts, so you can create
* a "tree of tries".
*
- * Returns: resulting string. Must be g_free'd when you are done using it.
+ * Returns: resulting string. Must be #g_free'd when you are done using it.
*/
gchar *
purple_trie_multi_replace(const GSList *tries, const gchar *src,
@@ -185,17 +264,18 @@ purple_trie_multi_replace(const GSList *
/**
More information about the Commits
mailing list