/pidgin/main: 8c63d334ad44: Comments: PurpleTrie

Tomasz Wasilczyk twasilczyk at pidgin.im
Sat Apr 5 10:31:30 EDT 2014


Changeset: 8c63d334ad4456c80d2bf3db4863d4577bc02cd5
Author:	 Tomasz Wasilczyk <twasilczyk at pidgin.im>
Date:	 2014-04-05 16:31 +0200
Branch:	 default
URL: https://hg.pidgin.im/pidgin/main/rev/8c63d334ad44

Description:

Comments: PurpleTrie

diffstat:

 doc/reference/libpurple/libpurple-docs.xml |    1 +
 libpurple/memorypool.h                     |    3 +-
 libpurple/trie.c                           |    5 +-
 libpurple/trie.h                           |  158 +++++++++++++++++++++-------
 4 files changed, 126 insertions(+), 41 deletions(-)

diffs (truncated from 322 to 300 lines):

diff --git a/doc/reference/libpurple/libpurple-docs.xml b/doc/reference/libpurple/libpurple-docs.xml
--- a/doc/reference/libpurple/libpurple-docs.xml
+++ b/doc/reference/libpurple/libpurple-docs.xml
@@ -86,6 +86,7 @@
       <xi:include href="xml/theme.xml" />
       <xi:include href="xml/theme-loader.xml" />
       <xi:include href="xml/theme-manager.xml" />
+      <xi:include href="xml/trie.xml" />
       <xi:include href="xml/sound-theme.xml" />
       <xi:include href="xml/sound-theme-loader.xml" />
       <xi:include href="xml/upnp.xml" />
diff --git a/libpurple/memorypool.h b/libpurple/memorypool.h
--- a/libpurple/memorypool.h
+++ b/libpurple/memorypool.h
@@ -24,8 +24,9 @@
 #define PURPLE_MEMORY_POOL_H
 /**
  * SECTION:memorypool
+ * @include:memorypool.h
  * @section_id: libpurple-memorypool
- * @short_description: <filename>memorypool.h</filename>
+ * @short_description: a container for a large number of small chunks of memory
  * @title: Memory pools
  *
  * A #PurpleMemoryPool allows allocating many small objects within a single
diff --git a/libpurple/trie.c b/libpurple/trie.c
--- a/libpurple/trie.c
+++ b/libpurple/trie.c
@@ -809,7 +809,10 @@ purple_trie_class_init(PurpleTrieClass *
 	properties[PROP_RESET_ON_MATCH] = g_param_spec_boolean("reset-on-match",
 		"Reset on match", "Determines, if the search state machine "
 		"should be reset to the initial state on every match. This "
-		"ensures, that every match is distinct from each other.", TRUE,
+		"ensures, that every match is distinct from each other. "
+		"Please note, that it's not well-defined for a replace "
+		"operation, so it's better to leave this value default, unless "
+		"you perform only find operations.", TRUE,
 		G_PARAM_READWRITE | G_PARAM_CONSTRUCT | G_PARAM_STATIC_STRINGS);
 
 	g_object_class_install_properties(obj_class, PROP_LAST, properties);
diff --git a/libpurple/trie.h b/libpurple/trie.h
--- a/libpurple/trie.h
+++ b/libpurple/trie.h
@@ -22,6 +22,32 @@
 
 #ifndef PURPLE_TRIE_H
 #define PURPLE_TRIE_H
+/**
+ * SECTION:trie
+ * @include:trie.h
+ * @section_id: libpurple-trie
+ * @short_description: a structure for linear-time text searching
+ * @title: Tries
+ *
+ * A #PurpleTrie is a structure for quick searching of multiple phrases within
+ * a text. It's intended for repeated searches of the same set of patterns
+ * within multiple source texts (or a single, big one).
+ *
+ * It's preparation time is <literal>O(p)</literal>, where <literal>p</literal>
+ * is the total length of searched phrases. In current implementation, the
+ * internal structure is invalidated after every modification of the
+ * #PurpleTrie's contents, so it's not efficient to do alternating modifications
+ * and searches. Search time does not depend on patterns being stored within
+ * a trie and is always <literal>O(n)</literal>, where <literal>n</literal> is
+ * the size of a text.
+ *
+ * Its main drawback is a significant memory usage - every internal trie node
+ * needs about 1kB of memory on 32-bit machine and 2kB on 64-bit. Fortunately,
+ * the trie grows slower when more words (with common prefixes) are added.
+ * We could avoid invalidating the whole tree when altering it, but it would
+ * require figuring out, how to update <literal>longest_suffix</literal> fields
+ * in satisfying time.
+ */
 
 #include <glib-object.h>
 
@@ -40,12 +66,22 @@
 typedef struct _PurpleTrie PurpleTrie;
 typedef struct _PurpleTrieClass PurpleTrieClass;
 
+/**
+ * PurpleTrie:
+ *
+ * The trie object instance.
+ */
 struct _PurpleTrie
 {
 	/*< private >*/
 	GObject parent_instance;
 };
 
+/**
+ * PurpleTrieClass:
+ *
+ * Base class for #PurpleTrie objects.
+ */
 struct _PurpleTrieClass
 {
 	/*< private >*/
@@ -59,25 +95,52 @@ struct _PurpleTrieClass
 
 /**
  * PurpleTrieReplaceCb:
- * @out: Currently built output string, append replacement to it.
- * @word: Found word.
- * @word_data: A user data bound with this word, when added with
- *             purple_trie_add().
- * @user_data: A user supplied data passed when calling purple_trie_replace().
+ * @out: currently built output string, append replacement to it.
+ * @word: found word.
+ * @word_data: the user data bound with this word, when added with
+ *             #purple_trie_add.
+ * @user_data: the user supplied data passed when calling #purple_trie_replace.
  *
- * A funtion called after every found matching substring to be replaced.
+ * A funtion called on every matching substring to be replaced.
  *
- * Returns: %TRUE, if the word was replaced, %FALSE otherwise. In the second
- *          case you must not modify @out.
+ * If you decide to replace the word, append your text to @out and return %TRUE.
+ * Otherwise, you must not touch @out. In both cases, you must not do any
+ * operations on @out other than appending text to it.
+ *
+ * Returns: %TRUE if the word was replaced, %FALSE otherwise.
  */
 typedef gboolean (*PurpleTrieReplaceCb)(GString *out, const gchar *word,
 	gpointer word_data, gpointer user_data);
 
+/**
+ * PurpleTrieFindCb:
+ * @word: found word.
+ * @word_data: the user data bound with this word, when added with
+ *             #purple_trie_add.
+ * @user_data: the user data passed when calling #purple_trie_find.
+ *
+ * A function called on every matching substring.
+ *
+ * You can decide to count the match or not (for the total number of found
+ * words, that is returned by #purple_trie_find). In both cases you can
+ * obviously do some processing outside the #PurpleTrie object.
+ *
+ * If you decide to count the word and #PurpleTrie:reset-on-match property
+ * is set, no overlapping words will be found - the processing will skip after
+ * the end of this word.
+ *
+ * Returns: %TRUE if the word should be counter, %FALSE otherwise.
+ */
 typedef gboolean (*PurpleTrieFindCb)(const gchar *word, gpointer word_data,
 	gpointer user_data);
 
 G_BEGIN_DECLS
 
+/**
+ * purple_trie_get_type:
+ *
+ * Returns: the #GType for a #PurpleTrie.
+ */
 GType
 purple_trie_get_type(void);
 
@@ -86,14 +149,14 @@ purple_trie_get_type(void);
  *
  * Creates a new trie.
  *
- * Returns: The new #PurpleTrie.
+ * Returns: the new #PurpleTrie.
  */
 PurpleTrie *
 purple_trie_new(void);
 
 /**
  * purple_trie_get_reset_on_match:
- * @trie: The trie.
+ * @trie: the trie.
  *
  * Checks, if the trie will reset its internal state after every match.
  *
@@ -104,60 +167,76 @@ purple_trie_get_reset_on_match(PurpleTri
 
 /**
  * purple_trie_set_reset_on_match:
- * @trie: The trie.
+ * @trie: the trie.
  * @reset: %TRUE, if trie should reset, %FALSE otherwise.
  *
  * Enables or disables a feature of resetting trie's state after every match.
  * When enabled, it will not search for overlapping matches.
+ *
+ * It's well defined for #purple_trie_find, but not for replace operations.
+ * Thus, for the latter, it's better to stay with this option enabled, because
+ * its behavior may be changed in future.
  */
 void
 purple_trie_set_reset_on_match(PurpleTrie *trie, gboolean reset);
 
 /**
  * purple_trie_add:
- * @trie: The trie.
- * @word: The word.
- * @data: The word-related data (may be %NULL).
+ * @trie: the trie.
+ * @word: the word.
+ * @data: the word-related data (may be %NULL).
  *
- * Adds a word to the trie.
+ * Adds a word to the trie. Current implementation doesn't allow for duplicates,
+ * so please avoid adding those.
  *
- * Returns: %TRUE if succeeded, %FALSE otherwise (possibly on duplicate)
+ * Please note, that altering a trie invalidates its internal structure, so by
+ * the occasion of next search, it will be rebuilt. It's done in
+ * <literal>O(n)</literal>, where n is the total length of strings
+ * in #PurpleTrie.
+ *
+ * Returns: %TRUE if succeeded, %FALSE otherwise.
  */
 gboolean
 purple_trie_add(PurpleTrie *trie, const gchar *word, gpointer data);
 
 /**
  * purple_trie_remove:
- * @trie: The trie.
- * @word: The word.
+ * @trie: the trie.
+ * @word: the word.
  *
  * Removes a word from the trie. Depending on used memory pool, this may not
  * free allocated memory (that will be freed when destroying the whole
- * collection), so use it wisely.
+ * collection), so use it wisely. See #purple_memory_pool_free.
+ *
+ * Please note, that altering a trie invalidates its internal structure.
+ * See #purple_trie_add.
  */
 void
 purple_trie_remove(PurpleTrie *trie, const gchar *word);
 
 /**
  * purple_trie_get_size:
- * @trie: The trie.
+ * @trie: the trie.
  *
- * Returns: The number of elements stored in this trie.
+ * Returns the number of elements contained in the #PurpleTrie.
+ *
+ * Returns: the number of stored words in @trie.
  */
 guint
 purple_trie_get_size(PurpleTrie *trie);
 
 /**
  * purple_trie_replace:
- * @trie: The trie.
- * @src: The source string.
- * @replace_cb: The replacement function.
- * @user_data: Custom data to be passed to @replace_cb.
+ * @trie: the trie.
+ * @src: the source string.
+ * @replace_cb: the replacement function.
+ * @user_data: custom data to be passed to @replace_cb.
  *
  * Processes @src string and replaces all occuriences of words added to @trie.
- * It's O(strlen(src)), if replace_cb runs in O(strlen(word)).
+ * It's <literal>O(strlen(src))</literal>, if @replace_cb runs in
+ * <literal>O(strlen(word))</literal> and #PurpleTrie:reset-on-match is set.
  *
- * Returns: resulting string. Must be g_free'd when you are done using it.
+ * Returns: resulting string. Must be #g_free'd when you are done using it.
  */
 gchar *
 purple_trie_replace(PurpleTrie *trie, const gchar *src,
@@ -165,19 +244,19 @@ purple_trie_replace(PurpleTrie *trie, co
 
 /**
  * purple_trie_multi_replace:
- * @tries: The list of tries.
- * @src: The source string.
- * @replace_cb: The replacement function.
- * @user_data: Custom data to be passed to @replace_cb.
+ * @tries: the list of tries.
+ * @src: the source string.
+ * @replace_cb: the replacement function.
+ * @user_data: custom data to be passed to @replace_cb.
  *
  * Processes @src and replaces all occuriences of words added to tries in list
  * @tries. Entries added to tries on the beginning of the list have higher
  * priority, than ones added further.
  *
- * Differend GSList's can be combined to possess common parts, so you can create
+ * Different #GSList's can be combined to possess common parts, so you can create
  * a "tree of tries".
  *
- * Returns: resulting string. Must be g_free'd when you are done using it.
+ * Returns: resulting string. Must be #g_free'd when you are done using it.
  */
 gchar *
 purple_trie_multi_replace(const GSList *tries, const gchar *src,
@@ -185,17 +264,18 @@ purple_trie_multi_replace(const GSList *
 
 /**



More information about the Commits mailing list