pidgin: de6bbf22: Changes to the way we translate raw yaho...
markdoliner at pidgin.im
markdoliner at pidgin.im
Mon Aug 3 22:21:06 EDT 2009
-----------------------------------------------------------------
Revision: de6bbf227459900b7451907360f092dfb33df8d6
Ancestor: 96777141ce1e924f65eb42a1fe65b8e2c5bec883
Author: markdoliner at pidgin.im
Date: 2009-08-04T02:14:46
Branch: im.pidgin.pidgin
URL: http://d.pidgin.im/viewmtn/revision/info/de6bbf227459900b7451907360f092dfb33df8d6
Modified files:
libpurple/protocols/yahoo/util.c
libpurple/tests/test_yahoo_util.c
ChangeLog:
Changes to the way we translate raw yahoo messages to HTML. The HTML
is now well formed (we actually close tags now). The code isn't as
simple as I would like, but I'm happy with it. I think it's a pretty
robust solution and should still be pretty efficient. I didn't find
any memory leaks with valgrind.
The formatting used in yahoo messages is really ghetto.
Also added a lot more tests. They pass with and without the
USE_CSS_FORMATTING define.
-------------- next part --------------
============================================================
--- libpurple/protocols/yahoo/util.c a75d4a718e58b8c8f989a0320aacb067e0758d14
+++ libpurple/protocols/yahoo/util.c ef8ef9bd4397710bd8339b74ed5f483a55d9f382
@@ -184,148 +184,165 @@ char *yahoo_convert_to_numeric(const cha
}
/*
+ * The values in this hash table should probably be lowercase, both because
+ * that's what XHTML expects and because yahoo_codes_to_html() does
+ * case-sensitive comparisons.
+ *
* I found these on some website but i don't know that they actually
* work (or are supposed to work). I didn't implement them yet.
*
- * [0;30m ---black
- * [1;37m ---white
- * [0;37m ---tan
- * [0;38m ---light black
- * [1;39m ---dark blue
- * [0;32m ---green
- * [0;33m ---yellow
- * [0;35m ---pink
- * [1;35m ---purple
- * [1;30m ---light blue
- * [0;31m ---red
- * [0;34m ---blue
- * [0;36m ---aqua
- * (shift+comma)lyellow(shift+period) ---light yellow
- * (shift+comma)lgreen(shift+period) ---light green
-[2;30m <--white out
-*/
+ * [0;30m ---black
+ * [1;37m ---white
+ * [0;37m ---tan
+ * [0;38m ---light black
+ * [1;39m ---dark blue
+ * [0;32m ---green
+ * [0;33m ---yellow
+ * [0;35m ---pink
+ * [1;35m ---purple
+ * [1;30m ---light blue
+ * [0;31m ---red
+ * [0;34m ---blue
+ * [0;36m ---aqua
+ * (shift+comma)lyellow(shift+period) ---light yellow
+ * (shift+comma)lgreen(shift+period) ---light green
+ * [2;30m <--white out
+ */
-static GHashTable *ht = NULL;
+static GHashTable *esc_codes_ht = NULL;
+static GHashTable *tags_ht = NULL;
void yahoo_init_colorht()
{
- if (ht != NULL)
+ if (esc_codes_ht != NULL)
/* Hash table has already been initialized */
return;
- ht = g_hash_table_new(g_str_hash, g_str_equal);
+ /* Key is the escape code string. Value is the HTML that should be
+ * inserted in place of the escape code. */
+ esc_codes_ht = g_hash_table_new(g_str_hash, g_str_equal);
+
+ /* Key is the name of the HTML tag, for example "font" or "/font".
+ * Value is the HTML that should be inserted in place of the old tag. */
+ tags_ht = g_hash_table_new(g_str_hash, g_str_equal);
+
/* the numbers in comments are what gyach uses, but i think they're incorrect */
#ifdef USE_CSS_FORMATTING
- g_hash_table_insert(ht, "30", "<span style=\"color: #000000\">"); /* black */
- g_hash_table_insert(ht, "31", "<span style=\"color: #0000FF\">"); /* blue */
- g_hash_table_insert(ht, "32", "<span style=\"color: #008080\">"); /* cyan */ /* 00b2b2 */
- g_hash_table_insert(ht, "33", "<span style=\"color: #808080\">"); /* gray */ /* 808080 */
- g_hash_table_insert(ht, "34", "<span style=\"color: #008000\">"); /* green */ /* 00c200 */
- g_hash_table_insert(ht, "35", "<span style=\"color: #FF0080\">"); /* pink */ /* ffafaf */
- g_hash_table_insert(ht, "36", "<span style=\"color: #800080\">"); /* purple */ /* b200b2 */
- g_hash_table_insert(ht, "37", "<span style=\"color: #FF8000\">"); /* orange */ /* ffff00 */
- g_hash_table_insert(ht, "38", "<span style=\"color: #FF0000\">"); /* red */
- g_hash_table_insert(ht, "39", "<span style=\"color: #808000\">"); /* olive */ /* 546b50 */
+ g_hash_table_insert(esc_codes_ht, "30", "<span style=\"color: #000000\">"); /* black */
+ g_hash_table_insert(esc_codes_ht, "31", "<span style=\"color: #0000FF\">"); /* blue */
+ g_hash_table_insert(esc_codes_ht, "32", "<span style=\"color: #008080\">"); /* cyan */ /* 00b2b2 */
+ g_hash_table_insert(esc_codes_ht, "33", "<span style=\"color: #808080\">"); /* gray */ /* 808080 */
+ g_hash_table_insert(esc_codes_ht, "34", "<span style=\"color: #008000\">"); /* green */ /* 00c200 */
+ g_hash_table_insert(esc_codes_ht, "35", "<span style=\"color: #FF0080\">"); /* pink */ /* ffafaf */
+ g_hash_table_insert(esc_codes_ht, "36", "<span style=\"color: #800080\">"); /* purple */ /* b200b2 */
+ g_hash_table_insert(esc_codes_ht, "37", "<span style=\"color: #FF8000\">"); /* orange */ /* ffff00 */
+ g_hash_table_insert(esc_codes_ht, "38", "<span style=\"color: #FF0000\">"); /* red */
+ g_hash_table_insert(esc_codes_ht, "39", "<span style=\"color: #808000\">"); /* olive */ /* 546b50 */
#else
- g_hash_table_insert(ht, "30", "<font color=\"#000000\">"); /* black */
- g_hash_table_insert(ht, "31", "<font color=\"#0000FF\">"); /* blue */
- g_hash_table_insert(ht, "32", "<font color=\"#008080\">"); /* cyan */ /* 00b2b2 */
- g_hash_table_insert(ht, "33", "<font color=\"#808080\">"); /* gray */ /* 808080 */
- g_hash_table_insert(ht, "34", "<font color=\"#008000\">"); /* green */ /* 00c200 */
- g_hash_table_insert(ht, "35", "<font color=\"#FF0080\">"); /* pink */ /* ffafaf */
- g_hash_table_insert(ht, "36", "<font color=\"#800080\">"); /* purple */ /* b200b2 */
- g_hash_table_insert(ht, "37", "<font color=\"#FF8000\">"); /* orange */ /* ffff00 */
- g_hash_table_insert(ht, "38", "<font color=\"#FF0000\">"); /* red */
- g_hash_table_insert(ht, "39", "<font color=\"#808000\">"); /* olive */ /* 546b50 */
+ g_hash_table_insert(esc_codes_ht, "30", "<font color=\"#000000\">"); /* black */
+ g_hash_table_insert(esc_codes_ht, "31", "<font color=\"#0000FF\">"); /* blue */
+ g_hash_table_insert(esc_codes_ht, "32", "<font color=\"#008080\">"); /* cyan */ /* 00b2b2 */
+ g_hash_table_insert(esc_codes_ht, "33", "<font color=\"#808080\">"); /* gray */ /* 808080 */
+ g_hash_table_insert(esc_codes_ht, "34", "<font color=\"#008000\">"); /* green */ /* 00c200 */
+ g_hash_table_insert(esc_codes_ht, "35", "<font color=\"#FF0080\">"); /* pink */ /* ffafaf */
+ g_hash_table_insert(esc_codes_ht, "36", "<font color=\"#800080\">"); /* purple */ /* b200b2 */
+ g_hash_table_insert(esc_codes_ht, "37", "<font color=\"#FF8000\">"); /* orange */ /* ffff00 */
+ g_hash_table_insert(esc_codes_ht, "38", "<font color=\"#FF0000\">"); /* red */
+ g_hash_table_insert(esc_codes_ht, "39", "<font color=\"#808000\">"); /* olive */ /* 546b50 */
#endif /* !USE_CSS_FORMATTING */
- g_hash_table_insert(ht, "1", "<b>");
- g_hash_table_insert(ht, "x1", "</b>");
- g_hash_table_insert(ht, "2", "<i>");
- g_hash_table_insert(ht, "x2", "</i>");
- g_hash_table_insert(ht, "4", "<u>");
- g_hash_table_insert(ht, "x4", "</u>");
+ g_hash_table_insert(esc_codes_ht, "1", "<b>");
+ g_hash_table_insert(esc_codes_ht, "x1", "</b>");
+ g_hash_table_insert(esc_codes_ht, "2", "<i>");
+ g_hash_table_insert(esc_codes_ht, "x2", "</i>");
+ g_hash_table_insert(esc_codes_ht, "4", "<u>");
+ g_hash_table_insert(esc_codes_ht, "x4", "</u>");
/* these just tell us the text they surround is supposed
* to be a link. purple figures that out on its own so we
* just ignore it.
*/
- g_hash_table_insert(ht, "l", ""); /* link start */
- g_hash_table_insert(ht, "xl", ""); /* link end */
+ g_hash_table_insert(esc_codes_ht, "l", ""); /* link start */
+ g_hash_table_insert(esc_codes_ht, "xl", ""); /* link end */
#ifdef USE_CSS_FORMATTING
- g_hash_table_insert(ht, "<black>", "<span style=\"color: #000000\">");
- g_hash_table_insert(ht, "<blue>", "<span style=\"color: #0000FF\">");
- g_hash_table_insert(ht, "<cyan>", "<span style=\"color: #008284\">");
- g_hash_table_insert(ht, "<gray>", "<span style=\"color: #848284\">");
- g_hash_table_insert(ht, "<green>", "<span style=\"color: #008200\">");
- g_hash_table_insert(ht, "<pink>", "<span style=\"color: #FF0084\">");
- g_hash_table_insert(ht, "<purple>", "<span style=\"color: #840084\">");
- g_hash_table_insert(ht, "<orange>", "<span style=\"color: #FF8000\">");
- g_hash_table_insert(ht, "<red>", "<span style=\"color: #FF0000\">");
- g_hash_table_insert(ht, "<yellow>", "<span style=\"color: #848200\">");
+ g_hash_table_insert(tags_ht, "black", "<span style=\"color: #000000\">");
+ g_hash_table_insert(tags_ht, "blue", "<span style=\"color: #0000FF\">");
+ g_hash_table_insert(tags_ht, "cyan", "<span style=\"color: #008284\">");
+ g_hash_table_insert(tags_ht, "gray", "<span style=\"color: #848284\">");
+ g_hash_table_insert(tags_ht, "green", "<span style=\"color: #008200\">");
+ g_hash_table_insert(tags_ht, "pink", "<span style=\"color: #FF0084\">");
+ g_hash_table_insert(tags_ht, "purple", "<span style=\"color: #840084\">");
+ g_hash_table_insert(tags_ht, "orange", "<span style=\"color: #FF8000\">");
+ g_hash_table_insert(tags_ht, "red", "<span style=\"color: #FF0000\">");
+ g_hash_table_insert(tags_ht, "yellow", "<span style=\"color: #848200\">");
- g_hash_table_insert(ht, "</black>", "</span>");
- g_hash_table_insert(ht, "</blue>", "</span>");
- g_hash_table_insert(ht, "</cyan>", "</span>");
- g_hash_table_insert(ht, "</gray>", "</span>");
- g_hash_table_insert(ht, "</green>", "</span>");
- g_hash_table_insert(ht, "</pink>", "</span>");
- g_hash_table_insert(ht, "</purple>", "</span>");
- g_hash_table_insert(ht, "</orange>", "</span>");
- g_hash_table_insert(ht, "</red>", "</span>");
- g_hash_table_insert(ht, "</yellow>", "</span>");
+ g_hash_table_insert(tags_ht, "/black", "</span>");
+ g_hash_table_insert(tags_ht, "/blue", "</span>");
+ g_hash_table_insert(tags_ht, "/cyan", "</span>");
+ g_hash_table_insert(tags_ht, "/gray", "</span>");
+ g_hash_table_insert(tags_ht, "/green", "</span>");
+ g_hash_table_insert(tags_ht, "/pink", "</span>");
+ g_hash_table_insert(tags_ht, "/purple", "</span>");
+ g_hash_table_insert(tags_ht, "/orange", "</span>");
+ g_hash_table_insert(tags_ht, "/red", "</span>");
+ g_hash_table_insert(tags_ht, "/yellow", "</span>");
#else
- g_hash_table_insert(ht, "<black>", "<font color=\"#000000\">");
- g_hash_table_insert(ht, "<blue>", "<font color=\"#0000FF\">");
- g_hash_table_insert(ht, "<cyan>", "<font color=\"#008284\">");
- g_hash_table_insert(ht, "<gray>", "<font color=\"#848284\">");
- g_hash_table_insert(ht, "<green>", "<font color=\"#008200\">");
- g_hash_table_insert(ht, "<pink>", "<font color=\"#FF0084\">");
- g_hash_table_insert(ht, "<purple>", "<font color=\"#840084\">");
- g_hash_table_insert(ht, "<orange>", "<font color=\"#FF8000\">");
- g_hash_table_insert(ht, "<red>", "<font color=\"#FF0000\">");
- g_hash_table_insert(ht, "<yellow>", "<font color=\"#848200\">");
+ g_hash_table_insert(tags_ht, "black", "<font color=\"#000000\">");
+ g_hash_table_insert(tags_ht, "blue", "<font color=\"#0000FF\">");
+ g_hash_table_insert(tags_ht, "cyan", "<font color=\"#008284\">");
+ g_hash_table_insert(tags_ht, "gray", "<font color=\"#848284\">");
+ g_hash_table_insert(tags_ht, "green", "<font color=\"#008200\">");
+ g_hash_table_insert(tags_ht, "pink", "<font color=\"#FF0084\">");
+ g_hash_table_insert(tags_ht, "purple", "<font color=\"#840084\">");
+ g_hash_table_insert(tags_ht, "orange", "<font color=\"#FF8000\">");
+ g_hash_table_insert(tags_ht, "red", "<font color=\"#FF0000\">");
+ g_hash_table_insert(tags_ht, "yellow", "<font color=\"#848200\">");
- g_hash_table_insert(ht, "</black>", "</font>");
- g_hash_table_insert(ht, "</blue>", "</font>");
- g_hash_table_insert(ht, "</cyan>", "</font>");
- g_hash_table_insert(ht, "</gray>", "</font>");
- g_hash_table_insert(ht, "</green>", "</font>");
- g_hash_table_insert(ht, "</pink>", "</font>");
- g_hash_table_insert(ht, "</purple>", "</font>");
- g_hash_table_insert(ht, "</orange>", "</font>");
- g_hash_table_insert(ht, "</red>", "</font>");
- g_hash_table_insert(ht, "</yellow>", "</font>");
+ g_hash_table_insert(tags_ht, "/black", "</font>");
+ g_hash_table_insert(tags_ht, "/blue", "</font>");
+ g_hash_table_insert(tags_ht, "/cyan", "</font>");
+ g_hash_table_insert(tags_ht, "/gray", "</font>");
+ g_hash_table_insert(tags_ht, "/green", "</font>");
+ g_hash_table_insert(tags_ht, "/pink", "</font>");
+ g_hash_table_insert(tags_ht, "/purple", "</font>");
+ g_hash_table_insert(tags_ht, "/orange", "</font>");
+ g_hash_table_insert(tags_ht, "/red", "</font>");
+ g_hash_table_insert(tags_ht, "/yellow", "</font>");
#endif /* !USE_CSS_FORMATTING */
- /* remove these once we have proper support for <FADE> and <ALT> */
- g_hash_table_insert(ht, "</fade>", "");
- g_hash_table_insert(ht, "</alt>", "");
+ /* We don't support these tags, so discard them */
+ g_hash_table_insert(tags_ht, "alt", "");
+ g_hash_table_insert(tags_ht, "fade", "");
+ g_hash_table_insert(tags_ht, "snd", "");
+ g_hash_table_insert(tags_ht, "/alt", "");
+ g_hash_table_insert(tags_ht, "/fade", "");
- /* these are the normal html yahoo sends (besides <font>).
- * anything else will get turned into <tag>, so if I forgot
- * about something, please add it. Why Yahoo! has to send unescaped
- * <'s and >'s that aren't supposed to be html is beyond me.
- */
- g_hash_table_insert(ht, "<b>", "<b>");
- g_hash_table_insert(ht, "<i>", "<i>");
- g_hash_table_insert(ht, "<u>", "<u>");
+ /* Official clients don't seem to send b, i or u tags. They use
+ * the escape codes listed above. Official clients definitely send
+ * font tags, though. I wonder if we can remove the opening and
+ * closing b, i and u tags from here? */
+ g_hash_table_insert(tags_ht, "b", "<b>");
+ g_hash_table_insert(tags_ht, "i", "<i>");
+ g_hash_table_insert(tags_ht, "u", "<u>");
+ g_hash_table_insert(tags_ht, "font", "<font>");
- g_hash_table_insert(ht, "</b>", "</b>");
- g_hash_table_insert(ht, "</i>", "</i>");
- g_hash_table_insert(ht, "</u>", "</u>");
- g_hash_table_insert(ht, "</font>", "</font>");
+ g_hash_table_insert(tags_ht, "/b", "</b>");
+ g_hash_table_insert(tags_ht, "/i", "</i>");
+ g_hash_table_insert(tags_ht, "/u", "</u>");
+ g_hash_table_insert(tags_ht, "/font", "</font>");
}
void yahoo_dest_colorht()
{
- if (ht == NULL)
+ if (esc_codes_ht == NULL)
/* Hash table has already been destroyed */
return;
- g_hash_table_destroy(ht);
- ht = NULL;
+ g_hash_table_destroy(esc_codes_ht);
+ esc_codes_ht = NULL;
+ g_hash_table_destroy(tags_ht);
+ tags_ht = NULL;
}
#ifndef USE_CSS_FORMATTING
@@ -347,60 +364,161 @@ static int point_to_html(int x)
}
#endif /* !USE_CSS_FORMATTING */
-/*
- * The Yahoo font size value is given in pt, even thougth the HTML
- * standard for <font size="x"> treats the size as a number on a
- * scale between 1 and 7. Let's get rid of this shoddyness and
- * convert it to CSS.
+static void append_attrs_datalist_foreach_cb(GQuark key_id, gpointer data, gpointer user_data)
+{
+ const char *key;
+ const char *value;
+ xmlnode *cur;
+
+ key = g_quark_to_string(key_id);
+ value = data;
+ cur = user_data;
+
+ xmlnode_set_attrib(cur, key, value);
+}
+
+/**
+ * @param cur A pointer to the position in the XML tree that we're
+ * currently building. This will be modified when opening a tag
+ * or closing an existing tag.
*/
-static void _font_tags_fix_size(const char *tag, GString *dest)
+static void yahoo_codes_to_html_add_tag(xmlnode **cur, const char *tag, gboolean is_closing_tag, const gchar *tag_name, gboolean is_font_tag)
{
- char *x, *end;
- int size;
+ if (is_closing_tag) {
+ xmlnode *tmp;
+ GSList *dangling_tags = NULL;
- if (((x = strstr(tag, "size"))) && ((x = strchr(x, '=')))) {
- while (*x && !g_ascii_isdigit(*x))
- x++;
- if (*x) {
-#ifndef USE_CSS_FORMATTING
- int htmlsize;
-#endif /* !USE_CSS_FORMATTING */
+ /* Move up the DOM until we find the opening tag */
+ for (tmp = *cur; tmp != NULL; tmp = xmlnode_get_parent(tmp)) {
+ /* Add one to tag_name when doing this comparison because it starts with a / */
+ if (g_str_equal(tmp->name, tag_name + 1))
+ /* Found */
+ break;
+ dangling_tags = g_slist_prepend(dangling_tags, tmp);
+ }
+ if (tmp == NULL) {
+ /* This is a closing tag with no opening tag. Useless. */
+ purple_debug_error("yahoo", "Ignoring unmatched tag %s", tag);
+ g_slist_free(dangling_tags);
+ return;
+ }
- size = strtol(x, &end, 10);
+ /* Move our current position up, now that we've closed a tag */
+ *cur = xmlnode_get_parent(tmp);
+ /* Re-open any tags that were nested below the tag we just closed */
+ while (dangling_tags != NULL) {
+ tmp = dangling_tags->data;
+ dangling_tags = g_slist_delete_link(dangling_tags, dangling_tags);
+
+ /* Create a copy of this tag+attributes (but not child tags or
+ * data) at our new location */
+ *cur = xmlnode_new_child(*cur, tmp->name);
+ for (tmp = tmp->child; tmp != NULL; tmp = tmp->next)
+ if (tmp->type == XMLNODE_TYPE_ATTRIB)
+ xmlnode_set_attrib_full(*cur, tmp->name,
+ tmp->xmlns, tmp->prefix, tmp->data);
+ }
+ } else {
+ const char *start;
+ const char *end;
+ GData *attributes;
+ char *fontsize = NULL;
+
+ purple_markup_find_tag(tag_name, tag, &start, &end, &attributes);
+ *cur = xmlnode_new_child(*cur, tag_name);
+
+ if (is_font_tag) {
+ /* Special case for the font size attribute */
+ fontsize = g_strdup(g_datalist_get_data(&attributes, "size"));
+ if (fontsize != NULL)
+ g_datalist_remove_data(&attributes, "size");
+ }
+
+ /* Add all remaining tag attributes */
+ g_datalist_foreach(&attributes, append_attrs_datalist_foreach_cb, *cur);
+ g_datalist_clear(&attributes);
+
+ if (fontsize != NULL) {
#ifdef USE_CSS_FORMATTING
- g_string_append_len(dest, tag, x - tag - 7);
- g_string_append(dest, end + 1);
- g_string_append_printf(dest, "<span style=\"font-size: %dpt\">", size);
+ /*
+ * The Yahoo font size value is given in pt, even though the HTML
+ * standard for <font size="x"> treats the size as a number on a
+ * scale between 1 and 7. So we insert the font size as a CSS
+ * style on a span tag.
+ */
+ gchar *tmp = g_strdup_printf("font-size: %spt", fontsize);
+ *cur = xmlnode_new_child(*cur, "span");
+ xmlnode_set_attrib(*cur, "style", tmp);
+ g_free(tmp);
#else
+ /*
+ * The Yahoo font size value is given in pt, even though the HTML
+ * standard for <font size="x"> treats the size as a number on a
+ * scale between 1 and 7. So we convert it to an appropriate
+ * value. This loses precision, which is why CSS formatting is
+ * preferred. The "absz" attribute remains here for backward
+ * compatibility with UIs that might use it, but it is totally
+ * not standard at all.
+ */
+ int size, htmlsize;
+ gchar tmp[11];
+ size = strtol(fontsize, NULL, 10);
htmlsize = point_to_html(size);
- g_string_append_len(dest, tag, x - tag);
- g_string_append_printf(dest, "%d", htmlsize);
- g_string_append_printf(dest, "\" absz=\"%d", size);
- g_string_append(dest, end);
+ sprintf(tmp, "%u", htmlsize);
+ xmlnode_set_attrib(*cur, "size", tmp);
+ xmlnode_set_attrib(*cur, "absz", fontsize);
#endif /* !USE_CSS_FORMATTING */
- } else {
- g_string_append(dest, tag);
- return;
+ g_free(fontsize);
}
- } else {
- g_string_append(dest, tag);
- return;
}
}
+/**
+ * Similar to purple_markup_get_tag_name(), but works with closing tags.
+ *
+ * @return The lowercase name of the tag. If this is a closing tag then
+ * this value starts with a forward slash. The caller must free
+ * this string with g_free.
+ */
+static gchar *yahoo_markup_get_tag_name(const char *tag, gboolean *is_closing_tag)
+{
+ size_t len;
+
+ *is_closing_tag = (tag[1] == '/');
+ if (*is_closing_tag)
+ len = strcspn(tag + 1, "> ");
+ else
+ len = strcspn(tag + 1, "> /");
+
+ return g_utf8_strdown(tag + 1, len);
+}
+
+/*
+ * Yahoo! messages generally aren't well-formed. Their markup is
+ * more of a flow from start to finish rather than a hierarchy from
+ * outer to inner. They tend to open tags and close them only when
+ * necessary.
+ *
+ * Example: <font size="8">size 8 <font size="16">size 16 <font size="8">size 8 again
+ *
+ * But we want to send well-formed HTML to the core, so we step through
+ * the input string and build an xmlnode tree containing sanitized HTML.
+ */
char *yahoo_codes_to_html(const char *x)
{
size_t x_len;
- GString *s;
+ xmlnode *html, *cur;
+ GString *cdata = g_string_new(NULL);
int i, j;
- gchar *tmp;
gboolean no_more_gt_brackets = FALSE;
const char *match;
+ gchar *xmlstr1, *xmlstr2;
x_len = strlen(x);
- s = g_string_sized_new(x_len);
+ html = xmlnode_new("html");
+ cur = html;
for (i = 0; i < x_len; i++) {
if ((x[i] == 0x1b) && (x[i+1] == '[')) {
/* This escape sequence signifies the beginning of some
@@ -408,90 +526,129 @@ char *yahoo_codes_to_html(const char *x)
j = i + 1;
while (j++ < x_len) {
+ gchar *code;
+
if (x[j] != 'm')
+ /* Keep looking for the end of this sequence */
continue;
- else {
- /* We've reached the end of the formatting code, yay */
- tmp = g_strndup(x + i + 2, j - i - 2);
- if (tmp[0] == '#')
+
+ /* We've reached the end of the formatting sequence, yay */
+
+ /* Append any character data that belongs in the current node */
+ if (cdata->len > 0) {
+ xmlnode_insert_data(cur, cdata->str, cdata->len);
+ g_string_truncate(cdata, 0);
+ }
+
+ code = g_strndup(x + i + 2, j - i - 2);
+ if (code[0] == '#') {
#ifdef USE_CSS_FORMATTING
- g_string_append_printf(s, "<span style=\"color: %s\">", tmp);
+ gchar *tmp = g_strdup_printf("color: %s", code);
+ cur = xmlnode_new_child(cur, "span");
+ xmlnode_set_attrib(cur, "style", tmp);
+ g_free(tmp);
#else
- g_string_append_printf(s, "<font color=\"%s\">", tmp);
+ cur = xmlnode_new_child(cur, "font");
+ xmlnode_set_attrib(cur, "color", code);
#endif /* !USE_CSS_FORMATTING */
- else if ((match = g_hash_table_lookup(ht, tmp)))
- g_string_append(s, match);
- else {
- purple_debug_error("yahoo",
- "Unknown ansi code 'ESC[%sm'.\n", tmp);
- g_free(tmp);
- break;
- }
- i = j;
- g_free(tmp);
- break;
+ } else if ((match = g_hash_table_lookup(esc_codes_ht, code))) {
+ gboolean is_closing_tag;
+ gchar *tag_name;
+
+ tag_name = yahoo_markup_get_tag_name(match, &is_closing_tag);
+ yahoo_codes_to_html_add_tag(&cur, match, is_closing_tag, tag_name, FALSE);
+ g_free(tag_name);
+
+ } else {
+ purple_debug_error("yahoo",
+ "Ignoring unknown ansi code 'ESC[%sm'.\n", code);
}
+
+ g_free(code);
+ i = j;
+ break;
}
- } else if (!no_more_gt_brackets && (x[i] == '<')) {
+ } else if (x[i] == '<' && !no_more_gt_brackets) {
/* The start of an HTML tag */
j = i;
while (j++ < x_len) {
- if (x[j] != '>')
- if (j == x_len) {
- g_string_append(s, "<");
- no_more_gt_brackets = TRUE;
- }
- else
+ gchar *tag;
+ gboolean is_closing_tag;
+ gchar *tag_name;
+
+ if (x[j] != '>') {
+ if (j != x_len)
+ /* Keep looking for the end of this tag */
+ /* TODO: Should maybe use purple_markup_find_tag()
+ * for this... what happens if there is a > inside
+ * a quoted attribute? */
continue;
- else {
- tmp = g_strndup(x + i, j - i + 1);
- g_ascii_strdown(tmp, -1);
- if ((match = g_hash_table_lookup(ht, tmp)))
- g_string_append(s, match);
- else if (!strncmp(tmp, "<fade ", 6) ||
- !strncmp(tmp, "<alt ", 5) ||
- !strncmp(tmp, "<snd ", 5)) {
+ /* This < has no corresponding > */
+ g_string_append_c(cdata, x[i]);
+ no_more_gt_brackets = TRUE;
+ break;
+ }
- /* remove this if gtkimhtml ever supports any of these */
- i = j;
- g_free(tmp);
- break;
+ tag = g_strndup(x + i, j - i + 1);
+ tag_name = yahoo_markup_get_tag_name(tag, &is_closing_tag);
- } else if (!strncmp(tmp, "<font ", 6)) {
- _font_tags_fix_size(tmp, s);
- } else {
- g_string_append(s, "<");
- g_free(tmp);
- break;
+ match = g_hash_table_lookup(tags_ht, tag_name);
+ if (match == NULL) {
+ /* Unknown tag. The user probably typed a less-than sign */
+ g_string_append_c(cdata, x[i]);
+ no_more_gt_brackets = TRUE;
+ g_free(tag);
+ g_free(tag_name);
+ break;
+ }
+
+ /* Some tags are in the hash table only because we
+ * want to ignore them */
+ if (match[0] != '\0') {
+ /* Append any character data that belongs in the current node */
+ if (cdata->len > 0) {
+ xmlnode_insert_data(cur, cdata->str, cdata->len);
+ g_string_truncate(cdata, 0);
}
-
- i = j;
- g_free(tmp);
- break;
+ if (g_str_equal(tag_name, "font"))
+ /* Font tags are a special case. We don't
+ * necessarily want to replace the whole thing--
+ * we just want to fix the size attribute. */
+ yahoo_codes_to_html_add_tag(&cur, tag, is_closing_tag, tag_name, TRUE);
+ else
+ yahoo_codes_to_html_add_tag(&cur, match, is_closing_tag, tag_name, FALSE);
}
+ i = j;
+ g_free(tag);
+ g_free(tag_name);
+ break;
}
} else {
- if (x[i] == '<')
- g_string_append(s, "<");
- else if (x[i] == '>')
- g_string_append(s, ">");
- else if (x[i] == '&')
- g_string_append(s, "&");
- else if (x[i] == '"')
- g_string_append(s, """);
- else
- g_string_append_c(s, x[i]);
+ g_string_append_c(cdata, x[i]);
}
}
- purple_debug_misc("yahoo", "yahoo_codes_to_html: Returning string: '%s'.\n", s->str);
- return g_string_free(s, FALSE);
+ /* Append any remaining character data */
+ if (cdata->len > 0)
+ xmlnode_insert_data(cur, cdata->str, cdata->len);
+ g_string_free(cdata, TRUE);
+
+ /* Serialize our HTML */
+ xmlstr1 = xmlnode_to_str(html, NULL);
+ xmlnode_free(html);
+
+ /* Strip off the outer HTML node */
+ xmlstr2 = g_strndup(xmlstr1 + 6, strlen(xmlstr1) - 13);
+ g_free(xmlstr1);
+
+ purple_debug_misc("yahoo", "yahoo_codes_to_html: Returning string: '%s'.\n", xmlstr2);
+ return xmlstr2;
}
/* borrowed from gtkimhtml */
============================================================
--- libpurple/tests/test_yahoo_util.c e500aea89ba34a5bbce64d4283fafce51598fb0f
+++ libpurple/tests/test_yahoo_util.c b342a75f6353a7384747087132e5957962cd4244
@@ -17,49 +17,81 @@ START_TEST(test_codes_to_html)
{
assert_string_equal_free("plain",
yahoo_codes_to_html("plain"));
+ assert_string_equal_free("unknown ansi code",
+ yahoo_codes_to_html("unknown \x1B[12345m ansi code"));
+ assert_string_equal_free("plain <peanut>",
+ yahoo_codes_to_html("plain <peanut>"));
+ assert_string_equal_free("plain <peanut",
+ yahoo_codes_to_html("plain <peanut"));
+ assert_string_equal_free("plain> peanut",
+ yahoo_codes_to_html("plain> peanut"));
/* bold/italic/underline */
- assert_string_equal_free("<b>bold",
+ assert_string_equal_free("<b>bold</b>",
yahoo_codes_to_html("\x1B[1mbold"));
- assert_string_equal_free("<i>italic",
+ assert_string_equal_free("<i>italic</i>",
yahoo_codes_to_html("\x1B[2mitalic"));
- assert_string_equal_free("<u>underline",
+ assert_string_equal_free("<u>underline</u>",
yahoo_codes_to_html("\x1B[4munderline"));
- assert_string_equal_free("<b>bold</b> <i>italic</i> <u>underline",
+ assert_string_equal_free("no markup",
+ yahoo_codes_to_html("no\x1B[x4m markup"));
+ assert_string_equal_free("<b>bold</b> <i>italic</i> <u>underline</u>",
yahoo_codes_to_html("\x1B[1mbold\x1B[x1m \x1B[2mitalic\x1B[x2m \x1B[4munderline"));
+ assert_string_equal_free("<b>bold <i>bolditalic</i></b><i> italic</i>",
+ yahoo_codes_to_html("\x1B[1mbold \x1B[2mbolditalic\x1B[x1m italic"));
+ assert_string_equal_free("<b>bold <i>bolditalic</i></b><i> <u>italicunderline</u></i>",
+ yahoo_codes_to_html("\x1B[1mbold \x1B[2mbolditalic\x1B[x1m \x1B[4mitalicunderline"));
+ assert_string_equal_free("<b>bold <i>bolditalic <u>bolditalicunderline</u></i><u> boldunderline</u></b>",
+ yahoo_codes_to_html("\x1B[1mbold \x1B[2mbolditalic \x1B[4mbolditalicunderline\x1B[x2m boldunderline"));
+ assert_string_equal_free("<b>bold <i>bolditalic <u>bolditalicunderline</u></i></b><i><u> italicunderline</u></i>",
+ yahoo_codes_to_html("\x1B[1mbold \x1B[2mbolditalic \x1B[4mbolditalicunderline\x1B[x1m italicunderline"));
#ifdef USE_CSS_FORMATTING
/* font color */
- assert_string_equal_free("<span style=\"color: #0000FF\">blue",
+ assert_string_equal_free("<span style='color: #0000FF'>blue</span>",
yahoo_codes_to_html("\x1B[31mblue"));
- assert_string_equal_free("<span style=\"color: #70ea15\">custom color",
+ assert_string_equal_free("<span style='color: #70ea15'>custom color</span>",
yahoo_codes_to_html("\x1B[#70ea15mcustom color"));
+ /* font face */
+ assert_string_equal_free("<font face='Georgia'>test</font>",
+ yahoo_codes_to_html("<font face='Georgia'>test</font>"));
+
/* font size */
- assert_string_equal_free("<font><span style=\"font-size: 15pt\">test",
- yahoo_codes_to_html("<font size=\"15\">test"));
- assert_string_equal_free("<font><span style=\"font-size: 32pt\">size 32",
- yahoo_codes_to_html("<font size=\"32\">size 32"));
+ assert_string_equal_free("<font><span style='font-size: 15pt'>test</span></font>",
+ yahoo_codes_to_html("<font size='15'>test"));
+ assert_string_equal_free("<font><span style='font-size: 32pt'>size 32</span></font>",
+ yahoo_codes_to_html("<font size='32'>size 32"));
/* combinations */
- assert_string_equal_free("<span style=\"color: #FF0080\"><font><span style=\"font-size: 15pt\">test",
- yahoo_codes_to_html("\x1B[35m<font size=\"15\">test"));
+ assert_string_equal_free("<font face='Georgia'><span style='font-size: 32pt'>test</span></font>",
+ yahoo_codes_to_html("<font face='Georgia' size='32'>test"));
+ assert_string_equal_free("<span style='color: #FF0080'><font><span style='font-size: 15pt'>test</span></font></span>",
+ yahoo_codes_to_html("\x1B[35m<font size='15'>test"));
#else
/* font color */
- assert_string_equal_free("<font color=\"#0000FF\">blue",
+ assert_string_equal_free("<font color='#0000FF'>blue</font>",
yahoo_codes_to_html("\x1B[31mblue"));
- assert_string_equal_free("<font color=\"#70ea15\">custom color",
+ assert_string_equal_free("<font color='#70ea15'>custom color</font>",
yahoo_codes_to_html("\x1B[#70ea15mcustom color"));
+ assert_string_equal_free("test",
+ yahoo_codes_to_html("<ALT #ff0000,#00ff00,#0000ff>test</ALT>"));
+ /* font face */
+ assert_string_equal_free("<font face='Georgia'>test</font>",
+ yahoo_codes_to_html("<font face='Georgia'>test"));
+
/* font size */
- assert_string_equal_free("<font size=\"4\" absz=\"15\">test",
- yahoo_codes_to_html("<font size=\"15\">test"));
- assert_string_equal_free("<font size=\"6\" absz=\"32\">size 32",
- yahoo_codes_to_html("<font size=\"32\">size 32"));
+ assert_string_equal_free("<font size='4' absz='15'>test</font>",
+ yahoo_codes_to_html("<font size='15'>test"));
+ assert_string_equal_free("<font size='6' absz='32'>size 32</font>",
+ yahoo_codes_to_html("<font size='32'>size 32"));
/* combinations */
- assert_string_equal_free("<font color=\"#FF0080\"><font size=\"4\" absz=\"15\">test",
- yahoo_codes_to_html("\x1B[35m<font size=\"15\">test"));
+ assert_string_equal_free("<font face='Georgia' size='6' absz='32'>test</font>",
+ yahoo_codes_to_html("<font face='Georgia' size='32'>test"));
+ assert_string_equal_free("<font color='#FF0080'><font size='4' absz='15'>test</font></font>",
+ yahoo_codes_to_html("\x1B[35m<font size='15'>test"));
#endif /* !USE_CSS_FORMATTING */
}
END_TEST
More information about the Commits mailing list