/pidgin/main: c600a505402d: patch from loic to fix some bad html...
Nathan Walp
nwalp at pidgin.im
Mon Sep 10 10:30:49 EDT 2012
Changeset: c600a505402d9eb9098cb4cc3419a8d284c1fe4a
Author: Nathan Walp <nwalp at pidgin.im>
Date: 2012-09-10 10:30 -0400
Branch: default
URL: http://hg.pidgin.im/pidgin/main/rev/c600a505402d
Description:
patch from loic to fix some bad html -> xhtml conversion
diffstat:
libpurple/tests/test_util.c | 165 ++++++++++++++++++++++++++++++++++++++++++++
libpurple/util.c | 63 +++++++++-------
2 files changed, 201 insertions(+), 27 deletions(-)
diffs (truncated from 327 to 300 lines):
diff --git a/libpurple/tests/test_util.c b/libpurple/tests/test_util.c
--- a/libpurple/tests/test_util.c
+++ b/libpurple/tests/test_util.c
@@ -167,14 +167,179 @@ START_TEST(test_markup_html_to_xhtml)
{
gchar *xhtml = NULL;
gchar *plaintext = NULL;
+
purple_markup_html_to_xhtml("<a>", &xhtml, &plaintext);
assert_string_equal_free("<a href=\"\"></a>", xhtml);
assert_string_equal_free("", plaintext);
+ purple_markup_html_to_xhtml("<A href='URL'>ABOUT</a>", &xhtml, &plaintext);
+ assert_string_equal_free("<a href=\"URL\">ABOUT</a>", xhtml);
+ assert_string_equal_free("ABOUT <URL>", plaintext);
+
+ purple_markup_html_to_xhtml("<a href='URL'>URL</a>", &xhtml, &plaintext);
+ assert_string_equal_free("URL", plaintext);
+ assert_string_equal_free("<a href=\"URL\">URL</a>", xhtml);
+
+ purple_markup_html_to_xhtml("<a href='mailto:mail'>mail</a>", &xhtml, &plaintext);
+ assert_string_equal_free("mail", plaintext);
+ assert_string_equal_free("<a href=\"mailto:mail\">mail</a>", xhtml);
+
+ purple_markup_html_to_xhtml("<A href='\"U&apos;R&L'>ABOUT</a>", &xhtml, &plaintext);
+ assert_string_equal_free("<a href=\"&quot;U&apos;R&amp;L\">ABOUT</a>", xhtml);
+ assert_string_equal_free("ABOUT <\"U'R&L>", plaintext);
+
+ purple_markup_html_to_xhtml("<img src='SRC' alt='ALT'/>", &xhtml, &plaintext);
+ assert_string_equal_free("<img src='SRC' alt='ALT' />", xhtml);
+ assert_string_equal_free("ALT", plaintext);
+
+ purple_markup_html_to_xhtml("<img src=\"'S'R&C\" alt=\"'A'L&T\"/>", &xhtml, &plaintext);
+ assert_string_equal_free("<img src='&apos;S&apos;R&amp;C' alt='&apos;A&apos;L&amp;T' />", xhtml);
+ assert_string_equal_free("'A'L&T", plaintext);
+
+ purple_markup_html_to_xhtml("<unknown>", &xhtml, &plaintext);
+ assert_string_equal_free("&lt;unknown&gt;", xhtml);
+ assert_string_equal_free("<unknown>", plaintext);
+
+ purple_markup_html_to_xhtml("é&", &xhtml, &plaintext);
+ assert_string_equal_free("é&", xhtml);
+ assert_string_equal_free("é&", plaintext);
+
+ purple_markup_html_to_xhtml("<h1>A<h2>B</h2>C</h1>", &xhtml, &plaintext);
+ assert_string_equal_free("<h1>A<h2>B</h2>C</h1>", xhtml);
+ assert_string_equal_free("ABC", plaintext);
+
+ purple_markup_html_to_xhtml("<h1><h2><h3><h4>", &xhtml, &plaintext);
+ assert_string_equal_free("<h1><h2><h3><h4></h4></h3></h2></h1>", xhtml);
+ assert_string_equal_free("", plaintext);
+
+ purple_markup_html_to_xhtml("<italic/>", &xhtml, &plaintext);
+ assert_string_equal_free("<em/>", xhtml);
+ assert_string_equal_free("", plaintext);
+
+ purple_markup_html_to_xhtml("</", &xhtml, &plaintext);
+ assert_string_equal_free("</", xhtml);
+ assert_string_equal_free("</", plaintext);
+
+ purple_markup_html_to_xhtml("</div>", &xhtml, &plaintext);
+ assert_string_equal_free("", xhtml);
+ assert_string_equal_free("", plaintext);
+
+ purple_markup_html_to_xhtml("<hr/>", &xhtml, &plaintext);
+ assert_string_equal_free("<br/>", xhtml);
+ assert_string_equal_free("\n", plaintext);
+
+ purple_markup_html_to_xhtml("<hr>", &xhtml, &plaintext);
+ assert_string_equal_free("<br/>", xhtml);
+ assert_string_equal_free("\n", plaintext);
+
+ purple_markup_html_to_xhtml("<br />", &xhtml, &plaintext);
+ assert_string_equal_free("<br/>", xhtml);
+ assert_string_equal_free("\n", plaintext);
+
+ purple_markup_html_to_xhtml("<br>INSIDE</br>", &xhtml, &plaintext);
+ assert_string_equal_free("<br/>INSIDE", xhtml);
+ assert_string_equal_free("\nINSIDE", plaintext);
+
+ purple_markup_html_to_xhtml("<div></div>", &xhtml, &plaintext);
+ assert_string_equal_free("<div></div>", xhtml);
+ assert_string_equal_free("", plaintext);
+
+ purple_markup_html_to_xhtml("<div/>", &xhtml, &plaintext);
+ assert_string_equal_free("<div/>", xhtml);
+ assert_string_equal_free("", plaintext);
+
+ purple_markup_html_to_xhtml("<div attr='\"&<>'/>", &xhtml, &plaintext);
+ assert_string_equal_free("<div attr='&quot;&amp;&lt;&gt;'/>", xhtml);
+ assert_string_equal_free("", plaintext);
+
+ purple_markup_html_to_xhtml("<div attr=\"'\"/>", &xhtml, &plaintext);
+ assert_string_equal_free("<div attr=\"'\"/>", xhtml);
+ assert_string_equal_free("", plaintext);
+
+ purple_markup_html_to_xhtml("<div/> < <div/>", &xhtml, &plaintext);
+ assert_string_equal_free("<div/> &lt; <div/>", xhtml);
+ assert_string_equal_free(" < ", plaintext);
+
+ purple_markup_html_to_xhtml("<div>x</div>", &xhtml, &plaintext);
+ assert_string_equal_free("<div>x</div>", xhtml);
+ assert_string_equal_free("x", plaintext);
+
+ purple_markup_html_to_xhtml("<b>x</b>", &xhtml, &plaintext);
+ assert_string_equal_free("<span style='font-weight: bold;'>x</span>", xhtml);
+ assert_string_equal_free("x", plaintext);
+
+ purple_markup_html_to_xhtml("<bold>x</bold>", &xhtml, &plaintext);
+ assert_string_equal_free("<span style='font-weight: bold;'>x</span>", xhtml);
+ assert_string_equal_free("x", plaintext);
+
+ purple_markup_html_to_xhtml("<strong>x</strong>", &xhtml, &plaintext);
+ assert_string_equal_free("<span style='font-weight: bold;'>x</span>", xhtml);
+ assert_string_equal_free("x", plaintext);
+
+ purple_markup_html_to_xhtml("<u>x</u>", &xhtml, &plaintext);
+ assert_string_equal_free("<span style='text-decoration: underline;'>x</span>", xhtml);
+ assert_string_equal_free("x", plaintext);
+
+ purple_markup_html_to_xhtml("<underline>x</underline>", &xhtml, &plaintext);
+ assert_string_equal_free("<span style='text-decoration: underline;'>x</span>", xhtml);
+ assert_string_equal_free("x", plaintext);
+
+ purple_markup_html_to_xhtml("<s>x</s>", &xhtml, &plaintext);
+ assert_string_equal_free("<span style='text-decoration: line-through;'>x</span>", xhtml);
+ assert_string_equal_free("x", plaintext);
+
+ purple_markup_html_to_xhtml("<strike>x</strike>", &xhtml, &plaintext);
+ assert_string_equal_free("<span style='text-decoration: line-through;'>x</span>", xhtml);
+ assert_string_equal_free("x", plaintext);
+
+ purple_markup_html_to_xhtml("<sub>x</sub>", &xhtml, &plaintext);
+ assert_string_equal_free("<span style='vertical-align:sub;'>x</span>", xhtml);
+ assert_string_equal_free("x", plaintext);
+
+ purple_markup_html_to_xhtml("<sup>x</sup>", &xhtml, &plaintext);
+ assert_string_equal_free("<span style='vertical-align:super;'>x</span>", xhtml);
+ assert_string_equal_free("x", plaintext);
purple_markup_html_to_xhtml("<FONT>x</FONT>", &xhtml, &plaintext);
assert_string_equal_free("x", xhtml);
assert_string_equal_free("x", plaintext);
+
+ purple_markup_html_to_xhtml("<font face=\"'Times>New & Roman'\">x</font>", &xhtml, &plaintext);
+ assert_string_equal_free("x", plaintext);
+ assert_string_equal_free("<span style='font-family: \"Times>New & Roman\";'>x</span>", xhtml);
+
+ purple_markup_html_to_xhtml("<font back=\"'color>blue&red'\">x</font>", &xhtml, &plaintext);
+ assert_string_equal_free("x", plaintext);
+ assert_string_equal_free("<span style='background: \"color>blue&red\";'>x</span>", xhtml);
+
+ purple_markup_html_to_xhtml("<font color=\"'color>blue&red'\">x</font>", &xhtml, &plaintext);
+ assert_string_equal_free("x", plaintext);
+ assert_string_equal_free("<span style='color: \"color>blue&red\";'>x</span>", xhtml);
+
+ purple_markup_html_to_xhtml("<font size=1>x</font>", &xhtml, &plaintext);
+ assert_string_equal_free("x", plaintext);
+ assert_string_equal_free("<span style='font-size: xx-small;'>x</span>", xhtml);
+
+ purple_markup_html_to_xhtml("<font size=432>x</font>", &xhtml, &plaintext);
+ assert_string_equal_free("x", plaintext);
+ assert_string_equal_free("<span style='font-size: medium;'>x</span>", xhtml);
+
+ /* The following tests document a behaviour that looks suspicious */
+
+ /* bug report http://developer.pidgin.im/ticket/13485 */
+ purple_markup_html_to_xhtml("<!--COMMENT-->", &xhtml, &plaintext);
+ assert_string_equal_free("<!--COMMENT-->", xhtml);
+ assert_string_equal_free("COMMENT-->", plaintext);
+
+ /* no bug report */
+ purple_markup_html_to_xhtml("<br />", &xhtml, &plaintext);
+ assert_string_equal_free("<br />", xhtml);
+ assert_string_equal_free("<br />", plaintext);
+
+ /* same code section as <br /> */
+ purple_markup_html_to_xhtml("<hr />", &xhtml, &plaintext);
+ assert_string_equal_free("<hr />", xhtml);
+ assert_string_equal_free("<hr />", plaintext);
}
END_TEST
diff --git a/libpurple/util.c b/libpurple/util.c
--- a/libpurple/util.c
+++ b/libpurple/util.c
@@ -1891,7 +1891,7 @@ purple_markup_html_to_xhtml(const char *
if (cdata && url &&
(!g_string_equal(cdata, url) && (g_ascii_strncasecmp(url->str, "mailto:", 7) != 0 ||
g_utf8_collate(url->str + 7, cdata->str) != 0)))
- g_string_append_printf(plain, " <%s>", g_strstrip(url->str));
+ g_string_append_printf(plain, " <%s>", g_strstrip(purple_unescape_html(url->str)));
if (cdata) {
g_string_free(cdata, TRUE);
cdata = NULL;
@@ -2022,33 +2022,39 @@ purple_markup_html_to_xhtml(const char *
if (!g_ascii_strncasecmp(c, "<img", 4) && (*(c+4) == '>' || *(c+4) == ' ')) {
const char *p = c + 4;
GString *src = NULL, *alt = NULL;
+#define ESCAPE(from, to) \
+ CHECK_QUOTE(from); \
+ while (VALID_CHAR(from)) { \
+ int len; \
+ if ((*from == '&') && (purple_markup_unescape_entity(from, &len) == NULL)) \
+ to = g_string_append(to, "&amp;"); \
+ else if (*from == '\'') \
+ to = g_string_append(to, "&apos;"); \
+ else \
+ to = g_string_append_c(to, *from); \
+ from++; \
+ }
+
while (*p && *p != '>') {
if (!g_ascii_strncasecmp(p, "src=", 4)) {
const char *q = p + 4;
if (src)
g_string_free(src, TRUE);
src = g_string_new("");
- CHECK_QUOTE(q);
- while (VALID_CHAR(q)) {
- src = g_string_append_c(src, *q);
- q++;
- }
+ ESCAPE(q, src);
p = q;
} else if (!g_ascii_strncasecmp(p, "alt=", 4)) {
const char *q = p + 4;
if (alt)
g_string_free(alt, TRUE);
alt = g_string_new("");
- CHECK_QUOTE(q);
- while (VALID_CHAR(q)) {
- alt = g_string_append_c(alt, *q);
- q++;
- }
+ ESCAPE(q, alt);
p = q;
} else {
p++;
}
}
+#undef ESCAPE
if ((c = strchr(p, '>')) != NULL)
c++;
else
@@ -2058,7 +2064,7 @@ purple_markup_html_to_xhtml(const char *
g_string_append_printf(xhtml, "<img src='%s' alt='%s' />", g_strstrip(src->str), alt ? alt->str : "");
if(alt) {
if(plain)
- plain = g_string_append(plain, alt->str);
+ plain = g_string_append(plain, purple_unescape_html(alt->str));
if(!src && xhtml)
xhtml = g_string_append(xhtml, alt->str);
g_string_free(alt, TRUE);
@@ -2083,6 +2089,8 @@ purple_markup_html_to_xhtml(const char *
int len;
if ((*q == '&') && (purple_markup_unescape_entity(q, &len) == NULL))
url = g_string_append(url, "&amp;");
+ else if (*q == '"')
+ url = g_string_append(url, "&quot;");
else
url = g_string_append_c(url, *q);
q++;
@@ -2104,6 +2112,18 @@ purple_markup_html_to_xhtml(const char *
g_string_append_printf(xhtml, "<a href=\"%s\">", url ? g_strstrip(url->str) : "");
continue;
}
+#define ESCAPE(from, to) \
+ CHECK_QUOTE(from); \
+ while (VALID_CHAR(from)) { \
+ int len; \
+ if ((*from == '&') && (purple_markup_unescape_entity(from, &len) == NULL)) \
+ to = g_string_append(to, "&amp;"); \
+ else if (*from == '\'') \
+ to = g_string_append_c(to, '\"'); \
+ else \
+ to = g_string_append_c(to, *from); \
+ from++; \
+ }
if(!g_ascii_strncasecmp(c, "<font", 5) && (*(c+5) == '>' || *(c+5) == ' ')) {
const char *p = c + 5;
GString *style = g_string_new("");
@@ -2112,33 +2132,21 @@ purple_markup_html_to_xhtml(const char *
if (!g_ascii_strncasecmp(p, "back=", 5)) {
const char *q = p + 5;
GString *color = g_string_new("");
- CHECK_QUOTE(q);
- while (VALID_CHAR(q)) {
- color = g_string_append_c(color, *q);
- q++;
- }
+ ESCAPE(q, color);
g_string_append_printf(style, "background: %s; ", color->str);
g_string_free(color, TRUE);
p = q;
} else if (!g_ascii_strncasecmp(p, "color=", 6)) {
const char *q = p + 6;
GString *color = g_string_new("");
- CHECK_QUOTE(q);
- while (VALID_CHAR(q)) {
More information about the Commits
mailing list