pidgin: 4856b929: * Change yahoo_html_to_codes() a little ...
markdoliner at pidgin.im
markdoliner at pidgin.im
Wed Aug 19 18:09:16 EDT 2009
-----------------------------------------------------------------
Revision: 4856b929b0811c3ef6e0fa552e431bdcabaf5223
Ancestor: 1fa60074b939d7c3827e843242f1532560f6f4a6
Author: markdoliner at pidgin.im
Date: 2009-08-19T22:01:10
Branch: im.pidgin.pidgin
URL: http://d.pidgin.im/viewmtn/revision/info/4856b929b0811c3ef6e0fa552e431bdcabaf5223
Modified files:
libpurple/protocols/yahoo/libymsg.h
libpurple/protocols/yahoo/util.c
libpurple/tests/test_yahoo_util.c
ChangeLog:
* Change yahoo_html_to_codes() a little to hopefully be more straightforward
and more similar to yahoo_codes_to_html()
* Add documentation to yahoo_html_to_codes() that explains the differences
between the encoded text that we send and the encoded text sent by yahoo
* Enable the test cases for yahoo_html_to_codes()
-------------- next part --------------
============================================================
--- libpurple/protocols/yahoo/libymsg.h c505b82f296148f9159201e9c12d8dc3fcf02bf1
+++ libpurple/protocols/yahoo/libymsg.h 22e537c8bea812721238661dda7ebf4fe877a8dd
@@ -280,6 +280,27 @@ char *yahoo_codes_to_html(const char *x)
void yahoo_init_colorht(void);
void yahoo_dest_colorht(void);
char *yahoo_codes_to_html(const char *x);
+
+/**
+ * This function takes a normal HTML message and converts it to the message
+ * format used by Yahoo, which uses a frankensteinish combination of ANSI
+ * escape codes and broken HTML.
+ *
+ * It results in slightly different output than would be sent by official
+ * Yahoo clients. The two main differences are:
+ *
+ * 1. We always close all tags, whereas official Yahoo clients leave tags
+ * dangling open at the end of each message (and the client treats them
+ * as closed).
+ * 2. We always close inner tags first before closing outer tags.
+ *
+ * For example, if you want to send this message:
+ * <b> bold <i> bolditalic </i></b><i> italic </i>
+ * Official Yahoo clients would send:
+ * ESC[1m bold ESC[2m bolditalic ESC[x1m italic
+ * But we will send:
+ * ESC[1m bold ESC[2m bolditalic ESC[x2mESC[x1mESC[2m italic ESC[x2m
+ */
char *yahoo_html_to_codes(const char *src);
gboolean
============================================================
--- libpurple/protocols/yahoo/util.c 31666b2add695540ed3a138e979cdb9a4e0089bf
+++ libpurple/protocols/yahoo/util.c bbd1e1c1e5aa6b0dd83bfecda4b1c757529a1f6c
@@ -669,7 +669,7 @@ static const gint _point_sizes [] = { 8,
#define POINT_SIZE(x) (_point_sizes [MIN ((x > 0 ? x : 1), MAX_FONT_SIZE) - 1])
static const gint _point_sizes [] = { 8, 10, 12, 14, 20, 30, 40 };
-enum fatype
+enum fontattr_type
{
FATYPE_SIZE,
FATYPE_COLOR,
@@ -679,7 +679,7 @@ typedef struct
typedef struct
{
- enum fatype type;
+ enum fontattr_type type;
union {
int size;
char *color;
@@ -688,6 +688,17 @@ typedef struct
} u;
} fontattr;
+typedef struct
+{
+ gboolean bold;
+ gboolean italic;
+ gboolean underline;
+ gboolean in_link;
+ int font_size;
+ char *font_face;
+ char *font_color;
+} CurrentMsgState;
+
static void fontattr_free(fontattr *f)
{
if (f->type == FATYPE_COLOR)
@@ -876,167 +887,124 @@ char *yahoo_html_to_codes(const char *sr
GString *dest;
char *esc;
GQueue *ftattr = NULL;
- gboolean no_more_specials = FALSE;
+ gboolean no_more_gt_brackets = FALSE;
+ gchar *tag, *tag_name;
+ gboolean is_closing_tag;
+ CurrentMsgState current_state;
+ bzero(&current_state, sizeof(current_state));
+
src_len = strlen(src);
dest = g_string_sized_new(src_len);
for (i = 0; i < src_len; i++) {
-
- if (src[i] == '<' && !no_more_specials) {
+ if (src[i] == '<' && !no_more_gt_brackets) {
+ /* The start of an HTML tag */
j = i;
- while (1) {
- j++;
+ while (j++ < src_len) {
+ if (src[j] != '>') {
+ if (src[j] == '"') {
+ /* We're inside a quoted attribute value. Skip to the end */
+ j++;
+ while (j != src_len && src[j] != '"')
+ j++;
+ } else if (src[j] == '\'') {
+ /* We're inside a quoted attribute value. Skip to the end */
+ j++;
+ while (j != src_len && src[j] != '\'')
+ j++;
+ }
+ if (j != src_len)
+ /* Keep looking for the end of this tag */
+ continue;
- if (j >= src_len) { /* no '>' */
+ /* This < has no corresponding > */
g_string_append_c(dest, src[i]);
- no_more_specials = TRUE;
+ no_more_gt_brackets = TRUE;
break;
}
- if (src[j] == '<') {
- /* FIXME: This doesn't convert outgoing entities.
- * However, I suspect this case may never
- * happen anymore because of the entities.
- */
- g_string_append_len(dest, &src[i], j - i);
- i = j - 1;
- if (ftattr) {
- fontattr *f;
+ tag = g_strndup(src + i, j - i + 1);
+ tag_name = yahoo_markup_get_tag_name(tag, &is_closing_tag);
- while ((f = g_queue_pop_head(ftattr)))
- fontattr_free(f);
- g_queue_free(ftattr);
- ftattr = NULL;
- }
- break;
- }
-
- if (src[j] == ' ') {
- if (!g_ascii_strncasecmp(&src[i+1], "BODY", j - i - 1)) {
- char *t = strchr(&src[j], '>');
- if (!t) {
- g_string_append(dest, &src[i]);
+ if (g_str_equal(tag_name, "a")) {
+ j += 7;
+ g_string_append(dest, "\033[lm");
+ if (purple_str_has_prefix(src + j, "mailto:"))
+ j += sizeof("mailto:") - 1;
+ while (1) {
+ g_string_append_c(dest, src[j]);
+ if (++j >= src_len) {
i = src_len;
break;
- } else {
- i = t - src;
- break;
}
- } else if (!g_ascii_strncasecmp(&src[i+1], "A HREF=\"", j - i - 1)) {
- j += 7;
- g_string_append(dest, "\033[lm");
- if (purple_str_has_prefix(src + j, "mailto:"))
- j += sizeof("mailto:") - 1;
- while (1) {
- g_string_append_c(dest, src[j]);
- if (++j >= src_len) {
- i = src_len;
- break;
- }
- if (src[j] == '"') {
- g_string_append(dest, "\033[xlm");
- while (1) {
- if (++j >= src_len) {
- i = src_len;
- break;
- }
- if (!g_ascii_strncasecmp(&src[j], "</A>", 4)) {
- j += 3;
- break;
- }
+ if (src[j] == '"') {
+ g_string_append(dest, "\033[xlm");
+ while (1) {
+ if (++j >= src_len) {
+ i = src_len;
+ break;
}
- i = j;
- break;
+ if (!g_ascii_strncasecmp(&src[j], "</A>", 4)) {
+ j += 3;
+ break;
+ }
}
+ i = j;
+ break;
}
- } else if (!g_ascii_strncasecmp(&src[i+1], "SPAN", j - i - 1)) { /* drop span tags */
- while (1) {
- if (++j >= src_len) {
- g_string_append(dest, &src[i]);
- i = src_len;
- break;
- }
- if (src[j] == '>') {
- i = j;
- break;
- }
- }
- } else if (g_ascii_strncasecmp(&src[i+1], "FONT", j - i - 1)) { /* not interested! */
- while (1) {
- if (++j >= src_len) {
- g_string_append(dest, &src[i]);
- i = src_len;
- break;
- }
- if (src[j] == '>') {
- g_string_append_len(dest, &src[i], j - i + 1);
- i = j;
- break;
- }
- }
- } else { /* yay we have a font tag */
- _parse_font_tag(src, dest, &i, &j, src_len, &colors, &tags, ftattr);
}
- break;
- }
-
- if (src[j] == '>') {
- /* This has some problems like the FIXME for the
- * '<' case. and like that case, I suspect the case
- * that this has problems is won't happen anymore anyway.
- */
- int sublen = j - i - 1;
-
- if (sublen) {
- if (!g_ascii_strncasecmp(&src[i+1], "B", sublen)) {
- g_string_append(dest, "\033[1m");
- } else if (!g_ascii_strncasecmp(&src[i+1], "/B", sublen)) {
- g_string_append(dest, "\033[x1m");
- } else if (!g_ascii_strncasecmp(&src[i+1], "I", sublen)) {
- g_string_append(dest, "\033[2m");
- } else if (!g_ascii_strncasecmp(&src[i+1], "/I", sublen)) {
- g_string_append(dest, "\033[x2m");
- } else if (!g_ascii_strncasecmp(&src[i+1], "U", sublen)) {
- g_string_append(dest, "\033[4m");
- } else if (!g_ascii_strncasecmp(&src[i+1], "/U", sublen)) {
- g_string_append(dest, "\033[x4m");
- } else if (!g_ascii_strncasecmp(&src[i+1], "/A", sublen)) {
- g_string_append(dest, "\033[xlm");
- } else if (!g_ascii_strncasecmp(&src[i+1], "BR", sublen)) {
- g_string_append_c(dest, '\n');
- } else if (!g_ascii_strncasecmp(&src[i+1], "/BODY", sublen)) {
- /* mmm, </body> tags. *BURP* */
- } else if (!g_ascii_strncasecmp(&src[i+1], "/SPAN", sublen)) {
- /* </span> tags. dangerously close to </spam> */
- } else if (!g_ascii_strncasecmp(&src[i+1], "/FONT", sublen) && tags != NULL) {
- char *etag;
-
- etag = tags->data;
- tags = g_slist_delete_link(tags, tags);
- if (etag) {
- g_string_append(dest, etag);
- if (!strcmp(etag, "</font>")) {
- if (colors != NULL) {
- g_free(colors->data);
- colors = g_slist_delete_link(colors, colors);
- }
- }
- g_free(etag);
+ } else if (g_str_equal(tag_name, "font")) {
+ _parse_font_tag(src, dest, &i, &j, src_len, &colors, &tags, ftattr);
+ } else if (g_str_equal(tag_name, "b")) {
+ g_string_append(dest, "\033[1m");
+ current_state.bold = TRUE;
+ } else if (g_str_equal(tag_name, "/b")) {
+ if (current_state.bold) {
+ g_string_append(dest, "\033[x1m");
+ current_state.bold = FALSE;
+ }
+ } else if (g_str_equal(tag_name, "i")) {
+ current_state.italic = TRUE;
+ g_string_append(dest, "\033[2m");
+ } else if (g_str_equal(tag_name, "/i")) {
+ if (current_state.italic) {
+ g_string_append(dest, "\033[x2m");
+ current_state.italic = FALSE;
+ }
+ } else if (g_str_equal(tag_name, "u")) {
+ current_state.underline = TRUE;
+ g_string_append(dest, "\033[4m");
+ } else if (g_str_equal(tag_name, "/u")) {
+ if (current_state.underline) {
+ g_string_append(dest, "\033[x4m");
+ current_state.underline = FALSE;
+ }
+ } else if (g_str_equal(tag_name, "/a")) {
+ g_string_append(dest, "\033[xlm");
+ } else if (g_str_equal(tag_name, "br")) {
+ g_string_append_c(dest, '\n');
+ } else if (g_str_equal(tag_name, "/font")) {
+ if (tags != NULL) {
+ char *etag = tags->data;
+ tags = g_slist_delete_link(tags, tags);
+ g_string_append(dest, etag);
+ if (g_str_equal(etag, "</font>")) {
+ if (colors != NULL) {
+ g_free(colors->data);
+ colors = g_slist_delete_link(colors, colors);
}
- } else {
- g_string_append_len(dest, &src[i], j - i + 1);
}
- } else {
- g_string_append_len(dest, &src[i], j - i + 1);
+ g_free(etag);
}
-
- i = j;
- break;
}
+ i = j;
+ g_free(tag);
+ g_free(tag_name);
+ break;
}
} else {
============================================================
--- libpurple/tests/test_yahoo_util.c 948b653aa8f524b8e4570e6e7dae24f843183ed5
+++ libpurple/tests/test_yahoo_util.c efe0e7daa06d052dac47e925e354f9c0a41acc9c
@@ -104,7 +104,6 @@ END_TEST
}
END_TEST
-#if 0
START_TEST(test_html_to_codes)
{
assert_string_equal_free("plain",
@@ -129,7 +128,6 @@ START_TEST(test_html_to_codes)
yahoo_html_to_codes("plain &amp;"));
/* bold/italic/underline */
- // MARK: This isn't correct. Should not have the closing bold escape code
assert_string_equal_free("\x1B[1mbold\x1B[x1m",
yahoo_html_to_codes("<b>bold</b>"));
assert_string_equal_free("\x1B[2mitalic\x1B[x2m",
@@ -140,13 +138,12 @@ START_TEST(test_html_to_codes)
yahoo_html_to_codes("no</u> markup"));
assert_string_equal_free("\x1B[1mbold\x1B[x1m \x1B[2mitalic\x1B[x2m \x1B[4munderline\x1B[x4m",
yahoo_html_to_codes("<b>bold</b> <i>italic</i> <u>underline</u>"));
- assert_string_equal_free("\x1B[1mbold \x1B[2mbolditalic\x1B[x1m italic\x1B[x1m",
+ assert_string_equal_free("\x1B[1mbold \x1B[2mbolditalic\x1B[x2m\x1B[x1m\x1B[2m italic\x1B[x2m",
yahoo_html_to_codes("<b>bold <i>bolditalic</i></b><i> italic</i>"));
- assert_string_equal_free("\x1B[1mbold \x1B[2mbolditalic\x1B[x1m \x1B[4mitalicunderline",
+ assert_string_equal_free("\x1B[1mbold \x1B[2mbolditalic\x1B[x2m\x1B[x1m\x1B[2m \x1B[4mitalicunderline\x1B[x4m\x1B[x2m",
yahoo_html_to_codes("<b>bold <i>bolditalic</i></b><i> <u>italicunderline</u></i>"));
}
END_TEST
-#endif
Suite *
yahoo_util_suite(void)
@@ -161,11 +158,9 @@ yahoo_util_suite(void)
tcase_add_test(tc, test_codes_to_html);
suite_add_tcase(s, tc);
-#if 0
tc = tcase_create("Convert IM from HTML to network format");
tcase_add_test(tc, test_html_to_codes);
suite_add_tcase(s, tc);
-#endif
return s;
}
More information about the Commits
mailing list