pidgin: 237cf4e0: Process the data correctly if the server...

Tue Jun 30 16:55:29 EDT 2009

-----------------------------------------------------------------
Revision: 237cf4e06a6b94e637978e489ea7936ef3d2ad2d
Ancestor: fdc9c398d90e22dc3ec2fa37d0bbe83b5ae2f1a6
Author: sadrul at pidgin.im
Date: 2009-06-30T20:48:12
Branch: im.pidgin.pidgin
URL: http://d.pidgin.im/viewmtn/revision/info/237cf4e06a6b94e637978e489ea7936ef3d2ad2d

Modified files:
        libpurple/util.c

ChangeLog: 

Process the data correctly if the server sends chunked data.

The Yahoo servers seem to always send out chunked data, which can cause
errors with aliases, profile pictures, etc. Apparently we include a 'Host: '
header when we request the URL, hoping that would stop the server from
sending us chunked data, but that doesn't seem to work, at least for the
Yahoo servers.

-------------- next part --------------
============================================================

--- libpurple/util.c	e1d2253a3c331f4132f4ea55c9466b6882c650a3
+++ libpurple/util.c	34346d20137ff3e5673fb17997a75b7a62d3e0c1
@@ -68,6 +68,7 @@ struct _PurpleUtilFetchUrlData
 	unsigned long len;
 	unsigned long data_len;
 	gssize max_len;
+	gboolean chunked;
 };
 
 static char *custom_user_dir = NULL;
@@ -3714,41 +3715,43 @@ parse_redirect(const char *data, size_t 
 	return TRUE;
 }
 
-static size_t
-parse_content_len(const char *data, size_t data_len)
+static const char *
+find_header_content(const char *data, size_t data_len, const char *header, size_t header_len)
 {
-	size_t content_len = 0;
 	const char *p = NULL;
 
-	/* This is still technically wrong, since headers are case-insensitive
-	 * [RFC 2616, section 4.2], though this ought to catch the normal case.
-	 * Note: data is _not_ nul-terminated.
-	 */
-	if(data_len > 16) {
-		p = (strncmp(data, "Content-Length: ", 16) == 0) ? data : NULL;
-		if(!p)
-			p = (strncmp(data, "CONTENT-LENGTH: ", 16) == 0)
-				? data : NULL;
-		if(!p) {
-			p = g_strstr_len(data, data_len, "\nContent-Length: ");
-			if (p)
-				p++;
-		}
-		if(!p) {
-			p = g_strstr_len(data, data_len, "\nCONTENT-LENGTH: ");
-			if (p)
-				p++;
-		}
+	if (header_len <= 0)
+		header_len = strlen(header);
 
-		if(p)
-			p += 16;
+	/* Note: data is _not_ nul-terminated.  */
+	if (data_len > header_len) {
+		if (header[0] == '\n')
+			p = (g_strncasecmp(data, header + 1, header_len - 1) == 0) ? data : NULL;
+		if (!p)
+			p = purple_strcasestr(data, header);
+		if (p)
+			p += header_len;
 	}
 
-	/* If we can find a Content-Length header at all, try to sscanf it.
+	/* If we can find the header at all, try to sscanf it.
 	 * Response headers should end with at least \r\n, so sscanf is safe,
 	 * if we make sure that there is indeed a \n in our header.
 	 */
 	if (p && g_strstr_len(p, data_len - (p - data), "\n")) {
+		return p;
+	}
+
+	return NULL;
+}
+
+static size_t
+parse_content_len(const char *data, size_t data_len)
+{
+	size_t content_len = 0;
+	const char *p = NULL;
+
+	p = find_header_content(data, data_len, "\nContent-Length: ", sizeof("\nContent-Length: ") - 1);
+	if (p) {
 		sscanf(p, "%" G_GSIZE_FORMAT, &content_len);
 		purple_debug_misc("util", "parsed %" G_GSIZE_FORMAT "\n", content_len);
 	}
@@ -3756,8 +3759,51 @@ parse_content_len(const char *data, size
 	return content_len;
 }
 
+static gboolean
+content_is_chunked(const char *data, size_t data_len)
+{
+	gboolean chunked = FALSE;
+	const char *p = find_header_content(data, data_len, "\nTransfer-Encoding: ", sizeof("\nTransfer-Encoding: ") - 1);
+	if (p && g_strncasecmp(p, "chunked", 7) == 0)
+		chunked = TRUE;
 
+	return chunked;
+}
+
+/* Process in-place */
 static void
+process_chunked_data(char *data, gssize *len)
+{
+	gssize sz;
+	gssize nlen = 0;
+	char *p = data;
+	char *s = data;
+
+	while (*s) {
+		if (sscanf(s, "%x\r\n", &sz) != 1) {
+			purple_debug_error("util", "Error processing chunked data. Expected data length, found: %s\n", s);
+			break;
+		}
+		if (sz == 0)
+			break;
+		s = strstr(s, "\r\n") + 2;
+		g_memmove(p, s, sz);
+		p += sz;
+		s += sz;
+		nlen += sz;
+		if (*s != '\r' && *(s + 1) != '\n') {
+			purple_debug_error("util", "Error processing chunked data. Expected \\r\\n, found: %s\n", s);
+			break;
+		}
+		s += 2;
+	}
+	*p = 0;
+
+	if (len)
+		*len = nlen;
+}
+
+static void
 url_fetch_recv_cb(gpointer url_data, gint source, PurpleInputCondition cond)
 {
 	PurpleUtilFetchUrlData *gfud = url_data;
@@ -3816,6 +3862,7 @@ url_fetch_recv_cb(gpointer url_data, gin
 
 				/* No redirect. See if we can find a content length. */
 				content_len = parse_content_len(gfud->webdata, header_len);
+				gfud->chunked = content_is_chunked(gfud->webdata, header_len);
 
 				if(content_len == 0) {
 					/* We'll stick with an initial 8192 */
@@ -3888,6 +3935,11 @@ url_fetch_recv_cb(gpointer url_data, gin
 		gfud->webdata = g_realloc(gfud->webdata, gfud->len + 1);
 		gfud->webdata[gfud->len] = '\0';
 
+		if (!gfud->include_headers && gfud->chunked) {
+			/* Process only if we don't want the headers. */
+			process_chunked_data(gfud->webdata, &gfud->len);
+		}
+
 		gfud->callback(gfud, gfud->user_data, gfud->webdata, gfud->len, NULL);
 		purple_util_fetch_url_cancel(gfud);
 	}