pidgin: 5b432252: util: Better validation of the allowed character values in XML 1.0

darkrain42 at pidgin.im darkrain42 at pidgin.im
Fri Apr 30 21:05:39 EDT 2010


-----------------------------------------------------------------
Revision: 5b4322528c199b000fbfc774bb782dfd6dc0e2f2
Ancestor: c9659a3fecdd575434640e531c3eb61f90de6976
Author: darkrain42 at pidgin.im
Date: 2010-04-29T17:17:00
Branch: im.pidgin.pidgin
URL: http://d.pidgin.im/viewmtn/revision/info/5b4322528c199b000fbfc774bb782dfd6dc0e2f2

Modified files:
        libpurple/tests/test_util.c libpurple/util.c

ChangeLog: 

util: Better validation of the allowed character values in XML 1.0

From http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char.  Refs #11257
This doesn't actually make a difference, because I think all the
invalid ranges aren't valid UTF-8 and so g_utf8_validate catches them.

-------------- next part --------------
============================================================
--- libpurple/tests/test_util.c	97bd0ced6da1a62f501c5640a12649056e037bbb
+++ libpurple/tests/test_util.c	a5ecc8ec17d796a22a88277468eb36a51fea231d
@@ -121,6 +121,33 @@ END_TEST
 }
 END_TEST
 
+START_TEST(test_utf8_strip_unprintables)
+{
+	fail_unless(NULL == purple_utf8_strip_unprintables(NULL));
+	/* Invalid UTF-8 input: the check below expects a NULL result */
+#if 0
+	/* Disabled: `make check` fails on an assertion for this input */
+	fail_unless(NULL == purple_utf8_strip_unprintables("abc\x80\x7f"));
+#endif
+	/* Whitespace (\t, \n, \r, space) must be kept as-is */
+	assert_string_equal_free("ab \tcd\nef\r   ", purple_utf8_strip_unprintables("ab \tcd\nef\r   "));
+	/* Basic ASCII must come back unchanged */
+	assert_string_equal_free("Foobar", purple_utf8_strip_unprintables("Foobar"));
+	/* Allowed range 0xE000 - 0xFFFD (UTF-8 encoded) must be kept */
+	/* U+F1F7, encoded as EF 87 B7 */
+	assert_string_equal_free("aaaa\xef\x87\xb7", purple_utf8_strip_unprintables("aaaa\xef\x87\xb7"));
+#if 0
+	/* Disabled: `make check` fails on an assertion for these inputs */
+	/* U+D800 (first High Surrogate, encoded as ED A0 80) -- should be stripped */
+	assert_string_equal_free("aaaa", purple_utf8_strip_unprintables("aaaa\xed\xa0\x80"));
+	/* U+FFFE, encoded as EF BF BE (should be stripped) */
+	assert_string_equal_free("aaaa", purple_utf8_strip_unprintables("aaaa\xef\xbf\xbe"));
+#endif
+	/* U+FEFF, encoded as EF BB BF (should not be stripped) */
+	assert_string_equal_free("aaaa\xef\xbb\xbf", purple_utf8_strip_unprintables("aaaa\xef\xbb\xbf"));
+}
+END_TEST
+
 START_TEST(test_mime_decode_field)
 {
 	gchar *result = purple_mime_decode_field("=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=");
@@ -168,6 +195,10 @@ util_suite(void)
 	tcase_add_test(tc, test_markup_html_to_xhtml);
 	suite_add_tcase(s, tc);
 
+	tc = tcase_create("Stripping Unparseables");
+	tcase_add_test(tc, test_utf8_strip_unprintables);
+	suite_add_tcase(s, tc);
+
 	tc = tcase_create("MIME");
 	tcase_add_test(tc, test_mime_decode_field);
 	suite_add_tcase(s, tc);
============================================================
--- libpurple/util.c	aa071cb1fe0710e3d75ebebc70d4f20674526d5a
+++ libpurple/util.c	807eb07e5315efd98f8e70facbb71e7928898f59
@@ -4593,12 +4593,22 @@ purple_utf8_strip_unprintables(const gch
 	}
 
 	workstr = iter = g_new(gchar, strlen(str) + 1);
-	for ( ; *str; ++str) {
-		guchar c = *str;
-		if (c >= 0x20 || c == '\t' || c == '\n' || c == '\r') {
-			*iter = c;
-			++iter;
+	while (*str) {
+		gunichar ch = g_utf8_get_char(str);
+		gchar *next = g_utf8_next_char(str);
+		/*
+		 * Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
+		 *          [#x10000-#x10FFFF]
+		 */
+		if ((ch == '\t' || ch == '\n' || ch == '\r') ||
+				(ch >= 0x20 && ch <= 0xD7FF) ||
+				(ch >= 0xE000 && ch <= 0xFFFD) ||
+				(ch >= 0x10000 && ch <= 0x10FFFF)) {
+			memcpy(iter, str, next - str);
+			iter += (next - str);
 		}
+
+		str = next;
 	}
 
 	/* nul-terminate the new string */


More information about the Commits mailing list