pidgin: 3f961188: Check in a version of jabber_id_new() th...

Fri Jul 10 02:40:37 EDT 2009

-----------------------------------------------------------------
Revision: 3f9611885d2286d341ad36a8b0fe8386fc14277b
Ancestor: ba489b13636a0bd7fa86f2ceacfe8107534adf16
Author: markdoliner at pidgin.im
Date: 2009-07-10T06:37:13
Branch: im.pidgin.pidgin
URL: http://d.pidgin.im/viewmtn/revision/info/3f9611885d2286d341ad36a8b0fe8386fc14277b

Modified files:
        libpurple/protocols/jabber/jutil.c

ChangeLog: 

Check in a version of jabber_id_new() that is hopefully more efficient.
I think it's less efficient than the version I originally sent to the
devel list and accidentally checked in a day or three ago.

But it's also correct, and passes all our unit tests.  I think it can
be optimized a little further by filling in characters in the default case
of the switch statement (see "implement_me") and removing the #if 0 lines.
It's ok if the "implement_me" checks don't list every valid character--
the worst that happens is the check falls through to our nodeprep and
resourceprep.

-------------- next part --------------
============================================================

--- libpurple/protocols/jabber/jutil.c	912799b76983833354a4420afc830bbb9b370f21
+++ libpurple/protocols/jabber/jutil.c	4ce516a30bb6a637824dcd737e064448f0edec3f
@@ -103,20 +103,139 @@ jabber_id_new(const char *str)
 JabberID*
 jabber_id_new(const char *str)
 {
-	char *at;
-	char *slash;
+	const char *at = NULL;
+	const char *slash = NULL;
+	const char *cur;
+	gunichar c;
+	gboolean needs_validation = FALSE;
+#if 0
+	gboolean node_is_required = FALSE;
+#endif
 	char *node = NULL;
 	char *domain;
 	JabberID *jid;
 
-	if(!str || !g_utf8_validate(str, -1, NULL))
+	if (!str)
 		return NULL;
 
-	jid = g_new0(JabberID, 1);
+	for (cur = str; *cur != '\0'; cur = g_utf8_next_char(cur))
+	{
+		c = g_utf8_get_char(cur);
+		switch (c) {
+			case '@':
+				if (!slash) {
+					if (at) {
+						/* Multiple @'s in the node/domain portion, not a valid JID! */
+						return NULL;
+					}
+					if (cur == str) {
+						/* JIDs cannot start with @ */
+						return NULL;
+					}
+					if ((g_utf8_next_char(cur))[0] == '\0') {
+						/* JIDs cannot end with @ */
+						return NULL;
+					}
+					at = cur;
+				}
+				break;
 
-	at = g_utf8_strchr(str, -1, '@');
-	slash = g_utf8_strchr(str, -1, '/');
+			case '/':
+				if (!slash) {
+					if (cur == str) {
+						/* JIDs cannot start with / */
+						return NULL;
+					}
+					if ((g_utf8_next_char(cur))[0] == '\0') {
+						/* JIDs cannot end with / */
+						return NULL;
+					}
+					slash = cur;
+				}
+				break;
 
+			default:
+				/* characters allowed everywhere */
+				if ((c > 'a' && c < 'z')
+						|| (c > '0' && c < '9')
+						|| (c > 'A' && c < 'Z')
+						|| c == '.' || c == '-')
+					/* We're good */
+					break;
+
+#if 0
+				if (slash != NULL) {
+					/* characters allowed only in the resource */
+					if (implement_me)
+						/* We're good */
+						break;
+				}
+
+				/* characters allowed only in the node */
+				if (implement_me) {
+					/*
+					 * Ok, this character is valid, but only if it's a part
+					 * of the node and not the domain.  But we don't know
+					 * if "c" is a part of the node or the domain until after
+					 * we've found the @.  So set a flag for now and check
+					 * that we found an @ later.
+					 */
+					node_is_required = TRUE;
+					break;
+				}
+#endif
+
+				/*
+				 * Hmm, this character is a bit more exotic.  Better fall
+				 * back to using the more expensive UTF-8 compliant
+				 * stringprep functions.
+				 */
+				needs_validation = TRUE;
+				break;
+		}
+	}
+
+#if 0
+	if (node_is_required && at == NULL)
+		/* Found invalid characters in the domain */
+		return NULL;
+#endif
+
+	if (!needs_validation) {
+		/* JID is made of only ASCII characters--just lowercase and return */
+		jid = g_new0(JabberID, 1);
+
+		if (at) {
+			jid->node = g_ascii_strdown(str, at - str);
+			if (slash) {
+				jid->domain = g_ascii_strdown(at + 1, slash - (at + 1));
+				jid->resource = g_strdup(slash + 1);
+			} else {
+				jid->domain = g_ascii_strdown(at + 1, -1);
+			}
+		} else {
+			if (slash) {
+				jid->domain = g_ascii_strdown(str, slash - str);
+				jid->resource = g_strdup(slash + 1);
+			} else {
+				jid->domain = g_ascii_strdown(str, -1);
+			}
+		}
+		return jid;
+	}
+
+	/*
+	 * If we get here, there are some non-ASCII chars in the string, so
+	 * we'll need to validate it, normalize, and finally do a full jabber
+	 * nodeprep on the jid.
+	 */
+
+	if (!g_utf8_validate(str, -1, NULL))
+		return NULL;
+
+	jid = g_new0(JabberID, 1);
+
+	/* normalization */
 	if(at) {
 		node = g_utf8_normalize(str, at-str, G_NORMALIZE_NFKC);
 		if(slash) {
@@ -144,6 +263,7 @@ jabber_id_new(const char *str)
 		g_free(domain);
 	}
 
+	/* and finally the jabber nodeprep */
 	if(!jabber_nodeprep_validate(jid->node) ||
 			!jabber_nameprep_validate(jid->domain) ||
 			!jabber_resourceprep_validate(jid->resource)) {