pidgin: 3f961188: Check in a version of jabber_id_new() th...
markdoliner at pidgin.im
markdoliner at pidgin.im
Fri Jul 10 02:40:37 EDT 2009
-----------------------------------------------------------------
Revision: 3f9611885d2286d341ad36a8b0fe8386fc14277b
Ancestor: ba489b13636a0bd7fa86f2ceacfe8107534adf16
Author: markdoliner at pidgin.im
Date: 2009-07-10T06:37:13
Branch: im.pidgin.pidgin
URL: http://d.pidgin.im/viewmtn/revision/info/3f9611885d2286d341ad36a8b0fe8386fc14277b
Modified files:
libpurple/protocols/jabber/jutil.c
ChangeLog:
Check in a version of jabber_id_new() that is hopefully more efficient.
I think it's less efficient than the version I originally sent to the
devel list and accidentally checked in a day or three ago.
But it's also correct, and passes all our unit tests. I think it can
be optimized a little further by filling in characters in the default case
of the switch statement (see "implement_me") and removing the #if 0 lines.
It's ok if the "implement_me" checks don't list every valid character--
the worst that happens is the check falls through to our nodeprep and
resource prep.
-------------- next part --------------
============================================================
--- libpurple/protocols/jabber/jutil.c 912799b76983833354a4420afc830bbb9b370f21
+++ libpurple/protocols/jabber/jutil.c 4ce516a30bb6a637824dcd737e064448f0edec3f
@@ -103,20 +103,139 @@ jabber_id_new(const char *str)
JabberID*
jabber_id_new(const char *str)
{
- char *at;
- char *slash;
+ const char *at = NULL;
+ const char *slash = NULL;
+ const char *cur;
+ gunichar c;
+ gboolean needs_validation = FALSE;
+#if 0
+ gboolean node_is_required = FALSE;
+#endif
char *node = NULL;
char *domain;
JabberID *jid;
- if(!str || !g_utf8_validate(str, -1, NULL))
+ if (!str)
return NULL;
- jid = g_new0(JabberID, 1);
+ for (cur = str; *cur != '\0'; cur = g_utf8_next_char(cur))
+ {
+ c = g_utf8_get_char(cur);
+ switch (c) {
+ case '@':
+ if (!slash) {
+ if (at) {
+ /* Multiple @'s in the node/domain portion, not a valid JID! */
+ return NULL;
+ }
+ if (cur == str) {
+ /* JIDs cannot start with @ */
+ return NULL;
+ }
+ if ((g_utf8_next_char(cur))[0] == '\0') {
+ /* JIDs cannot end with @ */
+ return NULL;
+ }
+ at = cur;
+ }
+ break;
- at = g_utf8_strchr(str, -1, '@');
- slash = g_utf8_strchr(str, -1, '/');
+ case '/':
+ if (!slash) {
+ if (cur == str) {
+ /* JIDs cannot start with / */
+ return NULL;
+ }
+ if ((g_utf8_next_char(cur))[0] == '\0') {
+ /* JIDs cannot end with / */
+ return NULL;
+ }
+ slash = cur;
+ }
+ break;
+ default:
+ /* characters allowed everywhere */
+ if ((c > 'a' && c < 'z')
+ || (c > '0' && c < '9')
+ || (c > 'A' && c < 'Z')
+ || c == '.' || c == '-')
+ /* We're good */
+ break;
+
+#if 0
+ if (slash != NULL) {
+ /* characters allowed only in the resource */
+ if (implement_me)
+ /* We're good */
+ break;
+ }
+
+ /* characters allowed only in the node */
+ if (implement_me) {
+ /*
+ * Ok, this character is valid, but only if it's a part
+ * of the node and not the domain. But we don't know
+ * if "c" is a part of the node or the domain until after
+ * we've found the @. So set a flag for now and check
+ * that we found an @ later.
+ */
+ node_is_required = TRUE;
+ break;
+ }
+#endif
+
+ /*
+ * Hmm, this character is a bit more exotic. Better fall
+ * back to using the more expensive UTF-8 compliant
+ * stringprep functions.
+ */
+ needs_validation = TRUE;
+ break;
+ }
+ }
+
+#if 0
+ if (node_is_required && at == NULL)
+ /* Found invalid characters in the domain */
+ return NULL;
+#endif
+
+ if (!needs_validation) {
+ /* JID is made of only ASCII characters--just lowercase and return */
+ jid = g_new0(JabberID, 1);
+
+ if (at) {
+ jid->node = g_ascii_strdown(str, at - str);
+ if (slash) {
+ jid->domain = g_ascii_strdown(at + 1, slash - (at + 1));
+ jid->resource = g_strdup(slash + 1);
+ } else {
+ jid->domain = g_ascii_strdown(at + 1, -1);
+ }
+ } else {
+ if (slash) {
+ jid->domain = g_ascii_strdown(str, slash - str);
+ jid->resource = g_strdup(slash + 1);
+ } else {
+ jid->domain = g_ascii_strdown(str, -1);
+ }
+ }
+ return jid;
+ }
+
+ /*
+ * If we get here, there are some non-ASCII chars in the string, so
+ * we'll need to validate it, normalize, and finally do a full jabber
+ * nodeprep on the jid.
+ */
+
+ if (!g_utf8_validate(str, -1, NULL))
+ return NULL;
+
+ jid = g_new0(JabberID, 1);
+
+ /* normalization */
if(at) {
node = g_utf8_normalize(str, at-str, G_NORMALIZE_NFKC);
if(slash) {
@@ -144,6 +263,7 @@ jabber_id_new(const char *str)
g_free(domain);
}
+ /* and finally the jabber nodeprep */
if(!jabber_nodeprep_validate(jid->node) ||
!jabber_nameprep_validate(jid->domain) ||
!jabber_resourceprep_validate(jid->resource)) {
More information about the Commits
mailing list