/soc/2012/sanket/www-statscollector: 5048ce22798c: [FEATURE] Dis...

Sanket Agarwal sanket at soc.pidgin.im
Sun Aug 19 04:56:22 EDT 2012


Changeset: 5048ce22798c9fca0dc6fb8c3b930243053c53a0
Author:	 Sanket Agarwal <sanket at soc.pidgin.im>
Date:	 2012-08-19 01:54 +0530
Branch:	 default
URL: http://hg.pidgin.im/soc/2012/sanket/www-statscollector/rev/5048ce22798c

Description:

[FEATURE] Display only public servers for prpl-irc/jabber

The server now provides a new service called "/trusted", which
returns to the user, in XML format, a list of md5 hex hashes
that correspond to the public servers. A server is considered public
if it has more than THRESHOLD users. Of course, we'll have to
settle on some value of THRESHOLD (or determine it dynamically?)

The client can use this list to decide whether to send a server name to the
server: the name is sent only if the server is public. You can read more on: http://developer.pidgin.im/wiki/GSoC2012/Statscollector#Ensuringservernamesinprpl-jabberircarepublic

diffstat:

 pidgin_stats_collector/statscollector/constants.py |   6 ++
 pidgin_stats_collector/statscollector/models.py    |   1 +
 pidgin_stats_collector/statscollector/process.py   |  48 ++++++++++++++++++++-
 pidgin_stats_collector/statscollector/views.py     |  32 ++++++++++++++-
 pidgin_stats_collector/urls.py                     |   8 ++-
 5 files changed, 88 insertions(+), 7 deletions(-)

diffs (197 lines):

diff --git a/pidgin_stats_collector/statscollector/constants.py b/pidgin_stats_collector/statscollector/constants.py
--- a/pidgin_stats_collector/statscollector/constants.py
+++ b/pidgin_stats_collector/statscollector/constants.py
@@ -39,3 +39,9 @@ WIN_CAT = {"6.1":"Windows 7", "6.0":"Win
 COLOR_LIST = ('#7D0000','#006699', '#669900', '#996600', '#4C801A',\
     '#006B6B','#660066')
 
+# Trusted JABBER Servers, we don't want to reveal identities!
+TRUSTED_JABBER = {'gmail.com':'Google', 'google.com':'Google', 'facebook.com':'Facebook', 'jabber.org':'Jabber', 'meebo.org': 'Meebo'};
+
+
+# Trusted threshold
+TRUSTED_THRESHOLD = 0
diff --git a/pidgin_stats_collector/statscollector/models.py b/pidgin_stats_collector/statscollector/models.py
--- a/pidgin_stats_collector/statscollector/models.py
+++ b/pidgin_stats_collector/statscollector/models.py
@@ -23,6 +23,7 @@ class Account(models.Model):
   prpl_name = models.CharField(max_length=200)
   buddies = models.IntegerField()
   service = models.CharField(max_length=200)
+  service_hash = models.CharField(max_length=200)
 
 class Plugin(models.Model):
 
diff --git a/pidgin_stats_collector/statscollector/process.py b/pidgin_stats_collector/statscollector/process.py
--- a/pidgin_stats_collector/statscollector/process.py
+++ b/pidgin_stats_collector/statscollector/process.py
@@ -5,6 +5,8 @@ the elements, by adding ``processed'' pa
 
 from statscollector.models import *
 from statscollector.constants import *
+from statscollector.views import *
+from django.db.models import Count
 from lxml import etree
 import pdb
 import re
@@ -12,6 +14,7 @@ from django.core.files.temp import Named
 from django.core.files import File
 import random
 import glob
+import md5
 
 class Process:
 
@@ -116,29 +119,66 @@ class Process:
       jabber_connect = None
       jabber_domain  = None
       irc_server = None
+
+      jabber_connect_hash = None
+      jabber_domain_hash = None
+      irc_server_hash = None
+
       try:
         prpl_count = int(prpl_plugin.xpath('buddies')[0].text.strip())
       except: prpl_count = 0
 
       # Determines the actual connect server in case of jabber protocol
       try:
+        jabber_connect_hash = \
+            prpl_plugin.xpath('connect-server_hash')[0].text.strip()
         jabber_connect = prpl_plugin.xpath('connect-server')[0].text.strip()
       except: pass
       try:
+        jabber_domain_hash = prpl_plugin.xpath('Domain_hash')[0].text.strip()
         jabber_domain = prpl_plugin.xpath('Domain')[0].text.strip()
       except: pass
       try:
+        irc_server_hash = prpl_plugin.xpath('Server_hash')[0].text.strip()
         irc_server = prpl_plugin.xpath('Server')[0].text.strip()
       except: pass
+
       print prpl_name, irc_server, jabber_connect, jabber_domain
+      print irc_server_hash, jabber_connect_hash, jabber_domain_hash
+
+      trusted_server_list = getTrustedList()
+      print jabber_connect, jabber_connect_hash
+      if(jabber_connect): print md5.md5(jabber_connect).hexdigest()
+
+      print jabber_domain, jabber_domain_hash
+      if(jabber_domain): print md5.md5(jabber_domain).hexdigest()
+
+      print irc_server, irc_server_hash
+      if(irc_server): print md5.md5(irc_server).hexdigest()
 
       if prpl_name == 'prpl-jabber':
-        if jabber_connect and jabber_connect != "":
+        if jabber_connect and jabber_connect != ""\
+            and md5.md5(jabber_connect).hexdigest() == jabber_connect_hash\
+            and jabber_connect_hash in trusted_server_list:
           acc.service = jabber_connect
-        elif jabber_domain and jabber_domain != "":
+        elif jabber_domain and jabber_domain != ""\
+            and md5.md5(jabber_domain).hexdigest() == jabber_domain_hash\
+            and jabber_domain_hash in trusted_server_list:
           acc.service = jabber_domain
-      elif prpl_name == 'prpl-irc' and irc_server and irc_server != "":
-        acc.service = irc_server
+
+        if jabber_connect_hash and jabber_connect_hash != "":
+          acc.service_hash = jabber_connect_hash
+        elif jabber_domain_hash and jabber_domain_hash != "":
+          acc.service_hash = jabber_domain_hash
+
+      elif prpl_name == 'prpl-irc':
+        if irc_server and irc_server != "" \
+            and md5.md5(irc_server).hexdigest() == irc_server_hash\
+            and irc_server_hash in trusted_server_list:
+          acc.service = irc_server
+
+        if irc_server_hash and irc_server_hash != "":
+          acc.service_hash = irc_server_hash
 
       acc.prpl_name = prpl_name
       acc.buddies = prpl_count
diff --git a/pidgin_stats_collector/statscollector/views.py b/pidgin_stats_collector/statscollector/views.py
--- a/pidgin_stats_collector/statscollector/views.py
+++ b/pidgin_stats_collector/statscollector/views.py
@@ -4,12 +4,15 @@ from statscollector.models import *
 from django.core.context_processors import csrf
 from django.shortcuts import render_to_response
 from django.views.decorators.csrf import csrf_exempt
+from django.db.models import Count
 from xml.dom.minidom import parseString
 from xml.parsers.expat import ExpatError
 from django.core.files.temp import NamedTemporaryFile
 from django.core.files import File
 from statscollector.process import Process
 import random
+from lxml import etree
+from constants import *
 
 def index(request):
   return render_to_response('statscollector/index.html')
@@ -31,7 +34,7 @@ def handle_post(request):
   # Parse the XML file
   try:
 
-    print stats_xml
+    #print stats_xml
     stats_dom = parseString(stats_xml)
 
     # Create a file object from this data
@@ -55,6 +58,33 @@ def handle_post(request):
   except ExpatError:
     return HttpResponseBadRequest('Broken POST XML file :=(\n')
 
+def getTrustedList():
+
+  """ Returns a list of trusted server hashes for client use """
+
+  """ We scan the list of prpl-* and check which all have a well defined
+  hash associated. This we keep in a lookup table to be returned finally """
+
+  service_hash_list = Account.objects.exclude(service_hash = '')\
+      .values('service_hash').annotate(Count('service_hash'))
+  trusted_hash_list = []
+
+  for hash_dict in service_hash_list:
+    k = hash_dict['service_hash']
+    v = hash_dict['service_hash__count']
+    if v > TRUSTED_THRESHOLD:
+      trusted_hash_list.append(k)
+  return trusted_hash_list
+
+def trusted(request):
+  """ Construct a XML file from getTrustedList """
+  trusted_xml_root = etree.Element('trusted-hashes')
+  for hash in getTrustedList():
+    h = etree.SubElement(trusted_xml_root, 'hash')
+    h.attrib['id'] = hash
+  return HttpResponse(etree.tostring(trusted_xml_root, pretty_print="True"), mimetype="text/xml")
+
+
 @csrf_exempt
 def collect(request):
 
diff --git a/pidgin_stats_collector/urls.py b/pidgin_stats_collector/urls.py
--- a/pidgin_stats_collector/urls.py
+++ b/pidgin_stats_collector/urls.py
@@ -23,10 +23,14 @@ urlpatterns = patterns('',
     url(r'^collect/$', 'statscollector.views.collect'),
 
     # A minimalistic display app for the stats
-    url(r'^display/$', 'display.views.index'),
+    url(r'^$', 'display.views.index'),
 
     # Detail of plugins
-    url(r'^display/plugins-detail$', 'display.views.plugins_detail'),
+    url(r'^plugins-detail$', 'display.views.plugins_detail'),
+
+    # Trusted Server hashes
+    url(r'^trusted/$', 'statscollector.views.trusted'),
+
     # Make media files to be served static and hot ;-)
     (r'^media/(?P<path>.*)$', 'django.views.static.serve', {'document_root': settings.MEDIA_ROOT, 'show_indexes': True}),
 )



More information about the Commits mailing list