/soc/2012/sanket/www-statscollector: 7c0a74b72b43: Pre-process t...

sanket sanket at soc.pidgin.im
Sun Jul 8 05:10:04 EDT 2012


Changeset: 7c0a74b72b43b4af1fe59fbf14f204b2cb85dded
Author:	 sanket <sanket at soc.pidgin.im>
Date:	 2012-07-08 02:08 +0530
Branch:	 default
URL: http://hg.pidgin.im/soc/2012/sanket/www-statscollector/rev/7c0a74b72b43

Description:

Pre-process the XML into schema

To leverage the power of SQL queries, the XML will be split into
various schemas such as Account, Plugin, etc. This will make queries
very efficient, as they'll just be count(*) ... GROUP BY ...

TODO: OS information is still coming from Files and needs to be moved
to the above schema.

diffstat:

 pidgin_stats_collector/display/views.py            |  211 +++++---------------
 pidgin_stats_collector/settings.py                 |    2 +-
 pidgin_stats_collector/statscollector/admin.py     |   23 ++
 pidgin_stats_collector/statscollector/constants.py |   38 +++
 pidgin_stats_collector/statscollector/models.py    |   41 ++++
 pidgin_stats_collector/statscollector/process.py   |  204 ++++++++++++++++++++
 pidgin_stats_collector/statscollector/views.py     |    4 +
 7 files changed, 369 insertions(+), 154 deletions(-)

diffs (truncated from 645 to 300 lines):

diff --git a/pidgin_stats_collector/display/views.py b/pidgin_stats_collector/display/views.py
--- a/pidgin_stats_collector/display/views.py
+++ b/pidgin_stats_collector/display/views.py
@@ -1,6 +1,7 @@
 from django.http import HttpResponse
 from statscollector.models import *
 from django.core.context_processors import csrf
+from django.db.models import Avg, Count
 from django.shortcuts import render_to_response
 from django.views.decorators.csrf import csrf_exempt
 from xml.dom.minidom import parseString
@@ -12,44 +13,8 @@
 from lxml import etree
 import simplejson
 import re
+from statscollector.constants import *
 
-# List of statistics that we are going to measure
-
-# Stat variables with strings associated
-
-# Application bitness
-APP_BIT_STR = 'app-bit'
-OS_BIT_STR = 'os-bit'
-APP_BIT = ('32', '64', 'unknown')
-
-# Major OS name
-OS_NAME_STR = 'os-name'
-OS_NAME = ('linux', 'windows', 'apple', 'unknown')
-
-# Types of architectures
-ARCH_NAME_STR = 'arch-name'
-ARCH_NAME = ('x86', 'x86_64', 'ppc', 'ppc64', 'ia64', 'unknown')
-
-# Purple version information
-PRPL_V_STR = 'purple-version'
-PRPL_V = ('unknown',)
-
-# Purple protocols information
-PRPL_P_STR = 'purple-protocols'
-PRPL_P_U_STR = 'purple-users-protocol'
-PRPL_P = ('prpl-msn','prpl-yahoo', 'prpl-gtalk', 'prpl-jabber', 'prpl-facebook-xmpp', 'prpl-irc', 'unknown',)
-
-# Loadable/User plugins
-PLUGIN_STR = 'plugins'
-PLUGIN = ('unknown',)
-
-# Categories of Windows Installations
-WIN_CAT = {"6.1":"Windows 7", "6.0":"Windows Vista", "unknown":"unknown", \
-    "5.2":"Windows Server 2003", "5.1":"Windows XP", "5.0":"Windows 2000"}
-
-# Color List, useful for rendering column's/graphs with varying colors
-COLOR_LIST = ('#7D0000','#006699', '#669900', '#996600', '#4C801A',\
-    '#006B6B','#660066')
 
 def init_stats_dict():
 
@@ -95,23 +60,19 @@
   stats_d[APP_BIT_STR] = dict(zip(APP_BIT, [0]*len(APP_BIT)))
 
   # OS bitness
-  stats_d[OS_BIT_STR] = dict(zip(APP_BIT, [0]*len(APP_BIT)))
+  stats_d[OS_BIT_STR] = {}
 
   # Arch name
-  stats_d[ARCH_NAME_STR] = dict(zip(ARCH_NAME, [0]*len(ARCH_NAME)))
+  stats_d[ARCH_NAME_STR] = {}
 
   # Purple Version
-  stats_d[PRPL_V_STR] = dict(zip(PRPL_V, [0]*len(PRPL_V)))
-
+  stats_d[PRPL_V_STR] = {}
   # Purple Protocols
-  stats_d[PRPL_P_STR] = dict(zip(PRPL_P, [0]*len(PRPL_P)))
-
+  stats_d[PRPL_P_STR] = {}
   # Purple Protocols
-  stats_d[PRPL_P_U_STR] = dict(zip(PRPL_P, [0]*len(PRPL_P)))
-
+  stats_d[PRPL_P_U_STR] = {}
   # Plugins
-  stats_d[PLUGIN_STR] = dict(zip(PLUGIN, [0]*len(PLUGIN)))
-
+  stats_d[PLUGIN_STR] = {}
   return stats_d
 
 def process_stats(from_date, to_date):
@@ -147,132 +108,75 @@
     stats_dom = etree.fromstring(stats_str)
 
     # Start with cpuinfo
+    os_name_dict = Info.objects.values('os_name').\
+        annotate(Count('os_name'))
 
-    # Type of the OS (basic)
+    for item in os_name_dict:
+      stats_os_name[item['os_name']]['y'] = \
+          item['os_name__count']
 
-    e1 = stats_dom.xpath('/stats/cpuinfo/cpu/os-info')[0]
+    # Windows specific info
+    win_v_cat = stats_os_name['windows']['drilldown']['categories']
+    win_v_dat = stats_os_name['windows']['drilldown']['data']
 
-    if('id' in e1.attrib):
+    win_dict = Info.objects.filter(os_name='windows').\
+        values('os_sub_name').annotate(Count('os_sub_name'))
 
-      os_name = e1.attrib['id'].lower()
+    for item in win_dict:
+      win_v_dat[win_v_cat.index(WIN_CAT[item['os_sub_name']])] = item['os_sub_name__count']
 
-      if os_name in stats_os_name:
-        stats_os_name[os_name]['y'] += 1
-      else: stats_os_name['unknown']['y'] += 1
+    # Apple specific info
+    apple_dict = Info.objects.filter(os_name='apple').\
+        values('os_sub_name').annotate(Count('os_sub_name'))
 
-      if os_name == 'windows':
-
-        win_v_cat = stats_os_name[os_name]['drilldown']['categories']
-        win_v_dat = stats_os_name[os_name]['drilldown']['data']
-
-        win_major_v = e1.xpath("major-version")[0].text
-        win_minor_v = e1.xpath("minor-version")[0].text
-        win_v = "%s.%s" % (win_major_v, win_minor_v)
-
-        if win_v in WIN_CAT:
-          win_v_dat[win_v_cat.index(WIN_CAT[win_v])] += 1
-        else:
-          win_v_dat[win_v_cat.index('unknown')] += 1
-
-      elif os_name == 'apple':
-
-        print 'APPLE XXX'
-        apple_major_v = e1.xpath("major-version")[0].text
-        apple_minor_v = e1.xpath("minor-version")[0].text
-        apple_bug_v   = e1.xpath("bug-fix-version")[0].text
-
-        apple_v = "%s.%s.%s" % (apple_major_v, apple_minor_v, apple_bug_v)
-
-        if apple_v not in stats_os_name['apple']['drilldown']['dict']:
-          stats_os_name['apple']['drilldown']['dict'][apple_v] = 1
-        else: stats_os_name['apple']['drilldown']['dict'][apple_v] += 1
+    for item in apple_dict:
+      stats_os_name['apple']['drilldown']['dict'][item['os_sub_name']] = \
+         item['os_sub_name__count']
 
     # The bitness of the application
-    e2 = stats_dom.xpath('/stats/cpuinfo/cpu/app-bit')
-    app_bit = e2[0].text
-
-    if app_bit in stats_app_bit:
-      stats_app_bit[app_bit] += 1
-    else: stats_app_bit['unknown'] += 1
+    app_bit_dict = Info.objects.values('app_bit').\
+        annotate(Count('app_bit'))
+    for app_bit in app_bit_dict:
+      stats_app_bit[app_bit['app_bit']] = app_bit['app_bit__count']
 
     # The bitness of the kernel
-    e2 = stats_dom.xpath('/stats/cpuinfo/cpu/os-bit')
-    os_bit = e2[0].text
-
-    if os_bit in stats_os_bit:
-      stats_os_bit[os_bit] += 1
-    else: stats_os_bit['unknown'] += 1
+    os_bit_dict = Info.objects.values('os_bit').\
+        annotate(Count('os_bit'))
+    for os_bit in os_bit_dict:
+      stats_os_bit[os_bit['os_bit']] = os_bit['os_bit__count']
 
     # Type of architecture we are dealing with
-    e3 = stats_dom.xpath('/stats/cpuinfo/cpu/arch')[0]
-
-    if('id' in e3.attrib):
-
-      arch_name = e3.attrib['id'].lower()
-
-      if arch_name == 'amd64': arch_name = 'x86_64'
-      elif arch_name in ['i386', 'i486', 'i586', 'i686']:
-        arch_name = 'x86'
-
-      if arch_name in ARCH_NAME:
-        stats_arch_name[arch_name] += 1
-      else: stats_arch_name['unknown'] += 1
+    arch_dict = Info.objects.values('arch_id').\
+        annotate(Count('arch_id'))
+    for item in arch_dict:
+      stats_arch_name[item['arch_id']] = item['arch_id__count']
 
     # Version of libpurple installation
-
-    e4 = stats_dom.xpath('/stats/cpuinfo/purple-version')[0]
-    purple_v_str = e4.text
-
-    print purple_v_str
-
-    rel_re = '^([0-9]+)\.([0-9]+)\.([0-9]+)$'
-    dev_re = '^([0-9]+)\.([0-9]+)\.([0-9]+)(devel)(.*)$'
-
-    rel_res = re.search(rel_re, purple_v_str)
-    dev_res = re.search(dev_re, purple_v_str)
-
-    if (rel_res and rel_res.group(0)==purple_v_str) or \
-        (dev_res and dev_res.group(0)==purple_v_str):
-          if not purple_v_str in stats_prpl_v:
-            stats_prpl_v[purple_v_str] = 0
-          stats_prpl_v[purple_v_str] += 1
-    else:
-      stats_prpl_v['unknown'] += 1
+    purple_version_dict = Info.objects.values('purple_version').\
+        annotate(Count('purple_version')).\
+        order_by('-purple_version__count')
+    for item in purple_version_dict:
+      stats_prpl_v[item['purple_version']] = \
+          item['purple_version__count']
 
     # IM Services used (protocol plugins) and avg users
+    account_count_dict = Account.objects.values('prpl_name').\
+        annotate(Count('prpl_name')).order_by('-prpl_name__count')
+    account_avg_dict   = Account.objects.values('prpl_name').\
+        annotate(Avg('buddies')).order_by('-buddies__avg')
 
-    prpl_plugins = stats_dom.xpath('/stats/accounts/account')
+    for item in account_count_dict:
+      stats_prpl_p[item['prpl_name']] = item['prpl_name__count']
+    for item in account_avg_dict:
+      stats_prpl_p_u[item['prpl_name']] = item['buddies__avg']
 
-    for prpl_plugin in prpl_plugins:
+    # Plugins (3rd party/loadable-unloadable)
+    plugin_count_dict = Plugin.objects.values('p_id').\
+        annotate(Count('p_id')).order_by('-p_id__count')
 
-      prpl_name = prpl_plugin.xpath('protocol')[0].text
-      prpl_count=0
-      try:
-        prpl_count = int(prpl_plugin.xpath('buddies')[0].text.strip())
-      except: prpl_count = 0
+    for item in plugin_count_dict:
+      stats_plugin[item['p_id']] = item['p_id__count']
 
-      if prpl_name in stats_prpl_p:
-        stats_prpl_p[prpl_name] += 1
-        stats_prpl_p_u[prpl_name] += prpl_count
-      else: stats_prpl_p['unknown'] += 1
-
-    # Loadable/User plugins
-
-    plugins = stats_dom.xpath('/stats/plugins/plugin')
-
-    for plugin in plugins:
-
-      plugin_name = plugin.attrib['id'].lower()
-
-      if plugin_name in stats_plugin.keys():
-        stats_plugin[plugin_name] += 1
-      else: stats_plugin[plugin_name] = 1
-
-  for prpl_name in stats_prpl_p_u.keys():
-    if prpl_name!='prpl-irc' and stats_prpl_p[prpl_name]>0:
-      stats_prpl_p_u[prpl_name] /= stats_prpl_p[prpl_name]
-
-  print stats_dict
   return stats_dict
 
 def sorted_dict(d, cut=None):
@@ -299,6 +203,7 @@
 def plugins_detail(request):
 
   stats_dict = process_stats()
+  print 'STATS DICT', stats_dict
 
   return render_to_response('display/plugins-detail.html',{
       'plugin_cat': simplejson.dumps(sorted_dict(stats_dict[PLUGIN_STR])[0]),
diff --git a/pidgin_stats_collector/settings.py b/pidgin_stats_collector/settings.py
--- a/pidgin_stats_collector/settings.py
+++ b/pidgin_stats_collector/settings.py
@@ -17,7 +17,7 @@
         'ENGINE': 'django.db.backends.mysql', # Add 'postgresql_psycopg2', 'postgresql', 'mysql', 'sqlite3' or 'oracle'.
         'NAME': 'pidgin',                      # Or path to database file if using sqlite3.
         'USER': 'root',                      # Not used with sqlite3.
-        'PASSWORD': 'xx',                  # Not used with sqlite3.
+        'PASSWORD': 'bruteforce',                  # Not used with sqlite3.
         'HOST': 'localhost',                      # Set to empty string for localhost. Not used with sqlite3.
         'PORT': '3306',                      # Set to empty string for default. Not used with sqlite3.
     }
diff --git a/pidgin_stats_collector/statscollector/admin.py b/pidgin_stats_collector/statscollector/admin.py
--- a/pidgin_stats_collector/statscollector/admin.py
+++ b/pidgin_stats_collector/statscollector/admin.py
@@ -5,4 +5,27 @@
   list_display = ['hash_id','timestamp']
   search_fields = ['hash_id']
 
+class AccountAdmin(admin.ModelAdmin):
+  list_display = ['raw_xml', 'prpl_name','buddies']
+  search_fields = ['prpl_name']
+



More information about the Commits mailing list