[or-cvs] r20534: {torflow} Add dirreq parsing code and factor out estimator into its ow (torflow/trunk/NetworkScanners/BwAuthority/extras)

Sat Sep 12 04:09:32 UTC 2009

Author: mikeperry
Date: 2009-09-12 00:09:31 -0400 (Sat, 12 Sep 2009)
New Revision: 20534

Added:
   torflow/trunk/NetworkScanners/BwAuthority/extras/logistic.py
   torflow/trunk/NetworkScanners/BwAuthority/extras/parsedireq.py
Modified:
   torflow/trunk/NetworkScanners/BwAuthority/extras/parseplatform.py
Log:

Add dirreq parsing code and factor out estimator into
its own file.



Added: torflow/trunk/NetworkScanners/BwAuthority/extras/logistic.py
===================================================================

--- torflow/trunk/NetworkScanners/BwAuthority/extras/logistic.py	                        (rev 0)
+++ torflow/trunk/NetworkScanners/BwAuthority/extras/logistic.py	2009-09-12 04:09:31 UTC (rev 20534)
@@ -0,0 +1,43 @@
+#!/usr/bin/python
+
+import math
+
+# From http://en.wikipedia.org/wiki/Logistic_distribution#Quantile_function
+# Q(p) = u + s*ln(p/(1-p))
+# We want to solve for u and s. So we want a linear regression to solve:
+# time = u + s*ln(upgrade_rate/(1-upgrade_rate))
+
+# http://en.wikipedia.org/wiki/Simple_linear_regression#Fitting_the_regression_line
+# So u=> Alpha; s=>Beta and x=>ln(upgrade_rate/(1-upgrade_rate))
+# and time => y
+
+def estimate(upgrade_table):
+  """Fit the logistic quantile function to observed upgrade rates.
+
+  upgrade_table maps a time offset to the fraction upgraded at that time;
+  every fraction must lie strictly between 0 and 1. Returns (u, s), where
+  u is the estimated time of 50% upgrade (same units as the keys) and s
+  is the logistic scale parameter.
+
+  Raises ValueError for a fraction of 0, and ZeroDivisionError for a
+  fraction of 1, an empty table, or a table with a single sample.
+  """
+  # Keep the samples as (x, y) pairs. Keying a dict on x would silently
+  # drop every sample but one whenever two times share an upgrade rate.
+  points = []
+  for t, rate in upgrade_table.items():
+    points.append((math.log(rate/(1.0-rate)), t))
+
+  # float() so that integer time offsets do not truncate the means.
+  n = float(len(points))
+
+  y_ = sum([y for (x, y) in points])/n
+  x_ = sum([x for (x, y) in points])/n
+  xy__ = sum([x*y for (x, y) in points])/n
+  x2_ = sum([x*x for (x, y) in points])/n
+
+  s = Beta = (xy__ - x_*y_)/(x2_ - x_*x_)
+  u = Alpha = y_ - Beta*x_
+
+  return (u,s)
+

Added: torflow/trunk/NetworkScanners/BwAuthority/extras/parsedireq.py
===================================================================
--- torflow/trunk/NetworkScanners/BwAuthority/extras/parsedireq.py	                        (rev 0)
+++ torflow/trunk/NetworkScanners/BwAuthority/extras/parsedireq.py	2009-09-12 04:09:31 UTC (rev 20534)
@@ -0,0 +1,93 @@
+#!/usr/bin/python
+
+# <armadev> mikeperry: v3 fetchers fetch about 8 times a day
+# <armadev> v2 fetchers fetch about 48 times a day
+# <armadev> except, as karsten pointed out, the v2 fetchers might fetch
+#           more than that, since some fail and they retry
+# So v2 fetches are 6.0 times more frequent than v3 fetches.
+
+# Each record of the input starts with a "written" line. The lines read
+# from a record, in the order they must appear, start with:
+#   ns-ips
+#   ns-v2-ips
+#   n-ns-reqs
+#   n-v2-ns-reqs
+
+import time
+import math
+import sys
+
+import logistic
+
+# v2 fetches per v3 fetch, from the figures quoted above (48/8).
+V2_FETCHES_PER_V3_FETCH = 48/8.0
+
+def total_countries(l):
+  """Sum the counts on one "<keyword> cc=N,cc=N,..." line.
+
+  Prints the split line and exits with status 1 unless the line consists
+  of exactly two space-separated fields.
+  """
+  fields = l.rstrip("\r\n").split(" ")
+  if len(fields) != 2:
+    print fields
+    sys.exit(1)
+  reqs = 0
+  for entry in fields[1].split(","):
+    # An empty list (nothing after the keyword) contributes no counts.
+    if not entry:
+      continue
+    reqs += int(entry.split("=")[1])
+  return reqs
+
+f = open("trusted-dirreq", "r")
+
+# t0 from the dirreq dataset:
+t0 = time.mktime(time.strptime("2007-03-05", "%Y-%m-%d"))
+
+upgrade_ip_table = {}
+upgrade_req_table = {}
+
+l = f.readline()
+while l:
+  fields = l.split(" ")
+  if fields[0] == "written":
+    written = time.mktime(time.strptime(fields[1], "%Y-%m-%d"))
+    # Scan forward to each count line in turn. If the file ends first,
+    # total_countries() gets an empty string and exits the script.
+    l = f.readline()
+    while l and not l.startswith("ns-ips"): l = f.readline()
+    nsips = total_countries(l)
+    l = f.readline()
+    while l and not l.startswith("ns-v2-ips"): l = f.readline()
+    v2ips = total_countries(l)
+    l = f.readline()
+    while l and not l.startswith("n-ns-reqs"): l = f.readline()
+    nsreqs = total_countries(l)
+    l = f.readline()
+    while l and not l.startswith("n-v2-ns-reqs"): l = f.readline()
+    v2reqs = total_countries(l)
+
+    #print "Written at "+time.ctime(written)+" v3-ip: "+str(nsips)+\
+    #      " v2-ip: "+str(v2ips)+" v3-reqs: "+str(nsreqs)+\
+    #      " v2-reqs "+str(v2reqs)
+
+    # Scale the v2 counts down by the fetch-frequency ratio so they are
+    # comparable with the v3 counts.
+    v2ips_scaled = v2ips/V2_FETCHES_PER_V3_FETCH
+    v2reqs_scaled = v2reqs/V2_FETCHES_PER_V3_FETCH
+    upgrade_ip_table[written-t0] = nsips/(nsips+v2ips_scaled)
+    upgrade_req_table[written-t0] = nsreqs/(nsreqs+v2reqs_scaled)
+
+  l = f.readline()
+
+f.close()
+
+(u_ip, s_ip) = logistic.estimate(upgrade_ip_table)
+(u_req, s_req) = logistic.estimate(upgrade_req_table)
+
+print "s_ip="+str(s_ip)+", u_ip="+str(u_ip)
+print "Estimate 50% IP upgrade at: "+time.ctime(t0+u_ip)
+
+print "s_req="+str(s_req)+", u_req="+str(u_req)
+print "Estimate 50% REQ upgrade at: "+time.ctime(t0+u_req)


Property changes on: torflow/trunk/NetworkScanners/BwAuthority/extras/parsedireq.py
___________________________________________________________________
Added: svn:executable
   + *

Modified: torflow/trunk/NetworkScanners/BwAuthority/extras/parseplatform.py
===================================================================
--- torflow/trunk/NetworkScanners/BwAuthority/extras/parseplatform.py	2009-09-12 02:53:45 UTC (rev 20533)
+++ torflow/trunk/NetworkScanners/BwAuthority/extras/parseplatform.py	2009-09-12 04:09:31 UTC (rev 20534)
@@ -6,6 +6,8 @@
 import time
 import math
 
+import logistic
+
 f = open("platforms.csv", "r")
 f.readline()
 
@@ -20,34 +22,7 @@
   upgrade_table[t-t0] = up
 
 
+(u,s) = logistic.estimate(upgrade_table)
 
-# From http://en.wikipedia.org/wiki/Logistic_distribution#Quantile_function
-# Q(p) = u + s*ln(p/(1-p))
-# We want to solve for u and s. So we want a linear regression to solve:
-# time = u + s*ln(upgrade_rate/(1-upgrade_rate))
-
-# http://en.wikipedia.org/wiki/Simple_linear_regression#Fitting_the_regression_line
-# So u=> Alpha; s=>Beta and x=>ln(upgrade_rate/(1-upgrade_rate))
-# and time => y
-
-y = {}
-for t in upgrade_table.iterkeys():
-  x = math.log(upgrade_table[t]/(1.0-upgrade_table[t]))
-  y[x] = t
-
-
-y_ = sum(y.itervalues())/len(y)
-x_ = sum(y.iterkeys())/len(y)
-
-xy__ = sum(map(lambda x: x*y[x], y.iterkeys()))/len(y)
-
-x2_ = sum(map(lambda x: x*x, y.iterkeys()))/len(y)
-
-s = Beta = (xy__ - x_*y_)/(x2_ - x_*x_)
-
-u = Alpha = y_ - Beta*x_
-
-
 print "s="+str(s)+", u="+str(u)
-
 print "Estimate 50% upgrade at: "+time.ctime(t0+u)