[or-cvs] r20534: {torflow} Add dirreq parsing code and factor out estimator into its own file (torflow/trunk/NetworkScanners/BwAuthority/extras)
mikeperry at seul.org
mikeperry at seul.org
Sat Sep 12 04:09:32 UTC 2009
Author: mikeperry
Date: 2009-09-12 00:09:31 -0400 (Sat, 12 Sep 2009)
New Revision: 20534
Added:
torflow/trunk/NetworkScanners/BwAuthority/extras/logistic.py
torflow/trunk/NetworkScanners/BwAuthority/extras/parsedireq.py
Modified:
torflow/trunk/NetworkScanners/BwAuthority/extras/parseplatform.py
Log:
Add dirreq parsing code and factor out estimator into
its own file.
Added: torflow/trunk/NetworkScanners/BwAuthority/extras/logistic.py
===================================================================
--- torflow/trunk/NetworkScanners/BwAuthority/extras/logistic.py (rev 0)
+++ torflow/trunk/NetworkScanners/BwAuthority/extras/logistic.py 2009-09-12 04:09:31 UTC (rev 20534)
@@ -0,0 +1,32 @@
+#!/usr/bin/python
+
+import math
+
+# From http://en.wikipedia.org/wiki/Logistic_distribution#Quantile_function
+# Q(p) = u + s*ln(p/(1-p))
+# We want to solve for u and s. So we want a linear regression to solve:
+# time = u + s*ln(upgrade_rate/(1-upgrade_rate))
+
+# http://en.wikipedia.org/wiki/Simple_linear_regression#Fitting_the_regression_line
+# So u=> Alpha; s=>Beta and x=>ln(upgrade_rate/(1-upgrade_rate))
+# and time => y
+
+def estimate(upgrade_table):
+ y = {}
+ for t in upgrade_table.iterkeys():
+ x = math.log(upgrade_table[t]/(1.0-upgrade_table[t]))
+ y[x] = t
+
+ y_ = sum(y.itervalues())/len(y)
+ x_ = sum(y.iterkeys())/len(y)
+
+ xy__ = sum(map(lambda x: x*y[x], y.iterkeys()))/len(y)
+
+ x2_ = sum(map(lambda x: x*x, y.iterkeys()))/len(y)
+
+ s = Beta = (xy__ - x_*y_)/(x2_ - x_*x_)
+
+ u = Alpha = y_ - Beta*x_
+
+ return (u,s)
+
Added: torflow/trunk/NetworkScanners/BwAuthority/extras/parsedireq.py
===================================================================
--- torflow/trunk/NetworkScanners/BwAuthority/extras/parsedireq.py (rev 0)
+++ torflow/trunk/NetworkScanners/BwAuthority/extras/parsedireq.py 2009-09-12 04:09:31 UTC (rev 20534)
@@ -0,0 +1,80 @@
+#!/usr/bin/python
+
+# <armadev> mikeperry: v3 fetchers fetch about 8 times a day
+# <armadev> v2 fetchers fetch about 48 times a day
+# <armadev> except, as karsten pointed out, the v2 fetchers might fetch
+# more than that, since some fail and they retry
+# So v2 fetches are 48/8 = 6.0 times more frequent than v3 fetches (NOTE: the scaling below divides v2 counts by 8.0, not 6.0 -- confirm which is intended).
+
+# written
+# n-ns-reqs
+# n-v2-ns-reqs
+
+# n-ns-ip
+# n-v2-ip
+
+import time
+import math
+import sys
+
+def total_countries(l):
+ reqs = 0
+ l = l.split(" ")
+ if len(l) != 2:
+ print l
+ sys.exit(1)
+ l = l[1].split(",")
+ for c in l:
+ c = c.split("=")
+ reqs += int(c[1])
+ return reqs
+
+f = open("trusted-dirreq", "r")
+
+# t0 from the dirreq dataset:
+t0 = time.mktime(time.strptime("2007-03-05", "%Y-%m-%d"))
+
+upgrade_ip_table = {}
+upgrade_req_table = {}
+
+l = f.readline()
+while l:
+ l = l.split(" ")
+ if l[0] == "written":
+ written = time.mktime(time.strptime(l[1], "%Y-%m-%d"))
+ nsreqs = 0
+ v2reqs = 0
+ nsips = 0
+ v2ips = 0
+ l = f.readline()
+ while l and not l.startswith("ns-ips"): l = f.readline()
+ nsreqs = total_countries(l)
+ l = f.readline()
+ while l and not l.startswith("ns-v2-ips"): l = f.readline()
+ v2reqs = total_countries(l)
+ l = f.readline()
+ while l and not l.startswith("n-ns-reqs"): l = f.readline()
+ nsips = total_countries(l)
+ l = f.readline()
+ while l and not l.startswith("n-v2-ns-reqs"): l = f.readline()
+ v2ips = total_countries(l)
+
+ #print "Written at "+time.ctime(written)+" v3-ip: "+str(nsips)+\
+ # " v2-ip: "+str(v2ips)+" v3-reqs: "+str(nsreqs)+\
+ # " v2-reqs "+str(v2reqs)
+ upgrade_ip_table[written-t0] = nsips/(nsips+(v2ips/8.0))
+ upgrade_req_table[written-t0] = nsreqs/(nsreqs+(v2reqs/8.0))
+
+ l = f.readline()
+
+
+import logistic
+
+(u_ip, s_ip) = logistic.estimate(upgrade_ip_table)
+(u_req, s_req) = logistic.estimate(upgrade_req_table)
+
+print "s_ip="+str(s_ip)+", u_ip="+str(u_ip)
+print "Estimate 50% IP upgrade at: "+time.ctime(t0+u_ip)
+
+print "s_req="+str(s_req)+", u_req="+str(u_req)
+print "Estimate 50% REQ upgrade at: "+time.ctime(t0+u_req)
Property changes on: torflow/trunk/NetworkScanners/BwAuthority/extras/parsedireq.py
___________________________________________________________________
Added: svn:executable
+ *
Modified: torflow/trunk/NetworkScanners/BwAuthority/extras/parseplatform.py
===================================================================
--- torflow/trunk/NetworkScanners/BwAuthority/extras/parseplatform.py 2009-09-12 02:53:45 UTC (rev 20533)
+++ torflow/trunk/NetworkScanners/BwAuthority/extras/parseplatform.py 2009-09-12 04:09:31 UTC (rev 20534)
@@ -6,6 +6,8 @@
import time
import math
+import logistic
+
f = open("platforms.csv", "r")
f.readline()
@@ -20,34 +22,7 @@
upgrade_table[t-t0] = up
+(u,s) = logistic.estimate(upgrade_table)
-# From http://en.wikipedia.org/wiki/Logistic_distribution#Quantile_function
-# Q(p) = u + s*ln(p/(1-p))
-# We want to solve for u and s. So we want a linear regression to solve:
-# time = u + s*ln(upgrade_rate/(1-upgrade_rate))
-
-# http://en.wikipedia.org/wiki/Simple_linear_regression#Fitting_the_regression_line
-# So u=> Alpha; s=>Beta and x=>ln(upgrade_rate/(1-upgrade_rate))
-# and time => y
-
-y = {}
-for t in upgrade_table.iterkeys():
- x = math.log(upgrade_table[t]/(1.0-upgrade_table[t]))
- y[x] = t
-
-
-y_ = sum(y.itervalues())/len(y)
-x_ = sum(y.iterkeys())/len(y)
-
-xy__ = sum(map(lambda x: x*y[x], y.iterkeys()))/len(y)
-
-x2_ = sum(map(lambda x: x*x, y.iterkeys()))/len(y)
-
-s = Beta = (xy__ - x_*y_)/(x2_ - x_*x_)
-
-u = Alpha = y_ - Beta*x_
-
-
print "s="+str(s)+", u="+str(u)
-
print "Estimate 50% upgrade at: "+time.ctime(t0+u)
More information about the tor-commits
mailing list