[tor-commits] [metrics-tasks/master] Use advertised bandwidths
karsten at torproject.org
karsten at torproject.org
Tue Jul 24 10:11:20 UTC 2012
commit cabd3c6397f3fdc7a50efb33b847d73d0e54b703
Author: Sathyanarayanan Gunasekaran <gsathya.ceg at gmail.com>
Date: Sun Jul 22 14:49:14 2012 +0530
Use advertised bandwidths
Parse the server descriptors and find the advertised
bandwidths
---
task-6232/pyentropy.py | 86 ++++++++++++++++++++++++++++++++++-------------
1 files changed, 62 insertions(+), 24 deletions(-)
diff --git a/task-6232/pyentropy.py b/task-6232/pyentropy.py
index 4328f98..448cb44 100644
--- a/task-6232/pyentropy.py
+++ b/task-6232/pyentropy.py
@@ -21,17 +21,21 @@ import os
import pygeoip
import StringIO
import stem.descriptor
-from stem.descriptor.server_descriptor import RelayDescriptor, BridgeDescriptor
-from binascii import b2a_hex, a2b_base64, a2b_hex
+
from optparse import OptionParser
+from binascii import b2a_hex, a2b_base64, a2b_hex
+from stem.descriptor.server_descriptor import RelayDescriptor, BridgeDescriptor
-KEYS = ['r','s','v','w','p','m']
+KEYS = ['r', 's', 'v', 'w','p', 'm']
class Router:
def __init__(self):
self.lines = []
self.nick = None
+ self.digest = None
+ self.hex_digest = None
self.bandwidth = None
+ self.advertised_bw = None
self.flags = None
self.probability = None
self.ip = None
@@ -40,31 +44,50 @@ class Router:
self.as_name = None
self.is_exit = None
self.is_guard = None
-
+
def add(self, key, values):
if key == 'r':
self.nick = values[0]
+ self.digest = values[2]
+ self.hex_digest = b2a_hex(a2b_base64(self.digest+"="))
self.ip = values[5]
self.country = gi_db.country_name_by_addr(self.ip)
self.as_no, self.as_name = self.get_as_details()
if key == 'w':
- self.bandwidth = int(values[0].split('=')[1])
+ self.advertised_bw = self.get_advertised_bw()
+ if self.advertised_bw:
+ self.bandwidth = self.advertised_bw
+ else:
+ self.bandwidth = int(values[0].split('=')[1])
if key == 's':
self.flags = values
if "Exit" in self.flags:
self.is_exit = True
if "Guard" in self.flags:
self.is_guard = True
-
+
def get_as_details(self):
try:
value = as_db.org_by_addr(str(self.ip)).split()
return value[0], value[1]
except:
return None, None
-
+
+ def get_advertised_bw(self):
+ try:
+ with open(options.server_desc+self.hex_digest) as f:
+ data = f.read()
+
+ desc_iter = stem.descriptor.server_descriptor.parse_file(StringIO.StringIO(data))
+ desc_entries = list(desc_iter)
+ desc = desc_entries[0]
+ return min(desc.average_bandwidth, desc.burst_bandwidth, desc.observed_bandwidth)
+ except:
+ return None
+
def run(file_name):
routers = []
+ Wed, Wee, Wgd, Wgg = 1, 1, 1, 1
# parse consensus
with open(file_name, 'r') as f:
for line in f.readlines():
@@ -78,20 +101,28 @@ def run(file_name):
routers.append(router)
elif key == 'valid-after':
valid_after = ' '.join(values)
+ elif key == 'bandwidth-weights':
+ Wed = float(values[6].split('=')[1]) / 10000
+ Wee = float(values[7].split('=')[1]) / 10000
+ Wgd = float(values[11].split('=')[1]) / 10000
+ Wgg = float(values[12].split('=')[1]) / 10000
elif key in KEYS:
router.add(key, values)
-
+
total_bw, total_exit_bw, total_guard_bw = 0, 0, 0
- guards_no, exists_no = 0, 0
+ guards_no, exits_no = 0, 0
bw_countries, bw_as = {}, {}
for router in routers:
total_bw += router.bandwidth
- if router.is_guard:
- total_guard_bw += router.bandwidth
+ if router.is_guard and router.is_exit:
+ total_guard_bw += Wgd*router.bandwidth
+ total_exit_bw += Wed*router.bandwidth
+ elif router.is_guard:
+ total_guard_bw += Wgg*router.bandwidth
guards_no += 1
- if router.is_exit:
- total_exit_bw += router.bandwidth
- exists_no += 1
+ elif router.is_exit:
+ total_exit_bw += Wee*router.bandwidth
+ exits_no += 1
if bw_countries.has_key(router.country):
bw_countries[router.country] += router.bandwidth
else:
@@ -101,7 +132,7 @@ def run(file_name):
bw_as[router.as_no] += router.bandwidth
else:
bw_as[router.as_no] = router.bandwidth
-
+
if len(routers) <= 0:
return
@@ -110,12 +141,19 @@ def run(file_name):
p = float(router.bandwidth) / float(total_bw)
if p != 0:
entropy += -(p * math.log(p, 2))
- if router.is_guard:
- p = float(router.bandwidth) / float(total_guard_bw)
+ if router.is_guard and router.is_exit:
+ p = float(Wgd*router.bandwidth) / float(total_guard_bw)
+ if p != 0:
+ entropy_guard += -(p * math.log(p, 2))
+ p = float(Wed*router.bandwidth) / float(total_exit_bw)
+ if p != 0:
+ entropy_exit += -(p * math.log(p, 2))
+ elif router.is_guard:
+ p = float(Wgg*router.bandwidth) / float(total_guard_bw)
if p != 0:
entropy_guard += -(p * math.log(p, 2))
- if router.is_exit:
- p = float(router.bandwidth) / float(total_exit_bw)
+ elif router.is_exit:
+ p = float(Wee*router.bandwidth) / float(total_exit_bw)
if p != 0:
entropy_exit += -(p * math.log(p, 2))
@@ -132,7 +170,7 @@ def run(file_name):
# Entropy of uniform distribution of 'n' possible values: log(n)
max_entropy = math.log(len(routers), 2)
max_entropy_guard = math.log(guards_no, 2)
- max_entropy_exit = math.log(exists_no, 2)
+ max_entropy_exit = math.log(exits_no, 2)
max_entropy_country = math.log(len(bw_countries), 2)
max_entropy_as = math.log(len(bw_as), 2)
@@ -151,18 +189,18 @@ def run(file_name):
def parse_args():
usage = "Usage - python pyentropy.py [options]"
parser = OptionParser(usage)
-
+
parser.add_option("-g", "--geoip", dest="gi_db", default="GeoIP.dat", help="Input GeoIP database")
parser.add_option("-a", "--as", dest="as_db", default="GeoIPASNum.dat", help="Input AS GeoIP database")
+ parser.add_option("-s", "--server_desc", dest="server_desc", default="data/relay-descriptors/server-descriptors/", help="Server descriptors directory")
parser.add_option("-o", "--output", dest="output", default="entropy.csv", help="Output filename")
parser.add_option("-c", "--consensus", dest="consensus", default="in/consensus", help="Input consensus dir")
-
+
(options, args) = parser.parse_args()
-
+
return options
if __name__ == "__main__":
-
options = parse_args()
gi_db = pygeoip.GeoIP(options.gi_db)
as_db = pygeoip.GeoIP(options.as_db)
More information about the tor-commits
mailing list