[tor-commits] [metrics-tasks/master] Find entropy based on AS No
karsten at torproject.org
karsten at torproject.org
Tue Jul 24 10:11:20 UTC 2012
commit 687d1e8b90e5a413aa56cec17abab097745bde8e
Author: Sathyanarayanan Gunasekaran <gsathya.ceg at gmail.com>
Date: Sat Jul 21 13:26:31 2012 +0530
Find entropy based on AS No
Use -a/--as to specify the AS db
---
task-6232/pyentropy.py | 55 +++++++++++++++++++++++++++++++++++++----------
1 files changed, 43 insertions(+), 12 deletions(-)
diff --git a/task-6232/pyentropy.py b/task-6232/pyentropy.py
index 1a981fe..fa50406 100644
--- a/task-6232/pyentropy.py
+++ b/task-6232/pyentropy.py
@@ -9,7 +9,9 @@ Output - A CSV file of the format (without newlines):
<entropy for guard nodes>,
<max entropy for guard nodes>,
<entropy for countries>,
- <max entropy for countries>
+ <max entropy for countries>,
+ <entropy for AS>,
+ <max entropy for AS>
rsync -arz --delete metrics.torproject.org::metrics-recent/relay-descriptors/consensuses in
"""
@@ -17,6 +19,10 @@ import sys
import math
import os
import pygeoip
+import StringIO
+import stem.descriptor
+from stem.descriptor.server_descriptor import RelayDescriptor, BridgeDescriptor
+from binascii import b2a_hex, a2b_base64, a2b_hex
from optparse import OptionParser
KEYS = ['r','s','v','w','p','m']
@@ -30,6 +36,8 @@ class Router:
self.probability = None
self.ip = None
self.country = None
+ self.as_no = None
+ self.as_name = None
self.is_exit = None
self.is_guard = None
@@ -37,7 +45,8 @@ class Router:
if key == 'r':
self.nick = values[0]
self.ip = values[5]
- self.country = gi.country_name_by_addr(self.ip)
+ self.country = gi_db.country_name_by_addr(self.ip)
+ self.as_no, self.as_name = self.get_as_details()
if key == 'w':
self.bandwidth = int(values[0].split('=')[1])
if key == 's':
@@ -47,6 +56,13 @@ class Router:
if "Guard" in self.flags:
self.is_guard = True
+ def get_as_details(self):
+ try:
+ value = as_db.org_by_addr(str(self.ip)).split()
+ return value[0], value[1]
+ except:
+ return None, None
+
def run(file_name):
routers = []
# parse consensus
@@ -67,7 +83,7 @@ def run(file_name):
totalBW, totalExitBW, totalGuardBW = 0, 0, 0
guards_n, exits_n = 0, 0
- bw_countries = {}
+ bw_countries, bw_as = {}, {}
for router in routers:
totalBW += router.bandwidth
if router.is_guard:
@@ -79,12 +95,17 @@ def run(file_name):
if bw_countries.has_key(router.country):
bw_countries[router.country] += router.bandwidth
else:
- bw_countries[router.country] = router.bandwidth
+ bw_countries[router.country] = router.bandwidth
+ if router.as_no:
+ if bw_as.has_key(router.as_no):
+ bw_as[router.as_no] += router.bandwidth
+ else:
+ bw_as[router.as_no] = router.bandwidth
if len(routers) <= 0:
return
-
- entropy, entropy_exit, entropy_guard, entropy_country = 0.0, 0.0, 0.0, 0.0
+
+ entropy, entropy_exit, entropy_guard, entropy_country, entropy_as = 0.0, 0.0, 0.0, 0.0, 0.0
for router in routers:
p = float(router.bandwidth) / float(totalBW)
if p != 0:
@@ -97,18 +118,24 @@ def run(file_name):
p = float(router.bandwidth) / float(totalExitBW)
if p != 0:
entropy_exit += -(p * math.log(p, 2))
-
+
for country in bw_countries.iterkeys():
p = float(bw_countries[country]) / float(totalBW)
if p != 0:
entropy_country += -(p * math.log(p, 2))
-
+
+ for as_no in bw_as.iterkeys():
+ p = float(bw_as[as_no]) / float(totalBW)
+ if p !=0:
+ entropy_as += -(p * math.log(p, 2))
+
# Entropy of uniform distribution of 'n' possible values: log(n)
max_entropy = math.log(len(routers), 2)
max_entropy_guard = math.log(guards_n, 2)
max_entropy_exit = math.log(exits_n, 2)
max_entropy_country = math.log(len(bw_countries), 2)
-
+ max_entropy_as = math.log(len(bw_as), 2)
+
return ",".join([valid_after,
str(entropy),
str(max_entropy),
@@ -117,13 +144,16 @@ def run(file_name):
str(entropy_guard),
str(max_entropy_guard),
str(entropy_country),
- str(max_entropy_country)])
+ str(max_entropy_country),
+ str(entropy_as),
+ str(max_entropy_as)])
def parse_args():
usage = "Usage - python pyentropy.py [options]"
parser = OptionParser(usage)
- parser.add_option("-g", "--geoip", dest="geoip", default="GeoIP.dat", help="Input GeoIP database")
+ parser.add_option("-g", "--geoip", dest="gi_db", default="GeoIP.dat", help="Input GeoIP database")
+ parser.add_option("-a", "--as", dest="as_db", default="GeoIPASNum.dat", help="Input AS GeoIP database")
parser.add_option("-o", "--output", dest="output", default="entropy.csv", help="Output filename")
parser.add_option("-c", "--consensus", dest="consensus", default="in/consensus", help="Input consensus dir")
@@ -134,7 +164,8 @@ def parse_args():
if __name__ == "__main__":
options = parse_args()
- gi = pygeoip.GeoIP(options.geoip)
+ gi_db = pygeoip.GeoIP(options.gi_db)
+ as_db = pygeoip.GeoIP(options.as_db)
with open(options.output, 'w') as f:
for file_name in os.listdir(options.consensus):
More information about the tor-commits mailing list