[tor-commits] [metrics-tasks/master] task-6329: refactor relay selection
karsten at torproject.org
karsten at torproject.org
Tue Aug 7 07:01:42 UTC 2012
commit fa237df1e9b60ce12867df91f655cc24f4aca7d2
Author: delber <delber at riseup.net>
Date: Thu Aug 2 10:35:22 2012 +0000
task-6329: refactor relay selection
Instead of doing all our tests in RelayStats.get_relays(), we now have a set of
filters, each implemented as its own class, that are added to a filter chain
depending on command-line options.
---
task-6329/tor-relays-stats.py | 199 +++++++++++++++++++++++++----------------
1 files changed, 122 insertions(+), 77 deletions(-)
diff --git a/task-6329/tor-relays-stats.py b/task-6329/tor-relays-stats.py
index 80d1f06..4a6989b 100755
--- a/task-6329/tor-relays-stats.py
+++ b/task-6329/tor-relays-stats.py
@@ -13,10 +13,103 @@ import os.path
from optparse import OptionParser, OptionGroup
import urllib
import re
+from abc import abstractmethod
+
+class BaseFilter(object):
+ @abstractmethod
+ def accept(self, relay):
+ pass
+
+class RunningFilter(BaseFilter):
+ def accept(self, relay):
+ return relay['running']
+
+class FamilyFilter(BaseFilter):
+ def __init__(self, family, all_relays):
+ self._family_fingerprint = None
+ self._family_nickname = None
+ self._family_relays = []
+ found_relay = None
+ for relay in all_relays:
+ if len(family) == 40 and relay['fingerprint'] == family:
+ found_relay = relay
+ break
+ if len(family) < 20 and 'Named' in relay['flags'] and relay['nickname'] == family:
+ found_relay = relay
+ break
+ if found_relay:
+ self._family_fingerprint = '$%s' % found_relay['fingerprint']
+ if 'Named' in found_relay['flags']:
+ self._family_nickname = found_relay['nickname']
+ self._family_relays = [self._family_fingerprint] + found_relay.get('family', [])
+
+ def accept(self, relay):
+ fingerprint = '$%s' % relay['fingerprint']
+ mentions = [fingerprint] + relay.get('family', [])
+ if fingerprint in self._family_relays:
+ return True
+ if 'Named' in relay['flags'] and relay['nickname'] in self._family_relays:
+ return True
+ if self._family_fingerprint in mentions:
+ return True
+ if self._family_nickname in mentions:
+ return True
+ return False
+
+class CountryFilter(BaseFilter):
+ def __init__(self, countries=[]):
+ self._countries = [x.lower() for x in countries]
+
+ def accept(self, relay):
+ return relay.get('country', None) in self._countries
+
+class ASFilter(BaseFilter):
+ def __init__(self, as_sets=[]):
+ self._as_sets = [x if not x.isdigit() else "AS" + x for x in as_sets]
+
+ def accept(self, relay):
+ return relay.get('as_number', None) in self._as_sets
+
+class ExitFilter(BaseFilter):
+ def accept(self, relay):
+ return relay.get('exit_probability', -1) > 0.0
+
+class GuardFilter(BaseFilter):
+ def accept(self, relay):
+ return relay.get('guard_probability', -1) > 0.0
+
+class FastExitFilter(BaseFilter):
+ def accept(self, relay):
+ if relay.get('bandwidth_rate', -1) < 12500 * 1024:
+ return False
+ if relay.get('advertised_bandwidth', -1) < 5000 * 1024:
+ return False
+ relevant_ports = set([80, 443, 554, 1755])
+ summary = relay.get('exit_policy_summary', {})
+ if 'accept' in summary:
+ portlist = summary['accept']
+ elif 'reject' in summary:
+ portlist = summary['reject']
+ else:
+ return False
+ ports = []
+ for p in portlist:
+ if '-' in p:
+ ports.extend(range(int(p.split('-')[0]),
+ int(p.split('-')[1]) + 1))
+ else:
+ ports.append(int(p))
+ policy_ports = set(ports)
+ if 'accept' in summary and not relevant_ports.issubset(policy_ports):
+ return False
+ if 'reject' in summary and not relevant_ports.isdisjoint(policy_ports):
+ return False
+ return True
class RelayStats(object):
- def __init__(self):
+ def __init__(self, options):
self._data = None
+ self._filters = self._create_filters(options)
@property
def data(self):
@@ -24,76 +117,34 @@ class RelayStats(object):
self._data = json.load(file('details.json'))
return self._data
- def get_relays(self, countries=[], as_sets=[], exits_only=False, guards_only=False, inactive=False, fast_exits_only=False, family=None):
+ def _create_filters(self, options):
+ filters = []
+ if not options.inactive:
+ filters.append(RunningFilter())
+ if options.family:
+ filters.append(FamilyFilter(options.family, self.data['relays']))
+ if options.country:
+ filters.append(CountryFilter(options.country))
+ if options.ases:
+ filters.append(ASFilter(options.ases))
+ if options.exits_only:
+ filters.append(ExitFilter())
+ if options.guards_only:
+ filters.append(GuardFilter())
+ if options.fast_exits_only:
+ filters.append(FastExitFilter())
+ return filters
+
+ def get_relays(self):
relays = []
- family_fingerprint = None
- family_nickname = None
- family_relays = []
- if countries:
- countries = [x.lower() for x in countries]
- if as_sets:
- as_sets = [x if not x.isdigit() else "AS" + x for x in as_sets]
- if family:
- fingerprint = family if len(family) == 40 else None
- nickname = family if len(family) < 20 else None
- found_relay = None
- for relay in self.data['relays']:
- if fingerprint and relay['fingerprint'] == fingerprint:
- found_relay = relay
- break
- if nickname and 'Named' in relay['flags'] and relay['nickname'] == nickname:
- found_relay = relay
- break
- if found_relay:
- family_fingerprint = '$%s' % found_relay.get('fingerprint')
- family_nickname = found_relay['nickname'] if 'Named' in found_relay['flags'] else None
- family_relays = found_relay.get('family', [])
- family_relays.append(family_fingerprint)
for relay in self.data['relays']:
- if family:
- mentions = relay.get('family', [])
- mentions.append('$%s' % relay['fingerprint'])
- if ('$%s' % relay['fingerprint'] not in family_relays and \
- relay['nickname'] not in family_relays if 'Named' in relay['flags'] else '') or \
- (family_fingerprint not in mentions and \
- family_nickname not in mentions):
- continue
- if not inactive and inactive == relay['running']:
- continue
- if countries and not relay.get('country', ' ') in countries:
- continue
- if as_sets and not relay.get('as_number', ' ') in as_sets:
- continue
- if exits_only and not relay.get('exit_probability', -1) > 0.0:
- continue
- if guards_only and not relay.get('guard_probability', -1) > 0.0:
- continue
- if fast_exits_only:
- if relay.get('bandwidth_rate', -1) < 12500 * 1024:
- continue
- if relay.get('advertised_bandwidth', -1) < 5000 * 1024:
- continue
- relevant_ports = set([80, 443, 554, 1755])
- summary = relay.get('exit_policy_summary', {})
- if 'accept' in summary:
- portlist = summary['accept']
- elif 'reject' in summary:
- portlist = summary['reject']
- else:
- continue
- ports = []
- for p in portlist:
- if '-' in p:
- ports.extend(range(int(p.split('-')[0]),
- int(p.split('-')[1]) + 1))
- else:
- ports.append(int(p))
- policy_ports = set(ports)
- if 'accept' in summary and not relevant_ports.issubset(policy_ports):
- continue
- if 'reject' in summary and not relevant_ports.isdisjoint(policy_ports):
- continue
- relays.append(relay)
+ accepted = True
+ for f in self._filters:
+ if not f.accept(relay):
+ accepted = False
+ break
+ if accepted:
+ relays.append(relay)
return relays
def group_relays(self, relays, by_country=False, by_as_number=False):
@@ -248,14 +299,8 @@ if '__main__' == __name__:
if not os.path.exists('details.json'):
parser.error("Did not find details.json. Re-run with --download.")
- stats = RelayStats()
- relays = stats.get_relays(countries=options.country,
- as_sets=options.ases,
- exits_only=options.exits_only,
- guards_only=options.guards_only,
- inactive=options.inactive,
- fast_exits_only=options.fast_exits_only,
- family=options.family)
+ stats = RelayStats(options)
+ relays = stats.get_relays()
grouped_relays = stats.group_relays(relays,
by_country=options.by_country,
by_as_number=options.by_as)
More information about the tor-commits mailing list