[tor-commits] [compass/master] Refactor Sathya's almost-fast exits code a bit.

karsten at torproject.org karsten at torproject.org
Fri Aug 24 17:12:34 UTC 2012


commit 3ba4dcbe55d7a50bb3ed3eddc6a6a545087fe4e3
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Fri Aug 24 09:25:40 2012 +0200

    Refactor Sathya's almost-fast exits code a bit.
---
 compass.py |  212 +++++++++++++++++++++++++++++-------------------------------
 1 files changed, 103 insertions(+), 109 deletions(-)

diff --git a/compass.py b/compass.py
index 04b8313..770e563 100755
--- a/compass.py
+++ b/compass.py
@@ -76,119 +76,101 @@ class ExitFilter(BaseFilter):
     def accept(self, relay):
         return relay.get('exit_probability', -1) > 0.0
 
-class SameNetworkFilter(BaseFilter):
+class GuardFilter(BaseFilter):
+    def accept(self, relay):
+        return relay.get('guard_probability', -1) > 0.0
+
+class FastExitFilter(BaseFilter):
+
     class Relay(object):
         def __init__(self, relay):
             self.exit = relay.get('exit_probability')
             self.fp = relay.get('fingerprint')
             self.relay = relay
-    
-    def __init__(self):
-        self.network_data = {}
-        self.relays = []        
-    
-    def load(self, all_relays):
-        for relay in all_relays:
-            or_addresses = relay.get("or_addresses")
-            no_of_addresses = 0
-            for ip in or_addresses:
-                ip, port = ip.rsplit(':', 1)
-                # skip if ipv6
-                if ':' in ip:
-                    continue
-                no_of_addresses += 1
-                if no_of_addresses > 1:
-                    print "[WARNING] - %s has more than two OR Addresses - %s" % relay.get("fingerprint"), or_addresses
-                network = ip.rsplit('.', 1)[0]
-                relay_info = self.Relay(relay)
-                if self.network_data.has_key(network):
-                    if len(self.network_data[network]) > 1:
-                        # assume current relay to have smallest exit_probability
-                        min_exit = relay.get('exit_probability')
-                        min_id = -1
-                        for id, value in enumerate(self.network_data[network]):
-                            if value.exit < min_exit:
-                                min_exit = value.exit
-                                min_id = id
-                        if min_id != -1:
-                            del self.network_data[network][min_id]
-                            self.network_data[network].append(relay_info)
-                    else:
-                        self.network_data[network].append(relay_info)
-                else:
-                    self.network_data[network] = [relay_info]
-
-        for relay_list in self.network_data.values():
-            self.relays.extend([relay.relay for relay in relay_list])
-        return self.relays
-    
-    def accept(self, relay):
-        return relay.get('fingerprint') in self.relays_fp
-    
-class GuardFilter(BaseFilter):
-    def accept(self, relay):
-        return relay.get('guard_probability', -1) > 0.0
 
-class FastExitFilter(BaseFilter):
-    def __init__(self, bandwidth_rate, advertised_bandwidth, ports, inverse=False):
+    def __init__(self, bandwidth_rate, advertised_bandwidth, ports, same_network, inverse=False):
         self.bandwidth_rate = bandwidth_rate
         self.advertised_bandwidth = advertised_bandwidth
         self.ports = ports
+        self.same_network = same_network
         self.inverse = inverse
-    
-    def accept(self, relay):
-        if relay.get('bandwidth_rate', -1) < self.bandwidth_rate:
-            return self.inverse
-        if relay.get('advertised_bandwidth', -1) < self.advertised_bandwidth:
-            return self.inverse
-        relevant_ports = set(self.ports)
-        summary = relay.get('exit_policy_summary', {})
-        if 'accept' in summary:
-            portlist = summary['accept']
-        elif 'reject' in summary:
-            portlist = summary['reject']
-        else:
-            return self.inverse
-        ports = []
-        for p in portlist:
-            if '-' in p:
-                ports.extend(range(int(p.split('-')[0]),
-                                   int(p.split('-')[1]) + 1))
+
+    def load(self, all_relays):
+
+        # First, filter relays based on bandwidth and port requirements.
+        matching_relays = []
+        for relay in all_relays:
+            if relay.get('bandwidth_rate', -1) < self.bandwidth_rate:
+                continue
+            if relay.get('advertised_bandwidth', -1) < self.advertised_bandwidth:
+                continue
+            relevant_ports = set(self.ports)
+            summary = relay.get('exit_policy_summary', {})
+            if 'accept' in summary:
+                portlist = summary['accept']
+            elif 'reject' in summary:
+                portlist = summary['reject']
             else:
-                ports.append(int(p))
-        policy_ports = set(ports)
-        if 'accept' in summary and not relevant_ports.issubset(policy_ports):
-            return self.inverse
-        if 'reject' in summary and not relevant_ports.isdisjoint(policy_ports):
-            return self.inverse
-        return not self.inverse
-
-class AlmostFastExitFilter(BaseFilter):    
-    def load(self, relays):
-        exit_filter = FastExitFilter(95 * 125 * 1024, 5000 * 1024, [80, 443, 554, 1755], False)
-        fast_relays = exit_filter.load(relays)
-        same_network_filter = SameNetworkFilter()
-        fast_relays_with_network_restriction = same_network_filter.load(fast_relays)
-        almost_exit_filter = FastExitFilter(80* 125* 1024, 2000 * 1024, [80, 443], False)
-        almost_fast_relays = almost_exit_filter.load(relays)
-        almost_exit_filter = FastExitFilter(95 * 125 * 1024, 5000 * 1024, [80, 443, 554, 1755], True)
-        almost_fast_relays = almost_exit_filter.load(almost_fast_relays)
-        diffed_relays = self.diff(fast_relays, fast_relays_with_network_restriction)
-        return self.union(diffed_relays, almost_fast_relays)
-    
-    def diff(self, relays_a, relays_b):
-        relays_a = dict([(relay.get('fingerprint'), relay) for relay in relays_a])
-        relays_b = dict([(relay.get('fingerprint'), relay) for relay in relays_b])
-        total_relays = dict(relays_a.items() + relays_b.items())
-        set_diff = set(relays_a.keys()) - set(relays_b.keys())
-        return [total_relays[fp] for fp in set_diff]
-
-    def union(self, relays_a, relays_b):
-        relays_a = dict([(relay.get('fingerprint'), relay) for relay in relays_a])
-        relays_b = dict([(relay.get('fingerprint'), relay) for relay in relays_b])
-        total_relays = dict(relays_a.items() + relays_b.items())
-        set_union = set(relays_a.keys()) | set(relays_b.keys())
-        return [total_relays[fp] for fp in set_union]
+                continue
+            ports = []
+            for p in portlist:
+                if '-' in p:
+                    ports.extend(range(int(p.split('-')[0]),
+                                       int(p.split('-')[1]) + 1))
+                else:
+                    ports.append(int(p))
+            policy_ports = set(ports)
+            if 'accept' in summary and not relevant_ports.issubset(policy_ports):
+                continue
+            if 'reject' in summary and not relevant_ports.isdisjoint(policy_ports):
+                continue
+            matching_relays.append(relay)
+
+        # Second, filter relays based on same /24 requirement.
+        if self.same_network:
+            network_data = {}
+            for relay in matching_relays:
+                or_addresses = relay.get("or_addresses")
+                no_of_addresses = 0
+                for ip in or_addresses:
+                    ip, port = ip.rsplit(':', 1)
+                    # skip if ipv6
+                    if ':' in ip:
+                        continue
+                    no_of_addresses += 1
+                    if no_of_addresses > 1:
+                        print "[WARNING] - %s has more than one IPv4 OR address - %s" % relay.get("fingerprint"), or_addresses
+                    network = ip.rsplit('.', 1)[0]
+                    relay_info = self.Relay(relay)
+                    if network_data.has_key(network):
+                        if len(network_data[network]) > 1:
+                            # assume current relay to have smallest exit_probability
+                            min_exit = relay.get('exit_probability')
+                            min_id = -1
+                            for id, value in enumerate(network_data[network]):
+                                if value.exit < min_exit:
+                                    min_exit = value.exit
+                                    min_id = id
+                            if min_id != -1:
+                                del network_data[network][min_id]
+                                network_data[network].append(relay_info)
+                        else:
+                            network_data[network].append(relay_info)
+                    else:
+                        network_data[network] = [relay_info]
+            matching_relays = []
+            for relay_list in network_data.values():
+                matching_relays.extend([relay.relay for relay in relay_list])
+
+        # Either return relays meeting all requirements, or the inverse set.
+        if self.inverse:
+            inverse_relays = []
+            for relay in all_relays:
+                if relay not in matching_relays:
+                    inverse_relays.append(relay)
+            return inverse_relays
+        else:
+            return matching_relays
 
 class RelayStats(object):
     def __init__(self, options):
@@ -233,10 +215,12 @@ class RelayStats(object):
         if options.guards_only:
             filters.append(GuardFilter())
         if options.fast_exits_only:
-            filters.append(FastExitFilter(95 * 125 * 1024, 5000 * 1024, [80, 443, 554, 1755], False))
-            filters.append(SameNetworkFilter())
+            filters.append(FastExitFilter(95 * 125 * 1024, 5000 * 1024, [80, 443, 554, 1755], True, False))
         if options.almost_fast_exits_only:
-            filters.append(AlmostFastExitFilter())
+            filters.append(FastExitFilter(80 * 125 * 1024, 2000 * 1024, [80, 443], False, False))
+            filters.append(FastExitFilter(95 * 125 * 1024, 5000 * 1024, [80, 443, 554, 1755], True, True))
+        if options.fast_exits_only_any_network:
+            filters.append(FastExitFilter(95 * 125 * 1024, 5000 * 1024, [80, 443, 554, 1755], False, False))
         return filters
 
     def _get_group_function(self, options):
@@ -355,10 +339,12 @@ def create_option_parser():
                      help="select family by fingerprint or nickname (for named relays)")
     group.add_option("-g", "--guards-only", action="store_true",
                      help="select only relays suitable for guard position")
-    group.add_option("-x", "--fast-exits-only", action="store_true",
-                     help="select only 100+ Mbit/s exits allowing ports 80, 443, 554, and 1755")
-    group.add_option("-w", "--almost-fast-exits-only", action="store_true",
-                     help="select only 80+ & 95- Mbit/s exits allowing ports 80, 443, 554, and 1755")
+    group.add_option("--fast-exits-only", action="store_true",
+                     help="select only fast exits (95+ Mbit/s, 5000+ KB/s, 80/443/554/1755, 2- per /24)")
+    group.add_option("--almost-fast-exits-only", action="store_true",
+                     help="select only almost fast exits (80+ Mbit/s, 2000+ KB/s, 80/443, not in set of fast exits)")
+    group.add_option("--fast-exits-only-any-network", action="store_true",
+                     help="select only fast exits without network restriction (95+ Mbit/s, 5000+ KB/s, 80/443/554/1755")
     parser.add_option_group(group)
     group = OptionGroup(parser, "Grouping options")
     group.add_option("-A", "--by-as", action="store_true", default=False,
@@ -391,6 +377,14 @@ if '__main__' == __name__:
 
     if options.family and not re.match(r'^[A-F0-9]{40}$', options.family) and not re.match(r'^[A-Za-z0-9]{1,19}$', options.family):
         parser.error("Not a valid fingerprint or nickname: %s" % options.family)
+
+    fast_exit_options = 0
+    if options.fast_exits_only: fast_exit_options += 1
+    if options.almost_fast_exits_only: fast_exit_options += 1
+    if options.fast_exits_only_any_network: fast_exit_options += 1
+    if fast_exit_options > 1:
+        parser.error("Can only filter by one fast-exit option.")
+
     if options.download:
         download_details_file()
         print "Downloaded details.json.  Re-run without --download option."





More information about the tor-commits mailing list