[tor-commits] [compass/master] Built improved relay selection functions.

karsten at torproject.org karsten at torproject.org
Mon Jan 7 07:09:40 UTC 2013


commit 0ec37f81f7102e5078ae38b651513f96189d9f01
Author: Chris Wacek <cwacek at cs.georgetown.edu>
Date:   Thu Dec 20 16:33:41 2012 -0500

    Built improved relay selection functions.
    
    These replace the existing selection functions, which were pretty
    inefficient (write to string, parse it back again), with ones that
    pass Python objects around.
---
 compass.py |  128 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 128 insertions(+), 0 deletions(-)

diff --git a/compass.py b/compass.py
index 520a017..40b564a 100755
--- a/compass.py
+++ b/compass.py
@@ -253,7 +253,135 @@ class RelayStats(object):
 
     WEIGHTS = ['consensus_weight_fraction', 'advertised_bandwidth_fraction', 'guard_probability', 'middle_probability', 'exit_probability']
 
+    def sort_and_reduce(self, relay_set, options):
+      """
+      Sort an already grouped and filtered list of results, keep the
+      first 'options.top' of them and number those starting at 1.
+
+      'relay_set' is sorted in place by the attribute named in
+      'options.sort' (reversed if 'options.sort_reverse').  A negative
+      'options.top' keeps everything; 'options' itself is not modified.
+      Returns a dict: 'results' lists the kept entries, 'excluded' sums
+      the rates of the entries cut off and 'total' those of the whole set
+      (both None if nothing was cut; 'total' also None if 'cw' > 99.9).
+      """
+      excluded_relays = total_relays = None
+
+      relay_set.sort(key=lambda r: getattr(r, options.sort), reverse=options.sort_reverse)
+
+      # Use a local so the caller's options are left untouched
+      top = options.top
+      if top < 0:
+        top = len(relay_set)
+
+      # Add selected relays to the result set
+      output_relays = relay_set[:top]
+      for i, selected_relay in enumerate(output_relays):
+        selected_relay.index = i + 1
+
+      # Figure out what the 'remainder' numbers are
+      if len(relay_set) > top:
+        if options.by_country and options.by_as:
+          filtered = "countries and ASes"
+        elif options.by_country:
+          filtered = "countries"
+        elif options.by_as:
+          filtered = "ASes"
+        else:
+          filtered = "relays"
+
+        # Sum up all the rates: every entry counts towards the total,
+        # but only those beyond the first 'top' count as excluded
+        excluded_relays = util.Result(zero_probs=True)
+        total_relays = util.Result(zero_probs=True)
+        for i, relay in enumerate(relay_set):
+          total_relays.p_guard += relay.p_guard
+          total_relays.p_exit += relay.p_exit
+          total_relays.p_middle += relay.p_middle
+          total_relays.adv_bw += relay.adv_bw
+          total_relays.cw += relay.cw
+          if i >= top:
+            excluded_relays.p_guard += relay.p_guard
+            excluded_relays.p_exit += relay.p_exit
+            excluded_relays.p_middle += relay.p_middle
+            excluded_relays.adv_bw += relay.adv_bw
+            excluded_relays.cw += relay.cw
+
+        excluded_relays.fp = "(%d other %s)" % (len(relay_set) - top, filtered)
+        total_relays.fp = "(total in selection)"
+
+        # Skip the total line when the selection already sums to more
+        # than 99.9 in 'cw' (presumably percent of consensus weight)
+        if total_relays.cw > 99.9:
+          total_relays = None
+
+      return {'results': output_relays,
+              'excluded': excluded_relays,
+              'total': total_relays}
+
+
+    def select_relays(self, grouped_relays, country=None, ases=None, by_country=False, by_as_number=False, links=False):
+      """
+      Return a list with one util.Result per group in 'grouped_relays'.
+
+      Each result carries the group's summed RelayStats.WEIGHTS scaled by
+      100 and the details of the group's last relay; with 'by_country' or
+      'by_as_number' those details are replaced by per-group summaries.
+      """
+      results = []
+      for group in grouped_relays.values():
+        # Initialize the per-group accumulators
+        group_weights = dict.fromkeys(RelayStats.WEIGHTS, 0)
+        relays_in_group, exits_in_group, guards_in_group = 0, 0, 0
+        ases_in_group = set()
+        result = util.Result()
+        for relay in group:
+          for weight in RelayStats.WEIGHTS:
+            group_weights[weight] += relay.get(weight, 0)
+
+          result.nick = relay['nickname']
+          result.link = links
+          result.fp = relay['fingerprint']
+
+          flags = set(relay['flags'])
+          is_exit = 'Exit' in flags and 'BadExit' not in flags
+          is_guard = 'Guard' in flags
+          result.exit = 'Exit' if is_exit else '-'
+          result.guard = 'Guard' if is_guard else '-'
+          exits_in_group += int(is_exit)
+          guards_in_group += int(is_guard)
+          result.cc = relay.get('country', '??').upper()
+          result.as_no = relay.get('as_number', '??')
+          result.as_name = relay.get('as_name', '??')
+          result.as_info = "%s %s" % (result.as_no, result.as_name)
+          ases_in_group.add(result.as_info)
+          relays_in_group += 1
+
+        # When grouping, per-relay fields are replaced by group summaries;
+        # the country becomes "*" unless grouped or filtered by country
+        if by_country or by_as_number:
+          result.nick = "*"
+          result.fp = "(%d relays)" % relays_in_group
+          result.exit = "(%d)" % exits_in_group
+          result.guard = "(%d)" % guards_in_group
+          if not by_as_number and not ases:
+            result.as_info = "(%s)" % len(ases_in_group)
+          if not by_country and not country:
+            result.cc = "*"
+
+        # Store the weight sums as percentages (util.Result item assignment)
+        result['cw'] = group_weights['consensus_weight_fraction'] * 100.0
+        result['adv_bw'] = group_weights['advertised_bandwidth_fraction'] * 100.0
+        result['p_guard'] = group_weights['guard_probability'] * 100.0
+        result['p_middle'] = group_weights['middle_probability'] * 100.0
+        result['p_exit'] = group_weights['exit_probability'] * 100.0
+
+        results.append(result)
+
+      return results
+
     def format_and_sort_groups(self, grouped_relays, country=None, ases=None, by_country=False, by_as_number=False, links=False):
+
         formatted_groups = {}
         for group in grouped_relays.values():
             group_weights = dict.fromkeys(RelayStats.WEIGHTS, 0)





More information about the tor-commits mailing list