[tor-commits] [onionperf/develop] Add new filter mode to filter analysis results.

karsten at torproject.org karsten at torproject.org
Wed Sep 16 15:15:08 UTC 2020


commit dcab214322356db9d975673d19146ff46ec86b4f
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Tue Aug 18 21:05:45 2020 +0200

    Add new filter mode to filter analysis results.
    
    Implements tpo/metrics/onionperf#33260.
---
 CHANGELOG.md           |   6 +++
 onionperf/analysis.py  |  11 ++++++
 onionperf/filtering.py | 100 +++++++++++++++++++++++++++++++++++++++++++++++++
 onionperf/onionperf    |  51 +++++++++++++++++++++++++
 4 files changed, 168 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b8a86ee..c57695e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,9 @@
+# Changes in version 0.7 - 2020-??-??
+
+ - Add a new `onionperf filter` mode that takes an OnionPerf analysis
+   results file as input, applies filters, and produces a new
+   OnionPerf analysis results file as output.
+
 # Changes in version 0.6 - 2020-08-08
 
  - Update to TGen 1.0.0, use TGenTools for parsing TGen log files, and
diff --git a/onionperf/analysis.py b/onionperf/analysis.py
index ea07d0e..b2f483f 100644
--- a/onionperf/analysis.py
+++ b/onionperf/analysis.py
@@ -62,6 +62,11 @@ class OPAnalysis(Analysis):
         self.json_db['data'][self.nickname]["tgen"].pop("stream_summary")
         self.did_analysis = True
 
+    def set_tgen_transfers(self, node, tgen_transfers):  # replace the TGen transfers stored for node
+        self.json_db['data'][node]['tgen']['transfers'] = tgen_transfers
+
+    def set_tgen_streams(self, node, tgen_streams):  # replace the TGen streams stored for node
+        self.json_db['data'][node]['tgen']['streams'] = tgen_streams
 
     def save(self, filename=None, output_prefix=os.getcwd(), do_compress=True, date_prefix=None):
         if filename is None:
@@ -98,6 +103,12 @@ class OPAnalysis(Analysis):
         except:
             return None
 
+    def get_tor_circuits(self, node):  # Tor circuits stored for node, or None if unavailable
+        try:
+            return self.json_db['data'][node]['tor']['circuits']
+        except (KeyError, TypeError):  # missing key or non-subscriptable level; no longer a bare except
+            return None
+
     def get_tor_streams(self, node):
         try:
             return self.json_db['data'][node]['tor']['streams']
diff --git a/onionperf/filtering.py b/onionperf/filtering.py
new file mode 100644
index 0000000..435a1bc
--- /dev/null
+++ b/onionperf/filtering.py
@@ -0,0 +1,131 @@
+'''
+  OnionPerf
+  Authored by Rob Jansen, 2015
+  Copyright 2015-2020 The Tor Project
+  See LICENSE for licensing information
+'''
+
+import re
+from onionperf.analysis import OPAnalysis
+
+class Filtering(object):
+    '''
+    Filters the TGen streams and transfers of an OnionPerf analysis results
+    file by the relay fingerprints in the Tor circuit paths they map to.
+    '''
+
+    def __init__(self):
+        # None means the respective filter has not been configured.
+        self.fingerprints_to_include = None
+        self.fingerprints_to_exclude = None
+        # Optional "$", then 40 hex characters captured as the fingerprint.
+        self.fingerprint_pattern = re.compile(r"\$?([0-9a-fA-F]{40})")
+
+    def read_input(self, path):
+        '''Load the analysis results file at path using OPAnalysis.load.'''
+        self.analysis = OPAnalysis.load(filename=path)
+
+    def _read_fingerprints(self, path):
+        '''
+        Return the list of upper-cased fingerprints found at the start of the
+        lines of the text file at path; non-matching lines are skipped.
+        '''
+        fingerprints = []
+        with open(path, 'rt') as f:
+            for line in f:
+                fingerprint_match = self.fingerprint_pattern.match(line)
+                if fingerprint_match:
+                    fingerprints.append(fingerprint_match.group(1).upper())
+        return fingerprints
+
+    def include_fingerprints(self, path):
+        '''Read the fingerprints to include from the file at path.'''
+        self.fingerprints_to_include = self._read_fingerprints(path)
+
+    def exclude_fingerprints(self, path):
+        '''Read the fingerprints to exclude from the file at path.'''
+        # Store into fingerprints_to_exclude, the attribute apply_filters
+        # reads; assigning to self.exclude_fingerprints would shadow this
+        # method and leave the exclude filter unset.
+        self.fingerprints_to_exclude = self._read_fingerprints(path)
+
+    def _path_passes_filters(self, path):
+        '''
+        Return False if a fingerprint in the circuit path is missing from a
+        non-empty include list or present in the exclude list, else True.
+        '''
+        for long_name, _ in path:
+            fingerprint_match = self.fingerprint_pattern.match(long_name)
+            if fingerprint_match:
+                fingerprint = fingerprint_match.group(1).upper()
+                if self.fingerprints_to_include and fingerprint not in self.fingerprints_to_include:
+                    return False
+                if self.fingerprints_to_exclude and fingerprint in self.fingerprints_to_exclude:
+                    return False
+        return True
+
+    def apply_filters(self):
+        '''
+        Retain only the TGen streams and transfers that can be matched to a
+        Tor circuit whose path passes the configured filters, and store them
+        back into the analysis. Does nothing if no filter was configured.
+        '''
+        if self.fingerprints_to_include is None and self.fingerprints_to_exclude is None:
+            return
+        for source in self.analysis.get_nodes():
+            # Index Tor streams by the port part of their "source" address.
+            tor_streams_by_source_port = {}
+            # Fall back to empty dicts in case a getter returns None.
+            tor_streams = self.analysis.get_tor_streams(source) or {}
+            for tor_stream in tor_streams.values():
+                if "source" in tor_stream and ":" in tor_stream["source"]:
+                    source_port = tor_stream["source"].split(":")[1]
+                    tor_streams_by_source_port.setdefault(source_port, []).append(tor_stream)
+            tor_circuits = self.analysis.get_tor_circuits(source) or {}
+            tgen_streams = self.analysis.get_tgen_streams(source)
+            tgen_transfers = self.analysis.get_tgen_transfers(source)
+            retained_tgen_streams = {}
+            retained_tgen_transfers = {}
+            # Consume all streams first, then all transfers, one per iteration.
+            while tgen_streams or tgen_transfers:
+                stream_id = None
+                transfer_id = None
+                source_port = None
+                unix_ts_end = None
+                # Reset for every measurement so that neither an unbound name
+                # nor a match left over from the previous iteration is used.
+                circuit_id = None
+                keep = False
+                if tgen_streams:
+                    stream_id, stream_data = tgen_streams.popitem()
+                    if "local" in stream_data["transport_info"] and len(stream_data["transport_info"]["local"].split(":")) > 2:
+                        source_port = stream_data["transport_info"]["local"].split(":")[2]
+                    if "unix_ts_end" in stream_data:
+                        unix_ts_end = stream_data["unix_ts_end"]
+                elif tgen_transfers:
+                    transfer_id, transfer_data = tgen_transfers.popitem()
+                    if "endpoint_local" in transfer_data and len(transfer_data["endpoint_local"].split(":")) > 2:
+                        source_port = transfer_data["endpoint_local"].split(":")[2]
+                    if "unix_ts_end" in transfer_data:
+                        unix_ts_end = transfer_data["unix_ts_end"]
+                if source_port and unix_ts_end:
+                    # A port without any Tor stream means no match, not an error.
+                    for tor_stream in tor_streams_by_source_port.get(source_port, []):
+                        # Match on end times less than 150.0 apart (presumably
+                        # seconds - TODO confirm); the last match wins.
+                        if abs(unix_ts_end - tor_stream["unix_ts_end"]) < 150.0:
+                            circuit_id = tor_stream["circuit_id"]
+                if circuit_id and circuit_id in tor_circuits:
+                    keep = self._path_passes_filters(tor_circuits[circuit_id]["path"])
+                if keep:
+                    if stream_id:
+                        retained_tgen_streams[stream_id] = stream_data
+                    if transfer_id:
+                        retained_tgen_transfers[transfer_id] = transfer_data
+            self.analysis.set_tgen_streams(source, retained_tgen_streams)
+            self.analysis.set_tgen_transfers(source, retained_tgen_transfers)
+
+    def write_output(self, path):
+        '''Save the analysis to path via the analysis object's save method.'''
+        self.analysis.save(filename=path)
+
diff --git a/onionperf/onionperf b/onionperf/onionperf
index e8024ce..b1e7bd3 100755
--- a/onionperf/onionperf
+++ b/onionperf/onionperf
@@ -74,6 +74,15 @@ Stats files in the default Torperf format can also be exported.
 HELP_ANALYZE = """
 Analyze Tor and TGen output
 """
+
+DESC_FILTER = """
+Takes an OnionPerf analysis results file as input, applies filters,
+and produces a new OnionPerf analysis results file as output.
+"""  # passed as `description` when the `filter` subparser is created
+HELP_FILTER = """
+Filter OnionPerf analysis results
+"""  # passed as `help` when the `filter` subparser is created
+
 DESC_VISUALIZE = """
 Loads an OnionPerf json file, e.g., one produced with the `analyze` subcommand,
 and plots various interesting performance metrics to PDF files.
@@ -280,6 +289,36 @@ files generated by this script will be written""",
         action="store", dest="date_prefix",
         default=None)
 
+    # filter: subcommand dispatching to filter(args); -i/--input and -o/--output are mandatory
+    filter_parser = sub_parser.add_parser('filter', description=DESC_FILTER, help=HELP_FILTER,
+        formatter_class=my_formatter_class)
+    filter_parser.set_defaults(func=filter, formatter_class=my_formatter_class)
+
+    filter_parser.add_argument('-i', '--input',
+        help="""read the OnionPerf analysis results at PATH as input""",
+        metavar="PATH", required=True,
+        action="store", dest="input")
+
+    filter_parser.add_argument('--include-fingerprints',
+        help="""include only measurements with known circuit path and with all
+                relays being contained in the fingerprints file located at
+                PATH""",
+        metavar="PATH", action="store", dest="include_fingerprints",
+        default=None)
+
+    filter_parser.add_argument('--exclude-fingerprints',
+        help="""exclude measurements without known circuit path or with any
+                relays being contained in the fingerprints file located at
+                PATH""",
+        metavar="PATH", action="store", dest="exclude_fingerprints",
+        default=None)
+
+    filter_parser.add_argument('-o', '--output',
+        help="""write the filtered output OnionPerf analysis results file to
+                PATH""",
+        metavar="PATH", required=True,
+        action="store", dest="output")
+
     # visualize
     visualize_parser = sub_parser.add_parser('visualize', description=DESC_VISUALIZE, help=HELP_VISUALIZE,
         formatter_class=my_formatter_class)
@@ -397,6 +436,18 @@ def analyze(args):
     else:
         logging.error("Given paths were an unrecognized mix of file and directory paths, nothing will be analyzed")
 
+def filter(args):
+    """Run the filter subcommand: load args.input, apply the given filters, save to args.output."""
+    from onionperf.filtering import Filtering
+    filtering = Filtering()
+    filtering.read_input(args.input)
+    for load_fingerprints, path in ((filtering.include_fingerprints, args.include_fingerprints),
+                                    (filtering.exclude_fingerprints, args.exclude_fingerprints)):
+        if path is not None:
+            load_fingerprints(path)
+    filtering.apply_filters()
+    filtering.write_output(args.output)
+
 def visualize(args):
     from onionperf.visualization import TGenVisualization
     from onionperf.analysis import OPAnalysis





More information about the tor-commits mailing list