[tor-commits] [onionperf/develop] Change filter mode to filter Tor circuits.
karsten at torproject.org
karsten at torproject.org
Wed Sep 16 15:15:08 UTC 2020
commit 9d0c80561b84905e72b41758dfcb7b712f5f407f
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Mon Aug 31 11:30:53 2020 +0200
Change filter mode to filter Tor circuits.
This new filter mode removes Tor circuits that don't match the
provided fingerprints and leaves TGen transfers/streams untouched. At
the same time the visualize mode only includes TGen transfers/streams
with an existing mapping between TGen transfers/streams and Tor
streams/circuits.
This patch changes the default behavior of the visualize mode. The
original behavior of visualizing TGen transfers/streams *without* an
existing mapping to Tor streams/circuits can be selected with the
--outer-join switch, even though that's rather an edge use case.
Another minor change is that the filtered analysis file is not
written with sort_keys=True anymore, which would have produced a newly
sorted file with keys in alphabetic order rather than the original
insertion order. The result is an actually useful diff.
---
onionperf/analysis.py | 13 +++++-------
onionperf/filtering.py | 49 ++++++++--------------------------------------
onionperf/onionperf | 24 +++++++++++++++++------
onionperf/visualization.py | 33 ++++++++++++++++++++-----------
4 files changed, 53 insertions(+), 66 deletions(-)
diff --git a/onionperf/analysis.py b/onionperf/analysis.py
index b2f483f..49c109c 100644
--- a/onionperf/analysis.py
+++ b/onionperf/analysis.py
@@ -62,13 +62,7 @@ class OPAnalysis(Analysis):
self.json_db['data'][self.nickname]["tgen"].pop("stream_summary")
self.did_analysis = True
- def set_tgen_transfers(self, node, tgen_transfers):
- self.json_db['data'][node]['tgen']['transfers'] = tgen_transfers
-
- def set_tgen_streams(self, node, tgen_streams):
- self.json_db['data'][node]['tgen']['streams'] = tgen_streams
-
- def save(self, filename=None, output_prefix=os.getcwd(), do_compress=True, date_prefix=None):
+ def save(self, filename=None, output_prefix=os.getcwd(), do_compress=True, date_prefix=None, sort_keys=True):
if filename is None:
base_filename = "onionperf.analysis.json.xz"
if date_prefix is not None:
@@ -85,7 +79,7 @@ class OPAnalysis(Analysis):
logging.info("saving analysis results to {0}".format(filepath))
outf = util.FileWritable(filepath, do_compress=do_compress)
- json.dump(self.json_db, outf, sort_keys=True, separators=(',', ': '), indent=2)
+ json.dump(self.json_db, outf, sort_keys=sort_keys, separators=(',', ': '), indent=2)
outf.close()
logging.info("done!")
@@ -109,6 +103,9 @@ class OPAnalysis(Analysis):
except:
return None
+ def set_tor_circuits(self, node, tor_circuits):
+ self.json_db['data'][node]['tor']['circuits'] = tor_circuits
+
def get_tor_streams(self, node):
try:
return self.json_db['data'][node]['tor']['streams']
diff --git a/onionperf/filtering.py b/onionperf/filtering.py
index 9e7b34f..1b614d6 100644
--- a/onionperf/filtering.py
+++ b/onionperf/filtering.py
@@ -38,41 +38,11 @@ class Filtering(object):
if self.fingerprints_to_include is None and self.fingerprints_to_exclude is None:
return
for source in self.analysis.get_nodes():
- tor_streams_by_source_port = {}
- tor_streams = self.analysis.get_tor_streams(source)
- for tor_stream in tor_streams.values():
- if "source" in tor_stream and ":" in tor_stream["source"]:
- source_port = tor_stream["source"].split(":")[1]
- tor_streams_by_source_port.setdefault(source_port, []).append(tor_stream)
tor_circuits = self.analysis.get_tor_circuits(source)
- tgen_streams = self.analysis.get_tgen_streams(source)
- tgen_transfers = self.analysis.get_tgen_transfers(source)
- retained_tgen_streams = {}
- retained_tgen_transfers = {}
- while tgen_streams or tgen_transfers:
- stream_id = None
- transfer_id = None
- source_port = None
- unix_ts_end = None
+ filtered_circuit_ids = []
+ for circuit_id, tor_circuit in tor_circuits.items():
keep = False
- if tgen_streams:
- stream_id, stream_data = tgen_streams.popitem()
- if "local" in stream_data["transport_info"] and len(stream_data["transport_info"]["local"].split(":")) > 2:
- source_port = stream_data["transport_info"]["local"].split(":")[2]
- if "unix_ts_end" in stream_data:
- unix_ts_end = stream_data["unix_ts_end"]
- elif tgen_transfers:
- transfer_id, transfer_data = tgen_transfers.popitem()
- if "endpoint_local" in transfer_data and len(transfer_data["endpoint_local"].split(":")) > 2:
- source_port = transfer_data["endpoint_local"].split(":")[2]
- if "unix_ts_end" in transfer_data:
- unix_ts_end = transfer_data["unix_ts_end"]
- if source_port and unix_ts_end:
- for tor_stream in tor_streams_by_source_port[source_port]:
- if abs(unix_ts_end - tor_stream["unix_ts_end"]) < 150.0:
- circuit_id = tor_stream["circuit_id"]
- if circuit_id and str(circuit_id) in tor_circuits:
- tor_circuit = tor_circuits[circuit_id]
+ if "path" in tor_circuit:
path = tor_circuit["path"]
keep = True
for long_name, _ in path:
@@ -85,12 +55,9 @@ class Filtering(object):
if self.fingerprints_to_exclude is not None and fingerprint in self.fingerprints_to_exclude:
keep = False
break
- if keep:
- if stream_id:
- retained_tgen_streams[stream_id] = stream_data
- if transfer_id:
- retained_tgen_transfers[transfer_id] = transfer_data
- self.analysis.set_tgen_streams(source, retained_tgen_streams)
- self.analysis.set_tgen_transfers(source, retained_tgen_transfers)
- self.analysis.save(filename=output_file, output_prefix=output_dir)
+ if not keep:
+ filtered_circuit_ids.append(circuit_id)
+ for circuit_id in filtered_circuit_ids:
+ del(tor_circuits[circuit_id])
+ self.analysis.save(filename=output_file, output_prefix=output_dir, sort_keys=False)
diff --git a/onionperf/onionperf b/onionperf/onionperf
index e3f49c8..1efa8cb 100755
--- a/onionperf/onionperf
+++ b/onionperf/onionperf
@@ -79,8 +79,13 @@ DESC_FILTER = """
Takes an OnionPerf analysis results file or directory as input, applies filters,
and produces new OnionPerf analysis results file(s) as output.
-This subcommand only filters measurements in `data/[source]/tgen/transfers`
-and `data/[source]/tgen/streams`, but leaves any summaries unchanged.
+The `filter` subcommand is typically used in combination with the `visualize`
+subcommand. The work flow is to filter out any TGen transfers/streams or Tor
+streams/circuits that are not supposed to be visualized and then visualize only
+those measurements with an existing mapping between TGen transfers/streams and
+Tor streams/circuits.
+
+This subcommand only filters individual objects and leaves summaries unchanged.
"""
HELP_FILTER = """
Filter OnionPerf analysis results
@@ -304,15 +309,15 @@ files generated by this script will be written""",
action="store", dest="input")
filter_parser.add_argument('--include-fingerprints',
- help="""include only measurements with known circuit path and with all
+ help="""include only Tor circuits with known circuit path and with all
relays being contained in the fingerprints file located at
PATH""",
metavar="PATH", action="store", dest="include_fingerprints",
default=None)
filter_parser.add_argument('--exclude-fingerprints',
- help="""exclude measurements without known circuit path or with any
- relays being contained in the fingerprints file located at
+ help="""exclude Tor circuits without known circuit path or with any
+ relay being contained in the fingerprints file located at
PATH""",
metavar="PATH", action="store", dest="exclude_fingerprints",
default=None)
@@ -337,6 +342,13 @@ files generated by this script will be written""",
required="True",
action=PathStringArgsAction, dest="datasets")
+ visualize_parser.add_argument('--outer-join',
+ help="""Include measurements without an existing mapping between TGen
+ transfers/streams and Tor streams/circuits, which is the
+ equivalent of an outer join in the database sense""",
+ action="store_true", dest="outer_join",
+ default=False)
+
visualize_parser.add_argument('-p', '--prefix',
help="a STRING filename prefix for graphs we generate",
metavar="STRING", type=str,
@@ -477,7 +489,7 @@ def visualize(args):
if analysis is not None:
analyses.append(analysis)
tgen_viz.add_dataset(analyses, label)
- tgen_viz.plot_all(args.prefix)
+ tgen_viz.plot_all(args.prefix, outer_join=args.outer_join)
def type_nonnegative_integer(value):
i = int(value)
diff --git a/onionperf/visualization.py b/onionperf/visualization.py
index 660f52e..0f69879 100644
--- a/onionperf/visualization.py
+++ b/onionperf/visualization.py
@@ -31,11 +31,11 @@ class Visualization(object, metaclass=ABCMeta):
class TGenVisualization(Visualization):
- def plot_all(self, output_prefix):
+ def plot_all(self, output_prefix, outer_join=False):
if len(self.datasets) > 0:
prefix = output_prefix + '.' if output_prefix is not None else ''
ts = time.strftime("%Y-%m-%d_%H:%M:%S")
- self.__extract_data_frame()
+ self.__extract_data_frame(outer_join)
self.data.to_csv("{0}onionperf.viz.{1}.csv".format(prefix, ts))
sns.set_context("paper")
self.page = PdfPages("{0}onionperf.viz.{1}.pdf".format(prefix, ts))
@@ -51,7 +51,7 @@ class TGenVisualization(Visualization):
self.__plot_errors_time()
self.page.close()
- def __extract_data_frame(self):
+ def __extract_data_frame(self, outer_join=False):
streams = []
for (analyses, label) in self.datasets:
for analysis in analyses:
@@ -62,6 +62,7 @@ class TGenVisualization(Visualization):
if "source" in tor_stream and ":" in tor_stream["source"]:
source_port = tor_stream["source"].split(":")[1]
tor_streams_by_source_port.setdefault(source_port, []).append(tor_stream)
+ tor_circuits = analysis.get_tor_circuits(client)
tgen_streams = analysis.get_tgen_streams(client)
tgen_transfers = analysis.get_tgen_transfers(client)
while tgen_streams or tgen_transfers:
@@ -122,20 +123,30 @@ class TGenVisualization(Visualization):
unix_ts_end = transfer_data["unix_ts_end"]
if "unix_ts_start" in transfer_data:
stream["start"] = datetime.datetime.utcfromtimestamp(transfer_data["unix_ts_start"])
+ tor_stream = None
+ tor_circuit = None
+ if source_port and unix_ts_end:
+ for s in tor_streams_by_source_port[source_port]:
+ if abs(unix_ts_end - s["unix_ts_end"]) < 150.0:
+ tor_stream = s
+ break
+ if tor_stream and "circuit_id" in tor_stream:
+ circuit_id = tor_stream["circuit_id"]
+ if str(circuit_id) in tor_circuits:
+ tor_circuit = tor_circuits[circuit_id]
if error_code:
if error_code == "PROXY":
error_code_parts = ["TOR"]
else:
error_code_parts = ["TGEN", error_code]
- if source_port and unix_ts_end:
- for tor_stream in tor_streams_by_source_port[source_port]:
- if abs(unix_ts_end - tor_stream["unix_ts_end"]) < 150.0:
- if "failure_reason_local" in tor_stream:
- error_code_parts.append(tor_stream["failure_reason_local"])
- if "failure_reason_remote" in tor_stream:
- error_code_parts.append(tor_stream["failure_reason_remote"])
+ if tor_stream:
+ if "failure_reason_local" in tor_stream:
+ error_code_parts.append(tor_stream["failure_reason_local"])
+ if "failure_reason_remote" in tor_stream:
+ error_code_parts.append(tor_stream["failure_reason_remote"])
stream["error_code"] = "/".join(error_code_parts)
- streams.append(stream)
+ if tor_circuit or outer_join:
+ streams.append(stream)
self.data = pd.DataFrame.from_records(streams, index="id")
def __plot_firstbyte_ecdf(self):
More information about the tor-commits
mailing list