[tor-commits] [onionperf/develop] Move filters and filter metadata to analysis files
karsten at torproject.org
Wed Sep 16 15:15:08 UTC 2020
commit 95b749a8fc690825c0a828b8473c58faea7ad912
Author: Ana Custura <ana at netstat.org.uk>
Date: Thu Sep 10 01:51:54 2020 +0100
Move filters and filter metadata to analysis files
---
onionperf/filtering.py | 25 ++++++++++++++++++-------
onionperf/onionperf | 9 +--------
onionperf/visualization.py | 14 +++++++++-----
3 files changed, 28 insertions(+), 20 deletions(-)
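In short, apply_filters() no longer deletes circuits that fail the fingerprint filters; it marks them with a "filtered" flag and records which filters were applied under a new top-level "filters" key in the analysis JSON. A minimal sketch of that metadata, with placeholder file paths (the real paths are whatever was passed to include_fingerprints()/exclude_fingerprints()):

# Sketch only: shape of analysis.json_db["filters"] after apply_filters();
# the file paths below are placeholders, not values from this patch.
filters_metadata = {
    "tor/circuits": [
        {"name": "include_fingerprints", "filepath": "include-fingerprints.txt"},
        {"name": "exclude_fingerprints", "filepath": "exclude-fingerprints.txt"},
    ]
}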
diff --git a/onionperf/filtering.py b/onionperf/filtering.py
index 1b614d6..c008c03 100644
--- a/onionperf/filtering.py
+++ b/onionperf/filtering.py
@@ -7,6 +7,7 @@
import re
from onionperf.analysis import OPAnalysis
+from collections import defaultdict
class Filtering(object):
@@ -14,9 +15,11 @@ class Filtering(object):
self.fingerprints_to_include = None
self.fingerprints_to_exclude = None
self.fingerprint_pattern = re.compile("\$?([0-9a-fA-F]{40})")
+ self.filters = defaultdict(list)
def include_fingerprints(self, path):
self.fingerprints_to_include = []
+ self.fingerprints_to_include_path = path
with open(path, 'rt') as f:
for line in f:
fingerprint_match = self.fingerprint_pattern.match(line)
@@ -26,6 +29,7 @@ class Filtering(object):
def exclude_fingerprints(self, path):
self.fingerprints_to_exclude = []
+ self.fingerprints_to_exclude_path = path
with open(path, 'rt') as f:
for line in f:
fingerprint_match = self.fingerprint_pattern.match(line)
@@ -33,12 +37,16 @@ class Filtering(object):
fingerprint = fingerprint_match.group(1).upper()
self.fingerprints_to_exclude.append(fingerprint)
- def apply_filters(self, input_path, output_dir, output_file):
- self.analysis = OPAnalysis.load(filename=input_path)
+ def filter_tor_circuits(self, analysis):
if self.fingerprints_to_include is None and self.fingerprints_to_exclude is None:
return
- for source in self.analysis.get_nodes():
- tor_circuits = self.analysis.get_tor_circuits(source)
+ self.filters["tor/circuits"] = []
+ if self.fingerprints_to_include:
+ self.filters["tor/circuits"].append({"name": "include_fingerprints", "filepath": self.fingerprints_to_include_path })
+ if self.fingerprints_to_exclude:
+ self.filters["tor/circuits"].append({"name": "exclude_fingerprints", "filepath": self.fingerprints_to_exclude_path })
+ for source in analysis.get_nodes():
+ tor_circuits = analysis.get_tor_circuits(source)
filtered_circuit_ids = []
for circuit_id, tor_circuit in tor_circuits.items():
keep = False
@@ -56,8 +64,11 @@ class Filtering(object):
keep = False
break
if not keep:
- filtered_circuit_ids.append(circuit_id)
- for circuit_id in filtered_circuit_ids:
- del(tor_circuits[circuit_id])
+ tor_circuits[circuit_id]["filtered"] = True
+
+ def apply_filters(self, input_path, output_dir, output_file):
+ self.analysis = OPAnalysis.load(filename=input_path)
+ self.filter_tor_circuits(self.analysis)
+ self.analysis.json_db["filters"] = self.filters
self.analysis.save(filename=output_file, output_prefix=output_dir, sort_keys=False)
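For downstream tooling, the new metadata can be read back from a filtered analysis file roughly as follows. OPAnalysis.load(), get_nodes(), get_tor_circuits() and json_db all appear in the diff above; the file name is only an example:

from onionperf.analysis import OPAnalysis

# Example path; use whatever apply_filters() wrote out.
analysis = OPAnalysis.load(filename="onionperf.analysis.json.xz")
# Which filters were applied, and from which fingerprint files.
for f in analysis.json_db.get("filters", {}).get("tor/circuits", []):
    print(f["name"], f["filepath"])
# Filtered circuits are still present, just flagged.
for source in analysis.get_nodes():
    for circuit_id, circuit in analysis.get_tor_circuits(source).items():
        if circuit.get("filtered"):
            pass  # excluded by a fingerprint filter, but kept in the file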
diff --git a/onionperf/onionperf b/onionperf/onionperf
index 1efa8cb..108af4e 100755
--- a/onionperf/onionperf
+++ b/onionperf/onionperf
@@ -342,13 +342,6 @@ files generated by this script will be written""",
required="True",
action=PathStringArgsAction, dest="datasets")
- visualize_parser.add_argument('--outer-join',
- help="""Include measurements without an existing mapping between TGen
- transfers/streams and Tor streams/circuits, which is the
- equivalent of an outer join in the database sense""",
- action="store_true", dest="outer_join",
- default=False)
-
visualize_parser.add_argument('-p', '--prefix',
help="a STRING filename prefix for graphs we generate",
metavar="STRING", type=str,
@@ -489,7 +482,7 @@ def visualize(args):
if analysis is not None:
analyses.append(analysis)
tgen_viz.add_dataset(analyses, label)
- tgen_viz.plot_all(args.prefix, outer_join=args.outer_join)
+ tgen_viz.plot_all(args.prefix)
def type_nonnegative_integer(value):
i = int(value)
diff --git a/onionperf/visualization.py b/onionperf/visualization.py
index 0f69879..f5bc03f 100644
--- a/onionperf/visualization.py
+++ b/onionperf/visualization.py
@@ -31,11 +31,11 @@ class Visualization(object, metaclass=ABCMeta):
class TGenVisualization(Visualization):
- def plot_all(self, output_prefix, outer_join=False):
+ def plot_all(self, output_prefix):
if len(self.datasets) > 0:
prefix = output_prefix + '.' if output_prefix is not None else ''
ts = time.strftime("%Y-%m-%d_%H:%M:%S")
- self.__extract_data_frame(outer_join)
+ self.__extract_data_frame()
self.data.to_csv("{0}onionperf.viz.{1}.csv".format(prefix, ts))
sns.set_context("paper")
self.page = PdfPages("{0}onionperf.viz.{1}.pdf".format(prefix, ts))
@@ -51,7 +51,7 @@ class TGenVisualization(Visualization):
self.__plot_errors_time()
self.page.close()
- def __extract_data_frame(self, outer_join=False):
+ def __extract_data_frame(self):
streams = []
for (analyses, label) in self.datasets:
for analysis in analyses:
@@ -145,8 +145,12 @@ class TGenVisualization(Visualization):
if "failure_reason_remote" in tor_stream:
error_code_parts.append(tor_stream["failure_reason_remote"])
stream["error_code"] = "/".join(error_code_parts)
- if tor_circuit or outer_join:
- streams.append(stream)
+
+ if "filters" in analysis.json_db.keys() and analysis.json_db["filters"]["tor/circuits"]:
+ if tor_circuit and "filtered" not in tor_circuit.keys():
+ streams.append(stream)
+ else:
+ streams.append(stream)
self.data = pd.DataFrame.from_records(streams, index="id")
def __plot_firstbyte_ecdf(self):
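Taken together with the CLI change above, the visualization now keeps every stream by default and only drops streams whose circuit is missing or marked "filtered" when the analysis file declares circuit filters; under this rule the removed --outer-join option is no longer needed. A condensed restatement of the selection logic (a sketch, not code from the patch):

def keep_stream(analysis, tor_circuit):
    # Keep all streams unless the analysis declares tor/circuits filters;
    # in that case require a circuit mapping that was not marked "filtered".
    circuit_filters = analysis.json_db.get("filters", {}).get("tor/circuits")
    if circuit_filters:
        return bool(tor_circuit) and "filtered" not in tor_circuit
    return True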