[tor-commits] [sbws/master] Remove two v3bw parsing/plotting scripts

pastly at torproject.org pastly at torproject.org
Wed Jul 11 15:05:40 UTC 2018


commit 250c7e62a2d131eef86780fdedcb0a9eb1e16dc6
Author: Matt Traudt <sirmatt at ksu.edu>
Date:   Thu Jul 5 22:09:59 2018 -0400

    Remove two v3bw parsing/plotting scripts
    
    GH: ref #201
---
 CHANGELOG.md                  |   4 ++
 scripts/tools/plot-v3bw-xy.py | 156 ------------------------------------------
 scripts/tools/v3bw-into-xy.py |  53 --------------
 3 files changed, 4 insertions(+), 209 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ecd79f2..9c35713 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -24,6 +24,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 - Remove is_controller_ok. Instead catch possible controller exceptions and 
 log them
 
+### Removed
+
+- Two parsing/plotting scripts in scripts/tools/ that can now be found at
+<https://github.com/pastly/v3bw-tools>
 
 ## [0.5.0] - 2018-06-26
 
diff --git a/scripts/tools/plot-v3bw-xy.py b/scripts/tools/plot-v3bw-xy.py
deleted file mode 100755
index 861894a..0000000
--- a/scripts/tools/plot-v3bw-xy.py
+++ /dev/null
@@ -1,156 +0,0 @@
-#!/usr/bin/env python3
-# File: plot-v3bw-xy.py
-# Author: Matt Traudt
-# License: CC0
-#
-# Requires matplotlib; pip install matplotlib
-from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
-import matplotlib; matplotlib.use('Agg')  # noqa; for systems without X11
-from matplotlib.backends.backend_pdf import PdfPages
-import pylab as plt
-
-colors = "krbgcmy"
-
-plt.rcParams.update({
-    'axes.grid': True,
-})
-
-
-def get_all_values_from_fd(fd):
-    values = []
-    for line in fd:
-        try:
-            x, y = line.strip().split()
-            x, y = str(x), float(y)
-            assert len(x) == 40
-            values.append((x, y))
-        except ValueError:
-            print('ignoring', line)
-            continue
-    return values
-
-
-def common_elements(l1, l2):
-    ret = set()
-    for item in l1:
-        if item in l2:
-            ret.add(item)
-    return ret
-
-
-def main(args, pdf):
-    plt.figure()
-    data = {}
-    # Read all data in
-    all_labels = []
-    for fname, label in args.input:
-        with open(fname, 'rt') as fd:
-            data[label] = {
-                'label': label,
-                'data': get_all_values_from_fd(fd)
-            }
-            all_labels.append(label)
-    # Determine what relay fingerprints have data from all input sources
-    common_fingerprints = None
-    for label1 in data:
-        fp_list1 = set([point[0] for point in data[label1]['data']])
-        for label2 in data:
-            if label2 == label1:
-                continue
-            fp_list2 = set([point[0] for point in data[label2]['data']])
-            fp_list1 = common_elements(fp_list1, fp_list2)
-        common_fingerprints = fp_list1
-        break
-    # Remove unneeded data, then
-    # sort by fingerprint
-    for label in data:
-        points = [p for p in data[label]['data']
-                  if p[0] in common_fingerprints]
-        points = sorted(points, key=lambda p: p[0])
-        data[label]['data'] = points
-    # combine the y values for each fingerprint
-    # {
-    #    'fp1': {'label1': 10, 'label2': 30},
-    #    'fp2': {'label1': 20, 'label2': 15},
-    # }
-    # and change dict data's structure to that
-    new_data = {}
-    for fp in common_fingerprints:
-        new_data[fp] = {}
-        for label in data:
-            y = [p[1] for p in data[label]['data'] if p[0] == fp]
-            assert len(y) == 1
-            y = y[0]
-            new_data[fp].update({label: y})
-    data = new_data
-    sort_label = all_labels[0]
-    all_labels_sorted = sorted(all_labels)
-    # Sort the data points such that sort_label's highest value is first.
-    # Assuming sort_label is label1, then turn into this list
-    # [
-    #    {'label1': 20, 'label2': 15},
-    #    {'label1': 10, 'label2': 30},
-    # ]
-    # and change dict data's structure to that
-    new_data = []
-    for fp in data:
-        new_data.append(data[fp])
-    new_data = sorted(new_data, key=lambda k: k[sort_label], reverse=True)
-    data = new_data
-    # Plot data
-    for label_i, label in enumerate(all_labels_sorted):
-        x = []
-        y = []
-        for point_i, point in enumerate(data):
-            x.append(point_i)
-            if 'sbws' in label:
-                y.append(point[label] / 1000)
-            else:
-                y.append(point[label])
-        plt.scatter(x, y, c=colors[label_i], s=args.size, label=label)
-    plt.legend(loc='upper right')
-    plt.xlabel(args.xlabel)
-    plt.ylabel(args.ylabel)
-    if args.xmin is not None:
-        plt.xlim(xmin=args.xmin)
-    if args.ymin is not None:
-        plt.ylim(ymin=args.ymin)
-    if args.xmax is not None:
-        plt.xlim(xmax=args.xmax)
-    if args.ymax is not None:
-        plt.ylim(ymax=args.ymax)
-    plt.title(args.title)
-    pdf.savefig()
-
-
-if __name__ == '__main__':
-    d = 'Takes one or more lists of (fingerprint, bandwidth) points, 1 per '\
-        'line, and plots a scatter plot of them. Data points are sorted by '\
-        'the first input\'s bandwidth values, thus this script can be used '\
-        'to visually determine how similar the results are from various '\
-        'instances of a bandwidth scanner, or even across different '\
-        'bandwidth scanning tools.'
-    parser = ArgumentParser(
-        formatter_class=ArgumentDefaultsHelpFormatter, description=d)
-    parser.add_argument(
-        '-i', '--input', nargs=2, metavar=('FNAME', 'LABEL'),
-        action='append', help='Specify a file to read values from and what '
-        'to label its points in the PDF. Can be given more than once.')
-    parser.add_argument('-o', '--output', default='temp.pdf')
-    parser.add_argument('-x', '--xlabel', type=str, default='Relay #',
-                        help='What to label the X axis in the PDF')
-    parser.add_argument('-y', '--ylabel', type=str,
-                        default='"Bandwidth" units (thousands)',
-                        help='What to label the Y axis in the PDF')
-    parser.add_argument('-t', '--title', type=str,
-                        default='Correlation of various bwscanning systems',
-                        help='What to title the plot in the PDF')
-    parser.add_argument('--xmin', type=float, default=0)
-    parser.add_argument('--ymin', type=float, default=0)
-    parser.add_argument('--xmax', type=float)
-    parser.add_argument('--ymax', type=float)
-    parser.add_argument('-s', '--size', type=float, default=1,
-                        help='Size of scatter plot points')
-    args = parser.parse_args()
-    with PdfPages(args.output) as pdf:
-        exit(main(args, pdf))
diff --git a/scripts/tools/v3bw-into-xy.py b/scripts/tools/v3bw-into-xy.py
deleted file mode 100755
index fda5d74..0000000
--- a/scripts/tools/v3bw-into-xy.py
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/usr/bin/env python3
-import sys
-import re
-# File: v3bw-into-xy.py
-# Author: Matt Traudt
-# License: CC0
-#
-# Takes one or more v3bw files as arguments.
-#
-# Looks for lines that contain actual data. That means most of them, since most
-# of them contain "node_id=" and those are the ones that are interesting.
-#
-# Extract the fingerprint and bandwidth values for each of those lines and put
-# them on stdout, one per line. Effectively, after ignoring other lines, this:
-#     node_id=$AAAA...AAAA bw=12345
-# becomes this:
-#     AAAA...AAAA 12345
-#
-# NOTE: If you specify more than one v3bw file, this will do NOTHING to tell
-# you when the output from one file stops and the next begins
-#
-# With v1.1.0 of the v3bw file format, we no longer know if node_id or bw is
-# first in the line. Hence two regular expressions and searching for the
-# matched item that has 40 chars (the fingerprint)
-
-
-def main():
-    re1 = re.compile('.*node_id=\$?([\w]+).* bw=([\d]+).*')  # noqa
-    re2 = re.compile('.*bw=([\d]+).* node_id=\$?([\w]+)')  # noqa
-    for fname in sys.argv[1:]:
-        with open(fname, 'rt') as fd:
-            for line in fd:
-                if 'node_id' not in line:
-                    continue
-                match = re1.match(line) or re2.match(line)
-                if not match:
-                    continue
-                items = match.groups()
-                assert len(items) == 2
-                s = '{} {}\n'
-                if len(items[0]) == 40:
-                    s = s.format(*items)
-                else:
-                    s = s.format(*items[::-1])
-                sys.stdout.write(s)
-    return 0
-
-
-if __name__ == '__main__':
-    try:
-        exit(main())
-    except (KeyboardInterrupt, BrokenPipeError):
-        pass





More information about the tor-commits mailing list