[tor-commits] [ooni-probe/master] Add support for downloading bridge lists from ooni-resources (#682)

art at torproject.org art at torproject.org
Fri Jan 13 12:39:58 UTC 2017


commit 02aaceb9070e341e89e9852b3bf3ff8b24b9f865
Author: Arturo Filastò <arturo at filasto.net>
Date:   Wed Nov 30 19:55:14 2016 +0000

    Add support for downloading bridge lists from ooni-resources (#682)
    
    * Add support for downloading bridge lists from ooni-resources
    
    This implements what is described in: #652
    
    * Move input_processor logic in tcp_connect into the setUp method
    
    * Use multiple with statements on same line
    
    * Use variable in onion to store the names of pluggable transports
    
    * Fix sorting to respect desc and asc keys
    
    * Enable some basic anomaly detection for tcp_connect tests
    
    * Fix handling of stripped input lines in tcp_connect
---
 data/decks/tor.yaml                   |  7 +++
 ooni/deck/store.py                    | 87 ++++++++++++++++++++++++++---------
 ooni/measurements.py                  | 16 ++++++-
 ooni/nettests/blocking/tcp_connect.py | 74 ++++++++++++++---------------
 ooni/utils/onion.py                   |  2 +
 5 files changed, 123 insertions(+), 63 deletions(-)

diff --git a/data/decks/tor.yaml b/data/decks/tor.yaml
index 9d0634f..ea6e1a3 100644
--- a/data/decks/tor.yaml
+++ b/data/decks/tor.yaml
@@ -10,3 +10,10 @@ tasks:
 - name: Runs the Meek fronted request test
   ooni:
     test_name: meek_fronted_requests
+
+- name: Does a tcp_connect test on the tor bridges
+  ooni:
+    annotations:
+      test_class: tor_bridge_reachability
+    test_name: tcp_connect
+    file: "$tor_bridge_lines"
diff --git a/ooni/deck/store.py b/ooni/deck/store.py
index 956ea37..6e3e6de 100644
--- a/ooni/deck/store.py
+++ b/ooni/deck/store.py
@@ -21,6 +21,25 @@ class InputNotFound(Exception):
 class DeckNotFound(Exception):
     pass
 
+
+def write_txt_from_csv(in_file, out_file, func, skip_header=True):
+    with in_file.open('r') as in_fh, out_file.open('w') as out_fh:
+        csvreader = csv.reader(in_fh)
+        if skip_header:
+            csvreader.next()
+        for row in csvreader:
+            out_fh.write(func(row))
+
+def write_descriptor(out_file, name, desc_id, filepath, file_type):
+    with out_file.open('w') as out_fh:
+        json.dump({
+            "name": name,
+            "filepath": filepath,
+            "last_updated": timestampNowISO8601UTC(),
+            "id": desc_id,
+            "type": file_type
+        }, out_fh)
+
 class InputStore(object):
     def __init__(self):
         self.path = FilePath(config.inputs_directory)
@@ -51,30 +70,55 @@ class InputStore(object):
             desc_fname = "citizenlab-test-lists_{0}.desc".format(cc)
 
             out_file = self.path.child("data").child(data_fname)
-            out_fh = out_file.open('w')
-            with in_file.open('r') as in_fh:
-                csvreader = csv.reader(in_fh)
-                csvreader.next()
-                for row in csvreader:
-                    out_fh.write("%s\n" % row[0])
-            out_fh.close()
+            write_txt_from_csv(in_file, out_file,
+                lambda row: "{}\n".format(row[0])
+            )
 
             desc_file = self.path.child("descriptors").child(desc_fname)
-            with desc_file.open('w') as out_fh:
-                if cc == "global":
-                    name = "List of globally accessed websites"
-                else:
-                    # XXX resolve this to a human readable country name
-                    country_name = cc
-                    name = "List of websites for {0}".format(country_name)
-                json.dump({
-                    "name": name,
-                    "filepath": out_file.path,
-                    "last_updated": timestampNowISO8601UTC(),
-                    "id": "citizenlab_{0}_urls".format(cc),
-                    "type": "file/url"
-                }, out_fh)
+            if cc == "global":
+                name = "List of globally accessed websites"
+            else:
+                # XXX resolve this to a human readable country name
+                country_name = cc
+                name = "List of websites for {0}".format(country_name)
+            write_descriptor(desc_file, name,
+                             "citizenlab_{0}_urls".format(cc),
+                             out_file.path,
+                             "file/url")
+
         self._cache_stale = True
+        yield defer.succeed(None)
+
+    @defer.inlineCallbacks
+    def update_tor_bridge_lines(self, country_code):
+        from ooni.utils import onion
+        in_file = self.resources.child("tor-bridges").child(
+            "tor-bridges-ip-port.csv"
+        )
+        if not in_file.exists():
+            yield check_for_update(country_code)
+
+        data_fname = "tor-bridge-lines.txt"
+        desc_fname = "tor-bridge-lines.desc"
+        out_file = self.path.child("data").child(data_fname)
+
+        def format_row(row):
+            host, port, nickname, protocol = row
+            if protocol.lower() not in onion.pt_names:
+                return "{}:{}\n".format(host, port)
+            return "{} {}:{}\n".format(protocol, host, port)
+
+        write_txt_from_csv(in_file, out_file, format_row)
+        desc_file = self.path.child("descriptors").child(desc_fname)
+        write_descriptor(
+            desc_file, "Tor bridge lines",
+            "tor_bridge_lines", out_file.path,
+            "file/ip-port"
+        )
+        self._cache_stale = True
+
+        # Do an empty defer to fit inside of an event loop clock
+        yield defer.succeed(None)
 
     @defer.inlineCallbacks
     def create(self, country_code=None):
@@ -88,6 +132,7 @@ class InputStore(object):
         mkdir_p(self.path.child("data").path)
 
         yield self.update_url_lists(country_code)
+        yield self.update_tor_bridge_lines(country_code)
 
     @defer.inlineCallbacks
     def update(self, country_code=None):
diff --git a/ooni/measurements.py b/ooni/measurements.py
index d79fffb..b3b3e0f 100644
--- a/ooni/measurements.py
+++ b/ooni/measurements.py
@@ -14,7 +14,8 @@ class MeasurementInProgress(Exception):
 class Process():
     supported_tests = [
         "web_connectivity",
-        "http_requests"
+        "http_requests",
+        "tcp_connect"
     ]
     @staticmethod
     def web_connectivity(entry):
@@ -26,6 +27,15 @@ class Process():
         return result
 
     @staticmethod
+    def tcp_connect(entry):
+        result = {}
+        result['anomaly'] = False
+        if entry['test_keys']['connection'] != "success":
+            result['anomaly'] = True
+        result['url'] = entry['input']
+        return result
+
+    @staticmethod
     def http_requests(entry):
         result = {}
         test_keys = entry['test_keys']
@@ -50,6 +60,8 @@ def generate_summary(input_file, output_file):
             if entry['test_name'] in Process.supported_tests:
                 result = getattr(Process, entry['test_name'])(entry)
             result['idx'] = idx
+            if not result.get('url', None):
+                result['url'] = entry['input']
             results['test_name'] = entry['test_name']
             results['test_start_time'] = entry['test_start_time']
             results['country_code'] = entry['probe_cc']
@@ -145,7 +157,7 @@ def list_measurements(compute_size=False, order=None):
         return measurements
 
     if order.lower() in ['asc', 'desc']:
-        reverse = {'asc': True, 'desc': False}[order.lower()]
+        reverse = {'asc': False, 'desc': True}[order.lower()]
         measurements.sort(key=operator.itemgetter('test_start_time'),
                           reverse=reverse)
         return measurements
diff --git a/ooni/nettests/blocking/tcp_connect.py b/ooni/nettests/blocking/tcp_connect.py
index f2712ea..da92129 100644
--- a/ooni/nettests/blocking/tcp_connect.py
+++ b/ooni/nettests/blocking/tcp_connect.py
@@ -18,7 +18,7 @@ class TCPConnectTest(nettest.NetTestCase):
     description = "Performs a TCP connect scan of all the " \
                   "host port combinations given as input."
     author = "Arturo Filastò"
-    version = "0.1"
+    version = "0.2.0"
     inputFile = [
         'file',
         'f',
@@ -29,6 +29,38 @@ class TCPConnectTest(nettest.NetTestCase):
     requiresRoot = False
     requiredOptions = ['file']
 
+    def setUp(self):
+        def strip_url(address):
+            proto, path = address.strip().split('://')
+            proto = proto.lower()
+            host = path.split('/')[0]
+            if proto == 'http':
+                return host, 80
+            if proto == 'https':
+                return host, 443
+
+        pluggable_transports = (
+            "obfs3", "obfs2", "fte", "scramblesuit",
+            "obfs4"
+        )
+        def is_bridge_line(line):
+            first = line.split(" ")[0]
+            return first.lower() in pluggable_transports + ("bridge",)
+        def strip_bridge(line):
+            if line.lower().startswith("bridge"):
+                return line.split(" ")[2].split(":")
+            return line.split(" ")[1].split(":")
+
+        if self.input.startswith("http"):
+            host, port = strip_url(self.input)
+        elif is_bridge_line(self.input):
+            host, port = strip_bridge(self.input)
+        else:
+            host, port = self.input.split(" ")[0].split(":")
+
+        self.host = host
+        self.port = port
+
     def test_connect(self):
         """
         This test performs a TCP connection to the remote host on the
@@ -36,8 +68,6 @@ class TCPConnectTest(nettest.NetTestCase):
         The report will contains the string 'success' if the test has
         succeeded, or the reason for the failure if it has failed.
         """
-        host, port = self.input.split(":")
-
         def connectionSuccess(protocol):
             protocol.transport.loseConnection()
             log.debug("Got a connection to %s" % self.input)
@@ -47,44 +77,8 @@ class TCPConnectTest(nettest.NetTestCase):
             self.report['connection'] = handleAllFailures(failure)
 
         from twisted.internet import reactor
-        point = TCP4ClientEndpoint(reactor, host, int(port))
+        point = TCP4ClientEndpoint(reactor, self.host, int(self.port))
         d = point.connect(TCPFactory())
         d.addCallback(connectionSuccess)
         d.addErrback(connectionFailed)
         return d
-
-    def inputProcessor(self, filename=None):
-        """
-        This inputProcessor extracts name:port pairs from urls
-        XXX: Does not support unusual port numbers
-        """
-        def strip_url(address):
-            proto, path = x.strip().split('://')
-            proto = proto.lower()
-            host = path.split('/')[0]
-            if proto == 'http':
-                return "%s:80" % host
-            if proto == 'https':
-                return "%s:443" % host
-
-        pluggable_transports = ("obfs3", "obfs2", "fte", "scramblesuit")
-        def is_bridge_line(line):
-            first = line.split(" ")[0]
-            return first.lower() in pluggable_transports + ("bridge",)
-        def strip_bridge(line):
-            if line.lower().startswith("Bridge"):
-                return line.split(" ")[2]
-            return line.split(" ")[1]
-
-        if filename:
-            fp = open(filename)
-            for x in fp.readlines():
-                if x.startswith("http"):
-                    yield strip_url(x)
-                elif is_bridge_line(x):
-                    yield strip_bridge(x)
-                else:
-                    yield x.split(" ")[0]
-            fp.close()
-        else:
-            pass
diff --git a/ooni/utils/onion.py b/ooni/utils/onion.py
index 7414e2f..9d41b29 100644
--- a/ooni/utils/onion.py
+++ b/ooni/utils/onion.py
@@ -136,6 +136,8 @@ _transport_line_templates = {
 
 }
 
+pt_names = _transport_line_templates.keys()
+
 class UnrecognizedTransport(Exception):
     pass
 class UninstalledTransport(Exception):





More information about the tor-commits mailing list