[tor-commits] [ooni-probe/master] Add a tool for generating ooniprobe decks.

art at torproject.org art at torproject.org
Tue Sep 2 23:20:50 UTC 2014


commit b8c27c2c46aa3f8cd5b1462ba6d07cc3fa653d41
Author: Arturo Filastò <art at fuffa.org>
Date:   Tue Aug 12 18:30:45 2014 +0200

    Add a tool for generating ooniprobe decks.
    
    * Also adds tool for downloading ooniprobe resources.
---
 bin/oonideckgen                                  |   27 ++++
 bin/ooniresources                                |   27 ++++
 ooni/deckgen/__init__.py                         |    1 +
 ooni/deckgen/cli.py                              |  151 ++++++++++++++++++++++
 ooni/deckgen/processors/citizenlab_test_lists.py |   53 ++++++++
 ooni/deckgen/processors/namebench_dns_servers.py |   53 ++++++++
 ooni/resources/__init__.py                       |   22 ++++
 ooni/resources/cli.py                            |   28 ++++
 ooni/resources/update.py                         |   47 +++++++
 9 files changed, 409 insertions(+)

diff --git a/bin/oonideckgen b/bin/oonideckgen
new file mode 100755
index 0000000..c990160
--- /dev/null
+++ b/bin/oonideckgen
@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+import os
+import sys
+
+sys.path[:] = map(os.path.abspath, sys.path)
+sys.path.insert(0, os.path.abspath(os.getcwd()))
+
+from twisted.internet import defer, reactor
+
+from ooni.utils import log
+from ooni.deckgen import cli
+
+def failed(failure):
+    """Errback used by start(): log the failure and stop the reactor."""
+    log.err("Failed to run oonideckgen")
+    log.exception(failure)
+    # Stopping the reactor lets reactor.run() return so the script can exit.
+    reactor.stop()
+
+def done(result):
+    """Callback used by start(): stop the reactor; the result is ignored."""
+    reactor.stop()
+
+def start():
+    d = defer.maybeDeferred(cli.run)
+    d.addCallback(done)
+    d.addErrback(failed)
+
+reactor.callWhenRunning(start)
+reactor.run()
diff --git a/bin/ooniresources b/bin/ooniresources
new file mode 100755
index 0000000..e0123ac
--- /dev/null
+++ b/bin/ooniresources
@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+import os
+import sys
+
+sys.path[:] = map(os.path.abspath, sys.path)
+sys.path.insert(0, os.path.abspath(os.getcwd()))
+
+from twisted.internet import defer, reactor
+
+from ooni.utils import log
+from ooni.resources import cli
+
+def failed(failure):
+    """Errback-style handler: log the failure and stop the reactor."""
+    log.err("Failed to run ooniresources")
+    log.exception(failure)
+    # Stopping the reactor lets reactor.run() return so the script can exit.
+    reactor.stop()
+
+def done(result):
+    """Stop the reactor; the value passed in is ignored."""
+    reactor.stop()
+
+def start():
+    d = defer.maybeDeferred(cli.run)
+    d.addCallback(done)
+    d.addErrback(done)
+
+reactor.callWhenRunning(start)
+reactor.run()
diff --git a/ooni/deckgen/__init__.py b/ooni/deckgen/__init__.py
new file mode 100644
index 0000000..f102a9c
--- /dev/null
+++ b/ooni/deckgen/__init__.py
@@ -0,0 +1 @@
+__version__ = "0.0.1"
diff --git a/ooni/deckgen/cli.py b/ooni/deckgen/cli.py
new file mode 100644
index 0000000..406417d
--- /dev/null
+++ b/ooni/deckgen/cli.py
@@ -0,0 +1,151 @@
+import os
+import sys
+import copy
+import errno
+
+import yaml
+
+from twisted.python import usage
+
+from . import __version__
+from ooni.resources import inputs
+from ooni.settings import config
+
+
+class Options(usage.Options):
+    synopsis = """%s [options]
+    """
+
+    optParameters = [
+        ["country-code", "c",
+         None,
+         "Specify the two letter country code for which we should "
+         "generate the deck."
+         ],
+        ["output", "o",
+         None,
+         "Specify the directory where to write output."
+         ]
+    ]
+
+    def opt_version(self):
+        print("oonideckgen version: %s" % __version__)
+        sys.exit(0)
+
+    def postOptions(self):
+        if not self['output'] or not self['country-code']:
+            raise usage.UsageError(
+                "Both --output and --country-code are required"
+            )
+        if len(self['country-code']) != 2:
+            raise usage.UsageError("--country-code must be 2 characters")
+        if not os.path.isdir(self['output']):
+            raise usage.UsageError("%s is not a directory" % self['output'])
+
+        self['country-code'] = self['country-code'].lower()
+
+        output_dir = os.path.abspath(self['output'])
+        output_dir = os.path.join(output_dir,
+                                  "deck-%s" % self['country-code'])
+        self['output'] = output_dir
+
+
+class Deck():
+    _base_entry = {
+        "options": {
+            "collector": None,
+            "help": 0,
+            "logfile": None,
+            "no-default-reporter": 0,
+            "parallelism": None,
+            "pcapfile": None,
+            "reportfile": None,
+            "resume": 0,
+            "testdeck": None
+        }
+    }
+
+    def __init__(self):
+        self.deck = []
+
+    def add_test(self, test_file, subargs=[]):
+        deck_entry = copy.deepcopy(self._base_entry)
+        deck_entry['options']['test_file'] = test_file
+        deck_entry['options']['subargs'] = subargs
+        self.deck.append(deck_entry)
+
+    def pprint(self):
+        print yaml.safe_dump(self.deck)
+
+    def write_to_file(self, filename):
+        with open(filename, "w+") as f:
+            f.write(yaml.safe_dump(self.deck))
+
+
+def usage():
+    print "%s <two letter country code> <output dir>" % sys.argv[0]
+
+
+def run():
+    """
+    Entry point of oonideckgen: parse the command line, generate the input
+    files for the requested country and write the resulting deck.
+
+    Exits with status 1 when the command line is invalid.
+    """
+    options = Options()
+    try:
+        options.parseOptions()
+    except usage.UsageError as error_message:
+        print "%s: %s" % (sys.argv[0], error_message)
+        print "%s: Try --help for usage details." % (sys.argv[0])
+        sys.exit(1)
+
+    config.read_config_file()
+
+    # Create the output directory; an already existing one is not an error.
+    try:
+        os.makedirs(options['output'])
+    except OSError as exception:
+        if exception.errno != errno.EEXIST:
+            raise
+    dns_servers_processor = inputs['namebench-dns-servers.csv']['processor']
+    url_lists_processor = inputs['citizenlab-test-lists.zip']['processor']
+
+    # The country specific URL list is optional: on any error only the
+    # global list is used.
+    try:
+        url_list_country = url_lists_processor.generate_country_input(
+            options['country-code'],
+            options['output']
+        )
+
+    except Exception:
+        print "Could not generate country specific url list"
+        print "We will just use the global one."
+        url_list_country = None
+
+    url_list_global = url_lists_processor.generate_global_input(
+        options['output']
+    )
+    dns_servers = dns_servers_processor.generate_country_input(
+        options['country-code'],
+        options['output']
+    )
+
+    deck = Deck()
+    # deck.add_test('manipulation/http_host', ['-f', 'somefile.txt'])
+    deck.add_test('blocking/http_requests', ['-f', url_list_global])
+    deck.add_test('blocking/dns_consistency',
+                  ['-f', url_list_global, '-T', dns_servers])
+
+    # Repeat the blocking tests with the country list when one was generated.
+    if url_list_country is not None:
+        deck.add_test('blocking/dns_consistency',
+                      ['-f', url_list_country, '-T', dns_servers])
+        deck.add_test('blocking/http_requests', ['-f', url_list_country])
+
+    deck.add_test('manipulation/http_invalid_request_line')
+    deck.add_test('manipulation/http_header_field_manipulation')
+    # deck.add_test('manipulation/traceroute')
+    deck.pprint()
+    # The deck is stored next to the generated input files.
+    deck_filename = os.path.join(options['output'],
+                                 "%s-%s-user.deck" % (__version__,
+                                                      options['country-code']))
+    deck.write_to_file(deck_filename)
+    print "Deck written to %s" % deck_filename
+    print "Run ooniprobe like so:"
+    print "ooniprobe -i %s" % deck_filename
+
+if __name__ == "__main__":
+    run()
diff --git a/ooni/deckgen/processors/__init__.py b/ooni/deckgen/processors/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/ooni/deckgen/processors/citizenlab_test_lists.py b/ooni/deckgen/processors/citizenlab_test_lists.py
new file mode 100644
index 0000000..645d714
--- /dev/null
+++ b/ooni/deckgen/processors/citizenlab_test_lists.py
@@ -0,0 +1,53 @@
+import os
+import csv
+from ooni.settings import config
+
+def generate_country_input(country_code, dst):
+    """
+    Write to dst/citizenlab-urls-{country_code}.txt
+    the list for the given country code.
+
+    Returns:
+
+        the path to the generated input
+    """
+
+    country_code = country_code.lower()
+    filename = os.path.join(dst, "citizenlab-urls-%s.txt" % country_code)
+    fw = open(filename, "w+")
+
+    input_list = os.path.join(config.resources_directory,
+                              "citizenlab-test-lists",
+                              "test-lists-master",
+                              "csv", country_code + ".csv")
+
+    if not os.path.exists(input_list):
+        raise Exception("Could not find list for country %s" % country_code)
+
+    with open(input_list) as f:
+        csvreader = csv.reader(f)
+        csvreader.next()
+        for row in csvreader:
+            fw.write("%s\n" % row[0])
+
+    fw.close()
+    return filename
+
+
+def generate_global_input(dst):
+
+    filename = os.path.join(dst, "citizenlab-urls-global.txt")
+    fw = open(filename, "w+")
+
+    input_list = os.path.join(config.resources_directory,
+                              "citizenlab-test-lists",
+                              "test-lists-master",
+                              "csv", "global.csv")
+    with open(input_list) as f:
+        csvreader = csv.reader(f)
+        csvreader.next()
+        for row in csvreader:
+            fw.write("%s\n" % row[0])
+
+    fw.close()
+    return filename
diff --git a/ooni/deckgen/processors/namebench_dns_servers.py b/ooni/deckgen/processors/namebench_dns_servers.py
new file mode 100644
index 0000000..1079335
--- /dev/null
+++ b/ooni/deckgen/processors/namebench_dns_servers.py
@@ -0,0 +1,53 @@
+import os
+import csv
+import GeoIP
+
+from ooni.settings import config
+
+
+class GeoIPDB(object):
+    _borg = {}
+    country = None
+    asn = None
+
+    def __init__(self):
+        self.__dict__ = self._borg
+        if not self.country:
+            try:
+                print config.advanced.geoip_data_dir
+                country_file = os.path.join(config.advanced.geoip_data_dir,
+                                            'GeoIP.dat')
+                self.country = GeoIP.open(country_file,
+                                          GeoIP.GEOIP_STANDARD)
+            except:
+                raise Exception("Edit the geoip_data_dir line in your config"
+                                " file to point to your geoip files")
+
+
+def generate_country_input(country_code, dst):
+
+    csv_file = os.path.join(config.resources_directory,
+                            "namebench-dns-servers.csv")
+    filename = os.path.join(dst, "dns-server-%s.txt" % country_code)
+    fw = open(filename, "w")
+    geoip_db = GeoIPDB()
+    reader = csv.reader(open(csv_file))
+    for row in reader:
+        if row[2] == 'X-Internal-IP':
+            continue
+        elif row[2] == 'X-Unroutable':
+            continue
+        elif row[2] == 'X-Link_local':
+            continue
+        ipaddr = row[0]
+        cc = geoip_db.country.country_code_by_addr(ipaddr)
+        if not cc:
+            continue
+        if cc.lower() == country_code.lower():
+            fw.write(ipaddr + "\n")
+    fw.close()
+    return filename
+
+
+def generate_global_input(dst):
+    """Placeholder: does nothing with dst and returns None."""
+    pass
diff --git a/ooni/resources/__init__.py b/ooni/resources/__init__.py
new file mode 100644
index 0000000..0afc0af
--- /dev/null
+++ b/ooni/resources/__init__.py
@@ -0,0 +1,22 @@
+from ooni.settings import config
+from ooni.utils import unzip
+
+from ooni.deckgen.processors import citizenlab_test_lists
+from ooni.deckgen.processors import namebench_dns_servers
+
+__version__ = "0.0.1"
+
+# Known resources, keyed by the file name they are stored under.
+# Each entry holds:
+#   url         - location the resource is downloaded from
+#   action      - callable applied to the downloaded file, or None
+#   action_args - extra positional arguments passed to the action
+#   processor   - module providing the generate_*_input() functions
+inputs = {
+    "namebench-dns-servers.csv": {
+        "url": "https://namebench.googlecode.com/svn/trunk/config/servers.csv",
+        "action": None,
+        "action_args": [],
+        "processor": namebench_dns_servers,
+    },
+    "citizenlab-test-lists.zip": {
+        "url": "https://github.com/citizenlab/test-lists/archive/master.zip",
+        "action": unzip,
+        "action_args": [config.resources_directory],
+        "processor": citizenlab_test_lists
+    }
+}
diff --git a/ooni/resources/cli.py b/ooni/resources/cli.py
new file mode 100644
index 0000000..4cc4c6d
--- /dev/null
+++ b/ooni/resources/cli.py
@@ -0,0 +1,28 @@
+import sys
+
+from twisted.python import usage
+
+from ooni.resources import __version__
+from ooni.resources import update
+
+
+class Options(usage.Options):
+    synopsis = """%s"""
+
+    optParameters = []
+
+    def opt_version(self):
+        print("ooniresources version: %s" % __version__)
+        sys.exit(0)
+
+
+def run():
+    options = Options()
+    try:
+        options.parseOptions()
+    except usage.UsageError as error_message:
+        print "%s: %s" % (sys.argv[0], error_message)
+        print "%s: Try --help for usage details." % (sys.argv[0])
+        sys.exit(1)
+
+    return update.download_inputs()
diff --git a/ooni/resources/update.py b/ooni/resources/update.py
new file mode 100644
index 0000000..28a6ec7
--- /dev/null
+++ b/ooni/resources/update.py
@@ -0,0 +1,47 @@
+import os
+
+from twisted.internet import reactor, defer, protocol
+from twisted.web.client import RedirectAgent, Agent
+
+from ooni.settings import config
+from ooni.resources import inputs
+
+agent = RedirectAgent(Agent(reactor))
+
+
+class SaveToFile(protocol.Protocol):
+    def __init__(self, finished, filesize, filename):
+        self.finished = finished
+        self.remaining = filesize
+        self.outfile = open(filename, 'wb')
+
+    def dataReceived(self, bytes):
+        if self.remaining:
+            display = bytes[:self.remaining]
+            self.outfile.write(display)
+            self.remaining -= len(display)
+        else:
+            self.outfile.close()
+
+    def connectionLost(self, reason):
+        self.outfile.close()
+        self.finished.callback(None)
+
+
+ at defer.inlineCallbacks
+def download_inputs():
+    for filename, resource in inputs.items():
+        print "Downloading %s" % filename
+
+        filename = os.path.join(config.resources_directory, filename)
+
+        response = yield agent.request("GET", resource['url'])
+        finished = defer.Deferred()
+        response.deliverBody(SaveToFile(finished, response.length, filename))
+        yield finished
+
+        if resource['action'] is not None:
+            yield defer.maybeDeferred(resource['action'],
+                                      filename,
+                                      *resource['action_args'])
+        print "%s written." % filename





More information about the tor-commits mailing list