[tor-commits] [ooni-probe/master] Added random hostname testing and vendor DNS tests.
art at torproject.org
art at torproject.org
Mon Jul 9 14:39:04 UTC 2012
commit 95bbf24df68c13f16fd1d231326f89934133f5ae
Author: Isis Lovecruft <isis at patternsinthevoid.net>
Date: Thu Apr 12 16:32:42 2012 -0700
Added random hostname testing and vendor DNS tests.
---
ooni-probe.conf | 3 +
tests/captiveportal.py | 310 ++++++++++++++++++++++++++++++++++++++++++++----
2 files changed, 290 insertions(+), 23 deletions(-)
diff --git a/ooni-probe.conf b/ooni-probe.conf
index a19c1d0..7a456d8 100644
--- a/ooni-probe.conf
+++ b/ooni-probe.conf
@@ -55,6 +55,9 @@ default_ua = Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.2) Gecko/20100101 Firef
# Enable vendor tests for captive portals:
do_captive_portal_vendor_tests = true
+# Enable DNS-based vendor tests for captive portals:
+do_captive_portal_vendor_dns_tests = true
+
### traceroute testing related config parameters
# This is the list of ips to traceroute to
diff --git a/tests/captiveportal.py b/tests/captiveportal.py
index 7b46094..e74490c 100644
--- a/tests/captiveportal.py
+++ b/tests/captiveportal.py
@@ -4,14 +4,16 @@
*************
This test is a collection of tests to detect the presence of a
- captive portal. Code is taken, in part from the old ooni-probe,
+ captive portal. Code is taken, in part, from the old ooni-probe,
which was written by Jacob Appelbaum and Arturo Filastò.
:copyright: (c) 2012 Isis Lovecruft
:license: see LICENSE for more details
"""
+import base64
import os
import re
+import string
import urllib2
from urlparse import urlparse
@@ -19,14 +21,24 @@ from plugoo.assets import Asset
from plugoo.tests import Test
try:
+ from dns import resolver
+except ImportError:
+ print "The dnspython module was not found. https://crate.io/packages/dnspython/"
+
+try:
from gevent import monkey
- monkey.patch_socket()
+ monkey.patch_socket(dns=False)
except ImportError:
print "The gevent module was not found. https://crate.io/packages/gevent/"
__plugoo__ = "captiveportal"
__desc__ = "Captive portal detection test"
+# TODO make tally marker system to display all detected
+# censorship events at the end of the test.
+#tally = 0
+#tally_marks = []
+
class CaptivePortalAsset(Asset):
"""
Parses captive_portal_tests.txt into an Asset.
@@ -85,20 +97,20 @@ class CaptivePortal(Test):
pattern = re.compile(control_result)
match = pattern.search(response_content)
if not match:
- log.info("Fuzzy HTTP content comparison of experiment" \
- " URL '%s' and the expected control result" \
- " do not match." % experimental_url)
+ log.info("Fuzzy HTTP content comparison for experiment URL")
+ log.info("'%s'" % experimental_url)
+ log.info("does not match!")
return False, response_code
else:
- log.info("Fuzzy HTTP content comparison of experiment" \
- " URL '%s' and the expected control result" \
- " yielded a match." % experimental_url)
+ log.info("Fuzzy HTTP content comparison of experiment URL")
+ log.info("'%s'" % experimental_url)
+ log.info("and the expected control result yielded a match.")
return True, response_code
else:
if str(response_content) != str(control_result):
- log.info("HTTP content comparison of experiment URL" \
- " '%s' and the expected control result" \
- " do not match." % experimental_url)
+ log.info("HTTP content comparison of experiment URL")
+ log.info("'%s'" % experimental_url)
+ log.info("and the expected control result do not match.")
return False, response_code
else:
return True, response_code
@@ -122,6 +134,216 @@ class CaptivePortal(Test):
return False
return True
+    def dns_resolve(self, hostname, nameserver=None):
+        """
+        Resolve hostname to its address(es), through nameserver if
+        one is given, otherwise through the local DNS resolver.
+        Returns a list of address strings, ['NXDOMAIN'] if the name
+        does not exist, or False if the lookup failed in any other way.
+        """
+        log = self.logger
+
+        if nameserver is not None:
+            # Query only the nameserver that was asked for.
+            res = resolver.Resolver(configure=False)
+            res.nameservers = [nameserver]
+        else:
+            res = resolver.Resolver()
+
+        try:
+            return [addr.address for addr in res.query(hostname)]
+        except resolver.NXDOMAIN:
+            log.info("DNS resolution for %s returned NXDOMAIN" % hostname)
+            return ['NXDOMAIN']
+        except Exception as e:
+            # Still best-effort, but the failure is logged, not hidden.
+            log.info("DNS resolution for %s failed: %s" % (hostname, e))
+            return False
+
+    def dns_resolve_match(self, experiment_hostname, control_address):
+        """
+        Resolve experiment_hostname and check whether control_address
+        is among the addresses returned. Always returns a 2-tuple:
+        (True, addresses) on a match, (False, addresses) on a mismatch,
+        and (None, []) if the lookup failed, so callers can unpack it.
+        """
+        log = self.logger
+
+        experiment_address = self.dns_resolve(experiment_hostname)
+        if not experiment_address:
+            # dns_resolve() gave False or an empty list: nothing to compare.
+            return None, []
+        if control_address in experiment_address:
+            return True, experiment_address
+        log.info("DNS comparison of control '%s' does not match "
+                 "experiment response '%s'"
+                 % (control_address, experiment_address))
+        return False, experiment_address
+
+    def get_random_url_safe_string(self, length):
+        """
+        Returns a random url-safe string of exactly the given length,
+        clamped to 1 <= length <= 256. The returned string will always
+        start with an alphabetic character.
+        """
+        length = min(max(int(length), 1), 256)
+
+        # Encoding n random bytes always gives more than n non-padding
+        # characters, so slicing to length never picks up '=' padding.
+        random_string = base64.urlsafe_b64encode(os.urandom(length))[:length]
+
+        # Redraw until the first character is a letter; the url-safe
+        # alphabet also contains digits, '-' and '_'.
+        while not random_string[:1].isalpha():
+            random_string = base64.urlsafe_b64encode(
+                os.urandom(length))[:length]
+
+        return random_string
+
+    def get_random_hostname(self, length=None):
+        """
+        Returns a random hostname whose SLD is built from a random
+        url-safe string of the requested length, followed by a TLD
+        picked at random. If length is unspecified, length=32 is used.
+
+        These *should* all resolve to NXDOMAIN. If they actually
+        resolve to a box that isn't part of a captive portal that
+        would be rather interesting.
+        """
+        # Imported here rather than reached through urllib2.random,
+        # which only works because urllib2 happens to import random.
+        import random
+
+        if length is None:
+            length = 32
+
+        # get_random_url_safe_string() already guarantees that the
+        # string starts with a letter, so no re-check is needed here.
+        random_sld = self.get_random_url_safe_string(length)
+
+        tld_list = ['.com', '.net', '.org', '.info', '.test', '.invalid']
+        random_tld = random.choice(tld_list)
+        return random_sld + random_tld
+
+    def compare_random_hostnames(self, hostname_count=None, hostname_length=None):
+        """
+        Get hostname_count number of random hostnames with SLD length
+        of hostname_length, and then attempt DNS resolution. If no
+        arguments are given, default to three hostnames of 32 bytes
+        each. These random hostnames *should* resolve to NXDOMAIN,
+        except in the case where a user is presented with a captive
+        portal and remains unauthenticated, in which case the captive
+        portal may return the address of the authentication page.
+
+        Returns (True, empty set) if every response was NXDOMAIN, and
+        otherwise (False, unexpected), where unexpected is the set of
+        responses other than NXDOMAIN; it is empty when no lookup
+        succeeded at all.
+        """
+        log = self.logger
+
+        if hostname_count is None:
+            hostname_count = 3
+
+        log.info("Generating random hostnames...")
+        log.info("Resolving DNS for %d random hostnames..." % hostname_count)
+
+        control = ['NXDOMAIN']
+        responses = []
+
+        for x in range(hostname_count):
+            random_hostname = self.get_random_hostname(hostname_length)
+            result = self.dns_resolve_match(random_hostname, control[0])
+            if result is None:  # lookup failed; nothing to record
+                continue
+            response_match, response_address = result
+            if response_match is False:
+                log.info("Strangely, DNS resolution of the random hostname")
+                log.info("%s actually points to %s"
+                         % (random_hostname, response_address))
+            responses.extend(response_address or [])  # address may be False
+
+        r = set(responses)
+        intersection = r & set(control)
+        relative_complement = r - set(control)
+
+        if not r:
+            log.info("None of the random hostnames could be looked up.")
+            return False, relative_complement
+        elif not relative_complement:
+            log.info("All %d random hostnames properly resolved to NXDOMAIN."
+                     % hostname_count)
+            return True, relative_complement
+        elif intersection:
+            log.info("Something odd happened. Some random hostnames correctly "
+                     "resolved to NXDOMAIN, but several others resolved "
+                     "to the following addresses: %s" % relative_complement)
+            return False, relative_complement
+        elif len(r) == 1:
+            log.info("All random hostnames resolved to the IP address ")
+            log.info("'%s', which is indicative of a captive portal." % list(r)[0])
+            return False, relative_complement
+        else:
+            log.debug("Apparently, pigs are flying on your network, 'cause a "
+                      "bunch of hostnames made from 32-byte random strings "
+                      "just magically resolved to a bunch of random addresses. "
+                      "That is definitely highly improbable. In fact, my napkin "
+                      "tells me that the probability of just one of those "
+                      "hostnames resolving to an address is 1.68e-59, making "
+                      "it nearly twice as unlikely as an MD5 hash collision. "
+                      "Either someone is seriously messing with your network, "
+                      "or else you are witnessing the impossible. %s" % r)
+            return False, relative_complement
+
+    def google_dns_cp_test(self):
+        """
+        Resolve three 10-byte random hostnames, as Google Chrome does,
+        log the verdict, and return the comparison's second result.
+        """
+        report = self.logger.info
+        report("")
+        report("Running the Google Chrome DNS-based captive portal test...")
+        all_nxdomain, leftovers = self.compare_random_hostnames(3, 10)
+        if all_nxdomain:
+            verdict = ("Google Chrome DNS-based captive portal test did not",
+                       "detect a captive portal.")
+        else:
+            verdict = ("Google Chrome DNS-based captive portal test believes",
+                       "you are in a captive portal, or else something very",
+                       "odd is happening with your DNS.")
+        for line in verdict:
+            report(line)
+        return leftovers
+
+    def ms_dns_cp_test(self):
+        """
+        Microsoft "phones home" to a server which will always resolve
+        to the same address. Returns the addresses dns.msftncsi.com
+        resolved to here, or an empty list if the lookup failed.
+        """
+        log = self.logger
+        log.info("")
+        log.info("Running the Microsoft NCSI DNS-based captive")
+        log.info("portal test...")
+        result = self.dns_resolve_match("dns.msftncsi.com", "131.107.255.255")
+        # A failed lookup may come back as a bare None; don't unpack that.
+        msmatch, ms_dns_result = result if result is not None else (None, [])
+        if msmatch:
+            log.info("Microsoft NCSI DNS-based captive portal test did not")
+            log.info("detect a captive portal.")
+        else:
+            log.info("Microsoft NCSI DNS-based captive portal test ")
+            log.info("believes you are in a captive portal.")
+        return ms_dns_result
+
+    def run_vendor_dns_tests(self):
+        """
+        Run each vendor DNS-based captive portal test in turn.
+        Whatever the tests return is discarded; this returns None.
+        """
+        for vendor_test in (self.google_dns_cp_test, self.ms_dns_cp_test):
+            vendor_test()
+
def run_vendor_tests(self, *a, **kw):
"""
These are several vendor tests used to detect the presence of
@@ -156,13 +378,16 @@ class CaptivePortal(Test):
def compare_content(status_func, exp_url, ctrl_result, ctrl_code, headers,
test_name, fuzzy):
+ log.info("")
log.info("Running the %s test..." % test_name)
content_match, exp_code = cm(exp_url, ctrl_result, headers, fuzzy)
status_match = status_func(exp_code, ctrl_code)
if status_match and content_match:
- log.info("The %s test was unable to detect a captive portal." % test_name)
+ log.info("The %s test was unable to detect " % test_name)
+ log.info("a captive portal.")
else:
- log.info("The %s test shows that your network is filtered." % test_name)
+ log.info("The %s test shows that your network" % test_name)
+ log.info("is filtered.")
for vt in vendor_tests:
exp_url = vt[0]
@@ -202,7 +427,7 @@ class CaptivePortal(Test):
if (os.path.isfile(os.path.join(self.config.main.assetdir,
self.config.tests.captive_portal))):
kw['data'].append(None)
- kw['data'].append('user defined')
+ kw['data'].append('user-defined')
experiment_url = kw['data'][0]
control_result = kw['data'][1]
@@ -215,34 +440,65 @@ class CaptivePortal(Test):
snm = self.http_status_code_no_match
log = self.logger
+
+ #tally = kw['tally']
+ #tally_marks = kw['tally_marks']
- if test_name == "user defined":
- log.info("Running the %s test for %s..." % (test_name, experiment_url))
+ if test_name == "user-defined":
+ log.info("Running %s test for '%s'..." % (test_name, experiment_url))
content_match, experiment_code = cm(experiment_url, control_result)
status_match = sm(experiment_code, control_code)
if status_match and content_match:
- log.info("The %s test was unable to detect a captive portal."
- % test_name)
+ log.info("The %s test for '%s'" % (test_name, experiment_url))
+ log.info("was unable to detect a captive portal.")
return True, test_name
elif status_match and not content_match:
- log.info("The %s test detected mismatched content, retrying "
- "with fuzzy match enabled." % test_name)
+ log.info("Retrying '%s' with fuzzy match enabled."
+ % experiment_url)
content_fuzzy_match, experiment_code = cm(experiment_url,
control_result,
fuzzy=True)
if content_fuzzy_match:
return True, test_name
else:
+ log.info("Found modified content on '%s'," % experiment_url)
+ log.info("which could indicate a captive portal.")
+
+ ## TODO return exp_content and compare HTTP headers
+ #tally = tally + 1
+ #tally_marks.append([experiment_url, experiment_code,
+ # control_result, control_code])
return False, test_name
else:
- log.info("The %s test shows that your network is filtered."
- % test_name)
+ log.info("The content comparison test for ")
+ log.info("'%s'" % experiment_url)
+ log.info("shows that your HTTP traffic is filtered.")
+ #tally = tally + 1
+ #tally_marks.append([experiment_url, experiment_code,
+ # control_result, control_code])
return False, test_name
else:
log.warn("Ooni is trying to run an undefined captive portal test.")
return False, test_name
+    def confirmed_kill_count(self, *a, **kw):
+        """
+        Yeah, sounds scary. And it is.
+
+        Logs the tally of detected censorship events and the experiment
+        results which upped the count; nothing is returned. 'tally' and
+        'tally_marks' come from keyword arguments or one positional dict.
+        """
+        log = self.logger
+        if a and isinstance(a[0], dict):  # values passed as a single dict
+            kw = dict(a[0], **kw)
+        tally, tally_marks = kw['tally'], kw['tally_marks']
+        log.info("")
+        log.info("OONI-probe captive portal test detected %d potential " % tally)
+        log.info("censorship events.")
+        log.info("Events which were flagged as potential censorship:")
+        log.info("%s" % (tally_marks,))
def run(ooni):
"""
@@ -267,11 +523,19 @@ def run(ooni):
captiveportal = CaptivePortal(ooni)
log.info("Starting captive portal test...")
- log.info("Running user defined tests...")
captiveportal.run(assets, {'index': 1})
+ #captiveportal.run(assets, {'index': 1, 'tally': tally,
+ # 'tally_marks': tally_marks})
if config.tests.do_captive_portal_vendor_tests:
log.info("Running vendor tests...")
captiveportal.run_vendor_tests()
+ if config.tests.do_captive_portal_vendor_dns_tests:
+ log.info("Running vendor DNS-based tests...")
+ captiveportal.run_vendor_dns_tests()
+
+ #captiveportal.confirmed_kill_count({'tally': tally,
+ # 'tally_marks': tally_marks})
+
log.info("Captive portal test finished!")
More information about the tor-commits
mailing list