[tor-commits] [ooni-probe/master] Address feedback by @bassosimone

art at torproject.org art at torproject.org
Mon May 30 16:28:34 UTC 2016


commit 8e7199d85c877353b83b3da98936c602d4c4a5b4
Author: Arturo Filastò <arturo at filasto.net>
Date:   Mon May 30 10:42:29 2016 +0200

    Address feedback by @bassosimone
    
    * Test the next word if the current word is shorter than 5 chars in the
      web_connectivity test.
    
    * Move the charset detection regexp into the correct module and change the
      unittest accordingly.
    
    * Include ooni.common in setup.py
    
    * Various code style fixes
---
 ooni/backend_client.py                     |  6 +-----
 ooni/common/http_utils.py                  |  2 +-
 ooni/nettests/blocking/web_connectivity.py | 21 ++++++++++++---------
 ooni/templates/httpt.py                    |  4 ----
 ooni/tests/test_common.py                  | 19 +++++++++++++++++++
 ooni/tests/test_templates.py               | 16 ----------------
 ooni/utils/net.py                          |  1 -
 setup.py                                   |  1 +
 8 files changed, 34 insertions(+), 36 deletions(-)

diff --git a/ooni/backend_client.py b/ooni/backend_client.py
index d4c463e..0e85dd7 100644
--- a/ooni/backend_client.py
+++ b/ooni/backend_client.py
@@ -207,9 +207,7 @@ class CollectorClient(OONIBClient):
         @d.addErrback
         def err(failure):
             failure.trap(Error)
-            if failure.value.status == '404':
-                return True
-            return False
+            return failure.value.status == '404'
 
         return d
 
@@ -345,8 +343,6 @@ class CollectorClient(OONIBClient):
 
 class WebConnectivityClient(OONIBClient):
     def isReachable(self):
-        # XXX maybe in the future we can have a dedicated API endpoint to
-        # test the reachability of the collector.
         d = self.queryBackend('GET', '/status')
 
         @d.addCallback
diff --git a/ooni/common/http_utils.py b/ooni/common/http_utils.py
index 6d636d5..57c3a15 100644
--- a/ooni/common/http_utils.py
+++ b/ooni/common/http_utils.py
@@ -2,7 +2,7 @@ import re
 import codecs
 from base64 import b64encode
 
-META_CHARSET_REGEXP = re.compile('<meta(?!\s*(?:name|value)\s*=)[^>]*?charset\s*=[\s"\']*([^\s"\'/>]*)')
+META_CHARSET_REGEXP = re.compile('<meta(?!\s*(?:name|value)\s*=)[^>]*?charset\s*=[\s"\']*([^\s"\'/>!;]+)')
 
 def representBody(body):
     if not body:
diff --git a/ooni/nettests/blocking/web_connectivity.py b/ooni/nettests/blocking/web_connectivity.py
index 655bf76..b4ccff1 100644
--- a/ooni/nettests/blocking/web_connectivity.py
+++ b/ooni/nettests/blocking/web_connectivity.py
@@ -42,10 +42,9 @@ class UsageOptions(usage.Options):
 def is_public_ipv4_address(address):
     try:
         ip_address = IPv4Address(address)
-        if not any([ip_address.is_private,
-                    ip_address.is_loopback]):
-            return True
-        return False
+        return not any(
+            [ip_address.is_private, ip_address.is_loopback]
+        )
     except AddressValueError:
         return None
 
@@ -293,13 +292,17 @@ class WebConnectivityTest(httpt.HTTPTest, dnst.DNSTest):
     def compare_titles(self, experiment_http_response):
         experiment_title = extractTitle(experiment_http_response.body).strip()
         control_title = self.control['http_request']['title'].strip()
-        first_exp_word = experiment_title.split(' ')[0]
-        first_ctrl_word = control_title.split(' ')[0]
-        if len(first_exp_word) < 5:
+
+        control_words = control_title.split(' ')
+        for idx, exp_word in enumerate(experiment_title.split(' ')):
             # We don't consider to match words that are shorter than 5
             # characters (5 is the average word length for english)
-            return False
-        return (first_ctrl_word.lower() == first_exp_word.lower())
+            if len(exp_word) < 5:
+                continue
+            try:
+                return control_words[idx].lower() == exp_word.lower()
+            except IndexError:
+                return False
 
     def compare_http_experiments(self, experiment_http_response):
 
diff --git a/ooni/templates/httpt.py b/ooni/templates/httpt.py
index f8ea941..3f7e77d 100644
--- a/ooni/templates/httpt.py
+++ b/ooni/templates/httpt.py
@@ -1,5 +1,3 @@
-import re
-import codecs
 import random
 
 from txtorcon.interface import StreamListenerMixin
@@ -22,8 +20,6 @@ from ooni.common.txextra import FixedRedirectAgent, TrueHeadersAgent
 from ooni.common.http_utils import representBody
 from ooni.errors import handleAllFailures
 
-META_CHARSET_REGEXP = re.compile('<meta(?!\s*(?:name|value)\s*=)[^>]*?charset\s*=[\s"\']*([^\s"\'/>!;]+)')
-
 class InvalidSocksProxyOption(Exception):
     pass
 
diff --git a/ooni/tests/test_common.py b/ooni/tests/test_common.py
new file mode 100644
index 0000000..1cd77cf
--- /dev/null
+++ b/ooni/tests/test_common.py
@@ -0,0 +1,19 @@
+from twisted.trial import unittest
+from ooni.common.http_utils import META_CHARSET_REGEXP
+
+class TestHTTPUtils(unittest.TestCase):
+    def test_charset_detection(self):
+        no_charset_html = """
+        <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html>
+<head>
+        <title>Foo</title>
+"""
+        with_charset_html = no_charset_html + '\n<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">'
+        with_empty_charset = no_charset_html + '\n<meta http-equiv="Content-Type" content="text/html; charset=">'
+        with_two_charsets = no_charset_html + '\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8;charset=utf-8">'
+        self.assertEqual(META_CHARSET_REGEXP.search(no_charset_html), None)
+        self.assertEqual(META_CHARSET_REGEXP.search(with_charset_html).group(1), 'iso-8859-1')
+        self.assertEqual(META_CHARSET_REGEXP.search(
+            with_two_charsets).group(1), 'UTF-8')
+        self.assertEqual(META_CHARSET_REGEXP.search(with_empty_charset), None)
diff --git a/ooni/tests/test_templates.py b/ooni/tests/test_templates.py
index ebd5b2e..7aa399b 100644
--- a/ooni/tests/test_templates.py
+++ b/ooni/tests/test_templates.py
@@ -46,22 +46,6 @@ class TestHTTPT(unittest.TestCase):
         yield self.assertFailure(http_test.doRequest('http://invaliddomain/'), DNSLookupError)
         assert http_test.report['requests'][0]['failure'] == 'dns_lookup_error'
 
-    def test_charset_detection(self):
-        no_charset_html = """
-        <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
-<html>
-<head>
-        <title>Foo</title>
-"""
-        with_charset_html = no_charset_html + '\n<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">'
-        with_empty_charset = no_charset_html + '\n<meta http-equiv="Content-Type" content="text/html; charset=">'
-        with_two_charsets = no_charset_html + '\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8;charset=utf-8">'
-        self.assertEqual(httpt.META_CHARSET_REGEXP.search(no_charset_html), None)
-        self.assertEqual(httpt.META_CHARSET_REGEXP.search(with_charset_html).group(1), 'iso-8859-1')
-        self.assertEqual(httpt.META_CHARSET_REGEXP.search(
-            with_two_charsets).group(1), 'UTF-8')
-        self.assertEqual(httpt.META_CHARSET_REGEXP.search(with_empty_charset), None)
-
 class TestDNST(unittest.TestCase):
     def setUp(self):
         if not is_internet_connected():
diff --git a/ooni/utils/net.py b/ooni/utils/net.py
index 9918b60..79d17f4 100644
--- a/ooni/utils/net.py
+++ b/ooni/utils/net.py
@@ -1,4 +1,3 @@
-import re
 import sys
 import socket
 from random import randint
diff --git a/setup.py b/setup.py
index da0b967..3bfc5cd 100644
--- a/setup.py
+++ b/setup.py
@@ -171,6 +171,7 @@ data_files = []
 packages = [
     'ooni',
     'ooni.api',
+    'ooni.common',
     'ooni.deckgen',
     'ooni.deckgen.processors',
     'ooni.kit',





More information about the tor-commits mailing list