[tor-commits] [ooni-probe/master] Address feedback by @bassosimone
art at torproject.org
art at torproject.org
Mon May 30 16:28:34 UTC 2016
commit 8e7199d85c877353b83b3da98936c602d4c4a5b4
Author: Arturo Filastò <arturo at filasto.net>
Date: Mon May 30 10:42:29 2016 +0200
Address feedback by @bassosimone
* Test the following words when a word is shorter than 5 chars in the
web_connectivity test.
* Move the charset detection regexp into the correct module and change the
unittest accordingly.
* Include ooni.common in setup.py
* Various code style fixes
---
ooni/backend_client.py | 6 +-----
ooni/common/http_utils.py | 2 +-
ooni/nettests/blocking/web_connectivity.py | 21 ++++++++++++---------
ooni/templates/httpt.py | 4 ----
ooni/tests/test_common.py | 19 +++++++++++++++++++
ooni/tests/test_templates.py | 16 ----------------
ooni/utils/net.py | 1 -
setup.py | 1 +
8 files changed, 34 insertions(+), 36 deletions(-)
diff --git a/ooni/backend_client.py b/ooni/backend_client.py
index d4c463e..0e85dd7 100644
--- a/ooni/backend_client.py
+++ b/ooni/backend_client.py
@@ -207,9 +207,7 @@ class CollectorClient(OONIBClient):
@d.addErrback
def err(failure):
failure.trap(Error)
- if failure.value.status == '404':
- return True
- return False
+ return failure.value.status == '404'
return d
@@ -345,8 +343,6 @@ class CollectorClient(OONIBClient):
class WebConnectivityClient(OONIBClient):
def isReachable(self):
- # XXX maybe in the future we can have a dedicated API endpoint to
- # test the reachability of the collector.
d = self.queryBackend('GET', '/status')
@d.addCallback
diff --git a/ooni/common/http_utils.py b/ooni/common/http_utils.py
index 6d636d5..57c3a15 100644
--- a/ooni/common/http_utils.py
+++ b/ooni/common/http_utils.py
@@ -2,7 +2,7 @@ import re
import codecs
from base64 import b64encode
-META_CHARSET_REGEXP = re.compile('<meta(?!\s*(?:name|value)\s*=)[^>]*?charset\s*=[\s"\']*([^\s"\'/>]*)')
+META_CHARSET_REGEXP = re.compile('<meta(?!\s*(?:name|value)\s*=)[^>]*?charset\s*=[\s"\']*([^\s"\'/>!;]+)')
def representBody(body):
if not body:
diff --git a/ooni/nettests/blocking/web_connectivity.py b/ooni/nettests/blocking/web_connectivity.py
index 655bf76..b4ccff1 100644
--- a/ooni/nettests/blocking/web_connectivity.py
+++ b/ooni/nettests/blocking/web_connectivity.py
@@ -42,10 +42,9 @@ class UsageOptions(usage.Options):
def is_public_ipv4_address(address):
try:
ip_address = IPv4Address(address)
- if not any([ip_address.is_private,
- ip_address.is_loopback]):
- return True
- return False
+ return not any(
+ [ip_address.is_private, ip_address.is_loopback]
+ )
except AddressValueError:
return None
@@ -293,13 +292,17 @@ class WebConnectivityTest(httpt.HTTPTest, dnst.DNSTest):
def compare_titles(self, experiment_http_response):
experiment_title = extractTitle(experiment_http_response.body).strip()
control_title = self.control['http_request']['title'].strip()
- first_exp_word = experiment_title.split(' ')[0]
- first_ctrl_word = control_title.split(' ')[0]
- if len(first_exp_word) < 5:
+
+ control_words = control_title.split(' ')
+ for exp_word, idx in enumerate(experiment_title.split(' ')):
# We don't consider to match words that are shorter than 5
# characters (5 is the average word length for english)
- return False
- return (first_ctrl_word.lower() == first_exp_word.lower())
+ if len(exp_word) < 5:
+ continue
+ try:
+ return control_words[idx].lower() == exp_word.lower()
+ except IndexError:
+ return False
def compare_http_experiments(self, experiment_http_response):
diff --git a/ooni/templates/httpt.py b/ooni/templates/httpt.py
index f8ea941..3f7e77d 100644
--- a/ooni/templates/httpt.py
+++ b/ooni/templates/httpt.py
@@ -1,5 +1,3 @@
-import re
-import codecs
import random
from txtorcon.interface import StreamListenerMixin
@@ -22,8 +20,6 @@ from ooni.common.txextra import FixedRedirectAgent, TrueHeadersAgent
from ooni.common.http_utils import representBody
from ooni.errors import handleAllFailures
-META_CHARSET_REGEXP = re.compile('<meta(?!\s*(?:name|value)\s*=)[^>]*?charset\s*=[\s"\']*([^\s"\'/>!;]+)')
-
class InvalidSocksProxyOption(Exception):
pass
diff --git a/ooni/tests/test_common.py b/ooni/tests/test_common.py
new file mode 100644
index 0000000..1cd77cf
--- /dev/null
+++ b/ooni/tests/test_common.py
@@ -0,0 +1,19 @@
+from twisted.trial import unittest
+from ooni.common.http_utils import META_CHARSET_REGEXP
+
+class TestHTTPUtils(unittest.TestCase):
+ def test_charset_detection(self):
+ no_charset_html = """
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html>
+<head>
+ <title>Foo</title>
+"""
+ with_charset_html = no_charset_html + '\n<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">'
+ with_empty_charset = no_charset_html + '\n<meta http-equiv="Content-Type" content="text/html; charset=">'
+ with_two_charsets = no_charset_html + '\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8;charset=utf-8">'
+ self.assertEqual(META_CHARSET_REGEXP.search(no_charset_html), None)
+ self.assertEqual(META_CHARSET_REGEXP.search(with_charset_html).group(1), 'iso-8859-1')
+ self.assertEqual(META_CHARSET_REGEXP.search(
+ with_two_charsets).group(1), 'UTF-8')
+ self.assertEqual(META_CHARSET_REGEXP.search(with_empty_charset), None)
diff --git a/ooni/tests/test_templates.py b/ooni/tests/test_templates.py
index ebd5b2e..7aa399b 100644
--- a/ooni/tests/test_templates.py
+++ b/ooni/tests/test_templates.py
@@ -46,22 +46,6 @@ class TestHTTPT(unittest.TestCase):
yield self.assertFailure(http_test.doRequest('http://invaliddomain/'), DNSLookupError)
assert http_test.report['requests'][0]['failure'] == 'dns_lookup_error'
- def test_charset_detection(self):
- no_charset_html = """
- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
-<html>
-<head>
- <title>Foo</title>
-"""
- with_charset_html = no_charset_html + '\n<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">'
- with_empty_charset = no_charset_html + '\n<meta http-equiv="Content-Type" content="text/html; charset=">'
- with_two_charsets = no_charset_html + '\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8;charset=utf-8">'
- self.assertEqual(httpt.META_CHARSET_REGEXP.search(no_charset_html), None)
- self.assertEqual(httpt.META_CHARSET_REGEXP.search(with_charset_html).group(1), 'iso-8859-1')
- self.assertEqual(httpt.META_CHARSET_REGEXP.search(
- with_two_charsets).group(1), 'UTF-8')
- self.assertEqual(httpt.META_CHARSET_REGEXP.search(with_empty_charset), None)
-
class TestDNST(unittest.TestCase):
def setUp(self):
if not is_internet_connected():
diff --git a/ooni/utils/net.py b/ooni/utils/net.py
index 9918b60..79d17f4 100644
--- a/ooni/utils/net.py
+++ b/ooni/utils/net.py
@@ -1,4 +1,3 @@
-import re
import sys
import socket
from random import randint
diff --git a/setup.py b/setup.py
index da0b967..3bfc5cd 100644
--- a/setup.py
+++ b/setup.py
@@ -171,6 +171,7 @@ data_files = []
packages = [
'ooni',
'ooni.api',
+ 'ooni.common',
'ooni.deckgen',
'ooni.deckgen.processors',
'ooni.kit',
More information about the tor-commits
mailing list