[tor-commits] [ooni-probe/master] Test and Implement HTTP Header Field Manipulation Test
art at torproject.org
art at torproject.org
Sun Nov 25 05:57:54 UTC 2012
commit fb5ab197c4efabd005286c32335010f24a102267
Author: Arturo Filastò <art at fuffa.org>
Date: Sun Nov 25 06:50:37 2012 +0100
Test and Implement HTTP Header Field Manipulation Test
(rename it to what we had originally called it since it made most sense)
* Extend TrueHeaders to support calculation of difference between two HTTP
headers respectful of capitalization
* Write unittests for TrueHeaders functions with decent code path coverage
* Add commented out testdeck for running HTTP Header Field manipulation test
* Fix bug in calculation of runtime of test
---
before_i_commit.testdeck | 9 +
nettests/core/http_header_field_manipulation.py | 181 ++++++++++++++++++++++
nettests/core/http_requests.py | 187 -----------------------
ooni/reporter.py | 2 +-
ooni/utils/txagentwithsocks.py | 36 +++++
oonib/testhelpers/http_helpers.py | 5 +-
tests/test_trueheaders.py | 41 +++++
7 files changed, 272 insertions(+), 189 deletions(-)
diff --git a/before_i_commit.testdeck b/before_i_commit.testdeck
index e0b30ea..7fb33f9 100644
--- a/before_i_commit.testdeck
+++ b/before_i_commit.testdeck
@@ -38,3 +38,12 @@
reportfile: http_url_lists.yamloo
subargs: [-f, test_inputs/url_lists_file.txt]
test: nettests/core/http_url_list.py
+# XXX this is disabled because it requires oonib to be running
+#- options:
+# collector: null
+# help: 0
+# logfile: null
+# pcapfile: null
+# reportfile: null
+# subargs: [-h, test_inputs/test_header_field_manipulation.txt]
+# test: nettests/core/http_header_field_manipulation.py
diff --git a/nettests/core/http_header_field_manipulation.py b/nettests/core/http_header_field_manipulation.py
new file mode 100644
index 0000000..08ee8c7
--- /dev/null
+++ b/nettests/core/http_header_field_manipulation.py
@@ -0,0 +1,181 @@
+# -*- encoding: utf-8 -*-
+#
+# :authors: Arturo Filastò
+# :licence: see LICENSE
+
+import random
+import json
+import yaml
+
+from twisted.python import usage
+
+from ooni.utils import log, net, randomStr
+from ooni.templates import httpt
+from ooni.utils.txagentwithsocks import TrueHeaders
+
+def random_capitalization(string):
+ output = ""
+ original_string = string
+ string = string.swapcase()
+ for i in range(len(string)):
+ if random.randint(0, 1):
+ output += string[i].swapcase()
+ else:
+ output += string[i]
+ if original_string == output:
+ return random_capitalization(output)
+ else:
+ return output
+
+class UsageOptions(usage.Options):
+ optParameters = [
+ ['backend', 'b', 'http://127.0.0.1:57001',
+ 'URL of the backend to use for sending the requests'],
+ ['headers', 'h', None,
+ 'Specify a yaml formatted file from which to read the request headers to send']
+ ]
+
+class HTTPHeaderFieldManipulation(httpt.HTTPTest):
+ """
+ It performs HTTP requests with request headers that vary capitalization
+ towards a backend. If we detect that the headers the backend received
+ do not match the ones we have sent then we have detected tampering.
+ """
+ name = "HTTP Header Field Manipulation"
+ author = "Arturo Filastò"
+ version = "0.1.3"
+
+ randomizeUA = False
+ usageOptions = UsageOptions
+
+ requiredOptions = ['backend']
+
+ def get_headers(self):
+ headers = {}
+ if self.localOptions['headers']:
+ try:
+ f = open(self.localOptions['headers'])
+ except IOError:
+ raise Exception("Specified input file does not exist")
+ content = ''.join(f.readlines())
+ f.close()
+ headers = yaml.safe_load(content)
+ return headers
+ else:
+ # XXX generate these from a random choice taken from whatheaders.com
+ # http://s3.amazonaws.com/data.whatheaders.com/whatheaders-latest.xml.zip
+ headers = {"User-Agent": [random.choice(net.userAgents)[0]],
+ "Accept": ["text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"],
+ "Accept-Encoding": ["gzip,deflate,sdch"],
+ "Accept-Language": ["en-US,en;q=0.8"],
+ "Accept-Charset": ["ISO-8859-1,utf-8;q=0.7,*;q=0.3"],
+ "Host": [randomStr(15)+'.com']
+ }
+ return headers
+
+ def get_random_caps_headers(self):
+ headers = {}
+ normal_headers = self.get_headers()
+ for k, v in normal_headers.items():
+ new_key = random_capitalization(k)
+ headers[new_key] = v
+ return headers
+
+ def processInputs(self):
+ if self.localOptions['backend']:
+ self.url = self.localOptions['backend']
+ else:
+ raise Exception("No backend specified")
+
+ def processResponseBody(self, data):
+ self.check_for_tampering(data)
+
+ def check_for_tampering(self, data):
+ """
+ Here we do checks to verify if the request we made has been tampered
+ with. We have 5 categories of tampering:
+
+ * **total** when the response is not a json object and therefore we were not
+ able to reach the ooniprobe test backend
+
+ * **request_line_capitalization** when the HTTP Request line (e.g. GET /
+ HTTP/1.1) does not match the capitalization we set.
+
+ * **header_field_number** when the number of headers we sent does not match
+ with the ones the backend received
+
+ * **header_name_capitalization** when the header field names do not match
+ those that we sent.
+
+ * **header_field_value** when the header field value does not match with the
+ one we transmitted.
+ """
+ self.report['tampering'] = {
+ 'total': False,
+ 'request_line_capitalization': False,
+ 'header_name_capitalization': False,
+ 'header_field_value': False,
+ 'header_field_number': False
+ }
+ try:
+ response = json.loads(data)
+ except ValueError:
+ self.report['tampering']['total'] = True
+ return
+
+ request_request_line = "%s / HTTP/1.1" % self.request_method
+
+ try:
+ response_request_line = response['request_line']
+ response_headers_dict = response['headers_dict']
+ except KeyError:
+ self.report['tampering']['total'] = True
+ return
+
+ if request_request_line != response_request_line:
+ self.report['tampering']['request_line_capitalization'] = True
+
+ request_headers = TrueHeaders(self.request_headers)
+ diff = request_headers.getDiff(response_headers_dict, ignore=['Connection'])
+ if diff:
+ self.report['tampering']['header_field_name'] = True
+ else:
+ self.report['tampering']['header_field_name'] = False
+ self.report['tampering']['header_name_diff'] = list(diff)
+
+ def test_get(self):
+ self.request_method = "GET"
+ self.request_headers = self.get_random_caps_headers()
+ return self.doRequest(self.url, self.request_method,
+ headers=self.request_headers)
+
+ def test_get_random_capitalization(self):
+ self.request_method = random_capitalization("GET")
+ self.request_headers = self.get_random_caps_headers()
+ return self.doRequest(self.url, self.request_method,
+ headers=self.request_headers)
+
+ def test_post(self):
+ self.request_method = "POST"
+ self.request_headers = self.get_headers()
+ return self.doRequest(self.url, self.request_method,
+ headers=self.request_headers)
+
+ def test_post_random_capitalization(self):
+ self.request_method = random_capitalization("POST")
+ self.request_headers = self.get_random_caps_headers()
+ return self.doRequest(self.url, self.request_method,
+ headers=self.request_headers)
+
+ def test_put(self):
+ self.request_method = "PUT"
+ self.request_headers = self.get_headers()
+ return self.doRequest(self.url, self.request_method,
+ headers=self.request_headers)
+
+ def test_put_random_capitalization(self):
+ self.request_method = random_capitalization("PUT")
+ self.request_headers = self.get_random_caps_headers()
+ return self.doRequest(self.url, self.request_method,
+ headers=self.request_headers)
+
diff --git a/nettests/core/http_requests.py b/nettests/core/http_requests.py
deleted file mode 100644
index 5d67070..0000000
--- a/nettests/core/http_requests.py
+++ /dev/null
@@ -1,187 +0,0 @@
-# -*- encoding: utf-8 -*-
-#
-# :authors: Arturo Filastò
-# :licence: see LICENSE
-
-import random
-import json
-
-from twisted.python import usage
-
-from ooni.utils import log, net, randomStr
-from ooni.templates import httpt
-
-def random_capitalization(string):
- output = ""
- original_string = string
- string = string.swapcase()
- for i in range(len(string)):
- if random.randint(0, 1):
- output += string[i].swapcase()
- else:
- output += string[i]
- if original_string == output:
- return random_capitalization(output)
- else:
- return output
-
-class UsageOptions(usage.Options):
- optParameters = [
- ['backend', 'b', 'http://127.0.0.1:57001',
- 'URL of the backend to use for sending the requests'],
- ['headers', 'h', None,
- 'Specify a yaml formatted file from which to read the request headers to send']
- ]
-
-class HTTPRequests(httpt.HTTPTest):
- """
- This test is also known as Header Field manipulation. It performes HTTP
- requests with variations in capitalization towards the backend.
- """
- name = "HTTP Requests"
- author = "Arturo Filastò"
- version = "0.1.1"
-
- randomizeUA = False
- usageOptions = UsageOptions
-
- requiredOptions = ['backend']
-
- def processInputs(self):
- if self.localOptions['backend']:
- self.url = self.localOptions['backend']
- else:
- raise Exception("No backend specified")
-
- def processResponseBody(self, data):
- self.check_for_tampering(data)
-
- def check_for_tampering(self, data):
- """
- Here we do checks to verify if the request we made has been tampered
- with. We have 3 categories of tampering:
-
- * **total** when the response is not a json object and therefore we were not
- able to reach the ooniprobe test backend
-
- * **request_line_capitalization** when the HTTP Request line (e.x. GET /
- HTTP/1.1) does not match the capitalization we set.
-
- * **header_field_number** when the number of headers we sent does not match
- with the ones the backend received
-
- * **header_name_capitalization** when the header field names do not match
- those that we sent.
-
- * **header_field_value** when the header field value does not match with the
- one we transmitted.
- """
- self.report['tampering'] = {'total': False,
- 'request_line_capitalization': False,
- 'header_name_capitalization': False,
- 'header_field_value': False,
- 'header_field_number': False
- }
-
- try:
- response = json.loads(data)
- except ValueError:
- self.report['tampering']['total'] = True
- return
-
- requestLine = "%s / HTTP/1.1" % self.request_method
- if response['request_line'] != requestLine:
- self.report['tampering']['request_line_capitalization'] = True
-
- # We compare against length -1 because the response will also contain
- # the Connection: close header since we do not do persistent
- # connections
- if len(self.request_headers) != (len(response['headers_dict']) - 1):
- self.report['tampering']['header_field_number'] = True
-
- for header, value in self.request_headers.items():
- # XXX this still needs some work
- # in particular if the response headers are of different length or
- # some extra headers get added in the response (so the lengths
- # match), we will get header_name_capitalization set to true, while
- # the actual tampering is the addition of an extraneous header
- # field.
- if header == "Connection":
- # Ignore Connection header
- continue
- try:
- response_value = response['headers_dict'][header]
- if response_value != value[0]:
- log.msg("Tampering detected because %s != %s" % (response_value, value[0]))
- self.report['tampering']['header_field_value'] = True
- except KeyError:
- log.msg("Tampering detected because %s not in %s" % (header, response['headers_dict']))
- self.report['tampering']['header_name_capitalization'] = True
-
- def get_headers(self):
- headers = {}
- if self.localOptions['headers']:
- # XXX test this code
- try:
- f = open(self.localOptions['headers'])
- except IOError:
- raise Exception("Specified input file does not exist")
- content = ''.join(f.readlines())
- f.close()
- headers = yaml.load(content)
- return headers
- else:
- headers = {"User-Agent": [random.choice(net.userAgents)[0]],
- "Accept": ["text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"],
- "Accept-Encoding": ["gzip,deflate,sdch"],
- "Accept-Language": ["en-US,en;q=0.8"],
- "Accept-Charset": ["ISO-8859-1,utf-8;q=0.7,*;q=0.3"],
- "Host": [randomStr(15)+'.com']
- }
- return headers
-
- def get_random_caps_headers(self):
- headers = {}
- normal_headers = self.get_headers()
- for k, v in normal_headers.items():
- new_key = random_capitalization(k)
- headers[new_key] = v
- return headers
-
- def test_get(self):
- self.request_method = "GET"
- self.request_headers = self.get_random_caps_headers()
- return self.doRequest(self.url, self.request_method,
- headers=self.request_headers)
-
- def test_get_random_capitalization(self):
- self.request_method = random_capitalization("GET")
- self.request_headers = self.get_random_caps_headers()
- return self.doRequest(self.url, self.request_method,
- headers=self.request_headers)
-
- def test_post(self):
- self.request_method = "POST"
- self.request_headers = self.get_headers()
- return self.doRequest(self.url, self.request_method,
- headers=self.request_headers)
-
- def test_post_random_capitalization(self):
- self.request_method = random_capitalization("POST")
- self.request_headers = self.get_random_caps_headers()
- return self.doRequest(self.url, self.request_method,
- headers=self.request_headers)
-
- def test_put(self):
- self.request_method = "PUT"
- self.request_headers = self.get_headers()
- return self.doRequest(self.url, self.request_method,
- headers=self.request_headers)
-
- def test_put_random_capitalization(self):
- self.request_method = random_capitalization("PUT")
- self.request_headers = self.get_random_caps_headers()
- return self.doRequest(self.url, self.request_method,
- headers=self.request_headers)
-
-
diff --git a/ooni/reporter.py b/ooni/reporter.py
index b79f27f..63f501e 100644
--- a/ooni/reporter.py
+++ b/ooni/reporter.py
@@ -184,7 +184,7 @@ class OReporter(object):
test_input = test.input
test_started = test._start_time
- test_runtime = test_started - time.time()
+ test_runtime = time.time() - test_started
report = {'input': test_input,
'test_name': test_name,
diff --git a/ooni/utils/txagentwithsocks.py b/ooni/utils/txagentwithsocks.py
index 57c27e4..7b000fc 100644
--- a/ooni/utils/txagentwithsocks.py
+++ b/ooni/utils/txagentwithsocks.py
@@ -4,6 +4,8 @@
# :licence: see LICENSE
import struct
+import itertools
+from copy import copy
from zope.interface import implements
from twisted.web import client, _newclient, http_headers
@@ -137,6 +139,40 @@ class TrueHeaders(http_headers.Headers):
self._rawHeaders[name.lower()]['name'] = name
self._rawHeaders[name.lower()]['values'] = values
+ def getDiff(self, header_dict, ignore=[]):
+ """
+ ignore: specify a list of header fields to ignore
+
+ Returns a set containing the header names that are not present in
+ header_dict or not present in self.
+ """
+ diff = set()
+ field_names = []
+
+ headers_a = copy(self)
+ headers_b = TrueHeaders(header_dict)
+ for name in ignore:
+ try:
+ del headers_a._rawHeaders[name.lower()]
+ except KeyError:
+ pass
+ try:
+ del headers_b._rawHeaders[name.lower()]
+ except KeyError:
+ pass
+
+ for k, v in itertools.chain(headers_a.getAllRawHeaders(), \
+ headers_b.getAllRawHeaders()):
+ field_names.append(k)
+
+ for name in field_names:
+ if self.getRawHeaders(name) and \
+ name in header_dict:
+ pass
+ else:
+ diff.add(name)
+ return diff
+
def getAllRawHeaders(self):
for k, v in self._rawHeaders.iteritems():
yield v['name'], v['values']
diff --git a/oonib/testhelpers/http_helpers.py b/oonib/testhelpers/http_helpers.py
index b384216..1fa0ccb 100644
--- a/oonib/testhelpers/http_helpers.py
+++ b/oonib/testhelpers/http_helpers.py
@@ -77,7 +77,10 @@ class SimpleHTTPChannel(basic.LineReceiver, policies.TimeoutMixin):
def allHeadersReceived(self):
headers_dict = {}
for k, v in self.headers:
- headers_dict[k] = v
+ if k not in headers_dict:
+ headers_dict[k] = []
+ headers_dict[k].append(v)
+
response = {'request_headers': self.headers,
'request_line': self.requestLine,
'headers_dict': headers_dict
diff --git a/tests/test_trueheaders.py b/tests/test_trueheaders.py
new file mode 100644
index 0000000..33521b8
--- /dev/null
+++ b/tests/test_trueheaders.py
@@ -0,0 +1,41 @@
+from twisted.trial import unittest
+
+from ooni.utils.txagentwithsocks import TrueHeaders
+
+dummy_headers_dict = {
+ 'Header1': ['Value1', 'Value2'],
+ 'Header2': ['ValueA', 'ValueB']
+}
+
+dummy_headers_dict2 = {
+ 'Header1': ['Value1', 'Value2'],
+ 'Header2': ['ValueA', 'ValueB'],
+ 'Header3': ['ValueA', 'ValueB'],
+}
+
+dummy_headers_dict3 = {
+ 'Header1': ['Value1', 'Value2'],
+ 'Header2': ['ValueA', 'ValueB'],
+ 'Header4': ['ValueA', 'ValueB'],
+}
+
+
+class TestTrueHeaders(unittest.TestCase):
+ def test_names_match(self):
+ th = TrueHeaders(dummy_headers_dict)
+ self.assertEqual(th.getDiff(dummy_headers_dict), set())
+
+ def test_names_not_match(self):
+ th = TrueHeaders(dummy_headers_dict)
+ self.assertEqual(th.getDiff(dummy_headers_dict2), set(['Header3']))
+
+ th = TrueHeaders(dummy_headers_dict3)
+ self.assertEqual(th.getDiff(dummy_headers_dict2), set(['Header3', 'Header4']))
+
+ def test_names_match_expect_ignore(self):
+ th = TrueHeaders(dummy_headers_dict)
+ self.assertEqual(th.getDiff(dummy_headers_dict2, ignore=['Header3']), set())
+
+
+
+
More information about the tor-commits
mailing list