[or-cvs] r18345: {torflow} Add Javascript crawl-based check. (torflow/trunk/NetworkScanners)
mikeperry at seul.org
mikeperry at seul.org
Fri Jan 30 15:39:47 UTC 2009
Author: mikeperry
Date: 2009-01-30 10:39:47 -0500 (Fri, 30 Jan 2009)
New Revision: 18345
Modified:
torflow/trunk/NetworkScanners/libsoat.py
torflow/trunk/NetworkScanners/soat.py
Log:
Add Javascript crawl-based check.
Modified: torflow/trunk/NetworkScanners/libsoat.py
===================================================================
--- torflow/trunk/NetworkScanners/libsoat.py 2009-01-30 15:38:46 UTC (rev 18344)
+++ torflow/trunk/NetworkScanners/libsoat.py 2009-01-30 15:39:47 UTC (rev 18345)
@@ -39,6 +39,7 @@
# Failed reasons
FAILURE_EXITONLY = "FailureExitOnly"
FAILURE_DYNAMICTAGS = "FailureDynamicTags"
+FAILURE_DYNAMICJS = "FailureDynamicJS"
FAILURE_DYNAMICBINARY = "FailureDynamicBinary"
FAILURE_COOKIEMISMATCH = "FailureCookieMismatch"
@@ -92,6 +93,25 @@
self.tor_cookies = tor_cookies
self.plain_cookies = plain_cookies
+class JsTestResult(TestResult):
+ ''' Represents the result of a JS test '''
+ def __init__(self, exit_node, website, status, reason=None,
+ content=None, content_exit=None, content_old=None):
+ super(JsTestResult, self).__init__(exit_node, website, status)
+ self.proto = "http"
+ self.reason = reason
+ self.content = content
+ self.content_exit = content_exit
+ self.content_old = content_old
+
+ def remove_files(self):
+ try: os.unlink(self.content)
+ except: pass
+ try: os.unlink(self.content_old)
+ except: pass
+ try: os.unlink(self.content_exit)
+ except: pass
+
class HtmlTestResult(TestResult):
''' Represents the result of a http test '''
def __init__(self, exit_node, website, status, reason=None,
Modified: torflow/trunk/NetworkScanners/soat.py
===================================================================
--- torflow/trunk/NetworkScanners/soat.py 2009-01-30 15:38:46 UTC (rev 18344)
+++ torflow/trunk/NetworkScanners/soat.py 2009-01-30 15:39:47 UTC (rev 18345)
@@ -374,8 +374,9 @@
else:
plog("ERROR", self.proto+" http error code failure at "+exit_node+". This makes "+str(err_cnt)+" node failures for "+address)
+
- def check_http(self, address):
+ def check_http_nodynamic(self, address):
''' check whether a http connection to a given address is molested '''
plog('INFO', 'Conducting an http test with destination ' + address)
@@ -426,8 +427,8 @@
sha1sum.update(buf)
buf = content_file.read(4096)
content_file.close()
-
self.cookie_jar.load(content_prefix+'.cookies', 'w')
+ content = None
except IOError:
(code, content) = http_request(address, self.cookie_jar, self.headers)
@@ -505,7 +506,27 @@
if address in self.successes: self.successes[address]+=1
else: self.successes[address]=1
return TEST_SUCCESS
+
+ if not content:
+ content_file = open(content_prefix+'.content', 'r')
+ content = content_file.read()
+ content_file.close()
+ # Dirty dirty dirty...
+ return (pcontent, psha1sum, content, sha1sum, content_new, sha1sum_new,
+ exit_node)
+
+ def check_http(self, address):
+ ret = self.check_http_nodynamic(address)
+ if type(ret) == int:
+ return ret
+
+ (pcontent, psha1sum, content, sha1sum, content_new, sha1sum_new, exit_node) = ret
+
+ address_file = self.datahandler.safeFilename(address[7:])
+ content_prefix = http_content_dir+address_file
+ failed_prefix = http_failed_dir+address_file
+
# XXX: Check for existence of this file before overwriting
exit_content_file = open(failed_prefix+'.dyn-content.'+exit_node[1:], 'w')
exit_content_file.write(pcontent)
@@ -513,7 +534,7 @@
result = HttpTestResult(exit_node, address, TEST_FAILURE,
FAILURE_DYNAMICBINARY, sha1sum_new.hexdigest(),
- psha1sum.hexdigest(), new_content_file.name,
+ psha1sum.hexdigest(), content_prefix+".content",
exit_content_file.name,
content_prefix+'.content-old',
sha1sum.hexdigest())
@@ -521,7 +542,7 @@
self.datahandler.saveResult(result)
# The HTTP Test should remove address immediately.
- plog("NOTICE", "HTTP Test is removing dynamic URL "+address)
+ plog("WARN", "HTTP Test is removing dynamic URL "+address)
self.remove_target(address)
return TEST_FAILURE
@@ -555,6 +576,7 @@
(test, url) = self.fetch_queue.get_nowait()
if test == "html": result = self.check_html(url)
elif test == "http": result = self.check_http(url)
+ elif test == "js": result = self.check_js(url)
else:
plog("WARN", "Unknown test type: "+test+" for "+url)
result = TEST_SUCCESS
@@ -608,6 +630,15 @@
if str(t.name) in recurse_html:
plog("NOTICE", "Adding html "+str(t.name)+" target: "+attr_tgt)
targets.append(("html", urlparse.urljoin(orig_addr, attr_tgt)))
+ elif str(t.name) in recurse_script:
+ if str(t.name) == "link":
+ for a in t.attrs:
+ if str(a[0]) == "type" and str(a[1]) in link_script_types:
+ targets.append(("js", urlparse.urljoin(orig_addr, attr_tgt)))
+ else:
+ targets.append(("js", urlparse.urljoin(orig_addr, attr_tgt)))
+ plog("NOTICE", "Adding js "+str(t.name)+" target: "+attr_tgt)
+ targets.append(("html", urlparse.urljoin(orig_addr, attr_tgt)))
elif str(t.name) == 'a':
if attr_name == "href":
for f in self.recurse_filetypes:
@@ -654,6 +685,45 @@
tag.extract()
return soup
+ def check_js(self, address):
+ plog('INFO', 'Conducting a js test with destination ' + address)
+ ret = self.check_http_nodynamic(address)
+
+ if type(ret) == int:
+ return ret
+ (tor_js, tsha, orig_js, osha, new_js, nsha, exit_node) = ret
+
+ jsdiff = JSDiffer(orig_js)
+ jsdiff.prune_differences(new_js)
+ false_positive = not jsdiff.contains_differences(tor_js)
+
+ if false_positive:
+ result = JsTestResult(exit_node, address, TEST_SUCCESS)
+ self.results.append(result)
+ #self.datahandler.saveResult(result)
+ if address in self.successes: self.successes[address]+=1
+ else: self.successes[address]=1
+ return TEST_SUCCESS
+ else:
+ address_file = self.datahandler.safeFilename(address[7:])
+ content_prefix = http_content_dir+address_file
+ failed_prefix = http_failed_dir+address_file
+
+ # XXX: Check for existence of this file before overwriting
+ exit_content_file = open(failed_prefix+'.dyn-content.'+exit_node[1:], 'w')
+ exit_content_file.write(tor_js)
+ exit_content_file.close()
+
+ result = JsTestResult(exit_node, address, TEST_FAILURE,
+ FAILURE_DYNAMICJS, content_prefix+".content",
+ exit_content_file.name,
+ content_prefix+'.content-old')
+ self.results.append(result)
+ self.datahandler.saveResult(result)
+ plog("ERROR", "Javascript 3-way failure at "+exit_node+" for "+address)
+
+ return TEST_FAILURE
+
def check_html(self, address):
# XXX: Check mimetype to decide what to do..
''' check whether a http connection to a given address is molested '''
More information about the tor-commits mailing list