[or-cvs] r18483: {torflow} Fix HTTP Error code handling to register as failure instead (torflow/trunk/NetworkScanners)
mikeperry at seul.org
mikeperry at seul.org
Wed Feb 11 08:04:48 UTC 2009
Author: mikeperry
Date: 2009-02-11 03:04:46 -0500 (Wed, 11 Feb 2009)
New Revision: 18483
Modified:
torflow/trunk/NetworkScanners/libsoat.py
torflow/trunk/NetworkScanners/snakeinspector.py
torflow/trunk/NetworkScanners/soat.py
Log:
Fix HTTP Error code handling to register as failure instead
of inconclusive. Double-check mime-types of documents before
sending them through a given test. Improve the Snake
Inspector to be a bit more user friendly (but not too much :)
Modified: torflow/trunk/NetworkScanners/libsoat.py
===================================================================
--- torflow/trunk/NetworkScanners/libsoat.py 2009-02-10 23:39:29 UTC (rev 18482)
+++ torflow/trunk/NetworkScanners/libsoat.py 2009-02-11 08:04:46 UTC (rev 18483)
@@ -37,12 +37,13 @@
# Sorry, we sort of rely on the ordinal nature of the above constants
RESULT_STRINGS = {TEST_SUCCESS:"Success", TEST_INCONCLUSIVE:"Inconclusive", TEST_FAILURE:"Failure"}
+RESULT_CODES=dict([v,k] for k,v in RESULT_STRINGS.iteritems())
# Inconclusive reasons
INCONCLUSIVE_NOEXITCONTENT = "InconclusiveNoExitContent"
INCONCLUSIVE_NOLOCALCONTENT = "InconclusiveNoLocalContent"
-INCONCLUSIVE_BADHTTPCODE = "InconclusiveBadHTTPCode"
INCONCLUSIVE_DYNAMICSSL = "InconclusiveDynamicSSL"
+INCONCLUSIVE_TORBREAKAGE = "InconclusiveTorBreakage"
# Failed reasons
FAILURE_EXITONLY = "FailureExitOnly"
@@ -51,6 +52,7 @@
FAILURE_DYNAMICBINARY = "FailureDynamicBinary"
FAILURE_DYNAMICCERTS = "FailureDynamicCerts"
FAILURE_COOKIEMISMATCH = "FailureCookieMismatch"
+FAILURE_BADHTTPCODE = "FailureBadHTTPCode"
# False positive reasons
FALSEPOSITIVE_HTTPERRORS = "FalsePositiveHTTPErrors"
@@ -70,6 +72,15 @@
self.false_positive=False
self.false_positive_reason="None"
self.verbose=False
+
+ def _rebase(self, filename, new_data_root):
+ if not filename: return filename
+ filename = os.path.normpath(filename)
+ split_file = filename.split("/")
+ return os.path.normpath(os.path.join(new_data_root, *split_file[1:]))
+
+ def rebase(self, new_data_root):
+ pass
def mark_false_positive(self, reason):
self.false_positive=True
@@ -102,12 +113,16 @@
class SSLTestResult(TestResult):
''' Represents the result of an openssl test '''
def __init__(self, exit_node, ssl_site, ssl_file, status, reason=None,
- exit_cert_pem=None):
+ exit_ip=None, exit_cert_pem=None):
super(SSLTestResult, self).__init__(exit_node, ssl_site, status, reason)
self.ssl_file = ssl_file
self.exit_cert = exit_cert_pem # Meh, not that much space
+ self.exit_ip = exit_ip
self.proto = "ssl"
+ def rebase(self, new_data_root):
+ self.ssl_file = self._rebase(self.ssl_file, new_data_root)
+
def mark_false_positive(self, reason):
TestResult.mark_false_positive(self, reason)
self.ssl_file=self.move_file(self.ssl_file, ssl_falsepositive_dir)
@@ -124,7 +139,11 @@
ret += "\nCert for "+ssl_domain.cert_map[cert]+":\n"
ret += cert+"\n"
if self.exit_cert:
- ret += "\nExit node's cert:\n"
+ # XXX: Kill the first part of this clause after restart:
+ if 'exit_ip' in self.__dict__ and self.exit_ip:
+ ret += "\nExit node's cert for "+self.exit_ip+":\n"
+ else:
+ ret += "\nExit node's cert:\n"
ret += self.exit_cert+"\n"
return ret
@@ -167,6 +186,11 @@
self.content_exit = content_exit
self.content_old = content_old
+ def rebase(self, new_data_root):
+ self.content = self._rebase(self.content, new_data_root)
+ self.content_exit = self._rebase(self.content_exit, new_data_root)
+ self.content_old = self._rebase(self.content_old, new_data_root)
+
def mark_false_positive(self, reason):
TestResult.mark_false_positive(self, reason)
self.content=self.move_file(self.content, http_falsepositive_dir)
@@ -200,6 +224,12 @@
self.tor_cookies = tor_cookies
self.plain_cookies = plain_cookies
+ def __str__(self):
+ ret = TestResult.__str__(self)
+ ret += " Plain Cookies:"+self.plain_cookies
+ ret += " Tor Cookies:"+self.tor_cookies
+ return ret
+
class JsTestResult(TestResult):
''' Represents the result of a JS test '''
def __init__(self, exit_node, website, status, reason=None,
@@ -210,6 +240,11 @@
self.content_exit = content_exit
self.content_old = content_old
+ def rebase(self, new_data_root):
+ self.content = self._rebase(self.content, new_data_root)
+ self.content_exit = self._rebase(self.content_exit, new_data_root)
+ self.content_old = self._rebase(self.content_old, new_data_root)
+
def mark_false_positive(self, reason):
TestResult.mark_false_positive(self, reason)
self.content=self.move_file(self.content, http_falsepositive_dir)
@@ -260,6 +295,11 @@
self.content_exit = content_exit
self.content_old = content_old
+ def rebase(self, new_data_root):
+ self.content = self._rebase(self.content, new_data_root)
+ self.content_exit = self._rebase(self.content_exit, new_data_root)
+ self.content_old = self._rebase(self.content_old, new_data_root)
+
def mark_false_positive(self, reason):
TestResult.mark_false_positive(self, reason)
self.content=self.move_file(self.content,http_falsepositive_dir)
@@ -284,7 +324,7 @@
old_soup = FullyStrainedSoup(content_old)
tags = map(str, soup.findAll())
old_tags = map(str, old_soup.findAll())
- diff = difflib.unified_diff(tags, old_tags, "Non-Tor1", "Non-Tor1",
+ diff = difflib.unified_diff(old_tags, tags, "Non-Tor1", "Non-Tor2",
lineterm="")
for line in diff:
ret+=line+"\n"
@@ -345,6 +385,9 @@
self.proto = "pop"
class DataHandler:
+ def __init__(self, my_data_dir=data_dir):
+ self.data_dir = my_data_dir
+
''' Class for saving and managing test result data '''
def filterResults(self, results, protocols=[], show_good=False,
show_bad=False, show_inconclusive=False):
@@ -376,39 +419,39 @@
def getAll(self):
''' get all available results'''
- return self.__getResults(data_dir)
+ return self.__getResults(self.data_dir)
def getSsh(self):
''' get results of ssh tests '''
- return self.__getResults(data_dir + 'ssh/')
+ return self.__getResults(self.data_dir + 'ssh/')
def getHttp(self):
''' get results of http tests '''
- return self.__getResults(data_dir + 'http/')
+ return self.__getResults(self.data_dir + 'http/')
def getSsl(self):
''' get results of ssl tests '''
- return self.__getResults(data_dir + 'ssl/')
+ return self.__getResults(self.data_dir + 'ssl/')
def getSmtp(self):
''' get results of smtp tests '''
- return self.__getResults(data_dir + 'smtp/')
+ return self.__getResults(self.data_dir + 'smtp/')
def getPop(self):
''' get results of pop tests '''
- return self.__getResults(data_dir + 'pop/')
+ return self.__getResults(self.data_dir + 'pop/')
def getImap(self):
''' get results of imap tests '''
- return self.__getResults(data_dir + 'imap/')
+ return self.__getResults(self.data_dir + 'imap/')
def getDns(self):
''' get results of basic dns tests '''
- return self.__getResults(data_dir + 'dns')
+ return self.__getResults(self.data_dir + 'dns')
def getDnsRebind(self):
''' get results of dns rebind tests '''
- return self.__getResults(data_dir + 'dnsbrebind/')
+ return self.__getResults(self.data_dir + 'dnsbrebind/')
def __getResults(self, rdir):
'''
@@ -422,6 +465,7 @@
if f.endswith('.result'):
fh = open(os.path.join(root, f))
result = pickle.load(fh)
+ result.rebase(self.data_dir)
results.append(result)
return results
@@ -458,7 +502,7 @@
else:
raise Exception, 'This doesn\'t seems to be a result instance.'
- rdir = data_dir+result.proto.lower()+'/'
+ rdir = self.data_dir+result.proto.lower()+'/'
if result.false_positive:
rdir += 'falsepositive/'
elif result.status == TEST_SUCCESS:
Modified: torflow/trunk/NetworkScanners/snakeinspector.py
===================================================================
--- torflow/trunk/NetworkScanners/snakeinspector.py 2009-02-10 23:39:29 UTC (rev 18482)
+++ torflow/trunk/NetworkScanners/snakeinspector.py 2009-02-11 08:04:46 UTC (rev 18483)
@@ -10,29 +10,75 @@
import sets
from sets import Set
+import getopt
+
import libsoat
from libsoat import *
sys.path.append("../")
from TorCtl.TorUtil import *
+def usage():
+ # TODO: Don't be a jerk.
+ print "Use teh src, luke."
+ sys.exit(1)
+def getargs(argv):
+ try:
+ opts,args = getopt.getopt(argv[1:],"d:f:e:r:vt:p:s:",
+ ["dir=", "file=", "exit=", "reason=", "resultfilter=", "proto=",
+ "verbose", "statuscode="])
+ except getopt.GetoptError,err:
+ print str(err)
+ usage()
+ use_dir="./data/"
+ use_file=None
+ node=None
+ reason=None
+ result=None
+ verbose=False
+ proto=None
+ resultfilter=None
+ for o,a in opts:
+ if o == '-d' or o == '--dir':
+ use_dir = a
+ elif o == '-f' or o == '--file':
+ use_file = a
+ elif o == '-e' or o == '--exit':
+ node = a
+ elif o == '-r' or o == '--reason':
+ reason = a
+ elif o == '-v' or o == '--verbose':
+ verbose = True
+ elif o == '-t' or o == '--resultfilter':
+ resultfilter = a
+ elif o == '-p' or o == '--proto':
+ proto = a
+ elif o == '-s' or o == '--statuscode':
+ try:
+ result = int(a)
+ except ValueError:
+ result = RESULT_CODES[a]
+ return use_dir,use_file,node,reason,result,verbose,resultfilter,proto
+
def main(argv):
- dh = DataHandler()
- # FIXME: Handle this better.. maybe explicit --file or --exit options?
- # For now, I should be the only one runnin this so...
- # XXX: Also want to filter on reason, false positive, and
- # failure/inconclusive
- if len(argv) == 1:
+ use_dir,use_file,node,reason,result,verbose,resultfilter,proto=getargs(argv)
+ dh = DataHandler(use_dir)
+ print dh.data_dir
+
+ if use_file:
+ results = [dh.getResult(use_file)]
+ elif node:
+ results = dh.filterByNode(dh.getAll(), node)
+ else:
results = dh.getAll()
- elif argv[1][0] == '$':
- results = dh.filterByNode(dh.getAll(), argv[1])
- else:
- results = [dh.getResult(argv[1])]
for r in results:
- r.verbose = True
- if r.status == TEST_FAILURE and r.reason == "FailureExitOnly":
+ r.verbose = verbose
+ if (not result or r.status == result) and \
+ (not reason or r.reason == reason) and \
+ (not proto or r.proto == proto) and \
+ (not resultfilter or r.__class__.__name__ == resultfilter):
print r
print "\n-----------------------------\n"
Modified: torflow/trunk/NetworkScanners/soat.py
===================================================================
--- torflow/trunk/NetworkScanners/soat.py 2009-02-10 23:39:29 UTC (rev 18482)
+++ torflow/trunk/NetworkScanners/soat.py 2009-02-11 08:04:46 UTC (rev 18483)
@@ -134,23 +134,23 @@
mime_type = reply.info().type
content = decompress_response_data(reply)
except urllib2.HTTPError, e:
- plog('WARN', "HTTP Error during request of "+address+": "+str(e))
+ plog('NOTICE', "HTTP Error during request of "+address+": "+str(e))
traceback.print_exc()
- return (e.code, [], "", "")
+ return (e.code, [], "", str(e))
except (ValueError, urllib2.URLError):
plog('WARN', 'The http-request address ' + address + ' is malformed')
traceback.print_exc()
return (0, [], "", "")
except (IndexError, TypeError, socks.Socks5Error), e:
- plog('WARN', 'An error occured while negotiating socks5 with Tor: '+str(e))
+ plog('NOTICE', 'An error occured while negotiating socks5 with Tor: '+str(e))
traceback.print_exc()
return (0, [], "", "")
except KeyboardInterrupt:
raise KeyboardInterrupt
- except e:
+ except Exception, e:
plog('WARN', 'An unknown HTTP error occured for '+address+": "+str(e))
traceback.print_exc()
- return (0, [], "", "")
+ return (666, [], "", str(e))
# TODO: Consider also returning mime type here
return (reply.code, new_cookies, mime_type, content)
@@ -170,6 +170,7 @@
self.results = []
self.dynamic_fails = {}
self.dynamic_limit = max_dynamic_failure
+ self.banned_targets = sets.Set([])
def run_test(self):
raise NotImplemented()
@@ -181,6 +182,7 @@
return random.choice(self.nodes)
def remove_target(self, target, reason="None"):
+ self.banned_targets.add(target)
if target in self.targets: self.targets.remove(target)
if len(self.targets) < self.min_targets:
plog("NOTICE", self.proto+" scanner short on targets. Adding more")
@@ -293,6 +295,8 @@
return False
if valid_schemes and scheme not in valid_schemes:
return False
+ if url in self.banned_targets:
+ return False
if filetypes: # Must be checked last
for filetype in filetypes:
if url[-len(filetype):] == filetype:
@@ -370,7 +374,9 @@
if host_only:
# FIXME: %-encoding, @'s, etc?
host = urlparse.urlparse(url)[1]
- type_urls.add(host)
+ # Have to check again here after parsing the url:
+ if host not in self.banned_targets:
+ type_urls.add(host)
else:
type_urls.add(url)
else:
@@ -396,6 +402,7 @@
def check_cookies(self):
tor_cookies = "\n"
plain_cookies = "\n"
+ # XXX: do we need to sort these?
for cookie in self.tor_cookie_jar:
tor_cookies += "\t"+cookie.name+":"+cookie.domain+cookie.path+" discard="+str(cookie.discard)+"\n"
for cookie in self.cookie_jar:
@@ -478,7 +485,6 @@
# TODO: use nocontent to cause us to not load content into memory.
# This will require refactoring http_response though.
''' check whether a http connection to a given address is molested '''
- plog('INFO', 'Conducting an http test with destination ' + address)
# an address representation acceptable for a filename
address_file = self.datahandler.safeFilename(address[7:])
@@ -505,6 +511,7 @@
added_cookie_jar.load(content_prefix+'.cookies', ignore_discard=True)
self.cookie_jar.load(content_prefix+'.cookies', ignore_discard=True)
content = None
+ mime_type = None
except IOError:
(code, new_cookies, mime_type, content) = http_request(address, self.cookie_jar, self.headers)
@@ -568,19 +575,33 @@
if pcode - (pcode % 100) != 200:
plog("NOTICE", exit_node+" had error "+str(pcode)+" fetching content for "+address)
- # FIXME: Timeouts and socks errors give error code 0. Maybe
- # break them up into more detailed reasons?
- result = HttpTestResult(exit_node, address, TEST_INCONCLUSIVE,
- INCONCLUSIVE_BADHTTPCODE+str(pcode))
- self.results.append(result)
- self.datahandler.saveResult(result)
- if pcode != 0:
- self.register_httpcode_failure(address, exit_node)
# Restore cookie jars
self.cookie_jar = orig_cookie_jar
self.tor_cookie_jar = orig_tor_cookie_jar
- return TEST_INCONCLUSIVE
+ if pcode == 0:
+ result = HttpTestResult(exit_node, address, TEST_INCONCLUSIVE,
+ INCONCLUSIVE_TORBREAKAGE+str(pcontent))
+ self.results.append(result)
+ self.datahandler.saveResult(result)
+ return TEST_INCONCLUSIVE
+ else:
+ BindingSocket.bind_to = refetch_ip
+ (code_new, new_cookies_new, mime_type_new, content_new) = http_request(address, orig_tor_cookie_jar, self.headers)
+ BindingSocket.bind_to = None
+
+ if code_new == pcode:
+ plog("NOTICE", "Non-tor HTTP error "+str(code_new)+" fetching content for "+address)
+ # Just remove it
+ self.remove_target(address, FALSEPOSITIVE_HTTPERRORS)
+ return TEST_INCONCLUSIVE
+ result = HttpTestResult(exit_node, address, TEST_FAILURE,
+ FAILURE_BADHTTPCODE+str(pcode)+":"+str(pcontent))
+ self.results.append(result)
+ self.datahandler.saveResult(result)
+ self.register_httpcode_failure(address, exit_node)
+ return TEST_FAILURE
+
# if we have no content, we had a connection error
if pcontent == "":
plog("NOTICE", exit_node+" failed to fetch content for "+address)
@@ -662,17 +683,26 @@
content_file = open(load_file, 'r')
content = content_file.read()
content_file.close()
+
+ if not ((mime_type == mime_type_new or not mime_type) \
+ and mime_type_new == pmime_type):
+ plog("WARN", "Mime type change: 1st: "+mime_type+", 2nd: "+mime_type_new+", Tor: "+pmime_type)
+ # TODO: If this actually happens, store a result.
+ mime_type = 'text/html';
# Dirty dirty dirty...
- return (pcontent, psha1sum, content, sha1sum, content_new, sha1sum_new,
- exit_node)
+ return (mime_type_new, pcontent, psha1sum, content, sha1sum, content_new,
+ sha1sum_new, exit_node)
def check_http(self, address):
+ plog('INFO', 'Conducting an http test with destination ' + address)
ret = self.check_http_nodynamic(address)
if type(ret) == int:
return ret
-
- (pcontent, psha1sum, content, sha1sum, content_new, sha1sum_new, exit_node) = ret
+ return self._check_http_worker(address, ret)
+
+ def _check_http_worker(self, address, http_ret):
+ (mime_type,pcontent,psha1sum,content,sha1sum,content_new,sha1sum_new,exit_node) = http_ret
address_file = self.datahandler.safeFilename(address[7:])
content_prefix = http_content_dir+address_file
@@ -745,8 +775,9 @@
while not self.fetch_queue.empty():
(test, url, referer) = self.fetch_queue.get_nowait()
if referer: self.headers['Referer'] = referer
- if test == "html": result = self.check_html(url)
- elif test == "http": result = self.check_http(url)
+ # Technically both html and js tests check and dispatch via mime types
+ # but I want to know when link tags lie
+ if test == "html" or test == "http": result = self.check_html(url)
elif test == "js": result = self.check_js(url)
else:
plog("WARN", "Unknown test type: "+test+" for "+url)
@@ -779,7 +810,7 @@
elif t.name in recurse_script:
if t.name == "link":
for a in t.attrs:
- if a[0] == "type" and a[1] in link_script_types:
+ if a[0] == "type" and a[1] in script_mime_types:
targets.append(("js", urlparse.urljoin(orig_addr, attr_tgt)))
else:
targets.append(("js", urlparse.urljoin(orig_addr, attr_tgt)))
@@ -797,7 +828,7 @@
self.fetch_queue.put_nowait((i[0], i[1], orig_addr))
else:
plog("NOTICE", "Skipping "+i[0]+" target: "+i[1])
-
+
def check_js(self, address):
plog('INFO', 'Conducting a js test with destination ' + address)
@@ -808,8 +839,18 @@
if type(ret) == int:
return ret
- (tor_js, tsha, orig_js, osha, new_js, nsha, exit_node) = ret
+ return self._check_js_worker(address, ret)
+ def _check_js_worker(self, address, http_ret):
+ (mime_type, tor_js, tsha, orig_js, osha, new_js, nsha, exit_node) = http_ret
+
+ if mime_type not in script_mime_types:
+ plog("WARN", "Non-script mime type "+mime_type+" fed to JS test")
+ if mime_type in html_mime_types:
+ return self._check_html_worker(address, http_ret)
+ else:
+ return self._check_http_worker(address, http_ret)
+
jsdiff = JSDiffer(orig_js)
jsdiff.prune_differences(new_js)
has_js_changes = jsdiff.contains_differences(tor_js)
@@ -844,13 +885,24 @@
def check_html(self, address):
plog('INFO', 'Conducting an html test with destination ' + address)
-
ret = self.check_http_nodynamic(address)
if type(ret) == int:
return ret
- (tor_html, tsha, orig_html, osha, new_html, nsha, exit_node) = ret
+ return self._check_html_worker(address, ret)
+
+ def _check_html_worker(self, address, http_ret):
+ (mime_type,tor_html,tsha,orig_html,osha,new_html,nsha,exit_node)=http_ret
+
+ if mime_type not in html_mime_types:
+ # XXX: Keep an eye on this logline.
+ plog("INFO", "Non-html mime type "+mime_type+" fed to HTML test")
+ if mime_type in script_mime_types:
+ return self._check_js_worker(address, http_ret)
+ else:
+ return self._check_http_worker(address, http_ret)
+
# an address representation acceptable for a filename
address_file = self.datahandler.safeFilename(address[7:])
content_prefix = http_content_dir+address_file
@@ -1002,7 +1054,7 @@
return 0
except KeyboardInterrupt:
raise KeyboardInterrupt
- except e:
+ except Exception, e:
plog('WARN', 'An unknown SSL error occured for '+address+': '+str(e))
traceback.print_exc()
return 0
@@ -1131,7 +1183,8 @@
# failure... Need to prune all results for this cert and give up.
if ssl_domain.cert_rotates:
result = SSLTestResult(exit_node, address, ssl_file_name, TEST_FAILURE,
- FAILURE_DYNAMICCERTS, cert_pem)
+ FAILURE_DYNAMICCERTS,
+ self.get_resolved_ip(address), cert_pem)
self.results.append(result)
self.datahandler.saveResult(result)
self.register_dynamic_failure(address, exit_node)
@@ -1139,7 +1192,8 @@
# if certs dont match, means the exit node has been messing with the cert
result = SSLTestResult(exit_node, address, ssl_file_name, TEST_FAILURE,
- FAILURE_EXITONLY, cert_pem)
+ FAILURE_EXITONLY, self.get_resolved_ip(address),
+ cert_pem)
self.datahandler.saveResult(result)
self.results.append(result)
self.register_exit_failure(address, exit_node)
@@ -1664,6 +1718,8 @@
return response
class NodeManager(EventHandler):
+ # FIXME: Periodically check to see if we are accumulating stale
+ # descriptors and prune them..
'''
A tor control event handler extending TorCtl.EventHandler.
Monitors NS and NEWDESC events, and updates each test
@@ -2138,7 +2194,6 @@
mt.get_new_circuit()
for test in tests.values():
- # Keep testing failures and inconclusives
result = test.run_test()
plog("INFO", test.proto+" test via "+scan_exit+" has result "+str(result))
plog('INFO', 'Done.')
@@ -2151,7 +2206,7 @@
plog("INFO", "Got signal for node update.")
for test in avail_tests:
test.update_nodes()
- plog("INFO", "Note update complete.")
+ plog("INFO", "Node update complete.")
# Get as much milage out of each exit as we safely can:
# Run a random subset of our tests in random order
More information about the tor-commits
mailing list