[or-cvs] r18378: {torflow} Add ability to bind to a different IP for the second non-tor (torflow/trunk/NetworkScanners)
mikeperry at seul.org
Tue Feb 3 13:01:17 UTC 2009
Author: mikeperry
Date: 2009-02-03 08:01:16 -0500 (Tue, 03 Feb 2009)
New Revision: 18378
Modified:
torflow/trunk/NetworkScanners/soat.py
torflow/trunk/NetworkScanners/soatstats.py
Log:
Add the ability to bind to a different IP for the second non-Tor
fetch, and wipe our cookies before this fetch. This helps discover
and filter out IP-based identifiers. Also add a NodeManager to
keep our node lists in sync with metatroller and Tor, and
clean up URL handling a bit.
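
For reference, the IP-binding trick below works by monkey-patching
socket.socket so every new outgoing connection is bound to a chosen
local address before it connects. A minimal standalone sketch of the
same technique (192.0.2.2 is a placeholder; substitute an alternate
IP actually configured on the box):

import socket
import urllib2

_origsocket = socket.socket

class BindingSocket(_origsocket):
    # When bind_to is set, bind each new socket to that local address
    # (port 0 = any free port) before it is used to connect out.
    bind_to = None
    def __init__(self, family=socket.AF_INET, type=socket.SOCK_STREAM,
                 proto=0, _sock=None):
        _origsocket.__init__(self, family, type, proto, _sock)
        if BindingSocket.bind_to:
            self.bind((BindingSocket.bind_to, 0))

socket.socket = BindingSocket

BindingSocket.bind_to = "192.0.2.2"  # placeholder alternate IP
try:
    content = urllib2.urlopen("http://example.com/").read()
finally:
    BindingSocket.bind_to = None  # always restore default behavior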
Modified: torflow/trunk/NetworkScanners/soat.py
===================================================================
--- torflow/trunk/NetworkScanners/soat.py 2009-02-03 12:42:55 UTC (rev 18377)
+++ torflow/trunk/NetworkScanners/soat.py 2009-02-03 13:01:16 UTC (rev 18378)
@@ -43,6 +43,7 @@
import cookielib
import sha
import Queue
+import threading
from libsoat import *
@@ -66,13 +67,23 @@
from soat_config import *
search_cookies=None
+linebreak = '\r\n'
-#
-# constants
-#
-linebreak = '\r\n'
+# Oh yeah. so dirty. Blame this guy if you hate me:
+# http://mail.python.org/pipermail/python-bugs-list/2008-October/061202.html
+_origsocket = socket.socket
+class BindingSocket(_origsocket):
+ bind_to = None
+ def __init__(self, family=socket.AF_INET, type=socket.SOCK_STREAM, proto=0, _sock=None):
+ _origsocket.__init__(self, family, type, proto, _sock)
+ if BindingSocket.bind_to:
+ plog("DEBUG", "Binding socket to "+BindingSocket.bind_to)
+ self.bind((BindingSocket.bind_to, 0))
+socket.socket = BindingSocket
+
+
# Http request handling
def http_request(address, cookie_jar=None, headers=firefox_headers):
''' perform a http GET-request and return the content received '''
@@ -125,6 +136,7 @@
self.mt = mt
self.datahandler = DataHandler()
self.min_targets = 10
+ self.marked_nodes = sets.Set([])
def run_test(self):
raise NotImplemented()
@@ -140,13 +152,21 @@
if len(self.targets) < self.min_targets:
plog("NOTICE", self.proto+" scanner short on targets. Adding more")
self.targets.extend(self.get_targets())
-
+
+ def update_nodes(self):
+ self.nodes = self.mt.node_manager.get_nodes_for_port(self.port)
+ self.node_map = {}
+ for n in self.nodes:
+ self.node_map[n.idhex] = n
+ self.total_nodes = len(self.nodes)
+ self.all_nodes = sets.Set(self.nodes)
+
def mark_chosen(self, node):
self.nodes_marked += 1
- self.nodes.remove(node)
+ self.marked_nodes.add(node)
def finished(self):
- return not self.nodes
+ return not self.marked_nodes ^ self.all_nodes
def percent_complete(self):
return round(100.0*self.nodes_marked/self.total_nodes, 1)
@@ -165,21 +185,34 @@
plog("INFO", "Using the following urls for "+self.proto+" scan:\n\t"+targets)
self.tests_run = 0
self.nodes_marked = 0
- # XXX: We really need to register an eventhandler
- # and register a callback for it when this list
- # changes due to dropping either "Running" or "Fast"
- self.nodes = self.mt.get_nodes_for_port(self.port)
- self.node_map = {}
- for n in self.nodes:
- self.node_map[n.idhex] = n
- self.total_nodes = len(self.nodes)
+ self.marked_nodes = sets.Set([])
-
class SearchBasedTest(Test):
def __init__(self, mt, proto, port, wordlist):
self.wordlist = wordlist
Test.__init__(self, mt, proto, port)
+ def _is_useable_url(self, url, valid_schemes=None, filetypes=None):
+ (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url)
+ if netloc.rfind(":") != -1:
+ # XXX: %-encoding?
+ port = netloc[netloc.rfind(":")+1:]
+ try:
+ if int(port) != self.port:
+ return False
+ except:
+ traceback.print_exc()
+ plog("WARN", "Unparseable port "+port+" in "+url)
+ return False
+ if valid_schemes and scheme not in valid_schemes:
+ return False
+ if filetypes: # Must be checked last
+ for filetype in filetypes:
+ if url[-len(filetype):] == filetype:
+ return True
+ return False
+ return True
+
def get_search_urls(self, protocol='any', results_per_type=10, host_only=False, filetypes=['any'], search_mode=default_search_mode):
'''
construct a list of urls based on the wordlist, filetypes and protocol.
@@ -237,15 +270,20 @@
# get the links and do some additional filtering
for link in soup.findAll('a', {'class' : search_mode["class"]}):
url = link['href']
- if (protocol != 'any' and url[:len(protocol)] != protocol or
- filetype != 'any' and url[-len(filetype):] != filetype):
- pass
- else:
+ if protocol == 'any': prot_list = None
+ else: prot_list = [protocol]
+ if filetype == 'any': file_list = None
+ else: file_list = filetypes
+
+ if self._is_useable_url(url, prot_list, file_list):
if host_only:
- host = urlparse.urlparse(link['href'])[1]
+ # XXX: %-encoding, @'s, etc?
+ host = urlparse.urlparse(url)[1]
type_urls.append(host)
else:
- type_urls.append(link['href'])
+ type_urls.append(url)
+ else:
+ pass
plog("INFO", "Have "+str(len(type_urls))+"/"+str(results_per_type)+" google urls so far..")
# make sure we don't get more urls than needed
@@ -258,6 +296,7 @@
class HTTPTest(SearchBasedTest):
def __init__(self, mt, wordlist, filetypes=scan_filetypes):
+ # FIXME: Handle http urls w/ non-80 ports..
SearchBasedTest.__init__(self, mt, "HTTP", 80, wordlist)
self.fetch_targets = 5
self.httpcode_fails = {}
@@ -375,7 +414,6 @@
plog("ERROR", self.proto+" http error code failure at "+exit_node+". This makes "+str(err_cnt)+" node failures for "+address)
-
def check_http_nodynamic(self, address):
''' check whether a http connection to a given address is molested '''
plog('INFO', 'Conducting an http test with destination ' + address)
@@ -427,7 +465,7 @@
sha1sum.update(buf)
buf = content_file.read(4096)
content_file.close()
- self.cookie_jar.load(content_prefix+'.cookies', 'w')
+ self.cookie_jar.load(content_prefix+'.cookies')
content = None
except IOError:
@@ -441,7 +479,11 @@
content_file.write(content)
content_file.close()
- self.cookie_jar.save(content_prefix+'.cookies', 'w')
+ try:
+ self.cookie_jar.save(content_prefix+'.cookies')
+ except:
+ traceback.print_exc()
+ plog("WARN", "Error saving cookies in "+str(self.cookie_jar)+" to "+content_prefix+".cookies")
except TypeError, e:
plog('ERROR', 'Failed obtaining the shasum for ' + address)
@@ -458,8 +500,14 @@
else: self.successes[address]=1
return TEST_SUCCESS
- # if content doesnt match, update the direct content
+ # if content doesnt match, update the direct content and use new cookies
+ self.cookie_jar = cookielib.LWPCookieJar()
+ # If we have alternate IPs to bind to on this box, use them?
+ # Sometimes pages have the client IP encoded in them..
+ BindingSocket.bind_to = refetch_ip
(code_new, content_new) = http_request(address, self.cookie_jar, self.headers)
+ BindingSocket.bind_to = None
+
if not content_new:
plog("WARN", "Failed to re-frech "+address+" outside of Tor. Did our network fail?")
result = HttpTestResult(exit_node, address, TEST_INCONCLUSIVE,
@@ -495,7 +543,11 @@
new_content_file.close()
os.rename(content_prefix+'.cookies', content_prefix+'.cookies-old')
- self.cookie_jar.save(content_prefix+'.cookies', 'w')
+ try:
+ self.cookie_jar.save(content_prefix+'.cookies')
+ except:
+ traceback.print_exc()
+ plog("WARN", "Error saving cookies in "+str(self.cookie_jar)+" to "+content_prefix+".cookies")
# compare the node content and the new content
# if it matches, everything is ok
@@ -626,34 +678,32 @@
for a in t.attrs:
attr_name = str(a[0])
attr_tgt = str(a[1])
- # TODO: Split off javascript
if attr_name in attrs_to_recurse:
if str(t.name) in recurse_html:
- plog("NOTICE", "Adding html "+str(t.name)+" target: "+attr_tgt)
targets.append(("html", urlparse.urljoin(orig_addr, attr_tgt)))
elif str(t.name) in recurse_script:
if str(t.name) == "link":
for a in t.attrs:
if str(a[0]) == "type" and str(a[1]) in link_script_types:
targets.append(("js", urlparse.urljoin(orig_addr, attr_tgt)))
- plog("NOTICE", "Adding js "+str(t.name)+" target: "+attr_tgt)
else:
targets.append(("js", urlparse.urljoin(orig_addr, attr_tgt)))
- plog("NOTICE", "Adding js "+str(t.name)+" target: "+attr_tgt)
- targets.append(("html", urlparse.urljoin(orig_addr, attr_tgt)))
elif str(t.name) == 'a':
if attr_name == "href":
for f in self.recurse_filetypes:
if f not in got_type and attr_tgt[-len(f):] == f:
got_type[f] = 1
- plog("NOTICE", "Adding http a target: "+attr_tgt)
targets.append(("http", urlparse.urljoin(orig_addr, attr_tgt)))
else:
- plog("NOTICE", "Adding http "+str(t.name)+" target: "+attr_tgt)
targets.append(("http", urlparse.urljoin(orig_addr, attr_tgt)))
for i in sets.Set(targets):
- self.fetch_queue.put_nowait(i)
+ if self._is_useable_url(i[1], html_schemes):
+ plog("NOTICE", "Adding "+i[0]+" target: "+i[1])
+ self.fetch_queue.put_nowait(i)
+ else:
+ plog("NOTICE", "Skipping "+i[0]+" target: "+i[1])
+
def _tag_not_worthy(self, tag):
if str(tag.name) in tags_to_check:
return False
@@ -725,12 +775,19 @@
plog("ERROR", "Javascript 3-way failure at "+exit_node+" for "+address)
return TEST_FAILURE
-
+
def check_html_notags(self, address):
+ plog('INFO', 'Conducting an html tagless test with destination ' + address)
+ ret = self.check_http_nodynamic(address)
+
+ if type(ret) == int:
+ return ret
+ (tor_js, tsha, orig_js, osha, new_js, nsha, exit_node) = ret
pass
def check_html(self, address):
# TODO: Break this out into a check_html_notags that just does a sha check
+ # FIXME: Also check+store non-tor mime types
''' check whether a http connection to a given address is molested '''
plog('INFO', 'Conducting an html test with destination ' + address)
@@ -750,6 +807,7 @@
plog('WARN', 'We had no exit node to test, skipping to the next test.')
return TEST_SUCCESS
+ # XXX: Fetch via non-tor first...
if pcode - (pcode % 100) != 200:
plog("NOTICE", exit_node+" had error "+str(pcode)+" fetching content for "+address)
result = HttpTestResult(exit_node, address, TEST_INCONCLUSIVE,
@@ -807,7 +865,11 @@
tag_file.write(string_soup)
tag_file.close()
- self.cookie_jar.save(content_prefix+'.cookies', 'w')
+ try:
+ self.cookie_jar.save(content_prefix+'.cookies')
+ except:
+ traceback.print_exc()
+ plog("WARN", "Error saving cookies in "+str(self.cookie_jar)+" to "+content_prefix+".cookies")
content_file = open(content_prefix+'.content', 'w')
content_file.write(content)
@@ -831,8 +893,15 @@
else: self.successes[address]=1
return TEST_SUCCESS
- # if content doesnt match, update the direct content
+ # if content doesnt match, update the direct content and use new cookies
+ self.cookie_jar = cookielib.LWPCookieJar()
+
+ # If we have alternate IPs to bind to on this box, use them?
+ # Sometimes pages have the client IP encoded in them..
+ BindingSocket.bind_to = refetch_ip
(code_new, content_new) = http_request(address, self.cookie_jar, self.headers)
+ BindingSocket.bind_to = None
+
content_new = content_new.decode('ascii', 'ignore')
if not content_new:
plog("WARN", "Failed to re-frech "+address+" outside of Tor. Did our network fail?")
@@ -876,8 +945,12 @@
tag_file.close()
os.rename(content_prefix+'.cookies', content_prefix+'.cookies-old')
- self.cookie_jar.save(content_prefix+'.cookies', 'w')
-
+ try:
+ self.cookie_jar.save(content_prefix+'.cookies')
+ except:
+ traceback.print_exc()
+ plog("WARN", "Error saving cookies in "+str(self.cookie_jar)+" to "+content_prefix+".cookies")
+
os.rename(content_prefix+'.content', content_prefix+'.content-old')
new_content_file = open(content_prefix+'.content', 'w')
new_content_file.write(content_new)
@@ -1645,15 +1718,102 @@
response = response[:-1]
return response
+class NodeManager(EventHandler):
+ '''
+ A tor control event handler extending TorCtl.EventHandler.
+ Monitors NS and NEWDESC events, and updates each test
+ with new nodes
+ '''
+ def __init__(self, c):
+ EventHandler.__init__(self)
+ self.c = c
+ self.routers = {}
+ self.sorted_r = []
+ self.rlock = threading.Lock()
+ self._read_routers(self.c.get_network_status())
+ self.new_nodes=True
+ c.set_event_handler(self)
+ c.set_events([TorCtl.EVENT_TYPE.NEWDESC, TorCtl.EVENT_TYPE.NS], True)
+
+ def has_new_nodes(self):
+ ret = False
+ plog("DEBUG", "has_new_nodes begin")
+ try:
+ self.rlock.acquire()
+ ret = self.new_nodes
+ self.new_nodes = False
+ finally:
+ self.rlock.release()
+ plog("DEBUG", "has_new_nodes end")
+ return ret
+
+ def get_nodes_for_port(self, port):
+ ''' return a list of nodes that allow exiting to a given port '''
+ plog("DEBUG", "get_nodes_for_port begin")
+ restriction = NodeRestrictionList([FlagsRestriction(["Running", "Valid",
+"Fast"]), MinBWRestriction(min_node_bw), ExitPolicyRestriction('255.255.255.255', port)])
+ try:
+ self.rlock.acquire()
+ ret = [x for x in self.sorted_r if restriction.r_is_ok(x)]
+ finally:
+ self.rlock.release()
+ plog("DEBUG", "get_nodes_for_port end")
+ return ret
+
+ def _read_routers(self, nslist):
+ routers = self.c.read_routers(nslist)
+ new_routers = []
+ for r in routers:
+ if r.idhex in self.routers:
+ if self.routers[r.idhex].nickname != r.nickname:
+ plog("NOTICE", "Router "+r.idhex+" changed names from "
+ +self.routers[r.idhex].nickname+" to "+r.nickname)
+ self.sorted_r.remove(self.routers[r.idhex])
+ self.routers[r.idhex] = r
+ new_routers.append(r)
+
+ self.sorted_r.extend(new_routers)
+ self.sorted_r.sort(lambda x, y: cmp(y.bw, x.bw))
+ # This is an OK update because of the GIL (also we don't touch it)
+ for i in xrange(len(self.sorted_r)): self.sorted_r[i].list_rank = i
+
+ def ns_event(self, n):
+ plog("DEBUG", "ns_event begin")
+ try:
+ self.rlock.acquire()
+ self._read_routers(n.nslist)
+ self.new_nodes = True
+ finally:
+ self.rlock.release()
+ plog("DEBUG", "Read " + str(len(n.nslist))+" NS => "
+ + str(len(self.sorted_r)) + " routers")
+
+ def new_desc_event(self, d):
+ plog("DEBUG", "new_desc_event begin")
+ try:
+ self.rlock.acquire()
+ for i in d.idlist: # Is this too slow?
+ self._read_routers(self.c.get_network_status("id/"+i))
+ self.new_nodes = True
+ finally:
+ self.rlock.release()
+ plog("DEBUG", "Read " + str(len(d.idlist))+" Desc => "
+ + str(len(self.sorted_r)) + " routers")
+
+
class DNSRebindScanner(EventHandler):
'''
A tor control event handler extending TorCtl.EventHandler
Monitors for REMAP events (see check_dns_rebind())
'''
- def __init__(self, mt):
+ def __init__(self, mt, c):
EventHandler.__init__(self)
self.__mt = mt
+ c.set_event_handler(self)
+ c.set_events([TorCtl.EVENT_TYPE.STREAM], True)
+ self.c=c
+
def stream_status_event(self, event):
if event.status == 'REMAP':
octets = map(lambda x: int2bin(x).zfill(8), event.target_host.split('.'))
@@ -1666,7 +1826,7 @@
result = DNSRebindTestResult(node, '', TEST_FAILURE)
handler.saveResult(result)
-class Metatroller:
+class Metaconnection:
''' Abstracts operations with the Metatroller '''
def __init__(self):
'''
@@ -1719,6 +1879,8 @@
plog('ERROR', 'A service other than the Tor control port is listening on ' + control_host + ':' + control_port)
plog('ERROR', e)
exit()
+ self.node_manager = NodeManager(c)
+
def get_exit_node(self):
''' ask metatroller for the last exit used '''
@@ -1766,12 +1928,6 @@
# self.__contol.set_option('AuthDirBadExit', exit) ?
pass
- def get_nodes_for_port(self, port):
- ''' ask control port for a list of nodes that allow exiting to a given port '''
- routers = self.__control.read_routers(self.__control.get_network_status())
- restriction = NodeRestrictionList([FlagsRestriction(["Running", "Valid", "Fast"]), ExitPolicyRestriction('255.255.255.255', port)])
- return [x for x in routers if restriction.r_is_ok(x)]
-
# XXX: Hrmm is this in the right place?
def check_all_exits_port_consistency(self):
'''
@@ -1816,11 +1972,24 @@
The test makes sure that external hosts are not resolved to private addresses
'''
plog('INFO', 'Monitoring REMAP events for weirdness')
- self.__dnshandler = DNSRebindScanner(self)
- self.__control.set_event_handler(self.__dnshandler)
- self.__control.set_events([TorCtl.EVENT_TYPE.STREAM], True)
+ # establish a control port connection
+ try:
+ s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ s.connect((control_host, control_port))
+ c = Connection(s)
+ c.authenticate()
+ except socket.error, e:
+ plog('ERROR', 'Couldn\'t connect to the control port')
+ plog('ERROR', e)
+ exit()
+ except AttributeError, e:
+ plog('ERROR', 'A service other than the Tor control port is listening on ' + control_host + ':' + control_port)
+ plog('ERROR', e)
+ exit()
+ self.__dnshandler = DNSRebindScanner(self, c)
+
# some helpful methods
def load_wordlist(file):
@@ -1946,7 +2115,7 @@
wordlist = load_wordlist(wordlist_file)
# initiate the connection to the metatroller
- mt = Metatroller()
+ mt = Metaconnection()
# initiate the passive dns rebind attack monitor
if do_dns_rebind:
@@ -2017,9 +2186,15 @@
# start testing
while 1:
+ avail_tests = tests.values()
+ if mt.node_manager.has_new_nodes():
+ plog("INFO", "Got signal for node update.")
+ for test in avail_tests:
+ test.update_nodes()
+ plog("INFO", "Note update complete.")
+
# Get as much milage out of each exit as we safely can:
# Run a random subset of our tests in random order
- avail_tests = tests.values()
n_tests = random.choice(xrange(1,len(avail_tests)+1))
to_run = random.sample(avail_tests, n_tests)
@@ -2056,7 +2231,6 @@
if result == TEST_SUCCESS:
test.mark_chosen(current_exit)
-
# Check each test for rewind
for test in tests.itervalues():
if test.finished():
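
A note on the finished() change above: since NodeManager can now
replace the node list at any time, mark_chosen() no longer removes
nodes from self.nodes; it records them in marked_nodes, and a scan
pass is finished only when the symmetric difference against
all_nodes is empty. A toy illustration of that check:

import sets

all_nodes = sets.Set(["A", "B", "C"])
marked_nodes = sets.Set(["A", "B"])

# finished() == not (marked_nodes ^ all_nodes): the symmetric
# difference is empty exactly when every known node has been marked.
print not (marked_nodes ^ all_nodes)   # False; "C" is still untested
marked_nodes.add("C")
print not (marked_nodes ^ all_nodes)   # True; the scan pass is done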
Modified: torflow/trunk/NetworkScanners/soatstats.py
===================================================================
--- torflow/trunk/NetworkScanners/soatstats.py 2009-02-03 12:42:55 UTC (rev 18377)
+++ torflow/trunk/NetworkScanners/soatstats.py 2009-02-03 13:01:16 UTC (rev 18378)
@@ -129,7 +129,8 @@
changed_content = bool(old_vs_new.changed_content() or old_vs_new.changed_content())
- # Verify all of our changed tags are present here
+ # Verify all of our changed tags are present here
+ # XXX: Have this print out more info on changed tags..
if new_vs_tor.has_more_changed_tags(changed_tags) or \
new_vs_tor.has_more_changed_attrs(changed_attributes) or \
new_vs_tor.changed_content() and not changed_content: