[or-cvs] r15819: SOAT: Added TLS tests. Started choosing semi-random urls for (torflow/branches/gsoc2008)
aleksei at seul.org
aleksei at seul.org
Thu Jul 10 15:29:14 UTC 2008
Author: aleksei
Date: 2008-07-10 11:29:14 -0400 (Thu, 10 Jul 2008)
New Revision: 15819
Modified:
torflow/branches/gsoc2008/soat.py
Log:
SOAT: Added TLS tests. Started choosing semi-random urls for testing. Pydoced.
Modified: torflow/branches/gsoc2008/soat.py
===================================================================
--- torflow/branches/gsoc2008/soat.py 2008-07-10 15:25:49 UTC (rev 15818)
+++ torflow/branches/gsoc2008/soat.py 2008-07-10 15:29:14 UTC (rev 15819)
@@ -1,10 +1,24 @@
#!/usr/bin/python
+#
+# 2008 Aleksei Gorny, mentored by Mike Perry
+'''
+Snakes on a Tor exit node scanner
+
+The SOAT scanner checks whether exit nodes behave by initiating connections
+to semi-randomly chosen targets using several protocols (http, https, ssh, smtp, imap, etc)
+and comparing content received directly and via tor.
+
+It interacts with metatroller and the control port to be aware of the tor network status.
+'''
+
+__all__ = ["ExitNodeScanner", "load_wordlist", "get_urls"]
+
import httplib
import os
+import pickle
import random
import re
-import pickle
from sets import Set
import smtplib
import socket
@@ -23,45 +37,28 @@
from TorCtl.PathSupport import *
from TorCtl.TorCtl import Connection
-sys.path.append("./tools/BeautifulSoup/")
-from BeautifulSoup import BeautifulSoup, SoupStrainer
-
-sys.path.append("./tools/SocksiPy/")
-import socks
-
# Try to use system openssl first
try:
from OpenSSL import *
+ sys.path.append("./tools")
except:
sys.path.append("./tools/")
from OpenSSL import *
-sys.path.append("./tools/pyssh")
-import pyssh
+from BeautifulSoup.BeautifulSoup import BeautifulSoup, SoupStrainer
+from SocksiPy import socks
+import Pyssh.pyssh
#
# config stuff
#
-user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.8.1) Gecko/20061010 Firefox/2.0'
+# these are used when searching for 'random' urls for testing
+wordlist_file = './wordlist.txt';
+allowed_filetypes = ['all','pdf']
+result_per_type = 5
-wordlist_file = './wordlist.txt';
-allowed_filetypes = ['all','pdf']
-result_per_type = 1
-
-same_origin_policy = True
-
#
-# links of interest
-#
-
-# FIXME: Turn these into a keyword list that causes us to get semi-random
-# google results in a few different languages
-docs_http = ['http://www.torproject.org','http://www.math.ut.ee','http://www.mozilla.com']
-docs_https = ['mail.google.com','addons.mozilla.org','www.paypal.com','www.fastmail.fm']
-docs_ssh = []
-
-#
# ports to test in the consistency test
#
@@ -73,7 +70,7 @@
["http", ExitPolicyRestriction('255.255.255.255', 80), "https", ExitPolicyRestriction('255.255.255.255', 443)]
]
-# tags and attributes to check in the http test
+# tags and attributes to check in the http test: XXX these should be reviewed
tags_to_check = ['a', 'area', 'base', 'applet', 'embed', 'form', 'frame',
'iframe', 'img', 'link', 'object', 'script']
@@ -85,43 +82,45 @@
linebreak = '\r\n'
-# a simple interface to handle a socket connection
-# with readline and writeline capability
+# a simple interface to handle a socket connection
class Client:
def __init__(self, host, port):
- self.s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
- self.s.connect((host, port))
- self.buffer = self.s.makefile('rb')
+ self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ self.sock.connect((host, port))
+ self.buffer = self.sock.makefile('rb')
def writeline(self, line):
- self.s.send(line + linebreak)
+ self.sock.send(line + linebreak)
def readline(self):
- s = self.buffer.readline()
- if not s:
+ response = self.buffer.readline()
+ if not response:
raise EOFError
- if s[-2:] == linebreak:
- s = s[:-2]
- elif s[-1:] in linebreak:
- s = s[:-1]
- return s
+ elif response[-2:] == linebreak:
+ response = response[:-2]
+ elif response[-1:] in linebreak:
+ response = response[:-1]
+ return response
-# The scanner class
class ExitNodeScanner:
-
+ ''' The scanner class '''
def __init__(self):
+ '''
+ Establish a connection to metatroller & control port,
+ configure metatroller, load the number of previously tested nodes
+ '''
# establish a metatroller connection
plog('INFO', 'ExitNodeScanner starting up...')
try:
- self.__client = Client(meta_host, meta_port)
+ self.__meta = Client(meta_host, meta_port)
except socket.error:
plog('ERROR', 'Couldn\'t connect to metatroller. Is it on?')
exit()
# skip two lines of metatroller introduction
- data = self.__client.readline()
- data = self.__client.readline()
+ data = self.__meta.readline()
+ data = self.__meta.readline()
# configure metatroller
commands = [
@@ -135,10 +134,10 @@
'RESETSTATS']
plog('INFO', 'Executing preliminary configuration commands')
for c in commands:
- self.__client.writeline(c)
- reply = self.__client.readline()
+ self.__meta.writeline(c)
+ reply = self.__meta.readline()
if reply[:3] != '250': # first three chars indicate the reply code
- reply += self.__client.readline()
+ reply += self.__meta.readline()
plog('ERROR', 'Error configuring metatroller (' + command + ' failed)')
plog('ERROR', reply)
exit()
@@ -166,11 +165,12 @@
plog('INFO', 'ExitNodeScanner up and ready')
def get_exit_node(self):
- self.__client.writeline("GETLASTEXIT")
- reply = self.__client.readline()
+ ''' ask metatroller for the last exit used '''
+ self.__meta.writeline("GETLASTEXIT")
+ reply = self.__meta.readline()
if reply[:3] != '250':
- reply += self.__client.readline()
+ reply += self.__meta.readline()
plog('ERROR', reply)
return 0
@@ -181,36 +181,47 @@
return self.__exit
def get_new_circuit(self):
+ ''' tell metatroller to close the current circuit and open a new one '''
plog('NOTICE', 'Trying to construct a new circuit')
- self.__client.writeline("NEWEXIT")
- reply = self.__client.readline()
+ self.__meta.writeline("NEWEXIT")
+ reply = self.__meta.readline()
if reply[:3] != '250':
plog('ERROR', 'Choosing a new exit failed')
plog('ERROR', reply)
def set_new_exit(self, exit):
+ '''
+ tell metatroller to set the given node as the exit in the next circuit
+ Note: currently not used
+ '''
plog('NOTICE', 'Trying to set ' + exit + ' as the exit for the next circuit')
- self.__client.writeline("SETEXIT " + exit)
- reply = self.__client.readline()
+ self.__meta.writeline("SETEXIT " + exit)
+ reply = self.__meta.readline()
if reply[:3] != '250':
plog('ERROR', 'Setting ' + exit + ' as the new exit failed')
plog('ERROR', reply)
- def report_bad_exit(self, exit):
+ def report_bad_exit(self, exit):
+ '''
+ report an evil exit to the control port using AuthDirBadExit
+ Note: currently not used
+ '''
# self__contol.set_option('AuthDirBadExit', exit) ?
pass
- # get the list of nodes that allow to exit to a port
def get_nodes_for_port(self, port):
+ ''' ask control port for a list of nodes that allow exiting to a given port '''
routers = self.__control.read_routers(self.__control.get_network_status())
restriction = ExitPolicyRestriction('255.255.255.255', port)
return [x for x in routers if restriction.r_is_ok(x)]
- # finds nodes that allow connections over a common protocol
- # while disallowing connections over its secure version
def check_all_exits_port_consistency(self):
+ '''
+ an independent test that finds nodes that allow connections over a common protocol
+ while disallowing connections over its secure version (for instance http/https)
+ '''
# get the structure
routers = self.__control.read_routers(self.__control.get_network_status())
@@ -236,6 +247,7 @@
plog('INFO', 'Total bad exits: ' + `len(bad_exits)` + ' (~' + `(len(bad_exits) * 100 / len(routers))` + '%)')
def check_http(self, address):
+ ''' check whether a http connection to a given address is molested '''
plog('INFO', 'Conducting an http test with destination ' + address)
defaultsocket = socket.socket
@@ -248,15 +260,16 @@
socket.socket = defaultsocket
exit_node = self.get_exit_node()
- if exit_node == 0 or exit_node == '0':
+ if exit_node == 0 or exit_node == '0' or not exit_node:
plog('INFO', 'We had no exit node to test, skipping to the next test.')
return 0
+ address_file = address[7:].replace('/','_') # an address representation acceptable for a filename (leave out the http:// and replace slashes)
+
# if we have no content, we had a connection error
- # address[7:] means we're leaving out the 'http://' from the address part
if pcontent == 0:
result = HttpTestResult(exit_node, address, 0, TEST_INCONCLUSIVE)
- result_file = open(http_i_dir + `exit_node` + '_' + address[7:] + '.result','w')
+ result_file = open(http_i_dir + `exit_node` + '_' + address_file + '.result','w')
pickle.dump(result, result_file)
result_file.close()
return TEST_INCONCLUSIVE
@@ -270,15 +283,15 @@
# if we don't have any yet, get it
soup = 0
try:
- tag_file = open(http_tags_dir + address[7:] + '.tags', 'r')
+ tag_file = open(http_tags_dir + address_file + '.tags', 'r')
soup = BeautifulSoup(tag_file.read())
tag_file.close()
except IOError:
content = self.http_request(address)
content = content.decode('ascii','ignore')
soup = BeautifulSoup(content, parseOnlyThese=elements)
- tag_file = open(http_tags_dir + '_' + address[7:] + '.tags', 'w')
- tag_file.write(soup.__str__())
+ tag_file = open(http_tags_dir + address_file + '.tags', 'w')
+ tag_file.write(soup.__str__() + ' ') # the space is needed in case we have some page with no matching tags at all
tag_file.close()
except Exception, e:
plog('ERROR', 'Failed to get the correct tag structure for ' + address)
@@ -291,25 +304,55 @@
# compare the content
# if content matches, everything is ok
if psoup == soup:
- plog('INFO', 'Content matches')
+ result = HttpTestResult(exit_node, address, 0, TEST_SUCCESS)
+ result_file = open(http_s_dir + `exit_node` + '_' + address_file + '.result','w')
+ pickle.dump(result, result_file)
+ result_file.close()
+ return TEST_SUCCESS
# if content doesnt match, update the direct content
-
+ content_new = self.http_request(address)
+ content_new = content_new.decode('ascii', 'ignore')
+ if content_new == 0:
+ result = HttpTestResult(exit_node, address, 0, TEST_INCONCLUSIVE)
+ result_file = open(http_i_dir + `exit_node` + '_' + address_file + '.result','w')
+ pickle.dump(result, result_file)
+ result_file.close()
+ return TEST_INCONCLUSIVE
+
+ soup_new = BeautifulSoup(content_new, parseOnlyThese=elements)
# compare the new and old content
# if they match, means the node has been changing the content
+ if soup == soup_new:
+ result = HttpTestResult(exit_node, address, 0, TEST_FAILURE)
+ result_file = open(http_f_dir + `exit_node` + '_' + address_file + '.result','w')
+ pickle.dump(result, result_file)
+ result_file.close()
+ return TEST_FAILURE
-
# if content has changed outside of tor, update the saved file
+ tag_file = open(http_tags_dir + '_' + address_file + '.tags', 'w')
+ tag_file.write(soup_new.__str__())
+ tag_file.close()
# compare the node content and the new content
# if it matches, everything is ok
+ if psoup == soup_new:
+ result = HttpTestResult(exit_node, address, 0, TEST_SUCCESS)
+ result_file = open(http_s_dir + `exit_node` + '_' + address_file + '.result','w')
+ pickle.dump(result, result_file)
+ result_file.close()
+ return TEST_SUCCESS
+ # if it doesn't match, means the node has been changing the content
+ result = HttpTestResult(exit_node, address, 0, TEST_FAILURE)
+ result_file = open(http_f_dir + `exit_node` + '_' + address_file + '.result','w')
+ pickle.dump(result, result_file)
+ result_file.close()
+ return TEST_FAILURE
- # if it doesn't match, means the node has been changing the content
-
- return TEST_SUCCESS
-
def check_openssh(self, address):
+ ''' check whether an openssh connection to a given address is molested '''
ssh = pyssh.Ssh('username', 'host', 22)
ssh.set_sshpath(pyssh.SSH_PATH)
@@ -319,6 +362,7 @@
return 0
def check_openssl(self, address):
+ ''' check whether an https connection to a given address is molested '''
plog('INFO', 'Conducting an ssl test with destination ' + address)
# get the cert via tor
@@ -333,7 +377,7 @@
socket.socket = defaultsocket
exit_node = self.get_exit_node()
- if exit_node == 0 or exit_node == '0':
+ if exit_node == 0 or exit_node == '0' or not exit_node:
plog('INFO', 'We had no exit node to test, skipping to the next test.')
return TEST_FAILURE
@@ -447,35 +491,360 @@
return TEST_FAILURE
- # stub for checking whether smtp & tls function properly
def check_smtp(self, address):
+ '''
+ check whether smtp + tls connection to a given address is molested
+ this is done by going through the STARTTLS sequence and comparing server
+ responses for the direct and tor connections
+ '''
+
+ plog('INFO', 'Conducting an stmp test with destination ' + address)
+
+ defaultsocket = socket.socket
+ socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, tor_host, tor_port)
+ socket.socket = socks.socksocket
+
+ ehlo1_reply = 0
+ has_starttls = 0
+ ehlo2_reply = 0
+
try:
s = smtplib.SMTP(address)
- c = s.ehlo()[0]
- if not c>= 200 or c <= 299:
- return 0
- if not s.has_extn('starttls'):
- return 0
- c = s.ehlo()[0]
- if not c>= 200 or c <= 299:
- return 0
- except:
- pass
+ ehlo1_reply = s.ehlo()[0]
+ if ehlo1_reply != 250:
+ raise smtplib.SMTPException('First ehlo failed')
+ has_starttls = s.has_extn('starttls')
+ if not has_starttls:
+ raise smtplib.SMTPException('It seems the server doesn\'t support starttls')
+ s.starttls()
+ # TODO check certs?
+ ehlo2_reply = s.ehlo()[0]
+ if ehlo2_reply != 250:
+ raise smtplib.SMTPException('Second ehlo failed')
+ except socket.gaierror, e:
+ plog('ERROR', 'A connection error occured while testing smtp at ' + address)
+ plog('ERROR', e)
+ return TEST_INCONCLUSIVE
+ except Exception, e:
+ plog('ERROR','An error occured while testing smtp at ' + address)
+ plog('ERROR', e)
+ finally:
+ # reset the connection method back to direct
+ socket.socket = defaultsocket
- # stub for checking whether pop & tls function properly
+ # check whether the test was valid at all
+ exit_node = self.get_exit_node()
+ if exit_node == 0 or exit_node == '0':
+ plog('INFO', 'We had no exit node to test, skipping to the next test.')
+ return 0
+
+ # now directly
+
+ ehlo1_reply_d = 0
+ has_starttls_d = 0
+ ehlo2_reply_d = 0
+
+ try:
+ s = smtplib.SMTP(address)
+ ehlo1_reply_d = s.ehlo()[0]
+ if ehlo1_reply != 250:
+ raise smtplib.SMTPException('First ehlo failed')
+ has_starttls_d = s.has_extn('starttls')
+ if not has_starttls_d:
+ raise smtplib.SMTPException('It seems that the server doesn\'t support starttls')
+ s.starttls()
+ ehlo2_reply = s.ehlo()[0]
+ if ehlo2_reply != 250:
+ raise smtplib.SMTPException('Second ehlo failed')
+ except Exception, e:
+ plog('ERROR', 'An error occurred while testing smtp at ' + address)
+ plog('ERROR', e)
+
+ # compare
+ if ehlo1_reply != ehlo1_reply_d or has_starttls != has_starttls_d or ehlo2_reply != ehlo2_reply_d:
+ return TEST_FAILURE
+
+ return TEST_SUCCESS
+
def check_pop(self, address):
- pass
+ '''
+ check whether a pop + tls connection to a given address is molested
+ it is implied that the server reads/sends messages compliant with RFC1939 & RFC2449
+ '''
- # stub for checking whether imap & tls function properly
+ plog('INFO', 'Conducting a pop test with destination ' + address)
+
+ defaultsocket = socket.socket
+ socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, tor_host, tor_port)
+ socket.socket = socks.socksocket
+
+ capabilities_ok = False
+ starttls_present = False
+ tls_started = None
+ tls_succeeded = None
+
+ try:
+ pop = Client(address, 110)
+
+ # read the server greeting
+ server_greeting = pop.readline()
+
+ # get the server capabilities
+ pop.writeline('CAPA')
+ capabilities = ''
+ while 1:
+ curr = pop.readline()
+ if '+OK' in curr:
+ capabilities_ok = True
+ elif curr == '.':
+ break
+ elif 'STLS' in curr:
+ starttls_present = True
+
+ if not capabilities_ok:
+ return TEST_INCONCLUSIVE
+
+ # try to start tls negotiation
+ if starttls_present:
+ pop.writeline('STLS')
+
+ starttls_started = '+OK' in starttls_response
+
+ # negotiate TLS and issue some request to feel good about it
+ # TODO check certs?
+ ctx = SSL.Context(SSL.SSLv23_METHOD)
+ c = SSL.Connection(ctx, pop.sock)
+ c.set_connect_state()
+ c.do_handshake()
+ c.send('CAPA' + linebreak)
+
+ while tls_succeeded == None:
+ line = ''
+ char = None
+ while char != '\n':
+ char = c.read(1)
+ if not char:
+ break
+ elif char == '.':
+ tls_succeeded = False
+ line += char
+
+ if '-ERR' in line:
+ tls_succeeded = False
+ elif '+OK' in line:
+ tls_succeeded = True
+ elif not line:
+ tls_succeeded = False
+
+ except Exception, e:
+ plog('ERROR', e)
+ return TEST_INCONCLUSIVE
+ finally:
+ # reset the connection to default
+ socket.socket = defaultsocket
+
+ # do the same for the direct connection
+
+ capabilities_ok_d = False
+ starttls_present_d = False
+ tls_started_d = None
+ tls_succeeded_d = None
+
+ try:
+ pop = Client(address, 110)
+
+ # read the server greeting
+ server_greeting = pop.readline()
+
+ # get the server capabilities
+ pop.writeline('CAPA')
+ capabilities = ''
+ while 1:
+ curr = pop.readline()
+ if '+OK' in curr:
+ capabilities_ok_d = True
+ elif curr == '.':
+ break
+ elif 'STLS' in curr:
+ starttls_present_d = True
+
+ if not capabilities_ok_d:
+ return TEST_INCONCLUSIVE
+
+ # try to start tls negotiation
+ if starttls_present_d:
+ pop.writeline('STLS')
+
+ starttls_started_d = '+OK' in starttls_response
+
+ # negotiate TLS, issue some request to feel good about it
+ ctx = SSL.Context(SSL.SSLv23_METHOD)
+ c = SSL.Connection(ctx, pop.sock)
+ c.set_connect_state()
+ c.do_handshake()
+ c.send('CAPA' + linebreak)
+
+ while tls_succeeded_d == None:
+ line = ''
+ char = None
+ while char != '\n':
+ char = c.read(1)
+ if not char:
+ break
+ elif char == '.':
+ tls_succeeded_d = False
+ line += char
+
+ if '-ERR' in line:
+ tls_succeeded_d = False
+ elif '+OK' in line:
+ tls_succeeded_d = True
+ elif not line:
+ tls_succeeded_d = False
+
+ except Exception, e:
+ plog('ERROR', e)
+ return TEST_INCONCLUSIVE
+
+ # compare
+ if (capabilities_ok != capabilities_ok_d or starttls_present != starttls_present_d or
+ tls_started != tls_started_d or tls_suceeded != tls_succeeded_d):
+ return TEST_FAILURE
+
+ return TEST_SUCCESS
+
def check_imap(self, address):
- pass
+ '''
+ check whether an imap + tls connection to a given address is molested
+ it is implied that the server reads/sends messages compliant with RFC3501
+ '''
+ plog('INFO', 'Conducting an imap test with destination ' + address)
+ defaultsocket = socket.socket
+ socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, tor_host, tor_port)
+ socket.socket = socks.socksocket
+
+ capabilities_ok = None
+ starttls_present = None
+ tls_started = None
+ tls_succeeded = None
+ try:
+ imap = Client(address, 143)
+
+ # read server greeting
+ server_greeting = imap.readline()
+
+ # get server capabilities
+ imap.writeline('a001 CAPABILITY')
+ capabilities = imap.readline() # first line - list of capabilities
+ capabilities_ok = 'OK' in imap.readline() # second line - the request status
+
+ if not capabilities_ok:
+ return TEST_INCONCLUSIVE
+
+ # check if starttls is present
+ starttls_present = 'STARTTLS' in capabilities
+
+ if starttls_present:
+ imap.writeline('a002 STARTTLS')
+ tls_started = 'OK' in imap.readline()
+
+ # negotiate TLS, issue a request to feel good about it
+ # TODO check the cert as well?
+ ctx = SSL.Context(SSL.SSLv23_METHOD)
+ c = SSL.Connection(ctx, imap.sock)
+ c.set_connect_state()
+ c.do_handshake()
+ c.send('a003 CAPABILITY' + linebreak)
+
+ while tls_succeeded == None:
+ line = ''
+ char = None
+ while char != '\n':
+ char = c.read(1)
+ if not char:
+ break
+ line += char
+
+ if 'Error' in line or 'error' in line:
+ tls_succeeded = False
+ elif 'OK' in line:
+ tls_succeeded = True
+ elif not line:
+ tls_succeeded = False
+
+ except Exception, e:
+ plog('ERROR', e)
+ return TEST_INCONCLUSIVE
+ finally:
+ socket.socket = defaultsocket
+
+ # do the same for the direct connection
+ capabilities_ok_d = None
+ starttls_present_d = None
+ tls_started_d = None
+ tls_succeeded_d = None
+ try:
+ imap = Client(address, 143)
+
+ # read server greeting
+ server_greeting = imap.readline()
+
+ # get server capabilities
+ imap.writeline('a001 CAPABILITY')
+ capabilities = imap.readline() # first line - list of capabilities
+ capabilities_ok_d = 'OK' in imap.readline() # second line - the request status
+
+ if not capabilities_ok_d:
+ return TEST_INCONCLUSIVE
+
+ # check if starttls is present
+ starttls_present_d = 'STARTTLS' in capabilities
+
+ if starttls_present_d:
+ imap.writeline('a002 STARTTLS')
+ tls_started = 'OK' in imap.readline()
+
+ # negotiate TLS, issue some request to feel good about it
+ ctx = SSL.Context(SSL.SSLv23_METHOD)
+ c = SSL.Connection(ctx, imap.sock)
+ c.set_connect_state()
+ c.do_handshake()
+ c.send('a003 CAPABILITY' + linebreak)
+
+ while tls_succeeded_d == None:
+ line = ''
+ char = None
+ while char != '\n':
+ char = c.read(1)
+ if not char:
+ break
+ line += char
+
+ if 'Error' in line or 'error' in line:
+ tls_succeeded_d = False
+ elif 'OK' in line:
+ tls_succeeded_d = True
+ elif not line:
+ tls_succeeded_d = False
+
+ except Exception, e:
+ plog('ERROR', e)
+ return TEST_INCONCLUSIVE
+
+ # compare
+ if (capabilities_ok != capabilities_ok_d or starttls_present != starttls_present_d or
+ tls_started != tls_started_d or tls_succeeded != tls_succeeded_d):
+ return TEST_FAILURE
+
+ return TEST_SUCCESS
+
def http_request(self, address):
-
+ ''' perform a http GET-request and return the content received '''
request = 0
try:
request = urllib2.Request(address)
- request.add_header('User-Agent', user_agent)
+ request.add_header('User-Agent','Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.8.1) Gecko/20061010 Firefox/2.0')
except Exception, e:
plog('ERROR', 'Forming a http request to ' + address + ' failed.')
plog('ERROR', e)
@@ -496,7 +865,7 @@
pass
def ssl_request(self, address):
-
+ ''' initiate an ssl connection and return the server certificate '''
# specify the context
ctx = SSL.Context(SSL.SSLv23_METHOD)
ctx.set_verify_depth(1)
@@ -523,132 +892,150 @@
# some helpful methods
-'''
-construct a list of urls based on the wordlist and filetypes of interest
-'''
-def load_urls():
- plog('INFO', 'Loading url list')
-
+def load_wordlist(file):
+ ''' load a list of strings from a file (which contains words separated by newlines) '''
+ plog('INFO', 'Loading the wordlist')
+
wordlist = []
- fh = open(wordlist_file, 'r')
+ fh = open(file, 'r')
try:
for line in fh:
wordlist.append(line[:-1]) # get rid of the linebreaks
finally:
fh.close()
+ return wordlist
+
+def get_urls(wordlist, filetypes=['any'], results_per_type=5, protocol='any', g_results_per_page=10):
+ '''
+ construct a list of urls based on the wordlist, filetypes and protocol.
+
+ Note: since we currently use google, which doesn't index by protocol,
+ searches for anything but 'any' could be rather slow
+ '''
+ plog('INFO', 'Searching google for relevant sites...')
+
urllist = []
- for ft in allowed_filetypes:
+ for filetype in filetypes:
type_urls = []
- while len(type_urls) < result_per_type:
- # probably the discover_urls method should consider moving along the search result pages
- type_urls.extend(discover_urls(ft,
- wordlist[int(random.random() * len(wordlist))]))
- type_urls = list(Set(type_urls))
-
- plog('INFO', 'URL list for ' + ft + ': ' + '\n'.join(type_urls) + '\n')
- urllist.extend(type_urls)
-
- return urllist
+ while len(type_urls) < results_per_type:
+ query = random.choice(wordlist)
+ if filetype != 'any':
+ query += ' filetype:' + filetype
+ if protocol != 'any':
+ query += ' allinurl:' + protocol # this isn't too reliable, but we'll re-filter results later
+ #query += '&num=' + `g_results_per_page`
-'''
-Find links to files related to a query
-'''
-def discover_urls(self, filetype, query):
- # search google for relevant pages
- # note: google only accepts requests from idenitified browsers
- if filetype != 'all':
- query += ':' + filetype
-
- host = 'www.google.com'
- params = urllib.urlencode({'q' : query})
- headers = {'User-Agent' : user_agent}
- search_url = '/search' + '?' + params
-
- connection = httplib.HTTPConnection(host)
- connection.request("GET", search_url, {}, headers) # can't add params here for some reason
-
- response = connection.getresponse()
- if response.status == 200:
- # if everything went well, start parsing
- urls = []
+ # search google for relevant pages
+ # note: google only accepts requests from identified browsers
+ host = 'www.google.com'
+ params = urllib.urlencode({'q' : query})
+ headers = {'User-Agent' : 'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.8.1) Gecko/20061010 Firefox/2.0'}
+ search_path = '/search' + '?' + params
- content = response.read()
+ connection = None
+ response = None
+ try:
+ connection = httplib.HTTPConnection(host)
+ connection.request("GET", search_path, {}, headers)
+ response = connection.getresponse()
+ if response.status != 200:
+ raise Exception(response.status, response.reason)
+ except Exception, e:
+ plog('ERROR', 'Connection to google.com failed')
+ plog('ERROR', e)
+ continue
- soup = BeautifulSoup(content)
+ content = response.read()
+ links = SoupStrainer('a')
+ soup = BeautifulSoup(content, parseOnlyThese=links)
+
+ # get the links and do some additional filtering
+ for link in soup.findAll('a', {'class' : 'l'}):
+ url = link['href']
+ if (protocol != 'any' and url[:len(protocol)] != protocol or
+ filetype != 'any' and url[-len(filetype):] != filetype):
+ pass
+ else:
+ type_urls.append(link['href'])
- # l is the class for relevant reply links
- # probably not the best criterion to rely on, so maybe some other solution needed
- for link in soup.findAll('a', {'class' : 'l'}):
- urls.append(link['href'])
+ if type_urls > results_per_type:
+ type_urls = random.sample(type_urls, results_per_type) # make sure we don't get more urls than needed
+ urllist.extend(type_urls)
+
+ return list(Set(urllist))
- # filter for filetypes if needed
- if filetype != 'all':
- urls = [u for u in urls if u[-len(filetype):] == filetype]
-
- return urls
- else:
- plog('ERROR', 'Google search failed: ' +
- response.status + ' ' + response.reason)
- return []
-
#
# main logic
#
def main(argv):
scanner = ExitNodeScanner()
- # consistency test
+ #
+ # 1) consistency test
+ #
+
# scanner.check_all_exits_port_consistency()
- # find sites for http testing if necessary
#
- # global doc_urls
- # doc_urls.extend(load_url_list())
- # doc_urls = list(Set(doc_urls))
- # plog('NOTICE', 'Final URL list: ' + '\n'.join(doc_urls) + '\n')
+ # 2) test for checking yet unchecked nodes
+ # XXX use SETEXIT systematically, after 'all nodes' have been tested, just continue with NEWEXIT
+ #
- # get the number of nodes that need to be tested
- # XXX: Need to update this periodically for this to work.. But
- # it probably shouldn't be used for a termination condition anyways..
- # It is probably good to ballpark if we've done all exits for
- # informational purposes, but then we should just restart the scan
+ # load the wordlist to search for sites later on
+ wordlist = load_wordlist(wordlist_file)
+
+ # get the total number of nodes for ports
ssl_nodes = len(scanner.get_nodes_for_port(443))
http_nodes = len(scanner.get_nodes_for_port(80))
- ssh_nodes = len(scanner.get_nodes_for_port(22))
+ #ssh_nodes = len(scanner.get_nodes_for_port(22))
+ # lists of addresses (generated later with get_urls)
+ ssl_urls = []
+ http_urls = []
+ ssh_urls = []
+
+ # test terminating conditions for somewhat ok network coverage
ssl_done = False
http_done = False
ssh_done = True
- while 1:
-
- # https test
+
+ # get some semi-random urls, try to test the exit node for each protocol needed, get a new node
+ while 1:
+ http_urls = get_urls(wordlist, protocol='http')
+ ssl_urls = ['mail.google.com', 'addons.mozilla.org', 'www.fastmail.fm'] # the search for https stuff is yet too slow
+
+ # https test
if not ssl_done:
- for ssl_site in docs_https:
- scanner.check_openssl(ssl_site)
+ ssl_site = random.choice(ssl_urls)
+ scanner.check_openssl(ssl_site)
ssl_tested_n = len(scanner.ssl_tested)
plog('INFO', 'Nodes ssl-tested: ' + `ssl_tested_n` + '/' + `ssl_nodes`
+ ' (~' + `((ssl_tested_n * 100) / ssl_nodes)` + '%)')
- if ssl_tested_n == ssl_nodes:
+ if ssl_tested_n >= ssl_nodes:
ssl_done = True
# http test
-
if not http_done:
- for http_site in docs_http:
- scanner.check_http(http_site)
+ http_site = random.choice(http_urls)
+ scanner.check_http(http_site)
http_tested_n = len(scanner.http_tested)
plog('INFO', 'Nodes http-tested: ' + `http_tested_n` + '/' + `http_nodes`
+ ' (~' + `((http_tested_n * 100) / http_nodes)` + '%)')
- if http_tested_n == http_nodes:
+ if http_tested_n >= http_nodes:
http_done = True
-
+ '''
# ssh test
- '''
if not ssh_done:
- pass
+ ssh_site = random.choice(ssh_urls)
+ scanner.check_openssh(ssh_site)
+ ssh_tested_n = len(scanner.ssh_tested)
+ plog('INFO', 'Nodes ssh-tested: ' + `ssh_tested_n` + '/' + `ssh_nodes`
+ + '(~' + `((ssh_tested_n * 100) / ssh_nodes)` + '%')')
+ if ssh_tested_n >= ssh_nodes:
+ ssh_done = True
'''
# check whether we're done, otherwise get a new circuit
@@ -658,7 +1045,6 @@
else:
scanner.get_new_circuit()
time.sleep(1)
-
#
# initiate the program
#
More information about the tor-commits
mailing list