[or-cvs] r16441: {torflow} Fixed result saving issues. Less googling. Basic dns stuff. (torflow/branches/gsoc2008)
aleksei at seul.org
aleksei at seul.org
Wed Aug 6 02:50:47 UTC 2008
Author: aleksei
Date: 2008-08-05 22:50:47 -0400 (Tue, 05 Aug 2008)
New Revision: 16441
Modified:
torflow/branches/gsoc2008/soat.py
torflow/branches/gsoc2008/soatstats.py
Log:
Fixed result saving issues. Less googling. Basic dns stuff.
Modified: torflow/branches/gsoc2008/soat.py
===================================================================
--- torflow/branches/gsoc2008/soat.py 2008-08-05 23:33:43 UTC (rev 16440)
+++ torflow/branches/gsoc2008/soat.py 2008-08-06 02:50:47 UTC (rev 16441)
@@ -20,6 +20,7 @@
__all__ = ["ExitNodeScanner", "load_wordlist", "get_urls"]
+import commands
import httplib
import os
import random
@@ -158,12 +159,23 @@
# get a data handler
self.__datahandler = DataHandler()
- # get the nodes tested so far
+ # get stats about previous runs
plog('INFO', 'Loading the previous run stats')
- self.ssh_tested = Set([x.exit_node for x in self.__datahandler.getSsh()])
- self.http_tested = Set([x.exit_node for x in self.__datahandler.getHttp()])
- self.ssl_tested = Set([x.exit_node for x in self.__datahandler.getSsl()])
+ ssh_results = self.__datahandler.getSsh()
+ ssl_results = self.__datahandler.getSsl()
+ http_results = self.__datahandler.getHttp()
+
+ # get lists of tested nodes
+ self.ssh_tested = Set([x.exit_node for x in ssh_results])
+ self.http_tested = Set([x.exit_node for x in http_results])
+ self.ssl_tested = Set([x.exit_node for x in ssl_results])
+
+ # get the number of failures
+ self.ssh_fail = [self.__datahandler.filterResults(ssh_results, show_ssh=True, show_bad=True)]
+ self.http_fail = [self.__datahandler.filterResults(http_results, show_http=True, show_bad=True)]
+ self.ssl_fail = [self.__datahandler.filterResults(ssl_results, show_ssl=True, show_bad=True)]
+
plog('INFO', 'ExitNodeScanner up and ready')
def get_exit_node(self):
@@ -266,8 +278,8 @@
plog('INFO', 'We had no exit node to test, skipping to the next test.')
return 0
- # an address representation acceptable for a filename (leave out the http:// and replace slashes)
- address_file = address[7:].replace('/','_')
+ # an address representation acceptable for a filename
+ address_file = self.__datahandler.safeFilename(address[7:])
# if we have no content, we had a connection error
if pcontent == 0:
@@ -365,8 +377,8 @@
''' check whether an https connection to a given address is molested '''
plog('INFO', 'Conducting an ssl test with destination ' + address)
- # an address representation acceptable for a filename (leave out the https:// and replace slashes)
- address_file = address[8:].replace('/','_')
+ # an address representation acceptable for a filename
+ address_file = self.__datahandler.safeFilename(address[8:])
# get the cert via tor
@@ -829,6 +841,28 @@
return TEST_SUCCESS
+ def check_dns(self, address):
+ ''' A basic comparison DNS test. '''
+ # TODO Spawns a lot of false positives (for ex. doesn't work for google.com).
+ plog('INFO', 'Conducting a basic dns test for destination ' + address)
+
+ # this should be replaced
+ ip = commands.getoutput("tor-resolve " + address)
+
+ ips_d = Set([])
+ try:
+ results = socket.getaddrinfo(address,None)
+ for result in results:
+ ips_d.add(result[4][0])
+ except Exception, e:
+ plog('ERROR', e)
+ return TEST_INCONCLUSIVE
+
+ if ip in ips_d:
+ return TEST_SUCCESS
+ else:
+ return TEST_FAILURE # might also do a direct connection reverse lookup
+
def http_request(self, address):
''' perform a http GET-request and return the content received '''
request = 0
@@ -1000,12 +1034,19 @@
http_done = False
ssh_done = True
- # get some semi-random urls, try to test the exit node for each protocol needed, get a new node
- while 1:
+ # get some semi-random test targets
+ # http_urls = get_urls(wordlist, protocol='http', results_per_type=10, g_results_per_page=20)
+ http_urls = ['http://math.ut.ee']
+ ssl_urls = ['https://mail.google.com', 'https://addons.mozilla.org', 'https://www.fastmail.fm'] # the search for https stuff is yet too slow
+
+ # get the starting rate of failed tests
+ # so we can determine when the test targets should be updated
+ http_fail = len(scanner.http_fail)
+ ssl_fail = len(scanner.ssl_fail)
+
+ # try to test the exit node for each protocol needed, get a new node
+ while 1:
- http_urls = get_urls(wordlist, protocol='http')
- ssl_urls = ['https://mail.google.com', 'https://addons.mozilla.org', 'https://www.fastmail.fm'] # the search for https stuff is yet too slow
-
# https test
if not ssl_done:
# XXX Uncomment this to try using SETEXIT
@@ -1038,6 +1079,7 @@
+ ' (~' + `((http_tested_n * 100) / http_nodes_n)` + '%)')
if http_tested_n >= http_nodes_n:
http_done = True
+
# ssh test
'''
if not ssh_done:
@@ -1054,10 +1096,16 @@
if ssl_done and http_done and ssh_done:
plog('INFO','Wow! We have tested the whole tor network. Check soatstats.py for results')
break
- else:
- pass
- scanner.get_new_circuit()
- time.sleep(1)
+
+ # if we've been having too many false positives lately, get a new target list
+ if len(scanner.http_fail) - http_fail >= len(http_urls):
+ http_urls = get_urls(wordlist, protocol='http', results_per_type=10, g_results_per_page=20)
+ http_fail = len(scanner.http_fail)
+
+ # if len(scanner.ssl_fail) - ssl_fail >= len(ssl_urls):
+ # ssl_urls = ['https://mail.google.com', 'https://addons.mozilla.org', 'https://www.fastmail.fm']
+ # ssl_fail = len(scanner.ssl_fail)
+
#
# initiate the program
#
Modified: torflow/branches/gsoc2008/soatstats.py
===================================================================
--- torflow/branches/gsoc2008/soatstats.py 2008-08-05 23:33:43 UTC (rev 16440)
+++ torflow/branches/gsoc2008/soatstats.py 2008-08-06 02:50:47 UTC (rev 16441)
@@ -70,8 +70,8 @@
class DataHandler:
''' Class for saving and managing test result data '''
def filterResults(self, results,
- show_ssh, show_http, show_ssl,
- show_good, show_bad, show_unsure):
+ show_ssh=False, show_http=False, show_ssl=False,
+ show_good=False, show_bad=False, show_unsure=False):
''' filter results based on protocol and success level '''
filters = []
@@ -131,6 +131,15 @@
return results
+ def safeFilename(self, str):
+ '''
+ remove characters illegal in some systems
+ and trim the string to a reasonable length
+ '''
+ replaced = (str.replace('/','_').replace('\\','_').replace('?','_').replace(':','_').
+ replace('|','_').replace('*','_').replace('<','_').replace('>','_').replace('"',''))
+ return replaced[:200]
+
def saveResult(self, result):
''' generic method for saving test results '''
if result.__class__.__name__ == 'HttpTestResult':
@@ -150,8 +159,7 @@
elif result.status == TEST_INCONCLUSIVE:
dir = http_i_dir
- # an address representation acceptable for a filename (leave out the http:// and replace slashes)
- address = result.site[7:].replace('/','_')
+ address = self.safeFilename(result.site[7:])
if dir:
result_file = open(dir + `result.exit_node` + '_' + address + '.result','w')
@@ -174,7 +182,7 @@
dir = ssl_i_dir
# an address representation acceptable for a filename (leave out the https:// and replace slashes)
- address = result.site[8:].replace('/','_')
+ address = self.safeFilename(result.site[8:])
if dir:
result_file = open(dir + `result.exit_node` + '_' + address + '.result','w')
More information about the tor-commits mailing list