[or-cvs] r18255: {torflow} Reorganize soat to generalize test execution logic. Leverage (torflow/trunk/NetworkScanners)

mikeperry at seul.org mikeperry at seul.org
Fri Jan 23 13:04:55 UTC 2009


Author: mikeperry
Date: 2009-01-23 08:04:55 -0500 (Fri, 23 Jan 2009)
New Revision: 18255

Modified:
   torflow/trunk/NetworkScanners/soat.py
Log:

Reorganize soat to generalize test execution logic. Leverage
generalization to randomize test execution and also gain some
circuit reuse.



Modified: torflow/trunk/NetworkScanners/soat.py
===================================================================
--- torflow/trunk/NetworkScanners/soat.py	2009-01-23 12:53:12 UTC (rev 18254)
+++ torflow/trunk/NetworkScanners/soat.py	2009-01-23 13:04:55 UTC (rev 18255)
@@ -237,12 +237,12 @@
         p = re.compile('250 LASTEXIT=[\S]+')
         m = p.match(reply)
         self.__exit = m.group()[13:] # drop the irrelevant characters    
-        plog('NOTICE','Current node: ' + self.__exit)
+        plog('INFO','Current node: ' + self.__exit)
         return self.__exit
 
     def get_new_circuit(self):
         ''' tell metatroller to close the current circuit and open a new one '''
-        plog('NOTICE', 'Trying to construct a new circuit')
+        plog('DEBUG', 'Trying to construct a new circuit')
         self.__meta.writeline("NEWEXIT")
         reply = self.__meta.readline()
 
@@ -254,7 +254,7 @@
         ''' 
         tell metatroller to set the given node as the exit in the next circuit 
         '''
-        plog('NOTICE', 'Trying to set ' + `exit` + ' as the exit for the next circuit')
+        plog('DEBUG', 'Trying to set ' + `exit` + ' as the exit for the next circuit')
         self.__meta.writeline("SETEXIT $"+exit)
         reply = self.__meta.readline()
 
@@ -1003,6 +1003,7 @@
     def http_request(self, address):
         ''' perform a http GET-request and return the content received '''
         request = urllib2.Request(address)
+        # XXX: Make all headers match a real Firefox browser
         request.add_header('User-Agent','Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.8.1) Gecko/20061010 Firefox/2.0')
 
         content = 0
@@ -1018,7 +1019,7 @@
         except KeyboardInterrupt:
             raise KeyboardInterrupt
         except:
-            plog('WARN', 'An unknown HTTP error occured')
+            plog('WARN', 'An unknown HTTP error occured for '+address)
             traceback.print_exc()
             return 0
 
@@ -1057,7 +1058,7 @@
             plog('WARN', 'An error occured while negotiating socks5 with Tor (timeout?)')
             return 0
         except:
-            plog('WARN', 'An unknown SSL error occured')
+            plog('WARN', 'An unknown SSL error occured for '+address)
             traceback.print_exc()
             return 0
         
@@ -1104,12 +1105,14 @@
             if filetype != 'any':
                 query += ' filetype:' + filetype
             if protocol != 'any':
-                query += ' allinurl:' + protocol # this isn't too reliable, but we'll re-filter results later
+                query += ' inurl:' + protocol # this isn't too reliable, but we'll re-filter results later
             #query += '&num=' + `g_results_per_page` 
 
             # search google for relevant pages
             # note: google only accepts requests from idenitified browsers
             # TODO gracefully handle the case when google doesn't want to give us result anymore
+            # XXX: Make more of these headers match? Maybe set a cookie.. or
+            # use scroogle :)
             host = 'www.google.com'
             params = urllib.urlencode({'q' : query})
             headers = {'User-Agent' : 'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.8.1) Gecko/20061010 Firefox/2.0'}
@@ -1125,9 +1128,14 @@
                 if response.status != 200:
                     raise Exception(response.status, response.reason)
             except socket.gaierror, e:
-                plog('ERROR', 'Connection to google.com failed')
-                plog('ERROR', e)
+                plog('ERROR', 'Scraping of http://'+host+search_path+" failed")
+                traceback.print_exc()
                 return list(Set(urllist))
+            except:
+                plog('ERROR', 'Scraping of http://'+host+search_path+" failed")
+                traceback.print_exc()
+                # XXX: Bloody hack just to run some tests overnight
+                return [protocol+"://www.eff.org", protocol+"://www.fastmail.fm", protocol+"://www.torproject.org", protocol+"://secure.wikileaks.org/"]
 
             content = response.read()
             links = SoupStrainer('a')
@@ -1167,6 +1175,49 @@
             bin += str(n % 2)
             n = n >> 1
         return bin[::-1]
+
+
+class NoURLsFound(Exception):
+    pass
+
+class Test:
+    def __init__(self, scanner, proto, port, url_fcn, test_fcn):
+        self.proto = proto
+        self.port = port
+        self.scanner = scanner
+        self.url_fcn = url_fcn
+        self.test_fcn = test_fcn
+        self.rewind()
+
+    def run_test(self):
+        self.tests_run += 1
+        return self.test_fcn(random.choice(self.urls))
+
+    def get_node(self):
+        return random.choice(self.nodes)
+ 
+    def mark_chosen(self, node):
+        self.nodes_marked += 1
+        self.nodes.remove(node)
+       
+    def finished(self):
+        return not self.nodes
+
+    def percent_complete(self):
+        return round(100.0*self.nodes_marked/self.total_nodes, 1)
+ 
+    def rewind(self):
+        self.tests_run = 0
+        self.nodes_marked = 0
+        self.urls = self.url_fcn()
+        if not self.urls:
+            raise NoURLsFound("No URLS found for protocol "+self.proto)
+        self.nodes = self.scanner.get_nodes_for_port(self.port)
+        self.node_map = {}
+        for n in self.nodes: 
+            self.node_map[n.idhex] = n
+        self.total_nodes = len(self.nodes)
+
 #
 # main logic
 #
@@ -1181,7 +1232,6 @@
         print '--smtp (~works)'
         print '--pop (~works)'
         print '--imap (~works)'
-        print '--dns (a basic test, not really reliable)'
         print '--dnsrebind (works with the ssl test)'
         print '--policies (~works)'
         print ''
@@ -1197,7 +1247,6 @@
     do_smtp = ('--smtp','') in flags
     do_pop = ('--pop','') in flags
     do_imap = ('--imap','') in flags
-    do_dns_basic = ('--dns','') in flags
     do_dns_rebind = ('--dnsrebind','') in flags
     do_consistency = ('--policies','') in flags
 
@@ -1216,159 +1265,125 @@
         scanner.check_all_exits_port_consistency()
 
     # maybe only the consistency test was required
-    if not (do_ssl or do_http or do_ssh or do_smtp or do_pop or do_imap or do_dns_basic):
+    if not (do_ssl or do_http or do_ssh or do_smtp or do_pop or do_imap):
         plog('INFO', 'Done.')
         return
 
     # declare some variables and assign values if neccessary
-    ssl_nodes = http_nodes = ssh_nodes = smtp_nodes = pop_nodes = imap_nodes = dns_nodes = []
-    ssl_nodes_n = http_nodes_n = ssh_nodes_n = smtp_nodes_n = pop_nodes_n = imap_nodes_n = dns_nodes_n = 0
-    ssl_urls = http_urls = ssh_urls = smtp_urls = pop_urls = imap_urls = dns_urls = []
-    ssl_fail = http_fail = ssh_fail = smtp_fail = pop_fail = imap_fail = imap_urls = 0
+    http_fail = 0
 
+    tests = {}
+
+    # FIXME: Create an event handler that updates these lists
     if do_ssl:
-        ssl_nodes = scanner.get_nodes_for_port(443)
-        ssl_nodes_n = len(ssl_nodes)
-        http_urls = get_urls(wordlist, protocol='https', results_per_type=10, g_results_per_page=20)
-        ssl_fail = len(scanner.ssl_fail)
+        try:
+            tests["SSL"] = Test(scanner, "SSL", 443, 
+                lambda:
+                  get_urls(wordlist, protocol='https', results_per_type=10,
+g_results_per_page=20), lambda u: scanner.check_openssl(u))
+        except NoURLsFound, e:
+            plog('ERROR', e.message)
 
-        if len(ssl_urls) == 0:
-            plog('ERROR', 'No urls specified for ssl testing.')
-            do_ssl = False
 
     if do_http:
-        http_nodes = scanner.get_nodes_for_port(80)
-        http_nodes_n = len(http_nodes)
-        http_urls = get_urls(wordlist, protocol='http', results_per_type=10, g_results_per_page=20)
         http_fail = len(scanner.http_fail)
+        try:
+            tests["HTTP"] = Test(scanner, "HTTP", 80, 
+                lambda:
+                  get_urls(wordlist, protocol='http', results_per_type=10, g_results_per_page=20), lambda u: scanner.check_http(u)) 
+        except NoURLsFound, e:
+            plog('ERROR', e.message)
 
-        if len(http_urls) == 0:
-            plog('ERROR', 'No urls specified for http testing.')
-            do_http = False
-
     if do_ssh:
-        ssh_nodes = scanner.get_nodes_for_port(22)
-        ssh_nodes_n = len(ssh_nodes)
-        ssh_urls = []
-        ssh_fail = len(scanner.ssh_fail)
+        try:
+            tests["SSH"] = Test(scanner, "SSH", 22, lambda: [], 
+                                lambda u: scanner.check_openssh(u))
+        except NoURLsFound, e:
+            plog('ERROR', e.message)
 
-        if len(ssl_urls) == 0:
-            plog('ERROR', 'No urls specified for ssh testing.')
-            do_ssh = False
-
     if do_smtp:
-        smtp_urls = [('smtp.gmail.com','587')]
-
-        if len(smtp_urls) == 0:
-            plog('ERROR', 'No urls specified for smtp testing.')
-            do_smtp = False
-
+        try:
+            tests["SMTP"] = Test(scanner, "SMTP", 587, 
+               lambda: [('smtp.gmail.com','587')], 
+               lambda u: scanner.check_smtp(*u))
+        except NoURLsFound, e:
+            plog('ERROR', e.message)
+      
     if do_pop:
-        pop_urls = []
+        try:
+            tests["POP"] = Test(scanner, "POP", 110, lambda: [],
+               lambda u: scanner.check_pop(*u))
+        except NoURLsFound, e:
+            plog('ERROR', e.message)
 
-        if len(pop_urls) == 0:
-            plog('ERROR', 'No urls specified for pop testing.')
-            do_pop = False
-
     if do_imap:
-        imap_urls = []
+        try:
+            tests["IMAP"] = Test(scanner, "IMAP", 143, lambda: [],
+               lambda u: scanner.check_imap(*u))
+        except NoURLsFound, e:
+            plog('ERROR', e.message)
 
-        if len(imap_urls) == 0:
-            plog('ERROR', 'No urls specified for imap testing.')
-            do_imap = False
-
-    if do_dns_basic:
-        dns_urls = []
-
-        if len(dns_urls) == 0:
-            plog('ERROR', 'No urls specified for dns testing.')
-            do_dns_basic = False
-
     # maybe no tests could be initialized
-    if not (do_ssl or do_http or do_ssh or do_smtp or do_pop or do_imap or do_dns_basic):
+    if not (do_ssl or do_http or do_ssh or do_smtp or do_pop or do_imap):
         plog('INFO', 'Done.')
         sys.exit(0)
         
-
-    # TODO: Do set intersection and reuse nodes for shared tests
-
     # start testing
-    while 1:  
-       
-        # https test  
-        if do_ssl:
-            candidates = [x for x in ssl_nodes if ('$' + `x.idhex`) not in scanner.ssl_tested]
-            if len(candidates) > 0:
-                current_exit = random.choice(candidates)
-                scanner.set_new_exit(current_exit.idhex)
-            
-            scanner.get_new_circuit()
-            ssl_site = random.choice(ssl_urls)
-            scanner.check_openssl(ssl_site)
-            
-            ssl_tested_n = len(scanner.ssl_tested)
-            if ssl_nodes_n > ssl_tested_n:
-                plog('INFO', 'Nodes ssl-tested: ' + `ssl_tested_n` + '/' + `ssl_nodes_n`
-                    + ' (~' + `((ssl_tested_n * 100) / ssl_nodes_n)` + '%)')
+    while 1:
+        # Get as much mileage out of each exit as we safely can:
+        # Run a random subset of our tests in random order
+        avail_tests = tests.values()
+        n_tests = random.choice(xrange(1,len(avail_tests)+1))
         
-        # http test
-        if do_http:
-            candidates = [x for x in http_nodes if ('$' + `x.idhex`) not in scanner.http_tested]
-            if len(candidates) > 0 :
-                current_exit = random.choice(candidates)
-                scanner.set_new_exit(current_exit.idhex)
-            
-            scanner.get_new_circuit()
-            http_site = random.choice(http_urls)
-            scanner.check_http(http_site)
+        to_run = random.sample(avail_tests, n_tests)
 
-            http_tested_n = len(scanner.http_tested)
-            if http_nodes_n > http_tested_n:
-                plog('INFO', 'Nodes http-tested: ' + `http_tested_n` + '/' + `http_nodes_n`
-                    + ' (~' + `((http_tested_n * 100) / http_nodes_n)` + '%)')
-        
-        # ssh test
-        if do_ssh:
-            candidates = [x for x in ssh_nodes if ('$' + `x.idhex`) not in scanner.ssh_tested]
-            if len(candidates) > 0:
-                current_exit = random.choice(candidates)
-                scanner.set_new_exit(current_exit.idhex)
-                
-            scanner.get_new_circuit()
-            ssh_site = random.choice(ssh_urls)
-            scanner.check_openssh(ssh_site)
- 
-            ssh_tested_n = len(scanner.ssh_tested)
-            if ssh_nodes_n > ssh_tested_n:
-                plog('INFO', 'Nodes ssh-tested: ' + `ssh_tested_n` + '/' + `ssh_nodes_n`
-                    + '(~' + `((ssh_tested_n * 100) / ssh_nodes_n)` + '%')
+        common_nodes = None
+        # Do set intersection and reuse nodes for shared tests
+        for test in to_run:
+            if not common_nodes: common_nodes = Set(map(lambda n: n.idhex, test.nodes))
+            else: common_nodes &= Set(map(lambda n: n.idhex, test.nodes))
 
-        # smtp test
-        if do_smtp:
-            scanner.get_new_circuit()
-            smtp_site = random.choice(smtp_urls)
-            scanner.check_smtp(smtp_site[0], smtp_site[1])
+        if common_nodes:
+            current_exit_idhex = random.choice(list(common_nodes))
+            plog("DEBUG", "Chose to run "+str(n_tests)+" tests via "+current_exit_idhex+" (tests share "+str(len(common_nodes))+" exit nodes")
 
-        # pop test
-        if do_pop:
+            scanner.set_new_exit(current_exit_idhex)
             scanner.get_new_circuit()
-            pop_site = random.choice(pop_urls)
-            scanner.check_pop(pop_site[0], pop_site[1])
-
-        # imap test
-        if do_imap:
-            scanner.get_new_circuit()
-            imap_site = random.choice(imap_urls)
-            scanner.check_imap(imap_site[0], imap_site[1])
-
+            for test in to_run:
+                # Keep testing failures and inconclusives
+                result = test.run_test()
+                if result == TEST_SUCCESS:
+                    test.mark_chosen(test.node_map[current_exit_idhex])
+                plog("INFO", test.proto+" test via "+current_exit_idhex+" has result "+str(result))
+                plog("INFO", test.proto+" attempts: "+str(test.tests_run)+". Completed: "+str(test.nodes_marked)+"/"+str(test.total_nodes)+" ("+str(test.percent_complete())+"%)")
+        else:
+            plog("NOTICE", "No nodes in common between "+", ".join(map(lambda t: t.proto, to_run)))
+            for test in to_run:
+                current_exit = test.get_node()
+                scanner.set_new_exit(current_exit.idhex)
+                scanner.get_new_circuit()
+                # Keep testing failures and inconclusives
+                result = test.run_test()
+                plog("INFO", test.proto+" test via "+current_exit.idhex+" has result "+str(result))
+                plog("INFO", test.proto+" attempts: "+str(test.tests_run)+". Completed: "+str(test.nodes_marked)+"/"+str(test.total_nodes)+" ("+str(test.percent_complete())+"%)")
+                if result == TEST_SUCCESS:
+                    test.mark_chosen(current_exit)
+       
+ 
+        # Check each test for rewind 
+        for test in tests.itervalues():
+            if test.finished():
+                plog("NOTICE", test.proto+" test has finished all nodes.  Rewinding")
+                test.rewind() 
+       
         #
         # managing url lists
         # if we've been having too many false positives lately, get a new target list
-        # 
+        # XXX: Do this on a per-url basis
 
-        if do_http and len(scanner.http_fail) - http_fail >= len(http_urls):
-            http_urls = get_urls(wordlist, protocol='http', results_per_type=10, g_results_per_page=20)
-            http_fail = len(scanner.http_fail)
+        #if do_http and len(scanner.http_fail) - http_fail >= len(http_urls):
+        #    http_urls = get_urls(wordlist, protocol='http', results_per_type=10, g_results_per_page=20)
+        #    http_fail = len(scanner.http_fail)
         
 #
 # initiate the program



More information about the tor-commits mailing list