[or-cvs] r18319: {torflow} Turns out we can tighten the false positive filter a bit mor (torflow/trunk/NetworkScanners)

mikeperry at seul.org mikeperry at seul.org
Thu Jan 29 14:41:45 UTC 2009


Author: mikeperry
Date: 2009-01-29 09:41:45 -0500 (Thu, 29 Jan 2009)
New Revision: 18319

Modified:
   torflow/trunk/NetworkScanners/soat.py
   torflow/trunk/NetworkScanners/soatstats.py
Log:

Turns out we can tighten the false positive filter a bit
more. If a tag varies between fetches, we should not allow
ALL of its attributes to vary, but only those attributes that
we've seen vary.




Modified: torflow/trunk/NetworkScanners/soat.py
===================================================================
--- torflow/trunk/NetworkScanners/soat.py	2009-01-29 14:22:47 UTC (rev 18318)
+++ torflow/trunk/NetworkScanners/soat.py	2009-01-29 14:41:45 UTC (rev 18319)
@@ -83,7 +83,7 @@
 
 
 firefox_headers = {
-  'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5'
+  'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5',
   'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
   'Accept-Language':"en-us,en;q=0.5",
   'Accept-Encoding':"gzip,deflate",
@@ -208,7 +208,7 @@
 'ontext', 'onunderflow', 'onunload', 'overflow', 'profile', 'src', 'style',
 'usemap']
 attrs_to_check_map = {}
-for a in attrs_to_check: attrs_to_check_map[a]=1
+for __a in attrs_to_check: attrs_to_check_map[__a]=1
 attrs_to_prune = ['alt', 'label', 'prompt' 'standby', 'summary', 'title',
                   'abbr']
 
@@ -972,7 +972,10 @@
     new_vs_old = SoupDiffer(soup_new, soup)
     new_vs_tor = SoupDiffer(soup_new, psoup)
 
+    # TODO: Consider storing these changing attributes
+    # for more than just this run..
     changed_tags = {}
+    changed_attributes = {}
     # I'm an evil man and I'm going to CPU hell..
     for tags in map(BeautifulSoup, old_vs_new.changed_tags()):
       for t in tags.findAll():
@@ -986,6 +989,10 @@
           changed_tags[t.name] = sets.Set([])
         for attr in t.attrs:
           changed_tags[t.name].add(attr[0])
+    for attr in old_vs_new.changed_attributes():
+      changed_attributes[attr[0]] = 1 
+    for attr in new_vs_old.changed_attributes():
+      changed_attributes[attr[0]] = 1 
     
     changed_content = bool(old_vs_new.changed_content() or old_vs_new.changed_content())
 
@@ -998,6 +1005,9 @@
            for attr in t.attrs:
              if attr[0] not in changed_tags[t.name]:
                false_positive = False
+    for attr in new_vs_tor.changed_attributes():
+      if attr[0] not in changed_attributes:
+        false_positive=False
 
     if new_vs_tor.changed_content() and not changed_content:
       false_positive = False

Modified: torflow/trunk/NetworkScanners/soatstats.py
===================================================================
--- torflow/trunk/NetworkScanners/soatstats.py	2009-01-29 14:22:47 UTC (rev 18318)
+++ torflow/trunk/NetworkScanners/soatstats.py	2009-01-29 14:41:45 UTC (rev 18319)
@@ -112,7 +112,9 @@
       new_vs_tor = SoupDiffer(BeautifulSoup(open(result.tags, "r").read()), 
                 BeautifulSoup(open(result.exit_tags, 
                                "r").read()))
+
       changed_tags = {}
+      changed_attributes = {}
       # I'm an evil man and I'm going to CPU hell..
       for tags in map(BeautifulSoup, old_vs_new.changed_tags()):
         for t in tags.findAll():
@@ -126,6 +128,10 @@
             changed_tags[t.name] = sets.Set([])
           for attr in t.attrs:
             changed_tags[t.name].add(attr[0])
+      for attr in old_vs_new.changed_attributes():
+        changed_attributes[attr[0]] = 1 
+      for attr in new_vs_old.changed_attributes():
+        changed_attributes[attr[0]] = 1 
       
       changed_content = bool(old_vs_new.changed_content() or old_vs_new.changed_content())
   
@@ -138,10 +144,13 @@
              for attr in t.attrs:
                if attr[0] not in changed_tags[t.name]:
                  false_positive = False
+      for attr in new_vs_tor.changed_attributes():
+        if attr[0] not in changed_attributes:
+          false_positive=False
   
       if new_vs_tor.changed_content() and not changed_content:
         false_positive = False
-
+  
       print false_positive      
 
   print ""



More information about the tor-commits mailing list