[or-cvs] r18319: {torflow} Turns out we can tighten the false positive filter a bit mor (torflow/trunk/NetworkScanners)
mikeperry at seul.org
mikeperry at seul.org
Thu Jan 29 14:41:45 UTC 2009
Author: mikeperry
Date: 2009-01-29 09:41:45 -0500 (Thu, 29 Jan 2009)
New Revision: 18319
Modified:
torflow/trunk/NetworkScanners/soat.py
torflow/trunk/NetworkScanners/soatstats.py
Log:
Turns out we can tighten the false positive filter a bit
more. If a tag varies between fetches, we should not allow
ALL of its attributes to vary, but only those attributes that
we've seen vary.
Modified: torflow/trunk/NetworkScanners/soat.py
===================================================================
--- torflow/trunk/NetworkScanners/soat.py 2009-01-29 14:22:47 UTC (rev 18318)
+++ torflow/trunk/NetworkScanners/soat.py 2009-01-29 14:41:45 UTC (rev 18319)
@@ -83,7 +83,7 @@
firefox_headers = {
- 'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5'
+ 'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5',
'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language':"en-us,en;q=0.5",
'Accept-Encoding':"gzip,deflate",
@@ -208,7 +208,7 @@
'ontext', 'onunderflow', 'onunload', 'overflow', 'profile', 'src', 'style',
'usemap']
attrs_to_check_map = {}
-for a in attrs_to_check: attrs_to_check_map[a]=1
+for __a in attrs_to_check: attrs_to_check_map[__a]=1
attrs_to_prune = ['alt', 'label', 'prompt' 'standby', 'summary', 'title',
'abbr']
@@ -972,7 +972,10 @@
new_vs_old = SoupDiffer(soup_new, soup)
new_vs_tor = SoupDiffer(soup_new, psoup)
+ # TODO: Consider storing these changing attributes
+ # for more than just this run..
changed_tags = {}
+ changed_attributes = {}
# I'm an evil man and I'm going to CPU hell..
for tags in map(BeautifulSoup, old_vs_new.changed_tags()):
for t in tags.findAll():
@@ -986,6 +989,10 @@
changed_tags[t.name] = sets.Set([])
for attr in t.attrs:
changed_tags[t.name].add(attr[0])
+ for attr in old_vs_new.changed_attributes():
+ changed_attributes[attr[0]] = 1
+ for attr in new_vs_old.changed_attributes():
+ changed_attributes[attr[0]] = 1
changed_content = bool(old_vs_new.changed_content() or old_vs_new.changed_content())
@@ -998,6 +1005,9 @@
for attr in t.attrs:
if attr[0] not in changed_tags[t.name]:
false_positive = False
+ for attr in new_vs_tor.changed_attributes():
+ if attr[0] not in changed_attributes:
+ false_positive=False
if new_vs_tor.changed_content() and not changed_content:
false_positive = False
Modified: torflow/trunk/NetworkScanners/soatstats.py
===================================================================
--- torflow/trunk/NetworkScanners/soatstats.py 2009-01-29 14:22:47 UTC (rev 18318)
+++ torflow/trunk/NetworkScanners/soatstats.py 2009-01-29 14:41:45 UTC (rev 18319)
@@ -112,7 +112,9 @@
new_vs_tor = SoupDiffer(BeautifulSoup(open(result.tags, "r").read()),
BeautifulSoup(open(result.exit_tags,
"r").read()))
+
changed_tags = {}
+ changed_attributes = {}
# I'm an evil man and I'm going to CPU hell..
for tags in map(BeautifulSoup, old_vs_new.changed_tags()):
for t in tags.findAll():
@@ -126,6 +128,10 @@
changed_tags[t.name] = sets.Set([])
for attr in t.attrs:
changed_tags[t.name].add(attr[0])
+ for attr in old_vs_new.changed_attributes():
+ changed_attributes[attr[0]] = 1
+ for attr in new_vs_old.changed_attributes():
+ changed_attributes[attr[0]] = 1
changed_content = bool(old_vs_new.changed_content() or old_vs_new.changed_content())
@@ -138,10 +144,13 @@
for attr in t.attrs:
if attr[0] not in changed_tags[t.name]:
false_positive = False
+ for attr in new_vs_tor.changed_attributes():
+ if attr[0] not in changed_attributes:
+ false_positive=False
if new_vs_tor.changed_content() and not changed_content:
false_positive = False
-
+
print false_positive
print ""
More information about the tor-commits
mailing list