[or-cvs] [pytorctl/master 4/5] Improve TorCtl descriptor processing speed.
mikeperry at torproject.org
mikeperry at torproject.org
Fri Aug 20 20:46:12 UTC 2010
Author: Harry Bock <hbock at ele.uri.edu>
Date: Mon, 9 Aug 2010 02:13:15 -0400
Subject: Improve TorCtl descriptor processing speed.
Commit: 33bf5a0a4a9308d76c2ebc6392f82bbb3b857e0d
By running only one regular expression per descriptor line and
using a slightly better way of checking which line type we're
handling, we cut the run time of build_from_desc in half.
---
TorCtl.py | 98 ++++++++++++++++++++++++++++++++++++++++--------------------
1 files changed, 65 insertions(+), 33 deletions(-)
diff --git a/TorCtl.py b/TorCtl.py
index 1b91c4f..1d58127 100755
--- a/TorCtl.py
+++ b/TorCtl.py
@@ -305,6 +305,24 @@ class RouterVersion:
def __ne__(self, other): return self.version != other.version
def __str__(self): return self.ver_string
+
+# map descriptor keywords to regular expressions.
+desc_re = {
+ "router": r"(\S+) (\S+)",
+ "opt fingerprint": r"(.+).*on (\S+)",
+ "opt hibernating": r"1$",
+ "platform": r"Tor (\S+).*on ([\S\s]+)",
+ "accept": r"(\S+):([^-]+)(?:-(\d+))?",
+ "reject": r"(\S+):([^-]+)(?:-(\d+))?",
+ "bandwidth": r"(\d+) \d+ (\d+)",
+ "uptime": r"(\d+)",
+ "contact": r"(.+)",
+ "published": r"(\S+ \S+)",
+}
+# Compile each regular expression now.
+for kw, reg in desc_re.iteritems():
+ desc_re[kw] = re.compile(reg)
+
class Router:
"""
Class to represent a router from a descriptor. Can either be
@@ -323,7 +341,7 @@ class Router:
if ns_bandwidth != None:
self.bw = ns_bandwidth
else:
- self.bw = bw
+ self.bw = bw
self.desc_bw = bw
self.exitpolicy = exitpolicy
self.flags = flags # Technicaly from NS doc
@@ -353,9 +371,6 @@ class Router:
the flags, the nickname, and the idhex string).
Returns a Router instance.
"""
- # XXX: Compile these regular expressions? This is an expensive process
- # Use http://docs.python.org/lib/profile.html to verify this is
- # the part of startup that is slow
exitpolicy = []
dead = not ("Running" in ns.flags)
bw_observed = 0
@@ -368,40 +383,57 @@ class Router:
contact = None
for line in desc:
- rt = re.search(r"^router (\S+) (\S+)", line)
- fp = re.search(r"^opt fingerprint (.+).*on (\S+)", line)
- pl = re.search(r"^platform Tor (\S+).*on ([\S\s]+)", line)
- ac = re.search(r"^accept (\S+):([^-]+)(?:-(\d+))?", line)
- rj = re.search(r"^reject (\S+):([^-]+)(?:-(\d+))?", line)
- bw = re.search(r"^bandwidth (\d+) \d+ (\d+)", line)
- up = re.search(r"^uptime (\d+)", line)
- ct = re.search(r"^contact (.+)", line)
- pb = re.search(r"^published (\S+ \S+)", line)
- if re.search(r"^opt hibernating 1", line):
- dead = True
- if ("Running" in ns.flags):
- plog("INFO", "Hibernating router "+ns.nickname+" is running, flags: "+" ".join(ns.flags))
- if ac:
- exitpolicy.append(ExitPolicyLine(True, *ac.groups()))
- elif rj:
- exitpolicy.append(ExitPolicyLine(False, *rj.groups()))
- elif bw:
- bws = map(int, bw.groups())
+ # Pull off the keyword...
+ kw, _, rest = line.partition(" ")
+
+ # ...and if it's "opt", extend it by the next keyword
+ # so we get "opt hibernating" as one keyword.
+ if kw == "opt":
+ okw, _, rest = rest.partition(" ")
+ kw += " " + okw
+
+ # try to match the descriptor line by keyword.
+ try:
+ match = desc_re[kw].match(rest)
+ # if we don't handle this keyword, just move on to the next one.
+ except KeyError:
+ continue
+ # if we do handle this keyword but its data is malformed,
+ # move on to the next one without processing it.
+ if not match:
+ continue
+
+ g = match.groups()
+
+ # Handle each keyword individually.
+ # TODO: This could possibly be sped up since we technically already
+ # did the compare with the dictionary lookup... lambda magic time.
+ if kw == "accept":
+ exitpolicy.append(ExitPolicyLine(True, *g))
+ elif kw == "reject":
+ exitpolicy.append(ExitPolicyLine(False, *g))
+ elif kw == "router":
+ router,ip = g
+ elif kw == "bandwidth":
+ bws = map(int, g)
bw_observed = min(bws)
rate_limited = False
if bws[0] < bws[1]:
rate_limited = True
- elif pl:
- version, os = pl.groups()
- elif up:
- uptime = int(up.group(1))
- elif rt:
- router,ip = rt.groups()
- elif pb:
- t = time.strptime(pb.group(1)+" UTC", "20%y-%m-%d %H:%M:%S %Z")
+ elif kw == "platform":
+ version, os = g
+ elif kw == "uptime":
+ uptime = int(g[0])
+ elif kw == "published":
+ t = time.strptime(g[0] + " UTC", "20%y-%m-%d %H:%M:%S %Z")
published = datetime.datetime(*t[0:6])
- elif ct:
- contact = ct.group(1)
+ elif kw == "contact":
+ contact = g[0]
+ elif kw == "opt hibernating":
+ dead = True
+ if ("Running" in ns.flags):
+ plog("INFO", "Hibernating router "+ns.nickname+" is running, flags: "+" ".join(ns.flags))
+
if router != ns.nickname:
plog("NOTICE", "Got different names " + ns.nickname + " vs " +
router + " for " + ns.idhex)
--
1.7.1
More information about the tor-commits
mailing list