[tor-commits] [stem/master] Optimize _read_until_keywords a little

Sat Aug 16 22:43:45 UTC 2014

commit 964eea9fb1ecb282cfbabafc29da45f88ed6ee3b
Author: Nick Mathewson <nickm at torproject.org>
Date:   Wed Aug 13 20:46:30 2014 -0400

    Optimize _read_until_keywords a little
    
    - use an unconditional function call in the inner loop rather than
    checking whether content is set all the time.  This saves a few percent.
    
    - use a compiled regex to determine whether we have found the target
    keywords.  This is a fairly big win from what I can tell.  Perhaps this
    regex can be tuned further?
---
 stem/descriptor/__init__.py |   33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index 9fe7235..411ad3d 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -419,7 +419,12 @@ def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_fi
     **True**
   """
 
-  content = None if skip else []
+  if skip:
+    content = None
+    content_append = lambda x: None
+  else:
+    content = []
+    content_append = content.append
   ending_keyword = None
 
   if isinstance(keywords, (bytes, unicode)):
@@ -428,8 +433,10 @@ def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_fi
   if ignore_first:
     first_line = descriptor_file.readline()
 
-    if content is not None and first_line is not None:
-      content.append(first_line)
+    if first_line is not None:
+      content_append(first_line)
+
+  match_re = re.compile(r'^(%s)(?:[ \t].*|)$' % "|".join(keywords))
 
   while True:
     last_position = descriptor_file.tell()
@@ -442,25 +449,19 @@ def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_fi
     if not line:
       break  # EOF
 
-    line_match = KEYWORD_LINE.match(stem.util.str_tools._to_unicode(line))
+    line_match = match_re.match(stem.util.str_tools._to_unicode(line))
 
-    if not line_match:
-      # no spaces or tabs in the line
-      line_keyword = stem.util.str_tools._to_unicode(line.strip())
-    else:
-      line_keyword = line_match.groups()[0]
-
-    if line_keyword in keywords:
-      ending_keyword = line_keyword
+    if line_match:
+      ending_keyword = line_match.groups()[0]
 
       if not inclusive:
         descriptor_file.seek(last_position)
-      elif content is not None:
-        content.append(line)
+      else:
+        content_append(line)
 
       break
-    elif content is not None:
-      content.append(line)
+    else:
+      content_append(line)
 
   if include_ending_keyword:
     return (content, ending_keyword)