[tor-commits] [stem/master] Optimize _read_until_keywords a little
atagar at torproject.org
atagar at torproject.org
Sat Aug 16 22:43:45 UTC 2014
commit 964eea9fb1ecb282cfbabafc29da45f88ed6ee3b
Author: Nick Mathewson <nickm at torproject.org>
Date: Wed Aug 13 20:46:30 2014 -0400
Optimize _read_until_keywords a little
- use an unconditional function call in the inner loop rather than
checking whether content is set all the time. This saves a few percent.
- use a compiled regex to determine whether we have found the target
keywords. This is a fairly big win from what I can tell. Perhaps this regex can be tuned
further?
---
stem/descriptor/__init__.py | 33 +++++++++++++++++----------------
1 file changed, 17 insertions(+), 16 deletions(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index 9fe7235..411ad3d 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -419,7 +419,12 @@ def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_fi
**True**
"""
- content = None if skip else []
+ if skip:
+ content = None
+ content_append = lambda x: None
+ else:
+ content = []
+ content_append = content.append
ending_keyword = None
if isinstance(keywords, (bytes, unicode)):
@@ -428,8 +433,10 @@ def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_fi
if ignore_first:
first_line = descriptor_file.readline()
- if content is not None and first_line is not None:
- content.append(first_line)
+ if first_line is not None:
+ content_append(first_line)
+
+ match_re = re.compile(r'^(%s)(?:[ \t].*|)$' % "|".join(keywords))
while True:
last_position = descriptor_file.tell()
@@ -442,25 +449,19 @@ def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_fi
if not line:
break # EOF
- line_match = KEYWORD_LINE.match(stem.util.str_tools._to_unicode(line))
+ line_match = match_re.match(stem.util.str_tools._to_unicode(line))
- if not line_match:
- # no spaces or tabs in the line
- line_keyword = stem.util.str_tools._to_unicode(line.strip())
- else:
- line_keyword = line_match.groups()[0]
-
- if line_keyword in keywords:
- ending_keyword = line_keyword
+ if line_match:
+ ending_keyword = line_match.groups()[0]
if not inclusive:
descriptor_file.seek(last_position)
- elif content is not None:
- content.append(line)
+ else:
+ content_append(line)
break
- elif content is not None:
- content.append(line)
+ else:
+ content_append(line)
if include_ending_keyword:
return (content, ending_keyword)
More information about the tor-commits
mailing list