[tor-commits] [stem/master] Implement lazy router descriptor reading
atagar at torproject.org
atagar at torproject.org
Sat Oct 13 18:35:44 UTC 2012
commit eb0e424ed9459188b70f33ff401e23e9fd89138b
Author: Ravi Chandra Padmala <neenaoffline at gmail.com>
Date: Wed Aug 8 12:39:55 2012 +0530
Implement lazy router descriptor reading
---
stem/descriptor/__init__.py | 60 +++++++++++++++++++++++++++++++++++++-
stem/descriptor/networkstatus.py | 36 ++++++++++++++++-------
2 files changed, 84 insertions(+), 12 deletions(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index b1f3ab6..168b357 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -148,7 +148,31 @@ class Descriptor(object):
def __str__(self):
return self._raw_contents
-def _read_until_keywords(keywords, descriptor_file, inclusive = False):
+def _peek_keyword(descriptor_file):
+ """
+ Returns the keyword at the current offset of descriptor_file. Respects the
+ "opt" keyword and returns the next keyword instead.
+
+ :param file descriptor_file: file with the descriptor content
+
+ :returns: keyword at the current offset of descriptor_file
+ """
+
+ last_position = descriptor_file.tell()
+ line = descriptor_file.readline()
+ if not line: return None
+
+ if " " in line:
+ keyword = line.split(" ", 1)[0]
+ if keyword == "opt":
+ keyword = line.split(" ", 2)[1]
+ else: keyword = line.strip()
+
+ descriptor_file.seek(last_position)
+
+ return keyword
+
+def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_first = False):
"""
Reads from the descriptor file until we get to one of the given keywords or reach the
end of the file.
@@ -156,6 +180,7 @@ def _read_until_keywords(keywords, descriptor_file, inclusive = False):
:param str,list keywords: keyword(s) we want to read until
:param file descriptor_file: file with the descriptor content
:param bool inclusive: includes the line with the keyword if True
+ :param bool ignore_first: doesn't check if the first line read has one of the given keywords
:returns: list with the lines until we find one of the keywords
"""
@@ -163,6 +188,10 @@ def _read_until_keywords(keywords, descriptor_file, inclusive = False):
content = []
if type(keywords) == str: keywords = (keywords,)
+ if ignore_first:
+ content.append(descriptor_file.readline())
+ if content == [None]: return []
+
while True:
last_position = descriptor_file.tell()
line = descriptor_file.readline()
@@ -181,6 +210,35 @@ def _read_until_keywords(keywords, descriptor_file, inclusive = False):
return content
+def _skip_until_keywords(keywords, descriptor_file, inclusive = False):
+ """
+ Reads and discards lines of data from the descriptor file until we get to one
+ of the given keywords or reach the end of the file.
+
+ :param str,list keywords: keyword(s) we want to skip until
+ :param file descriptor_file: file with the descriptor content
+ :param bool inclusive: includes the line with the keyword if True
+
+ :returns: descriptor_file with the new offset
+ """
+
+ if type(keywords) == str: keywords = (keywords,)
+
+ while True:
+ last_position = descriptor_file.tell()
+ line = descriptor_file.readline()
+ if not line: break # EOF
+
+ if " " in line: line_keyword = line.split(" ", 1)[0]
+ else: line_keyword = line.strip()
+
+ if line_keyword in keywords:
+ if not inclusive: descriptor_file.seek(last_position)
+
+ break
+
+ return descriptor_file
+
def _get_pseudo_pgp_block(remaining_contents):
"""
Checks if given contents begins with a pseudo-Open-PGP-style block and, if
diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py
index e4cfda1..a51fcbd 100644
--- a/stem/descriptor/networkstatus.py
+++ b/stem/descriptor/networkstatus.py
@@ -40,14 +40,19 @@ The documents can be obtained from any of the following sources...
import re
import datetime
+from StringIO import StringIO
import stem.descriptor
import stem.version
import stem.exit_policy
+from stem.descriptor import _read_until_keywords, _skip_until_keywords, _peek_keyword
+
_bandwidth_weights_regex = re.compile(" ".join(["W%s=\d+" % weight for weight in ["bd",
"be", "bg", "bm", "db", "eb", "ed", "ee", "eg", "em", "gb", "gd", "gg", "gm", "mb", "md", "me", "mg", "mm"]]))
+_router_desc_end_kws = ["r", "bandwidth-weights", "directory-footer", "directory-signature"]
+
def parse_file(document_file, validate = True):
"""
Iterates over the router descriptors in a network status document.
@@ -62,13 +67,30 @@ def parse_file(document_file, validate = True):
* IOError if the file can't be read
"""
- return NetworkStatusDocument(document_file.read(), validate).router_descriptors
+ # parse until "r"
+ document_data = "".join(_read_until_keywords("r", document_file))
+ # store offset
+ r_offset = document_file.tell()
+ # skip until end of router descriptors
+ _skip_until_keywords(["bandwidth-weights", "directory-footer", "directory-signature"], document_file)
+ # parse until end
+ document_data = document_data + document_file.read()
+ document = NetworkStatusDocument(document_data, validate)
+ document_file.seek(r_offset)
+ document.router_descriptors = _router_desc_generator(document_file, document.vote_status == "vote", validate)
+ return document.router_descriptors
def _strptime(string, validate = True, optional = False):
try:
return datetime.datetime.strptime(string, "%Y-%m-%d %H:%M:%S")
except ValueError, exc:
if validate or not optional: raise exc
+ else: return None
+
+def _router_desc_generator(document_file, vote, validate):
+ while _peek_keyword(document_file) == "r":
+ desc_content = "".join(_read_until_keywords(_router_desc_end_kws, document_file, False, True))
+ yield RouterDescriptor(desc_content, vote, validate)
class NetworkStatusDocument(stem.descriptor.Descriptor):
"""
@@ -193,21 +215,13 @@ class NetworkStatusDocument(stem.descriptor.Descriptor):
# authority section
while doc_parser.line.startswith("dir-source "):
- dirauth_data = doc_parser.read_until(["dir-source", "r"])
+ dirauth_data = doc_parser.read_until(["dir-source", "r", "directory-footer", "directory-signature", "bandwidth-weights"])
self.directory_authorities.append(DirectoryAuthority(dirauth_data, vote, validate))
- def _router_desc_generator(raw_content, vote, validate):
- parser = stem.descriptor.DescriptorParser(raw_content, validate)
- while parser.line != None:
- descriptor = parser.read_until("r")
- yield self._generate_router(descriptor, vote, validate)
-
# router descriptors
if doc_parser.peek_keyword() == "r":
router_descriptors_data = doc_parser.read_until(["bandwidth-weights", "directory-footer", "directory-signature"])
- self.router_descriptors = _router_desc_generator(router_descriptors_data, vote, validate)
- elif validate:
- raise ValueError("No router descriptors found")
+ self.router_descriptors = _router_desc_generator(StringIO(router_descriptors_data), vote, validate)
# footer section
if self.consensus_method > 9 or vote and filter(lambda x: x >= 9, self.consensus_methods):
More information about the tor-commits mailing list