[tor-commits] [stem/master] Getting rid of the _get_document_content() helper
atagar at torproject.org
atagar at torproject.org
Sat Oct 13 18:35:45 UTC 2012
commit e4185194801b6afd10671e6e7a1a53977a598c29
Author: Damian Johnson <atagar at torproject.org>
Date: Sat Sep 22 13:18:16 2012 -0700
Getting rid of the _get_document_content() helper
With too many helper functions the code becomes fragmented. The
_get_document_content() helper was only used in a couple of places, and both of
those were actually better off with their own slightly different implementations.
---
stem/descriptor/networkstatus.py | 90 +++++++++++++------------------------
1 files changed, 32 insertions(+), 58 deletions(-)
diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py
index f279805..030d413 100644
--- a/stem/descriptor/networkstatus.py
+++ b/stem/descriptor/networkstatus.py
@@ -131,59 +131,28 @@ def parse_file(document_file, validate = True, is_microdescriptor = False):
* IOError if the file can't be read
"""
- header, authorities, footer, routers_end = _get_document_content(document_file, validate)
- document_data = header + authorities + footer
+ # getting the document without the routers section
- if not is_microdescriptor:
- document = NetworkStatusDocument(document_data, validate)
- router_type = RouterStatusEntry
- else:
- document = MicrodescriptorConsensus(document_data, validate)
- router_type = RouterMicrodescriptor
-
- for desc in _get_routers(document_file, validate, document, routers_end, router_type):
- yield desc
-
-def _get_document_content(document_file, validate):
- """
- Network status documents consist of four sections:
- * header
- * authority entries
- * router entries
- * footer
-
- This provides back a tuple with the following...
- (header, authorities, footer, routers_end)
-
- This leaves the document_file at the start of the router entries.
-
- :param file document_file: file with network status document content
- :param bool validate: checks the validity of the document's contents if True, skips these checks otherwise
-
- :returns: tuple with the network status document content and ending position of the routers
-
- :raises:
- * ValueError if the contents is malformed and validate is True
- * IOError if the file can't be read
- """
-
- # parse until the first record of a following section
- header = _read_until_keywords((AUTH_START, ROUTERS_START, FOOTER_START), document_file)
- authorities = _read_until_keywords((ROUTERS_START, FOOTER_START), document_file)
+ header = _read_until_keywords((ROUTERS_START, FOOTER_START), document_file)
- # skip router section, just taking note of the position
routers_start = document_file.tell()
_read_until_keywords(FOOTER_START, document_file, skip = True)
routers_end = document_file.tell()
footer = document_file.readlines()
+ document_content = header + footer
- # leave our position at the start of the router section
- document_file.seek(routers_start)
+ if not is_microdescriptor:
+ document = NetworkStatusDocument(document_content, validate)
+ router_type = RouterStatusEntry
+ else:
+ document = MicrodescriptorConsensus(document_content, validate)
+ router_type = RouterMicrodescriptor
- return ("".join(header), "".join(authorities), "".join(footer), routers_end)
+ for desc in _get_routers(document_file, validate, document, routers_start, routers_end, router_type):
+ yield desc
-def _get_routers(document_file, validate, document, end_position, router_type):
+def _get_routers(document_file, validate, document, start_position, end_position, router_type):
"""
Iterates over the router entries in a given document. The document_file is
expected to be at the start of the router section and the end_position
@@ -192,7 +161,8 @@ def _get_routers(document_file, validate, document, end_position, router_type):
:param file document_file: file with network status document content
:param bool validate: checks the validity of the document's contents if True, skips these checks otherwise
:param object document: document the descriptors originate from
- :param int end_position: location in the document_file where the router section ends
+ :param int start_position: start of the routers section
+ :param int end_position: end of the routers section
:param class router_type: router class to construct
:returns: iterator over router_type instances
@@ -202,15 +172,16 @@ def _get_routers(document_file, validate, document, end_position, router_type):
* IOError if the file can't be read
"""
+ document_file.seek(start_position)
while document_file.tell() < end_position:
desc_content = "".join(_read_until_keywords("r", document_file, ignore_first = True, end_position = end_position))
yield router_type(desc_content, document, validate)
-def _get_authorities(authority_lines, is_vote, validate):
+def _get_authorities(authorities, is_vote, validate):
"""
Iterates over the authoritiy entries in given content.
- :param list authority_lines: lines of content to be parsed
+ :param str authorities: content of the authorities section
:param bool is_vote: indicates if this is for a vote or contensus document
:param bool validate: True if the document is to be validated, False otherwise
@@ -221,7 +192,7 @@ def _get_authorities(authority_lines, is_vote, validate):
auth_buffer = []
- for line in authority_lines:
+ for line in authorities.split("\n"):
if not line: continue
elif line.startswith(AUTH_START) and auth_buffer:
yield DirectoryAuthority("\n".join(auth_buffer), is_vote, validate)
@@ -276,27 +247,30 @@ class NetworkStatusDocument(stem.descriptor.Descriptor):
"""
super(NetworkStatusDocument, self).__init__(raw_content)
+ document_file = StringIO(raw_content)
- self.directory_authorities = []
- self._unrecognized_lines = []
+ header = _read_until_keywords((AUTH_START, ROUTERS_START, FOOTER_START), document_file)
+ self._header = _DocumentHeader("".join(header), validate, default_params)
- document_file = StringIO(raw_content)
- header_content, authority_content, footer_content, routers_end = _get_document_content(document_file, validate)
+ authorities = _read_until_keywords((ROUTERS_START, FOOTER_START), document_file)
+ self.directory_authorities = list(_get_authorities("".join(authorities), self._header.is_vote, validate))
- self._header = _DocumentHeader(header_content, validate, default_params)
- self._footer = _DocumentFooter(footer_content, validate, self._header)
- self.directory_authorities = list(_get_authorities(authority_content.split("\n"), self._header.is_vote, validate))
+ routers_start = document_file.tell()
+ _read_until_keywords(FOOTER_START, document_file, skip = True)
+ routers_end = document_file.tell()
+ self._footer = _DocumentFooter(document_file.read(), validate, self._header)
+
+ self._unrecognized_lines = []
+
+ # copy the header and footer attributes into us
for attr, value in vars(self._header).items() + vars(self._footer).items():
if attr != "_unrecognized_lines":
setattr(self, attr, value)
else:
self._unrecognized_lines += value
- if document_file.tell() < routers_end:
- self.routers = tuple(_get_routers(document_file, validate, self, routers_end, self._get_router_type()))
- else:
- self.routers = ()
+ self.routers = tuple(_get_routers(document_file, validate, self, routers_start, routers_end, self._get_router_type()))
def _get_router_type(self):
return RouterStatusEntry
More information about the tor-commits
mailing list