[tor-commits] [stem/master] Hybrid approach between iterator and attribute
atagar at torproject.org
atagar at torproject.org
Sat Oct 13 18:35:45 UTC 2012
commit f9a2ef8d0dd60a73c7cfe5a329b20329c4b4218c
Author: Damian Johnson <atagar at torproject.org>
Date: Sun Aug 19 11:54:29 2012 -0700
Hybrid approach between iterator and attribute
Ok, changed my mind. At least partly. If the user is using parse_file() then we
want to provide them with router entries and a bare document, as I was doing before.
However, if they're calling the document constructors then they've already read
the complete document into memory so there's little reason not to parse it.
In short, documents now have a 'routers' attribute. That attribute is filled
with the parsed routers if we use the constructor directly, and left empty if
we used parse_file().
---
stem/descriptor/networkstatus.py | 75 +++++++++++++++++++++++++-------
test/integ/descriptor/networkstatus.py | 8 ++--
2 files changed, 63 insertions(+), 20 deletions(-)
diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py
index 5054628..a91dcc5 100644
--- a/stem/descriptor/networkstatus.py
+++ b/stem/descriptor/networkstatus.py
@@ -76,7 +76,9 @@ Flag = stem.util.enum.Enum(
def parse_file(document_file, validate = True, is_microdescriptor = False):
"""
- Parses a network status document and provides a NetworkStatusDocument object.
+ Parses a network status document and iterates over the RouterDescriptor or
+ RouterMicrodescriptor entries in it. The document that these instances reference has
+ an empty 'routers' attribute to allow for limited memory usage.
:param file document_file: file with network status document content
:param bool validate: checks the validity of the document's contents if True, skips these checks otherwise
@@ -89,29 +91,31 @@ def parse_file(document_file, validate = True, is_microdescriptor = False):
* IOError if the file can't be read
"""
+ header, footer, routers_end = _get_document_content(document_file, validate)
+ document_data = "".join(header + footer)
+
if not is_microdescriptor:
- document_type, router_type = NetworkStatusDocument, RouterDescriptor
+ document = NetworkStatusDocument(document_data, validate)
+ router_type = RouterDescriptor
else:
- document_type, router_type = MicrodescriptorConsensus, RouterMicrodescriptor
-
- document, routers_start, routers_end = _get_document(document_file, validate, document_type)
- document_file.seek(routers_start)
+ document = MicrodescriptorConsensus(document_data, validate)
+ router_type = RouterMicrodescriptor
- while document_file.tell() < routers_end:
- desc_content = "".join(_read_until_keywords("r", document_file, ignore_first = True, end_position = routers_end))
- yield router_type(desc_content, document, validate)
+ for desc in _get_routers(document_file, validate, document, routers_end, router_type):
+ yield desc
-def _get_document(document_file, validate, document_type):
+def _get_document_content(document_file, validate):
"""
Network status documents consist of three sections: header, router entries,
and the footer. This provides back a tuple with the following...
- (NetworkStatusDocument, routers_start, routers_end)
+ (header_lines, footer_lines, routers_end)
+
+ This leaves the document_file at the start of the router entries.
:param file document_file: file with network status document content
:param bool validate: checks the validity of the document's contents if True, skips these checks otherwise
- :param object document_type: consensus document class to construct
- :returns: tuple with the network status document and range that has the routers
+ :returns: tuple with the network status document content and ending position of the routers
:raises:
* ValueError if the contents is malformed and validate is True
@@ -132,9 +136,31 @@ def _get_document(document_file, validate, document_type):
routers_end = document_file.tell()
footer = document_file.readlines()
- document_data = "".join(header + footer)
+ document_file.seek(routers_start)
+ return (header, footer, routers_end)
+
+def _get_routers(document_file, validate, document, end_position, router_type):
+ """
+ Iterates over the router entries in a given document. The document_file is
+ expected to be at the start of the router section and the end_position
+ designates where that section ends.
+
+ :param file document_file: file with network status document content
+ :param bool validate: checks the validity of the document's contents if True, skips these checks otherwise
+ :param object document: document the descriptors originate from
+ :param int end_position: location in the document_file where the router section ends
+ :param class router_type: router class to construct
- return (document_type(document_data, validate), routers_start, routers_end)
+ :returns: iterator over router_type instances
+
+ :raises:
+ * ValueError if the contents is malformed and validate is True
+ * IOError if the file can't be read
+ """
+
+ while document_file.tell() < end_position:
+ desc_content = "".join(_read_until_keywords("r", document_file, ignore_first = True, end_position = end_position))
+ yield router_type(desc_content, document, validate)
class NetworkStatusDocument(stem.descriptor.Descriptor):
"""
@@ -142,6 +168,8 @@ class NetworkStatusDocument(stem.descriptor.Descriptor):
This could be a v3 consensus or vote document.
+ :var tuple routers: RouterDescriptor contained in the document
+
:var bool validated: **\*** whether the document is validated
:var str network_status_version: **\*** a document format version. For v3 documents this is "3"
:var str vote_status: **\*** status of the vote (is either "vote" or "consensus")
@@ -198,7 +226,19 @@ class NetworkStatusDocument(stem.descriptor.Descriptor):
self.params = {}
self.bandwidth_weights = {}
- self._parse(raw_content)
+ document_file = StringIO(raw_content)
+ header, footer, routers_end = _get_document_content(document_file, validate)
+
+ document_content = "".join(header + footer)
+ self._parse(document_content)
+
+ if document_file.tell() < routers_end:
+ self.routers = tuple(_get_routers(document_file, validate, self, routers_end, self._get_router_type()))
+ else:
+ self.routers = ()
+
+ def _get_router_type(self):
+ return RouterDescriptor
def _validate_network_status_version(self):
return self.network_status_version == "3"
@@ -586,6 +626,9 @@ class MicrodescriptorConsensus(NetworkStatusDocument):
| **~** attribute appears only in consensuses
"""
+ def _get_router_type(self):
+ return RouterMicrodescriptor
+
def _validate_network_status_version(self):
return self.network_status_version == "3 microdesc"
diff --git a/test/integ/descriptor/networkstatus.py b/test/integ/descriptor/networkstatus.py
index f95270b..d16929d 100644
--- a/test/integ/descriptor/networkstatus.py
+++ b/test/integ/descriptor/networkstatus.py
@@ -74,8 +74,8 @@ class TestNetworkStatusDocument(unittest.TestCase):
descriptor_path = test.integ.descriptor.get_resource("cached-consensus")
descriptor_file = file(descriptor_path)
- router1 = next(stem.descriptor.networkstatus.parse_file(descriptor_file))
- desc = router1.document
+ desc = stem.descriptor.networkstatus.NetworkStatusDocument(descriptor_file.read())
+ router1 = desc.routers[0]
descriptor_file.close()
self.assertEquals(True, desc.validated)
@@ -167,8 +167,8 @@ I/TJmV928na7RLZe2mGHCAW3VQOvV+QkCfj05VZ8CsY=
descriptor_path = test.integ.descriptor.get_resource("vote")
descriptor_file = file(descriptor_path)
- router1 = next(stem.descriptor.networkstatus.parse_file(descriptor_file))
- desc = router1.document
+ desc = stem.descriptor.networkstatus.NetworkStatusDocument(descriptor_file.read())
+ router1 = desc.routers[0]
descriptor_file.close()
self.assertEquals(True, desc.validated)
More information about the tor-commits
mailing list