[tor-commits] [stem/master] Always processing @type headers
atagar at torproject.org
atagar at torproject.org
Sun Jan 20 00:03:31 UTC 2013
commit 3d3e69417975499fbe5d2a3eb591d808c6800874
Author: Damian Johnson <atagar at torproject.org>
Date: Sat Jan 19 15:53:06 2013 -0800
Always processing @type headers
The parse_file() function only consumed the @type annotation when we had
neither a descriptor_type nor a recognized filename. Changing it so we always
consume the @type header.
I'm also changing the priority order of parse_file() so if the file could be
either a metrics archive or a cached descriptor (for instance a file named
'cached-consensus' that starts with a @type annotation) then we process it as a
metrics archive.
---
stem/descriptor/__init__.py | 41 ++++++++++++++++++--------------
test/integ/descriptor/networkstatus.py | 26 +++++++++++--------
2 files changed, 38 insertions(+), 29 deletions(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index 7280222..f1fb86e 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -48,12 +48,12 @@ def parse_file(descriptor_file, descriptor_type = None, path = None):
If you don't provide a **descriptor_type** argument then this automatically
tries to determine the descriptor type based on the following...
- * The filename if it matches something from tor's data directory. For
- instance, tor's 'cached-descriptors' contains server descriptors.
-
* The @type annotation on the first line. These are generally only found in
the `descriptor archives <https://metrics.torproject.org>`_.
+ * The filename if it matches something from tor's data directory. For
+ instance, tor's 'cached-descriptors' contains server descriptors.
+
This is a handy function for simple usage, but if you're reading multiple
descriptor files you might want to consider the
:class:`~stem.descriptor.reader.DescriptorReader`.
@@ -101,7 +101,12 @@ def parse_file(descriptor_file, descriptor_type = None, path = None):
# by an annotation on their first line...
# https://trac.torproject.org/5651
- # Cached descriptor handling. These contain multiple descriptors per file.
+ initial_position = descriptor_file.tell()
+ first_line = descriptor_file.readline().strip()
+ metrics_header_match = re.match("^@type (\S+) (\d+).(\d+)$", first_line)
+
+ if not metrics_header_match:
+ descriptor_file.seek(initial_position)
filename = '<undefined>' if path is None else os.path.basename(path)
file_parser = None
@@ -114,22 +119,22 @@ def parse_file(descriptor_file, descriptor_type = None, path = None):
file_parser = lambda f: _parse_metrics_file(desc_type, int(major_version), int(minor_version), f)
else:
raise ValueError("The descriptor_type must be of the form '<type> <major_version>.<minor_version>'")
- elif filename == "cached-descriptors":
- file_parser = stem.descriptor.server_descriptor._parse_file
- elif filename == "cached-extrainfo":
- file_parser = stem.descriptor.extrainfo_descriptor._parse_file
- elif filename == "cached-consensus":
- file_parser = stem.descriptor.networkstatus._parse_file
- elif filename == "cached-microdesc-consensus":
- file_parser = lambda f: stem.descriptor.networkstatus._parse_file(f, is_microdescriptor = True)
- else:
+ elif metrics_header_match:
# Metrics descriptor handling
- first_line, desc = descriptor_file.readline().strip(), None
- metrics_header_match = re.match("^@type (\S+) (\d+).(\d+)$", first_line)
- if metrics_header_match:
- desc_type, major_version, minor_version = metrics_header_match.groups()
- file_parser = lambda f: _parse_metrics_file(desc_type, int(major_version), int(minor_version), f)
+ desc_type, major_version, minor_version = metrics_header_match.groups()
+ file_parser = lambda f: _parse_metrics_file(desc_type, int(major_version), int(minor_version), f)
+ else:
+ # Cached descriptor handling. These contain multiple descriptors per file.
+
+ if filename == "cached-descriptors":
+ file_parser = stem.descriptor.server_descriptor._parse_file
+ elif filename == "cached-extrainfo":
+ file_parser = stem.descriptor.extrainfo_descriptor._parse_file
+ elif filename == "cached-consensus":
+ file_parser = stem.descriptor.networkstatus._parse_file
+ elif filename == "cached-microdesc-consensus":
+ file_parser = lambda f: stem.descriptor.networkstatus._parse_file(f, is_microdescriptor = True)
if file_parser:
for desc in file_parser(descriptor_file):
diff --git a/test/integ/descriptor/networkstatus.py b/test/integ/descriptor/networkstatus.py
index c654158..035289d 100644
--- a/test/integ/descriptor/networkstatus.py
+++ b/test/integ/descriptor/networkstatus.py
@@ -117,17 +117,21 @@ class TestNetworkStatus(unittest.TestCase):
consensus_path = test.integ.descriptor.get_resource("metrics_consensus")
- with open(consensus_path) as descriptor_file:
- descriptors = stem.descriptor.parse_file(descriptor_file, path = consensus_path)
-
- router = next(descriptors)
- self.assertEquals("sumkledi", router.nickname)
- self.assertEquals("0013D22389CD50D0B784A3E4061CB31E8CE8CEB5", router.fingerprint)
- self.assertEquals("8mCr8Sl7RF4ENU4jb0FZFA/3do8", router.digest)
- self.assertEquals(datetime.datetime(2012, 7, 12, 4, 1, 55), router.published)
- self.assertEquals("178.218.213.229", router.address)
- self.assertEquals(80, router.or_port)
- self.assertEquals(None, router.dir_port)
+ for specify_type in (True, False):
+ with open(consensus_path) as descriptor_file:
+ if specify_type:
+ descriptors = stem.descriptor.parse_file(descriptor_file, "network-status-consensus-3 1.0", path = consensus_path)
+ else:
+ descriptors = stem.descriptor.parse_file(descriptor_file, path = consensus_path)
+
+ router = next(descriptors)
+ self.assertEquals("sumkledi", router.nickname)
+ self.assertEquals("0013D22389CD50D0B784A3E4061CB31E8CE8CEB5", router.fingerprint)
+ self.assertEquals("8mCr8Sl7RF4ENU4jb0FZFA/3do8", router.digest)
+ self.assertEquals(datetime.datetime(2012, 7, 12, 4, 1, 55), router.published)
+ self.assertEquals("178.218.213.229", router.address)
+ self.assertEquals(80, router.or_port)
+ self.assertEquals(None, router.dir_port)
def test_metrics_bridge_consensus(self):
"""
More information about the tor-commits
mailing list