[tor-commits] [stem/master] Adding get_archive_path() method to descriptors
atagar at torproject.org
atagar at torproject.org
Sun Feb 17 06:07:15 UTC 2013
commit f83c7efce3380c60c81028802e198791aa30dfbd
Author: Damian Johnson <atagar at torproject.org>
Date: Sat Feb 16 10:53:39 2013 -0800
Adding get_archive_path() method to descriptors
We can't use a TarInfo's 'name' attribute for get_path() since that corresponds
to its location within the archive. That said, I've often wanted both paths, so
this commit both fixes get_path() for tarballs and adds get_archive_path().
---
stem/descriptor/__init__.py | 17 +++++++++++++++++
stem/descriptor/reader.py | 9 +++++++++
test/integ/descriptor/reader.py | 18 ++++++++++++++++++
test/settings.cfg | 2 +-
4 files changed, 45 insertions(+), 1 deletions(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index 6e5d68f..87872d6 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -9,6 +9,7 @@ Package for parsing and processing descriptor data.
Descriptor - Common parent for all descriptor file types.
|- get_path - location of the descriptor on disk if it came from a file
+ |- get_archive_path - location of the descriptor within the archive it came from
|- get_unrecognized_lines - unparsed descriptor content
+- __str__ - string that the descriptor was made from
@@ -250,6 +251,7 @@ class Descriptor(object):
def __init__(self, contents):
self._path = None
+ self._archive_path = None
self._raw_contents = contents
def get_path(self):
@@ -261,6 +263,18 @@ class Descriptor(object):
return self._path
+ def get_archive_path(self):
+ """
+ If this descriptor came from an archive then provides its path within the
+ archive. This is only set if the descriptor came from a
+ :class:`~stem.descriptor.reader.DescriptorReader`, and is **None** if this
+ descriptor didn't come from an archive.
+
+ :returns: **str** with the descriptor's path within the archive
+ """
+
+ return self._archive_path
+
def get_unrecognized_lines(self):
"""
Provides a list of lines that were either ignored or had data that we did
@@ -275,6 +289,9 @@ class Descriptor(object):
def _set_path(self, path):
self._path = path
+ def _set_archive_path(self, path):
+ self._archive_path = path
+
def __str__(self):
return self._raw_contents
diff --git a/stem/descriptor/reader.py b/stem/descriptor/reader.py
index 92adeaf..b017e06 100644
--- a/stem/descriptor/reader.py
+++ b/stem/descriptor/reader.py
@@ -545,10 +545,19 @@ class DescriptorReader(object):
if tar_entry.isfile():
entry = tar_file.extractfile(tar_entry)
+ # The parse_file() function uses the 'name' attribute to figure out
+ # the file that it came from. In the case of TarInfo instances the
+ # 'name' is the path within the archive. We'll want that for
+ # _set_archive_path().
+
+ archive_path = entry.name
+ entry.name = target
+
for desc in stem.descriptor.parse_file(entry, validate = self._validate, document_handler = self._document_handler):
if self._is_stopped.isSet():
return
+ desc._set_archive_path(archive_path)
self._unreturned_descriptors.put(desc)
self._iter_notice.set()
diff --git a/test/integ/descriptor/reader.py b/test/integ/descriptor/reader.py
index 936cf39..3ed47ae 100644
--- a/test/integ/descriptor/reader.py
+++ b/test/integ/descriptor/reader.py
@@ -266,6 +266,24 @@ class TestDescriptorReader(unittest.TestCase):
with reader:
self.assertEqual(0, len(list(reader)))
+ def test_archived_paths(self):
+ """
+ Checks the get_path() and get_archive_path() for a tarball.
+ """
+
+ expected_archive_paths = (
+ "descriptor_archive/0/2/02c311d3d789f3f55c0880b5c85f3c196343552c",
+ "descriptor_archive/1/b/1bb798cae15e21479db0bc700767eee4733e9d4a",
+ "descriptor_archive/1/b/1ef75fef564180d8b3f72c6f8635ff0cd855f92c",
+ )
+
+ test_path = os.path.join(DESCRIPTOR_TEST_DATA, "descriptor_archive.tar")
+
+ with stem.descriptor.reader.DescriptorReader(test_path) as reader:
+ for desc in reader:
+ self.assertEqual(test_path, desc.get_path())
+ self.assertTrue(desc.get_archive_path() in expected_archive_paths)
+
def test_archived_uncompressed(self):
"""
Checks that we can read descriptors from an uncompressed archive.
diff --git a/test/settings.cfg b/test/settings.cfg
index a5abc91..b467fd4 100644
--- a/test/settings.cfg
+++ b/test/settings.cfg
@@ -157,7 +157,7 @@ target.torrc RUN_PTRACE => PORT, PTRACE
pyflakes.ignore stem/prereq.py => 'RSA' imported but unused
pyflakes.ignore stem/prereq.py => 'asn1' imported but unused
pyflakes.ignore stem/prereq.py => 'long_to_bytes' imported but unused
-pyflakes.ignore stem/descriptor/__init__.py => redefinition of unused 'OrderedDict' from line 54
+pyflakes.ignore stem/descriptor/__init__.py => redefinition of unused 'OrderedDict' from line 55
pyflakes.ignore stem/util/str_tools.py => redefinition of function '_to_bytes' from line 54
pyflakes.ignore stem/util/str_tools.py => redefinition of function '_to_unicode' from line 60
pyflakes.ignore test/mocking.py => undefined name 'builtins'
More information about the tor-commits mailing list