[tor-commits] [stem/master] Note compression of files

atagar at torproject.org atagar at torproject.org
Sat Aug 17 20:44:27 UTC 2019


commit 9d9d635669126bf0f183cfe4d961dd9933508865
Author: Damian Johnson <atagar at torproject.org>
Date:   Fri Jul 12 11:14:19 2019 -0700

    Note compression of files
    
    Add 'compression' and 'tar' attributes to the File class, which we'll want for decompression.
---
 stem/descriptor/collector.py      | 12 ++++++++++++
 test/unit/descriptor/collector.py | 15 +++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/stem/descriptor/collector.py b/stem/descriptor/collector.py
index d94d5871..a47612cb 100644
--- a/stem/descriptor/collector.py
+++ b/stem/descriptor/collector.py
@@ -161,16 +161,28 @@ class File(object):
   File within CollecTor.
 
   :var str path: file path within collector
+  :var stem.descriptor.Compression compression: file compression, **None** if
+    this cannot be determined
+  :var bool tar: **True** if a tarball, **False** otherwise
   :var int size: size of the file
   :var datetime last_modified: when the file was last modified
   """
 
   def __init__(self, path, size, last_modified):
     self.path = path
+    self.compression = None
+    self.tar = path.endswith('.tar') or '.tar.' in path
     self.size = size
     self.last_modified = datetime.datetime.strptime(last_modified, '%Y-%m-%d %H:%M')
     self._guessed_type = None
 
+    if '.' not in self.path or self.path.endswith('.tar'):
+      self.compression = Compression.PLAINTEXT
+    else:
+      for compression in (Compression.LZMA, Compression.BZ2, Compression.GZIP):
+        if self.path.endswith(compression.extension):
+          self.compression = compression
+
   def guess_descriptor_types(self):
     """
     Descriptor @type this file is expected to have based on its path. If unable
diff --git a/test/unit/descriptor/collector.py b/test/unit/descriptor/collector.py
index 609a4929..3c4d39a0 100644
--- a/test/unit/descriptor/collector.py
+++ b/test/unit/descriptor/collector.py
@@ -133,9 +133,24 @@ class TestCollector(unittest.TestCase):
 
     extrainfo_file = files[test_path]
     self.assertEqual(test_path, extrainfo_file.path)
+    self.assertEqual(Compression.LZMA, extrainfo_file.compression)
+    self.assertEqual(True, extrainfo_file.tar)
     self.assertEqual(6459884, extrainfo_file.size)
     self.assertEqual(datetime.datetime(2016, 6, 23, 9, 54), extrainfo_file.last_modified)
 
+  def test_file_compression_attributes(self):
+    f = File('archive/relay-descriptors/microdescs/microdescs-2014-01.tar.xz', 7515396, '2014-02-07 03:59')
+    self.assertEqual(Compression.LZMA, f.compression)
+    self.assertEqual(True, f.tar)
+
+    f = File('archive/webstats/webstats-2015-03.tar', 20480, '2018-03-19 16:07')
+    self.assertEqual(Compression.PLAINTEXT, f.compression)
+    self.assertEqual(True, f.tar)
+
+    f = File('recent/relay-descriptors/extra-infos/2019-07-03-02-05-00-extra-infos', 1162899, '2019-07-03 02:05')
+    self.assertEqual(Compression.PLAINTEXT, f.compression)
+    self.assertEqual(False, f.tar)
+
   def test_guess_descriptor_types(self):
     f = File('archive/bridge-descriptors/extra-infos/bridge-extra-infos-2008-05.tar.xz', 377644, '2016-09-04 09:21')
     self.assertEqual(('bridge-extra-info 1.3',), f.guess_descriptor_types())





More information about the tor-commits mailing list