[tor-commits] [stem/master] Compression class
atagar at torproject.org
atagar at torproject.org
Sat Aug 17 20:44:26 UTC 2019
commit 9e9458501fa4b2f8819f93ff853c658b9e63366c
Author: Damian Johnson <atagar at torproject.org>
Date: Tue Jun 18 18:37:05 2019 -0700
Compression class
Both our collector and remote modules need to decompress descriptors, so adding
a little helper class to assist with availability checks and decompression.
---
stem/descriptor/__init__.py | 79 +++++++++++++++++++++++++++++++
stem/descriptor/collector.py | 77 +++---------------------------
test/integ/descriptor/collector.py | 9 ++--
test/settings.cfg | 3 +-
test/unit/descriptor/collector.py | 11 +++--
test/unit/descriptor/compression.py | 39 +++++++++++++++
test/unit/descriptor/data/compressed_bz2 | Bin 0 -> 1691 bytes
7 files changed, 138 insertions(+), 80 deletions(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index 4d13ec60..0b3fda91 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -11,6 +11,8 @@ Package for parsing and processing descriptor data.
parse_file - Parses the descriptors in a file.
create_signing_key - Cretes a signing key that can be used for creating descriptors.
+ Compression - method of descriptor decompression
+
Descriptor - Common parent for all descriptor file types.
| |- content - creates the text of a new descriptor
| |- create - creates a new descriptor
@@ -172,6 +174,83 @@ DocumentHandler = stem.util.enum.UppercaseEnum(
)
+class _Compression(object):
+ """
+ Compression method supported by CollecTor.
+
+ :var bool available: **True** if this method of decompression is available,
+ **False** otherwise
+ :var str encoding: `http 'Accept-Encoding' parameter <https://en.wikipedia.org/wiki/HTTP_compression#Content-Encoding_tokens>`_
+ :var str extension: file extension of this compression
+
+ .. versionadded:: 1.8.0
+ """
+
+ def __init__(self, name, module, encoding, extension, decompression_func):
+ if module is None:
+ self._module = None
+ self.available = True
+ else:
+ # Compression modules are optional. Usually gzip and bz2 are available,
+ # but they might be missing if compiling python yourself. As for lzma it
+ # was added in python 3.3.
+
+ try:
+ self._module = __import__(module)
+ self.available = True
+ except ImportError:
+ self._module = None
+ self.available = False
+
+ self.extension = extension
+ self.encoding = encoding
+
+ self._name = name
+ self._module_name = module
+ self._decompression_func = decompression_func
+
+ def decompress(self, content):
+ """
+ Decompresses the given content via this method.
+
+ :param bytes content: content to be decompressed
+
+ :returns: **bytes** with the decompressed content
+
+ :raises:
+ If unable to decompress this provides...
+
+ * **IOError** if content isn't compressed with this
+ * **ImportError** if this method of decompression is unavailable
+ """
+
+ if not self.available:
+ raise ImportError("'%s' decompression module is unavailable" % self._module_name)
+
+ return self._decompression_func(self._module, content)
+
+ def __str__(self):
+ return self._name
+
+
+def _zstd_decompress(module, content):
+ output_buffer = io.BytesIO()
+
+ with module.ZstdDecompressor().write_to(output_buffer) as decompressor:
+ decompressor.write(content)
+
+ return output_buffer.getvalue()
+
+
+Compression = stem.util.enum.Enum(
+ ('PLAINTEXT', _Compression('plaintext', None, 'identity', '.txt', lambda module, content: content)),
+ ('GZIP', _Compression('gzip', 'zlib', 'gzip', '.gz', lambda module, content: module.decompress(content, module.MAX_WBITS | 32))),
+ ('BZ2', _Compression('bzip2', 'bz2', 'bzip2', '.bz2', lambda module, content: module.decompress(content))),
+ ('LZMA', _Compression('lzma', 'lzma', 'x-tor-lzma', '.xz', lambda module, content: module.decompress(content))),
+ ('ZSTD', _Compression('zstd', 'zstd', 'zstd', '.zst', _zstd_decompress)),
+)
+
+
class TypeAnnotation(collections.namedtuple('TypeAnnotation', ['name', 'major_version', 'minor_version'])):
"""
`Tor metrics type annotation
diff --git a/stem/descriptor/collector.py b/stem/descriptor/collector.py
index 21a774e9..b3f99241 100644
--- a/stem/descriptor/collector.py
+++ b/stem/descriptor/collector.py
@@ -50,10 +50,11 @@ With this you can either download and read directly from CollecTor...
.. versionadded:: 1.8.0
"""
-import io
import json
import time
+from stem.descriptor import Compression
+
try:
# account for urllib's change between python 2.x and 3.x
import urllib.request as urllib
@@ -68,82 +69,18 @@ COLLECTOR_URL = 'https://collector.torproject.org/'
REFRESH_INDEX_RATE = 3600 # get new index if cached copy is an hour old
-class Compression(object):
- """
- Compression method supported by CollecTor.
-
- :var bool available: **True** if this method of decryption is available,
- **False** otherwise
- :var str extension: file extension of this compression
- """
-
- def __init__(self, module, extension):
- # Compression modules are optional. Usually gzip and bz2 are available, but
- # they might be missing if compiling python yourself. As for lzma it was
- # added in python 3.3.
-
- try:
- self._module = __import__(module)
- self.available = True
- except ImportError:
- self._module = None
- self.available = False
-
- self.extension = extension
- self._module_name = module
-
- def decompress(self, content):
- """
- Decompresses the given content via this method.
-
- :param bytes content: content to be decompressed
-
- :returns: **bytes** with the decompressed content
-
- :raises:
- If unable to decompress this provide...
-
- * **IOError** if content isn't compressed with this
- * **ImportError** if this method if decompression is unavalable
- """
-
- if not self.available:
- raise ImportError("'%s' decompression module is unavailable" % self)
-
- if self._module_name == 'gzip':
- if stem.prereq.is_python_3():
- return self._module.decompress(content)
- else:
- # prior to python 3.2 gzip only had GzipFile
- return self._module.GzipFile(fileobj = io.BytesIO(content)).read()
- elif self._module_name == 'bz2':
- return self._module.decompress(content)
- elif self._module_name == 'lzma':
- return self._module.decompress(content)
- else:
- raise ImportError('BUG: No implementation for %s decompression' % self)
-
- def __str__(self):
- return self._module_name
-
-
-GZIP = Compression('gzip', '.gz')
-BZ2 = Compression('bz2', '.bz2')
-LZMA = Compression('lzma', '.xz')
-
-
def url(resource, compression = None):
"""
Provides CollecTor url for the given resource.
:param str resource: resource type of the url
- :param descriptor.collector.Compression compression: compression type to
+ :param descriptor.Compression compression: compression type to
download from
:returns: **str** with the CollecTor url
"""
- # TODO: Not yet sure how to most elegantly map resources to urls. No doubt
+ # TODO: Unsure how to most elegantly map resources to urls. No doubt
# this'll change as we add more types.
if resource == 'index':
@@ -152,7 +89,7 @@ def url(resource, compression = None):
raise ValueError("'%s' isn't a recognized resource type" % resource)
suffix = compression.extension if compression else ''
- return ''.join((COLLECTOR_URL, '/'.join(path), suffix))
+ return COLLECTOR_URL + '/'.join(path) + suffix
class CollecTor(object):
@@ -161,7 +98,7 @@ class CollecTor(object):
provided in `an index <https://collector.torproject.org/index/index.json>`_
that's fetched as required.
- :var descriptor.collector.Compression compression: compression type to
+ :var descriptor.Compression compression: compression type to
download from, if undefiled we'll use the best decompression available
:var int retries: number of times to attempt the request if downloading it
fails
@@ -172,7 +109,7 @@ class CollecTor(object):
if compression == 'best':
self.compression = None
- for option in (LZMA, BZ2, GZIP):
+ for option in (Compression.LZMA, Compression.BZ2, Compression.GZIP):
if option.available:
self.compression = option
break
diff --git a/test/integ/descriptor/collector.py b/test/integ/descriptor/collector.py
index 1af329a5..dbb09d5a 100644
--- a/test/integ/descriptor/collector.py
+++ b/test/integ/descriptor/collector.py
@@ -6,7 +6,8 @@ import unittest
import test.require
-from stem.descriptor.collector import GZIP, BZ2, LZMA, CollecTor
+from stem.descriptor import Compression
+from stem.descriptor.collector import CollecTor
class TestCollector(unittest.TestCase):
@@ -18,17 +19,17 @@ class TestCollector(unittest.TestCase):
@test.require.only_run_once
@test.require.online
def test_index_gzip(self):
- self._test_index(GZIP)
+ self._test_index(Compression.GZIP)
@test.require.only_run_once
@test.require.online
def test_index_bz2(self):
- self._test_index(BZ2)
+ self._test_index(Compression.BZ2)
@test.require.only_run_once
@test.require.online
def test_index_lzma(self):
- self._test_index(LZMA)
+ self._test_index(Compression.LZMA)
def _test_index(self, compression):
if compression and not compression.available:
diff --git a/test/settings.cfg b/test/settings.cfg
index 6f71a329..1bdb1a0a 100644
--- a/test/settings.cfg
+++ b/test/settings.cfg
@@ -239,10 +239,11 @@ test.unit_tests
|test.unit.util.tor_tools.TestTorTools
|test.unit.util.__init__.TestBaseUtil
|test.unit.installation.TestInstallation
-|test.unit.descriptor.collector.TestCollector
|test.unit.descriptor.descriptor.TestDescriptor
+|test.unit.descriptor.compression.TestCompression
|test.unit.descriptor.export.TestExport
|test.unit.descriptor.reader.TestDescriptorReader
+|test.unit.descriptor.collector.TestCollector
|test.unit.descriptor.remote.TestDescriptorDownloader
|test.unit.descriptor.server_descriptor.TestServerDescriptor
|test.unit.descriptor.extrainfo_descriptor.TestExtraInfoDescriptor
diff --git a/test/unit/descriptor/collector.py b/test/unit/descriptor/collector.py
index a0f8cb2e..b2b464ce 100644
--- a/test/unit/descriptor/collector.py
+++ b/test/unit/descriptor/collector.py
@@ -7,7 +7,8 @@ import unittest
import stem.prereq
-from stem.descriptor.collector import GZIP, BZ2, LZMA, CollecTor, url
+from stem.descriptor import Compression
+from stem.descriptor.collector import CollecTor, url
try:
# added in python 3.3
@@ -22,9 +23,9 @@ class TestCollector(unittest.TestCase):
def test_url(self):
self.assertEqual('https://collector.torproject.org/index/index.json', url('index'))
self.assertEqual('https://collector.torproject.org/index/index.json', url('index', compression = None))
- self.assertEqual('https://collector.torproject.org/index/index.json.gz', url('index', compression = GZIP))
- self.assertEqual('https://collector.torproject.org/index/index.json.bz2', url('index', compression = BZ2))
- self.assertEqual('https://collector.torproject.org/index/index.json.xz', url('index', compression = LZMA))
+ self.assertEqual('https://collector.torproject.org/index/index.json.gz', url('index', compression = Compression.GZIP))
+ self.assertEqual('https://collector.torproject.org/index/index.json.bz2', url('index', compression = Compression.BZ2))
+ self.assertEqual('https://collector.torproject.org/index/index.json.xz', url('index', compression = Compression.LZMA))
@patch(URL_OPEN, Mock(return_value = io.BytesIO(b'{"index_created":"2017-12-25 21:06","build_revision":"56a303e","path":"https://collector.torproject.org"}')))
def test_index(self):
@@ -47,7 +48,7 @@ class TestCollector(unittest.TestCase):
self.assertRaisesRegexp(ValueError, 'No JSON object could be decoded', collector.index)
def test_index_malformed_compression(self):
- for compression in (GZIP, BZ2, LZMA):
+ for compression in (Compression.GZIP, Compression.BZ2, Compression.LZMA):
with patch(URL_OPEN, Mock(return_value = io.BytesIO(b'not compressed'))):
collector = CollecTor(compression = compression)
self.assertRaisesRegexp(IOError, 'Unable to decompress response as %s' % compression, collector.index)
diff --git a/test/unit/descriptor/compression.py b/test/unit/descriptor/compression.py
new file mode 100644
index 00000000..3945bc9c
--- /dev/null
+++ b/test/unit/descriptor/compression.py
@@ -0,0 +1,39 @@
+"""
+Unit tests for stem.descriptor.Compression.
+"""
+
+import unittest
+
+from stem.descriptor import Compression
+
+from test.unit.descriptor import get_resource
+
+
+class TestCompression(unittest.TestCase):
+ def test_decompress_plaintext(self):
+ self._check_file(Compression.PLAINTEXT, 'compressed_identity')
+
+ def test_decompress_gzip(self):
+ self._check_file(Compression.GZIP, 'compressed_gzip')
+
+ def test_decompress_bz2(self):
+ self._check_file(Compression.BZ2, 'compressed_bz2')
+
+ def test_decompress_lzma(self):
+ self._check_file(Compression.LZMA, 'compressed_lzma')
+
+ def test_decompress_zstd(self):
+ self._check_file(Compression.ZSTD, 'compressed_zstd')
+
+ def _check_file(self, compression, filename):
+ """
+ Decompress one of our 'compressed_*' server descriptors.
+ """
+
+ if not compression.available:
+ self.skipTest('(%s unavailable)' % compression)
+ return
+
+ with open(get_resource(filename), 'rb') as compressed_file:
+ content = compression.decompress(compressed_file.read())
+ self.assertTrue(content.startswith(b'router moria1 128.31.0.34 9101 0 9131'))
diff --git a/test/unit/descriptor/data/compressed_bz2 b/test/unit/descriptor/data/compressed_bz2
new file mode 100644
index 00000000..4d645a71
Binary files /dev/null and b/test/unit/descriptor/data/compressed_bz2 differ
More information about the tor-commits
mailing list