[tor-commits] [stem/master] Working zstd support
atagar at torproject.org
atagar at torproject.org
Sun Apr 1 04:40:01 UTC 2018
commit 5da64e67948c07a3c509825e6fecef32cd7bbd8d
Author: Damian Johnson <atagar at torproject.org>
Date: Sat Mar 31 21:13:18 2018 -0700
Working zstd support
Oops, turns out we weren't using the officially suggested zstd python module.
Moving to the following did the trick...
https://pypi.python.org/pypi/zstandard
---
stem/descriptor/remote.py | 52 +++++++++++++++++++++++++++++++----------------
1 file changed, 35 insertions(+), 17 deletions(-)
diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py
index 9b71be8d..e1190fc7 100644
--- a/stem/descriptor/remote.py
+++ b/stem/descriptor/remote.py
@@ -92,8 +92,8 @@ content. For example...
=============== ===========
**PLAINTEXT** Uncompressed data.
**GZIP** `GZip compression <https://www.gnu.org/software/gzip/>`_.
- **ZSTD** `Zstandard compression <https://www.zstd.net>`_.
- **LZMA** `LZMA compression <https://en.wikipedia.org/wiki/LZMA>`_.
+ **ZSTD** `Zstandard compression <https://www.zstd.net>`_, this requires the `zstandard module <https://pypi.python.org/pypi/zstandard>`_.
+ **LZMA** `LZMA compression <https://en.wikipedia.org/wiki/LZMA>`_, this requires the `lzma module <https://docs.python.org/3/library/lzma.html>`_.
=============== ===========
"""
@@ -133,9 +133,18 @@ except ImportError:
LZMA_SUPPORTED = False
try:
- # https://pypi.python.org/pypi/zstd
+ # We use the suggested python zstd library...
+ #
+ # https://pypi.python.org/pypi/zstandard
+ #
+ # Unfortunately this installs as a zstd module which can be confused with...
+ #
+ # https://pypi.python.org/pypi/zstd
+ #
+ # As such we check for the specific decompression class we'll need.
+
import zstd
- ZSTD_SUPPORTED = True
+ ZSTD_SUPPORTED = hasattr(zstd, 'ZstdDecompressor')
except ImportError:
ZSTD_SUPPORTED = False
@@ -146,8 +155,8 @@ Compression = stem.util.enum.Enum(
('LZMA', 'x-tor-lzma'),
)
-ZSTD_UNAVAILABLE_MSG = 'ZSTD is not yet supported'
-LZMA_UNAVAILABLE_MSG = 'LZMA compression was requested but requires the lzma module, which was added in python 3.3'
+ZSTD_UNAVAILABLE_MSG = 'ZSTD compression requires the zstandard module (https://pypi.python.org/pypi/zstandard)'
+LZMA_UNAVAILABLE_MSG = 'LZMA compression requires the lzma module (https://docs.python.org/3/library/lzma.html)'
# Tor has a limited number of descriptors we can fetch explicitly by their
# fingerprint or hashes due to a limit on the url length by squid proxies.
@@ -307,9 +316,9 @@ class Query(object):
/tor/keys/fp/<v3ident1>+<v3ident2> key certificates for specific authorities
=============================================== ===========
- **LZMA** compression requires the `lzma module
- <https://docs.python.org/3/library/lzma.html>`_ which was added in Python
- 3.3.
+ **ZSTD** compression requires `zstandard
+ <https://pypi.python.org/pypi/zstandard>`_, and **LZMA** requires the `lzma
+ module <https://docs.python.org/3/library/lzma.html>`_.
For legacy reasons if our resource has a '.z' suffix then our **compression**
argument is overwritten with Compression.GZIP.
@@ -367,14 +376,14 @@ class Query(object):
if isinstance(compression, str):
compression = [compression] # caller provided only a single option
- if Compression.LZMA in compression and not LZMA_SUPPORTED:
- log.log_once('stem.descriptor.remote.lzma_unavailable', log.INFO, LZMA_UNAVAILABLE_MSG)
- compression.remove(Compression.LZMA)
-
if Compression.ZSTD in compression and not ZSTD_SUPPORTED:
log.log_once('stem.descriptor.remote.zstd_unavailable', log.INFO, ZSTD_UNAVAILABLE_MSG)
compression.remove(Compression.ZSTD)
+ if Compression.LZMA in compression and not LZMA_SUPPORTED:
+ log.log_once('stem.descriptor.remote.lzma_unavailable', log.INFO, LZMA_UNAVAILABLE_MSG)
+ compression.remove(Compression.LZMA)
+
if not compression:
compression = [Compression.PLAINTEXT]
@@ -528,13 +537,22 @@ class Query(object):
data = response.read()
encoding = response.info().getheader('Content-Encoding')
- if encoding in (Compression.GZIP, 'deflate'):
- # The '32' is for automatic header detection...
- # https://stackoverflow.com/questions/3122145/zlib-error-error-3-while-decompressing-incorrect-header-check/22310760#22310760
+ # Tor doesn't include compression headers. As such when using gzip we
+ # need to include '32' for automatic header detection...
+ #
+ # https://stackoverflow.com/questions/3122145/zlib-error-error-3-while-decompressing-incorrect-header-check/22310760#22310760
+ #
+ # ... and with zstd we need to use the streaming API.
+ if encoding in (Compression.GZIP, 'deflate'):
data = zlib.decompress(data, zlib.MAX_WBITS | 32)
elif encoding == Compression.ZSTD and ZSTD_SUPPORTED:
- data = zstd.decompress(data)
+ output_buffer = io.BytesIO()
+
+ with zstd.ZstdDecompressor().write_to(output_buffer) as decompressor:
+ decompressor.write(data)
+
+ data = output_buffer.getvalue()
elif encoding == Compression.LZMA and LZMA_SUPPORTED:
data = lzma.decompress(data)
More information about the tor-commits
mailing list