[tor-commits] [stem/master] Download helper utility
atagar at torproject.org
Sat Aug 17 20:44:27 UTC 2019
commit 6d4cbd2180d11682d7d65b6926c562155907d049
Author: Damian Johnson <atagar at torproject.org>
Date: Thu Aug 1 19:58:07 2019 -0700
Download helper utility
Stem only raises documented exceptions, but urllib makes this difficult because
it raises a wide variety of exceptions. Wrap those failures in a DownloadFailed
exception that retains the original exception and stacktrace.
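For example, callers that previously had to catch socket.timeout and
urllib2.URLError can now catch a single documented type. A minimal sketch of
the intended usage (the CollecTor index url is just an illustration):

  import stem.util.connection

  try:
    # content is returned as bytes; transient failures are retried
    content = stem.util.connection.download('https://collector.torproject.org/index/index.json', timeout = 30, retries = 3)
  except stem.util.connection.DownloadFailed as exc:
    print('Unable to download %s: %s' % (exc.url, exc.error))
    print(exc.stacktrace_str)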
---
stem/descriptor/collector.py | 76 +++-------------------------
stem/util/connection.py | 112 ++++++++++++++++++++++++++++++++++++++++++
test/integ/util/connection.py | 33 +++++++++++--
test/unit/util/connection.py | 43 ++++++++++++++++
4 files changed, 192 insertions(+), 72 deletions(-)
diff --git a/stem/descriptor/collector.py b/stem/descriptor/collector.py
index a78d60c4..d9f159e1 100644
--- a/stem/descriptor/collector.py
+++ b/stem/descriptor/collector.py
@@ -54,20 +54,13 @@ import json
import os
import re
import shutil
-import sys
import tempfile
import time
+import stem.util.connection
import stem.util.str_tools
from stem.descriptor import Compression, parse_file
-from stem.util import log
-
-try:
- # account for urllib's change between python 2.x and 3.x
- import urllib.request as urllib
-except ImportError:
- import urllib2 as urllib
COLLECTOR_URL = 'https://collector.torproject.org/'
REFRESH_INDEX_RATE = 3600 # get new index if cached copy is an hour old
@@ -148,12 +141,7 @@ def get_server_descriptors(start = None, end = None, cache_to = None, timeout =
:class:`~stem.descriptor.server_descriptor.ServerDescriptor` for the given
time range
- :raises:
- * **socket.timeout** if our request timed out
- * **urllib2.URLError** for most request failures
-
- Note that the urllib2 module may fail with other exception types, in
- which case we'll pass it along.
+ :raises: :class:`~stem.util.connection.DownloadFailed` if the download fails
"""
for f in get_instance().files('server-descriptor', start, end):
@@ -161,43 +149,6 @@ def get_server_descriptors(start = None, end = None, cache_to = None, timeout =
yield desc
-def _download(url, timeout, retries):
- """
- Download from the given url.
-
- :param str url: uncompressed url to download from
- :param int timeout: timeout when connection becomes idle, no timeout applied
- if **None**
- :param int retires: maximum attempts to impose
-
- :returns: content of the given url
-
- :raises:
- * **socket.timeout** if our request timed out
- * **urllib2.URLError** for most request failures
-
- Note that the urllib2 module may fail with other exception types, in
- which case we'll pass it along.
- """
-
- start_time = time.time()
-
- try:
- return urllib.urlopen(url, timeout = timeout).read()
- except:
- exc = sys.exc_info()[1]
-
- if timeout is not None:
- timeout -= time.time() - start_time
-
- if retries > 0 and (timeout is None or timeout > 0):
- log.debug("Failed to download from CollecTor at '%s' (%i retries remaining): %s" % (url, retries, exc))
- return _download(url, timeout, retries - 1)
- else:
- log.debug("Failed to download from CollecTor at '%s': %s" % (url, exc))
- raise
-
-
class File(object):
"""
File within CollecTor.
@@ -258,11 +209,7 @@ class File(object):
:raises:
* **ValueError** if unable to determine the descirptor type
* **TypeError** if we cannot parse this descriptor type
- * **socket.timeout** if our request timed out
- * **urllib2.URLError** for most request failures
-
- Note that the urllib2 module may fail with other exception types, in
- which case we'll pass it along.
+ * :class:`~stem.util.connection.DownloadFailed` if the download fails
"""
if descriptor_type is None:
@@ -309,12 +256,7 @@ class File(object):
:returns: **str** with the path we downloaded to
- :raises:
- * **socket.timeout** if our request timed out
- * **urllib2.URLError** for most request failures
-
- Note that the urllib2 module may fail with other exception types, in
- which case we'll pass it along.
+ :raises: :class:`~stem.util.connection.DownloadFailed` if the download fails
"""
# TODO: If checksums get added to the index we should replace
@@ -334,7 +276,7 @@ class File(object):
elif os.path.exists(path):
return path # file already exists
- response = _download(COLLECTOR_URL + self.path, timeout, retries)
+ response = stem.util.connection.download(COLLECTOR_URL + self.path, timeout, retries)
if decompress:
response = self.compression.decompress(response)
@@ -441,8 +383,7 @@ class CollecTor(object):
* **ValueError** if json is malformed
* **IOError** if unable to decompress
- * **socket.timeout** if our request timed out
- * **urllib2.URLError** for most request failures
+ * :class:`~stem.util.connection.DownloadFailed` if the download fails
"""
if not self._cached_index or time.time() - self._cached_index_at >= REFRESH_INDEX_RATE:
@@ -456,7 +397,7 @@ class CollecTor(object):
extension = compression.extension if compression != Compression.PLAINTEXT else ''
url = COLLECTOR_URL + 'index/index.json' + extension
- response = compression.decompress(_download(url, self.timeout, self.retries))
+ response = compression.decompress(stem.util.connection.download(url, self.timeout, self.retries))
self._cached_index = json.loads(stem.util.str_tools._to_unicode(response))
self._cached_index_at = time.time()
@@ -478,8 +419,7 @@ class CollecTor(object):
* **ValueError** if json is malformed
* **IOError** if unable to decompress
- * **socket.timeout** if our request timed out
- * **urllib2.URLError** for most request failures
+ * :class:`~stem.util.connection.DownloadFailed` if the download fails
"""
if not self._cached_files or time.time() - self._cached_index_at >= REFRESH_INDEX_RATE:
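With the module-level _download() gone, collector callers see the same
DownloadFailed exception as direct users of the new helper. A rough sketch of
that caller-facing behavior (the fingerprint attribute comes from
ServerDescriptor; treat the exact fields as illustrative):

  import stem.descriptor.collector
  import stem.util.connection

  try:
    for desc in stem.descriptor.collector.get_server_descriptors(timeout = 60):
      print(desc.fingerprint)
  except stem.util.connection.DownloadFailed as exc:
    print('CollecTor request failed: %s' % exc)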
diff --git a/stem/util/connection.py b/stem/util/connection.py
index c23d74e7..7be7fe09 100644
--- a/stem/util/connection.py
+++ b/stem/util/connection.py
@@ -8,6 +8,10 @@ Connection and networking based utility functions.
::
+ DownloadFailed - Inability to download a resource.
+ +- DownloadTimeout - Download timeout reached.
+
+ download - download from a given url
get_connections - quieries the connections belonging to a given process
system_resolvers - provides connection resolution methods that are likely to be available
port_usage - brief description of the common usage for a port
@@ -58,6 +62,10 @@ import collections
import os
import platform
import re
+import socket
+import sys
+import time
+import traceback
import stem.util
import stem.util.proc
@@ -65,6 +73,12 @@ import stem.util.system
from stem.util import conf, enum, log, str_tools
+try:
+ # account for urllib's change between python 2.x and 3.x
+ import urllib.request as urllib
+except ImportError:
+ import urllib2 as urllib
+
# Connection resolution is risky to log about since it's highly likely to
# contain sensitive information. That said, it's also difficult to get right in
# a platform independent fashion. To opt into the logging requried to
@@ -162,6 +176,104 @@ class Connection(collections.namedtuple('Connection', ['local_address', 'local_p
"""
+class DownloadFailed(IOError):
+ """
+ Inability to download a resource. Python's urllib module raises
+ a wide variety of undocumented exceptions (urllib2.URLError,
+ socket.timeout, and others).
+
+ This wraps lower level failures in a common exception type that
+ retains their exception and `stacktrace
+ <https://docs.python.org/3/library/traceback.html>`_.
+
+ .. versionadded:: 1.8.0
+
+ :var str url: url we failed to download from
+ :var Exception error: original urllib exception
+ :var traceback stacktrace: original stacktrace
+ :var str stacktrace_str: string representation of the stacktrace
+ """
+
+ def __init__(self, url, error, stacktrace, message = None):
+ if message is None:
+ # The string representation of exceptions can reside in several places.
+ # urllib.URLError uses a 'reason' attribute that in turn may reference
+ # low level structures such as socket.gaierror, whereas most exceptions
+ # use a 'message' attribute.
+
+ reason = str(error)
+
+ all_str_repr = (
+ getattr(getattr(error, 'reason', None), 'strerror', None),
+ getattr(error, 'reason', None),
+ getattr(error, 'message', None),
+ )
+
+ for str_repr in all_str_repr:
+ if str_repr and isinstance(str_repr, str):
+ reason = str_repr
+ break
+
+ message = 'Failed to download from %s (%s): %s' % (url, type(error).__name__, reason)
+
+ super(DownloadFailed, self).__init__(message)
+
+ self.url = url
+ self.error = error
+ self.stacktrace = stacktrace
+ self.stacktrace_str = ''.join(traceback.format_tb(stacktrace))
+
+
+class DownloadTimeout(DownloadFailed):
+ """
+ Timeout reached while downloading this resource.
+
+ .. versionadded:: 1.8.0
+ """
+
+ def __init__(self, url, error, stacktrace, timeout):
+ super(DownloadTimeout, self).__init__(url, error, stacktrace, 'Failed to download from %s: %0.1f second timeout reached' % (url, timeout))
+
+
+def download(url, timeout = None, retries = None):
+ """
+ Download from the given url.
+
+ .. versionadded:: 1.8.0
+
+ :param str url: uncompressed url to download from
+ :param int timeout: timeout when connection becomes idle, no timeout applied
+ if **None**
+ :param int retries: maximum attempts to impose
+
+ :returns: **bytes** content of the given url
+
+ :raises: :class:`~stem.util.connection.DownloadFailed` if the download fails
+ """
+
+ if retries is None:
+ retries = 0
+
+ start_time = time.time()
+
+ try:
+ return urllib.urlopen(url, timeout = timeout).read()
+ except socket.timeout as exc:
+ raise DownloadTimeout(url, exc, sys.exc_info()[2], timeout)
+ except:
+ exc, stacktrace = sys.exc_info()[1:3]
+
+ if timeout is not None:
+ timeout -= time.time() - start_time
+
+ if retries > 0 and (timeout is None or timeout > 0):
+ log.debug('Failed to download from %s (%i retries remaining): %s' % (url, retries, exc))
+ return download(url, timeout, retries - 1)
+ else:
+ log.debug('Failed to download from %s: %s' % (url, exc))
+ raise DownloadFailed(url, exc, stacktrace)
+
+
def get_connections(resolver = None, process_pid = None, process_name = None):
"""
Retrieves a list of the current connections for a given process. This
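Note the retry semantics above: a socket.timeout surfaces immediately as
DownloadTimeout, while other failures are retried as long as attempts and the
remaining timeout budget allow. A hedged sketch of distinguishing the two (the
url is a placeholder):

  import stem.util.connection

  try:
    data = stem.util.connection.download('https://example.com/resource', timeout = 10, retries = 2)
  except stem.util.connection.DownloadTimeout:
    print('timed out; timeouts are not retried')
  except stem.util.connection.DownloadFailed as exc:
    print('download failed (%s): %s' % (type(exc.error).__name__, exc))

Since DownloadTimeout subclasses DownloadFailed, the more specific handler has
to come first.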
diff --git a/test/integ/util/connection.py b/test/integ/util/connection.py
index 12ce8ac4..4617fe56 100644
--- a/test/integ/util/connection.py
+++ b/test/integ/util/connection.py
@@ -5,11 +5,18 @@ that we're running.
import unittest
+import stem.util.connection
import stem.util.system
import test.require
import test.runner
-from stem.util.connection import RESOLVER_COMMAND, Resolver, get_connections, system_resolvers
+from stem.util.connection import Resolver
+
+try:
+ # account for urllib's change between python 2.x and 3.x
+ import urllib.request as urllib
+except ImportError:
+ import urllib2 as urllib
class TestConnection(unittest.TestCase):
@@ -20,22 +27,40 @@ class TestConnection(unittest.TestCase):
if test.runner.Torrc.PORT not in runner.get_options():
self.skipTest('(no control port)')
return
- elif resolver not in system_resolvers():
+ elif resolver not in stem.util.connection.system_resolvers():
self.skipTest('(resolver unavailable on this platform)')
return
with runner.get_tor_socket():
- connections = get_connections(resolver, process_pid = runner.get_pid())
+ connections = stem.util.connection.get_connections(resolver, process_pid = runner.get_pid())
for conn in connections:
if conn.local_address == '127.0.0.1' and conn.local_port == test.runner.CONTROL_PORT:
return
- resolver_command = RESOLVER_COMMAND[resolver].format(pid = runner.get_pid())
+ resolver_command = stem.util.connection.RESOLVER_COMMAND[resolver].format(pid = runner.get_pid())
resolver_output = stem.util.system.call(resolver_command)
self.fail('Unable to find our controller connection with %s (%s). Connections found were...\n\n%s\n\nCommand output was...\n\n%s' % (resolver, resolver_command, '\n'.join(map(str, connections)), resolver_output))
+ @test.require.only_run_once
+ @test.require.online
+ def test_download(self):
+ response = stem.util.connection.download('https://collector.torproject.org/index/index.json')
+ self.assertTrue(b'"path":"https://collector.torproject.org"' in response)
+
+ @test.require.only_run_once
+ @test.require.online
+ def test_download_failure(self):
+ try:
+ stem.util.connection.download('https://no.such.testing.url')
+ self.fail('expected a stem.util.connection.DownloadFailed to be raised')
+ except stem.util.connection.DownloadFailed as exc:
+ self.assertEqual('Failed to download from https://no.such.testing.url (URLError): Name or service not known', str(exc))
+ self.assertEqual('https://no.such.testing.url', exc.url)
+ self.assertEqual('Name or service not known', exc.error.reason.strerror)
+ self.assertEqual(urllib.URLError, type(exc.error))
+
def test_connections_by_proc(self):
self.check_resolver(Resolver.PROC)
diff --git a/test/unit/util/connection.py b/test/unit/util/connection.py
index a2162029..57718446 100644
--- a/test/unit/util/connection.py
+++ b/test/unit/util/connection.py
@@ -2,6 +2,7 @@
Unit tests for the stem.util.connection functions.
"""
+import io
import platform
import unittest
@@ -10,11 +11,20 @@ import stem.util.connection
from stem.util.connection import Resolver, Connection
try:
+ # account for urllib's change between python 2.x and 3.x
+ import urllib.request as urllib
+except ImportError:
+ import urllib2 as urllib
+
+try:
# added in python 3.3
from unittest.mock import Mock, patch
except ImportError:
from mock import Mock, patch
+URL_OPEN = 'urllib.request.urlopen' if stem.prereq.is_python_3() else 'urllib2.urlopen'
+URL = 'https://example.unit.test.url'
+
NETSTAT_OUTPUT = """\
Active Internet connections (w/o servers)
Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name
@@ -166,6 +176,39 @@ _tor tor 15843 20* internet stream tcp 0x0 192.168.1.100:36174 -->
class TestConnection(unittest.TestCase):
+ @patch(URL_OPEN)
+ def test_download(self, urlopen_mock):
+ urlopen_mock.return_value = io.BytesIO(b'hello')
+
+ self.assertEqual(b'hello', stem.util.connection.download(URL))
+ urlopen_mock.assert_called_with(URL, timeout = None)
+
+ @patch(URL_OPEN)
+ def test_download_failure(self, urlopen_mock):
+ urlopen_mock.side_effect = urllib.URLError('boom')
+
+ try:
+ stem.util.connection.download(URL)
+ self.fail('expected a stem.util.connection.DownloadFailed to be raised')
+ except stem.util.connection.DownloadFailed as exc:
+ self.assertEqual('Failed to download from https://example.unit.test.url (URLError): boom', str(exc))
+ self.assertEqual(URL, exc.url)
+ self.assertEqual('boom', exc.error.reason)
+ self.assertEqual(urllib.URLError, type(exc.error))
+ self.assertTrue('return urllib.urlopen(url, timeout = timeout).read()' in exc.stacktrace_str)
+
+ @patch(URL_OPEN)
+ def test_download_retries(self, urlopen_mock):
+ urlopen_mock.side_effect = urllib.URLError('boom')
+
+ self.assertRaisesRegexp(IOError, 'boom', stem.util.connection.download, URL)
+ self.assertEqual(1, urlopen_mock.call_count)
+
+ urlopen_mock.reset_mock()
+
+ self.assertRaisesRegexp(IOError, 'boom', stem.util.connection.download, URL, retries = 4)
+ self.assertEqual(5, urlopen_mock.call_count)
+
@patch('os.access')
@patch('stem.util.system.is_available')
@patch('stem.util.proc.is_available')
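The unit tests above cover plain failures and retries but not the timeout
path. A sketch of such a test, assuming socket is imported in the test module
alongside the existing URL and URL_OPEN helpers:

  @patch(URL_OPEN)
  def test_download_timeout(self, urlopen_mock):
    urlopen_mock.side_effect = socket.timeout('timed out')

    try:
      stem.util.connection.download(URL, timeout = 5)
      self.fail('expected a stem.util.connection.DownloadTimeout to be raised')
    except stem.util.connection.DownloadTimeout:
      self.assertEqual(1, urlopen_mock.call_count)  # timeouts are not retried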