[tor-commits] [stem/master] Stub initial CollecTor module
atagar at torproject.org
atagar at torproject.org
Sat Aug 17 20:44:26 UTC 2019
commit ce8474dcf61cdb3800108e3820dadcef545220ee
Author: Damian Johnson <atagar at torproject.org>
Date: Mon Dec 25 13:10:14 2017 -0800
Stub initial CollecTor module
Presently doesn't do much. Just starting with url resolution and fetching the
index.
---
stem/descriptor/__init__.py | 1 +
stem/descriptor/collector.py | 145 +++++++++++++++++++++++++++++++++++++
test/integ/descriptor/__init__.py | 1 +
test/integ/descriptor/collector.py | 20 +++++
test/settings.cfg | 2 +
test/unit/descriptor/__init__.py | 1 +
test/unit/descriptor/collector.py | 16 ++++
7 files changed, 186 insertions(+)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index ef6530ed..4d13ec60 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -113,6 +113,7 @@ except ImportError:
__all__ = [
'bandwidth_file',
'certificate',
+ 'collector',
'export',
'extrainfo_descriptor',
'hidden_service_descriptor',
diff --git a/stem/descriptor/collector.py b/stem/descriptor/collector.py
new file mode 100644
index 00000000..ca0e6921
--- /dev/null
+++ b/stem/descriptor/collector.py
@@ -0,0 +1,145 @@
+# Copyright 2017, Damian Johnson and The Tor Project
+# See LICENSE for licensing information
+
+"""
+Module for downloading from Tor's descriptor archive, CollecTor...
+
+ https://collector.torproject.org/
+
+This stores descriptors going back in time. If you need to know what the
+network topology looked like at a past point in time, this is the place to go.
+
+With this you can either download and read directly from CollecTor...
+
+::
+
+ import datetime
+ import stem.descriptor.collector
+
+ collector = stem.descriptor.collector.CollecTor()
+ yesterday = datetime.date.today() - datetime.timedelta(1)
+
+ # provide yesterday's exits
+
+ for desc in collector.get_server_descriptors(start = yesterday):
+ if desc.exit_policy.is_exiting_allowed():
+ print ' %s (%s)' % (desc.nickname, desc.fingerprint)
+
+... or download the descriptors to disk and read them later.
+
+::
+
+ import datetime
+ import stem.descriptor.collector
+ import stem.descriptor.reader
+
+ collector = stem.descriptor.collector.CollecTor()
+ yesterday = datetime.date.today() - datetime.timedelta(1)
+
+ collector.download_server_descriptors(
+ destination = '~/descriptor_cache',
+ start = yesterday,
+ ).join()
+
+ reader = stem.descriptor.reader.DescriptorReader('~/descriptor_cache')
+
+ for desc in reader:
+ if desc.exit_policy.is_exiting_allowed():
+ print ' %s (%s)' % (desc.nickname, desc.fingerprint)
+
+.. versionadded:: 1.7.0
+"""
+
+import json
+import time
+
+try:
+ # account for urllib's change between python 2.x and 3.x
+ import urllib.request as urllib
+except ImportError:
+ import urllib2 as urllib
+
+import stem.util.enum
+
+Compression = stem.util.enum.Enum('NONE', 'BZ2', 'GZ', 'XZ')
+
+COLLECTOR_URL = 'https://collector.torproject.org/'
+REFRESH_INDEX_RATE = 3600 # get new index if cached copy is an hour old
+
+COMPRESSION_SUFFIX = {
+ Compression.NONE: '',
+ Compression.BZ2: '.bz2',
+ Compression.GZ: '.gz',
+ Compression.XZ: '.xz',
+}
+
+
+def url(resource, compression = Compression.NONE):
+ """
+ Provides CollecTor url for the given resource.
+
+ :param str resource: resource type of the url
+ :param descriptor.collector.Compression compression: compression type to
+ download from
+
+ :returns: **str** with the CollecTor url
+ """
+
+ if compression not in COMPRESSION_SUFFIX:
+ raise ValueError("'%s' isn't a compression enumeration" % compression)
+
+ # TODO: Not yet sure how to most elegantly map resources to urls. No doubt
+ # this'll change as we add more types.
+
+ if resource == 'index':
+ path = ('index', 'index.json')
+ else:
+ raise ValueError("'%s' isn't a recognized resource type" % resource)
+
+ return ''.join((COLLECTOR_URL, '/'.join(path), COMPRESSION_SUFFIX[compression]))
+
+
+class CollecTor(object):
+ """
+ Downloader for descriptors from CollecTor. The contents of CollecTor are
+ provided in `an index <https://collector.torproject.org/index/index.json>`_
+ that's fetched as required.
+
+ :var descriptor.collector.Compression compression: compression type to
+ download from
+ :var int retries: number of times to attempt the request if downloading it
+ fails
+ :var float timeout: duration before we'll time out our request
+ """
+
+ def __init__(self, compression = Compression.XZ, retries = 2, timeout = None):
+ self.compression = compression
+ self.retries = retries
+ self.timeout = timeout
+
+ self._cached_index = None
+ self._cached_index_at = 0
+
+ def index(self):
+ """
+ Provides the archives available in CollecTor.
+
+ :returns: **dict** with the archive contents
+
+ :raises:
+ If unable to retrieve the index this provide...
+
+ * **ValueError** if the index is malformed
+ * **socket.timeout** if our request timed out
+ * **urllib2.URLError** for most request failures
+ """
+
+ if not self._cached_index or time.time() - self._cached_index_at >= REFRESH_INDEX_RATE:
+ response = urllib.urlopen(url('index', self.compression), timeout = self.timeout).read()
+
+ # TODO: add compression and retry support
+
+ self._cached_index = json.loads(response)
+ self._cached_index_at = time.time()
+
+ return self._cached_index
diff --git a/test/integ/descriptor/__init__.py b/test/integ/descriptor/__init__.py
index 331316a2..2ed1feef 100644
--- a/test/integ/descriptor/__init__.py
+++ b/test/integ/descriptor/__init__.py
@@ -3,6 +3,7 @@ Integration tests for stem.descriptor.* contents.
"""
__all__ = [
+ 'collector',
'extrainfo_descriptor',
'microdescriptor',
'networkstatus',
diff --git a/test/integ/descriptor/collector.py b/test/integ/descriptor/collector.py
new file mode 100644
index 00000000..25f5d503
--- /dev/null
+++ b/test/integ/descriptor/collector.py
@@ -0,0 +1,20 @@
+"""
+Integration tests for stem.descriptor.collector.
+"""
+
+import unittest
+
+import test.require
+
+from stem.descriptor.collector import CollecTor, Compression
+
+
+class TestCollector(unittest.TestCase):
+ @test.require.only_run_once
+ @test.require.online
+ def test_index(self):
+ collector = CollecTor(compression = Compression.NONE)
+ index = collector.index()
+
+ self.assertEqual('https://collector.torproject.org', index['path'])
+ self.assertEqual(['archive', 'recent'], [entry['path'] for entry in index['directories']])
diff --git a/test/settings.cfg b/test/settings.cfg
index d422ffa8..6f71a329 100644
--- a/test/settings.cfg
+++ b/test/settings.cfg
@@ -239,6 +239,7 @@ test.unit_tests
|test.unit.util.tor_tools.TestTorTools
|test.unit.util.__init__.TestBaseUtil
|test.unit.installation.TestInstallation
+|test.unit.descriptor.collector.TestCollector
|test.unit.descriptor.descriptor.TestDescriptor
|test.unit.descriptor.export.TestExport
|test.unit.descriptor.reader.TestDescriptorReader
@@ -309,6 +310,7 @@ test.integ_tests
|test.integ.connection.connect.TestConnect
|test.integ.control.base_controller.TestBaseController
|test.integ.control.controller.TestController
+|test.integ.descriptor.collector.TestCollector
|test.integ.descriptor.remote.TestDescriptorDownloader
|test.integ.descriptor.server_descriptor.TestServerDescriptor
|test.integ.descriptor.extrainfo_descriptor.TestExtraInfoDescriptor
diff --git a/test/unit/descriptor/__init__.py b/test/unit/descriptor/__init__.py
index a2c03f1d..c5cf01e1 100644
--- a/test/unit/descriptor/__init__.py
+++ b/test/unit/descriptor/__init__.py
@@ -6,6 +6,7 @@ import os
__all__ = [
'bandwidth_file',
+ 'collector',
'export',
'extrainfo_descriptor',
'microdescriptor',
diff --git a/test/unit/descriptor/collector.py b/test/unit/descriptor/collector.py
new file mode 100644
index 00000000..5dc12164
--- /dev/null
+++ b/test/unit/descriptor/collector.py
@@ -0,0 +1,16 @@
+"""
+Unit tests for stem.descriptor.collector.
+"""
+
+import unittest
+
+from stem.descriptor.collector import Compression, url
+
+
+class TestCollector(unittest.TestCase):
+ def test_url(self):
+ self.assertEqual('https://collector.torproject.org/index/index.json', url('index'))
+ self.assertEqual('https://collector.torproject.org/index/index.json', url('index', compression = Compression.NONE))
+ self.assertEqual('https://collector.torproject.org/index/index.json.gz', url('index', compression = Compression.GZ))
+ self.assertEqual('https://collector.torproject.org/index/index.json.bz2', url('index', compression = Compression.BZ2))
+ self.assertEqual('https://collector.torproject.org/index/index.json.xz', url('index', compression = Compression.XZ))
More information about the tor-commits mailing list.