[tor-commits] [stem/master] Add CollecTor to our descriptor tutorial
atagar at torproject.org
atagar at torproject.org
Sat Aug 17 20:44:27 UTC 2019
commit 6a44d211342a727b71824825262123aeaf300c99
Author: Damian Johnson <atagar at torproject.org>
Date: Sat Aug 17 13:22:53 2019 -0700
Add CollecTor to our descriptor tutorial
Replacing our deprecated stem.descriptor.reader example with usage of our new
collector module.
---
docs/_static/example/collector_caching.py | 18 ++++++++++
docs/_static/example/collector_reading.py | 10 ++++++
docs/_static/example/past_descriptors.py | 5 ---
docs/api.rst | 1 +
docs/api/descriptor/collector.rst | 5 +++
docs/change_log.rst | 3 +-
docs/contents.rst | 1 +
docs/tutorials/mirror_mirror_on_the_wall.rst | 13 +++++--
stem/descriptor/collector.py | 51 ++++++----------------------
stem/descriptor/server_descriptor.py | 17 ++++++----
test/unit/tutorial.py | 18 +++++-----
11 files changed, 78 insertions(+), 64 deletions(-)
diff --git a/docs/_static/example/collector_caching.py b/docs/_static/example/collector_caching.py
new file mode 100644
index 00000000..bff63c47
--- /dev/null
+++ b/docs/_static/example/collector_caching.py
@@ -0,0 +1,18 @@
+import datetime
+import stem.descriptor
+import stem.descriptor.collector
+
+yesterday = datetime.datetime.utcnow() - datetime.timedelta(days = 1)
+cache_dir = '~/descriptor_cache/server_desc_today'
+
+collector = stem.descriptor.collector.CollecTor()
+
+for f in collector.files('server-descriptor', start = yesterday):
+ f.download(cache_dir)
+
+# then later...
+
+for f in collector.files('server-descriptor', start = yesterday):
+ for desc in f.read(cache_dir):
+ if desc.exit_policy.is_exiting_allowed():
+ print(' %s (%s)' % (desc.nickname, desc.fingerprint))
diff --git a/docs/_static/example/collector_reading.py b/docs/_static/example/collector_reading.py
new file mode 100644
index 00000000..06cc913a
--- /dev/null
+++ b/docs/_static/example/collector_reading.py
@@ -0,0 +1,10 @@
+import datetime
+import stem.descriptor.collector
+
+yesterday = datetime.datetime.utcnow() - datetime.timedelta(days = 1)
+
+# provide yesterday's exits
+
+for desc in stem.descriptor.collector.get_server_descriptors(start = yesterday):
+ if desc.exit_policy.is_exiting_allowed():
+ print(' %s (%s)' % (desc.nickname, desc.fingerprint))
diff --git a/docs/_static/example/past_descriptors.py b/docs/_static/example/past_descriptors.py
deleted file mode 100644
index 41004845..00000000
--- a/docs/_static/example/past_descriptors.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from stem.descriptor.reader import DescriptorReader
-
-with DescriptorReader(["/home/atagar/server-descriptors-2013-03.tar"]) as reader:
- for desc in reader:
- print("found relay %s (%s)" % (desc.nickname, desc.fingerprint))
diff --git a/docs/api.rst b/docs/api.rst
index 2e2f9fae..a8ba7e24 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -43,6 +43,7 @@ remotely like Tor does.
* `stem.directory <api/directory.html>`_ - Directory authority and fallback directory information.
* `stem.descriptor.reader <api/descriptor/reader.html>`_ - Reads and parses descriptor files from disk.
* `stem.descriptor.remote <api/descriptor/remote.html>`_ - Downloads descriptors from directory mirrors and authorities.
+* `stem.descriptor.collector <api/descriptor/collector.html>`_ - Downloads past descriptors from `CollecTor <https://metrics.torproject.org/collector.html>`_.
* `stem.descriptor.export <api/descriptor/export.html>`_ - Exports descriptors to other formats.
Utilities
diff --git a/docs/api/descriptor/collector.rst b/docs/api/descriptor/collector.rst
new file mode 100644
index 00000000..e699d0e7
--- /dev/null
+++ b/docs/api/descriptor/collector.rst
@@ -0,0 +1,5 @@
+CollecTor
+=========
+
+.. automodule:: stem.descriptor.collector
+
diff --git a/docs/change_log.rst b/docs/change_log.rst
index fec98f5b..a2337e60 100644
--- a/docs/change_log.rst
+++ b/docs/change_log.rst
@@ -56,6 +56,7 @@ The following are only available within Stem's `git repository
* **Descriptors**
+ * Added the `stem.descriptor.collector <api/descriptor/collector.html>`_ module.
* `Bandwidth file support <api/descriptor/bandwidth_file.html>`_ (:trac:`29056`)
* Ed25519 validity checks are now done through the cryptography module rather than PyNaCl (:trac:`22022`)
* Download compressed descriptors by default (:trac:`29186`)
@@ -134,7 +135,7 @@ and the `stem.directory module <api/directory.html>`_.
* Added the *orport_v6* attribute to the :class:`~stem.directory.Authority` class
* Added server descriptor's new is_hidden_service_dir attribute
* Added the network status vote's new bandwidth_file_headers attribute (:spec:`84591df`)
- * Added the microdescriptor router status entry's new or_addresses attribute (:trac:`26405`, :spec:`fdc8f3e8`)
+ * Added the microdescriptor router status entry's new or_addresses attribute (:trac:`26405`, :spec:`fdc8f3e`)
* Don't retry downloading descriptors when we've timed out
* Don't download from tor26, an authority that frequently times out
* Replaced Bifroest bridge authority with Serge (:trac:`26771`)
diff --git a/docs/contents.rst b/docs/contents.rst
index fb4d6b24..267979e0 100644
--- a/docs/contents.rst
+++ b/docs/contents.rst
@@ -43,6 +43,7 @@ Contents
api/descriptor/bandwidth_file
api/descriptor/certificate
+ api/descriptor/collector
api/descriptor/descriptor
api/descriptor/server_descriptor
api/descriptor/extrainfo_descriptor
diff --git a/docs/tutorials/mirror_mirror_on_the_wall.rst b/docs/tutorials/mirror_mirror_on_the_wall.rst
index eed4c65c..04cc86de 100644
--- a/docs/tutorials/mirror_mirror_on_the_wall.rst
+++ b/docs/tutorials/mirror_mirror_on_the_wall.rst
@@ -117,10 +117,17 @@ Where can I get past descriptors?
---------------------------------
Descriptor archives are available from `CollecTor
-<https://collector.torproject.org/>`_. These archives can be read with
-the `DescriptorReader <../api/descriptor/reader.html>`_...
+<https://metrics.torproject.org/collector.html>`_. If you need Tor's topology
+at a prior point in time this is the place to go!
-.. literalinclude:: /_static/example/past_descriptors.py
+With CollecTor you can either read descriptors directly...
+
+.. literalinclude:: /_static/example/collector_reading.py
+ :language: python
+
+... or download the descriptors to disk and read them later.
+
+.. literalinclude:: /_static/example/collector_caching.py
:language: python
.. _can-i-get-descriptors-from-the-tor-process:
diff --git a/stem/descriptor/collector.py b/stem/descriptor/collector.py
index 20eb6872..28fcbd49 100644
--- a/stem/descriptor/collector.py
+++ b/stem/descriptor/collector.py
@@ -2,50 +2,19 @@
# See LICENSE for licensing information
"""
-Module for downloading from Tor's descriptor archive, CollecTor...
+Descriptor archives are available from `CollecTor
+<https://metrics.torproject.org/collector.html>`_. If you need Tor's topology
+at a prior point in time this is the place to go!
- https://collector.torproject.org/
+With CollecTor you can either read descriptors directly...
-This stores descriptors going back in time. If you need to know what the
-network topology looked like at a past point in time, this is the place to go.
-
-With this you can either download and read directly from CollecTor...
-
-::
-
- import datetime
- import stem.descriptor.collector
-
- yesterday = datetime.datetime.utcnow() - datetime.timedelta(days = 1)
-
- # provide yesterday's exits
-
- for desc in stem.descriptor.collector.get_server_descriptors(start = yesterday):
- if desc.exit_policy.is_exiting_allowed():
- print(' %s (%s)' % (desc.nickname, desc.fingerprint))
+.. literalinclude:: /_static/example/collector_reading.py
+ :language: python
... or download the descriptors to disk and read them later.
-::
-
- import datetime
- import stem.descriptor
- import stem.descriptor.collector
-
- yesterday = datetime.datetime.utcnow() - datetime.timedelta(days = 1)
- cache_dir = '~/descriptor_cache/server_desc_today'
-
- collector = stem.descriptor.collector.CollecTor()
-
- for f in collector.files('server-descriptor', start = yesterday):
- f.download(cache_dir)
-
- # then later...
-
- for f in collector.files('server-descriptor', start = yesterday):
- for desc in f.read(cache_dir):
- if desc.exit_policy.is_exiting_allowed():
- print(' %s (%s)' % (desc.nickname, desc.fingerprint))
+.. literalinclude:: /_static/example/collector_caching.py
+ :language: python
::
@@ -282,7 +251,7 @@ class File(object):
:param str descriptor_type: `descriptor type
<https://metrics.torproject.org/collector.html#data-formats>`_, this is
guessed if not provided
- :var stem.descriptor.__init__.DocumentHandler document_handler: method in
+ :param stem.descriptor.__init__.DocumentHandler document_handler: method in
which to parse a :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
:param int timeout: timeout when connection becomes idle, no timeout
applied if **None**
@@ -553,7 +522,7 @@ class CollecTor(object):
:param datetime.datetime end: time range to end with
:param str cache_to: directory to cache archives into, if an archive is
available here it is not downloaded
- :var stem.descriptor.__init__.DocumentHandler document_handler: method in
+ :param stem.descriptor.__init__.DocumentHandler document_handler: method in
which to parse a :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
:param int version: consensus variant to retrieve (versions 2 or 3)
:param bool microdescriptor: provides the microdescriptor consensus if
diff --git a/stem/descriptor/server_descriptor.py b/stem/descriptor/server_descriptor.py
index 2d9133f5..85e35f57 100644
--- a/stem/descriptor/server_descriptor.py
+++ b/stem/descriptor/server_descriptor.py
@@ -869,21 +869,26 @@ class RelayDescriptor(ServerDescriptor):
self.certificate.validate(self)
@classmethod
- def content(cls, attr = None, exclude = (), sign = False, signing_key = None):
+ def content(cls, attr = None, exclude = (), sign = False, signing_key = None, exit_policy = None):
if signing_key:
sign = True
if attr is None:
attr = {}
- base_header = (
+ if exit_policy is None:
+ exit_policy = REJECT_ALL_POLICY
+
+ base_header = [
('router', '%s %s 9001 0 0' % (_random_nickname(), _random_ipv4_address())),
('published', _random_date()),
('bandwidth', '153600 256000 104590'),
- ('reject', '*:*'),
+ ] + [
+ tuple(line.split(' ', 1)) for line in str(exit_policy).splitlines()
+ ] + [
('onion-key', _random_crypto_blob('RSA PUBLIC KEY')),
('signing-key', _random_crypto_blob('RSA PUBLIC KEY')),
- )
+ ]
if sign:
if attr and 'signing-key' in attr:
@@ -909,8 +914,8 @@ class RelayDescriptor(ServerDescriptor):
))
@classmethod
- def create(cls, attr = None, exclude = (), validate = True, sign = False, signing_key = None):
- return cls(cls.content(attr, exclude, sign, signing_key), validate = validate, skip_crypto_validation = not sign)
+ def create(cls, attr = None, exclude = (), validate = True, sign = False, signing_key = None, exit_policy = None):
+ return cls(cls.content(attr, exclude, sign, signing_key, exit_policy), validate = validate, skip_crypto_validation = not sign)
@lru_cache()
def digest(self, hash_type = DigestHash.SHA1, encoding = DigestEncoding.HEX):
diff --git a/test/unit/tutorial.py b/test/unit/tutorial.py
index c74b1912..9d70a5cf 100644
--- a/test/unit/tutorial.py
+++ b/test/unit/tutorial.py
@@ -8,10 +8,10 @@ import unittest
import stem.descriptor.remote
from stem.control import Controller
-from stem.descriptor.reader import DescriptorReader
from stem.descriptor.router_status_entry import RouterStatusEntryV2, RouterStatusEntryV3
from stem.descriptor.networkstatus import NetworkStatusDocumentV3
from stem.descriptor.server_descriptor import RelayDescriptor
+from stem.exit_policy import ExitPolicy
from test.unit import exec_documentation_example
try:
@@ -165,13 +165,15 @@ class TestTutorial(unittest.TestCase):
self.assertEqual('found relay caerSidi (A7569A83B5706AB1B1A9CB52EFF7D2D32E4553EB)\n', stdout_mock.getvalue())
@patch('sys.stdout', new_callable = StringIO)
- @patch('stem.descriptor.reader.DescriptorReader', spec = DescriptorReader)
- def test_mirror_mirror_on_the_wall_4(self, reader_mock, stdout_mock):
- reader = reader_mock().__enter__()
- reader.__iter__.return_value = iter([RelayDescriptor.create({'router': 'caerSidi 71.35.133.197 9001 0 0'})])
-
- exec_documentation_example('past_descriptors.py')
- self.assertEqual('found relay caerSidi (None)\n', stdout_mock.getvalue())
+ @patch('stem.descriptor.collector.get_server_descriptors')
+ def test_mirror_mirror_on_the_wall_4(self, get_desc_mock, stdout_mock):
+ get_desc_mock.return_value = iter([RelayDescriptor.create({
+ 'router': 'caerSidi 71.35.133.197 9001 0 0',
+ 'fingerprint': '2C3C 4662 5698 B6D6 7DF3 2BC1 918A D3EE 1F99 06B1',
+ }, exit_policy = ExitPolicy('accept *:*'), validate = False)])
+
+ exec_documentation_example('collector_reading.py')
+ self.assertEqual(' caerSidi (2C3C46625698B6D67DF32BC1918AD3EE1F9906B1)\n', stdout_mock.getvalue())
@patch('sys.stdout', new_callable = StringIO)
@patch('stem.descriptor.remote.DescriptorDownloader')
More information about the tor-commits
mailing list