[tor-commits] [stem/master] Fallback directory information

atagar at torproject.org atagar at torproject.org
Mon Feb 22 02:13:02 UTC 2016


commit 332ef6e6fe89d9f0d48e9b95acff7de9f3ec647f
Author: Damian Johnson <atagar at torproject.org>
Date:   Sun Feb 21 18:04:40 2016 -0800

    Fallback directory information
    
    Recently tor added fallback directory authorities to help clients bootstrap
    when the dirauths are unavailable...
    
      https://trac.torproject.org/projects/tor/ticket/17158
    
    To help support this effort teor asked for a DocTor check to notify us when the
    directories become unavailable...
    
      https://trac.torproject.org/projects/tor/ticket/18177
    
    Great idea but I'm expanding this. Stem now has a FallbackDirectory class with
    two methods for getting this information...
    
      * FallbackDirectory.from_remote() - Reads the latest fallback_dirs.inc from
        gitweb, providing the latest fallback directories in tor's master branch.
    
      * FallbackDirectory.from_cache() - Provides the latest fallback directories
        Stem has cached. This is only as up-to-date as the Stem release you're
        using but is quicker and avoids relying on gitweb.
    
    Advantages are...
    
      * Stem's descriptor.remote module now puts less load on the directory
        authorities since it uses fallback directories as well.
    
      * Running Stem's integ tests with the ONLINE target includes a test that
        exercises all the fallback directories, notifying us if any are down.
---
 cache_manual.py                 |   4 +-
 docs/change_log.rst             |   1 +
 run_tests.py                    |   1 +
 setup.py                        |   2 +-
 stem/descriptor/remote.py       | 266 ++++++++++++++++++++++++++++++++++++----
 test/integ/descriptor/remote.py |  38 ++++++
 test/unit/descriptor/remote.py  |  56 +++++++++
 7 files changed, 341 insertions(+), 27 deletions(-)

diff --git a/cache_manual.py b/cache_manual.py
index aa55dc5..19e73f0 100755
--- a/cache_manual.py
+++ b/cache_manual.py
@@ -6,7 +6,6 @@
 Caches tor's latest manual content. Run this to pick new man page changes.
 """
 
-import os
 import re
 import sys
 
@@ -19,7 +18,6 @@ try:
 except ImportError:
   import urllib2 as urllib
 
-CACHE_PATH = os.path.join(os.path.dirname(__file__), 'stem', 'cached_tor_manual.cfg')
 GITWEB_MAN_LOG = 'https://gitweb.torproject.org/tor.git/log/doc/tor.1.txt'
 MAN_LOG_LINK = "href='/tor.git/commit/doc/tor.1.txt\?id=([^']*)'"
 
@@ -53,4 +51,4 @@ if __name__ == '__main__':
 
   latest_manual.man_commit = man_commit
   latest_manual.stem_commit = stem_commit
-  latest_manual.save(CACHE_PATH)
+  latest_manual.save(stem.manual.CACHE_PATH)
diff --git a/docs/change_log.rst b/docs/change_log.rst
index 81bf5a2..c8dc434 100644
--- a/docs/change_log.rst
+++ b/docs/change_log.rst
@@ -58,6 +58,7 @@ The following are only available within Stem's `git repository
 
  * **Descriptors**
 
+  * Fallback directory information, lessing the load of `stem.descriptor.remote <api/descriptor/remote.html>`_ on the directory authorities
   * Support for ed25519 descriptor fields (:spec:`5a79d67`)
   * Server descriptor validation fails with 'extra-info-digest line had an invalid value' from additions in proposal 228 (:trac:`16227`)
   * :class:`~stem.descriptor.server_descriptor.BridgeDescriptor` now has 'ntor_onion_key' like its unsanitized counterparts
diff --git a/run_tests.py b/run_tests.py
index 3d0f845..2fb93bf 100755
--- a/run_tests.py
+++ b/run_tests.py
@@ -42,6 +42,7 @@ SRC_PATHS = [os.path.join(STEM_BASE, path) for path in (
   'test',
   'run_tests.py',
   'cache_manual.py',
+  'cache_fallback_directories.py',
   'setup.py',
   'tor-prompt',
   os.path.join('docs', 'republish.py'),
diff --git a/setup.py b/setup.py
index 2a78b9b..2e492d4 100644
--- a/setup.py
+++ b/setup.py
@@ -17,5 +17,5 @@ distutils.core.setup(
   packages = ['stem', 'stem.descriptor', 'stem.interpreter', 'stem.response', 'stem.util'],
   keywords = 'tor onion controller',
   scripts = ['tor-prompt'],
-  package_data = {'stem': ['cached_tor_manual.cfg', 'settings.cfg'], 'stem.interpreter': ['settings.cfg'], 'stem.util': ['ports.cfg']},
+  package_data = {'stem': ['cached_tor_manual.cfg', 'settings.cfg'], 'stem.descriptor': ['fallback_directories.cfg'], 'stem.interpreter': ['settings.cfg'], 'stem.util': ['ports.cfg']},
 )
diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py
index d199fb2..dc40e41 100644
--- a/stem/descriptor/remote.py
+++ b/stem/descriptor/remote.py
@@ -44,7 +44,11 @@ itself...
 
   get_authorities - Provides tor directory information.
 
-  DirectoryAuthority - Information about a tor directory authority.
+  Directory - Relay we can retrieve directory information from
+    |- DirectoryAuthority - Information about a tor directory authority
+    +- FallbackDirectory - Directory mirror tor uses when authories are unavailable
+        |- from_cache - Provides fallback directories cached with Stem.
+        +- from_remote - Retrieves fallback directories remotely from tor's latest commit.
 
   Query - Asynchronous request to download tor descriptors
     |- start - issues the query if it isn't already running
@@ -73,7 +77,9 @@ itself...
 """
 
 import io
+import os
 import random
+import re
 import sys
 import threading
 import time
@@ -88,7 +94,7 @@ except ImportError:
 import stem.descriptor
 
 from stem import Flag
-from stem.util import log
+from stem.util import connection, log, tor_tools
 
 # Tor has a limited number of descriptors we can fetch explicitly by their
 # fingerprint or hashes due to a limit on the url length by squid proxies.
@@ -96,6 +102,9 @@ from stem.util import log
 MAX_FINGERPRINTS = 96
 MAX_MICRODESCRIPTOR_HASHES = 92
 
+GITWEB_FALLBACK_DIR_URL = 'https://gitweb.torproject.org/tor.git/plain/src/or/fallback_dirs.inc'
+CACHE_PATH = os.path.join(os.path.dirname(__file__), 'fallback_directories.cfg')
+
 
 def _guess_descriptor_type(resource):
   # Attempts to determine the descriptor type based on the resource url. This
@@ -340,8 +349,10 @@ class Query(object):
     """
 
     if use_authority or not self.endpoints:
-      authority = random.choice(list(filter(lambda auth: auth.v3ident is not None, get_authorities().values())))
-      address, dirport = authority.address, authority.dir_port
+      directories = get_authorities().values() + FallbackDirectory.from_cache().values()
+
+      picked = random.choice(directories)
+      address, dirport = picked.address, picked.dir_port
     else:
       address, dirport = random.choice(self.endpoints)
 
@@ -390,8 +401,8 @@ class DescriptorDownloader(object):
   def __init__(self, use_mirrors = False, **default_args):
     self._default_args = default_args
 
-    authorities = filter(lambda auth: auth.v3ident is not None, get_authorities().values())
-    self._endpoints = [(auth.address, auth.dir_port) for auth in authorities]
+    directories = get_authorities().values() + FallbackDirectory.from_cache().values()
+    self._endpoints = [(directory.address, directory.dir_port) for directory in directories]
 
     if use_mirrors:
       try:
@@ -412,8 +423,8 @@ class DescriptorDownloader(object):
     :raises: **Exception** if unable to determine the directory mirrors
     """
 
-    authorities = filter(lambda auth: auth.v3ident is not None, get_authorities().values())
-    new_endpoints = set([(auth.address, auth.dir_port) for auth in authorities])
+    directories = get_authorities().values() + FallbackDirectory.from_cache().values()
+    new_endpoints = set([(directory.address, directory.dir_port) for directory in directories])
 
     consensus = list(self.get_consensus(document_handler = stem.descriptor.DocumentHandler.DOCUMENT).run())[0]
 
@@ -617,7 +628,39 @@ class DescriptorDownloader(object):
     )
 
 
-class DirectoryAuthority(object):
+class Directory(object):
+  """
+  Relay we can constact for directory information
+
+  .. versionadded:: 1.5.0
+
+  :var str address: IP address of the authority, currently they're all IPv4 but
+    this may not always be the case
+  :var int or_port: port on which the relay services relay traffic
+  :var int dir_port: port on which directory information is available
+  :var str fingerprint: relay fingerprint
+  :var str nickname: nickname of the authority
+  """
+
+  def __init__(self, address, or_port, dir_port, fingerprint, nickname):
+    self.address = address
+    self.or_port = or_port
+    self.dir_port = dir_port
+    self.fingerprint = fingerprint
+    self.nickname = nickname
+
+  def __eq__(self, other):
+    if not isinstance(other, Directory):
+      return False
+
+    for attr in ('nickname', 'address', 'or_port', 'dir_port', 'fingerprint'):
+      if getattr(self, attr) != getattr(other, attr):
+        return False
+
+    return True
+
+
+class DirectoryAuthority(Directory):
   """
   Tor directory authority, a special type of relay `hardcoded into tor
   <https://gitweb.torproject.org/tor.git/tree/src/or/config.c#n819>`_
@@ -648,23 +691,21 @@ class DirectoryAuthority(object):
   .. versionchanged:: 1.3.0
      Added the is_bandwidth_authority attribute.
 
-  :var str nickname: nickname of the authority
-  :var str address: IP address of the authority, currently they're all IPv4 but
-    this may not always be the case
-  :var int or_port: port on which the relay services relay traffic
-  :var int dir_port: port on which directory information is available
-  :var str fingerprint: relay fingerprint
   :var str v3ident: identity key fingerprint used to sign votes and consensus
+  :var bool is_bandwidth_authority: **True** if this is a bandwidth authority,
+    **False** otherwise
   """
 
-  def __init__(self, nickname = None, address = None, or_port = None, dir_port = None, is_bandwidth_authority = False, fingerprint = None, v3ident = None):
-    self.nickname = nickname
-    self.address = address
-    self.or_port = or_port
-    self.dir_port = dir_port
-    self.is_bandwidth_authority = is_bandwidth_authority
-    self.fingerprint = fingerprint
+  def __init__(self, address = None, or_port = None, dir_port = None, fingerprint = None, nickname = None, v3ident = None, is_bandwidth_authority = False):
+    super(DirectoryAuthority, self).__init__(address, or_port, dir_port, fingerprint, nickname)
     self.v3ident = v3ident
+    self.is_bandwidth_authority = is_bandwidth_authority
+
+  def __eq__(self, other):
+    if isinstance(other, DirectoryAuthority) and super(DirectoryAuthority, self).__eq__(other):
+      return self.v3ident == other.v3ident and self.is_bandwidth_authority == other.is_bandwidth_authority
+    else:
+      return False
 
 
 DIRECTORY_AUTHORITIES = {
@@ -767,7 +808,186 @@ def get_authorities():
   The directory information hardcoded into Tor and occasionally changes, so the
   information this provides might not necessarily match your version of tor.
 
-  :returns: dict of str nicknames to :class:`~stem.descriptor.remote.DirectoryAuthority` instances
+  :returns: **dict** of **str** nicknames to :class:`~stem.descriptor.remote.DirectoryAuthority` instances
   """
 
   return dict(DIRECTORY_AUTHORITIES)
+
+
+class FallbackDirectory(Directory):
+  """
+  Tor directories tor uses as alternates for the authorities. These relays are
+  `hardcoded in tor <https://gitweb.torproject.org/tor.git/tree/src/or/fallback_dirs.inc>`_.
+
+  .. versionadded:: 1.5.0
+  """
+
+  def __init__(self, address = None, or_port = None, dir_port = None, fingerprint = None, nickname = None):
+    super(FallbackDirectory, self).__init__(address, or_port, dir_port, fingerprint, nickname)
+
+  @staticmethod
+  def from_cache():
+    """
+    Provides fallback directory information cached with Stem. Unlike
+    :func:`~stem.descriptor.remote.FallbackDirectory.from_remote` this doesn't
+    have any system requirements, and is faster too. Only drawback is that
+    these fallback directories are only as up to date as the Stem release we're
+    using.
+
+    :returns: **dict** of **str** fingerprints to their
+      :class:`~stem.descriptor.remote.FallbackDirectory`
+    """
+
+    conf = stem.util.conf.Config()
+    conf.load(CACHE_PATH)
+
+    results = {}
+
+    for nickname in set([key.split('.')[0] for key in conf.keys()]):
+      if nickname in ('tor_commit', 'stem_commit'):
+        continue
+
+      attr = {}
+
+      for attr_name in ('address', 'or_port', 'dir_port', 'fingerprint'):
+        key = '%s.%s' % (nickname, attr_name)
+        attr[attr_name] = conf.get(key)
+
+        if not attr[attr_name]:
+          raise IOError("'%s' is missing from %s" % (key, CACHE_PATH))
+
+      if not connection.is_valid_ipv4_address(attr['address']):
+        raise IOError("'%s.address' was an invalid address (%s)" % (nickname, attr['address']))
+      elif not connection.is_valid_port(attr['or_port']):
+        raise IOError("'%s.or_port' was an invalid port (%s)" % (nickname, attr['or_port']))
+      elif not connection.is_valid_port(attr['dir_port']):
+        raise IOError("'%s.dir_port' was an invalid port (%s)" % (nickname, attr['dir_port']))
+      elif not tor_tools.is_valid_fingerprint(attr['fingerprint']):
+        raise IOError("'%s.fingerprint' was an invalid fingerprint (%s)" % (nickname, attr['fingerprint']))
+
+      results[attr['fingerprint']] = FallbackDirectory(
+        address = attr['address'],
+        or_port = int(attr['or_port']),
+        dir_port = int(attr['dir_port']),
+        fingerprint = attr['fingerprint'],
+        nickname = nickname,
+      )
+
+    return results
+
+  @staticmethod
+  def from_remote(timeout = 60):
+    """
+    Reads and parses tor's latest fallback directories `from
+    gitweb.torproject.org
+    <https://gitweb.torproject.org/tor.git/plain/src/or/fallback_dirs.inc>`_.
+    Note that while convenient, this reliance on GitWeb means you should alway
+    call with a fallback, such as...
+
+    ::
+
+      try:
+        fallback_directories = stem.descriptor.remote.from_remote()
+      except IOError:
+        fallback_directories = stem.descriptor.remote.from_cache()
+
+    :param int timeout: seconds to wait before timing out the request
+
+    :returns: **dict** of **str** fingerprints to their
+      :class:`~stem.descriptor.remote.FallbackDirectory`
+
+    :raises: **IOError** if unable to retrieve the fallback directories
+    """
+
+    try:
+      fallback_dir_page = urllib.urlopen(GITWEB_FALLBACK_DIR_URL, timeout = timeout).read()
+    except:
+      exc = sys.exc_info()[1]
+      raise IOError("Unable to download tor's fallback directories from %s: %s" % (GITWEB_FALLBACK_DIR_URL, exc))
+
+    # Example of an entry...
+    #
+    #   /*
+    #   wagner
+    #   Flags: Fast Guard Running Stable V2Dir Valid
+    #   Fallback Weight: 43680 / 491920 (8.879%)
+    #   Consensus Weight: 62600 / 546000 (11.465%)
+    #   Rarely used email <trff914 AT gmail DOT com>
+    #   */
+    #   "5.175.233.86:80 orport=443 id=5525D0429BFE5DC4F1B0E9DE47A4CFA169661E33"
+    #   " weight=43680",
+
+    results, nickname, last_line = {}, None, None
+
+    for line in fallback_dir_page.splitlines():
+      if last_line == '/*':
+        nickname = line
+      elif line.startswith('"'):
+        addr_line_match = re.match('"([\d\.]+):(\d+) orport=(\d+) id=([\dA-F]{40}).*', line)
+
+        if addr_line_match:
+          address, dir_port, or_port, fingerprint = addr_line_match.groups()
+
+          if not connection.is_valid_ipv4_address(address):
+            raise IOError('%s has an invalid address: %s' % (nickname, address))
+          elif not connection.is_valid_port(or_port):
+            raise IOError('%s has an invalid or_port: %s' % (nickname, or_port))
+          elif not connection.is_valid_port(dir_port):
+            raise IOError('%s has an invalid dir_port: %s' % (nickname, dir_port))
+          elif not tor_tools.is_valid_fingerprint(fingerprint):
+            raise IOError('%s has an invalid fingerprint: %s' % (nickname, fingerprint))
+
+          results[fingerprint] = FallbackDirectory(
+            address = address,
+            or_port = int(or_port),
+            dir_port = int(dir_port),
+            fingerprint = fingerprint,
+            nickname = nickname,
+          )
+
+      last_line = line
+
+    return results
+
+
+def _fallback_directory_differences(previous_directories, new_directories):
+  """
+  Provides a description of how fallback directories differ.
+  """
+
+  lines = []
+
+  added_fp = set(new_directories.keys()).difference(previous_directories.keys())
+  removed_fp = set(previous_directories.keys()).difference(new_directories.keys())
+
+  for fp in added_fp:
+    directory = new_directories[fp]
+
+    lines += [
+      '* Added %s as a new fallback directory:' % directory.nickname,
+      '  address: %s' % directory.address,
+      '  or_port: %s' % directory.or_port,
+      '  dir_port: %s' % directory.dir_port,
+      '  fingerprint: %s' % directory.fingerprint,
+      '',
+    ]
+
+  for fp in removed_fp:
+    lines.append('* Removed %s as a fallback directory' % previous_directories[fp].nickname)
+
+  for fp in new_directories:
+    if fp in added_fp or fp in removed_fp:
+      continue  # already discussed these
+
+    previous_directory = previous_directories[fp]
+    new_directory = new_directories[fp]
+
+    if previous_directory != new_directory:
+      for attr in ('nickname', 'address', 'or_port', 'dir_port', 'fingerprint'):
+        old_attr = getattr(previous_directory, attr)
+        new_attr = getattr(new_directory, attr)
+
+        if old_attr != new_attr:
+          lines.append('* Changed the %s of %s from %s to %s' % (attr, fp, old_attr, new_attr))
+
+  return '\n'.join(lines)
diff --git a/test/integ/descriptor/remote.py b/test/integ/descriptor/remote.py
index fd37b57..9ce9b8f 100644
--- a/test/integ/descriptor/remote.py
+++ b/test/integ/descriptor/remote.py
@@ -218,3 +218,41 @@ class TestDescriptorDownloader(unittest.TestCase):
     self.assertTrue(isinstance(single_query_results[0], stem.descriptor.networkstatus.KeyCertificate))
 
     self.assertEqual(2, len(list(multiple_query)))
+
+  @require_online
+  def test_that_cache_is_up_to_date(self):
+    """
+    Check if the cached fallback directories bundled with Stem are up to date
+    or not.
+    """
+
+    cached_fallback_directories = stem.descriptor.remote.FallbackDirectory.from_cache()
+    latest_fallback_directories = stem.descriptor.remote.FallbackDirectory.from_remote()
+
+    if cached_fallback_directories != latest_fallback_directories:
+      self.fail("Stem's cached fallback directories are out of date. Please run 'cache_fallback_directories.py'...\n\n%s" % stem.descriptor.remote._fallback_directory_differences(cached_fallback_directories, latest_fallback_directories))
+
+  @require_online
+  def test_that_fallback_directories_are_reachable(self):
+    """
+    Fetch information from each fallback directory to confirm that it's
+    available.
+    """
+
+    unsuccessful = {}
+    downloader = stem.descriptor.remote.DescriptorDownloader()
+    moria1_v3ident = stem.descriptor.remote.get_authorities()['moria1'].v3ident
+
+    for fallback_directory in stem.descriptor.remote.FallbackDirectory.from_cache().values():
+      try:
+        downloader.get_key_certificates(authority_v3idents = moria1_v3ident, endpoints = [(fallback_directory.address, fallback_directory.dir_port)]).run()
+      except Exception as exc:
+        unsuccessful[fallback_directory] = exc
+
+    if unsuccessful:
+      lines = ['We were unable to contact the following fallback directories...\n']
+
+      for fallback_directory, exc in unsuccessful.items():
+        lines.append('* %s:%s (%s): %s' % (fallback_directory.address, fallback_directory.dir_port, fallback_directory.fingerprint, exc))
+
+      self.fail('\n'.join(lines))
diff --git a/test/unit/descriptor/remote.py b/test/unit/descriptor/remote.py
index 19907a7..f0e0dea 100644
--- a/test/unit/descriptor/remote.py
+++ b/test/unit/descriptor/remote.py
@@ -58,6 +58,32 @@ iO3EUE0AEYah2W9gdz8t+i3Dtr0zgqLS841GC/TyDKCm+MKmN8d098qnwK0NGF9q
 -----END SIGNATURE-----
 """
 
+FALLBACK_DIR_CONTENT = b"""\
+/* Trial fallbacks for 0.2.8.1-alpha with ADDRESS_AND_PORT_STABLE_DAYS = 30
+ * This works around an issue where relays post a descriptor without a DirPort
+ * when restarted. If these relays stay up, they will have been up for 120 days
+ * by the 0.2.8 stable release -- teor */
+/*
+wagner
+Flags: Fast Guard Running Stable V2Dir Valid
+Fallback Weight: 43680 / 491920 (8.879%)
+Consensus Weight: 62600 / 546000 (11.465%)
+Rarely used email <trff914 AT gmail DOT com>
+*/
+"5.175.233.86:80 orport=443 id=5525D0429BFE5DC4F1B0E9DE47A4CFA169661E33"
+" weight=43680",
+/*
+kitten2
+Flags: Fast Guard HSDir Running Stable V2Dir Valid
+Fallback Weight: 43680 / 491920 (8.879%)
+Consensus Weight: 59100 / 546000 (10.824%)
+0xEFB74277ECE4E222 Aeris <aeris+tor AT imirhil DOT fr> - 1aerisnnLWPchhDSXpxWGYWwLiSFUVFnd
+*/
+"62.210.124.124:9130 orport=9101 id=2EBD117806EE43C3CC885A8F1E4DC60F207E7D3E"
+" ipv6=[2001:bc8:3f23:100::1]:9101"
+" weight=43680",
+"""
+
 
 class TestDescriptorDownloader(unittest.TestCase):
   @patch(URL_OPEN)
@@ -154,3 +180,33 @@ class TestDescriptorDownloader(unittest.TestCase):
     self.assertEqual(1, len(list(query)))
     self.assertEqual(1, len(list(query)))
     self.assertEqual(1, len(list(query)))
+
+  def test_fallback_directories_from_cache(self):
+    # quick sanity test that we can load cached content
+    fallback_directories = stem.descriptor.remote.FallbackDirectory.from_cache()
+    self.assertTrue(len(fallback_directories) > 10)
+    self.assertEqual('wagner', fallback_directories['5525D0429BFE5DC4F1B0E9DE47A4CFA169661E33'].nickname)
+
+  @patch(URL_OPEN)
+  def test_fallback_directories_from_remote(self, urlopen_mock):
+    urlopen_mock.return_value = io.BytesIO(FALLBACK_DIR_CONTENT)
+    fallback_directories = stem.descriptor.remote.FallbackDirectory.from_remote()
+
+    expected = {
+      '5525D0429BFE5DC4F1B0E9DE47A4CFA169661E33': stem.descriptor.remote.FallbackDirectory(
+        nickname = 'wagner',
+        address = '5.175.233.86',
+        or_port = 443,
+        dir_port = 80,
+        fingerprint = '5525D0429BFE5DC4F1B0E9DE47A4CFA169661E33',
+      ),
+      '2EBD117806EE43C3CC885A8F1E4DC60F207E7D3E': stem.descriptor.remote.FallbackDirectory(
+        nickname = 'kitten2',
+        address = '62.210.124.124',
+        or_port = 9101,
+        dir_port = 9130,
+        fingerprint = '2EBD117806EE43C3CC885A8F1E4DC60F207E7D3E',
+      ),
+    }
+
+    self.assertEqual(expected, fallback_directories)



More information about the tor-commits mailing list