[tor-commits] [stem/master] ExtraInfo lazy loading

atagar at torproject.org atagar at torproject.org
Sun Jan 25 22:37:34 UTC 2015


commit 8bbc48950095f35d9a8dbc64f9b27aa66a79875e
Author: Damian Johnson <atagar at torproject.org>
Date:   Sat Jan 17 15:01:30 2015 -0800

    ExtraInfo lazy loading
    
    Implement lazy loading for extrainfo descriptors. This highlighted a bug:
    default attribute values need to be shallow copied. Otherwise mutable
    defaults like lists and dictionaries are shared between descriptors.
---
 stem/descriptor/__init__.py             |    5 +-
 stem/descriptor/extrainfo_descriptor.py |  235 +++++++++++++++----------------
 2 files changed, 116 insertions(+), 124 deletions(-)

diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index 05a7d0d..0baacdb 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -50,6 +50,7 @@ __all__ = [
   'Descriptor',
 ]
 
+import copy
 import os
 import re
 import tarfile
@@ -387,7 +388,7 @@ class Descriptor(object):
     # set defaults
 
     for attr in self.ATTRIBUTES:
-      setattr(self, attr, self.ATTRIBUTES[attr][0])
+      setattr(self, attr, copy.copy(self.ATTRIBUTES[attr][0]))
 
     for keyword, values in list(entries.items()):
       try:
@@ -427,7 +428,7 @@ class Descriptor(object):
           # despite having a validation failure check to see if we set something
           return super(Descriptor, self).__getattribute__(name)
         except AttributeError:
-          setattr(self, name, default)
+          setattr(self, name, copy.copy(default))
 
     return super(Descriptor, self).__getattribute__(name)
 
diff --git a/stem/descriptor/extrainfo_descriptor.py b/stem/descriptor/extrainfo_descriptor.py
index b14932b..97623e7 100644
--- a/stem/descriptor/extrainfo_descriptor.py
+++ b/stem/descriptor/extrainfo_descriptor.py
@@ -534,6 +534,24 @@ def _parse_bridge_ip_transports_line(descriptor, entries):
   descriptor.ip_transports = ip_transports
 
 
+def _parse_router_signature_line(descriptor, entries):
+  value, block_type, block_contents = entries['router-signature'][0]
+
+  if not block_contents or block_type != 'SIGNATURE':
+    raise ValueError("'router-signature' should be followed by a SIGNATURE block rather than a '%s'" % block_type)
+
+  descriptor.signature = block_contents
+
+
+def _parse_router_digest(descriptor, entries):
+  value = _value('router-digest', entries)
+
+  if not stem.util.tor_tools.is_hex_digits(value, 40):
+    raise ValueError('Router digest line had an invalid sha1 digest: router-digest %s' % value)
+
+  descriptor._digest = value
+
+
 _parse_dirreq_v2_resp_line = functools.partial(_parse_dirreq_line, 'dirreq-v2-resp', 'dir_v2_responses', 'dir_v2_responses_unknown')
 _parse_dirreq_v3_resp_line = functools.partial(_parse_dirreq_line, 'dirreq-v3-resp', 'dir_v3_responses', 'dir_v3_responses_unknown')
 _parse_dirreq_v2_direct_dl_line = functools.partial(_parse_dirreq_line, 'dirreq-v2-direct-dl', 'dir_v2_direct_dl', 'dir_v2_direct_dl_unknown')
@@ -673,6 +691,85 @@ class ExtraInfoDescriptor(Descriptor):
   a default value, others are left as **None** if undefined
   """
 
+  ATTRIBUTES = {
+    'nickname': (None, _parse_extra_info_line),
+    'fingerprint': (None, _parse_extra_info_line),
+    'published': (None, _parse_published_line),
+    'geoip_db_digest': (None, _parse_geoip_db_digest_line),
+    'geoip6_db_digest': (None, _parse_geoip6_db_digest_line),
+    'transport': ({}, _parse_transport_line),
+
+    'conn_bi_direct_end': (None, _parse_conn_bi_direct_line),
+    'conn_bi_direct_interval': (None, _parse_conn_bi_direct_line),
+    'conn_bi_direct_below': (None, _parse_conn_bi_direct_line),
+    'conn_bi_direct_read': (None, _parse_conn_bi_direct_line),
+    'conn_bi_direct_write': (None, _parse_conn_bi_direct_line),
+    'conn_bi_direct_both': (None, _parse_conn_bi_direct_line),
+
+    'read_history_end': (None, _parse_read_history_line),
+    'read_history_interval': (None, _parse_read_history_line),
+    'read_history_values': (None, _parse_read_history_line),
+
+    'write_history_end': (None, _parse_write_history_line),
+    'write_history_interval': (None, _parse_write_history_line),
+    'write_history_values': (None, _parse_write_history_line),
+
+    'cell_stats_end': (None, _parse_cell_stats_end_line),
+    'cell_stats_interval': (None, _parse_cell_stats_end_line),
+    'cell_processed_cells': (None, _parse_cell_processed_cells_line),
+    'cell_queued_cells': (None, _parse_cell_queued_cells_line),
+    'cell_time_in_queue': (None, _parse_cell_time_in_queue_line),
+    'cell_circuits_per_decile': (None, _parse_cell_circuits_per_decline_line),
+
+    'dir_stats_end': (None, _parse_dirreq_stats_end_line),
+    'dir_stats_interval': (None, _parse_dirreq_stats_end_line),
+    'dir_v2_ips': (None, _parse_dirreq_v2_ips_line),
+    'dir_v3_ips': (None, _parse_dirreq_v3_ips_line),
+    'dir_v2_share': (None, _parse_dirreq_v2_share_line),
+    'dir_v3_share': (None, _parse_dirreq_v3_share_line),
+    'dir_v2_requests': (None, _parse_dirreq_v2_reqs_line),
+    'dir_v3_requests': (None, _parse_dirreq_v3_reqs_line),
+    'dir_v2_responses': (None, _parse_dirreq_v2_resp_line),
+    'dir_v3_responses': (None, _parse_dirreq_v3_resp_line),
+    'dir_v2_responses_unknown': (None, _parse_dirreq_v2_resp_line),
+    'dir_v3_responses_unknown': (None, _parse_dirreq_v3_resp_line),
+    'dir_v2_direct_dl': (None, _parse_dirreq_v2_direct_dl_line),
+    'dir_v3_direct_dl': (None, _parse_dirreq_v3_direct_dl_line),
+    'dir_v2_direct_dl_unknown': (None, _parse_dirreq_v2_direct_dl_line),
+    'dir_v3_direct_dl_unknown': (None, _parse_dirreq_v3_direct_dl_line),
+    'dir_v2_tunneled_dl': (None, _parse_dirreq_v2_tunneled_dl_line),
+    'dir_v3_tunneled_dl': (None, _parse_dirreq_v3_tunneled_dl_line),
+    'dir_v2_tunneled_dl_unknown': (None, _parse_dirreq_v2_tunneled_dl_line),
+    'dir_v3_tunneled_dl_unknown': (None, _parse_dirreq_v3_tunneled_dl_line),
+
+    'dir_read_history_end': (None, _parse_dirreq_read_history_line),
+    'dir_read_history_interval': (None, _parse_dirreq_read_history_line),
+    'dir_read_history_values': (None, _parse_dirreq_read_history_line),
+
+    'dir_write_history_end': (None, _parse_dirreq_write_history_line),
+    'dir_write_history_interval': (None, _parse_dirreq_write_history_line),
+    'dir_write_history_values': (None, _parse_dirreq_write_history_line),
+
+    'entry_stats_end': (None, _parse_entry_stats_end_line),
+    'entry_stats_interval': (None, _parse_entry_stats_end_line),
+    'entry_ips': (None, _parse_entry_ips_line),
+
+    'exit_stats_end': (None, _parse_exit_stats_end_line),
+    'exit_stats_interval': (None, _parse_exit_stats_end_line),
+    'exit_kibibytes_written': (None, _parse_exit_kibibytes_written_line),
+    'exit_kibibytes_read': (None, _parse_exit_kibibytes_read_line),
+    'exit_streams_opened': (None, _parse_exit_streams_opened_line),
+
+    'bridge_stats_end': (None, _parse_bridge_stats_end_line),
+    'bridge_stats_interval': (None, _parse_bridge_stats_end_line),
+    'bridge_ips': (None, _parse_bridge_ips_line),
+    'geoip_start_time': (None, _parse_geoip_start_time_line),
+    'geoip_client_origins': (None, _parse_geoip_client_origins_line),
+
+    'ip_versions': (None, _parse_bridge_ip_versions_line),
+    'ip_transports': (None, _parse_bridge_ip_transports_line),
+  }
+
   PARSER_FOR_LINE = {
     'extra-info': _parse_extra_info_line,
     'geoip-db-digest': _parse_geoip_db_digest_line,
@@ -732,84 +829,8 @@ class ExtraInfoDescriptor(Descriptor):
     super(ExtraInfoDescriptor, self).__init__(raw_contents)
     raw_contents = stem.util.str_tools._to_unicode(raw_contents)
 
-    self.nickname = None
-    self.fingerprint = None
-    self.published = None
-    self.geoip_db_digest = None
-    self.geoip6_db_digest = None
-    self.transport = {}
-
-    self.conn_bi_direct_end = None
-    self.conn_bi_direct_interval = None
-    self.conn_bi_direct_below = None
-    self.conn_bi_direct_read = None
-    self.conn_bi_direct_write = None
-    self.conn_bi_direct_both = None
-
-    self.read_history_end = None
-    self.read_history_interval = None
-    self.read_history_values = None
-
-    self.write_history_end = None
-    self.write_history_interval = None
-    self.write_history_values = None
-
-    self.cell_stats_end = None
-    self.cell_stats_interval = None
-    self.cell_processed_cells = None
-    self.cell_queued_cells = None
-    self.cell_time_in_queue = None
-    self.cell_circuits_per_decile = None
-
-    self.dir_stats_end = None
-    self.dir_stats_interval = None
-    self.dir_v2_ips = None
-    self.dir_v3_ips = None
-    self.dir_v2_share = None
-    self.dir_v3_share = None
-    self.dir_v2_requests = None
-    self.dir_v3_requests = None
-    self.dir_v2_responses = None
-    self.dir_v3_responses = None
-    self.dir_v2_responses_unknown = None
-    self.dir_v3_responses_unknown = None
-    self.dir_v2_direct_dl = None
-    self.dir_v3_direct_dl = None
-    self.dir_v2_direct_dl_unknown = None
-    self.dir_v3_direct_dl_unknown = None
-    self.dir_v2_tunneled_dl = None
-    self.dir_v3_tunneled_dl = None
-    self.dir_v2_tunneled_dl_unknown = None
-    self.dir_v3_tunneled_dl_unknown = None
-
-    self.dir_read_history_end = None
-    self.dir_read_history_interval = None
-    self.dir_read_history_values = None
-
-    self.dir_write_history_end = None
-    self.dir_write_history_interval = None
-    self.dir_write_history_values = None
-
-    self.entry_stats_end = None
-    self.entry_stats_interval = None
-    self.entry_ips = None
-
-    self.exit_stats_end = None
-    self.exit_stats_interval = None
-    self.exit_kibibytes_written = None
-    self.exit_kibibytes_read = None
-    self.exit_streams_opened = None
-
-    self.bridge_stats_end = None
-    self.bridge_stats_interval = None
-    self.bridge_ips = None
-    self.geoip_start_time = None
-    self.geoip_client_origins = None
-
-    self.ip_versions = None
-    self.ip_transports = None
-
     entries = _get_descriptor_components(raw_contents, validate)
+    self._lazy_loading = not validate
 
     if validate:
       for keyword in self._required_fields():
@@ -828,7 +849,9 @@ class ExtraInfoDescriptor(Descriptor):
       if expected_last_keyword and expected_last_keyword != list(entries.keys())[-1]:
         raise ValueError("Descriptor must end with a '%s' entry" % expected_last_keyword)
 
-    self._parse(entries, validate)
+      self._parse(entries, validate)
+    else:
+      self._entries = entries
 
   def digest(self):
     """
@@ -862,10 +885,13 @@ class RelayExtraInfoDescriptor(ExtraInfoDescriptor):
   **\*** attribute is required when we're parsed with validation
   """
 
-  def __init__(self, raw_contents, validate = True):
-    self.signature = None
+  ATTRIBUTES = dict(ExtraInfoDescriptor.ATTRIBUTES, **{
+    'signature': (None, _parse_router_signature_line),
+  })
 
-    super(RelayExtraInfoDescriptor, self).__init__(raw_contents, validate)
+  PARSER_FOR_LINE = dict(ExtraInfoDescriptor.PARSER_FOR_LINE, **{
+    'router-signature': _parse_router_signature_line,
+  })
 
   @lru_cache()
   def digest(self):
@@ -874,27 +900,6 @@ class RelayExtraInfoDescriptor(ExtraInfoDescriptor):
     raw_content = raw_content[:raw_content.find(ending) + len(ending)]
     return hashlib.sha1(stem.util.str_tools._to_bytes(raw_content)).hexdigest().upper()
 
-  def _parse(self, entries, validate):
-    entries = dict(entries)  # shallow copy since we're destructive
-
-    # handles fields only in server descriptors
-    for keyword, values in list(entries.items()):
-      value, block_type, block_contents = values[0]
-
-      line = '%s %s' % (keyword, value)  # original line
-
-      if block_contents:
-        line += '\n%s' % block_contents
-
-      if keyword == 'router-signature':
-        if validate and (not block_contents or block_type != 'SIGNATURE'):
-          raise ValueError("'router-signature' should be followed by a SIGNATURE block: %s" % line)
-
-        self.signature = block_contents
-        del entries['router-signature']
-
-    ExtraInfoDescriptor._parse(self, entries, validate)
-
 
 class BridgeExtraInfoDescriptor(ExtraInfoDescriptor):
   """
@@ -902,31 +907,17 @@ class BridgeExtraInfoDescriptor(ExtraInfoDescriptor):
   <https://collector.torproject.org/formats.html#bridge-descriptors>`_)
   """
 
-  def __init__(self, raw_contents, validate = True):
-    self._digest = None
+  ATTRIBUTES = dict(ExtraInfoDescriptor.ATTRIBUTES, **{
+    '_digest': (None, _parse_router_digest),
+  })
 
-    super(BridgeExtraInfoDescriptor, self).__init__(raw_contents, validate)
+  PARSER_FOR_LINE = dict(ExtraInfoDescriptor.PARSER_FOR_LINE, **{
+    'router-digest': _parse_router_digest,
+  })
 
   def digest(self):
     return self._digest
 
-  def _parse(self, entries, validate):
-    entries = dict(entries)  # shallow copy since we're destructive
-
-    # handles fields only in server descriptors
-    for keyword, values in list(entries.items()):
-      value, _, _ = values[0]
-      line = '%s %s' % (keyword, value)  # original line
-
-      if keyword == 'router-digest':
-        if validate and not stem.util.tor_tools.is_hex_digits(value, 40):
-          raise ValueError('Router digest line had an invalid sha1 digest: %s' % line)
-
-        self._digest = value
-        del entries['router-digest']
-
-    ExtraInfoDescriptor._parse(self, entries, validate)
-
   def _required_fields(self):
     excluded_fields = [
       'router-signature',





More information about the tor-commits mailing list