[tor-commits] [stem/master] Standardize hash handling

atagar at torproject.org atagar at torproject.org
Fri Jul 1 17:26:09 UTC 2016


commit 3b304ded555c1960321b1a476f72f56e11a06310
Author: Damian Johnson <atagar at torproject.org>
Date:   Fri Jul 1 10:27:10 2016 -0700

    Standardize hash handling
    
    Any time we define an __eq__() method we need to define __hash__() as well,
    otherwise using the class as a hash key fails under python 3.x with...
    
      TypeError: unhashable type: 'DirectoryAuthority'
    
    Regression caught by dgoulet for the DirectoryAuthority class so taking this as
    an opportunety to ensure all our classes are in sync with their equality and
    hashing.
---
 stem/__init__.py                       | 26 ++++++++++++++
 stem/descriptor/networkstatus.py       |  9 +++++
 stem/descriptor/remote.py              | 45 ++++++++++++------------
 stem/descriptor/router_status_entry.py | 12 +++++++
 stem/exit_policy.py                    | 62 +++++++++-------------------------
 stem/manual.py                         | 25 +++++---------
 stem/response/events.py                |  3 ++
 stem/version.py                        | 23 ++++---------
 test/unit/descriptor/remote.py         |  4 +++
 9 files changed, 107 insertions(+), 102 deletions(-)

diff --git a/stem/__init__.py b/stem/__init__.py
index 1f61075..56d294d 100644
--- a/stem/__init__.py
+++ b/stem/__init__.py
@@ -860,3 +860,29 @@ HSAuth = stem.util.enum.UppercaseEnum(
   'STEALTH_AUTH',
   'UNKNOWN',
 )
+
+
+def _hash_attr(obj, *attributes, **kwargs):
+  """
+  Provide a hash value for the given set of attributes.
+
+  :param Object obj: object to be hashed
+  :param list attributes: attribute names to take into account
+  :param class parent: parent object to include in the hash value
+  """
+
+  my_hash = 0 if kwargs.get('parent') == None else kwargs.get('parent').__hash__(obj)
+
+  for attr in attributes:
+    my_hash *= 1024
+
+    attr_value = getattr(obj, attr)
+
+    if attr_value is not None:
+      if isinstance(attr_value, dict):
+        for k, v in attr_value.items():
+          my_hash = (my_hash + hash(k)) * 1024 + hash(v)
+      else:
+        my_hash += hash(attr_value)
+
+  return my_hash
diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py
index 08c284b..dcbfc5d 100644
--- a/stem/descriptor/networkstatus.py
+++ b/stem/descriptor/networkstatus.py
@@ -1236,6 +1236,9 @@ class DirectoryAuthority(Descriptor):
 
     return method(str(self).strip(), str(other).strip())
 
+  def __hash__(self):
+    return hash(str(self).strip())
+
   def __eq__(self, other):
     return self._compare(other, lambda s, o: s == o)
 
@@ -1352,6 +1355,9 @@ class KeyCertificate(Descriptor):
 
     return method(str(self).strip(), str(other).strip())
 
+  def __hash__(self):
+    return hash(str(self).strip())
+
   def __eq__(self, other):
     return self._compare(other, lambda s, o: s == o)
 
@@ -1404,6 +1410,9 @@ class DocumentSignature(object):
 
     return method(True, True)  # we're equal
 
+  def __hash__(self):
+    return hash(str(self).strip())
+
   def __eq__(self, other):
     return self._compare(other, lambda s, o: s == o)
 
diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py
index 6d48e39..69f4d39 100644
--- a/stem/descriptor/remote.py
+++ b/stem/descriptor/remote.py
@@ -91,6 +91,8 @@ import threading
 import time
 import zlib
 
+import stem
+
 try:
   # account for urllib's change between python 2.x and 3.x
   import urllib.request as urllib
@@ -716,15 +718,14 @@ class Directory(object):
     self.dir_port = dir_port
     self.fingerprint = fingerprint
 
-  def __eq__(self, other):
-    if not isinstance(other, Directory):
-      return False
+  def __hash__(self):
+    return stem._hash_attr(self, 'address', 'or_port', 'dir_port', 'fingerprint')
 
-    for attr in ('address', 'or_port', 'dir_port', 'fingerprint'):
-      if getattr(self, attr) != getattr(other, attr):
-        return False
+  def __eq__(self, other):
+    return hash(self) == hash(other) if isinstance(other, Directory) else False
 
-    return True
+  def __ne__(self, other):
+    return not self == other
 
 
 class DirectoryAuthority(Directory):
@@ -770,17 +771,14 @@ class DirectoryAuthority(Directory):
     self.v3ident = v3ident
     self.is_bandwidth_authority = is_bandwidth_authority
 
-  def __eq__(self, other):
-    if not isinstance(other, DirectoryAuthority):
-      return False
-    elif not super(DirectoryAuthority, self).__eq__(other):
-      return False
+  def __hash__(self):
+    return stem._hash_attr(self, 'nickname', 'v3ident', 'is_bandwidth_authority', parent = Directory)
 
-    for attr in ('nickname', 'v3ident', 'is_bandwidth_authority'):
-      if getattr(self, attr) != getattr(other, attr):
-        return False
+  def __eq__(self, other):
+    return hash(self) == hash(other) if isinstance(other, DirectoryAuthority) else False
 
-    return True
+  def __ne__(self, other):
+    return not self == other
 
 
 DIRECTORY_AUTHORITIES = {
@@ -1067,15 +1065,14 @@ class FallbackDirectory(Directory):
 
     return results
 
+  def __hash__(self):
+    return stem._hash_attr(self, 'orport_v6', parent = Directory)
+
   def __eq__(self, other):
-    if not isinstance(other, FallbackDirectory):
-      return False
-    elif not super(FallbackDirectory, self).__eq__(other):
-      return False
-    elif self.orport_v6 != other.orport_v6:
-      return False
-
-    return True
+    return hash(self) == hash(other) if isinstance(other, FallbackDirectory) else False
+
+  def __ne__(self, other):
+    return not self == other
 
 
 def _fallback_directory_differences(previous_directories, new_directories):
diff --git a/stem/descriptor/router_status_entry.py b/stem/descriptor/router_status_entry.py
index a30ad0f..cc6c659 100644
--- a/stem/descriptor/router_status_entry.py
+++ b/stem/descriptor/router_status_entry.py
@@ -477,6 +477,9 @@ class RouterStatusEntry(Descriptor):
 
     return method(str(self).strip(), str(other).strip())
 
+  def __hash__(self):
+    return hash(str(self).strip())
+
   def __eq__(self, other):
     return self._compare(other, lambda s, o: s == o)
 
@@ -520,6 +523,9 @@ class RouterStatusEntryV2(RouterStatusEntry):
 
     return method(str(self).strip(), str(other).strip())
 
+  def __hash__(self):
+    return hash(str(self).strip())
+
   def __eq__(self, other):
     return self._compare(other, lambda s, o: s == o)
 
@@ -604,6 +610,9 @@ class RouterStatusEntryV3(RouterStatusEntry):
 
     return method(str(self).strip(), str(other).strip())
 
+  def __hash__(self):
+    return hash(str(self).strip())
+
   def __eq__(self, other):
     return self._compare(other, lambda s, o: s == o)
 
@@ -664,6 +673,9 @@ class RouterStatusEntryMicroV3(RouterStatusEntry):
 
     return method(str(self).strip(), str(other).strip())
 
+  def __hash__(self):
+    return hash(str(self).strip())
+
   def __eq__(self, other):
     return self._compare(other, lambda s, o: s == o)
 
diff --git a/stem/exit_policy.py b/stem/exit_policy.py
index c630608..a0b3064 100644
--- a/stem/exit_policy.py
+++ b/stem/exit_policy.py
@@ -70,6 +70,7 @@ from __future__ import absolute_import
 import socket
 import zlib
 
+import stem
 import stem.prereq
 import stem.util.connection
 import stem.util.enum
@@ -519,10 +520,7 @@ class ExitPolicy(object):
     return self._hash
 
   def __eq__(self, other):
-    if isinstance(other, ExitPolicy):
-      return self._get_rules() == list(other)
-    else:
-      return False
+    return hash(self) == hash(other) if isinstance(other, ExitPolicy) else False
 
   def __ne__(self, other):
     return not self == other
@@ -605,10 +603,7 @@ class MicroExitPolicy(ExitPolicy):
     return hash(str(self))
 
   def __eq__(self, other):
-    if isinstance(other, MicroExitPolicy):
-      return str(self) == str(other)
-    else:
-      return False
+    return hash(self) == hash(other) if isinstance(other, MicroExitPolicy) else False
 
   def __ne__(self, other):
     return not self == other
@@ -903,25 +898,6 @@ class ExitPolicyRule(object):
 
     return label
 
-  def __hash__(self):
-    if self._hash is None:
-      my_hash = 0
-
-      for attr in ('is_accept', 'address', 'min_port', 'max_port'):
-        my_hash *= 1024
-
-        attr_value = getattr(self, attr)
-
-        if attr_value is not None:
-          my_hash += hash(attr_value)
-
-      my_hash *= 1024
-      my_hash += hash(self.get_mask(False))
-
-      self._hash = my_hash
-
-    return self._hash
-
   @lru_cache()
   def _get_mask_bin(self):
     # provides an integer representation of our mask
@@ -1036,16 +1012,14 @@ class ExitPolicyRule(object):
     else:
       raise ValueError("Port value isn't a wildcard, integer, or range: %s" % rule)
 
-  def __eq__(self, other):
-    if isinstance(other, ExitPolicyRule):
-      # Our string representation encompasses our effective policy. Technically
-      # this isn't quite right since our rule attribute may differ (ie, 'accept
-      # 0.0.0.0/0' == 'accept 0.0.0.0/0.0.0.0' will be True), but these
-      # policies are effectively equivalent.
+  def __hash__(self):
+    if self._hash is None:
+      self._hash = stem._hash_attr(self, 'is_accept', 'address', 'min_port', 'max_port') * 1024 + hash(self.get_mask(False))
 
-      return hash(self) == hash(other)
-    else:
-      return False
+    return self._hash
+
+  def __eq__(self, other):
+    return hash(self) == hash(other) if isinstance(other, ExitPolicyRule) else False
 
   def __ne__(self, other):
     return not self == other
@@ -1086,19 +1060,15 @@ class MicroExitPolicyRule(ExitPolicyRule):
 
   def __hash__(self):
     if self._hash is None:
-      my_hash = 0
+      self._hash = stem._hash_attr(self, 'is_accept', 'min_port', 'max_port')
 
-      for attr in ('is_accept', 'min_port', 'max_port'):
-        my_hash *= 1024
-
-        attr_value = getattr(self, attr)
-
-        if attr_value is not None:
-          my_hash += hash(attr_value)
+    return self._hash
 
-      self._hash = my_hash
+  def __eq__(self, other):
+    return hash(self) == hash(other) if isinstance(other, MicroExitPolicyRule) else False
 
-    return self._hash
+  def __ne__(self, other):
+    return not self == other
 
 
 DEFAULT_POLICY_RULES = tuple([ExitPolicyRule(rule) for rule in (
diff --git a/stem/manual.py b/stem/manual.py
index 1e2a5e1..fe83487 100644
--- a/stem/manual.py
+++ b/stem/manual.py
@@ -52,6 +52,7 @@ import shutil
 import sys
 import tempfile
 
+import stem
 import stem.prereq
 import stem.util.conf
 import stem.util.enum
@@ -110,15 +111,11 @@ class ConfigOption(object):
     self.summary = summary
     self.description = description
 
-  def __eq__(self, other):
-    if not isinstance(other, ConfigOption):
-      return False
-
-    for attr in ('name', 'category', 'usage', 'summary', 'description'):
-      if getattr(self, attr) != getattr(other, attr):
-        return False
+  def __hash__(self):
+    return stem._hash_attr(self, 'name', 'category', 'usage', 'summary', 'description')
 
-    return True
+  def __eq__(self, other):
+    return hash(self) == hash(other) if isinstance(other, ConfigOption) else False
 
   def __ne__(self, other):
     return not self == other
@@ -470,15 +467,11 @@ class Manual(object):
 
     conf.save(path)
 
-  def __eq__(self, other):
-    if not isinstance(other, Manual):
-      return False
-
-    for attr in ('name', 'synopsis', 'description', 'commandline_options', 'signals', 'files', 'config_options'):
-      if getattr(self, attr) != getattr(other, attr):
-        return False
+  def __hash__(self):
+    return stem._hash_attr(self, 'name', 'synopsis', 'description', 'commandline_options', 'signals', 'files', 'config_options')
 
-    return True
+  def __eq__(self, other):
+    return hash(self) == hash(other) if isinstance(other, Manual) else False
 
   def __ne__(self, other):
     return not self == other
diff --git a/stem/response/events.py b/stem/response/events.py
index 35cc98f..22adc25 100644
--- a/stem/response/events.py
+++ b/stem/response/events.py
@@ -413,6 +413,9 @@ class CircuitEvent(Event):
 
     return True
 
+  def __hash__(self):
+    return hash(str(self).strip())
+
   def __eq__(self, other):
     return self._compare(other, lambda s, o: s == o)
 
diff --git a/stem/version.py b/stem/version.py
index 2dbcff2..de4a784 100644
--- a/stem/version.py
+++ b/stem/version.py
@@ -72,6 +72,7 @@ easily parsed and compared, for instance...
 import os
 import re
 
+import stem
 import stem.util.enum
 import stem.util.system
 
@@ -232,6 +233,12 @@ class Version(object):
 
     return method(my_status, other_status)
 
+  def __hash__(self):
+    if self._hash is None:
+      self._hash = stem._hash_attr(self, 'major', 'minor', 'micro', 'patch', 'status')
+
+    return self._hash
+
   def __eq__(self, other):
     return self._compare(other, lambda s, o: s == o)
 
@@ -264,22 +271,6 @@ class Version(object):
 
     return self._compare(other, lambda s, o: s >= o)
 
-  def __hash__(self):
-    if self._hash is None:
-      my_hash = 0
-
-      for attr in ('major', 'minor', 'micro', 'patch', 'status'):
-        my_hash *= 1024
-
-        attr_value = getattr(self, attr)
-
-        if attr_value is not None:
-          my_hash += hash(attr_value)
-
-      self._hash = my_hash
-
-    return self._hash
-
 
 class _VersionRequirements(object):
   """
diff --git a/test/unit/descriptor/remote.py b/test/unit/descriptor/remote.py
index 8193cc3..95fd6a7 100644
--- a/test/unit/descriptor/remote.py
+++ b/test/unit/descriptor/remote.py
@@ -167,6 +167,10 @@ class TestDescriptorDownloader(unittest.TestCase):
     self.assertEqual(1, len(list(query)))
     self.assertEqual(1, len(list(query)))
 
+  def test_using_authorities_in_hash(self):
+    # ensure our DirectoryAuthority instances can be used in hashes
+    {stem.descriptor.remote.get_authorities()['moria1']: 'hello'}
+
   def test_fallback_directories_from_cache(self):
     # quick sanity test that we can load cached content
     fallback_directories = stem.descriptor.remote.FallbackDirectory.from_cache()



More information about the tor-commits mailing list