[tor-commits] [stem/master] ControlMessage equality and hashing
atagar at torproject.org
atagar at torproject.org
Tue Aug 7 22:19:30 UTC 2018
commit b48ce682ef1a5fc1d4237d106700ec929f49246b
Author: Damian Johnson <atagar at torproject.org>
Date: Tue Aug 7 15:04:20 2018 -0700
ControlMessage equality and hashing
Interesting! Previously I implemented comparability for a single event type but
that's all. I'm surprised we've gone this long without needing it.
Messages have only three things we need to account for in the hash...
* The message content (ie. the bytes we got from tor).
* The message class, if we've casted ourselves to something.
* The arrived_at timestamp if this is an event.
That's it. Everything else is derived from those.
Changing hash_attr()'s parent argument back to a class because a boolean makes
it pretty likely we'll hit an invinite loop. In this case we hit it because we
had the following object hierarchy...
ControlMessage <= has a hash method
Event <= has a hash method
BandwidthEvent <= our object type
If our Event class' hash method calls super it will return the Event class, not
ControlMessage, causing us to loop indefinitely. Changing hash_attr() so it
takes an explicit reference for our parent instead.
---
docs/change_log.rst | 1 +
stem/__init__.py | 2 +-
stem/directory.py | 4 ++--
stem/response/__init__.py | 17 +++++++++++---
stem/response/events.py | 43 ++++++++++-------------------------
stem/util/__init__.py | 6 +++--
test/unit/response/control_message.py | 24 +++++++++++++++++++
7 files changed, 58 insertions(+), 39 deletions(-)
diff --git a/docs/change_log.rst b/docs/change_log.rst
index 282a02c8..362e4e10 100644
--- a/docs/change_log.rst
+++ b/docs/change_log.rst
@@ -58,6 +58,7 @@ The following are only available within Stem's `git repository
* Added the delivered_read, delivered_written, overhead_read, and overhead_written attributes to :class:`~stem.response.events.CircuitBandwidthEvent` (:spec:`fbb38ec`)
* The *config* attribute of :class:`~stem.response.events.ConfChangedEvent` couldn't represent tor configuration options with multiple values. It has been replaced with new *changed* and *unset* attributes.
* Replaced socket's :func:`~stem.socket.ControlPort.get_address`, :func:`~stem.socket.ControlPort.get_port`, and :func:`~stem.socket.ControlSocketFile.get_socket_path` with attributes
+ * :class:`~stem.response.ControlMessage` is now comparable and hashable
* Removed 'raw' argument from :func:`~stem.socket.ControlSocket.send`
* **Descriptors**
diff --git a/stem/__init__.py b/stem/__init__.py
index 6da73320..1679b0a0 100644
--- a/stem/__init__.py
+++ b/stem/__init__.py
@@ -601,7 +601,7 @@ class ORPort(Endpoint):
def __hash__(self):
if self._hash is None:
- self._hash = stem.util._hash_attr(self, 'link_protocols', parent = True)
+ self._hash = stem.util._hash_attr(self, 'link_protocols', parent = Endpoint)
return self._hash
diff --git a/stem/directory.py b/stem/directory.py
index 020ac696..5d243707 100644
--- a/stem/directory.py
+++ b/stem/directory.py
@@ -316,7 +316,7 @@ class Authority(Directory):
def __hash__(self):
if self._hash is None:
- self._hash = stem.util._hash_attr(self, 'v3ident', 'is_bandwidth_authority', parent = True)
+ self._hash = stem.util._hash_attr(self, 'v3ident', 'is_bandwidth_authority', parent = Directory)
return self._hash
@@ -521,7 +521,7 @@ class Fallback(Directory):
def __hash__(self):
if self._hash is None:
- self._hash = stem.util._hash_attr(self, 'has_extrainfo', 'header', parent = True)
+ self._hash = stem.util._hash_attr(self, 'has_extrainfo', 'header', parent = Directory)
return self._hash
diff --git a/stem/response/__init__.py b/stem/response/__init__.py
index 327be819..54eb9c44 100644
--- a/stem/response/__init__.py
+++ b/stem/response/__init__.py
@@ -16,9 +16,7 @@ Parses replies from the control socket.
|- from_str - provides a ControlMessage for the given string
|- is_ok - response had a 250 status
|- content - provides the parsed message content
- |- raw_content - unparsed socket data
- |- __str__ - content stripped of protocol formatting
- +- __iter__ - ControlLine entries for the content of the message
+ +- raw_content - unparsed socket data
ControlLine - String subclass with methods for parsing controller responses.
|- remainder - provides the unparsed content
@@ -36,6 +34,7 @@ import re
import threading
import stem.socket
+import stem.util
import stem.util.str_tools
__all__ = [
@@ -129,6 +128,9 @@ class ControlMessage(object):
Message from the control socket. This is iterable and can be stringified for
individual message components stripped of protocol formatting. Messages are
never empty.
+
+ .. versionchanged:: 1.7.0
+ Implemented equality and hashing.
"""
@staticmethod
@@ -299,6 +301,15 @@ class ControlMessage(object):
return ControlLine(content)
+ def __hash__(self):
+ return stem.util._hash_attr(self, '_raw_content')
+
+ def __eq__(self, other):
+ return hash(self) == hash(other) if isinstance(other, ControlMessage) else False
+
+ def __ne__(self, other):
+ return not self == other
+
class ControlLine(str):
"""
diff --git a/stem/response/events.py b/stem/response/events.py
index 5623d1a7..37b60b38 100644
--- a/stem/response/events.py
+++ b/stem/response/events.py
@@ -57,6 +57,7 @@ class Event(stem.response.ControlMessage):
self.type = str(self).split()[0]
self.arrived_at = arrived_at
+ self._hash = None
# if we're a recognized event type then translate ourselves into that subclass
@@ -71,6 +72,12 @@ class Event(stem.response.ControlMessage):
self._parse()
+ def __hash__(self):
+ if self._hash is None:
+ self._hash = stem.util._hash_attr(self, 'arrived_at', parent = stem.response.ControlMessage)
+
+ return self._hash
+
def _parse_standard_attr(self):
"""
Most events are of the form...
@@ -410,41 +417,15 @@ class CircuitEvent(Event):
self._log_if_unrecognized('remote_reason', stem.CircClosureReason)
def _compare(self, other, method):
+ # sorting circuit events by their identifier
+
if not isinstance(other, CircuitEvent):
return False
- for attr in ('id', 'status', 'path', 'build_flags', 'purpose', 'hs_state', 'rend_query', 'created', 'reason', 'remote_reason', 'socks_username', 'socks_password'):
- my_attr = getattr(self, attr)
- other_attr = getattr(other, attr)
-
- # Our id attribute is technically a string, but Tor conventionally uses
- # ints. Attempt to handle as ints if that's the case so we get numeric
- # ordering.
-
- if attr == 'id' and my_attr and other_attr:
- if my_attr.isdigit() and other_attr.isdigit():
- my_attr = int(my_attr)
- other_attr = int(other_attr)
-
- if my_attr is None:
- my_attr = ''
-
- if other_attr is None:
- other_attr = ''
-
- if my_attr != other_attr:
- return method(my_attr, other_attr)
-
- return True
-
- def __hash__(self):
- return hash(str(self).strip())
-
- def __eq__(self, other):
- return self._compare(other, lambda s, o: s == o)
+ my_id = getattr(self, 'id')
+ their_id = getattr(other, 'id')
- def __ne__(self, other):
- return not self == other
+ return method(my_id, their_id) if my_id != their_id else method(hash(self), hash(other))
def __gt__(self, other):
return self._compare(other, lambda s, o: s > o)
diff --git a/stem/util/__init__.py b/stem/util/__init__.py
index cc40bd0a..f0c2397b 100644
--- a/stem/util/__init__.py
+++ b/stem/util/__init__.py
@@ -130,10 +130,12 @@ def _hash_attr(obj, *attributes, **kwargs):
:param Object obj: object to be hashed
:param list attributes: attribute names to take into account
- :param bool parent: include parent's hash value
+ :param class parent: include parent's hash value
"""
- my_hash = super(type(obj), obj).__hash__() if kwargs.get('parent') else 0
+ parent_class = kwargs.get('parent')
+
+ my_hash = parent_class.__hash__(obj) if parent_class else 0
my_hash = my_hash * 1024 + hash(str(type(obj)))
for attr in attributes:
diff --git a/test/unit/response/control_message.py b/test/unit/response/control_message.py
index 8b05d6a8..abf5debf 100644
--- a/test/unit/response/control_message.py
+++ b/test/unit/response/control_message.py
@@ -168,6 +168,30 @@ class TestControlMessage(unittest.TestCase):
control_socket_file = control_socket.makefile()
self.assertRaises(stem.SocketClosed, stem.socket.recv_message, control_socket_file)
+ def test_equality(self):
+ msg = stem.response.ControlMessage.from_str(EVENT_BW)
+ event_msg = stem.response.ControlMessage.from_str(EVENT_BW, 'EVENT')
+
+ # basic check for identical and differing events
+
+ self.assertEqual(msg, stem.response.ControlMessage.from_str(EVENT_BW))
+ self.assertNotEqual(msg, stem.response.ControlMessage.from_str(EVENT_CIRC_TIMEOUT))
+
+ # casting to different message types should cause us to mismatch
+
+ self.assertNotEqual(event_msg, msg)
+ stem.response.convert('EVENT', msg)
+ self.assertEqual(event_msg, msg)
+
+ # events also take into account when they were received
+
+ event1 = stem.response.ControlMessage.from_str(EVENT_BW, 'EVENT', arrived_at = 123)
+ event2 = stem.response.ControlMessage.from_str(EVENT_BW, 'EVENT', arrived_at = 456)
+ event3 = stem.response.ControlMessage.from_str(EVENT_BW, 'EVENT', arrived_at = 123)
+
+ self.assertNotEqual(event1, event2)
+ self.assertEqual(event1, event3)
+
def _assert_message_parses(self, controller_reply):
"""
Performs some basic sanity checks that a reply mirrors its parsed result.
More information about the tor-commits
mailing list