[tor-commits] [stem/master] Fixing deadlock in BaseController
atagar at torproject.org
atagar at torproject.org
Sun Feb 19 02:23:36 UTC 2012
commit d5162f4e369f5d2226f0e5262f2b63025f9a68ec
Author: Damian Johnson <atagar at torproject.org>
Date: Sat Feb 18 18:15:07 2012 -0800
Fixing deadlock in BaseController
Found two concurrency bugs which were causing deadlock issues, and adding a
test that's more likely to trigger connect() and close() concurrency issues.
The issues were...
* The recv() method calls close if the socket is still flagged as being
alive. Unfortunately this can cause deadlock if the closing thread joins
on the recv thread.
* For some reason using a Condition rather than an Event caused the event
loop to sometimes miss the notice that caused the event thread to close,
causing its join() call to get stuck.
---
stem/control.py | 15 +++++----------
stem/socket.py | 31 ++++++++++++++++++++++---------
test/integ/control/base_controller.py | 25 +++++++++++++------------
3 files changed, 40 insertions(+), 31 deletions(-)
diff --git a/stem/control.py b/stem/control.py
index d5596f6..55c6e7a 100644
--- a/stem/control.py
+++ b/stem/control.py
@@ -99,7 +99,7 @@ class BaseController:
self._reader_thread = None
# thread to pull from the _event_queue and call handle_event
- self._event_cond = threading.Condition()
+ self._event_notice = threading.Event()
self._event_thread = None
# saves our socket's prior _connect() and _close() methods so they can be
@@ -291,9 +291,7 @@ class BaseController:
# awake from recv() raising a closure exception. Wake up the event thread
# too so it can end.
- self._event_cond.acquire()
- self._event_cond.notifyAll()
- self._event_cond.release()
+ self._event_notice.set()
# joins on our threads if it's safe to do so
@@ -379,10 +377,8 @@ class BaseController:
if control_message.content()[-1][0] == "650":
# asynchronous message, adds to the event queue and wakes up its handler
- self._event_cond.acquire()
self._event_queue.put(control_message)
- self._event_cond.notifyAll()
- self._event_cond.release()
+ self._event_notice.set()
else:
# response to a msg() call
self._reply_queue.put(control_message)
@@ -407,7 +403,6 @@ class BaseController:
event_message = self._event_queue.get_nowait()
self._handle_event(event_message)
except Queue.Empty:
- self._event_cond.acquire()
- self._event_cond.wait()
- self._event_cond.release()
+ self._event_notice.wait()
+ self._event_notice.clear()
diff --git a/stem/socket.py b/stem/socket.py
index 166c348..afea436 100644
--- a/stem/socket.py
+++ b/stem/socket.py
@@ -89,6 +89,9 @@ class ControlSocket:
self._socket, self._socket_file = None, None
self._is_alive = False
+ # indicates that we're in the midst of calling close()
+ self._handling_close = False
+
# Tracks sending and receiving separately. This should be safe, and doing
# so prevents deadlock where we block writes because we're waiting to read
# a message that isn't coming.
@@ -135,15 +138,22 @@ class ControlSocket:
complete message
"""
- try:
- with self._recv_lock:
- if not self.is_alive(): raise SocketClosed()
- return recv_message(self._socket_file)
- except SocketClosed, exc:
- # if recv_message raises a SocketClosed then we should properly shut
- # everything down
- if self.is_alive(): self.close()
- raise exc
+ with self._recv_lock:
+ try:
+ # makes a temporary reference to the _socket_file because connect()
+ # and close() may set or unset it
+
+ socket_file = self._socket_file
+
+ if not socket_file: raise SocketClosed()
+ return recv_message(socket_file)
+ except SocketClosed, exc:
+ # If recv_message raises a SocketClosed then we should properly shut
+ # everything down. However, if this was caused *from* a close call
+ # and it's joining on our thread then this would risk deadlock.
+
+ if self.is_alive() and not self._handling_close: self.close()
+ raise exc
def is_alive(self):
"""
@@ -197,6 +207,7 @@ class ControlSocket:
# is causing our is_alive() state to change.
is_change = self.is_alive()
+ self._handling_close = True
if self._socket:
# if we haven't yet established a connection then this raises an error
@@ -223,6 +234,8 @@ class ControlSocket:
if is_change:
self._close()
+
+ self._handling_close = False
def _get_send_lock(self):
"""
diff --git a/test/integ/control/base_controller.py b/test/integ/control/base_controller.py
index 2648496..4c6260d 100644
--- a/test/integ/control/base_controller.py
+++ b/test/integ/control/base_controller.py
@@ -38,8 +38,6 @@ class TestBaseController(unittest.TestCase):
Basic sanity check for the from_port constructor.
"""
- self.skipTest("work in progress")
-
if test.runner.Torrc.PORT in test.runner.get_runner().get_options():
controller = stem.control.BaseController.from_port(control_port = test.runner.CONTROL_PORT)
self.assertTrue(isinstance(controller, stem.control.BaseController))
@@ -51,21 +49,30 @@ class TestBaseController(unittest.TestCase):
Basic sanity check for the from_socket_file constructor.
"""
- self.skipTest("work in progress")
-
if test.runner.Torrc.SOCKET in test.runner.get_runner().get_options():
controller = stem.control.BaseController.from_socket_file(test.runner.CONTROL_SOCKET_PATH)
self.assertTrue(isinstance(controller, stem.control.BaseController))
else:
self.assertRaises(stem.socket.SocketError, stem.control.BaseController.from_socket_file, test.runner.CONTROL_SOCKET_PATH)
+ def test_connect_repeatedly(self):
+ """
+ Connects and closes the socket repeatedly. This is a simple attempt to
+ trigger concurrency issues.
+ """
+
+ with test.runner.get_runner().get_tor_socket() as control_socket:
+ controller = stem.control.BaseController(control_socket)
+
+ for i in xrange(250):
+ controller.connect()
+ controller.close()
+
def test_msg(self):
"""
Tests a basic query with the msg() method.
"""
- self.skipTest("work in progress")
-
runner = test.runner.get_runner()
with runner.get_tor_socket() as control_socket:
controller = stem.control.BaseController(control_socket)
@@ -79,8 +86,6 @@ class TestBaseController(unittest.TestCase):
Tests the msg() method against an invalid controller command.
"""
- self.skipTest("work in progress")
-
with test.runner.get_runner().get_tor_socket() as control_socket:
controller = stem.control.BaseController(control_socket)
response = controller.msg("invalid")
@@ -91,8 +96,6 @@ class TestBaseController(unittest.TestCase):
Tests the msg() method against a non-existant GETINFO option.
"""
- self.skipTest("work in progress")
-
with test.runner.get_runner().get_tor_socket() as control_socket:
controller = stem.control.BaseController(control_socket)
response = controller.msg("GETINFO blarg")
@@ -104,8 +107,6 @@ class TestBaseController(unittest.TestCase):
remove_status_listener() methods.
"""
- self.skipTest("work in progress")
-
state_observer = StateObserver()
with test.runner.get_runner().get_tor_socket(False) as control_socket:
More information about the tor-commits
mailing list