[tor-commits] [sbws/master] scanner: Warn when there is no progress
juga at torproject.org
juga at torproject.org
Thu Mar 21 22:05:13 UTC 2019
commit ea37bc9f74dfa238dc6e94f2a98a762a7d27f93a
Author: juga0 <juga at riseup.net>
Date: Mon Feb 18 14:15:29 2019 +0000
scanner: Warn when there is no progress
measuring unique relays.
Create new module heartbeat.
Closes: #28652
---
sbws/core/scanner.py | 15 ++++++++++++-
sbws/lib/heartbeat.py | 46 ++++++++++++++++++++++++++++++++++++++++
sbws/lib/relaylist.py | 7 ++++++
tests/unit/lib/test_heartbeat.py | 21 ++++++++++++++++++
4 files changed, 88 insertions(+), 1 deletion(-)
diff --git a/sbws/core/scanner.py b/sbws/core/scanner.py
index 24a975b..f6443e5 100644
--- a/sbws/core/scanner.py
+++ b/sbws/core/scanner.py
@@ -33,6 +33,7 @@ import requests
import random
from .. import settings
+from ..lib import heartbeat
rng = random.SystemRandom()
log = logging.getLogger(__name__)
@@ -478,6 +479,13 @@ def main_loop(args, conf, controller, relay_list, circuit_builder, result_dump,
measured.
"""
+ # Variable to count total progress in the last days:
+ # In case it is needed to see which relays are not being measured,
+ # store their fingerprint, not only their number.
+ measured_fp_set = set()
+ measured_percent = 0
+ main_loop_tstart = time.monotonic()
+
# Set the time to wait for a thread to finish as the half of an HTTP
# request timeout.
# Do not start a new loop if sbws is stopping.
@@ -503,7 +511,7 @@ def main_loop(args, conf, controller, relay_list, circuit_builder, result_dump,
[args, conf, destinations, circuit_builder, relay_list,
target], {}, callback, callback_err)
pending_results.append(async_result)
-
+ measured_fp_set.add(async_result)
# After the for has finished, the pool has queued all the relays
# and pending_results has the list of all the AsyncResults.
# It could also be obtained with pool._cache, which contains
@@ -511,6 +519,11 @@ def main_loop(args, conf, controller, relay_list, circuit_builder, result_dump,
num_relays_to_measure = len(pending_results)
wait_for_results(num_relays_to_measure, pending_results)
+ measured_percent = heartbeat.total_measured_percent(
+ measured_percent, relay_list.relays_fingerprints, measured_fp_set,
+ main_loop_tstart, conf.getpath('paths', 'state_fname')
+ )
+
loop_tstop = time.time()
loop_tdelta = (loop_tstop - loop_tstart) / 60
log.debug("Measured %s relays in %s minutes", num_relays, loop_tdelta)
diff --git a/sbws/lib/heartbeat.py b/sbws/lib/heartbeat.py
new file mode 100644
index 0000000..7dfa716
--- /dev/null
+++ b/sbws/lib/heartbeat.py
@@ -0,0 +1,46 @@
+"""
+Classes and functions to implement a heartbeat system to monitor the progress.
+"""
+import logging
+import time
+
+from ..util.state import State
+
+
+log = logging.getLogger(__name__)
+
+# NOTE tech-debt: this could be tracked globally as a singleton
+consensus_fp_set = set()
+
+
+def total_measured_percent(measured_percent, relays_fingerprints,
+                           measured_fp_set, main_loop_tstart, state_path):
+    """Log scanner progress and return the percent of relays measured.
+
+    ``relays_fingerprints`` are merged into the module-level
+    ``consensus_fp_set``; the result is ``len(measured_fp_set)`` over the
+    size of that set, rounded to an integer, or 0 while the set is empty.
+    ``main_loop_tstart`` is a ``time.monotonic()`` reading and ``state_path``
+    is given to ``State`` to read ``recent_priority_list_count``. A warning
+    is logged when the result is not greater than ``measured_percent``.
+    """
+    # NOTE: in a future refactor make State a singleton in __init__.py
+    loops_count = State(state_path).get('recent_priority_list_count', 0)
+
+    # Store all the relays seen in all the consensuses (mutated in place).
+    consensus_fp_set.update(relays_fingerprints)
+
+    not_measured_fp_set = consensus_fp_set.difference(measured_fp_set)
+    main_loop_tdelta = (time.monotonic() - main_loop_tstart) / 60
+    # Avoid ZeroDivisionError when no relay has been seen yet.
+    seen = len(consensus_fp_set)
+    new_measured_percent = (
+        round(len(measured_fp_set) / seen * 100) if seen else 0)
+    log.info("Run %s main loops.", loops_count)
+    log.info("Measured in total %s (%s%%) unique relays in %s minutes",
+             len(measured_fp_set), new_measured_percent, main_loop_tdelta)
+    log.info("%s relays still not measured.", len(not_measured_fp_set))
+    # Equality is also reached once every relay has been measured.
+    if new_measured_percent <= measured_percent:
+        log.warning("There is no progress measuring relays!.")
+    return new_measured_percent
diff --git a/sbws/lib/relaylist.py b/sbws/lib/relaylist.py
index 6a66069..fcfbdea 100644
--- a/sbws/lib/relaylist.py
+++ b/sbws/lib/relaylist.py
@@ -355,6 +355,13 @@ class RelayList:
def authorities(self):
return self._relays_with_flag(Flag.AUTHORITY)
+ @property
+ def relays_fingerprints(self):
+ # Fingerprints of every relay in ``self.relays``. The public ``relays``
+ # is used instead of ``_relays`` so that the list gets updated if needed,
+ # since this property is used to know which fingerprints are in the consensus.
+ return [r.fingerprint for r in self.relays]
+
def random_relay(self):
return self.rng.choice(self.relays)
diff --git a/tests/unit/lib/test_heartbeat.py b/tests/unit/lib/test_heartbeat.py
new file mode 100644
index 0000000..55573a8
--- /dev/null
+++ b/tests/unit/lib/test_heartbeat.py
@@ -0,0 +1,21 @@
+"""Unit tests for heartbeat"""
+import logging
+import time
+
+from sbws.lib import heartbeat
+
+
+def test_total_measured_percent(conf, caplog):
+    """Two measured relays out of three known ones must be logged as 67%."""
+    measured_percent = 0
+    measured_fp_set = {'A', 'B'}
+    relays_fingerprints = {'A', 'B', 'C'}
+    main_loop_tstart = time.monotonic()
+    caplog.set_level(logging.INFO)
+    new_measured_percent = heartbeat.total_measured_percent(
+        measured_percent, relays_fingerprints, measured_fp_set,
+        main_loop_tstart, conf.getpath('paths', 'state_fname')
+    )
+    assert new_measured_percent == 67
+    assert "Measured in total 2 (67%)" in caplog.text
+    assert "1 relays still not measured" in caplog.text
More information about the tor-commits
mailing list