[tor-commits] [sbws/master] new: v3bwfile: Add time to report half network
juga at torproject.org
juga at torproject.org
Thu Mar 21 18:30:42 UTC 2019
commit 3db367b26eb584cce6f7e06a0cf1e98fd5771e71
Author: juga0 <juga at riseup.net>
Date: Wed Mar 13 16:11:26 2019 +0000
new: v3bwfile: Add time to report half network
to the bandwidth file header.
Closes: #28983.
---
sbws/lib/v3bwfile.py | 59 ++++++++++++++++++++++++++++++++++++++++-
sbws/util/timestamp.py | 4 +++
tests/unit/lib/test_v3bwfile.py | 15 +++++++++++
3 files changed, 77 insertions(+), 1 deletion(-)
diff --git a/sbws/lib/v3bwfile.py b/sbws/lib/v3bwfile.py
index 048f1b3..61657b9 100644
--- a/sbws/lib/v3bwfile.py
+++ b/sbws/lib/v3bwfile.py
@@ -18,7 +18,7 @@ from sbws.globals import (SPEC_VERSION, BW_LINE_SIZE, SBWS_SCALE_CONSTANT,
from sbws.lib.resultdump import ResultSuccess, _ResultType
from sbws.util.filelock import DirectoryLock
from sbws.util.timestamp import (now_isodt_str, unixts_to_isodt_str,
- now_unixts)
+ now_unixts, isostr_to_dt_obj)
from sbws.util.state import State
log = logging.getLogger(__name__)
@@ -76,6 +76,9 @@ BW_HEADER_KEYVALUES_MONITOR = [
'recent_measurement_exclusion_not_distanciated_count',
'recent_measurement_exclusion_not_recent_count',
'recent_measurement_exclusion_not_min_num_count',
+
+ # The time it took to report about half of the network.
+ 'time_to_report_half_network',
]
BANDWIDTH_HEADER_KEY_VALUES_INIT = \
['earliest_bandwidth', 'generator_started',
@@ -432,6 +435,59 @@ class V3BWHeader(object):
[setattr(self, k, str(v)) for k, v in kwargs.items()
if k in STATS_KEYVALUES]
+ def add_time_report_half_network(self):
+ """Add to the header the time it took to measure half of the network.
+
+ It is not the time the scanner actually takes on measuring all the
+ network, but the ``number_eligible_relays`` that are reported in the
+ bandwidth file and directory authorities will vote on.
+
+ This is calculated for half of the network, so that failed or not
+ reported relays do not affect the result too much.
+
+ For instance, if there are 6500 relays in the network, half of the
+ network would be 3250. And if there were 4000 eligible relays
+ measured in an interval of 3 days, the time to measure half of the
+ network would be 3 days * 3250 / 4000.
+
+ Since the elapsed time is calculated from the earliest and the
+ latest measurement and a relay might have more than 2 measurements,
+ this would give an estimate on how long it would take to measure
+ the network including all the valid measurements.
+
+ Also log an estimate of how long it would take to measure the whole
+ network with the current number of relays included in the bandwidth
+ file.
+
+ ``time_to_report_half_network`` is stored as a string holding a
+ rounded number of seconds, and it is only set when the number of
+ eligible relays is at least half of the consensus relays. Nothing is
+ set or logged when either relay count is missing or zero.
+ """
+ # NOTE: in a future refactor, do not convert attributes to str until
+ # writing to the file, so that they do not need to be converted back
+ # to do some calculations.
+ # Seconds between the earliest and the latest bandwidth timestamps.
+ elapsed_time = (
+ (isostr_to_dt_obj(self.latest_bandwidth)
+ - isostr_to_dt_obj(self.earliest_bandwidth))
+ .total_seconds())
+
+ # These attributes were added later, and some tests that
+ # do not initialize them would fail.
+ eligible_relays = int(getattr(self, 'number_eligible_relays', 0))
+ consensus_relays = int(getattr(self, 'number_consensus_relays', 0))
+ # Both counts are needed; returning here also avoids dividing by a
+ # zero ``eligible_relays`` below.
+ if not(eligible_relays and consensus_relays):
+ return
+
+ half_network = consensus_relays / 2
+ # Calculate the time it would take to measure half of the network.
+ # The header key is only set when at least half of the network is
+ # eligible.
+ if eligible_relays >= half_network:
+ time_half_network = round(
+ elapsed_time * half_network / eligible_relays
+ )
+ self.time_to_report_half_network = str(time_half_network)
+
+ # In any case, log an estimate of the time to measure the whole
+ # network. The estimate is in seconds and is logged in hours.
+ estimated_time = round(
+ elapsed_time * consensus_relays / eligible_relays
+ )
+ log.info("Estimated time to measure the network: %s hours.",
+ round(estimated_time / 60 / 60))
+
class V3BWLine(object):
"""
@@ -773,6 +829,7 @@ class V3BWFile(object):
# log.debug(bw_lines[-1])
# Not using the result for now, just warning
cls.is_max_bw_diff_perc_reached(bw_lines, max_bw_diff_perc)
+ header.add_time_report_half_network()
f = cls(header, bw_lines)
return f
diff --git a/sbws/util/timestamp.py b/sbws/util/timestamp.py
index 00f3d1c..ff3efd8 100644
--- a/sbws/util/timestamp.py
+++ b/sbws/util/timestamp.py
@@ -14,6 +14,10 @@ def dt_obj_to_isodt_str(dt):
return dt.replace(microsecond=0).isoformat()
+def isostr_to_dt_obj(isostr):
+ """Parse a ``%Y-%m-%dT%H:%M:%S`` string into a naive datetime object.
+
+ The format has no microseconds and no UTC offset; ``strptime`` raises
+ ``ValueError`` for strings that do not match it.
+ """
+ return datetime.strptime(isostr, "%Y-%m-%dT%H:%M:%S")
+
+
def unixts_to_dt_obj(unixts):
"""
Convert unix timestamp to naive datetime object in UTC time zone.
diff --git a/tests/unit/lib/test_v3bwfile.py b/tests/unit/lib/test_v3bwfile.py
index c4c6a20..a7c0946 100644
--- a/tests/unit/lib/test_v3bwfile.py
+++ b/tests/unit/lib/test_v3bwfile.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
"""Test generation of bandwidth measurements document (v3bw)"""
import json
+import logging
import math
import os.path
@@ -393,3 +394,17 @@ def test_update_progress(datadir, tmpdir):
assert header.number_consensus_relays == '3'
assert header.number_eligible_relays == '3'
assert header.percent_eligible_relays == '100'
+
+
+def test_time_measure_half_network(caplog):
+ """Check the half-network time header value and the logged estimate.
+
+ With 6500 consensus relays and 4000 eligible relays, the header must
+ hold '70200' (seconds) and the last log record must report 39 hours.
+ NOTE(review): the elapsed time comes from module-level fixtures
+ (``timestamp_l``, ``earliest_bandwidth``, ...) defined outside this
+ hunk -- confirm their values when changing the expected numbers.
+ """
+ header = V3BWHeader(timestamp_l,
+ file_created=file_created,
+ generator_started=generator_started,
+ earliest_bandwidth=earliest_bandwidth)
+ header.number_consensus_relays = '6500'
+ header.number_eligible_relays = '4000'
+ caplog.set_level(logging.INFO)
+ header.add_time_report_half_network()
+ assert header.time_to_report_half_network == '70200' # 19.5h
+ expected_log = "Estimated time to measure the network: 39 hours." # 19.5*2
+ assert caplog.records[-1].getMessage() == expected_log
More information about the tor-commits
mailing list