[tor-commits] [stem/master] Parsing dirreq-v*-direct-dl and dirreq-v*-tunneled-dl lines
atagar at torproject.org
atagar at torproject.org
Mon May 14 00:14:27 UTC 2012
commit bbf0e80cf889cc4b0738acf93c698fc54720c767
Author: Damian Johnson <atagar at torproject.org>
Date: Sun May 13 13:35:33 2012 -0700
Parsing dirreq-v*-direct-dl and dirreq-v*-tunneled-dl lines
Parsing four extrainfo descriptor fields related to directory mirroring stat
collection. These are similar to dirreq-v*-resp in that they're key=value
mappings, with largely known key sets.
---
stem/descriptor/extrainfo_descriptor.py | 69 ++++++++++++++++++++++---
test/integ/descriptor/extrainfo_descriptor.py | 6 ++
test/unit/descriptor/extrainfo_descriptor.py | 45 ++++++++++++++++-
3 files changed, 111 insertions(+), 9 deletions(-)
diff --git a/stem/descriptor/extrainfo_descriptor.py b/stem/descriptor/extrainfo_descriptor.py
index d1967a9..9493f2b 100644
--- a/stem/descriptor/extrainfo_descriptor.py
+++ b/stem/descriptor/extrainfo_descriptor.py
@@ -24,6 +24,16 @@ DirResponses - known statuses for ExtraInfoDescriptor's dir_*_responses
|- NOT_MODIFIED - network status unmodified since If-Modified-Since time
+- BUSY - directory was busy
+DirStats - known stats for ExtraInfoDescriptor's dir_*_direct_dl and dir_*_tunneled_dl
+ |- COMPLETE - requests that completed successfully
+ |- TIMEOUT - requests that didn't complete within a ten minute timeout
+ |- RUNNING - requests still in progress when measurement's taken
+ |- MIN - smallest rate at which a descriptor was downloaded in B/s
+ |- MAX - largest rate at which a descriptor was downloaded in B/s
+ |- D1-4 and D6-9 - rate of the slowest x/10 download rates in B/s
+ |- Q1 and Q3 - rate of the slowest and fastest quarter download rates in B/s
+ +- MD - median download rate in B/s
+
parse_file - Iterates over the extra-info descriptors in a file.
ExtraInfoDescriptor - Tor extra-info descriptor.
+- get_unrecognized_lines - lines with unrecognized content
@@ -45,6 +55,12 @@ DirResponses = stem.util.enum.Enum(
("BUSY", "busy"),
)
+# known stats for dirreq-v2/3-direct-dl and dirreq-v2/3-tunneled-dl...
+dir_stats = ['complete', 'timeout', 'running', 'min', 'max', 'q1', 'q3', 'md']
+dir_stats += ['d%i' % i for i in range(1, 5)]
+dir_stats += ['d%i' % i for i in range(6, 10)]
+DirStats = stem.util.enum.Enum(*[(stat.upper(), stat) for stat in dir_stats])
+
# relay descriptors must have exactly one of the following
REQUIRED_FIELDS = (
"extra-info",
@@ -178,14 +194,25 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor):
dir_stats_interval (int) - length in seconds of the interval
dir_v2_ips (dict) - mapping of locales to rounded count of requester ips
dir_v3_ips (dict) - mapping of locales to rounded count of requester ips
+ dir_v2_share (float) - percent of total directory traffic it expects to serve
+ dir_v3_share (float) - percent of total directory traffic it expects to serve
dir_v2_requests (dict) - mapping of locales to rounded count of requests
dir_v3_requests (dict) - mapping of locales to rounded count of requests
+
dir_v2_responses (dict) - mapping of DirResponses to their rounded count
dir_v3_responses (dict) - mapping of DirResponses to their rounded count
dir_v2_responses_unknown (dict) - mapping of unrecognized statuses to their count
dir_v3_responses_unknown (dict) - mapping of unrecognized statuses to their count
- dir_v2_share (float) - percent of total directory traffic it expects to serve
- dir_v3_share (float) - percent of total directory traffic it expects to serve
+
+ dir_v2_direct_dl (dict) - mapping of DirStats to measurement over DirPort
+ dir_v3_direct_dl (dict) - mapping of DirStats to measurement over DirPort
+ dir_v2_direct_dl_unknown (dict) - mapping of unrecognized stats to their measurement
+ dir_v3_direct_dl_unknown (dict) - mapping of unrecognized stats to their measurement
+
+ dir_v2_tunneled_dl (dict) - mapping of DirStats to measurement over ORPort
+ dir_v3_tunneled_dl (dict) - mapping of DirStats to measurement over ORPort
+ dir_v2_tunneled_dl_unknown (dict) - mapping of unrecognized stats to their measurement
+ dir_v3_tunneled_dl_unknown (dict) - mapping of unrecognized stats to their measurement
Bytes read/written for directory mirroring
dir_read_history_end (datetime) - end of the sampling interval
@@ -246,14 +273,22 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor):
self.dir_stats_interval = None
self.dir_v2_ips = None
self.dir_v3_ips = None
+ self.dir_v2_share = None
+ self.dir_v3_share = None
self.dir_v2_requests = None
self.dir_v3_requests = None
self.dir_v2_responses = None
self.dir_v3_responses = None
self.dir_v2_responses_unknown = None
self.dir_v3_responses_unknown = None
- self.dir_v2_share = None
- self.dir_v3_share = None
+ self.dir_v2_direct_dl = None
+ self.dir_v3_direct_dl = None
+ self.dir_v2_direct_dl_unknown = None
+ self.dir_v3_direct_dl_unknown = None
+ self.dir_v2_tunneled_dl = None
+ self.dir_v3_tunneled_dl = None
+ self.dir_v2_tunneled_dl_unknown = None
+ self.dir_v3_tunneled_dl_unknown = None
self.dir_read_history_end = None
self.dir_read_history_interval = None
@@ -336,10 +371,16 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor):
raise ValueError("Geoip digest line had an invalid sha1 digest: %s" % line)
self.geoip_db_digest = value
- elif keyword in ("dirreq-v2-resp", "dirreq-v3-resp"):
+ elif keyword in ("dirreq-v2-resp", "dirreq-v3-resp", "dirreq-v2-direct-dl", "dirreq-v3-direct-dl", "dirreq-v2-tunneled-dl", "dirreq-v3-tunneled-dl"):
recognized_counts = {}
unrecognized_counts = {}
- error_msg = "%s lines should contain STATUS=COUNT mappings: %s" % (keyword, line)
+
+ is_response_stats = keyword in ("dirreq-v2-resp", "dirreq-v3-resp")
+ key_set = DirResponses if is_response_stats else DirStats
+
+ key_type = "STATUS" if is_response_stats else "STAT"
+ error_msg = "%s lines should contain %s=COUNT mappings: %s" % (keyword, key_type, line)
+
if value:
for entry in value.split(","):
@@ -350,7 +391,7 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor):
status, count = entry.split("=", 1)
if count.isdigit():
- if status in DirResponses:
+ if status in key_set:
recognized_counts[status] = int(count)
else:
unrecognized_counts[status] = int(count)
@@ -360,9 +401,21 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor):
if keyword == "dirreq-v2-resp":
self.dir_v2_responses = recognized_counts
self.dir_v2_responses_unknown = unrecognized_counts
- else:
+ elif keyword == "dirreq-v3-resp":
self.dir_v3_responses = recognized_counts
self.dir_v3_responses_unknown = unrecognized_counts
+ elif keyword == "dirreq-v2-direct-dl":
+ self.dir_v2_direct_dl = recognized_counts
+ self.dir_v2_direct_dl_unknown = unrecognized_counts
+ elif keyword == "dirreq-v3-direct-dl":
+ self.dir_v3_direct_dl = recognized_counts
+ self.dir_v3_direct_dl_unknown = unrecognized_counts
+ elif keyword == "dirreq-v2-tunneled-dl":
+ self.dir_v2_tunneled_dl = recognized_counts
+ self.dir_v2_tunneled_dl_unknown = unrecognized_counts
+ elif keyword == "dirreq-v3-tunneled-dl":
+ self.dir_v3_tunneled_dl = recognized_counts
+ self.dir_v3_tunneled_dl_unknown = unrecognized_counts
elif keyword in ("dirreq-v2-share", "dirreq-v3-share"):
# "<keyword>" num%
diff --git a/test/integ/descriptor/extrainfo_descriptor.py b/test/integ/descriptor/extrainfo_descriptor.py
index 841aa54..99edfea 100644
--- a/test/integ/descriptor/extrainfo_descriptor.py
+++ b/test/integ/descriptor/extrainfo_descriptor.py
@@ -87,6 +87,12 @@ k0d2aofcVbHr4fPQOSST0LXDrhFl5Fqo5um296zpJGvRUeO6S44U/EfJAGShtqWw
self.fail("Unrecognized statuses on dirreq-v2-resp lines: %s" % desc.dir_v2_responses_unknown)
elif desc.dir_v3_responses_unknown:
self.fail("Unrecognized statuses on dirreq-v3-resp lines: %s" % desc.dir_v3_responses_unknown)
+ elif desc.dir_v2_direct_dl_unknown:
+ self.fail("Unrecognized stats on dirreq-v2-direct-dl lines: %s" % desc.dir_v2_direct_dl_unknown)
+ elif desc.dir_v3_direct_dl_unknown:
+ self.fail("Unrecognized stats on dirreq-v3-direct-dl lines: %s" % desc.dir_v2_direct_dl_unknown)
+ elif desc.dir_v2_tunneled_dl_unknown:
+ self.fail("Unrecognized stats on dirreq-v2-tunneled-dl lines: %s" % desc.dir_v2_tunneled_dl_unknown)
elif unrecognized_lines:
# TODO: This isn't actually a problem, and rather than failing we
# should alert the user about these entries at the end of the tests
diff --git a/test/unit/descriptor/extrainfo_descriptor.py b/test/unit/descriptor/extrainfo_descriptor.py
index b167ad4..e855a1d 100644
--- a/test/unit/descriptor/extrainfo_descriptor.py
+++ b/test/unit/descriptor/extrainfo_descriptor.py
@@ -4,7 +4,7 @@ Unit tests for stem.descriptor.extrainfo_descriptor.
import datetime
import unittest
-from stem.descriptor.extrainfo_descriptor import ExtraInfoDescriptor, DirResponses
+from stem.descriptor.extrainfo_descriptor import ExtraInfoDescriptor, DirResponses, DirStats
CRYPTO_BLOB = """
K5FSywk7qvw/boA4DQcqkls6Ize5vcBYfhQ8JnOeRQC9+uDxbnpm3qaYN9jZ8myj
@@ -162,6 +162,49 @@ class TestExtraInfoDescriptor(unittest.TestCase):
self.assertEqual({}, getattr(desc, attr))
self.assertEqual({}, getattr(desc, unknown_attr))
+ def test_dir_stat_lines(self):
+ """
+ Parses the dirreq-v2-direct-dl, dirreq-v3-direct-dl, dirreq-v2-tunneled-dl,
+ and dirreq-v3-tunneled-dl lines with valid and invalid data.
+ """
+
+ for keyword in ("dirreq-v2-direct-dl", "dirreq-v2-direct-dl", "dirreq-v2-tunneled-dl", "dirreq-v2-tunneled-dl"):
+ attr = keyword.replace('-', '_').replace('dirreq', 'dir')
+ unknown_attr = attr + "_unknown"
+
+ test_value = "complete=2712,timeout=32,running=4,min=741,d1=14507,d2=22702,q1=28881,d3=38277,d4=73729,md=111455,d6=168231,d7=257218,q3=319833,d8=390507,d9=616301,something-new=11,max=29917857"
+ desc_text = _make_descriptor({keyword: test_value})
+ desc = ExtraInfoDescriptor(desc_text)
+ self.assertEquals(2712, getattr(desc, attr)[DirStats.COMPLETE])
+ self.assertEquals(32, getattr(desc, attr)[DirStats.TIMEOUT])
+ self.assertEquals(4, getattr(desc, attr)[DirStats.RUNNING])
+ self.assertEquals(741, getattr(desc, attr)[DirStats.MIN])
+ self.assertEquals(14507, getattr(desc, attr)[DirStats.D1])
+ self.assertEquals(22702, getattr(desc, attr)[DirStats.D2])
+ self.assertEquals(28881, getattr(desc, attr)[DirStats.Q1])
+ self.assertEquals(38277, getattr(desc, attr)[DirStats.D3])
+ self.assertEquals(73729, getattr(desc, attr)[DirStats.D4])
+ self.assertEquals(111455, getattr(desc, attr)[DirStats.MD])
+ self.assertEquals(168231, getattr(desc, attr)[DirStats.D6])
+ self.assertEquals(257218, getattr(desc, attr)[DirStats.D7])
+ self.assertEquals(319833, getattr(desc, attr)[DirStats.Q3])
+ self.assertEquals(390507, getattr(desc, attr)[DirStats.D8])
+ self.assertEquals(616301, getattr(desc, attr)[DirStats.D9])
+ self.assertEquals(29917857, getattr(desc, attr)[DirStats.MAX])
+ self.assertEquals(11, getattr(desc, unknown_attr)["something-new"])
+
+ test_entries = (
+ "complete=-4",
+ "complete:4",
+ "complete=4.timeout=3",
+ )
+
+ for entry in test_entries:
+ desc_text = _make_descriptor({keyword: entry})
+ desc = self._expect_invalid_attr(desc_text)
+ self.assertEqual({}, getattr(desc, attr))
+ self.assertEqual({}, getattr(desc, unknown_attr))
+
def test_percentage_lines(self):
"""
Uses valid and invalid data to test lines of the form...
More information about the tor-commits
mailing list