[tor-commits] [stem/master] Parsing dirreq-v*-direct-dl and dirreq-v*-tunneled-dl lines

atagar at torproject.org atagar at torproject.org
Mon May 14 00:14:27 UTC 2012


commit bbf0e80cf889cc4b0738acf93c698fc54720c767
Author: Damian Johnson <atagar at torproject.org>
Date:   Sun May 13 13:35:33 2012 -0700

    Parsing dirreq-v*-direct-dl and dirreq-v*-tunneled-dl lines
    
    Parsing four extrainfo descriptor fields related to directory mirroring stat
    collection. These are similar to dirreq-v*-resp in that they're key=value
    mappings, with largely known key sets.
---
 stem/descriptor/extrainfo_descriptor.py       |   69 ++++++++++++++++++++++---
 test/integ/descriptor/extrainfo_descriptor.py |    6 ++
 test/unit/descriptor/extrainfo_descriptor.py  |   45 ++++++++++++++++-
 3 files changed, 111 insertions(+), 9 deletions(-)

diff --git a/stem/descriptor/extrainfo_descriptor.py b/stem/descriptor/extrainfo_descriptor.py
index d1967a9..9493f2b 100644
--- a/stem/descriptor/extrainfo_descriptor.py
+++ b/stem/descriptor/extrainfo_descriptor.py
@@ -24,6 +24,16 @@ DirResponses - known statuses for ExtraInfoDescriptor's dir_*_responses
   |- NOT_MODIFIED - network status unmodified since If-Modified-Since time
   +- BUSY - directory was busy
 
+DirStats - known stats for ExtraInfoDescriptor's dir_*_direct_dl and dir_*_tunneled_dl
+  |- COMPLETE - requests that completed successfully
+  |- TIMEOUT - requests that didn't complete within a ten minute timeout
+  |- RUNNING - requests still in progress when measurement's taken
+  |- MIN - smallest rate at which a descriptor was downloaded in B/s
+  |- MAX - largest rate at which a descriptor was downloaded in B/s
+  |- D1-4 and D6-9 - rate of the slowest x/10 download rates in B/s
+  |- Q1 and Q3 - rate of the slowest and fastest quarter download rates in B/s
+  +- MD - median download rate in B/s
+
 parse_file - Iterates over the extra-info descriptors in a file.
 ExtraInfoDescriptor - Tor extra-info descriptor.
   +- get_unrecognized_lines - lines with unrecognized content
@@ -45,6 +55,12 @@ DirResponses = stem.util.enum.Enum(
   ("BUSY", "busy"),
 )
 
+# known stats for dirreq-v2/3-direct-dl and dirreq-v2/3-tunneled-dl...
+dir_stats = ['complete', 'timeout', 'running', 'min', 'max', 'q1', 'q3', 'md']
+dir_stats += ['d%i' % i for i in range(1, 5)]
+dir_stats += ['d%i' % i for i in range(6, 10)]
+DirStats = stem.util.enum.Enum(*[(stat.upper(), stat) for stat in dir_stats])
+
 # relay descriptors must have exactly one of the following
 REQUIRED_FIELDS = (
   "extra-info",
@@ -178,14 +194,25 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor):
       dir_stats_interval (int) - length in seconds of the interval
       dir_v2_ips (dict) - mapping of locales to rounded count of requester ips
       dir_v3_ips (dict) - mapping of locales to rounded count of requester ips
+      dir_v2_share (float) - percent of total directory traffic it expects to serve
+      dir_v3_share (float) - percent of total directory traffic it expects to serve
       dir_v2_requests (dict) - mapping of locales to rounded count of requests
       dir_v3_requests (dict) - mapping of locales to rounded count of requests
+      
       dir_v2_responses (dict) - mapping of DirResponses to their rounded count
       dir_v3_responses (dict) - mapping of DirResponses to their rounded count
       dir_v2_responses_unknown (dict) - mapping of unrecognized statuses to their count
       dir_v3_responses_unknown (dict) - mapping of unrecognized statuses to their count
-      dir_v2_share (float) - percent of total directory traffic it expects to serve
-      dir_v3_share (float) - percent of total directory traffic it expects to serve
+      
+      dir_v2_direct_dl (dict) - mapping of DirStats to measurement over DirPort
+      dir_v3_direct_dl (dict) - mapping of DirStats to measurement over DirPort
+      dir_v2_direct_dl_unknown (dict) - mapping of unrecognized stats to their measurement
+      dir_v3_direct_dl_unknown (dict) - mapping of unrecognized stats to their measurement
+      
+      dir_v2_tunneled_dl (dict) - mapping of DirStats to measurement over ORPort
+      dir_v3_tunneled_dl (dict) - mapping of DirStats to measurement over ORPort
+      dir_v2_tunneled_dl_unknown (dict) - mapping of unrecognized stats to their measurement
+      dir_v3_tunneled_dl_unknown (dict) - mapping of unrecognized stats to their measurement
       
       Bytes read/written for directory mirroring
         dir_read_history_end (datetime) - end of the sampling interval
@@ -246,14 +273,22 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor):
     self.dir_stats_interval = None
     self.dir_v2_ips = None
     self.dir_v3_ips = None
+    self.dir_v2_share = None
+    self.dir_v3_share = None
     self.dir_v2_requests = None
     self.dir_v3_requests = None
     self.dir_v2_responses = None
     self.dir_v3_responses = None
     self.dir_v2_responses_unknown = None
     self.dir_v3_responses_unknown = None
-    self.dir_v2_share = None
-    self.dir_v3_share = None
+    self.dir_v2_direct_dl = None
+    self.dir_v3_direct_dl = None
+    self.dir_v2_direct_dl_unknown = None
+    self.dir_v3_direct_dl_unknown = None
+    self.dir_v2_tunneled_dl = None
+    self.dir_v3_tunneled_dl = None
+    self.dir_v2_tunneled_dl_unknown = None
+    self.dir_v3_tunneled_dl_unknown = None
     
     self.dir_read_history_end = None
     self.dir_read_history_interval = None
@@ -336,10 +371,16 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor):
           raise ValueError("Geoip digest line had an invalid sha1 digest: %s" % line)
         
         self.geoip_db_digest = value
-      elif keyword in ("dirreq-v2-resp", "dirreq-v3-resp"):
+      elif keyword in ("dirreq-v2-resp", "dirreq-v3-resp", "dirreq-v2-direct-dl", "dirreq-v3-direct-dl", "dirreq-v2-tunneled-dl", "dirreq-v3-tunneled-dl"):
         recognized_counts = {}
         unrecognized_counts = {}
-        error_msg = "%s lines should contain STATUS=COUNT mappings: %s" % (keyword, line)
+        
+        is_response_stats = keyword in ("dirreq-v2-resp", "dirreq-v3-resp")
+        key_set = DirResponses if is_response_stats else DirStats
+        
+        key_type = "STATUS" if is_response_stats else "STAT"
+        error_msg = "%s lines should contain %s=COUNT mappings: %s" % (keyword, key_type, line)
+        
         
         if value:
           for entry in value.split(","):
@@ -350,7 +391,7 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor):
             status, count = entry.split("=", 1)
             
             if count.isdigit():
-              if status in DirResponses:
+              if status in key_set:
                 recognized_counts[status] = int(count)
               else:
                 unrecognized_counts[status] = int(count)
@@ -360,9 +401,21 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor):
         if keyword == "dirreq-v2-resp":
           self.dir_v2_responses = recognized_counts
           self.dir_v2_responses_unknown = unrecognized_counts
-        else:
+        elif keyword == "dirreq-v3-resp":
           self.dir_v3_responses = recognized_counts
           self.dir_v3_responses_unknown = unrecognized_counts
+        elif keyword == "dirreq-v2-direct-dl":
+          self.dir_v2_direct_dl = recognized_counts
+          self.dir_v2_direct_dl_unknown = unrecognized_counts
+        elif keyword == "dirreq-v3-direct-dl":
+          self.dir_v3_direct_dl = recognized_counts
+          self.dir_v3_direct_dl_unknown = unrecognized_counts
+        elif keyword == "dirreq-v2-tunneled-dl":
+          self.dir_v2_tunneled_dl = recognized_counts
+          self.dir_v2_tunneled_dl_unknown = unrecognized_counts
+        elif keyword == "dirreq-v3-tunneled-dl":
+          self.dir_v3_tunneled_dl = recognized_counts
+          self.dir_v3_tunneled_dl_unknown = unrecognized_counts
       elif keyword in ("dirreq-v2-share", "dirreq-v3-share"):
         # "<keyword>" num%
         
diff --git a/test/integ/descriptor/extrainfo_descriptor.py b/test/integ/descriptor/extrainfo_descriptor.py
index 841aa54..99edfea 100644
--- a/test/integ/descriptor/extrainfo_descriptor.py
+++ b/test/integ/descriptor/extrainfo_descriptor.py
@@ -87,6 +87,12 @@ k0d2aofcVbHr4fPQOSST0LXDrhFl5Fqo5um296zpJGvRUeO6S44U/EfJAGShtqWw
           self.fail("Unrecognized statuses on dirreq-v2-resp lines: %s" % desc.dir_v2_responses_unknown)
         elif desc.dir_v3_responses_unknown:
           self.fail("Unrecognized statuses on dirreq-v3-resp lines: %s" % desc.dir_v3_responses_unknown)
+        elif desc.dir_v2_direct_dl_unknown:
+          self.fail("Unrecognized stats on dirreq-v2-direct-dl lines: %s" % desc.dir_v2_direct_dl_unknown)
+        elif desc.dir_v3_direct_dl_unknown:
+          self.fail("Unrecognized stats on dirreq-v3-direct-dl lines: %s" % desc.dir_v3_direct_dl_unknown)
+        elif desc.dir_v2_tunneled_dl_unknown:
+          self.fail("Unrecognized stats on dirreq-v2-tunneled-dl lines: %s" % desc.dir_v2_tunneled_dl_unknown)
         elif unrecognized_lines:
           # TODO: This isn't actually a problem, and rather than failing we
           # should alert the user about these entries at the end of the tests
diff --git a/test/unit/descriptor/extrainfo_descriptor.py b/test/unit/descriptor/extrainfo_descriptor.py
index b167ad4..e855a1d 100644
--- a/test/unit/descriptor/extrainfo_descriptor.py
+++ b/test/unit/descriptor/extrainfo_descriptor.py
@@ -4,7 +4,7 @@ Unit tests for stem.descriptor.extrainfo_descriptor.
 
 import datetime
 import unittest
-from stem.descriptor.extrainfo_descriptor import ExtraInfoDescriptor, DirResponses
+from stem.descriptor.extrainfo_descriptor import ExtraInfoDescriptor, DirResponses, DirStats
 
 CRYPTO_BLOB = """
 K5FSywk7qvw/boA4DQcqkls6Ize5vcBYfhQ8JnOeRQC9+uDxbnpm3qaYN9jZ8myj
@@ -162,6 +162,49 @@ class TestExtraInfoDescriptor(unittest.TestCase):
         self.assertEqual({}, getattr(desc, attr))
         self.assertEqual({}, getattr(desc, unknown_attr))
   
+  def test_dir_stat_lines(self):
+    """
+    Parses the dirreq-v2-direct-dl, dirreq-v3-direct-dl, dirreq-v2-tunneled-dl,
+    and dirreq-v3-tunneled-dl lines with valid and invalid data.
+    """
+    
+    # one entry for each DirStats key, plus an unknown 'something-new' stat
+    test_value = "complete=2712,timeout=32,running=4,min=741,d1=14507,d2=22702,q1=28881,d3=38277,d4=73729,md=111455,d6=168231,d7=257218,q3=319833,d8=390507,d9=616301,something-new=11,max=29917857"
+    
+    # the recognized stats we expect to get back from test_value
+    expected = {
+      DirStats.COMPLETE: 2712, DirStats.TIMEOUT: 32, DirStats.RUNNING: 4,
+      DirStats.MIN: 741, DirStats.MAX: 29917857,
+      DirStats.D1: 14507, DirStats.D2: 22702, DirStats.D3: 38277, DirStats.D4: 73729,
+      DirStats.D6: 168231, DirStats.D7: 257218, DirStats.D8: 390507, DirStats.D9: 616301,
+      DirStats.Q1: 28881, DirStats.MD: 111455, DirStats.Q3: 319833,
+    }
+    
+    # entries the parser should reject, leaving both mappings empty
+    invalid_entries = (
+      "complete=-4",           # negative count
+      "complete:4",            # missing the '=' separator
+      "complete=4.timeout=3",  # entries divided by '.' rather than ','
+    )
+    
+    # cover the v2 *and* v3 variants of both the direct and tunneled lines
+    for keyword in ("dirreq-v2-direct-dl", "dirreq-v3-direct-dl", "dirreq-v2-tunneled-dl", "dirreq-v3-tunneled-dl"):
+      attr = keyword.replace('-', '_').replace('dirreq', 'dir')
+      unknown_attr = attr + "_unknown"
+      
+      desc = ExtraInfoDescriptor(_make_descriptor({keyword: test_value}))
+      
+      for stat, count in expected.items():
+        self.assertEqual(count, getattr(desc, attr)[stat])
+      
+      self.assertEqual(11, getattr(desc, unknown_attr)["something-new"])
+      
+      for entry in invalid_entries:
+        desc_text = _make_descriptor({keyword: entry})
+        desc = self._expect_invalid_attr(desc_text)
+        self.assertEqual({}, getattr(desc, attr))
+        self.assertEqual({}, getattr(desc, unknown_attr))
+  
   def test_percentage_lines(self):
     """
     Uses valid and invalid data to tests lines of the form...





More information about the tor-commits mailing list