[tor-commits] [stem/master] Parsing 'dirreq-v2-resp' and 'dirreq-v3-resp' lines

atagar at torproject.org atagar at torproject.org
Mon May 14 00:14:27 UTC 2012


commit f7527756275e71b82d426394a74fc0f3ac9824b8
Author: Damian Johnson <atagar at torproject.org>
Date:   Sat May 12 20:10:12 2012 -0700

    Parsing 'dirreq-v2-resp' and 'dirreq-v3-resp' lines
    
    These lines were a bit more substantial to implement since callers would want
    enums for their statuses, but not all statuses are necessarily known. Looks
    like the 'dirreq-v*-direct-dl' and 'dirreq-v*-tunneled-dl' lines will be
    similar.
---
 stem/descriptor/extrainfo_descriptor.py       |   71 +++++++++++++++++++++----
 test/integ/descriptor/extrainfo_descriptor.py |    9 ++-
 test/integ/descriptor/server_descriptor.py    |    3 +-
 test/unit/descriptor/extrainfo_descriptor.py  |   33 +++++++++++-
 4 files changed, 99 insertions(+), 17 deletions(-)

diff --git a/stem/descriptor/extrainfo_descriptor.py b/stem/descriptor/extrainfo_descriptor.py
index 9b01ffc..d1967a9 100644
--- a/stem/descriptor/extrainfo_descriptor.py
+++ b/stem/descriptor/extrainfo_descriptor.py
@@ -16,6 +16,14 @@ Extra-info descriptors are available from a few sources...
 - tor metrics, at https://metrics.torproject.org/data.html
 - directory authorities and mirrors via their DirPort
 
+DirResponses - known statuses for ExtraInfoDescriptor's dir_*_responses
+  |- OK - network status requests that were answered
+  |- NOT_ENOUGH_SIGS - network status wasn't signed by enough authorities
+  |- UNAVAILABLE - requested network status was unavailable
+  |- NOT_FOUND - requested network status was not found
+  |- NOT_MODIFIED - network status unmodified since If-Modified-Since time
+  +- BUSY - directory was busy
+
 parse_file - Iterates over the extra-info descriptors in a file.
 ExtraInfoDescriptor - Tor extra-info descriptor.
   +- get_unrecognized_lines - lines with unrecognized content
@@ -25,6 +33,17 @@ import re
 import datetime
 
 import stem.descriptor
+import stem.util.enum
+
+# known statuses for dirreq-v2-resp and dirreq-v3-resp...
+DirResponses = stem.util.enum.Enum(
+  ("OK", "ok"),
+  ("NOT_ENOUGH_SIGS", "not-enough-sigs"),
+  ("UNAVAILABLE", "unavailable"),
+  ("NOT_FOUND", "not-found"),
+  ("NOT_MODIFIED", "not-modified"),
+  ("BUSY", "busy"),
+)
 
 # relay descriptors must have exactly one of the following
 REQUIRED_FIELDS = (
@@ -161,6 +180,10 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor):
       dir_v3_ips (dict) - mapping of locales to rounded count of requester ips
       dir_v2_requests (dict) - mapping of locales to rounded count of requests
       dir_v3_requests (dict) - mapping of locales to rounded count of requests
+      dir_v2_responses (dict) - mapping of DirResponses to their rounded count
+      dir_v3_responses (dict) - mapping of DirResponses to their rounded count
+      dir_v2_responses_unknown (dict) - mapping of unrecognized statuses to their count
+      dir_v3_responses_unknown (dict) - mapping of unrecognized statuses to their count
       dir_v2_share (float) - percent of total directory traffic it expects to serve
       dir_v3_share (float) - percent of total directory traffic it expects to serve
       
@@ -225,6 +248,10 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor):
     self.dir_v3_ips = None
     self.dir_v2_requests = None
     self.dir_v3_requests = None
+    self.dir_v2_responses = None
+    self.dir_v3_responses = None
+    self.dir_v2_responses_unknown = None
+    self.dir_v3_responses_unknown = None
     self.dir_v2_share = None
     self.dir_v3_share = None
     
@@ -309,6 +336,33 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor):
           raise ValueError("Geoip digest line had an invalid sha1 digest: %s" % line)
         
         self.geoip_db_digest = value
+      elif keyword in ("dirreq-v2-resp", "dirreq-v3-resp"):
+        recognized_counts = {}
+        unrecognized_counts = {}
+        error_msg = "%s lines should contain STATUS=COUNT mappings: %s" % (keyword, line)
+        
+        if value:
+          for entry in value.split(","):
+            if not "=" in entry:
+              if validate: raise ValueError(error_msg)
+              else: continue
+            
+            status, count = entry.split("=", 1)
+            
+            if count.isdigit():
+              if status in DirResponses:
+                recognized_counts[status] = int(count)
+              else:
+                unrecognized_counts[status] = int(count)
+            elif validate:
+              raise ValueError(error_msg)
+        
+        if keyword == "dirreq-v2-resp":
+          self.dir_v2_responses = recognized_counts
+          self.dir_v2_responses_unknown = unrecognized_counts
+        else:
+          self.dir_v3_responses = recognized_counts
+          self.dir_v3_responses_unknown = unrecognized_counts
       elif keyword in ("dirreq-v2-share", "dirreq-v3-share"):
         # "<keyword>" num%
         
@@ -385,6 +439,12 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor):
           if validate: raise exc
       elif keyword in ("dirreq-v2-ips", "dirreq-v3-ips", "dirreq-v2-reqs", "dirreq-v3-reqs", "geoip-client-origins", "bridge-ips"):
         # "<keyword>" CC=N,CC=N,...
+        #
+        # The maxmind geoip (https://www.maxmind.com/app/iso3166) has locale
+        # codes that aren't purely alphabetic for some special values, for instance...
+        #   A1,"Anonymous Proxy"
+        #   A2,"Satellite Provider"
+        #   ??,"Unknown"
         
         locale_usage = {}
         error_msg = "Entries in %s line should only be CC=N entries: %s" % (keyword, line)
@@ -395,23 +455,12 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor):
               if validate: raise ValueError(error_msg)
               else: continue
             
-            # The maxmind geoip has numeric locale codes for some special
-            # values, for instance...
-            #
-            #   A1,"Anonymous Proxy"
-            #   A2,"Satellite Provider"
-            #   ??,"Unknown"
-            #
-            # https://www.maxmind.com/app/iso3166
-            
-            
             locale, count = entry.split("=", 1)
             
             if re.match("^[a-zA-Z0-9\?]{2}$", locale) and count.isdigit():
               locale_usage[locale] = int(count)
             elif validate:
               raise ValueError(error_msg)
-        
         if keyword == "dirreq-v2-ips":
           self.dir_v2_ips = locale_usage
         elif keyword == "dirreq-v3-ips":
diff --git a/test/integ/descriptor/extrainfo_descriptor.py b/test/integ/descriptor/extrainfo_descriptor.py
index 75b26e0..841aa54 100644
--- a/test/integ/descriptor/extrainfo_descriptor.py
+++ b/test/integ/descriptor/extrainfo_descriptor.py
@@ -83,13 +83,16 @@ k0d2aofcVbHr4fPQOSST0LXDrhFl5Fqo5um296zpJGvRUeO6S44U/EfJAGShtqWw
         #unrecognized_lines = desc.get_unrecognized_lines()
         unrecognized_lines = []
         
-        if unrecognized_lines:
+        if desc.dir_v2_responses_unknown:
+          self.fail("Unrecognized statuses on dirreq-v2-resp lines: %s" % desc.dir_v2_responses_unknown)
+        elif desc.dir_v3_responses_unknown:
+          self.fail("Unrecognized statuses on dirreq-v3-resp lines: %s" % desc.dir_v3_responses_unknown)
+        elif unrecognized_lines:
           # TODO: This isn't actually a problem, and rather than failing we
           # should alert the user about these entries at the end of the tests
           # (along with new events, getinfo options, and such). For now though
           # there doesn't seem to be anything in practice to trigger this so
           # failing to get our attention if it does.
           
-          print "Unrecognized descriptor content: %s" % unrecognized_lines
-          self.fail()
+          self.fail("Unrecognized descriptor content: %s" % unrecognized_lines)
 
diff --git a/test/integ/descriptor/server_descriptor.py b/test/integ/descriptor/server_descriptor.py
index 3c07364..b5664c4 100644
--- a/test/integ/descriptor/server_descriptor.py
+++ b/test/integ/descriptor/server_descriptor.py
@@ -172,8 +172,7 @@ Qlx9HNCqCY877ztFRC624ja2ql6A2hBcuoYMbkHjcQ4=
           # there doesn't seem to be anything in practice to trigger this so
           # failing to get our attention if it does.
           
-          print "Unrecognized descriptor content: %s" % unrecognized_lines
-          self.fail()
+          self.fail("Unrecognized descriptor content: %s" % unrecognized_lines)
   
   def test_non_ascii_descriptor(self):
     """
diff --git a/test/unit/descriptor/extrainfo_descriptor.py b/test/unit/descriptor/extrainfo_descriptor.py
index d8849c0..b167ad4 100644
--- a/test/unit/descriptor/extrainfo_descriptor.py
+++ b/test/unit/descriptor/extrainfo_descriptor.py
@@ -4,7 +4,7 @@ Unit tests for stem.descriptor.extrainfo_descriptor.
 
 import datetime
 import unittest
-from stem.descriptor.extrainfo_descriptor import ExtraInfoDescriptor
+from stem.descriptor.extrainfo_descriptor import ExtraInfoDescriptor, DirResponses
 
 CRYPTO_BLOB = """
 K5FSywk7qvw/boA4DQcqkls6Ize5vcBYfhQ8JnOeRQC9+uDxbnpm3qaYN9jZ8myj
@@ -131,6 +131,37 @@ class TestExtraInfoDescriptor(unittest.TestCase):
       desc_text = _make_descriptor({"geoip-db-digest": entry})
       desc = self._expect_invalid_attr(desc_text, "geoip_db_digest", entry)
   
+  def test_dir_response_lines(self):
+    """
+    Parses the dirreq-v2-resp and dirreq-v3-resp lines with valid and invalid
+    data.
+    """
+    
+    for keyword in ("dirreq-v2-resp", "dirreq-v3-resp"):
+      attr = keyword.replace('-', '_').replace('dirreq', 'dir').replace('resp', 'responses')
+      unknown_attr = attr + "_unknown"
+      
+      test_value = "ok=0,unavailable=0,not-found=984,not-modified=0,something-new=7"
+      desc_text = _make_descriptor({keyword: test_value})
+      desc = ExtraInfoDescriptor(desc_text)
+      self.assertEquals(0, getattr(desc, attr)[DirResponses.OK])
+      self.assertEquals(0, getattr(desc, attr)[DirResponses.UNAVAILABLE])
+      self.assertEquals(984, getattr(desc, attr)[DirResponses.NOT_FOUND])
+      self.assertEquals(0, getattr(desc, attr)[DirResponses.NOT_MODIFIED])
+      self.assertEquals(7, getattr(desc, unknown_attr)["something-new"])
+      
+      test_entries = (
+        "ok=-4",
+        "ok:4",
+        "ok=4.not-found=3",
+      )
+      
+      for entry in test_entries:
+        desc_text = _make_descriptor({keyword: entry})
+        desc = self._expect_invalid_attr(desc_text)
+        self.assertEqual({}, getattr(desc, attr))
+        self.assertEqual({}, getattr(desc, unknown_attr))
+  
   def test_percentage_lines(self):
     """
     Uses valid and invalid data to tests lines of the form...





More information about the tor-commits mailing list