[tor-commits] [oonib/master] Do the conversion of YAML to JSON on the submission stage

art at torproject.org art at torproject.org
Mon May 9 17:57:02 UTC 2016


commit 973297a0ad8f5093ec9c08954a4d8419f50bd5d7
Author: Arturo Filastò <arturo at filasto.net>
Date:   Wed Jan 27 16:52:46 2016 +0100

    Do the conversion of YAML to JSON on the submission stage
---
 oonib/__init__.py        | 24 ++++++++++++++
 oonib/errors.py          |  4 +++
 oonib/onion.py           |  2 +-
 oonib/otime.py           |  2 +-
 oonib/report/handlers.py | 84 ++++++++++++++++++++++--------------------------
 5 files changed, 69 insertions(+), 47 deletions(-)

diff --git a/oonib/__init__.py b/oonib/__init__.py
index ba6dea3..773f2db 100644
--- a/oonib/__init__.py
+++ b/oonib/__init__.py
@@ -7,6 +7,7 @@ __version__ = '1.1.4'
 
 __all__ = ['Storage', 'randomStr']
 
+import json
 import string
 from random import SystemRandom
 random = SystemRandom()
@@ -59,3 +60,26 @@ def randomStr(length, num=True):
     if num:
         chars += string.digits
     return ''.join(random.choice(chars) for x in range(length))
+
+def binary_to_base64_dict(data):
+    from base64 import b64encode
+    return {
+        "data": b64encode(data),
+        "format": "base64"
+    }
+
+def json_dumps(obj):
+    def _default(o):
+        if isinstance(o, set):
+            return list(o)
+        elif isinstance(o, str):
+            try:
+                o = unicode(o, 'ascii')
+            except UnicodeDecodeError:
+                try:
+                    o = unicode(o, 'utf-8')
+                except UnicodeDecodeError:
+                    o = binary_to_base64_dict(o)
+            return o
+
+    return json.dumps(obj, default=_default)
diff --git a/oonib/errors.py b/oonib/errors.py
index 6d97190..b82f8fe 100644
--- a/oonib/errors.py
+++ b/oonib/errors.py
@@ -111,6 +111,10 @@ class TestHelperNotFound(OONIBError):
     log_message = "test-helper-not-found"
 
 
+class InvalidFormatField(OONIBError):
+    status_code = 400
+    log_message = "invalid-format-field"
+
 class ConfigFileNotSpecified(Exception):
     pass
 
diff --git a/oonib/onion.py b/oonib/onion.py
index 9c94d02..c06dc6b 100644
--- a/oonib/onion.py
+++ b/oonib/onion.py
@@ -73,7 +73,7 @@ def startTor(torconfig):
         if os.path.exists(config.main.tor_datadir):
             torconfig.DataDirectory = os.path.abspath(config.main.tor_datadir)
         else:
-            raise Exception
+            raise Exception("Could not find tor datadir")
 
     tor_log_file = os.path.join(torconfig.DataDirectory, "tor.log")
     torconfig.Log = ["notice stdout", "notice file %s" % tor_log_file]
diff --git a/oonib/otime.py b/oonib/otime.py
index 67a6bc6..31f0cc7 100644
--- a/oonib/otime.py
+++ b/oonib/otime.py
@@ -95,5 +95,5 @@ def timestamp(t=None):
     """
     if t is None:
         t = datetime.utcnow()
-    ISO8601 = "%Y-%m-%dT%H%M%SZ"
+    ISO8601 = "%Y%m%dT%H%M%SZ"
     return t.strftime(ISO8601)
diff --git a/oonib/report/handlers.py b/oonib/report/handlers.py
index 50e0cb7..7671723 100644
--- a/oonib/report/handlers.py
+++ b/oonib/report/handlers.py
@@ -12,17 +12,12 @@ from oonib.handlers import OONIBHandler
 from oonib.policy.handlers import Policy
 
 from datetime import datetime
-from oonib import randomStr, otime, log
+from oonib import randomStr, otime, log, json_dumps
 from oonib.config import config
 
 
 def report_file_name(archive_dir, report_details):
     timestamp = datetime.fromtimestamp(report_details['start_time'])
-    ext = report_details.get("format")
-    if ext == "json":
-        ext = "json"
-    else:
-        ext = "yamloo"
     keys = dict(
         report_details.items(),
         iso8601_timestamp=otime.timestamp(timestamp),
@@ -31,10 +26,9 @@ def report_file_name(archive_dir, report_details):
         day=timestamp.strftime("%d"),
         hour=timestamp.strftime("%H"),
         minute=timestamp.strftime("%M"),
-        second=timestamp.strftime("%S"),
-        ext=ext
+        second=timestamp.strftime("%S")
     )
-    report_file_template = "{probe_cc}/{test_name}-{iso8601_timestamp}-{probe_asn}-probe.{ext}"
+    report_file_template = "{iso8601_timestamp}-{test_name}-{probe_asn}-{probe_cc}-probe-0.2.0.json"
     if config.main.report_file_template:
         report_file_template = config.main.report_file_template
     dst_filename = os.path.join(archive_dir, report_file_template.format(**keys))
@@ -51,14 +45,13 @@ class Report(object):
                  stale_time,
                  report_dir,
                  archive_dir,
-                 reports, file_format="yaml"):
+                 reports):
         self.report_id = report_id
 
         self.stale_time = stale_time
         self.report_dir = report_dir
         self.archive_dir = archive_dir
         self.reports = reports
-        self.file_format = file_format
 
         self.refresh()
 
@@ -83,24 +76,14 @@ class Report(object):
         report_filename = get_report_path(self.report_id)
         try:
             with open(report_filename) as fd:
-                if self.file_format == "json":
-                    line = fd.readline()
-                    json.loads(line)
-                else:
-                    g = yaml.safe_load_all(fd)
-                    report_details = g.next()
+                line = fd.readline()
+                report_details = json.loads(line.strip())
         except IOError:
             raise e.ReportNotFound
 
         dst_filename = report_file_name(self.archive_dir, report_details)
         shutil.move(report_filename, dst_filename)
 
-        if self.file_format == "json":
-            report_details["record_type"] = "footer"
-            with open(dst_filename, "a+") as fd:
-                json.dump(report_details, dst_filename)
-                fd.write("\n")
-
         if not self.delayed_call.called:
             self.delayed_call.cancel()
         del self.reports[self.report_id]
@@ -169,6 +152,8 @@ def parseNewReportRequest(request):
     except KeyError:
         pass
 
+    parsed_request['format'] = parsed_request.get('format', 'yaml')
+
     return parsed_request
 
 
@@ -213,7 +198,6 @@ class ReportHandler(OONIBHandler):
 
 
 class UpdateReportMixin(object):
-
     def updateReport(self, report_id, parsed_request):
 
         log.debug("Got this request %s" % parsed_request)
@@ -224,9 +208,24 @@ class UpdateReportMixin(object):
         except KeyError:
             raise e.OONIBError(404, "Report not found")
 
+        content_format = parsed_request.get('format', 'yaml')
+        if content_format == 'json':
+            data = json_dumps(parsed_request['content'])
+        elif content_format == 'yaml':
+            try:
+                entry = yaml.safe_load_all(parsed_request['content']).next()
+                data = json_dumps(entry)
+            except Exception as exc:
+                log.error("Received an invalid entry")
+                log.msg(parsed_request['content'])
+                log.exception(exc)
+                raise e.OONIBError(400, "Invalid report entry")
+        else:
+            raise e.InvalidFormatField
         try:
             with open(report_filename, 'a+') as fd:
-                fd.write(parsed_request['content'])
+                fd.write(data)
+                fd.write("\n")
         except IOError:
             raise e.OONIBError(404, "Report not found")
         self.write({'status': 'success'})
@@ -306,25 +305,15 @@ class NewReportHandlerFile(ReportHandler, UpdateReportMixin):
             self.checkPolicy()
 
         if 'content' in report_data:
-            content = yaml.safe_load(report_data['content'])
-            report_header = validate_report_header(content)
-
+            if report_data['format'] == 'json':
+                content = report_data['content']
+            elif report_data['format'] == 'yaml':
+                content = None
+            else:
+                raise e.InvalidFormatField
+            content['backend_version'] = config.backend_version
         else:
-            content = {
-                'software_name': software_name,
-                'software_version': software_version,
-                'probe_asn': probe_asn,
-                'probe_cc': probe_cc,
-                'test_name': self.testName,
-                'test_version': self.testVersion,
-                'input_hashes': self.inputHashes,
-                'start_time': time.time()
-            }
-
-        content['backend_version'] = config.backend_version
-
-        report_header = yaml.dump(content)
-        content = "---\n" + report_header + '...\n'
+            content = None
 
         if not probe_asn:
             probe_asn = "AS0"
@@ -356,8 +345,13 @@ class NewReportHandlerFile(ReportHandler, UpdateReportMixin):
                                          self.report_dir,
                                          self.archive_dir,
                                          self.reports)
-
-        self.writeToReport(report_filename, content)
+        if content:
+            # XXX make sure to validate the content to ensure it's possible to
+            # encode as JSON
+            data = json_dumps(content) + "\n"
+            self.writeToReport(report_filename, data)
+        else:
+            open(report_filename, 'w+').close()
 
         self.write(response)
 





More information about the tor-commits mailing list