[tor-commits] [stem/master] Revising descriptor exporter and associated tests
atagar at torproject.org
atagar at torproject.org
Sat Aug 4 19:19:49 UTC 2012
commit 7022021c4207a0066cb93261629f2ba020d307f6
Author: Damian Johnson <atagar at torproject.org>
Date: Sat Aug 4 12:13:47 2012 -0700
Revising descriptor exporter and associated tests
Several changes for the prior csv export functionality, especially its tests.
Most notably this includes...
* rewrite of the tests to be simpler and break down by test cases
* newline breaks rather than windows style '\r\n'
* excluding private attributes (attributes that start with an underscore)
* writing directly to a file in export_csv_file() rather than buffering to a string first
* general refactoring to hopefully improve readability
---
run_tests.py | 6 +-
stem/descriptor/__init__.py | 2 +-
stem/descriptor/export.py | 148 ++++++++++++++----------------
test/unit/descriptor/__init__.py | 2 +-
test/unit/descriptor/export.py | 189 ++++++++++++++------------------------
5 files changed, 142 insertions(+), 205 deletions(-)
diff --git a/run_tests.py b/run_tests.py
index bed8e50..4ad551a 100755
--- a/run_tests.py
+++ b/run_tests.py
@@ -16,6 +16,7 @@ import test.output
import test.runner
import test.check_whitespace
import test.unit.connection.authentication
+import test.unit.descriptor.export
import test.unit.descriptor.reader
import test.unit.descriptor.server_descriptor
import test.unit.descriptor.extrainfo_descriptor
@@ -50,8 +51,6 @@ import test.integ.util.proc
import test.integ.util.system
import test.integ.process
import test.integ.version
-#TODO move this by other descriptor tests
-import test.unit.descriptor.export
import stem.prereq
import stem.util.conf
@@ -109,6 +108,7 @@ UNIT_TESTS = (
test.unit.util.proc.TestProc,
test.unit.util.system.TestSystem,
test.unit.util.tor_tools.TestTorTools,
+ test.unit.descriptor.export.TestExport,
test.unit.descriptor.reader.TestDescriptorReader,
test.unit.descriptor.server_descriptor.TestServerDescriptor,
test.unit.descriptor.extrainfo_descriptor.TestExtraInfoDescriptor,
@@ -123,9 +123,7 @@ UNIT_TESTS = (
test.unit.response.protocolinfo.TestProtocolInfoResponse,
test.unit.response.authchallenge.TestAuthChallengeResponse,
test.unit.connection.authentication.TestAuthenticate,
- test.unit.descriptor.export.TestExport,
)
-# TODO move export test to proper location
INTEG_TESTS = (
test.integ.util.conf.TestConf,
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index bb6f874..45cd016 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -13,7 +13,7 @@ Package for parsing and processing descriptor data.
"""
__all__ = [
- "descriptor",
+ "export",
"reader",
"extrainfo_descriptor",
"server_descriptor",
diff --git a/stem/descriptor/export.py b/stem/descriptor/export.py
index 89a5644..16a148d 100644
--- a/stem/descriptor/export.py
+++ b/stem/descriptor/export.py
@@ -1,97 +1,89 @@
-import os, csv, sets, cStringIO
+"""
+Utilities for exporting descriptors to other formats.
+"""
+import csv
+import cStringIO
-def export_csv(descriptor, include_fields=(), exclude_fields=()):
+import stem.descriptor
+
+class _ExportDialect(csv.excel):
+ lineterminator = '\n'
+
+def export_csv(descriptors, included_fields = (), excluded_fields = (), header = True):
"""
- Takes a single descriptor object, puts it in a list, and passes it to
- descriptors_csv_exp to build a csv.
+ Provides a newline separated CSV for one or more descriptors. If simply
+ provided with descriptors then the CSV contains all of its attributes,
+ labelled with a header row. Either 'included_fields' or 'excluded_fields' can
+ be used for more granular control over its attributes and the order.
- :param object descriptor: single descriptor whose attributes will be returned as a string.
- :param list include_fields: list of attribute fields to include in the csv string.
- :param list exclude_fields: list of attribute fields to exclude from csv string.
+ :param Descriptor,list descriptors: descriptor or list of descriptors to be exported
+ :param list included_fields: attributes to include in the csv
+ :param list excluded_fields: attributes to exclude from the csv
+ :param bool header: if True then the first line will be a comma separated list of the attribute names
- :returns: single csv line as a string with one descriptor attribute per cell.
+ :returns: str of the CSV for the descriptors, one per line
+ :raises: ValueError if descriptors contain more than one descriptor type
"""
- descr = (descriptor,)
- return export_csvs(descr, include_fields=include_fields, exclude_fields=exclude_fields)
+ output_buffer = cStringIO.StringIO()
+ export_csv_file(output_buffer, descriptors, included_fields, excluded_fields, header)
+ return output_buffer.getvalue()
-
-def export_csvs(descriptors, include_fields=[], exclude_fields=[], header=False):
+def export_csv_file(output_file, descriptors, included_fields = (), excluded_fields = (), header = True):
"""
- Takes an iterable of descriptors, returns a string with one line per descriptor
- where each line is a comma separated list of descriptor attributes.
+ Similar to :func:`stem.descriptor.export.export_csv`, except that the CSV is
+ written directly to a file.
- :param list descrs: List of descriptor objects whose attributes will be written.
- :param list include_fields: list of attribute fields to include in the csv string.
- :param list exclude_fields: list of attribute fields to exclude from csv string.
- :param bool header: whether or not a header is requested.
+ :param file output_file: file to be written to
+ :param Descriptor,list descriptors: descriptor or list of descriptors to be exported
+ :param list included_fields: attributes to include in the csv
+ :param list excluded_fields: attributes to exclude from the csv
+ :param bool header: if True then the first line will be a comma separated list of the attribute names
- :returns: csv string with one descriptor per line and one attribute per cell.
- :raises: ValueError if more than one descriptor type (e.g. server_descriptor,
- extrainfo_descriptor) is provided in the iterable.
+ :returns: str of the CSV for the descriptors, one per line
+ :raises: ValueError if descriptors contain more than one descriptor type
"""
- # Need a file object to write to with DictWriter.
- temp_file = cStringIO.StringIO()
+ if isinstance(descriptors, stem.descriptor.Descriptor):
+ descriptors = (descriptors,)
- first = True
+ if not descriptors:
+ return
- for desc in descriptors:
- #umport sys
- attr = vars(desc)
-
- # Defining incl_fields and the dwriter object requires having access
- # to a descriptor object.
- if first:
- # All descriptor objects should be of the same type
- # (i.e. server_descriptor.RelayDesrciptor)
- desc_type = type(desc)
-
- # define incl_fields, 4 cases where final case is incl_fields already
- # defined and excl_fields left blank, so no action is necessary.
- if not include_fields and exclude_fields:
- incl = set(attr.keys())
- include_fields = list(incl.difference(exclude_fields))
-
- elif not include_fields and not exclude_fields:
- include_fields = attr.keys()
-
- elif include_fields and exclude_fields:
- incl = set(include_fields)
- include_fields = list(incl.difference(exclude_fields))
-
- dwriter = csv.DictWriter(temp_file, include_fields, extrasaction='ignore')
-
- if header:
- dwriter.writeheader()
- first = False
-
- if desc_type == type(desc):
- dwriter.writerow(attr)
- else:
- raise ValueError('More than one descriptor type provided. Started with a %s, and just got a %s' % (desc_type, type(desc)))
+ descriptor_type = type(descriptors[0])
+ descriptor_type_label = descriptor_type.__name__
+ included_fields = list(included_fields)
- return temp_file.getvalue()
- # cStringIO files are closed automatically when the current scope is exited.
-
-def export_csv_file(descriptors, document, include_fields=(), exclude_fields=(), header=True):
- """
- Writes descriptor attributes to a csv file on disk.
+ # If the user didn't specify the fields to include then export everything,
+ # ordered alphabetically. If they did specify fields then make sure that
+ # they exist.
- Calls get_csv_lines with the given argument, then writes the returned string
- to a file location specified by document_location.
- Precondition that param document has a 'write' attribute.
+ desc_attr = sorted(vars(descriptors[0]).keys())
- :param list descriptors: descriptor objects with attributes to export as csv file.
- :param object document: File object to be written to.
- :param bool header: defaults to true, determines if document will have a header row.
- :param list include_fields: list of attribute fields to include in the csv line.
- :param list exclude_fields: list of attribute fields to exclude from csv line.
- """
+ if included_fields:
+ for field in included_fields:
+ if not field in desc_attr:
+ raise ValueError("%s does not have a '%s' attribute, valid fields are: %s" % (descriptor_type_label, field, ", ".join(desc_attr)))
+ else:
+ included_fields = [attr for attr in desc_attr if not attr.startswith('_')]
+
+ for field in excluded_fields:
+ try:
+ included_fields.remove(field)
+ except ValueError:
+ pass
+
+ writer = csv.DictWriter(output_file, included_fields, dialect = _ExportDialect(), extrasaction='ignore')
+
+ if header:
+ writer.writeheader()
- try:
- document.write(export_csvs(descriptors, include_fields=include_fields, exclude_fields=exclude_fields, header=header))
- except AttributeError:
- print "Provided %r object does not have a write() method." % document
- raise
+ for desc in descriptors:
+ if not isinstance(desc, stem.descriptor.Descriptor):
+ raise ValueError("Unable to export a descriptor CSV since %s is not a descriptor." % type(desc).__name__)
+ elif descriptor_type != type(desc):
+ raise ValueError("To export a descriptor CSV all of the descriptors must be of the same type. First descriptor was a %s but we later got a %s." % (descriptor_type_label, type(desc)))
+
+ writer.writerow(vars(desc))
+
diff --git a/test/unit/descriptor/__init__.py b/test/unit/descriptor/__init__.py
index cf4fceb..1837a07 100644
--- a/test/unit/descriptor/__init__.py
+++ b/test/unit/descriptor/__init__.py
@@ -2,5 +2,5 @@
Unit tests for stem.descriptor.
"""
-__all__ = ["reader", "extrainfo_descriptor", "server_descriptor"]
+__all__ = ["export", "reader", "extrainfo_descriptor", "server_descriptor"]
diff --git a/test/unit/descriptor/export.py b/test/unit/descriptor/export.py
index 12d5ed3..6ef52cb 100644
--- a/test/unit/descriptor/export.py
+++ b/test/unit/descriptor/export.py
@@ -1,146 +1,93 @@
"""
-Unit testing code for the stem.descriptor.export module
+Unit tests for stem.descriptor.export.
"""
-import unittest
-import stem.descriptor.export as export
-import test.mocking as mocking
-
-from collections import namedtuple
-import stem.descriptor as desc
-import cStringIO
-
-# Create descriptor objects.
-DESCR_DICT = {'average_bandwidth': 5242880, 'onion_key': 'RSA PUB = JAIK', 'address': '79.139.135.90', '_digest': None, 'exit_policy': ['reject *:*'], 'fingerprint': 'AAAAAAAAAAAAAAAAAAA'}
-DESCR2_DICT = {'average_bandwidth': 5555555, 'onion_key': 'RSA PUB = GOUS', 'address': '100.1.1.1', '_digest': None, 'exit_policy': ['reject *:*'], 'fingerprint': 'BBBBBBBBBBBBBBBBBBBBB'}
-DESCR3_DICT = {'bandwidth':12345,'average_bandwidth': 6666666, 'address': '101.0.0.1','extra_info':None}
-RAW = 'router TORsinn3r 46.17.96.217 9001 0 0 platform Tor 0.2.3.19-rc on Linux bandwidth 4 5 6 ...andonandon'
-
-descriptor = desc.Descriptor(RAW)
-descriptor.__dict__.update(DESCR_DICT)
-
-descriptor2 = desc.Descriptor(RAW)
-descriptor2.__dict__.update(DESCR2_DICT)
-
-descriptor3 = desc.server_descriptor.RelayDescriptor(RAW, validate=False)
-descriptor3.__dict__.update(DESCR3_DICT)
-
-# Expected return csv strings.
-SINGLE_ALL = '5242880,RSA PUB = JAIK,AAAAAAAAAAAAAAAAAAA,,router TORsinn3r 46.17.96.217 9001 0 0 platform Tor 0.2.3.19-rc on Linux bandwidth 4 5 6 ...andonandon,[\'reject *:*\'],79.139.135.90,'
-SINGLE_PART = '79.139.135.90,[\'reject *:*\']'
-SINGLE_PART2 = '5242880,,router TORsinn3r 46.17.96.217 9001 0 0 platform Tor 0.2.3.19-rc on Linux bandwidth 4 5 6 ...andonandon,[\'reject *:*\'],79.139.135.90,'
-SINGLE_PART3 = '79.139.135.90,AAAAAAAAAAAAAAAAAAA'
-DOUBLE_ALL = '5242880,RSA PUB = JAIK,AAAAAAAAAAAAAAAAAAA,,router TORsinn3r 46.17.96.217 9001 0 0 platform Tor 0.2.3.19-rc on Linux bandwidth 4 5 6 ...andonandon,[\'reject *:*\'],79.139.135.90,\r\n5555555,RSA PUB = GOUS,BBBBBBBBBBBBBBBBBBBBB,,router TORsinn3r 46.17.96.217 9001 0 0 platform Tor 0.2.3.19-rc on Linux bandwidth 4 5 6 ...andonandon,[\'reject *:*\'],100.1.1.1,\r\n'
-DOUBLE_PART = '79.139.135.90,[\'reject *:*\']\r\n100.1.1.1,[\'reject *:*\']\r\n'
-DOUBLE_PART2 = '5242880,,router TORsinn3r 46.17.96.217 9001 0 0 platform Tor 0.2.3.19-rc on Linux bandwidth 4 5 6 ...andonandon,[\'reject *:*\'],79.139.135.90,\r\n5555555,,router TORsinn3r 46.17.96.217 9001 0 0 platform Tor 0.2.3.19-rc on Linux bandwidth 4 5 6 ...andonandon,[\'reject *:*\'],100.1.1.1,\r\n'
+import StringIO
+import unittest
-SINGLE_ALL_HEAD = 'average_bandwidth,onion_key,fingerprint,_digest,_raw_contents,exit_policy,address,_path\r\n' + SINGLE_ALL + '\r\n'
-SINGLE_PART3_HEAD = 'address,fingerprint\r\n' + SINGLE_PART3
-DOUBLE_ALL_HEAD = 'average_bandwidth,onion_key,fingerprint,_digest,_raw_contents,exit_policy,address,_path\r\n' + DOUBLE_ALL
-DOUBLE_PART_HEAD = 'address,exit_policy\r\n' + DOUBLE_PART
-DOUBLE_PART2_HEAD = 'average_bandwidth,_digest,_raw_contents,exit_policy,address,_path\r\n' + DOUBLE_PART2
+from stem.descriptor.export import export_csv, export_csv_file
+from stem.descriptor.server_descriptor import RelayDescriptor, BridgeDescriptor
+from test.mocking import get_server_descriptor
class TestExport(unittest.TestCase):
- def tearDown(self):
- mocking.revert_mocking()
-
- def test_export_csv(self):
+ def test_minimal_descriptor(self):
"""
- Tests the export_csv function which takes a single descriptor object.
+ Exports a single minimal tor server descriptor.
"""
- Fields = namedtuple('Fields', 'include_fields exclude_fields')
- # Descriptors must be an iterable
- # named tuples replace dictionaries as dict keys must immutable.
- ret_vals = {((descriptor,), Fields(include_fields=(), exclude_fields=())):SINGLE_ALL,
- ((descriptor,), Fields(include_fields=('address', 'exit_policy'),
- exclude_fields=())):SINGLE_PART,
- ((descriptor,), Fields(include_fields=(),
- exclude_fields=('onion_key', 'fingerprint'))):SINGLE_PART2,
- ((descriptor,), Fields(include_fields=('address', 'exit_policy', 'fingerprint'),
- exclude_fields=('fingerprint',))):SINGLE_PART,
- ((descriptor,), Fields(include_fields=('address', 'fingerprint'),
- exclude_fields=('_digest',))):SINGLE_PART3
- }
- mocking.mock(export.export_csvs, mocking.return_for_args(ret_vals, kwarg_type=Fields))
+ desc = RelayDescriptor(get_server_descriptor())
- # Used tuples for incl/exclude_fields for parameter matching with ret_vals dict.
- self.assertEqual(SINGLE_ALL, export.export_csv(descriptor))
- self.assertEqual(SINGLE_PART, export.export_csv(descriptor,
- include_fields=('address', 'exit_policy')))
- self.assertEqual(SINGLE_PART2, export.export_csv(descriptor,
- exclude_fields=('onion_key', 'fingerprint')))
- self.assertEqual(SINGLE_PART, export.export_csv(descriptor,
- include_fields=('address', 'exit_policy', 'fingerprint'), exclude_fields=('fingerprint',)))
- self.assertEqual(SINGLE_PART3, export.export_csv(descriptor,
- include_fields=('address', 'fingerprint'), exclude_fields=('_digest',)))
-
+ desc_csv = export_csv(desc, included_fields = ('nickname', 'address', 'published'), header = False)
+ expected = "caerSidi,71.35.133.197,2012-03-01 17:15:27\n"
+ self.assertEquals(expected, desc_csv)
+
+ desc_csv = export_csv(desc, included_fields = ('nickname', 'address', 'published'), header = True)
+ expected = "nickname,address,published\n" + expected
+ self.assertEquals(expected, desc_csv)
- def test_export_csvs(self):
+ def test_multiple_descriptors(self):
"""
- Test the export_csvs function which takes a list of descriptor objects.
+ Exports multiple descriptors, making sure that we get them back in the same
+ order.
"""
- # Single descriptor
- self.assertEquals(SINGLE_ALL + "\r\n", export.export_csvs([descriptor]))
- self.assertEqual(SINGLE_PART + "\r\n", export.export_csvs([descriptor],
- include_fields=['address', 'exit_policy']))
- self.assertEqual(SINGLE_PART2 + "\r\n", export.export_csvs([descriptor],
- exclude_fields=['onion_key', 'fingerprint']))
- self.assertEqual(SINGLE_PART + "\r\n", export.export_csvs([descriptor],
- include_fields=['address', 'exit_policy', 'fingerprint'], exclude_fields=['fingerprint']))
-
- # Multiple descriptors
- self.assertEqual(DOUBLE_ALL, export.export_csvs([descriptor, descriptor2]))
- self.assertEqual(DOUBLE_PART, export.export_csvs([descriptor, descriptor2],
- include_fields=['address', 'exit_policy']))
- self.assertEqual(DOUBLE_PART2, export.export_csvs([descriptor, descriptor2],
- exclude_fields=['onion_key', 'fingerprint']))
- self.assertEqual(DOUBLE_PART, export.export_csvs([descriptor, descriptor2],
- include_fields=['address', 'exit_policy', 'fingerprint'], exclude_fields=['fingerprint']))
+ nicknames = ('relay1', 'relay3', 'relay2', 'caerSidi', 'zeus')
+ descriptors = []
- # Tests with headers
- self.assertEqual(SINGLE_ALL_HEAD, export.export_csvs([descriptor], header=True))
- self.assertEqual(SINGLE_PART3_HEAD + "\r\n", export.export_csvs([descriptor],
- include_fields=['address', 'fingerprint'], exclude_fields=['_digest'], header=True))
- self.assertEqual(DOUBLE_ALL_HEAD, export.export_csvs([descriptor, descriptor2], header=True))
- self.assertEqual(DOUBLE_PART_HEAD, export.export_csvs([descriptor, descriptor2],
- include_fields=['address', 'exit_policy'], header=True))
- self.assertEqual(DOUBLE_PART2_HEAD, export.export_csvs([descriptor, descriptor2],
- exclude_fields=['onion_key', 'fingerprint'], header=True))
+ for nickname in nicknames:
+ router_line = "%s 71.35.133.197 9001 0 0" % nickname
+ descriptors.append(RelayDescriptor(get_server_descriptor({'router': router_line})))
- # Other tests
- self.assertRaises(ValueError, export.export_csvs, [descriptor, descriptor3])
- self.assertRaises(ValueError, export.export_csvs, [descriptor, descriptor3],
- include_fields=['onion_key', 'address', 'fingerprint'], exclude_fields=['onion_key'])
-
+ expected = "\n".join(nicknames) + "\n"
+ self.assertEqual(expected, export_csv(descriptors, included_fields = ('nickname',), header = False))
-
- def test_export_csv_file(self):
+ def test_file_output(self):
"""
- Tests the export_csv_file function.
+ Basic test for the export_csv_file() function, checking that it provides
+ the same output as export_csv().
"""
- sample_csv_string = 'This, is, a, sample, string.\r\nline, two.\r\n'
- sample_csv_string2 = 'Another, sample\r\n,, second,\r\n'
- sample_file = cStringIO.StringIO()
- # Must use named tuples again for ret_vals dictionary.
- Fields = namedtuple('Fields', 'include_fields exclude_fields header')
+ desc = RelayDescriptor(get_server_descriptor())
+ desc_csv = export_csv(desc)
- ret_vals = {((descriptor,), Fields(include_fields=(), exclude_fields=(), header=True)):sample_csv_string,
- ((descriptor,), Fields(include_fields=('address', 'onion_key'), exclude_fields=('address',), header=False)):sample_csv_string2}
- # TODO Ask Danner: mock it once then do both tests (not including assertRaises), or do separate mockings.
- # the latter requires that we still include empty incl_fields and excl_fields parameters instead of
- # letting them default to []. Same for header.
- mocking.mock(export.export_csvs, mocking.return_for_args(ret_vals, kwarg_type=Fields))
+ csv_buffer = StringIO.StringIO()
+ export_csv_file(csv_buffer, desc)
- export.export_csv_file((descriptor,), sample_file)
- self.assertEqual(sample_csv_string, sample_file.getvalue())
+ self.assertEqual(desc_csv, csv_buffer.getvalue())
+
+ def test_excludes_private_attr(self):
+ """
+ Checks that the default attributes for our csv output doesn't include private fields.
+ """
- sample_file = cStringIO.StringIO()
+ desc = RelayDescriptor(get_server_descriptor())
+ desc_csv = export_csv(desc)
- export.export_csv_file((descriptor,), sample_file, include_fields=('address', 'onion_key'), exclude_fields=('address',), header=False)
- self.assertEqual(sample_csv_string2, sample_file.getvalue())
+ self.assertTrue(',signature' in desc_csv)
+ self.assertFalse(',_digest' in desc_csv)
+ self.assertFalse(',_annotation_lines' in desc_csv)
+
+ def test_empty_input(self):
+ """
+ Exercises when we don't provide any descriptors.
+ """
- # Make sure error is Raised when necessary.
- self.assertRaises(AttributeError, export.export_csv_file, (descriptor,), sample_csv_string)
+ self.assertEquals("", export_csv([]))
+
+ def test_invalid_attributes(self):
+ """
+ Attempts to make a csv with attributes that don't exist.
+ """
+
+ desc = RelayDescriptor(get_server_descriptor())
+ self.assertRaises(ValueError, export_csv, desc, ('nickname', 'blarg!'))
+
+ def test_multiple_descriptor_types(self):
+ """
+ Attempts to make a csv with multiple descriptor types.
+ """
+
+ server_desc = RelayDescriptor(get_server_descriptor())
+ bridge_desc = BridgeDescriptor(get_server_descriptor(is_bridge = True))
+ self.assertRaises(ValueError, export_csv, (server_desc, bridge_desc))
+
More information about the tor-commits
mailing list