[tor-commits] [stem/master] Skeleton for descriptor reader
atagar at torproject.org
atagar at torproject.org
Mon Mar 26 00:10:01 UTC 2012
commit 5a7bb2584ec976b64a91986e5aa8ca60d8ab60a0
Author: Damian Johnson <atagar at torproject.org>
Date: Sun Mar 4 01:22:36 2012 -0800
Skeleton for descriptor reader
Initial skeleton for a class to read descriptor data, not including most of the
functional bits. This will be similar to the metrics-lib DescriptorReader
class, but with a slightly different API and the ability to handle tarball
archives.
---
stem/descriptor/__init__.py | 6 ++
stem/descriptor/reader.py | 119 +++++++++++++++++++++++++++++++++++++++++++
2 files changed, 125 insertions(+), 0 deletions(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
new file mode 100644
index 0000000..fef6750
--- /dev/null
+++ b/stem/descriptor/__init__.py
@@ -0,0 +1,6 @@
+"""
+Utilities for parsing and processing descriptor data.
+"""
+
+__all__ = ["reader"]  # submodules pulled in by "from stem.descriptor import *"
+
diff --git a/stem/descriptor/reader.py b/stem/descriptor/reader.py
new file mode 100644
index 0000000..b6404f3
--- /dev/null
+++ b/stem/descriptor/reader.py
@@ -0,0 +1,119 @@
+"""
+Reads descriptors from local directories and archives.
+
+Example:
+ my_descriptors = [
+ "/tmp/server-descriptors-2012-03.tar.bz2",
+ "/tmp/archived_descriptors/",
+ ]
+
+ reader = DescriptorReader(my_descriptors)
+ reader.start()
+
+ # prints all of the descriptor contents
+ for descriptor in reader:
+ print descriptor
+
+ reader.stop()
+ reader.join()
+"""
+
+import os
+import threading
+import mimetypes
+import Queue
+
+class DescriptorReader(threading.Thread):
+    """
+    Worker thread that reads descriptor data from the local file system.
+
+    Targets are intended to be text files, tarball archives (gzip or bzip2),
+    or directories that get recursed into.
+
+    Files that cannot be processed (either due to read errors or because they
+    don't contain descriptor data) are meant to be ignored. The caller can be
+    notified of files that are skipped by registering a listener with
+    register_skip_listener().
+
+    This is a skeleton: run() and stop() are not implemented yet and the class
+    does not define __iter__() so far, so no descriptors are produced.
+    """
+
+    def __init__(self, targets):
+        """
+        Creates a reader for the given targets without starting it.
+
+        Arguments:
+          targets (list) - paths of the files, archives, or directories to
+                           read descriptors from
+        """
+
+        # Thread.start() refuses to run unless the base class constructor was
+        # invoked, so it has to be called before anything else uses the thread.
+        threading.Thread.__init__(self)
+
+        self.targets = targets
+        self.skip_listeners = []
+        self.processed_files = {}
+
+    def stop(self):
+        """
+        Stops further reading of descriptors.
+        """
+
+        pass # TODO: implement
+
+    def get_processed_files(self):
+        """
+        For each file we have provided descriptor data for this provides a
+        mapping of the form...
+
+        absolute_path (str) => modified_time (int)
+
+        This includes entries set through the set_processed_files() method.
+
+        Returns:
+          dict with the paths and unix timestamp for the last modified times
+          of the files we have processed (this is the reader's own dict, not
+          a copy)
+        """
+
+        return self.processed_files
+
+    def set_processed_files(self, processed_files):
+        """
+        Appends a dictionary of 'path => modified timestamp' mappings to our
+        listing of processed files. With the get_processed_files() method this
+        can be used to skip descriptors that we have already read. For
+        instance...
+
+          # gets the initial descriptors
+          reader = DescriptorReader(["/tmp/descriptor_data"])
+
+          with reader:
+            initial_descriptors = list(reader)
+            processed_files = reader.get_processed_files()
+
+          # only gets the descriptors that have changed since we last checked
+          reader = DescriptorReader(["/tmp/descriptor_data"])
+          reader.set_processed_files(processed_files)
+
+          with reader:
+            new_descriptors = list(reader)
+
+        Arguments:
+          processed_files (dict) - mapping of absolute paths (str) to unix
+                                   timestamps for the last modified time (int)
+        """
+
+        # entries for paths we already track are overwritten by the new values
+        self.processed_files.update(processed_files)
+
+    def register_skip_listener(self, listener):
+        """
+        Registers a listener for files that are skipped. This listener is
+        expected to be a functor of the form...
+
+          my_listener(path, exception)
+
+        Arguments:
+          listener (functor) - functor to be notified of files that are
+                               skipped due to read errors or because they
+                               couldn't be parsed as valid descriptor data
+        """
+
+        self.skip_listeners.append(listener)
+
+    def run(self):
+        """
+        Body of the reader thread, invoked through start().
+        """
+
+        pass # TODO: implement
+
+    def _notify_skip_listener(self, path, exception):
+        """
+        Calls every registered skip listener, in registration order.
+
+        Arguments:
+          path (str)            - path of the file that was skipped
+          exception (Exception) - reason the file was skipped
+        """
+
+        for listener in self.skip_listeners:
+            listener(path, exception)
+
+    def __enter__(self):
+        """
+        Starts the reader thread when entering a 'with' block.
+
+        Returns:
+          this DescriptorReader, so 'with DescriptorReader(...) as reader'
+          binds the reader rather than None
+        """
+
+        self.start()
+        return self
+
+    def __exit__(self, type, value, traceback):
+        """
+        Stops the reader and waits for its thread to finish when leaving a
+        'with' block. Returns None, so an exception raised inside the block
+        still propagates to the caller.
+        """
+
+        self.stop()
+        self.join()
+
More information about the tor-commits
mailing list