[tor-commits] [stem/master] Descriptor module with common functionality
atagar at torproject.org
atagar at torproject.org
Mon Mar 26 00:10:01 UTC 2012
commit da4ccb0bedc122c4776048751c4de0b6cc0e010e
Author: Damian Johnson <atagar at torproject.org>
Date: Wed Mar 14 09:55:38 2012 -0700
Descriptor module with common functionality
Adding the descriptor module which has the Descriptor class (the common parent
for all descriptor types) and the function for parsing descriptors from a file.
This parser will later do type recognition to figure out how descriptors
should be parsed, which is the same approach that metrics-lib takes.
A nice advantage is that we can now move all of the remaining 'TODO' notes out
of the reader. It's done. :)
---
stem/descriptor/__init__.py | 2 +-
stem/descriptor/descriptor.py | 58 +++++++++++++++++++++++++++++++++++++++++
stem/descriptor/reader.py | 38 ++++++++++++++++----------
3 files changed, 82 insertions(+), 16 deletions(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index fef6750..33970ef 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -2,5 +2,5 @@
Utilities for parsing and processing descriptor data.
"""
-__all__ = ["reader"]
+__all__ = ["descriptor", "reader"]
diff --git a/stem/descriptor/descriptor.py b/stem/descriptor/descriptor.py
new file mode 100644
index 0000000..90d3bed
--- /dev/null
+++ b/stem/descriptor/descriptor.py
@@ -0,0 +1,58 @@
+"""
+Common functionality for descriptors.
+"""
+
+def parse_descriptors(path, descriptor_file):
+ """
+ Provides an iterator for the descriptors within a given file.
+
+ Arguments:
+ path (str) - absolute path to the file's location on disk
+ descriptor_file (file) - opened file with the descriptor contents
+
+ Returns:
+ iterator that parses the file's contents into descriptors
+
+ Raises:
+ TypeError if we can't match the contents of the file to a descriptor type
+ IOError if unable to read from the descriptor_file
+ """
+
+ # TODO: implement actual descriptor type recognition and parsing
+ # TODO: add integ test for non-descriptor text content
+ yield Descriptor(path, descriptor_file.read())
+
+class Descriptor:
+ """
+ Common parent for all types of descriptors.
+ """
+
+ def __init__(self, path, raw_contents):
+ self._path = path
+ self._raw_contents = raw_contents
+
+ def get_path(self):
+ """
+ Provides the absolute path that we loaded this descriptor from.
+
+ Returns:
+ str with the absolute path of the descriptor source
+ """
+
+ return self._path
+
+ def get_unrecognized_lines(self):
+ """
+ Provides a list of lines that were either ignored or had data that we did
+ not know how to process. This is most common due to new descriptor fields
+ that this library does not yet know how to process. Patches welcome!
+
+ Returns:
+ list of lines of unrecognized content
+ """
+
+ return []
+
+ def __str__(self):
+ return self._raw_contents
+
diff --git a/stem/descriptor/reader.py b/stem/descriptor/reader.py
index 8117361..3b6f5fd 100644
--- a/stem/descriptor/reader.py
+++ b/stem/descriptor/reader.py
@@ -70,6 +70,8 @@ import threading
import mimetypes
import Queue
+import stem.descriptor.descriptor
+
# flag to indicate when the reader thread is out of descriptor files to read
FINISHED = "DONE"
@@ -362,25 +364,31 @@ class DescriptorReader:
def _handle_descriptor_file(self, target):
try:
- # TODO: replace with actual descriptor parsing when we have it
- target_file = open(target)
- self._enqueue_descriptor(target_file.read())
- target_file.close()
-
- self._iter_notice.set()
+ with open(target) as target_file:
+ for desc in stem.descriptor.descriptor.parse_descriptors(target, target_file):
+ self._enqueue_descriptor(desc)
+ self._iter_notice.set()
+ except TypeError, exc:
+ self._notify_skip_listeners(target, ParsingFailure(exc))
except IOError, exc:
self._notify_skip_listeners(target, ReadFailed(exc))
def _handle_archive(self, target):
- with tarfile.open(target) as tar_file:
- for tar_entry in tar_file:
- if tar_entry.isfile():
- # TODO: replace with actual descriptor parsing when we have it
- entry = tar_file.extractfile(tar_entry)
- self._enqueue_descriptor(entry.read())
- entry.close()
-
- self._iter_notice.set()
+ try:
+ with tarfile.open(target) as tar_file:
+ for tar_entry in tar_file:
+ if tar_entry.isfile():
+ entry = tar_file.extractfile(tar_entry)
+
+ for desc in stem.descriptor.descriptor.parse_descriptors(target, entry):
+ self._enqueue_descriptor(desc)
+ self._iter_notice.set()
+
+ entry.close()
+ except TypeError, exc:
+ self._notify_skip_listeners(target, ParsingFailure(exc))
+ except IOError, exc:
+ self._notify_skip_listeners(target, ReadFailed(exc))
def _enqueue_descriptor(self, descriptor):
# blocks until their is either room for the descriptor or we're stopped
More information about the tor-commits mailing list