[tor-commits] [metrics-lib/master] Support reading descriptor tarballs.

karsten at torproject.org karsten at torproject.org
Fri Mar 30 13:10:08 UTC 2012


commit afca9db71c6fb0704553a1b4a0d2baef507458c5
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Fri Mar 30 15:09:45 2012 +0200

    Support reading descriptor tarballs.
---
 .../torproject/descriptor/DescriptorReader.java    |    7 ++-
 .../descriptor/impl/DescriptorReaderImpl.java      |   90 +++++++++++++++++++-
 2 files changed, 93 insertions(+), 4 deletions(-)

diff --git a/src/org/torproject/descriptor/DescriptorReader.java b/src/org/torproject/descriptor/DescriptorReader.java
index 1d46a79..167f8b6 100644
--- a/src/org/torproject/descriptor/DescriptorReader.java
+++ b/src/org/torproject/descriptor/DescriptorReader.java
@@ -8,9 +8,14 @@ import java.util.Iterator;
 /* Read descriptors from one or more local directories. */
 public interface DescriptorReader {
 
-  /* Add a local directory to read descriptors from. */
+  /* Add a local directory to read descriptor files or tarballs containing
+   * descriptor files from. */
   public void addDirectory(File directory);
 
+  /* Add an uncompressed or bz2-compressed tarball to read descriptors
+   * from. */
+  public void addTarball(File tarball);
+
   /* Exclude files that are contained in the given history file and that
    * haven't changed since they were last read.  Add reads from the
    * current run to the history file.  Remove files that don't exist
diff --git a/src/org/torproject/descriptor/impl/DescriptorReaderImpl.java b/src/org/torproject/descriptor/impl/DescriptorReaderImpl.java
index c22cbe8..ee45fa1 100644
--- a/src/org/torproject/descriptor/impl/DescriptorReaderImpl.java
+++ b/src/org/torproject/descriptor/impl/DescriptorReaderImpl.java
@@ -20,6 +20,9 @@ import java.util.SortedMap;
 import java.util.Stack;
 import java.util.TreeMap;
 
+import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
+import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
+import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
 import org.torproject.descriptor.Descriptor;
 import org.torproject.descriptor.DescriptorFile;
 import org.torproject.descriptor.DescriptorReader;
@@ -37,6 +40,15 @@ public class DescriptorReaderImpl implements DescriptorReader {
     this.directories.add(directory);
   }
 
+  private List<File> tarballs = new ArrayList<File>();
+  public void addTarball(File tarball) {
+    if (this.hasStartedReading) {
+      throw new IllegalStateException("Reconfiguration is not permitted "
+          + "after starting to read.");
+    }
+    this.tarballs.add(tarball);
+  }
+
   private File historyFile;
   public void setExcludeFiles(File historyFile) {
     if (this.hasStartedReading) {
@@ -64,21 +76,24 @@ public class DescriptorReaderImpl implements DescriptorReader {
     BlockingIteratorImpl<DescriptorFile> descriptorQueue =
         new BlockingIteratorImpl<DescriptorFile>();
     DescriptorReaderRunnable reader = new DescriptorReaderRunnable(
-        this.directories, descriptorQueue, this.historyFile,
-        this.failUnrecognizedDescriptorLines);
+        this.directories, this.tarballs, descriptorQueue,
+        this.historyFile, this.failUnrecognizedDescriptorLines);
     new Thread(reader).start();
     return descriptorQueue;
   }
 
   private static class DescriptorReaderRunnable implements Runnable {
     private List<File> directories;
+    private List<File> tarballs;
     private BlockingIteratorImpl<DescriptorFile> descriptorQueue;
     private File historyFile;
     private boolean failUnrecognizedDescriptorLines;
     private DescriptorReaderRunnable(List<File> directories,
+        List<File> tarballs,
         BlockingIteratorImpl<DescriptorFile> descriptorQueue,
         File historyFile, boolean failUnrecognizedDescriptorLines) {
       this.directories = directories;
+      this.tarballs = tarballs;
       this.descriptorQueue = descriptorQueue;
       this.historyFile = historyFile;
       this.failUnrecognizedDescriptorLines =
@@ -87,6 +102,8 @@ public class DescriptorReaderImpl implements DescriptorReader {
     public void run() {
       this.readOldHistory();
       this.readDescriptors();
+      this.readTarballs();
+      this.descriptorQueue.setOutOfDescriptors();
       this.writeNewHistory();
     }
     private SortedMap<String, Long>
@@ -147,6 +164,9 @@ public class DescriptorReaderImpl implements DescriptorReader {
           File file = files.pop();
           if (file.isDirectory()) {
             files.addAll(Arrays.asList(file.listFiles()));
+          } else if (file.getName().endsWith(".tar") ||
+              file.getName().endsWith(".tar.bz2")) {
+            this.tarballs.add(file);
           } else {
             String absolutePath = file.getAbsolutePath();
             long lastModifiedMillis = file.lastModified();
@@ -171,7 +191,71 @@ public class DescriptorReaderImpl implements DescriptorReader {
           }
         }
       }
-      this.descriptorQueue.setOutOfDescriptors();
+    }
+    private void readTarballs() {
+      List<File> files = new ArrayList<File>(this.tarballs);
+      boolean abortReading = false;
+      while (!abortReading && !files.isEmpty()) {
+        File tarball = files.remove(0);
+        if (!tarball.getName().endsWith(".tar") &&
+            !tarball.getName().endsWith(".tar.bz2")) {
+          continue;
+        }
+        String absolutePath = tarball.getAbsolutePath();
+        long lastModifiedMillis = tarball.lastModified();
+        this.newHistory.put(absolutePath, lastModifiedMillis);
+        if (this.oldHistory.containsKey(absolutePath) &&
+            this.oldHistory.get(absolutePath) == lastModifiedMillis) {
+          continue;
+        }
+        try {
+          FileInputStream in = new FileInputStream(tarball);
+          if (in.available() > 0) {
+            TarArchiveInputStream tais = null;
+            if (tarball.getName().endsWith(".tar.bz2")) {
+              tais = new TarArchiveInputStream(
+                  new BZip2CompressorInputStream(in));
+            } else if (tarball.getName().endsWith(".tar")) {
+              tais = new TarArchiveInputStream(in);
+            }
+            BufferedInputStream bis = new BufferedInputStream(tais);
+            TarArchiveEntry tae = null;
+            while ((tae = tais.getNextTarEntry()) != null) {
+              DescriptorFileImpl descriptorFile =
+                  new DescriptorFileImpl();
+              /* TODO Is it correct to set these values for files
+               * contained in a tarball? */
+              descriptorFile.setDirectory(tarball);
+              descriptorFile.setFile(null);
+              descriptorFile.setLastModified(lastModifiedMillis);
+              ByteArrayOutputStream baos = new ByteArrayOutputStream();
+              int len;
+              byte[] data = new byte[1024];
+              while ((len = bis.read(data, 0, 1024)) >= 0) {
+                baos.write(data, 0, len);
+              }
+              byte[] rawDescriptorBytes = baos.toByteArray();
+              if (rawDescriptorBytes.length < 1) {
+                continue;
+              }
+              try {
+                String fileName = tae.getName().substring(
+                    tae.getName().lastIndexOf("/") + 1);
+                List<Descriptor> parsedDescriptors =
+                    DescriptorImpl.parseRelayOrBridgeDescriptors(
+                    rawDescriptorBytes, fileName,
+                    this.failUnrecognizedDescriptorLines);
+                descriptorFile.setDescriptors(parsedDescriptors);
+              } catch (DescriptorParseException e) {
+                descriptorFile.setException(e);
+             }
+             this.descriptorQueue.add(descriptorFile);
+            }
+          }
+        } catch (IOException e) {
+          abortReading = true;
+        }
+      }
     }
     private List<Descriptor> readFile(File file) throws IOException,
         DescriptorParseException {



More information about the tor-commits mailing list