[tor-commits] [onionoo/master] Fetch descriptors from both CollecTor instances.

karsten at torproject.org karsten at torproject.org
Thu Apr 5 18:28:38 UTC 2018


commit 520c1577c119f46f5de8db83ee9d4eb498a30e7a
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Tue Apr 3 15:19:36 2018 +0200

    Fetch descriptors from both CollecTor instances.
    
    Fixes #25700.
---
 CHANGELOG.md                                       |  1 +
 .../onionoo/updater/DescriptorSource.java          | 85 +++++++++++++++-------
 2 files changed, 60 insertions(+), 26 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 36d6471..ac15a8f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@
 
  * Medium changes
    - Add version_status field to details documents.
+   - Fetch descriptors from both CollecTor instances.
 
  * Minor changes
    - Don't attempt to un-escape character sequences in contact lines
diff --git a/src/main/java/org/torproject/onionoo/updater/DescriptorSource.java b/src/main/java/org/torproject/onionoo/updater/DescriptorSource.java
index 6bddd02..20e249d 100644
--- a/src/main/java/org/torproject/onionoo/updater/DescriptorSource.java
+++ b/src/main/java/org/torproject/onionoo/updater/DescriptorSource.java
@@ -25,32 +25,54 @@ public class DescriptorSource {
 
   private final File inDir = new File("in");
 
-  private final File inRecentDir = new File(inDir, "recent");
+  private final String[] collecTorHosts = new String[] {
+      "collector.torproject.org", "collector2.torproject.org" };
+
+  private File[] inCollecTorHostDirs;
+
+  private File[] inCollecTorHostRecentDirs;
+
+  private List<DescriptorQueue> recentDescriptorQueues;
 
   private final File inArchiveDir = new File(inDir, "archive");
 
   private final File statusDir = new File("status");
 
-  private List<DescriptorQueue> descriptorQueues;
-
   private DescriptorQueue archiveDescriptorQueue;
 
   /** Instantiates a new descriptor source. */
   public DescriptorSource() {
-    this.descriptorQueues = new ArrayList<>();
+    this.inCollecTorHostDirs = new File[this.collecTorHosts.length];
+    this.inCollecTorHostRecentDirs = new File[this.collecTorHosts.length];
+    for (int collecTorHostIndex = 0;
+         collecTorHostIndex < this.collecTorHosts.length;
+         collecTorHostIndex++) {
+      this.inCollecTorHostDirs[collecTorHostIndex]
+          = new File(this.statusDir, this.collecTorHosts[collecTorHostIndex]);
+      this.inCollecTorHostRecentDirs[collecTorHostIndex]
+          = new File(this.inCollecTorHostDirs[collecTorHostIndex], "recent");
+    }
+    this.recentDescriptorQueues = new ArrayList<>();
     this.descriptorListeners = new HashMap<>();
   }
 
-  private DescriptorQueue getDescriptorQueue(
+  private List<DescriptorQueue> getDescriptorQueues(
       DescriptorType descriptorType,
       DescriptorHistory descriptorHistory) {
-    DescriptorQueue descriptorQueue = new DescriptorQueue(
-        this.inRecentDir, descriptorType, this.statusDir);
-    if (descriptorHistory != null) {
-      descriptorQueue.readHistoryFile(descriptorHistory);
+    List<DescriptorQueue> descriptorQueues = new ArrayList<>();
+    for (int collecTorHostIndex = 0;
+         collecTorHostIndex < this.collecTorHosts.length;
+         collecTorHostIndex++) {
+      DescriptorQueue descriptorQueue = new DescriptorQueue(
+          this.inCollecTorHostRecentDirs[collecTorHostIndex], descriptorType,
+          this.inCollecTorHostDirs[collecTorHostIndex]);
+      if (descriptorHistory != null) {
+        descriptorQueue.readHistoryFile(descriptorHistory);
+      }
+      descriptorQueues.add(descriptorQueue);
     }
-    this.descriptorQueues.add(descriptorQueue);
-    return descriptorQueue;
+    this.recentDescriptorQueues.addAll(descriptorQueues);
+    return descriptorQueues;
   }
 
   private Map<DescriptorType, Set<DescriptorListener>>
@@ -68,14 +90,24 @@ public class DescriptorSource {
 
   /** Downloads descriptors from CollecTor. */
   public void downloadDescriptors() {
-    List<String> remoteDirectories = new ArrayList<>();
+    List<String> remoteDirectoriesList = new ArrayList<>();
     for (DescriptorType descriptorType : DescriptorType.values()) {
-      remoteDirectories.add("/recent/" + descriptorType.getDir());
+      remoteDirectoriesList.add("/recent/" + descriptorType.getDir());
+    }
+    for (int collecTorHostIndex = 0;
+         collecTorHostIndex < this.collecTorHosts.length;
+         collecTorHostIndex++) {
+      String collecTorBaseUrl = "https://"
+          + this.collecTorHosts[collecTorHostIndex];
+      String[] remoteDirectories = remoteDirectoriesList.toArray(new String[0]);
+      long minLastModified = 0L;
+      File localDirectory = this.inCollecTorHostDirs[collecTorHostIndex];
+      boolean deleteExtraneousLocalFiles = true;
+      DescriptorCollector dc = org.torproject.descriptor.DescriptorSourceFactory
+          .createDescriptorCollector();
+      dc.collectDescriptors(collecTorBaseUrl, remoteDirectories,
+          minLastModified, localDirectory, deleteExtraneousLocalFiles);
     }
-    DescriptorCollector dc = org.torproject.descriptor.DescriptorSourceFactory
-        .createDescriptorCollector();
-    dc.collectDescriptors("https://collector.torproject.org",
-        remoteDirectories.toArray(new String[0]), 0L, inDir, true);
   }
 
   /** Reads archived and recent descriptors from disk and feeds them into
@@ -113,12 +145,13 @@ public class DescriptorSource {
     }
     Set<DescriptorListener> descriptorListeners =
         this.descriptorListeners.get(descriptorType);
-    DescriptorQueue descriptorQueue = this.getDescriptorQueue(
-        descriptorType, descriptorHistory);
-    Descriptor descriptor;
-    while ((descriptor = descriptorQueue.nextDescriptor()) != null) {
-      for (DescriptorListener descriptorListener : descriptorListeners) {
-        descriptorListener.processDescriptor(descriptor, relay);
+    for (DescriptorQueue descriptorQueue
+        : this.getDescriptorQueues(descriptorType, descriptorHistory)) {
+      Descriptor descriptor;
+      while ((descriptor = descriptorQueue.nextDescriptor()) != null) {
+        for (DescriptorListener descriptorListener : descriptorListeners) {
+          descriptorListener.processDescriptor(descriptor, relay);
+        }
       }
     }
     log.info("Read recent/{}.", descriptorType.getDir());
@@ -185,7 +218,7 @@ public class DescriptorSource {
   /** Writes parse histories for recent descriptors to disk. */
   public void writeHistoryFiles() {
     log.debug("Writing parse histories for recent descriptors...");
-    for (DescriptorQueue descriptorQueue : this.descriptorQueues) {
+    for (DescriptorQueue descriptorQueue : this.recentDescriptorQueues) {
       descriptorQueue.writeHistoryFile();
     }
   }
@@ -194,13 +227,13 @@ public class DescriptorSource {
    * descriptors during the current execution. */
   public String getStatsString() {
     StringBuilder sb = new StringBuilder();
-    sb.append("    ").append(this.descriptorQueues.size())
+    sb.append("    ").append(this.recentDescriptorQueues.size())
       .append(" descriptor ").append("queues created for recent descriptors\n");
     int historySizeBefore = 0;
     int historySizeAfter = 0;
     long descriptors = 0L;
     long bytes = 0L;
-    for (DescriptorQueue descriptorQueue : this.descriptorQueues) {
+    for (DescriptorQueue descriptorQueue : this.recentDescriptorQueues) {
       historySizeBefore += descriptorQueue.getHistorySizeBefore();
       historySizeAfter += descriptorQueue.getHistorySizeAfter();
       descriptors += descriptorQueue.getReturnedDescriptors();



More information about the tor-commits mailing list