[tor-commits] [onionoo/release] Switch to metrics-lib's DescriptorCollector.

karsten at torproject.org karsten at torproject.org
Thu Aug 31 15:02:37 UTC 2017


commit 32992eea202770065b4cb61d4dc39bbee1b87628
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Wed May 17 15:58:22 2017 +0200

    Switch to metrics-lib's DescriptorCollector.
    
    Implements #22287.
---
 CHANGELOG.md                                       |   2 +
 .../onionoo/updater/DescriptorDownloader.java      | 184 ---------------------
 .../onionoo/updater/DescriptorSource.java          |  41 ++---
 3 files changed, 13 insertions(+), 214 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index bb38dd0..e028dd1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,8 @@
    - Remove optional fields "countries", "transports", and "versions"
      from clients objects which were still labeled as beta.
    - Add new "version" parameter to filter for Tor version.
+   - Switch from our own CollecTor downloader to metrics-lib's
+     DescriptorCollector.
 
 
 # Changes in version 4.0-1.3.0 - 2017-08-04
diff --git a/src/main/java/org/torproject/onionoo/updater/DescriptorDownloader.java b/src/main/java/org/torproject/onionoo/updater/DescriptorDownloader.java
deleted file mode 100644
index 1e41f25..0000000
--- a/src/main/java/org/torproject/onionoo/updater/DescriptorDownloader.java
+++ /dev/null
@@ -1,184 +0,0 @@
-/* Copyright 2016--2017 The Tor Project
- * See LICENSE for licensing information */
-
-package org.torproject.onionoo.updater;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.BufferedInputStream;
-import java.io.BufferedOutputStream;
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.net.HttpURLConnection;
-import java.net.URL;
-import java.util.SortedSet;
-import java.util.TreeSet;
-import java.util.zip.GZIPInputStream;
-
-class DescriptorDownloader {
-
-  private static Logger log = LoggerFactory.getLogger(
-      DescriptorDownloader.class);
-
-  private final String protocolHostNameResourcePrefix =
-      "https://collector.torproject.org/recent/";
-
-  private String directory;
-
-  private final File inDir = new File("in/recent");
-
-  public DescriptorDownloader(DescriptorType descriptorType) {
-    switch (descriptorType) {
-      case RELAY_CONSENSUSES:
-        this.directory = "relay-descriptors/consensuses/";
-        break;
-      case RELAY_SERVER_DESCRIPTORS:
-        this.directory = "relay-descriptors/server-descriptors/";
-        break;
-      case RELAY_EXTRA_INFOS:
-        this.directory = "relay-descriptors/extra-infos/";
-        break;
-      case EXIT_LISTS:
-        this.directory = "exit-lists/";
-        break;
-      case BRIDGE_STATUSES:
-        this.directory = "bridge-descriptors/statuses/";
-        break;
-      case BRIDGE_SERVER_DESCRIPTORS:
-        this.directory = "bridge-descriptors/server-descriptors/";
-        break;
-      case BRIDGE_EXTRA_INFOS:
-        this.directory = "bridge-descriptors/extra-infos/";
-        break;
-      default:
-        log.error("Unknown descriptor type.");
-        return;
-    }
-  }
-
-  private SortedSet<String> localFiles = new TreeSet<>();
-
-  public int statLocalFiles() {
-    File localDirectory = new File(this.inDir, this.directory);
-    if (localDirectory.exists()) {
-      for (File file : localDirectory.listFiles()) {
-        this.localFiles.add(file.getName());
-      }
-    }
-    return this.localFiles.size();
-  }
-
-  private SortedSet<String> remoteFiles = new TreeSet<>();
-
-  public int fetchRemoteDirectory() {
-    String directoryUrl = this.protocolHostNameResourcePrefix
-        + this.directory;
-    try {
-      URL url = new URL(directoryUrl);
-      HttpURLConnection huc = (HttpURLConnection) url.openConnection();
-      huc.setRequestMethod("GET");
-      huc.connect();
-      if (huc.getResponseCode() != 200) {
-        log.error("Could not fetch " + directoryUrl
-            + ": " + huc.getResponseCode() + " "
-            + huc.getResponseMessage() + ".  Skipping.");
-        return 0;
-      }
-      try (BufferedReader br = new BufferedReader(new InputStreamReader(
-          huc.getInputStream()))) {
-        String line;
-        while ((line = br.readLine()) != null) {
-          if (!line.trim().startsWith("<tr>")
-              || !line.contains("<a href=\"")) {
-            continue;
-          }
-          String linePart = line.substring(
-              line.indexOf("<a href=\"") + "<a href=\"".length());
-          if (!linePart.contains("\"")) {
-            continue;
-          }
-          linePart = linePart.substring(0, linePart.indexOf("\""));
-          if (linePart.endsWith("/")) {
-            continue;
-          }
-          this.remoteFiles.add(linePart);
-        }
-      }
-    } catch (IOException e) {
-      log.error("Could not fetch or parse " + directoryUrl
-          + ".  Skipping. Reason: " + e.getMessage());
-    }
-    return this.remoteFiles.size();
-  }
-
-  public int fetchRemoteFiles() {
-    int fetchedFiles = 0;
-    for (String remoteFile : this.remoteFiles) {
-      if (this.localFiles.contains(remoteFile)) {
-        continue;
-      }
-      String fileUrl = this.protocolHostNameResourcePrefix
-          + this.directory + remoteFile;
-      File localTempFile = new File(this.inDir, this.directory
-          + remoteFile + ".tmp");
-      File localFile = new File(this.inDir, this.directory + remoteFile);
-      try {
-        localFile.getParentFile().mkdirs();
-        URL url = new URL(fileUrl);
-        HttpURLConnection huc = (HttpURLConnection) url.openConnection();
-        huc.setRequestMethod("GET");
-        huc.addRequestProperty("Accept-Encoding", "gzip");
-        huc.connect();
-        if (huc.getResponseCode() != 200) {
-          log.error("Could not fetch \n\t" + fileUrl
-              + ": " + huc.getResponseCode() + " "
-              + huc.getResponseMessage() + ".  Skipping.");
-          continue;
-        }
-        InputStream is;
-        if (huc.getContentEncoding() != null
-            && huc.getContentEncoding().equalsIgnoreCase("gzip")) {
-          is = new GZIPInputStream(huc.getInputStream());
-        } else {
-          is = huc.getInputStream();
-        }
-        try (BufferedInputStream bis = new BufferedInputStream(is);
-            BufferedOutputStream bos = new BufferedOutputStream(
-            new FileOutputStream(localTempFile))) {
-          int len;
-          byte[] data = new byte[1024];
-          while ((len = bis.read(data, 0, 1024)) >= 0) {
-            bos.write(data, 0, len);
-          }
-        }
-        localTempFile.renameTo(localFile);
-        long lastModified = huc.getHeaderFieldDate("Last-Modified", -1L);
-        if (lastModified >= 0) {
-          localFile.setLastModified(lastModified);
-        }
-        fetchedFiles++;
-      } catch (IOException e) {
-        log.error("Could not fetch or store \n\t" + fileUrl
-            + ".  Skipping.\n\tReason: " + e.getMessage());
-      }
-    }
-    return fetchedFiles;
-  }
-
-  public int deleteOldLocalFiles() {
-    int deletedFiles = 0;
-    for (String localFile : this.localFiles) {
-      if (!this.remoteFiles.contains(localFile)) {
-        new File(this.inDir, this.directory + localFile).delete();
-        deletedFiles++;
-      }
-    }
-    return deletedFiles;
-  }
-}
-
diff --git a/src/main/java/org/torproject/onionoo/updater/DescriptorSource.java b/src/main/java/org/torproject/onionoo/updater/DescriptorSource.java
index 45b40ee..d32727f 100644
--- a/src/main/java/org/torproject/onionoo/updater/DescriptorSource.java
+++ b/src/main/java/org/torproject/onionoo/updater/DescriptorSource.java
@@ -4,6 +4,7 @@
 package org.torproject.onionoo.updater;
 
 import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorCollector;
 import org.torproject.onionoo.util.FormattingUtils;
 
 import org.slf4j.Logger;
@@ -22,9 +23,11 @@ public class DescriptorSource {
   private static final Logger log = LoggerFactory.getLogger(
       DescriptorSource.class);
 
-  private final File inRecentDir = new File("in/recent");
+  private final File inDir = new File("in");
 
-  private final File inArchiveDir = new File("in/archive");
+  private final File inRecentDir = new File(inDir, "recent");
+
+  private final File inArchiveDir = new File(inDir, "archive");
 
   private final File statusDir = new File("status");
 
@@ -65,28 +68,14 @@ public class DescriptorSource {
 
   /** Downloads descriptors from CollecTor. */
   public void downloadDescriptors() {
+    List<String> remoteDirectories = new ArrayList<>();
     for (DescriptorType descriptorType : DescriptorType.values()) {
-      log.info("Loading: " + descriptorType);
-      this.downloadDescriptors(descriptorType);
+      remoteDirectories.add("/recent/" + descriptorType.getDir());
     }
-  }
-
-  private int localFilesBefore = 0;
-
-  private int foundRemoteFiles = 0;
-
-  private int downloadedFiles = 0;
-
-  private int deletedLocalFiles = 0;
-
-  private void downloadDescriptors(DescriptorType descriptorType) {
-    DescriptorDownloader descriptorDownloader =
-        new DescriptorDownloader(descriptorType);
-    this.localFilesBefore += descriptorDownloader.statLocalFiles();
-    this.foundRemoteFiles +=
-        descriptorDownloader.fetchRemoteDirectory();
-    this.downloadedFiles += descriptorDownloader.fetchRemoteFiles();
-    this.deletedLocalFiles += descriptorDownloader.deleteOldLocalFiles();
+    DescriptorCollector dc = org.torproject.descriptor.DescriptorSourceFactory
+        .createDescriptorCollector();
+    dc.collectDescriptors("https://collector.torproject.org",
+        remoteDirectories.toArray(new String[0]), 0L, inDir, true);
   }
 
   /** Reads archived and recent descriptors from disk and feeds them into
@@ -206,14 +195,6 @@ public class DescriptorSource {
    * descriptors during the current execution. */
   public String getStatsString() {
     StringBuilder sb = new StringBuilder();
-    sb.append("    ").append(this.localFilesBefore)
-      .append(" recent descriptor files ").append("found locally\n");
-    sb.append("    ").append(this.foundRemoteFiles)
-      .append(" recent descriptor files ").append("found remotely\n");
-    sb.append("    ").append(this.downloadedFiles)
-      .append(" recent descriptor files ").append("downloaded from remote\n");
-    sb.append("    ").append(this.deletedLocalFiles)
-      .append(" recent descriptor ").append("files deleted locally\n");
     sb.append("    ").append(this.descriptorQueues.size())
       .append(" descriptor ").append("queues created for recent descriptors\n");
     int historySizeBefore = 0;





More information about the tor-commits mailing list