[tor-commits] [onionoo/release] Switch to metrics-lib's DescriptorCollector.
karsten at torproject.org
karsten at torproject.org
Thu Aug 31 15:02:37 UTC 2017
commit 32992eea202770065b4cb61d4dc39bbee1b87628
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Wed May 17 15:58:22 2017 +0200
Switch to metrics-lib's DescriptorCollector.
Implements #22287.
---
CHANGELOG.md | 2 +
.../onionoo/updater/DescriptorDownloader.java | 184 ---------------------
.../onionoo/updater/DescriptorSource.java | 41 ++---
3 files changed, 13 insertions(+), 214 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index bb38dd0..e028dd1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,8 @@
- Remove optional fields "countries", "transports", and "versions"
from clients objects which were still labeled as beta.
- Add new "version" parameter to filter for Tor version.
+ - Switch from our own CollecTor downloader to metrics-lib's
+ DescriptorCollector.
# Changes in version 4.0-1.3.0 - 2017-08-04
diff --git a/src/main/java/org/torproject/onionoo/updater/DescriptorDownloader.java b/src/main/java/org/torproject/onionoo/updater/DescriptorDownloader.java
deleted file mode 100644
index 1e41f25..0000000
--- a/src/main/java/org/torproject/onionoo/updater/DescriptorDownloader.java
+++ /dev/null
@@ -1,184 +0,0 @@
-/* Copyright 2016--2017 The Tor Project
- * See LICENSE for licensing information */
-
-package org.torproject.onionoo.updater;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.BufferedInputStream;
-import java.io.BufferedOutputStream;
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.net.HttpURLConnection;
-import java.net.URL;
-import java.util.SortedSet;
-import java.util.TreeSet;
-import java.util.zip.GZIPInputStream;
-
-class DescriptorDownloader {
-
- private static Logger log = LoggerFactory.getLogger(
- DescriptorDownloader.class);
-
- private final String protocolHostNameResourcePrefix =
- "https://collector.torproject.org/recent/";
-
- private String directory;
-
- private final File inDir = new File("in/recent");
-
- public DescriptorDownloader(DescriptorType descriptorType) {
- switch (descriptorType) {
- case RELAY_CONSENSUSES:
- this.directory = "relay-descriptors/consensuses/";
- break;
- case RELAY_SERVER_DESCRIPTORS:
- this.directory = "relay-descriptors/server-descriptors/";
- break;
- case RELAY_EXTRA_INFOS:
- this.directory = "relay-descriptors/extra-infos/";
- break;
- case EXIT_LISTS:
- this.directory = "exit-lists/";
- break;
- case BRIDGE_STATUSES:
- this.directory = "bridge-descriptors/statuses/";
- break;
- case BRIDGE_SERVER_DESCRIPTORS:
- this.directory = "bridge-descriptors/server-descriptors/";
- break;
- case BRIDGE_EXTRA_INFOS:
- this.directory = "bridge-descriptors/extra-infos/";
- break;
- default:
- log.error("Unknown descriptor type.");
- return;
- }
- }
-
- private SortedSet<String> localFiles = new TreeSet<>();
-
- public int statLocalFiles() {
- File localDirectory = new File(this.inDir, this.directory);
- if (localDirectory.exists()) {
- for (File file : localDirectory.listFiles()) {
- this.localFiles.add(file.getName());
- }
- }
- return this.localFiles.size();
- }
-
- private SortedSet<String> remoteFiles = new TreeSet<>();
-
- public int fetchRemoteDirectory() {
- String directoryUrl = this.protocolHostNameResourcePrefix
- + this.directory;
- try {
- URL url = new URL(directoryUrl);
- HttpURLConnection huc = (HttpURLConnection) url.openConnection();
- huc.setRequestMethod("GET");
- huc.connect();
- if (huc.getResponseCode() != 200) {
- log.error("Could not fetch " + directoryUrl
- + ": " + huc.getResponseCode() + " "
- + huc.getResponseMessage() + ". Skipping.");
- return 0;
- }
- try (BufferedReader br = new BufferedReader(new InputStreamReader(
- huc.getInputStream()))) {
- String line;
- while ((line = br.readLine()) != null) {
- if (!line.trim().startsWith("<tr>")
- || !line.contains("<a href=\"")) {
- continue;
- }
- String linePart = line.substring(
- line.indexOf("<a href=\"") + "<a href=\"".length());
- if (!linePart.contains("\"")) {
- continue;
- }
- linePart = linePart.substring(0, linePart.indexOf("\""));
- if (linePart.endsWith("/")) {
- continue;
- }
- this.remoteFiles.add(linePart);
- }
- }
- } catch (IOException e) {
- log.error("Could not fetch or parse " + directoryUrl
- + ". Skipping. Reason: " + e.getMessage());
- }
- return this.remoteFiles.size();
- }
-
- public int fetchRemoteFiles() {
- int fetchedFiles = 0;
- for (String remoteFile : this.remoteFiles) {
- if (this.localFiles.contains(remoteFile)) {
- continue;
- }
- String fileUrl = this.protocolHostNameResourcePrefix
- + this.directory + remoteFile;
- File localTempFile = new File(this.inDir, this.directory
- + remoteFile + ".tmp");
- File localFile = new File(this.inDir, this.directory + remoteFile);
- try {
- localFile.getParentFile().mkdirs();
- URL url = new URL(fileUrl);
- HttpURLConnection huc = (HttpURLConnection) url.openConnection();
- huc.setRequestMethod("GET");
- huc.addRequestProperty("Accept-Encoding", "gzip");
- huc.connect();
- if (huc.getResponseCode() != 200) {
- log.error("Could not fetch \n\t" + fileUrl
- + ": " + huc.getResponseCode() + " "
- + huc.getResponseMessage() + ". Skipping.");
- continue;
- }
- InputStream is;
- if (huc.getContentEncoding() != null
- && huc.getContentEncoding().equalsIgnoreCase("gzip")) {
- is = new GZIPInputStream(huc.getInputStream());
- } else {
- is = huc.getInputStream();
- }
- try (BufferedInputStream bis = new BufferedInputStream(is);
- BufferedOutputStream bos = new BufferedOutputStream(
- new FileOutputStream(localTempFile))) {
- int len;
- byte[] data = new byte[1024];
- while ((len = bis.read(data, 0, 1024)) >= 0) {
- bos.write(data, 0, len);
- }
- }
- localTempFile.renameTo(localFile);
- long lastModified = huc.getHeaderFieldDate("Last-Modified", -1L);
- if (lastModified >= 0) {
- localFile.setLastModified(lastModified);
- }
- fetchedFiles++;
- } catch (IOException e) {
- log.error("Could not fetch or store \n\t" + fileUrl
- + ". Skipping.\n\tReason: " + e.getMessage());
- }
- }
- return fetchedFiles;
- }
-
- public int deleteOldLocalFiles() {
- int deletedFiles = 0;
- for (String localFile : this.localFiles) {
- if (!this.remoteFiles.contains(localFile)) {
- new File(this.inDir, this.directory + localFile).delete();
- deletedFiles++;
- }
- }
- return deletedFiles;
- }
-}
-
diff --git a/src/main/java/org/torproject/onionoo/updater/DescriptorSource.java b/src/main/java/org/torproject/onionoo/updater/DescriptorSource.java
index 45b40ee..d32727f 100644
--- a/src/main/java/org/torproject/onionoo/updater/DescriptorSource.java
+++ b/src/main/java/org/torproject/onionoo/updater/DescriptorSource.java
@@ -4,6 +4,7 @@
package org.torproject.onionoo.updater;
import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorCollector;
import org.torproject.onionoo.util.FormattingUtils;
import org.slf4j.Logger;
@@ -22,9 +23,11 @@ public class DescriptorSource {
private static final Logger log = LoggerFactory.getLogger(
DescriptorSource.class);
- private final File inRecentDir = new File("in/recent");
+ private final File inDir = new File("in");
- private final File inArchiveDir = new File("in/archive");
+ private final File inRecentDir = new File(inDir, "recent");
+
+ private final File inArchiveDir = new File(inDir, "archive");
private final File statusDir = new File("status");
@@ -65,28 +68,14 @@ public class DescriptorSource {
/** Downloads descriptors from CollecTor. */
public void downloadDescriptors() {
+ List<String> remoteDirectories = new ArrayList<>();
for (DescriptorType descriptorType : DescriptorType.values()) {
- log.info("Loading: " + descriptorType);
- this.downloadDescriptors(descriptorType);
+ remoteDirectories.add("/recent/" + descriptorType.getDir());
}
- }
-
- private int localFilesBefore = 0;
-
- private int foundRemoteFiles = 0;
-
- private int downloadedFiles = 0;
-
- private int deletedLocalFiles = 0;
-
- private void downloadDescriptors(DescriptorType descriptorType) {
- DescriptorDownloader descriptorDownloader =
- new DescriptorDownloader(descriptorType);
- this.localFilesBefore += descriptorDownloader.statLocalFiles();
- this.foundRemoteFiles +=
- descriptorDownloader.fetchRemoteDirectory();
- this.downloadedFiles += descriptorDownloader.fetchRemoteFiles();
- this.deletedLocalFiles += descriptorDownloader.deleteOldLocalFiles();
+ DescriptorCollector dc = org.torproject.descriptor.DescriptorSourceFactory
+ .createDescriptorCollector();
+ dc.collectDescriptors("https://collector.torproject.org",
+ remoteDirectories.toArray(new String[0]), 0L, inDir, true);
}
/** Reads archived and recent descriptors from disk and feeds them into
@@ -206,14 +195,6 @@ public class DescriptorSource {
* descriptors during the current execution. */
public String getStatsString() {
StringBuilder sb = new StringBuilder();
- sb.append(" ").append(this.localFilesBefore)
- .append(" recent descriptor files ").append("found locally\n");
- sb.append(" ").append(this.foundRemoteFiles)
- .append(" recent descriptor files ").append("found remotely\n");
- sb.append(" ").append(this.downloadedFiles)
- .append(" recent descriptor files ").append("downloaded from remote\n");
- sb.append(" ").append(this.deletedLocalFiles)
- .append(" recent descriptor ").append("files deleted locally\n");
sb.append(" ").append(this.descriptorQueues.size())
.append(" descriptor ").append("queues created for recent descriptors\n");
int historySizeBefore = 0;
More information about the tor-commits
mailing list