[tor-commits] [collector/master] Create utility class for downloading from HTTP servers

karsten at torproject.org karsten at torproject.org
Fri Sep 27 08:42:25 UTC 2019


commit dc3a2e4aea03ca44be033dafc3dcc3d7e0d5c07f
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Thu Sep 26 11:48:18 2019 +0200

    Create utility class for downloading from HTTP servers
    
    Also skip two tests in ConfigurationTest that together take 53 seconds
    which has the effect that we don't run tests very often.
    
    Implements #31599.
---
 CHANGELOG.md                                       |   6 +
 src/build                                          |   2 +-
 .../metrics/collector/downloader/Downloader.java   |  62 ++++++++++
 .../collector/exitlists/ExitListDownloader.java    |  33 ++---
 .../collector/onionperf/OnionPerfDownloader.java   |  23 +++-
 .../relaydescs/RelayDescriptorDownloader.java      |  32 +----
 .../snowflake/SnowflakeStatsDownloader.java        |  56 ++-------
 .../metrics/collector/conf/ConfigurationTest.java  |   3 +
 .../collector/downloader/DownloaderTest.java       | 134 +++++++++++++++++++++
 9 files changed, 250 insertions(+), 101 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e30b086..6e72f02 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,9 @@
+# Changes in version 1.12.0 - 2019-??-??
+
+ * Medium changes
+   - Require Mockito 1.10.19 as dependency for running tests.
+
+
 # Changes in version 1.11.1 - 2019-09-19
 
  * Minor changes
diff --git a/src/build b/src/build
index 07c2a00..62a964a 160000
--- a/src/build
+++ b/src/build
@@ -1 +1 @@
-Subproject commit 07c2a00c27f0d536223f8b5a61fc91e60eb524d4
+Subproject commit 62a964a7702086ca6ba0e2307809d4f983163b47
diff --git a/src/main/java/org/torproject/metrics/collector/downloader/Downloader.java b/src/main/java/org/torproject/metrics/collector/downloader/Downloader.java
new file mode 100644
index 0000000..a1f3852
--- /dev/null
+++ b/src/main/java/org/torproject/metrics/collector/downloader/Downloader.java
@@ -0,0 +1,62 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.downloader;
+
+import java.io.BufferedInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.util.zip.InflaterInputStream;
+
+/**
+ * Utility class for downloading resources from HTTP servers.
+ */
+public class Downloader {
+
+  /**
+   * Download the given URL from an HTTP server and return downloaded bytes.
+   *
+   * @param url URL to download.
+   * @return Downloaded bytes, or {@code null} if the resource was not found.
+   * @throws IOException Thrown if anything goes wrong while downloading.
+   */
+  public static byte[] downloadFromHttpServer(URL url) throws IOException {
+    return Downloader.downloadFromHttpServer(url, false);
+  }
+
+  /**
+   * Download the given URL from an HTTP server, possibly inflate the response,
+   * and return downloaded bytes.
+   *
+   * @param url URL to download.
+   * @param isDeflated Whether the response is deflated.
+   * @return Downloaded bytes, or {@code null} if the resource was not found.
+   * @throws IOException Thrown if anything goes wrong while downloading.
+   */
+  public static byte[] downloadFromHttpServer(URL url, boolean isDeflated)
+      throws IOException {
+    ByteArrayOutputStream downloadedBytes = new ByteArrayOutputStream();
+    HttpURLConnection huc = (HttpURLConnection) url.openConnection();
+    huc.setRequestMethod("GET");
+    huc.setReadTimeout(5000);
+    huc.connect();
+    int response = huc.getResponseCode();
+    if (response != 200) {
+      return null;
+    }
+    try (BufferedInputStream in = isDeflated
+        ? new BufferedInputStream(new InflaterInputStream(
+            huc.getInputStream()))
+        : new BufferedInputStream(huc.getInputStream())) {
+      int len;
+      byte[] data = new byte[1024];
+      while ((len = in.read(data, 0, 1024)) >= 0) {
+        downloadedBytes.write(data, 0, len);
+      }
+    }
+    return downloadedBytes.toByteArray();
+  }
+}
+
diff --git a/src/main/java/org/torproject/metrics/collector/exitlists/ExitListDownloader.java b/src/main/java/org/torproject/metrics/collector/exitlists/ExitListDownloader.java
index 66fc1a7..b4bee15 100644
--- a/src/main/java/org/torproject/metrics/collector/exitlists/ExitListDownloader.java
+++ b/src/main/java/org/torproject/metrics/collector/exitlists/ExitListDownloader.java
@@ -12,16 +12,15 @@ import org.torproject.metrics.collector.conf.Configuration;
 import org.torproject.metrics.collector.conf.ConfigurationException;
 import org.torproject.metrics.collector.conf.Key;
 import org.torproject.metrics.collector.cron.CollecTorMain;
+import org.torproject.metrics.collector.downloader.Downloader;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.BufferedInputStream;
 import java.io.BufferedWriter;
 import java.io.File;
 import java.io.FileWriter;
 import java.io.IOException;
-import java.net.HttpURLConnection;
 import java.net.URL;
 import java.nio.file.Paths;
 import java.text.SimpleDateFormat;
@@ -77,32 +76,18 @@ public class ExitListDownloader extends CollecTorMain {
     sb.append("Downloaded ").append(dateTimeFormat.format(downloadedDate))
         .append("\n");
     URL url = config.getUrl(Key.ExitlistUrl);
-    try  {
-      HttpURLConnection huc = (HttpURLConnection) url.openConnection();
-      huc.setRequestMethod("GET");
-      huc.setReadTimeout(5000);
-      huc.connect();
-      int response = huc.getResponseCode();
-      if (response != 200) {
-        logger.warn("Could not download exit list. Response code {}",
-            response);
-        return;
-      }
-      try (BufferedInputStream in = new BufferedInputStream(
-          huc.getInputStream())) {
-        int len;
-        byte[] data = new byte[1024];
-        while ((len = in.read(data, 0, 1024)) >= 0) {
-          sb.append(new String(data, 0, len));
-        }
-      }
-      downloadedExitList = sb.toString();
-      logger.debug("Finished downloading exit list.");
+    byte[] downloadedBytes;
+    try {
+      downloadedBytes = Downloader.downloadFromHttpServer(url);
     } catch (IOException e) {
       logger.warn("Failed downloading exit list", e);
       return;
     }
-    if (downloadedExitList == null) {
+    if (null != downloadedBytes) {
+      sb.append(new String(downloadedBytes));
+      downloadedExitList = sb.toString();
+      logger.debug("Finished downloading exit list.");
+    } else {
       logger.warn("Failed downloading exit list.");
       return;
     }
diff --git a/src/main/java/org/torproject/metrics/collector/onionperf/OnionPerfDownloader.java b/src/main/java/org/torproject/metrics/collector/onionperf/OnionPerfDownloader.java
index ca307a5..b106be7 100644
--- a/src/main/java/org/torproject/metrics/collector/onionperf/OnionPerfDownloader.java
+++ b/src/main/java/org/torproject/metrics/collector/onionperf/OnionPerfDownloader.java
@@ -11,6 +11,7 @@ import org.torproject.metrics.collector.conf.Configuration;
 import org.torproject.metrics.collector.conf.ConfigurationException;
 import org.torproject.metrics.collector.conf.Key;
 import org.torproject.metrics.collector.cron.CollecTorMain;
+import org.torproject.metrics.collector.downloader.Downloader;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -21,7 +22,6 @@ import java.io.File;
 import java.io.FileReader;
 import java.io.FileWriter;
 import java.io.IOException;
-import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.net.MalformedURLException;
 import java.net.URL;
@@ -200,12 +200,25 @@ public class OnionPerfDownloader extends CollecTorMain {
 
     /* Download file contents to temporary file. */
     File tempFile = new File(this.recentDirectory, "." + tpfFileName);
+    byte[] downloadedBytes;
+    try {
+      downloadedBytes = Downloader.downloadFromHttpServer(
+          new URL(baseUrl + tpfFileName));
+    } catch (IOException e) {
+      logger.warn("Unable to download '{}{}'. Skipping.", baseUrl, tpfFileName,
+          e);
+      return;
+    }
+    if (null == downloadedBytes) {
+      logger.warn("Unable to download '{}{}'. Skipping.", baseUrl, tpfFileName);
+      return;
+    }
     tempFile.getParentFile().mkdirs();
-    try (InputStream is = new URL(baseUrl + tpfFileName).openStream()) {
-      Files.copy(is, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
+    try {
+      Files.write(tempFile.toPath(), downloadedBytes);
     } catch (IOException e) {
-      logger.warn("Unable to download '{}{}' to temporary file '{}'.  "
-          + "Skipping.", baseUrl, tpfFileName, tempFile, e);
+      logger.warn("Unable to write previously downloaded '{}{}' to temporary "
+          + "file '{}'. Skipping.", baseUrl, tpfFileName, tempFile, e);
       return;
     }
 
diff --git a/src/main/java/org/torproject/metrics/collector/relaydescs/RelayDescriptorDownloader.java b/src/main/java/org/torproject/metrics/collector/relaydescs/RelayDescriptorDownloader.java
index f1179d8..3337633 100644
--- a/src/main/java/org/torproject/metrics/collector/relaydescs/RelayDescriptorDownloader.java
+++ b/src/main/java/org/torproject/metrics/collector/relaydescs/RelayDescriptorDownloader.java
@@ -3,20 +3,19 @@
 
 package org.torproject.metrics.collector.relaydescs;
 
+import org.torproject.metrics.collector.downloader.Downloader;
+
 import org.apache.commons.codec.binary.Base64;
 import org.apache.commons.codec.digest.DigestUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.BufferedInputStream;
 import java.io.BufferedReader;
 import java.io.BufferedWriter;
-import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileReader;
 import java.io.FileWriter;
 import java.io.IOException;
-import java.net.HttpURLConnection;
 import java.net.URL;
 import java.nio.charset.StandardCharsets;
 import java.text.ParseException;
@@ -33,7 +32,6 @@ import java.util.SortedSet;
 import java.util.TimeZone;
 import java.util.TreeMap;
 import java.util.TreeSet;
-import java.util.zip.InflaterInputStream;
 
 /**
  * Downloads relay descriptors from the directory authorities via HTTP.
@@ -875,29 +873,11 @@ public class RelayDescriptorDownloader {
     String fullUrl
         = "http://" + authority + resource + (isCompressed ? ".z" : "");
     URL url = new URL(fullUrl);
-    HttpURLConnection huc = (HttpURLConnection) url.openConnection();
-    huc.setRequestMethod("GET");
-    huc.setReadTimeout(5000);
-    huc.connect();
-    int response = huc.getResponseCode();
-    if (response == 200) {
-      try (BufferedInputStream in
-          = isCompressed ? new BufferedInputStream(new InflaterInputStream(
-          huc.getInputStream()))
-          : new BufferedInputStream(huc.getInputStream())) {
-        ByteArrayOutputStream baos = new ByteArrayOutputStream();
-        int len;
-        byte[] data = new byte[1024];
-        while ((len = in.read(data, 0, 1024)) >= 0) {
-          baos.write(data, 0, len);
-        }
-        allData = baos.toByteArray();
-      }
-    }
-    logger.debug("Downloaded {} -> {} ({} bytes)", fullUrl, response,
-        allData == null ? 0 : allData.length);
+    allData = Downloader.downloadFromHttpServer(url, isCompressed);
     int receivedDescriptors = 0;
-    if (allData != null) {
+    logger.debug("Downloaded {} -> ({} bytes)", fullUrl,
+        allData == null ? 0 : allData.length);
+    if (null != allData) {
       if (resource.startsWith("/tor/status-vote/")) {
         this.rdp.parse(allData, null);
         receivedDescriptors = 1;
diff --git a/src/main/java/org/torproject/metrics/collector/snowflake/SnowflakeStatsDownloader.java b/src/main/java/org/torproject/metrics/collector/snowflake/SnowflakeStatsDownloader.java
index 4f7994e..cb5f0cc 100644
--- a/src/main/java/org/torproject/metrics/collector/snowflake/SnowflakeStatsDownloader.java
+++ b/src/main/java/org/torproject/metrics/collector/snowflake/SnowflakeStatsDownloader.java
@@ -12,18 +12,16 @@ import org.torproject.metrics.collector.conf.Configuration;
 import org.torproject.metrics.collector.conf.ConfigurationException;
 import org.torproject.metrics.collector.conf.Key;
 import org.torproject.metrics.collector.cron.CollecTorMain;
+import org.torproject.metrics.collector.downloader.Downloader;
 import org.torproject.metrics.collector.persist.SnowflakeStatsPersistence;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.BufferedInputStream;
-import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
-import java.net.HttpURLConnection;
 import java.net.URL;
 import java.time.LocalDateTime;
 import java.util.Arrays;
@@ -60,9 +58,15 @@ public class SnowflakeStatsDownloader extends CollecTorMain {
     this.recentPathName = config.getPath(Key.RecentPath).toString();
     logger.debug("Downloading snowflake stats...");
     URL url = config.getUrl(Key.SnowflakeStatsUrl);
-    ByteArrayOutputStream downloadedSnowflakeStats
-        = this.downloadFromHttpServer(url);
-    if (null == downloadedSnowflakeStats) {
+    byte[] downloadedBytes;
+    try {
+      downloadedBytes = Downloader.downloadFromHttpServer(url);
+    } catch (IOException e) {
+      logger.warn("Failed downloading {}.", url, e);
+      return;
+    }
+    if (null == downloadedBytes) {
+      logger.warn("Could not download {}.", url);
       return;
     }
     logger.debug("Finished downloading {}.", url);
@@ -72,7 +76,7 @@ public class SnowflakeStatsDownloader extends CollecTorMain {
     SortedSet<LocalDateTime> snowflakeStatsEnds = new TreeSet<>();
     String outputPathName = config.getPath(Key.OutputPath).toString();
     for (Descriptor descriptor : descriptorParser.parseDescriptors(
-        downloadedSnowflakeStats.toByteArray(), null, null)) {
+        downloadedBytes, null, null)) {
       if (descriptor instanceof SnowflakeStats) {
         SnowflakeStats snowflakeStats = (SnowflakeStats) descriptor;
         LocalDateTime snowflakeStatsEnd = snowflakeStats.snowflakeStatsEnd();
@@ -106,44 +110,6 @@ public class SnowflakeStatsDownloader extends CollecTorMain {
   }
 
   /**
-   * Download the given URL from an HTTP server and return a stream with
-   * downloaded bytes.
-   *
-   * <p>If anything goes wrong while downloading, log a warning and return
-   * {@code null}.</p>
-   *
-   * @param url URL to download.
-   * @return Stream with downloaded bytes, or {@code null} if an error has
-   *     occurred.
-   */
-  private ByteArrayOutputStream downloadFromHttpServer(URL url) {
-    ByteArrayOutputStream downloadedBytes = new ByteArrayOutputStream();
-    try  {
-      HttpURLConnection huc = (HttpURLConnection) url.openConnection();
-      huc.setRequestMethod("GET");
-      huc.setReadTimeout(5000);
-      huc.connect();
-      int response = huc.getResponseCode();
-      if (response != 200) {
-        logger.warn("Could not download {}. Response code {}", url, response);
-        return null;
-      }
-      try (BufferedInputStream in = new BufferedInputStream(
-          huc.getInputStream())) {
-        int len;
-        byte[] data = new byte[1024];
-        while ((len = in.read(data, 0, 1024)) >= 0) {
-          downloadedBytes.write(data, 0, len);
-        }
-      }
-    } catch (IOException e) {
-      logger.warn("Failed downloading {}.", url, e);
-      return null;
-    }
-    return downloadedBytes;
-  }
-
-  /**
    * Write the given byte array(s) to the given file.
    *
    * <p>If the file already exists, it is overwritten. If the parent directory
diff --git a/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java b/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java
index 201d541..67a9082 100644
--- a/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java
+++ b/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java
@@ -12,6 +12,7 @@ import org.torproject.metrics.collector.MainTest;
 import org.torproject.metrics.collector.cron.CollecTorMain;
 import org.torproject.metrics.collector.cron.Dummy;
 
+import org.junit.Ignore;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.TemporaryFolder;
@@ -127,6 +128,7 @@ public class ConfigurationTest {
     }
   }
 
+  @Ignore("This test takes 40 seconds, which is too long.")
   @Test()
   public void testUrlArrayValues() throws Exception {
     URL[] array = new URL[randomSource.nextInt(30) + 1];
@@ -180,6 +182,7 @@ public class ConfigurationTest {
     conf.setWatchableSourceAndLoad(Paths.get("/tmp/phantom.path"));
   }
 
+  @Ignore("This test takes 13 seconds, which is too long.")
   @Test()
   public void testConfigChange() throws Exception {
     Configuration conf = new Configuration();
diff --git a/src/test/java/org/torproject/metrics/collector/downloader/DownloaderTest.java b/src/test/java/org/torproject/metrics/collector/downloader/DownloaderTest.java
new file mode 100644
index 0000000..19db782
--- /dev/null
+++ b/src/test/java/org/torproject/metrics/collector/downloader/DownloaderTest.java
@@ -0,0 +1,134 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.downloader;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.fail;
+import static org.mockito.BDDMockito.given;
+import static org.mockito.Mockito.mock;
+
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.net.HttpURLConnection;
+import java.net.SocketTimeoutException;
+import java.net.URL;
+import java.net.URLConnection;
+import java.net.URLStreamHandler;
+import java.net.URLStreamHandlerFactory;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Test class for {@link Downloader}.
+ *
+ * <p>This test class is heavily based on the blog post
+ * <a href="https://claritysoftware.co.uk/mocking-javas-url-with-mockito/">"How
+ * to mock the Java URL class with Mockito"</a> by nathan from March 10,
+ * 2017.</p>
+ */
+public class DownloaderTest {
+
+  /**
+   * Custom {@link URLStreamHandler} that allows us to control the
+   * {@link URLConnection URLConnections} that are returned by {@link URL URLs}
+   * in the code under test.
+   */
+  private static class HttpUrlStreamHandler extends URLStreamHandler {
+
+    private Map<URL, URLConnection> connections = new HashMap();
+
+    @Override
+    protected URLConnection openConnection(URL url) throws IOException {
+      return connections.get(url);
+    }
+
+    private void resetConnections() {
+      connections = new HashMap();
+    }
+
+    private HttpUrlStreamHandler addConnection(URL url,
+        URLConnection urlConnection) {
+      connections.put(url, urlConnection);
+      return this;
+    }
+  }
+
+  private static HttpUrlStreamHandler httpUrlStreamHandler;
+
+  /**
+   * Set up our own stream handler for all tests in this class.
+   */
+  @BeforeClass
+  public static void setupUrlStreamHandlerFactory() {
+    URLStreamHandlerFactory urlStreamHandlerFactory
+        = mock(URLStreamHandlerFactory.class);
+    URL.setURLStreamHandlerFactory(urlStreamHandlerFactory);
+    httpUrlStreamHandler = new HttpUrlStreamHandler();
+    given(urlStreamHandlerFactory.createURLStreamHandler("http"))
+        .willReturn(httpUrlStreamHandler);
+  }
+
+  /**
+   * Clear any connections from previously run tests.
+   */
+  @Before
+  public void reset() {
+    httpUrlStreamHandler.resetConnections();
+  }
+
+  @Test
+  public void testExistingResource() throws Exception {
+    URL requestedUrl = new URL("http://example.org/exists");
+    byte[] expectedDownloadedBytes = "content".getBytes();
+    HttpURLConnection urlConnection = mock(HttpURLConnection.class);
+    httpUrlStreamHandler.addConnection(requestedUrl, urlConnection);
+    given(urlConnection.getResponseCode()).willReturn(200);
+    given(urlConnection.getInputStream()).willReturn(
+        new ByteArrayInputStream(expectedDownloadedBytes));
+    byte[] downloadedBytes = Downloader.downloadFromHttpServer(requestedUrl);
+    assertArrayEquals(expectedDownloadedBytes, downloadedBytes);
+  }
+
+  @Test
+  public void testNonExistingResource() throws Exception {
+    URL requestedUrl = new URL("http://example.org/notfound");
+    HttpURLConnection urlConnection = mock(HttpURLConnection.class);
+    httpUrlStreamHandler.addConnection(requestedUrl, urlConnection);
+    given(urlConnection.getResponseCode()).willReturn(404);
+    byte[] downloadedBytes = Downloader.downloadFromHttpServer(requestedUrl);
+    assertNull(downloadedBytes);
+  }
+
+  @Test
+  public void testEmptyResource() throws Exception {
+    URL requestedUrl = new URL("http://example.org/empty");
+    byte[] expectedDownloadedBytes = new byte[0];
+    HttpURLConnection urlConnection = mock(HttpURLConnection.class);
+    httpUrlStreamHandler.addConnection(requestedUrl, urlConnection);
+    given(urlConnection.getResponseCode()).willReturn(200);
+    given(urlConnection.getInputStream()).willReturn(
+        new ByteArrayInputStream(expectedDownloadedBytes));
+    byte[] downloadedBytes = Downloader.downloadFromHttpServer(requestedUrl);
+    assertEquals(0, downloadedBytes.length);
+  }
+
+  @Test(expected = SocketTimeoutException.class)
+  public void testTimeout() throws Exception {
+    URL requestedUrl = new URL("http://example.org/timeout");
+    SocketTimeoutException expectedException = new SocketTimeoutException();
+    HttpURLConnection urlConnection = mock(HttpURLConnection.class);
+    httpUrlStreamHandler.addConnection(requestedUrl, urlConnection);
+    given(urlConnection.getResponseCode()).willReturn(200);
+    given(urlConnection.getInputStream()).willThrow(expectedException);
+    Downloader.downloadFromHttpServer(requestedUrl);
+    fail("Should have thrown a SocketTimeoutException.");
+  }
+}
+



More information about the tor-commits mailing list