[tor-commits] [metrics-web/master] Make file layout comply to Metrics' standards a little more.
karsten at torproject.org
Fri Feb 24 08:58:24 UTC 2017
commit 1da05983fe24ebda82c2bd9018eba846468d933e
Author: iwakeh <iwakeh at torproject.org>
Date: Thu Feb 23 13:30:26 2017 +0000
Make file layout comply to Metrics' standards a little more.
Tweak build files.
Set new descriptor/metrics-lib version.
---
modules/advbwdist/build.xml | 3 -
.../org/torproject/metrics/advbwdist/Main.java | 158 ++++
.../src/org/torproject/metrics/advbwdist/Main.java | 158 ----
.../java/org/torproject/metrics/clients/Main.java | 478 ++++++++++
.../src/org/torproject/metrics/clients/Main.java | 478 ----------
.../org/torproject/metrics/collectdescs/Main.java | 31 +
.../org/torproject/metrics/collectdescs/Main.java | 31 -
modules/connbidirect/build.xml | 61 +-
modules/hidserv/build.xml | 3 -
.../org/torproject/metrics/hidserv/Aggregator.java | 198 ++++
.../metrics/hidserv/ComputedNetworkFractions.java | 183 ++++
.../torproject/metrics/hidserv/DateTimeHelper.java | 107 +++
.../org/torproject/metrics/hidserv/Document.java | 26 +
.../torproject/metrics/hidserv/DocumentStore.java | 176 ++++
.../metrics/hidserv/ExtrapolatedHidServStats.java | 170 ++++
.../torproject/metrics/hidserv/Extrapolator.java | 253 ++++++
.../java/org/torproject/metrics/hidserv/Main.java | 88 ++
.../org/torproject/metrics/hidserv/Parser.java | 440 +++++++++
.../metrics/hidserv/ReportedHidServStats.java | 141 +++
.../org/torproject/metrics/hidserv/Simulate.java | 365 ++++++++
.../org/torproject/metrics/hidserv/Aggregator.java | 198 ----
.../metrics/hidserv/ComputedNetworkFractions.java | 183 ----
.../torproject/metrics/hidserv/DateTimeHelper.java | 107 ---
.../org/torproject/metrics/hidserv/Document.java | 26 -
.../torproject/metrics/hidserv/DocumentStore.java | 176 ----
.../metrics/hidserv/ExtrapolatedHidServStats.java | 170 ----
.../torproject/metrics/hidserv/Extrapolator.java | 253 ------
.../src/org/torproject/metrics/hidserv/Main.java | 88 --
.../src/org/torproject/metrics/hidserv/Parser.java | 440 ---------
.../metrics/hidserv/ReportedHidServStats.java | 141 ---
.../org/torproject/metrics/hidserv/Simulate.java | 365 --------
modules/legacy/build.xml | 1 -
.../org/torproject/ernie/cron/Configuration.java | 206 +++++
.../java/org/torproject/ernie/cron/LockFile.java | 58 ++
.../ernie/cron/LoggingConfiguration.java | 100 +++
.../main/java/org/torproject/ernie/cron/Main.java | 90 ++
.../cron/RelayDescriptorDatabaseImporter.java | 995 +++++++++++++++++++++
.../cron/network/ConsensusStatsFileHandler.java | 412 +++++++++
.../ernie/cron/performance/TorperfProcessor.java | 292 ++++++
.../org/torproject/ernie/cron/Configuration.java | 206 -----
.../src/org/torproject/ernie/cron/LockFile.java | 58 --
.../ernie/cron/LoggingConfiguration.java | 100 ---
.../legacy/src/org/torproject/ernie/cron/Main.java | 90 --
.../cron/RelayDescriptorDatabaseImporter.java | 995 ---------------------
.../cron/network/ConsensusStatsFileHandler.java | 412 ---------
.../ernie/cron/performance/TorperfProcessor.java | 292 ------
modules/webstats/build.xml | 3 -
shared/build-base.xml | 5 +-
48 files changed, 4978 insertions(+), 5032 deletions(-)
diff --git a/modules/advbwdist/build.xml b/modules/advbwdist/build.xml
index 9aa187f..0493d8a 100644
--- a/modules/advbwdist/build.xml
+++ b/modules/advbwdist/build.xml
@@ -7,9 +7,6 @@
<path id="classpath">
<pathelement path="${classes}"/>
<path refid="base.classpath" />
- <fileset dir="${libs}">
- <include name="commons-codec-1.9.jar"/>
- </fileset>
</path>
<target name="run" depends="compile">
diff --git a/modules/advbwdist/src/main/java/org/torproject/metrics/advbwdist/Main.java b/modules/advbwdist/src/main/java/org/torproject/metrics/advbwdist/Main.java
new file mode 100644
index 0000000..7d7678d
--- /dev/null
+++ b/modules/advbwdist/src/main/java/org/torproject/metrics/advbwdist/Main.java
@@ -0,0 +1,158 @@
+/* Copyright 2016--2017 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.advbwdist;
+
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorFile;
+import org.torproject.descriptor.DescriptorReader;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.NetworkStatusEntry;
+import org.torproject.descriptor.RelayNetworkStatusConsensus;
+import org.torproject.descriptor.ServerDescriptor;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.TimeZone;
+
+public class Main {
+
+ /** Executes this data-processing module. */
+ public static void main(String[] args) throws IOException {
+
+ /* Parse server descriptors, not keeping a parse history, and memorize
+ * the advertised bandwidth for every server descriptor. */
+ DescriptorReader descriptorReader =
+ DescriptorSourceFactory.createDescriptorReader();
+ descriptorReader.addDirectory(
+ new File("../../shared/in/recent/relay-descriptors/"
+ + "server-descriptors"));
+ Iterator<DescriptorFile> descriptorFiles =
+ descriptorReader.readDescriptors();
+ Map<String, Long> serverDescriptors = new HashMap<>();
+ while (descriptorFiles.hasNext()) {
+ DescriptorFile descriptorFile = descriptorFiles.next();
+ for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+ if (!(descriptor instanceof ServerDescriptor)) {
+ continue;
+ }
+ ServerDescriptor serverDescriptor = (ServerDescriptor) descriptor;
+ String digest = serverDescriptor.getServerDescriptorDigest();
+ long advertisedBandwidth = Math.min(Math.min(
+ serverDescriptor.getBandwidthRate(),
+ serverDescriptor.getBandwidthBurst()),
+ serverDescriptor.getBandwidthObserved());
+ serverDescriptors.put(digest.toUpperCase(), advertisedBandwidth);
+ }
+ }
+
+ /* Parse consensuses, keeping a parse history. */
+ descriptorReader = DescriptorSourceFactory.createDescriptorReader();
+ descriptorReader.addDirectory(
+ new File("../../shared/in/recent/relay-descriptors/consensuses"));
+ descriptorReader.setExcludeFiles(
+ new File("status/parsed-consensuses"));
+ descriptorFiles = descriptorReader.readDescriptors();
+ File resultsFile = new File("stats/advbwdist-validafter.csv");
+ resultsFile.getParentFile().mkdirs();
+ boolean writeHeader = !resultsFile.exists();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(resultsFile,
+ true));
+ if (writeHeader) {
+ bw.write("valid_after,isexit,relay,percentile,advbw\n");
+ }
+ SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
+ "yyyy-MM-dd HH:mm:ss");
+ dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ while (descriptorFiles.hasNext()) {
+ DescriptorFile descriptorFile = descriptorFiles.next();
+ for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+ if (!(descriptor instanceof RelayNetworkStatusConsensus)) {
+ continue;
+ }
+
+ /* Parse server descriptor digests from consensus and look up
+ * advertised bandwidths. */
+ RelayNetworkStatusConsensus consensus =
+ (RelayNetworkStatusConsensus) descriptor;
+ String validAfter = dateTimeFormat.format(
+ consensus.getValidAfterMillis());
+ List<Long> advertisedBandwidthsAllRelays = new ArrayList<>();
+ List<Long> advertisedBandwidthsExitsOnly = new ArrayList<>();
+ for (NetworkStatusEntry relay
+ : consensus.getStatusEntries().values()) {
+ if (!relay.getFlags().contains("Running")) {
+ continue;
+ }
+ String serverDescriptorDigest = relay.getDescriptor()
+ .toUpperCase();
+ if (!serverDescriptors.containsKey(serverDescriptorDigest)) {
+ continue;
+ }
+ long advertisedBandwidth = serverDescriptors.get(
+ serverDescriptorDigest);
+ advertisedBandwidthsAllRelays.add(advertisedBandwidth);
+ if (relay.getFlags().contains("Exit")
+ && !relay.getFlags().contains("BadExit")) {
+ advertisedBandwidthsExitsOnly.add(advertisedBandwidth);
+ }
+ }
+
+ /* Write advertised bandwidths of n-th fastest relays/exits. */
+ Collections.sort(advertisedBandwidthsAllRelays,
+ Collections.reverseOrder());
+ Collections.sort(advertisedBandwidthsExitsOnly,
+ Collections.reverseOrder());
+ int[] fastestRelays = new int[] { 1, 2, 3, 5, 10, 20, 30, 50, 100,
+ 200, 300, 500, 1000, 2000, 3000, 5000 };
+ for (int fastestRelay : fastestRelays) {
+ if (advertisedBandwidthsAllRelays.size() >= fastestRelay) {
+ bw.write(String.format("%s,,%d,,%d%n", validAfter,
+ fastestRelay,
+ advertisedBandwidthsAllRelays.get(fastestRelay - 1)));
+ }
+ }
+ for (int fastestRelay : fastestRelays) {
+ if (advertisedBandwidthsExitsOnly.size() >= fastestRelay) {
+ bw.write(String.format("%s,TRUE,%d,,%d%n", validAfter,
+ fastestRelay,
+ advertisedBandwidthsExitsOnly.get(fastestRelay - 1)));
+ }
+ }
+
+ /* Write advertised bandwidth percentiles of relays/exits. */
+ Collections.sort(advertisedBandwidthsAllRelays);
+ Collections.sort(advertisedBandwidthsExitsOnly);
+ int[] percentiles = new int[] { 0, 1, 2, 3, 5, 9, 10, 20, 25, 30,
+ 40, 50, 60, 70, 75, 80, 90, 91, 95, 97, 98, 99, 100 };
+ if (!advertisedBandwidthsAllRelays.isEmpty()) {
+ for (int percentile : percentiles) {
+ bw.write(String.format("%s,,,%d,%d%n", validAfter,
+ percentile, advertisedBandwidthsAllRelays.get(
+ ((advertisedBandwidthsAllRelays.size() - 1)
+ * percentile) / 100)));
+ }
+ }
+ if (!advertisedBandwidthsExitsOnly.isEmpty()) {
+ for (int percentile : percentiles) {
+ bw.write(String.format("%s,TRUE,,%d,%d%n", validAfter,
+ percentile, advertisedBandwidthsExitsOnly.get(
+ ((advertisedBandwidthsExitsOnly.size() - 1)
+ * percentile) / 100)));
+ }
+ }
+ }
+ }
+ bw.close();
+ }
+}
+
diff --git a/modules/advbwdist/src/org/torproject/metrics/advbwdist/Main.java b/modules/advbwdist/src/org/torproject/metrics/advbwdist/Main.java
deleted file mode 100644
index 7d7678d..0000000
--- a/modules/advbwdist/src/org/torproject/metrics/advbwdist/Main.java
+++ /dev/null
@@ -1,158 +0,0 @@
-/* Copyright 2016--2017 The Tor Project
- * See LICENSE for licensing information */
-
-package org.torproject.metrics.advbwdist;
-
-import org.torproject.descriptor.Descriptor;
-import org.torproject.descriptor.DescriptorFile;
-import org.torproject.descriptor.DescriptorReader;
-import org.torproject.descriptor.DescriptorSourceFactory;
-import org.torproject.descriptor.NetworkStatusEntry;
-import org.torproject.descriptor.RelayNetworkStatusConsensus;
-import org.torproject.descriptor.ServerDescriptor;
-
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.TimeZone;
-
-public class Main {
-
- /** Executes this data-processing module. */
- public static void main(String[] args) throws IOException {
-
- /* Parse server descriptors, not keeping a parse history, and memorize
- * the advertised bandwidth for every server descriptor. */
- DescriptorReader descriptorReader =
- DescriptorSourceFactory.createDescriptorReader();
- descriptorReader.addDirectory(
- new File("../../shared/in/recent/relay-descriptors/"
- + "server-descriptors"));
- Iterator<DescriptorFile> descriptorFiles =
- descriptorReader.readDescriptors();
- Map<String, Long> serverDescriptors = new HashMap<>();
- while (descriptorFiles.hasNext()) {
- DescriptorFile descriptorFile = descriptorFiles.next();
- for (Descriptor descriptor : descriptorFile.getDescriptors()) {
- if (!(descriptor instanceof ServerDescriptor)) {
- continue;
- }
- ServerDescriptor serverDescriptor = (ServerDescriptor) descriptor;
- String digest = serverDescriptor.getServerDescriptorDigest();
- long advertisedBandwidth = Math.min(Math.min(
- serverDescriptor.getBandwidthRate(),
- serverDescriptor.getBandwidthBurst()),
- serverDescriptor.getBandwidthObserved());
- serverDescriptors.put(digest.toUpperCase(), advertisedBandwidth);
- }
- }
-
- /* Parse consensuses, keeping a parse history. */
- descriptorReader = DescriptorSourceFactory.createDescriptorReader();
- descriptorReader.addDirectory(
- new File("../../shared/in/recent/relay-descriptors/consensuses"));
- descriptorReader.setExcludeFiles(
- new File("status/parsed-consensuses"));
- descriptorFiles = descriptorReader.readDescriptors();
- File resultsFile = new File("stats/advbwdist-validafter.csv");
- resultsFile.getParentFile().mkdirs();
- boolean writeHeader = !resultsFile.exists();
- BufferedWriter bw = new BufferedWriter(new FileWriter(resultsFile,
- true));
- if (writeHeader) {
- bw.write("valid_after,isexit,relay,percentile,advbw\n");
- }
- SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
- "yyyy-MM-dd HH:mm:ss");
- dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- while (descriptorFiles.hasNext()) {
- DescriptorFile descriptorFile = descriptorFiles.next();
- for (Descriptor descriptor : descriptorFile.getDescriptors()) {
- if (!(descriptor instanceof RelayNetworkStatusConsensus)) {
- continue;
- }
-
- /* Parse server descriptor digests from consensus and look up
- * advertised bandwidths. */
- RelayNetworkStatusConsensus consensus =
- (RelayNetworkStatusConsensus) descriptor;
- String validAfter = dateTimeFormat.format(
- consensus.getValidAfterMillis());
- List<Long> advertisedBandwidthsAllRelays = new ArrayList<>();
- List<Long> advertisedBandwidthsExitsOnly = new ArrayList<>();
- for (NetworkStatusEntry relay
- : consensus.getStatusEntries().values()) {
- if (!relay.getFlags().contains("Running")) {
- continue;
- }
- String serverDescriptorDigest = relay.getDescriptor()
- .toUpperCase();
- if (!serverDescriptors.containsKey(serverDescriptorDigest)) {
- continue;
- }
- long advertisedBandwidth = serverDescriptors.get(
- serverDescriptorDigest);
- advertisedBandwidthsAllRelays.add(advertisedBandwidth);
- if (relay.getFlags().contains("Exit")
- && !relay.getFlags().contains("BadExit")) {
- advertisedBandwidthsExitsOnly.add(advertisedBandwidth);
- }
- }
-
- /* Write advertised bandwidths of n-th fastest relays/exits. */
- Collections.sort(advertisedBandwidthsAllRelays,
- Collections.reverseOrder());
- Collections.sort(advertisedBandwidthsExitsOnly,
- Collections.reverseOrder());
- int[] fastestRelays = new int[] { 1, 2, 3, 5, 10, 20, 30, 50, 100,
- 200, 300, 500, 1000, 2000, 3000, 5000 };
- for (int fastestRelay : fastestRelays) {
- if (advertisedBandwidthsAllRelays.size() >= fastestRelay) {
- bw.write(String.format("%s,,%d,,%d%n", validAfter,
- fastestRelay,
- advertisedBandwidthsAllRelays.get(fastestRelay - 1)));
- }
- }
- for (int fastestRelay : fastestRelays) {
- if (advertisedBandwidthsExitsOnly.size() >= fastestRelay) {
- bw.write(String.format("%s,TRUE,%d,,%d%n", validAfter,
- fastestRelay,
- advertisedBandwidthsExitsOnly.get(fastestRelay - 1)));
- }
- }
-
- /* Write advertised bandwidth percentiles of relays/exits. */
- Collections.sort(advertisedBandwidthsAllRelays);
- Collections.sort(advertisedBandwidthsExitsOnly);
- int[] percentiles = new int[] { 0, 1, 2, 3, 5, 9, 10, 20, 25, 30,
- 40, 50, 60, 70, 75, 80, 90, 91, 95, 97, 98, 99, 100 };
- if (!advertisedBandwidthsAllRelays.isEmpty()) {
- for (int percentile : percentiles) {
- bw.write(String.format("%s,,,%d,%d%n", validAfter,
- percentile, advertisedBandwidthsAllRelays.get(
- ((advertisedBandwidthsAllRelays.size() - 1)
- * percentile) / 100)));
- }
- }
- if (!advertisedBandwidthsExitsOnly.isEmpty()) {
- for (int percentile : percentiles) {
- bw.write(String.format("%s,TRUE,,%d,%d%n", validAfter,
- percentile, advertisedBandwidthsExitsOnly.get(
- ((advertisedBandwidthsExitsOnly.size() - 1)
- * percentile) / 100)));
- }
- }
- }
- }
- bw.close();
- }
-}
-
diff --git a/modules/clients/src/main/java/org/torproject/metrics/clients/Main.java b/modules/clients/src/main/java/org/torproject/metrics/clients/Main.java
new file mode 100644
index 0000000..dff73f7
--- /dev/null
+++ b/modules/clients/src/main/java/org/torproject/metrics/clients/Main.java
@@ -0,0 +1,478 @@
+/* Copyright 2013--2017 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.clients;
+
+import org.torproject.descriptor.BandwidthHistory;
+import org.torproject.descriptor.BridgeNetworkStatus;
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorFile;
+import org.torproject.descriptor.DescriptorReader;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.ExtraInfoDescriptor;
+import org.torproject.descriptor.NetworkStatusEntry;
+import org.torproject.descriptor.RelayNetworkStatusConsensus;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.TimeZone;
+import java.util.TreeMap;
+
+public class Main {
+
+ /** Executes this data-processing module. */
+ public static void main(String[] args) throws Exception {
+ parseArgs(args);
+ parseRelayDescriptors();
+ parseBridgeDescriptors();
+ closeOutputFiles();
+ }
+
+ private static boolean writeToSingleFile = true;
+ private static boolean byStatsDateNotByDescHour = false;
+
+ private static void parseArgs(String[] args) {
+ if (args.length == 0) {
+ writeToSingleFile = true;
+ } else if (args.length == 1 && args[0].equals("--stats-date")) {
+ writeToSingleFile = false;
+ byStatsDateNotByDescHour = true;
+ } else if (args.length == 1 && args[0].equals("--desc-hour")) {
+ writeToSingleFile = false;
+ byStatsDateNotByDescHour = false;
+ } else {
+ System.err.println("Usage: java " + Main.class.getName()
+ + " [ --stats-date | --desc-hour ]");
+ System.exit(1);
+ }
+ }
+
+ private static final long ONE_HOUR_MILLIS = 60L * 60L * 1000L;
+
+ private static final long ONE_DAY_MILLIS = 24L * ONE_HOUR_MILLIS;
+
+ private static final long ONE_WEEK_MILLIS = 7L * ONE_DAY_MILLIS;
+
+ private static void parseRelayDescriptors() throws Exception {
+ DescriptorReader descriptorReader =
+ DescriptorSourceFactory.createDescriptorReader();
+ descriptorReader.setExcludeFiles(new File(
+ "status/relay-descriptors"));
+ descriptorReader.addDirectory(new File(
+ "../../shared/in/recent/relay-descriptors/consensuses"));
+ descriptorReader.addDirectory(new File(
+ "../../shared/in/recent/relay-descriptors/extra-infos"));
+ descriptorReader.addDirectory(new File(
+ "../../shared/in/archive/relay-descriptors/consensuses"));
+ descriptorReader.addDirectory(new File(
+ "../../shared/in/archive/relay-descriptors/extra-infos"));
+ Iterator<DescriptorFile> descriptorFiles =
+ descriptorReader.readDescriptors();
+ while (descriptorFiles.hasNext()) {
+ DescriptorFile descriptorFile = descriptorFiles.next();
+ for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+ if (descriptor instanceof ExtraInfoDescriptor) {
+ parseRelayExtraInfoDescriptor((ExtraInfoDescriptor) descriptor);
+ } else if (descriptor instanceof RelayNetworkStatusConsensus) {
+ parseRelayNetworkStatusConsensus(
+ (RelayNetworkStatusConsensus) descriptor);
+ }
+ }
+ }
+ }
+
+ private static void parseRelayExtraInfoDescriptor(
+ ExtraInfoDescriptor descriptor) throws IOException {
+ long publishedMillis = descriptor.getPublishedMillis();
+ String fingerprint = descriptor.getFingerprint()
+ .toUpperCase();
+ long dirreqStatsEndMillis = descriptor.getDirreqStatsEndMillis();
+ long dirreqStatsIntervalLengthMillis =
+ descriptor.getDirreqStatsIntervalLength() * 1000L;
+ SortedMap<String, Integer> requests = descriptor.getDirreqV3Reqs();
+ BandwidthHistory dirreqWriteHistory =
+ descriptor.getDirreqWriteHistory();
+ parseRelayDirreqV3Reqs(fingerprint, publishedMillis,
+ dirreqStatsEndMillis, dirreqStatsIntervalLengthMillis, requests);
+ parseRelayDirreqWriteHistory(fingerprint, publishedMillis,
+ dirreqWriteHistory);
+ }
+
+ private static void parseRelayDirreqV3Reqs(String fingerprint,
+ long publishedMillis, long dirreqStatsEndMillis,
+ long dirreqStatsIntervalLengthMillis,
+ SortedMap<String, Integer> requests) throws IOException {
+ if (requests == null
+ || publishedMillis - dirreqStatsEndMillis > ONE_WEEK_MILLIS
+ || dirreqStatsIntervalLengthMillis != ONE_DAY_MILLIS) {
+ /* Cut off all observations that are one week older than
+ * the descriptor publication time, or we'll have to update
+ * weeks of aggregate values every hour. */
+ return;
+ }
+ long statsStartMillis = dirreqStatsEndMillis
+ - dirreqStatsIntervalLengthMillis;
+ long utcBreakMillis = (dirreqStatsEndMillis / ONE_DAY_MILLIS)
+ * ONE_DAY_MILLIS;
+ for (int i = 0; i < 2; i++) {
+ long fromMillis = i == 0 ? statsStartMillis
+ : utcBreakMillis;
+ long toMillis = i == 0 ? utcBreakMillis : dirreqStatsEndMillis;
+ if (fromMillis >= toMillis) {
+ continue;
+ }
+ double intervalFraction = ((double) (toMillis - fromMillis))
+ / ((double) dirreqStatsIntervalLengthMillis);
+ double sum = 0L;
+ for (Map.Entry<String, Integer> e : requests.entrySet()) {
+ String country = e.getKey();
+ double reqs = ((double) e.getValue()) - 4.0;
+ sum += reqs;
+ writeOutputLine(fingerprint, "relay", "responses", country,
+ "", "", fromMillis, toMillis, reqs * intervalFraction,
+ publishedMillis);
+ }
+ writeOutputLine(fingerprint, "relay", "responses", "", "",
+ "", fromMillis, toMillis, sum * intervalFraction,
+ publishedMillis);
+ }
+ }
+
+ private static void parseRelayDirreqWriteHistory(String fingerprint,
+ long publishedMillis, BandwidthHistory dirreqWriteHistory)
+ throws IOException {
+ if (dirreqWriteHistory == null
+ || publishedMillis - dirreqWriteHistory.getHistoryEndMillis()
+ > ONE_WEEK_MILLIS) {
+ return;
+ /* Cut off all observations that are one week older than
+ * the descriptor publication time, or we'll have to update
+ * weeks of aggregate values every hour. */
+ }
+ long intervalLengthMillis =
+ dirreqWriteHistory.getIntervalLength() * 1000L;
+ for (Map.Entry<Long, Long> e
+ : dirreqWriteHistory.getBandwidthValues().entrySet()) {
+ long intervalEndMillis = e.getKey();
+ long intervalStartMillis =
+ intervalEndMillis - intervalLengthMillis;
+ for (int i = 0; i < 2; i++) {
+ long fromMillis = intervalStartMillis;
+ long toMillis = intervalEndMillis;
+ double writtenBytes = (double) e.getValue();
+ if (intervalStartMillis / ONE_DAY_MILLIS
+ < intervalEndMillis / ONE_DAY_MILLIS) {
+ long utcBreakMillis = (intervalEndMillis
+ / ONE_DAY_MILLIS) * ONE_DAY_MILLIS;
+ if (i == 0) {
+ toMillis = utcBreakMillis;
+ } else if (i == 1) {
+ fromMillis = utcBreakMillis;
+ }
+ double intervalFraction = ((double) (toMillis - fromMillis))
+ / ((double) intervalLengthMillis);
+ writtenBytes *= intervalFraction;
+ } else if (i == 1) {
+ break;
+ }
+ writeOutputLine(fingerprint, "relay", "bytes", "", "", "",
+ fromMillis, toMillis, writtenBytes, publishedMillis);
+ }
+ }
+ }
+
+ private static void parseRelayNetworkStatusConsensus(
+ RelayNetworkStatusConsensus consensus) throws IOException {
+ long fromMillis = consensus.getValidAfterMillis();
+ long toMillis = consensus.getFreshUntilMillis();
+ for (NetworkStatusEntry statusEntry
+ : consensus.getStatusEntries().values()) {
+ String fingerprint = statusEntry.getFingerprint()
+ .toUpperCase();
+ if (statusEntry.getFlags().contains("Running")) {
+ writeOutputLine(fingerprint, "relay", "status", "", "", "",
+ fromMillis, toMillis, 0.0, fromMillis);
+ }
+ }
+ }
+
+ private static void parseBridgeDescriptors() throws Exception {
+ DescriptorReader descriptorReader =
+ DescriptorSourceFactory.createDescriptorReader();
+ descriptorReader.setExcludeFiles(new File(
+ "status/bridge-descriptors"));
+ descriptorReader.addDirectory(new File(
+ "../../shared/in/recent/bridge-descriptors"));
+ descriptorReader.addDirectory(new File(
+ "../../shared/in/archive/bridge-descriptors"));
+ Iterator<DescriptorFile> descriptorFiles =
+ descriptorReader.readDescriptors();
+ while (descriptorFiles.hasNext()) {
+ DescriptorFile descriptorFile = descriptorFiles.next();
+ for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+ if (descriptor instanceof ExtraInfoDescriptor) {
+ parseBridgeExtraInfoDescriptor(
+ (ExtraInfoDescriptor) descriptor);
+ } else if (descriptor instanceof BridgeNetworkStatus) {
+ parseBridgeNetworkStatus((BridgeNetworkStatus) descriptor);
+ }
+ }
+ }
+ }
+
+ private static void parseBridgeExtraInfoDescriptor(
+ ExtraInfoDescriptor descriptor) throws IOException {
+ String fingerprint = descriptor.getFingerprint().toUpperCase();
+ long publishedMillis = descriptor.getPublishedMillis();
+ long dirreqStatsEndMillis = descriptor.getDirreqStatsEndMillis();
+ long dirreqStatsIntervalLengthMillis =
+ descriptor.getDirreqStatsIntervalLength() * 1000L;
+ parseBridgeDirreqV3Resp(fingerprint, publishedMillis,
+ dirreqStatsEndMillis, dirreqStatsIntervalLengthMillis,
+ descriptor.getDirreqV3Resp(),
+ descriptor.getBridgeIps(),
+ descriptor.getBridgeIpTransports(),
+ descriptor.getBridgeIpVersions());
+
+ parseBridgeDirreqWriteHistory(fingerprint, publishedMillis,
+ descriptor.getDirreqWriteHistory());
+ }
+
+ private static void parseBridgeDirreqV3Resp(String fingerprint,
+ long publishedMillis, long dirreqStatsEndMillis,
+ long dirreqStatsIntervalLengthMillis,
+ SortedMap<String, Integer> responses,
+ SortedMap<String, Integer> bridgeIps,
+ SortedMap<String, Integer> bridgeIpTransports,
+ SortedMap<String, Integer> bridgeIpVersions) throws IOException {
+ if (responses == null
+ || publishedMillis - dirreqStatsEndMillis > ONE_WEEK_MILLIS
+ || dirreqStatsIntervalLengthMillis != ONE_DAY_MILLIS) {
+ /* Cut off all observations that are one week older than
+ * the descriptor publication time, or we'll have to update
+ * weeks of aggregate values every hour. */
+ return;
+ }
+ long statsStartMillis = dirreqStatsEndMillis
+ - dirreqStatsIntervalLengthMillis;
+ long utcBreakMillis = (dirreqStatsEndMillis / ONE_DAY_MILLIS)
+ * ONE_DAY_MILLIS;
+ double resp = ((double) responses.get("ok")) - 4.0;
+ if (resp > 0.0) {
+ for (int i = 0; i < 2; i++) {
+ long fromMillis = i == 0 ? statsStartMillis
+ : utcBreakMillis;
+ long toMillis = i == 0 ? utcBreakMillis : dirreqStatsEndMillis;
+ if (fromMillis >= toMillis) {
+ continue;
+ }
+ double intervalFraction = ((double) (toMillis - fromMillis))
+ / ((double) dirreqStatsIntervalLengthMillis);
+ writeOutputLine(fingerprint, "bridge", "responses", "", "",
+ "", fromMillis, toMillis, resp * intervalFraction,
+ publishedMillis);
+ parseBridgeRespByCategory(fingerprint, fromMillis, toMillis, resp,
+ dirreqStatsIntervalLengthMillis, "country", bridgeIps,
+ publishedMillis);
+ parseBridgeRespByCategory(fingerprint, fromMillis, toMillis, resp,
+ dirreqStatsIntervalLengthMillis, "transport",
+ bridgeIpTransports, publishedMillis);
+ parseBridgeRespByCategory(fingerprint, fromMillis, toMillis, resp,
+ dirreqStatsIntervalLengthMillis, "version", bridgeIpVersions,
+ publishedMillis);
+ }
+ }
+ }
+
+ private static void parseBridgeRespByCategory(String fingerprint,
+ long fromMillis, long toMillis, double resp,
+ long dirreqStatsIntervalLengthMillis, String category,
+ SortedMap<String, Integer> frequencies, long publishedMillis)
+ throws IOException {
+ double total = 0.0;
+ SortedMap<String, Double> frequenciesCopy = new TreeMap<>();
+ if (frequencies != null) {
+ for (Map.Entry<String, Integer> e : frequencies.entrySet()) {
+ if (e.getValue() < 4.0) {
+ continue;
+ }
+ double frequency = ((double) e.getValue()) - 4.0;
+ frequenciesCopy.put(e.getKey(), frequency);
+ total += frequency;
+ }
+ }
+ /* If we're not told any frequencies, or at least none of them are
+ * greater than 4, put in a default that we'll attribute all responses
+ * to. */
+ if (total == 0) {
+ if (category.equals("country")) {
+ frequenciesCopy.put("??", 4.0);
+ } else if (category.equals("transport")) {
+ frequenciesCopy.put("<OR>", 4.0);
+ } else if (category.equals("version")) {
+ frequenciesCopy.put("v4", 4.0);
+ }
+ total = 4.0;
+ }
+ for (Map.Entry<String, Double> e : frequenciesCopy.entrySet()) {
+ double intervalFraction = ((double) (toMillis - fromMillis))
+ / ((double) dirreqStatsIntervalLengthMillis);
+ double val = resp * intervalFraction * e.getValue() / total;
+ if (category.equals("country")) {
+ writeOutputLine(fingerprint, "bridge", "responses", e.getKey(),
+ "", "", fromMillis, toMillis, val, publishedMillis);
+ } else if (category.equals("transport")) {
+ writeOutputLine(fingerprint, "bridge", "responses", "",
+ e.getKey(), "", fromMillis, toMillis, val, publishedMillis);
+ } else if (category.equals("version")) {
+ writeOutputLine(fingerprint, "bridge", "responses", "", "",
+ e.getKey(), fromMillis, toMillis, val, publishedMillis);
+ }
+ }
+ }
+
+ private static void parseBridgeDirreqWriteHistory(String fingerprint,
+ long publishedMillis, BandwidthHistory dirreqWriteHistory)
+ throws IOException {
+ if (dirreqWriteHistory == null
+ || publishedMillis - dirreqWriteHistory.getHistoryEndMillis()
+ > ONE_WEEK_MILLIS) {
+ /* Cut off all observations that are one week older than
+ * the descriptor publication time, or we'll have to update
+ * weeks of aggregate values every hour. */
+ return;
+ }
+ long intervalLengthMillis =
+ dirreqWriteHistory.getIntervalLength() * 1000L;
+ for (Map.Entry<Long, Long> e
+ : dirreqWriteHistory.getBandwidthValues().entrySet()) {
+ long intervalEndMillis = e.getKey();
+ long intervalStartMillis =
+ intervalEndMillis - intervalLengthMillis;
+ for (int i = 0; i < 2; i++) {
+ long fromMillis = intervalStartMillis;
+ long toMillis = intervalEndMillis;
+ double writtenBytes = (double) e.getValue();
+ if (intervalStartMillis / ONE_DAY_MILLIS
+ < intervalEndMillis / ONE_DAY_MILLIS) {
+ long utcBreakMillis = (intervalEndMillis
+ / ONE_DAY_MILLIS) * ONE_DAY_MILLIS;
+ if (i == 0) {
+ toMillis = utcBreakMillis;
+ } else if (i == 1) {
+ fromMillis = utcBreakMillis;
+ }
+ double intervalFraction = ((double) (toMillis - fromMillis))
+ / ((double) intervalLengthMillis);
+ writtenBytes *= intervalFraction;
+ } else if (i == 1) {
+ break;
+ }
+ writeOutputLine(fingerprint, "bridge", "bytes", "",
+ "", "", fromMillis, toMillis, writtenBytes, publishedMillis);
+ }
+ }
+ }
+
+ private static void parseBridgeNetworkStatus(BridgeNetworkStatus status)
+ throws IOException {
+ long publishedMillis = status.getPublishedMillis();
+ long fromMillis = (publishedMillis / ONE_HOUR_MILLIS)
+ * ONE_HOUR_MILLIS;
+ long toMillis = fromMillis + ONE_HOUR_MILLIS;
+ for (NetworkStatusEntry statusEntry
+ : status.getStatusEntries().values()) {
+ String fingerprint = statusEntry.getFingerprint()
+ .toUpperCase();
+ if (statusEntry.getFlags().contains("Running")) {
+ writeOutputLine(fingerprint, "bridge", "status", "", "", "",
+ fromMillis, toMillis, 0.0, publishedMillis);
+ }
+ }
+ }
+
+ private static Map<String, BufferedWriter> openOutputFiles = new HashMap<>();
+
+ private static void writeOutputLine(String fingerprint, String node,
+ String metric, String country, String transport, String version,
+ long fromMillis, long toMillis, double val, long publishedMillis)
+ throws IOException {
+ if (fromMillis > toMillis) {
+ return;
+ }
+ String fromDateTime = formatDateTimeMillis(fromMillis);
+ String toDateTime = formatDateTimeMillis(toMillis);
+ BufferedWriter bw = getOutputFile(fromDateTime, publishedMillis);
+ bw.write(String.format("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%.1f\n",
+ fingerprint, node, metric, country, transport, version,
+ fromDateTime, toDateTime, val));
+ }
+
+ private static SimpleDateFormat dateTimeFormat = null;
+
+ private static String formatDateTimeMillis(long millis) {
+ if (dateTimeFormat == null) {
+ dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ dateTimeFormat.setLenient(false);
+ dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ }
+ return dateTimeFormat.format(millis);
+ }
+
+ private static BufferedWriter getOutputFile(String fromDateTime,
+ long publishedMillis) throws IOException {
+ String outputFileName;
+ if (writeToSingleFile) {
+ outputFileName = "out/userstats.sql";
+ } else if (byStatsDateNotByDescHour) {
+ outputFileName = "out/userstats-" + fromDateTime.substring(0, 10)
+ + ".sql";
+ } else {
+ String publishedHourDateTime = formatDateTimeMillis(
+ (publishedMillis / ONE_HOUR_MILLIS) * ONE_HOUR_MILLIS);
+ outputFileName = "out/userstats-"
+ + publishedHourDateTime.substring(0, 10) + "-"
+ + publishedHourDateTime.substring(11, 13) + ".sql";
+ }
+ BufferedWriter bw = openOutputFiles.get(outputFileName);
+ if (bw == null) {
+ bw = openOutputFile(outputFileName);
+ openOutputFiles.put(outputFileName, bw);
+ }
+ return bw;
+ }
+
+ private static BufferedWriter openOutputFile(String outputFileName)
+ throws IOException {
+ File outputFile = new File(outputFileName);
+ outputFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ outputFileName));
+ bw.write("BEGIN;\n");
+ bw.write("LOCK TABLE imported NOWAIT;\n");
+ bw.write("COPY imported (fingerprint, node, metric, country, "
+ + "transport, version, stats_start, stats_end, val) FROM "
+ + "stdin;\n");
+ return bw;
+ }
+
+ private static void closeOutputFiles() throws IOException {
+ for (BufferedWriter bw : openOutputFiles.values()) {
+ bw.write("\\.\n");
+ bw.write("SELECT merge();\n");
+ bw.write("SELECT aggregate();\n");
+ bw.write("SELECT combine();\n");
+ bw.write("TRUNCATE imported;\n");
+ bw.write("COMMIT;\n");
+ bw.close();
+ }
+ }
+}
+
diff --git a/modules/clients/src/org/torproject/metrics/clients/Main.java b/modules/clients/src/org/torproject/metrics/clients/Main.java
deleted file mode 100644
index dff73f7..0000000
--- a/modules/clients/src/org/torproject/metrics/clients/Main.java
+++ /dev/null
@@ -1,478 +0,0 @@
-/* Copyright 2013--2017 The Tor Project
- * See LICENSE for licensing information */
-
-package org.torproject.metrics.clients;
-
-import org.torproject.descriptor.BandwidthHistory;
-import org.torproject.descriptor.BridgeNetworkStatus;
-import org.torproject.descriptor.Descriptor;
-import org.torproject.descriptor.DescriptorFile;
-import org.torproject.descriptor.DescriptorReader;
-import org.torproject.descriptor.DescriptorSourceFactory;
-import org.torproject.descriptor.ExtraInfoDescriptor;
-import org.torproject.descriptor.NetworkStatusEntry;
-import org.torproject.descriptor.RelayNetworkStatusConsensus;
-
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.text.SimpleDateFormat;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.SortedMap;
-import java.util.TimeZone;
-import java.util.TreeMap;
-
-public class Main {
-
- /** Executes this data-processing module. */
- public static void main(String[] args) throws Exception {
- parseArgs(args);
- parseRelayDescriptors();
- parseBridgeDescriptors();
- closeOutputFiles();
- }
-
- private static boolean writeToSingleFile = true;
- private static boolean byStatsDateNotByDescHour = false;
-
- private static void parseArgs(String[] args) {
- if (args.length == 0) {
- writeToSingleFile = true;
- } else if (args.length == 1 && args[0].equals("--stats-date")) {
- writeToSingleFile = false;
- byStatsDateNotByDescHour = true;
- } else if (args.length == 1 && args[0].equals("--desc-hour")) {
- writeToSingleFile = false;
- byStatsDateNotByDescHour = false;
- } else {
- System.err.println("Usage: java " + Main.class.getName()
- + " [ --stats-date | --desc-hour ]");
- System.exit(1);
- }
- }
-
- private static final long ONE_HOUR_MILLIS = 60L * 60L * 1000L;
-
- private static final long ONE_DAY_MILLIS = 24L * ONE_HOUR_MILLIS;
-
- private static final long ONE_WEEK_MILLIS = 7L * ONE_DAY_MILLIS;
-
- private static void parseRelayDescriptors() throws Exception {
- DescriptorReader descriptorReader =
- DescriptorSourceFactory.createDescriptorReader();
- descriptorReader.setExcludeFiles(new File(
- "status/relay-descriptors"));
- descriptorReader.addDirectory(new File(
- "../../shared/in/recent/relay-descriptors/consensuses"));
- descriptorReader.addDirectory(new File(
- "../../shared/in/recent/relay-descriptors/extra-infos"));
- descriptorReader.addDirectory(new File(
- "../../shared/in/archive/relay-descriptors/consensuses"));
- descriptorReader.addDirectory(new File(
- "../../shared/in/archive/relay-descriptors/extra-infos"));
- Iterator<DescriptorFile> descriptorFiles =
- descriptorReader.readDescriptors();
- while (descriptorFiles.hasNext()) {
- DescriptorFile descriptorFile = descriptorFiles.next();
- for (Descriptor descriptor : descriptorFile.getDescriptors()) {
- if (descriptor instanceof ExtraInfoDescriptor) {
- parseRelayExtraInfoDescriptor((ExtraInfoDescriptor) descriptor);
- } else if (descriptor instanceof RelayNetworkStatusConsensus) {
- parseRelayNetworkStatusConsensus(
- (RelayNetworkStatusConsensus) descriptor);
- }
- }
- }
- }
-
- private static void parseRelayExtraInfoDescriptor(
- ExtraInfoDescriptor descriptor) throws IOException {
- long publishedMillis = descriptor.getPublishedMillis();
- String fingerprint = descriptor.getFingerprint()
- .toUpperCase();
- long dirreqStatsEndMillis = descriptor.getDirreqStatsEndMillis();
- long dirreqStatsIntervalLengthMillis =
- descriptor.getDirreqStatsIntervalLength() * 1000L;
- SortedMap<String, Integer> requests = descriptor.getDirreqV3Reqs();
- BandwidthHistory dirreqWriteHistory =
- descriptor.getDirreqWriteHistory();
- parseRelayDirreqV3Reqs(fingerprint, publishedMillis,
- dirreqStatsEndMillis, dirreqStatsIntervalLengthMillis, requests);
- parseRelayDirreqWriteHistory(fingerprint, publishedMillis,
- dirreqWriteHistory);
- }
-
- private static void parseRelayDirreqV3Reqs(String fingerprint,
- long publishedMillis, long dirreqStatsEndMillis,
- long dirreqStatsIntervalLengthMillis,
- SortedMap<String, Integer> requests) throws IOException {
- if (requests == null
- || publishedMillis - dirreqStatsEndMillis > ONE_WEEK_MILLIS
- || dirreqStatsIntervalLengthMillis != ONE_DAY_MILLIS) {
- /* Cut off all observations that are one week older than
- * the descriptor publication time, or we'll have to update
- * weeks of aggregate values every hour. */
- return;
- }
- long statsStartMillis = dirreqStatsEndMillis
- - dirreqStatsIntervalLengthMillis;
- long utcBreakMillis = (dirreqStatsEndMillis / ONE_DAY_MILLIS)
- * ONE_DAY_MILLIS;
- for (int i = 0; i < 2; i++) {
- long fromMillis = i == 0 ? statsStartMillis
- : utcBreakMillis;
- long toMillis = i == 0 ? utcBreakMillis : dirreqStatsEndMillis;
- if (fromMillis >= toMillis) {
- continue;
- }
- double intervalFraction = ((double) (toMillis - fromMillis))
- / ((double) dirreqStatsIntervalLengthMillis);
- double sum = 0L;
- for (Map.Entry<String, Integer> e : requests.entrySet()) {
- String country = e.getKey();
- double reqs = ((double) e.getValue()) - 4.0;
- sum += reqs;
- writeOutputLine(fingerprint, "relay", "responses", country,
- "", "", fromMillis, toMillis, reqs * intervalFraction,
- publishedMillis);
- }
- writeOutputLine(fingerprint, "relay", "responses", "", "",
- "", fromMillis, toMillis, sum * intervalFraction,
- publishedMillis);
- }
- }
-
- private static void parseRelayDirreqWriteHistory(String fingerprint,
- long publishedMillis, BandwidthHistory dirreqWriteHistory)
- throws IOException {
- if (dirreqWriteHistory == null
- || publishedMillis - dirreqWriteHistory.getHistoryEndMillis()
- > ONE_WEEK_MILLIS) {
- return;
- /* Cut off all observations that are one week older than
- * the descriptor publication time, or we'll have to update
- * weeks of aggregate values every hour. */
- }
- long intervalLengthMillis =
- dirreqWriteHistory.getIntervalLength() * 1000L;
- for (Map.Entry<Long, Long> e
- : dirreqWriteHistory.getBandwidthValues().entrySet()) {
- long intervalEndMillis = e.getKey();
- long intervalStartMillis =
- intervalEndMillis - intervalLengthMillis;
- for (int i = 0; i < 2; i++) {
- long fromMillis = intervalStartMillis;
- long toMillis = intervalEndMillis;
- double writtenBytes = (double) e.getValue();
- if (intervalStartMillis / ONE_DAY_MILLIS
- < intervalEndMillis / ONE_DAY_MILLIS) {
- long utcBreakMillis = (intervalEndMillis
- / ONE_DAY_MILLIS) * ONE_DAY_MILLIS;
- if (i == 0) {
- toMillis = utcBreakMillis;
- } else if (i == 1) {
- fromMillis = utcBreakMillis;
- }
- double intervalFraction = ((double) (toMillis - fromMillis))
- / ((double) intervalLengthMillis);
- writtenBytes *= intervalFraction;
- } else if (i == 1) {
- break;
- }
- writeOutputLine(fingerprint, "relay", "bytes", "", "", "",
- fromMillis, toMillis, writtenBytes, publishedMillis);
- }
- }
- }
-
- private static void parseRelayNetworkStatusConsensus(
- RelayNetworkStatusConsensus consensus) throws IOException {
- long fromMillis = consensus.getValidAfterMillis();
- long toMillis = consensus.getFreshUntilMillis();
- for (NetworkStatusEntry statusEntry
- : consensus.getStatusEntries().values()) {
- String fingerprint = statusEntry.getFingerprint()
- .toUpperCase();
- if (statusEntry.getFlags().contains("Running")) {
- writeOutputLine(fingerprint, "relay", "status", "", "", "",
- fromMillis, toMillis, 0.0, fromMillis);
- }
- }
- }
-
- private static void parseBridgeDescriptors() throws Exception {
- DescriptorReader descriptorReader =
- DescriptorSourceFactory.createDescriptorReader();
- descriptorReader.setExcludeFiles(new File(
- "status/bridge-descriptors"));
- descriptorReader.addDirectory(new File(
- "../../shared/in/recent/bridge-descriptors"));
- descriptorReader.addDirectory(new File(
- "../../shared/in/archive/bridge-descriptors"));
- Iterator<DescriptorFile> descriptorFiles =
- descriptorReader.readDescriptors();
- while (descriptorFiles.hasNext()) {
- DescriptorFile descriptorFile = descriptorFiles.next();
- for (Descriptor descriptor : descriptorFile.getDescriptors()) {
- if (descriptor instanceof ExtraInfoDescriptor) {
- parseBridgeExtraInfoDescriptor(
- (ExtraInfoDescriptor) descriptor);
- } else if (descriptor instanceof BridgeNetworkStatus) {
- parseBridgeNetworkStatus((BridgeNetworkStatus) descriptor);
- }
- }
- }
- }
-
- private static void parseBridgeExtraInfoDescriptor(
- ExtraInfoDescriptor descriptor) throws IOException {
- String fingerprint = descriptor.getFingerprint().toUpperCase();
- long publishedMillis = descriptor.getPublishedMillis();
- long dirreqStatsEndMillis = descriptor.getDirreqStatsEndMillis();
- long dirreqStatsIntervalLengthMillis =
- descriptor.getDirreqStatsIntervalLength() * 1000L;
- parseBridgeDirreqV3Resp(fingerprint, publishedMillis,
- dirreqStatsEndMillis, dirreqStatsIntervalLengthMillis,
- descriptor.getDirreqV3Resp(),
- descriptor.getBridgeIps(),
- descriptor.getBridgeIpTransports(),
- descriptor.getBridgeIpVersions());
-
- parseBridgeDirreqWriteHistory(fingerprint, publishedMillis,
- descriptor.getDirreqWriteHistory());
- }
-
- private static void parseBridgeDirreqV3Resp(String fingerprint,
- long publishedMillis, long dirreqStatsEndMillis,
- long dirreqStatsIntervalLengthMillis,
- SortedMap<String, Integer> responses,
- SortedMap<String, Integer> bridgeIps,
- SortedMap<String, Integer> bridgeIpTransports,
- SortedMap<String, Integer> bridgeIpVersions) throws IOException {
- if (responses == null
- || publishedMillis - dirreqStatsEndMillis > ONE_WEEK_MILLIS
- || dirreqStatsIntervalLengthMillis != ONE_DAY_MILLIS) {
- /* Cut off all observations that are one week older than
- * the descriptor publication time, or we'll have to update
- * weeks of aggregate values every hour. */
- return;
- }
- long statsStartMillis = dirreqStatsEndMillis
- - dirreqStatsIntervalLengthMillis;
- long utcBreakMillis = (dirreqStatsEndMillis / ONE_DAY_MILLIS)
- * ONE_DAY_MILLIS;
- double resp = ((double) responses.get("ok")) - 4.0;
- if (resp > 0.0) {
- for (int i = 0; i < 2; i++) {
- long fromMillis = i == 0 ? statsStartMillis
- : utcBreakMillis;
- long toMillis = i == 0 ? utcBreakMillis : dirreqStatsEndMillis;
- if (fromMillis >= toMillis) {
- continue;
- }
- double intervalFraction = ((double) (toMillis - fromMillis))
- / ((double) dirreqStatsIntervalLengthMillis);
- writeOutputLine(fingerprint, "bridge", "responses", "", "",
- "", fromMillis, toMillis, resp * intervalFraction,
- publishedMillis);
- parseBridgeRespByCategory(fingerprint, fromMillis, toMillis, resp,
- dirreqStatsIntervalLengthMillis, "country", bridgeIps,
- publishedMillis);
- parseBridgeRespByCategory(fingerprint, fromMillis, toMillis, resp,
- dirreqStatsIntervalLengthMillis, "transport",
- bridgeIpTransports, publishedMillis);
- parseBridgeRespByCategory(fingerprint, fromMillis, toMillis, resp,
- dirreqStatsIntervalLengthMillis, "version", bridgeIpVersions,
- publishedMillis);
- }
- }
- }
-
- private static void parseBridgeRespByCategory(String fingerprint,
- long fromMillis, long toMillis, double resp,
- long dirreqStatsIntervalLengthMillis, String category,
- SortedMap<String, Integer> frequencies, long publishedMillis)
- throws IOException {
- double total = 0.0;
- SortedMap<String, Double> frequenciesCopy = new TreeMap<>();
- if (frequencies != null) {
- for (Map.Entry<String, Integer> e : frequencies.entrySet()) {
- if (e.getValue() < 4.0) {
- continue;
- }
- double frequency = ((double) e.getValue()) - 4.0;
- frequenciesCopy.put(e.getKey(), frequency);
- total += frequency;
- }
- }
- /* If we're not told any frequencies, or at least none of them are
- * greater than 4, put in a default that we'll attribute all responses
- * to. */
- if (total == 0) {
- if (category.equals("country")) {
- frequenciesCopy.put("??", 4.0);
- } else if (category.equals("transport")) {
- frequenciesCopy.put("<OR>", 4.0);
- } else if (category.equals("version")) {
- frequenciesCopy.put("v4", 4.0);
- }
- total = 4.0;
- }
- for (Map.Entry<String, Double> e : frequenciesCopy.entrySet()) {
- double intervalFraction = ((double) (toMillis - fromMillis))
- / ((double) dirreqStatsIntervalLengthMillis);
- double val = resp * intervalFraction * e.getValue() / total;
- if (category.equals("country")) {
- writeOutputLine(fingerprint, "bridge", "responses", e.getKey(),
- "", "", fromMillis, toMillis, val, publishedMillis);
- } else if (category.equals("transport")) {
- writeOutputLine(fingerprint, "bridge", "responses", "",
- e.getKey(), "", fromMillis, toMillis, val, publishedMillis);
- } else if (category.equals("version")) {
- writeOutputLine(fingerprint, "bridge", "responses", "", "",
- e.getKey(), fromMillis, toMillis, val, publishedMillis);
- }
- }
- }
-
- private static void parseBridgeDirreqWriteHistory(String fingerprint,
- long publishedMillis, BandwidthHistory dirreqWriteHistory)
- throws IOException {
- if (dirreqWriteHistory == null
- || publishedMillis - dirreqWriteHistory.getHistoryEndMillis()
- > ONE_WEEK_MILLIS) {
- /* Cut off all observations that are one week older than
- * the descriptor publication time, or we'll have to update
- * weeks of aggregate values every hour. */
- return;
- }
- long intervalLengthMillis =
- dirreqWriteHistory.getIntervalLength() * 1000L;
- for (Map.Entry<Long, Long> e
- : dirreqWriteHistory.getBandwidthValues().entrySet()) {
- long intervalEndMillis = e.getKey();
- long intervalStartMillis =
- intervalEndMillis - intervalLengthMillis;
- for (int i = 0; i < 2; i++) {
- long fromMillis = intervalStartMillis;
- long toMillis = intervalEndMillis;
- double writtenBytes = (double) e.getValue();
- if (intervalStartMillis / ONE_DAY_MILLIS
- < intervalEndMillis / ONE_DAY_MILLIS) {
- long utcBreakMillis = (intervalEndMillis
- / ONE_DAY_MILLIS) * ONE_DAY_MILLIS;
- if (i == 0) {
- toMillis = utcBreakMillis;
- } else if (i == 1) {
- fromMillis = utcBreakMillis;
- }
- double intervalFraction = ((double) (toMillis - fromMillis))
- / ((double) intervalLengthMillis);
- writtenBytes *= intervalFraction;
- } else if (i == 1) {
- break;
- }
- writeOutputLine(fingerprint, "bridge", "bytes", "",
- "", "", fromMillis, toMillis, writtenBytes, publishedMillis);
- }
- }
- }
-
- private static void parseBridgeNetworkStatus(BridgeNetworkStatus status)
- throws IOException {
- long publishedMillis = status.getPublishedMillis();
- long fromMillis = (publishedMillis / ONE_HOUR_MILLIS)
- * ONE_HOUR_MILLIS;
- long toMillis = fromMillis + ONE_HOUR_MILLIS;
- for (NetworkStatusEntry statusEntry
- : status.getStatusEntries().values()) {
- String fingerprint = statusEntry.getFingerprint()
- .toUpperCase();
- if (statusEntry.getFlags().contains("Running")) {
- writeOutputLine(fingerprint, "bridge", "status", "", "", "",
- fromMillis, toMillis, 0.0, publishedMillis);
- }
- }
- }
-
- private static Map<String, BufferedWriter> openOutputFiles = new HashMap<>();
-
- private static void writeOutputLine(String fingerprint, String node,
- String metric, String country, String transport, String version,
- long fromMillis, long toMillis, double val, long publishedMillis)
- throws IOException {
- if (fromMillis > toMillis) {
- return;
- }
- String fromDateTime = formatDateTimeMillis(fromMillis);
- String toDateTime = formatDateTimeMillis(toMillis);
- BufferedWriter bw = getOutputFile(fromDateTime, publishedMillis);
- bw.write(String.format("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%.1f\n",
- fingerprint, node, metric, country, transport, version,
- fromDateTime, toDateTime, val));
- }
-
- private static SimpleDateFormat dateTimeFormat = null;
-
- private static String formatDateTimeMillis(long millis) {
- if (dateTimeFormat == null) {
- dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- dateTimeFormat.setLenient(false);
- dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- }
- return dateTimeFormat.format(millis);
- }
-
- private static BufferedWriter getOutputFile(String fromDateTime,
- long publishedMillis) throws IOException {
- String outputFileName;
- if (writeToSingleFile) {
- outputFileName = "out/userstats.sql";
- } else if (byStatsDateNotByDescHour) {
- outputFileName = "out/userstats-" + fromDateTime.substring(0, 10)
- + ".sql";
- } else {
- String publishedHourDateTime = formatDateTimeMillis(
- (publishedMillis / ONE_HOUR_MILLIS) * ONE_HOUR_MILLIS);
- outputFileName = "out/userstats-"
- + publishedHourDateTime.substring(0, 10) + "-"
- + publishedHourDateTime.substring(11, 13) + ".sql";
- }
- BufferedWriter bw = openOutputFiles.get(outputFileName);
- if (bw == null) {
- bw = openOutputFile(outputFileName);
- openOutputFiles.put(outputFileName, bw);
- }
- return bw;
- }
-
- private static BufferedWriter openOutputFile(String outputFileName)
- throws IOException {
- File outputFile = new File(outputFileName);
- outputFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- outputFileName));
- bw.write("BEGIN;\n");
- bw.write("LOCK TABLE imported NOWAIT;\n");
- bw.write("COPY imported (fingerprint, node, metric, country, "
- + "transport, version, stats_start, stats_end, val) FROM "
- + "stdin;\n");
- return bw;
- }
-
- private static void closeOutputFiles() throws IOException {
- for (BufferedWriter bw : openOutputFiles.values()) {
- bw.write("\\.\n");
- bw.write("SELECT merge();\n");
- bw.write("SELECT aggregate();\n");
- bw.write("SELECT combine();\n");
- bw.write("TRUNCATE imported;\n");
- bw.write("COMMIT;\n");
- bw.close();
- }
- }
-}
-
diff --git a/modules/collectdescs/src/main/java/org/torproject/metrics/collectdescs/Main.java b/modules/collectdescs/src/main/java/org/torproject/metrics/collectdescs/Main.java
new file mode 100644
index 0000000..499dff9
--- /dev/null
+++ b/modules/collectdescs/src/main/java/org/torproject/metrics/collectdescs/Main.java
@@ -0,0 +1,31 @@
+/* Copyright 2015--2017 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collectdescs;
+
+import org.torproject.descriptor.DescriptorCollector;
+import org.torproject.descriptor.DescriptorSourceFactory;
+
+import java.io.File;
+
+public class Main {
+
+ /** Executes this data-processing module. */
+ public static void main(String[] args) {
+ /* Fetch recent descriptors from CollecTor. */
+ DescriptorCollector collector =
+ DescriptorSourceFactory.createDescriptorCollector();
+ collector.collectDescriptors(
+ "https://collector.torproject.org", new String[] {
+ "/recent/bridge-descriptors/extra-infos/",
+ "/recent/bridge-descriptors/server-descriptors/",
+ "/recent/bridge-descriptors/statuses/",
+ "/recent/exit-lists/",
+ "/recent/relay-descriptors/consensuses/",
+ "/recent/relay-descriptors/extra-infos/",
+ "/recent/relay-descriptors/server-descriptors/",
+ "/recent/torperf/"
+ }, 0L, new File("../../shared/in"), true);
+ }
+}
+
diff --git a/modules/collectdescs/src/org/torproject/metrics/collectdescs/Main.java b/modules/collectdescs/src/org/torproject/metrics/collectdescs/Main.java
deleted file mode 100644
index 499dff9..0000000
--- a/modules/collectdescs/src/org/torproject/metrics/collectdescs/Main.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/* Copyright 2015--2017 The Tor Project
- * See LICENSE for licensing information */
-
-package org.torproject.metrics.collectdescs;
-
-import org.torproject.descriptor.DescriptorCollector;
-import org.torproject.descriptor.DescriptorSourceFactory;
-
-import java.io.File;
-
-public class Main {
-
- /** Executes this data-processing module. */
- public static void main(String[] args) {
- /* Fetch recent descriptors from CollecTor. */
- DescriptorCollector collector =
- DescriptorSourceFactory.createDescriptorCollector();
- collector.collectDescriptors(
- "https://collector.torproject.org", new String[] {
- "/recent/bridge-descriptors/extra-infos/",
- "/recent/bridge-descriptors/server-descriptors/",
- "/recent/bridge-descriptors/statuses/",
- "/recent/exit-lists/",
- "/recent/relay-descriptors/consensuses/",
- "/recent/relay-descriptors/extra-infos/",
- "/recent/relay-descriptors/server-descriptors/",
- "/recent/torperf/"
- }, 0L, new File("../../shared/in"), true);
- }
-}
-
diff --git a/modules/connbidirect/build.xml b/modules/connbidirect/build.xml
index 72c028f..7bc1f32 100644
--- a/modules/connbidirect/build.xml
+++ b/modules/connbidirect/build.xml
@@ -1,61 +1,16 @@
<project default="run" name="connbidirect" basedir=".">
- <property name="connbidirect-sources" value="src/main/java"/>
- <property name="connbidirect-tests" value="src/test/java"/>
- <property name="connbidirect-libs" value="../../shared/lib"/>
- <property name="connbidirect-classes" value="classes"/>
+ <include file="../../shared/build-base.xml" as="basetask"/>
+ <target name="clean" depends="basetask.clean"/>
+ <target name="compile" depends="basetask.compile"/>
+ <target name="testcompile" depends="basetask.testcompile"/>
+ <target name="test" depends="basetask.test"/>
+
<path id="classpath">
- <pathelement path="${connbidirect-classes}"/>
- <fileset dir="${connbidirect-libs}">
- <include name="commons-codec-1.6.jar"/>
- <include name="commons-compress-1.9.jar"/>
- <include name="commons-lang-2.6.jar"/>
- <include name="junit4-4.11.jar"/>
- <include name="hamcrest-all-1.3.jar"/>
- <include name="descriptor-1.4.0.jar"/>
- <include name="slf4j-api-1.7.7.jar"/>
- <include name="logback-core-1.1.2.jar"/>
- <include name="logback-classic-1.1.2.jar"/>
- </fileset>
+ <pathelement path="${classes}"/>
+ <path refid="base.classpath" />
</path>
- <target name="compile">
- <mkdir dir="${connbidirect-classes}"/>
- <javac destdir="${connbidirect-classes}"
- srcdir="${connbidirect-sources}"
- source="1.7"
- target="1.7"
- debug="true"
- deprecation="true"
- optimize="false"
- failonerror="true"
- includeantruntime="false">
- <classpath refid="classpath"/>
- </javac>
- </target>
-
- <target name="test" depends="compile">
- <javac destdir="${connbidirect-classes}"
- srcdir="${connbidirect-tests}"
- source="1.7"
- target="1.7"
- debug="true"
- deprecation="true"
- optimize="false"
- failonerror="true"
- includeantruntime="false">
- <classpath refid="classpath"/>
- </javac>
- <junit fork="true" haltonfailure="true" printsummary="off">
- <classpath refid="classpath"/>
- <formatter type="plain" usefile="false"/>
- <batchtest>
- <fileset dir="${connbidirect-classes}"
- includes="**/*Test.class"/>
- </batchtest>
- </junit>
- </target>
-
<target name="run" depends="compile">
<java fork="true"
maxmemory="2g"
diff --git a/modules/hidserv/build.xml b/modules/hidserv/build.xml
index fe073a1..c997161 100644
--- a/modules/hidserv/build.xml
+++ b/modules/hidserv/build.xml
@@ -7,9 +7,6 @@
<path id="classpath">
<pathelement path="${classes}"/>
<path refid="base.classpath" />
- <fileset dir="${libs}">
- <include name="commons-codec-1.9.jar"/>
- </fileset>
</path>
<target name="run" depends="basetask.compile">
diff --git a/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Aggregator.java b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Aggregator.java
new file mode 100644
index 0000000..ea09a78
--- /dev/null
+++ b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Aggregator.java
@@ -0,0 +1,198 @@
+/* Copyright 2016--2017 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.hidserv;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+/** Aggregate extrapolated network totals of hidden-service statistics by
+ * calculating statistics like the daily weighted interquartile mean.
+ * Also calculate simpler statistics like the number of reported
+ * statistics and the total network fraction of reporting relays. */
+public class Aggregator {
+
+ /** Document file containing extrapolated hidden-service statistics. */
+ private File extrapolatedHidServStatsFile;
+
+ /** Document store for storing and retrieving extrapolated hidden-service
+ * statistics. */
+ private DocumentStore<ExtrapolatedHidServStats>
+ extrapolatedHidServStatsStore;
+
+ /** Output file for writing aggregated statistics. */
+ private File hidservStatsCsvFile;
+
+ /** Initializes a new aggregator object using the given directory,
+ * document store, and output file for results. */
+ public Aggregator(File statusDirectory,
+ DocumentStore<ExtrapolatedHidServStats>
+ extrapolatedHidServStatsStore, File hidservStatsCsvFile) {
+
+ /* Create a File instance for the document file containing
+ * extrapolated network totals. */
+ this.extrapolatedHidServStatsFile = new File(statusDirectory,
+ "extrapolated-hidserv-stats");
+
+ /* Store references to the provided document store and output file. */
+ this.extrapolatedHidServStatsStore = extrapolatedHidServStatsStore;
+ this.hidservStatsCsvFile = hidservStatsCsvFile;
+ }
+
+ /** Calculates aggregates for all extrapolated hidden-service statistics
+ * and writes them to the output file. */
+ public void aggregateHidServStats() {
+
+ /* Retrieve previously extrapolated network totals. */
+ Set<ExtrapolatedHidServStats> extrapolatedStats =
+ this.extrapolatedHidServStatsStore.retrieve(
+ this.extrapolatedHidServStatsFile);
+ if (extrapolatedStats == null) {
+ System.err.printf("Unable to retrieve extrapolated hidden-service "
+ + "statistics from file %s. Skipping aggregation step.%n",
+ this.extrapolatedHidServStatsFile.getAbsolutePath());
+ return;
+ }
+
+ /* Re-arrange extrapolated network totals by statistics interval end
+     * date, and include the computed network fraction as weight for the
+ * extrapolated value. More precisely, map keys are ISO-formatted
+ * dates, map values are double[] arrays with the extrapolated network
+ * total as first element and the corresponding computed network
+ * fraction as second element. */
+ SortedMap<String, List<double[]>> extrapolatedCells = new TreeMap<>();
+ SortedMap<String, List<double[]>> extrapolatedOnions = new TreeMap<>();
+ for (ExtrapolatedHidServStats extrapolated : extrapolatedStats) {
+ String date = DateTimeHelper.format(
+ extrapolated.getStatsDateMillis(),
+ DateTimeHelper.ISO_DATE_FORMAT);
+ if (extrapolated.getFractionRendRelayedCells() > 0.0) {
+ if (!extrapolatedCells.containsKey(date)) {
+ extrapolatedCells.put(date, new ArrayList<double[]>());
+ }
+ extrapolatedCells.get(date).add(new double[] {
+ extrapolated.getExtrapolatedRendRelayedCells(),
+ extrapolated.getFractionRendRelayedCells() });
+ }
+ if (extrapolated.getFractionDirOnionsSeen() > 0.0) {
+ if (!extrapolatedOnions.containsKey(date)) {
+ extrapolatedOnions.put(date, new ArrayList<double[]>());
+ }
+ extrapolatedOnions.get(date).add(new double[] {
+ extrapolated.getExtrapolatedDirOnionsSeen(),
+ extrapolated.getFractionDirOnionsSeen() });
+ }
+ }
+
+ /* Write all results to a string builder that will later be written to
+ * the output file. Each line contains an ISO-formatted "date", a
+ * string identifier for the "type" of statistic, the weighted mean
+ * ("wmean"), weighted median ("wmedian"), weighted interquartile mean
+ * ("wiqm"), the total network "frac"tion, and the number of reported
+ * "stats" with non-zero computed network fraction. */
+ StringBuilder sb = new StringBuilder();
+ sb.append("date,type,wmean,wmedian,wiqm,frac,stats\n");
+
+ /* Repeat all aggregation steps for both types of statistics. */
+ for (int i = 0; i < 2; i++) {
+ String type = i == 0 ? "rend-relayed-cells" : "dir-onions-seen";
+ SortedMap<String, List<double[]>> extrapolated = i == 0
+ ? extrapolatedCells : extrapolatedOnions;
+
+ /* Go through all dates. */
+ for (Map.Entry<String, List<double[]>> e
+ : extrapolated.entrySet()) {
+ List<double[]> weightedValues = e.getValue();
+
+ /* Sort extrapolated network totals contained in the first array
+ * element. (The second array element contains the computed
+ * network fraction as weight.) */
+ Collections.sort(weightedValues,
+ new Comparator<double[]>() {
+ public int compare(double[] first, double[] second) {
+ return first[0] < second[0] ? -1
+ : first[0] > second[0] ? 1
+ : 0;
+ }
+ }
+ );
+
+ /* For the weighted mean, sum up all previously extrapolated
+ * values weighted with their network fractions (which happens to
+ * be the values that relays reported), and sum up all network
+ * fractions. Once we have those two sums, we can divide the sum
+ * of weighted extrapolated values by the sum of network fractions
+ * to obtain the weighted mean of extrapolated values. */
+ double sumReported = 0.0;
+ double sumFraction = 0.0;
+ for (double[] d : weightedValues) {
+ sumReported += d[0] * d[1];
+ sumFraction += d[1];
+ }
+ double weightedMean = sumReported / sumFraction;
+
+ /* For the weighted median and weighted interquartile mean, go
+ * through all values once again. The weighted median is the
+ * first extrapolated value with weight interval end greater than
+ * 50% of reported network fractions. For the weighted
+ * interquartile mean, sum up extrapolated values multiplied with
+ * network fractions and network fractions falling into the 25% to
+ * 75% range and later compute the weighted mean of those. */
+ double weightIntervalEnd = 0.0;
+ Double weightedMedian = null;
+ double sumFractionInterquartile = 0.0;
+ double sumReportedInterquartile = 0.0;
+ for (double[] d : weightedValues) {
+ double extrapolatedValue = d[0];
+ double computedFraction = d[1];
+ double weightIntervalStart = weightIntervalEnd;
+ weightIntervalEnd += computedFraction;
+ if (weightedMedian == null
+ && weightIntervalEnd > sumFraction * 0.5) {
+ weightedMedian = extrapolatedValue;
+ }
+ if (weightIntervalEnd >= sumFraction * 0.25
+ && weightIntervalStart <= sumFraction * 0.75) {
+ double fractionBetweenQuartiles =
+ Math.min(weightIntervalEnd, sumFraction * 0.75)
+ - Math.max(weightIntervalStart, sumFraction * 0.25);
+ sumReportedInterquartile += extrapolatedValue
+ * fractionBetweenQuartiles;
+ sumFractionInterquartile += fractionBetweenQuartiles;
+ }
+ }
+ double weightedInterquartileMean =
+ sumReportedInterquartile / sumFractionInterquartile;
+
+ /* Put together all aggregated values in a single line. */
+ String date = e.getKey();
+ int numStats = weightedValues.size();
+ sb.append(String.format("%s,%s,%.0f,%.0f,%.0f,%.8f,%d%n", date,
+ type, weightedMean, weightedMedian, weightedInterquartileMean,
+ sumFraction, numStats));
+ }
+ }
+
+ /* Write all aggregated results to the output file. */
+ try {
+ this.hidservStatsCsvFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ this.hidservStatsCsvFile));
+ bw.write(sb.toString());
+ bw.close();
+ } catch (IOException e) {
+ System.err.printf("Unable to write results to %s. Ignoring.");
+ }
+ }
+}
+
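
The weighted mean, weighted median, and weighted interquartile mean computed
above are easiest to see on a small example. Here is a minimal, self-contained
sketch (class name and numbers invented for illustration) that applies the same
steps to three extrapolated totals weighted by their computed network
fractions:

    import java.util.Arrays;
    import java.util.Comparator;

    public class WeightedStatsSketch {
      public static void main(String[] args) {
        /* Each entry: { extrapolated network total, computed network
         * fraction }. */
        double[][] weightedValues = {
            { 900000.0, 0.02 },
            { 1500000.0, 0.01 },
            { 1100000.0, 0.05 } };

        /* Sort by extrapolated total, as in Aggregator. */
        Arrays.sort(weightedValues, new Comparator<double[]>() {
          public int compare(double[] first, double[] second) {
            return Double.compare(first[0], second[0]);
          }
        });

        /* Weighted mean: sum of (total * fraction) divided by sum of
         * fractions. */
        double sumReported = 0.0;
        double sumFraction = 0.0;
        for (double[] d : weightedValues) {
          sumReported += d[0] * d[1];
          sumFraction += d[1];
        }
        double weightedMean = sumReported / sumFraction;

        /* Weighted median: first total whose cumulative weight exceeds 50%.
         * Weighted interquartile mean: weighted mean restricted to the 25%
         * to 75% band of cumulative weight. */
        double weightIntervalEnd = 0.0;
        Double weightedMedian = null;
        double sumReportedIq = 0.0;
        double sumFractionIq = 0.0;
        for (double[] d : weightedValues) {
          double weightIntervalStart = weightIntervalEnd;
          weightIntervalEnd += d[1];
          if (weightedMedian == null
              && weightIntervalEnd > sumFraction * 0.5) {
            weightedMedian = d[0];
          }
          if (weightIntervalEnd >= sumFraction * 0.25
              && weightIntervalStart <= sumFraction * 0.75) {
            double overlap = Math.min(weightIntervalEnd, sumFraction * 0.75)
                - Math.max(weightIntervalStart, sumFraction * 0.25);
            sumReportedIq += d[0] * overlap;
            sumFractionIq += overlap;
          }
        }
        double weightedIqm = sumReportedIq / sumFractionIq;

        System.out.printf("wmean=%.0f wmedian=%.0f wiqm=%.0f%n",
            weightedMean, weightedMedian, weightedIqm);
      }
    }
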
diff --git a/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/ComputedNetworkFractions.java b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/ComputedNetworkFractions.java
new file mode 100644
index 0000000..a403e48
--- /dev/null
+++ b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/ComputedNetworkFractions.java
@@ -0,0 +1,183 @@
+/* Copyright 2016--2017 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.hidserv;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+/** Computed fraction of hidden-service activity that a single relay is
+ * assumed to observe in the network. These fractions are computed from
+ * status entries and bandwidth weights in a network status consensus. */
+public class ComputedNetworkFractions implements Document {
+
+ /** Relay fingerprint consisting of 40 upper-case hex characters. */
+ private String fingerprint;
+
+ public String getFingerprint() {
+ return this.fingerprint;
+ }
+
+ /** Valid-after timestamp of the consensus in milliseconds. */
+ private long validAfterMillis;
+
+ public long getValidAfterMillis() {
+ return this.validAfterMillis;
+ }
+
+ /** Fraction of cells on rendezvous circuits that this relay is assumed
+ * to observe in the network. */
+ private double fractionRendRelayedCells;
+
+ public void setFractionRendRelayedCells(
+ double fractionRendRelayedCells) {
+ this.fractionRendRelayedCells = fractionRendRelayedCells;
+ }
+
+ public double getFractionRendRelayedCells() {
+ return this.fractionRendRelayedCells;
+ }
+
+ /** Fraction of descriptors that this relay is assumed to observe in the
+   * network. This is calculated as the fraction of descriptor
+   * identifiers that this relay was responsible for, divided by 3,
+ * because each descriptor that is published to this directory is also
+ * published to two other directories. */
+ private double fractionDirOnionsSeen;
+
+ public void setFractionDirOnionsSeen(double fractionDirOnionsSeen) {
+ this.fractionDirOnionsSeen = fractionDirOnionsSeen;
+ }
+
+ public double getFractionDirOnionsSeen() {
+ return this.fractionDirOnionsSeen;
+ }
+
+ /** Instantiates a new fractions object using fingerprint and consensus
+ * valid-after time which together uniquely identify the object. */
+ public ComputedNetworkFractions(String fingerprint,
+ long validAfterMillis) {
+ this.fingerprint = fingerprint;
+ this.validAfterMillis = validAfterMillis;
+ }
+
+ /** Returns whether this object contains the same fingerprint and
+ * consensus valid-after time as the passed object. */
+ @Override
+ public boolean equals(Object otherObject) {
+ if (!(otherObject instanceof ComputedNetworkFractions)) {
+ return false;
+ }
+ ComputedNetworkFractions other =
+ (ComputedNetworkFractions) otherObject;
+ return this.fingerprint.equals(other.fingerprint)
+ && this.validAfterMillis == other.validAfterMillis;
+ }
+
+ /** Returns a (hopefully unique) hash code based on this object's
+ * fingerprint and consensus valid-after time. */
+ @Override
+ public int hashCode() {
+ return this.fingerprint.hashCode()
+ + (int) this.validAfterMillis;
+ }
+
+ private static Map<Long, String> previouslyFormattedDates =
+ Collections.synchronizedMap(new HashMap<Long, String>());
+
+ /** Returns a string representation of this object, consisting of two
+ * strings: the first string contains fingerprint and valid-after date,
+ * the second string contains the concatenation of all other
+ * attributes. */
+ @Override
+ public String[] format() {
+ long validAfterDateMillis = (this.validAfterMillis
+ / DateTimeHelper.ONE_DAY) * DateTimeHelper.ONE_DAY;
+ String validAfterDate;
+ if (previouslyFormattedDates.containsKey(validAfterDateMillis)) {
+ validAfterDate = previouslyFormattedDates.get(validAfterDateMillis);
+ } else {
+ validAfterDate = DateTimeHelper.format(validAfterDateMillis,
+ DateTimeHelper.ISO_DATE_FORMAT);
+ previouslyFormattedDates.put(validAfterDateMillis, validAfterDate);
+ }
+ long validAfterHourMillis = this.validAfterMillis
+ % DateTimeHelper.ONE_DAY;
+ String validAfterHour = String.format("%02d",
+ validAfterHourMillis / DateTimeHelper.ONE_HOUR);
+ String first = String.format("%s,%s", this.fingerprint,
+ validAfterDate);
+ String second = validAfterHour
+ + (this.fractionRendRelayedCells == 0.0 ? ","
+ : String.format(",%f", this.fractionRendRelayedCells))
+ + (this.fractionDirOnionsSeen == 0.0 ? ","
+ : String.format(",%f", this.fractionDirOnionsSeen));
+ return new String[] { first, second };
+ }
+
+  /** Instantiates an empty fractions object that will be initialized
+   * further by the parse method. */
+ ComputedNetworkFractions() {
+ }
+
+ private static Map<String, Long> previouslyParsedDates =
+ Collections.synchronizedMap(new HashMap<String, Long>());
+
+ /** Initializes this fractions object using the two provided strings
+ * that have been produced by the format method earlier and returns
+ * whether this operation was successful. */
+ @Override
+ public boolean parse(String[] formattedStrings) {
+ if (formattedStrings.length != 2) {
+ System.err.printf("Invalid number of formatted strings. "
+ + "Skipping.%n", formattedStrings.length);
+ return false;
+ }
+ String[] firstParts = formattedStrings[0].split(",", 2);
+ if (firstParts.length != 2) {
+ System.err.printf("Invalid number of comma-separated values. "
+ + "Skipping.%n");
+ return false;
+ }
+ String fingerprint = firstParts[0];
+ String[] secondParts = formattedStrings[1].split(",", 3);
+ if (secondParts.length != 3) {
+ System.err.printf("Invalid number of comma-separated values. "
+ + "Skipping.%n");
+ return false;
+ }
+ String validAfterDate = firstParts[1];
+ String validAfterHour = secondParts[0];
+ long validAfterDateMillis;
+ if (previouslyParsedDates.containsKey(validAfterDate)) {
+ validAfterDateMillis = previouslyParsedDates.get(validAfterDate);
+ } else {
+ validAfterDateMillis = DateTimeHelper.parse(validAfterDate,
+ DateTimeHelper.ISO_DATE_FORMAT);
+ previouslyParsedDates.put(validAfterDate, validAfterDateMillis);
+ }
+ long validAfterTimeMillis = Long.parseLong(validAfterHour)
+ * DateTimeHelper.ONE_HOUR;
+ if (validAfterDateMillis == DateTimeHelper.NO_TIME_AVAILABLE
+ || validAfterTimeMillis < 0L
+ || validAfterTimeMillis >= DateTimeHelper.ONE_DAY) {
+ System.err.printf("Invalid date/hour format. Skipping.%n");
+ return false;
+ }
+ long validAfterMillis = validAfterDateMillis + validAfterTimeMillis;
+ try {
+ this.fingerprint = fingerprint;
+ this.validAfterMillis = validAfterMillis;
+ this.fractionRendRelayedCells = secondParts[1].equals("")
+ ? 0.0 : Double.parseDouble(secondParts[1]);
+ this.fractionDirOnionsSeen = secondParts[2].equals("")
+ ? 0.0 : Double.parseDouble(secondParts[2]);
+ return true;
+ } catch (NumberFormatException e) {
+ System.err.printf("Invalid number format. Skipping.%n");
+ return false;
+ }
+ }
+}
+
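
For illustration, format() would turn a fractions object for one relay and one
consensus into a pair of strings like the following (fingerprint and fractions
invented), and parse() accepts the same pair back:

    0123456789ABCDEF0123456789ABCDEF01234567,2017-02-21
    13,0.000421,0.000137
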
diff --git a/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/DateTimeHelper.java b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/DateTimeHelper.java
new file mode 100644
index 0000000..5be6800
--- /dev/null
+++ b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/DateTimeHelper.java
@@ -0,0 +1,107 @@
+/* Copyright 2016--2017 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.hidserv;
+
+import java.text.DateFormat;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TimeZone;
+
+/** Utility class to format and parse dates and timestamps. */
+public class DateTimeHelper {
+
+ /** This class is not supposed to be instantiated, which is why its
+ * constructor has private visibility. */
+ private DateTimeHelper() {
+ }
+
+  /* Some useful time constants. */
+ public static final long ONE_SECOND = 1000L;
+
+ public static final long ONE_MINUTE = 60L * ONE_SECOND;
+
+ public static final long ONE_HOUR = 60L * ONE_MINUTE;
+
+ public static final long ONE_DAY = 24L * ONE_HOUR;
+
+ /* Some useful date/time formats. */
+ public static final String ISO_DATETIME_FORMAT = "yyyy-MM-dd HH:mm:ss";
+
+ public static final String ISO_DATE_HOUR_FORMAT = "yyyy-MM-dd HH";
+
+ public static final String ISO_DATE_FORMAT = "yyyy-MM-dd";
+
+ public static final String ISO_HOUR_FORMAT = "HH";
+
+ /** Map of DateFormat instances for parsing and formatting dates and
+ * timestamps, protected using ThreadLocal to ensure that each thread
+ * uses its own instances. */
+ private static ThreadLocal<Map<String, DateFormat>> dateFormats =
+ new ThreadLocal<Map<String, DateFormat>>() {
+
+ public Map<String, DateFormat> get() {
+ return super.get();
+ }
+
+ protected Map<String, DateFormat> initialValue() {
+ return new HashMap<>();
+ }
+
+ public void remove() {
+ super.remove();
+ }
+
+ public void set(Map<String, DateFormat> value) {
+ super.set(value);
+ }
+ };
+
+ /** Returns an instance of DateFormat for the given format, and if no
+ * such instance exists, creates one and puts it in the map. */
+ private static DateFormat getDateFormat(String format) {
+ Map<String, DateFormat> threadDateFormats = dateFormats.get();
+ if (!threadDateFormats.containsKey(format)) {
+ DateFormat dateFormat = new SimpleDateFormat(format);
+ dateFormat.setLenient(false);
+ dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ threadDateFormats.put(format, dateFormat);
+ }
+ return threadDateFormats.get(format);
+ }
+
+ /** Formats the given time in milliseconds using the given format. */
+ public static String format(long millis, String format) {
+ return getDateFormat(format).format(millis);
+ }
+
+ /** Formats the given time in milliseconds using ISO date/time
+ * format. */
+ public static String format(long millis) {
+ return format(millis, ISO_DATETIME_FORMAT);
+ }
+
+ /** Default result of the parse methods if the provided time could not
+ * be parsed. */
+ public static final long NO_TIME_AVAILABLE = -1L;
+
+ /** Parses the given string using the given format. */
+ public static long parse(String string, String format) {
+ if (null == string) {
+ return NO_TIME_AVAILABLE;
+ }
+ try {
+ return getDateFormat(format).parse(string).getTime();
+ } catch (ParseException e) {
+ return NO_TIME_AVAILABLE;
+ }
+ }
+
+ /** Parses the given string using ISO date/time format. */
+ public static long parse(String string) {
+ return parse(string, ISO_DATETIME_FORMAT);
+ }
+}
+
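
A brief usage sketch of this helper (class name and values are illustrative):

    import org.torproject.metrics.hidserv.DateTimeHelper;

    public class DateTimeHelperSketch {
      public static void main(String[] args) {
        /* Parse an ISO date/time string to milliseconds since the epoch
         * (UTC). */
        long millis = DateTimeHelper.parse("2017-02-23 13:30:26");

        /* Truncate to the start of the day and format as an ISO date. */
        long dayStartMillis = (millis / DateTimeHelper.ONE_DAY)
            * DateTimeHelper.ONE_DAY;
        System.out.println(DateTimeHelper.format(dayStartMillis,
            DateTimeHelper.ISO_DATE_FORMAT));                /* 2017-02-23 */

        /* Unparseable input yields NO_TIME_AVAILABLE (-1L). */
        System.out.println(DateTimeHelper.parse("not a timestamp")
            == DateTimeHelper.NO_TIME_AVAILABLE);            /* true */
      }
    }
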
diff --git a/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Document.java b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Document.java
new file mode 100644
index 0000000..46ce40d
--- /dev/null
+++ b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Document.java
@@ -0,0 +1,26 @@
+/* Copyright 2016--2017 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.hidserv;
+
+/** Common interface of documents that are supposed to be serialized and
+ * stored in document files and later retrieved and de-serialized. */
+public interface Document {
+
+ /** Returns an array of two strings with a string representation of this
+ * document.
+ *
+ * <p>The first string will be used to start a group of documents, the
+ * second string will be used to represent a single document in that
+ * group. Ideally, the first string is equivalent for many documents
+ * stored in the same file, and the second string is different for those
+ * documents.</p> */
+ public String[] format();
+
+ /** Initializes an object using the given array of two strings.
+ *
+ * <p>These are the same two strings that the format method
+ * provides.</p> */
+ public boolean parse(String[] formattedStrings);
+}
+
diff --git a/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/DocumentStore.java b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/DocumentStore.java
new file mode 100644
index 0000000..2670cf4
--- /dev/null
+++ b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/DocumentStore.java
@@ -0,0 +1,176 @@
+/* Copyright 2016--2017 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.hidserv;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.LineNumberReader;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+/** Utility class to store serialized objects implementing the Document
+ * interface to a file and later to retrieve them. */
+public class DocumentStore<T extends Document> {
+
+ /** Document class, needed to create new instances when retrieving
+ * documents. */
+ private Class<T> clazz;
+
+ /** Initializes a new store object for the given type of documents. */
+ DocumentStore(Class<T> clazz) {
+ this.clazz = clazz;
+ }
+
+ /** Stores the provided documents in the given file and returns whether
+ * the storage operation was successful.
+ *
+ * <p>If the file already existed and if it contains documents, merge
+ * the new documents with the existing ones.</p> */
+ public boolean store(File documentFile, Set<T> documentsToStore) {
+
+ /* Retrieve existing documents. */
+ Set<T> retrievedDocuments = this.retrieve(documentFile);
+ if (retrievedDocuments == null) {
+ System.err.printf("Unable to read and update %s. Not storing "
+ + "documents.%n", documentFile.getAbsoluteFile());
+ return false;
+ }
+
+ /* Merge new documents with existing ones. */
+ retrievedDocuments.addAll(documentsToStore);
+
+ /* Serialize documents. */
+ SortedMap<String, SortedSet<String>> formattedDocuments = new TreeMap<>();
+ for (T retrieveDocument : retrievedDocuments) {
+ String[] formattedDocument = retrieveDocument.format();
+ if (!formattedDocuments.containsKey(formattedDocument[0])) {
+ formattedDocuments.put(formattedDocument[0],
+ new TreeSet<String>());
+ }
+ formattedDocuments.get(formattedDocument[0]).add(
+ formattedDocument[1]);
+ }
+
+ /* Check if a temporary file exists from the previous execution. */
+ File documentTempFile = new File(documentFile.getAbsoluteFile()
+ + ".tmp");
+ if (documentTempFile.exists()) {
+ System.err.printf("Temporary document file %s still exists, "
+ + "indicating that a previous execution did not terminate "
+ + "cleanly. Not storing documents.%n",
+ documentTempFile.getAbsoluteFile());
+ return false;
+ }
+
+ /* Write to a new temporary file, then move it into place, possibly
+ * overwriting an existing file. */
+ try {
+ documentTempFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ documentTempFile));
+ for (Map.Entry<String, SortedSet<String>> e
+ : formattedDocuments.entrySet()) {
+ bw.write(e.getKey() + "\n");
+ for (String s : e.getValue()) {
+ bw.write(" " + s + "\n");
+ }
+ }
+ bw.close();
+ documentFile.delete();
+ documentTempFile.renameTo(documentFile);
+ } catch (IOException e) {
+ System.err.printf("Unable to write %s. Not storing documents.%n",
+ documentFile.getAbsolutePath());
+ return false;
+ }
+
+ /* Return success. */
+ return true;
+ }
+
+ /** Retrieves all previously stored documents from the given file. */
+ public Set<T> retrieve(File documentFile) {
+ return this.retrieve(documentFile, "");
+ }
+
+ /** Retrieves previously stored documents from the given file that start
+ * with the given prefix. */
+ public Set<T> retrieve(File documentFile, String prefix) {
+
+ /* Check if the document file exists, and if not, return an empty set.
+ * This is not an error case. */
+ Set<T> result = new HashSet<>();
+ if (!documentFile.exists()) {
+ return result;
+ }
+
+ /* Parse the document file line by line and de-serialize contained
+ * documents. */
+ try {
+ LineNumberReader lnr = new LineNumberReader(new BufferedReader(
+ new FileReader(documentFile)));
+ String line;
+ String formattedString0 = null;
+ while ((line = lnr.readLine()) != null) {
+ if (!line.startsWith(" ")) {
+ formattedString0 = line;
+ } else if (formattedString0 == null) {
+ System.err.printf("First line in %s must not start with a "
+ + "space. Not retrieving any previously stored "
+ + "documents.%n", documentFile.getAbsolutePath());
+ lnr.close();
+ return null;
+ } else if (prefix.length() > formattedString0.length()
+ && !(formattedString0 + line.substring(1))
+ .startsWith(prefix)) {
+ /* Skip combined line not starting with prefix. */
+ continue;
+ } else if (prefix.length() > 0
+ && !formattedString0.startsWith(prefix)) {
+ /* Skip line not starting with prefix. */
+ continue;
+ } else {
+ T document = this.clazz.newInstance();
+ if (!document.parse(new String[] { formattedString0,
+ line.substring(1) })) {
+ System.err.printf("Unable to read line %d from %s. Not "
+ + "retrieving any previously stored documents.%n",
+ lnr.getLineNumber(), documentFile.getAbsolutePath());
+ lnr.close();
+ return null;
+ }
+ result.add(document);
+ }
+ }
+ lnr.close();
+ } catch (IOException e) {
+ System.err.printf("Unable to read %s. Not retrieving any "
+ + "previously stored documents.%n",
+ documentFile.getAbsolutePath());
+ e.printStackTrace();
+ return null;
+ } catch (InstantiationException e) {
+ System.err.printf("Unable to read %s. Cannot instantiate document "
+ + "object.%n", documentFile.getAbsolutePath());
+ e.printStackTrace();
+ return null;
+ } catch (IllegalAccessException e) {
+ System.err.printf("Unable to read %s. Cannot instantiate document "
+ + "object.%n", documentFile.getAbsolutePath());
+ e.printStackTrace();
+ return null;
+ }
+ return result;
+ }
+}
+
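
On disk, a document file written by store() thus contains one unindented line
per group (the first format string) followed by one space-indented line per
document in that group (the second string). For computed network fractions, a
fragment of one day's file could look like this (fingerprints and fractions
invented):

    0123456789ABCDEF0123456789ABCDEF01234567,2017-02-21
     00,0.000421,0.000137
     01,0.000418,
     02,,0.000140
    89ABCDEF0123456789ABCDEF0123456789ABCDEF,2017-02-21
     00,0.000039,0.000012
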
diff --git a/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/ExtrapolatedHidServStats.java b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/ExtrapolatedHidServStats.java
new file mode 100644
index 0000000..53bef71
--- /dev/null
+++ b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/ExtrapolatedHidServStats.java
@@ -0,0 +1,170 @@
+/* Copyright 2016--2017 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.hidserv;
+
+/** Extrapolated network totals of hidden-service statistics reported by a
+ * single relay. Extrapolated values are based on reported statistics and
+ * computed network fractions in the statistics interval. */
+public class ExtrapolatedHidServStats implements Document {
+
+ /** Date of statistics interval end in milliseconds. */
+ private long statsDateMillis;
+
+ public long getStatsDateMillis() {
+ return this.statsDateMillis;
+ }
+
+ /** Relay fingerprint consisting of 40 upper-case hex characters. */
+ private String fingerprint;
+
+ public String getFingerprint() {
+ return this.fingerprint;
+ }
+
+ /** Extrapolated number of cells on rendezvous circuits in the
+ * network. */
+ private double extrapolatedRendRelayedCells;
+
+ public void setExtrapolatedRendRelayedCells(
+ double extrapolatedRendRelayedCells) {
+ this.extrapolatedRendRelayedCells = extrapolatedRendRelayedCells;
+ }
+
+ public double getExtrapolatedRendRelayedCells() {
+ return this.extrapolatedRendRelayedCells;
+ }
+
+ /** Computed fraction of observed cells on rendezvous circuits in the
+ * network, used to weight this relay's extrapolated network total in
+ * the aggregation step. */
+ private double fractionRendRelayedCells;
+
+ public void setFractionRendRelayedCells(
+ double fractionRendRelayedCells) {
+ this.fractionRendRelayedCells = fractionRendRelayedCells;
+ }
+
+ public double getFractionRendRelayedCells() {
+ return this.fractionRendRelayedCells;
+ }
+
+ /** Extrapolated number of .onions in the network. */
+ private double extrapolatedDirOnionsSeen;
+
+ public void setExtrapolatedDirOnionsSeen(
+ double extrapolatedDirOnionsSeen) {
+ this.extrapolatedDirOnionsSeen = extrapolatedDirOnionsSeen;
+ }
+
+ public double getExtrapolatedDirOnionsSeen() {
+ return this.extrapolatedDirOnionsSeen;
+ }
+
+ /** Computed fraction of observed .onions in the network, used to weight
+ * this relay's extrapolated network total in the aggregation step. */
+ private double fractionDirOnionsSeen;
+
+ public void setFractionDirOnionsSeen(double fractionDirOnionsSeen) {
+ this.fractionDirOnionsSeen = fractionDirOnionsSeen;
+ }
+
+ public double getFractionDirOnionsSeen() {
+ return this.fractionDirOnionsSeen;
+ }
+
+ /** Instantiates a new stats object using fingerprint and statistics
+ * interval end date which together uniquely identify the object. */
+ public ExtrapolatedHidServStats(long statsDateMillis,
+ String fingerprint) {
+ this.statsDateMillis = statsDateMillis;
+ this.fingerprint = fingerprint;
+ }
+
+ /** Returns whether this object contains the same fingerprint and
+ * statistics interval end date as the passed object. */
+ @Override
+ public boolean equals(Object otherObject) {
+ if (!(otherObject instanceof ExtrapolatedHidServStats)) {
+ return false;
+ }
+ ExtrapolatedHidServStats other =
+ (ExtrapolatedHidServStats) otherObject;
+ return this.fingerprint.equals(other.fingerprint)
+ && this.statsDateMillis == other.statsDateMillis;
+ }
+
+ /** Returns a (hopefully unique) hash code based on this object's
+ * fingerprint and statistics interval end date. */
+ @Override
+ public int hashCode() {
+ return this.fingerprint.hashCode() + (int) this.statsDateMillis;
+ }
+
+ /** Returns a string representation of this object, consisting of the
+ * statistics interval end date and the concatenation of all other
+ * attributes. */
+ @Override
+ public String[] format() {
+ String first = DateTimeHelper.format(this.statsDateMillis,
+ DateTimeHelper.ISO_DATE_FORMAT);
+ String second = this.fingerprint
+ + (this.fractionRendRelayedCells == 0.0 ? ",,"
+ : String.format(",%.0f,%f", this.extrapolatedRendRelayedCells,
+ this.fractionRendRelayedCells))
+ + (this.fractionDirOnionsSeen == 0.0 ? ",,"
+ : String.format(",%.0f,%f", this.extrapolatedDirOnionsSeen,
+ this.fractionDirOnionsSeen));
+ return new String[] { first, second };
+ }
+
+  /** Instantiates an empty stats object that will be initialized further
+   * by the parse method. */
+ ExtrapolatedHidServStats() {
+ }
+
+ /** Initializes this stats object using the two provided strings that
+ * have been produced by the format method earlier and returns whether
+ * this operation was successful. */
+ @Override
+ public boolean parse(String[] formattedStrings) {
+ if (formattedStrings.length != 2) {
+ System.err.printf("Invalid number of formatted strings. "
+ + "Skipping.%n", formattedStrings.length);
+ return false;
+ }
+ long statsDateMillis = DateTimeHelper.parse(formattedStrings[0],
+ DateTimeHelper.ISO_DATE_FORMAT);
+ String[] secondParts = formattedStrings[1].split(",", 5);
+ if (secondParts.length != 5) {
+ System.err.printf("Invalid number of comma-separated values. "
+ + "Skipping.%n");
+ return false;
+ }
+ String fingerprint = secondParts[0];
+ double extrapolatedRendRelayedCells = 0.0;
+ double fractionRendRelayedCells = 0.0;
+ double extrapolatedDirOnionsSeen = 0.0;
+ double fractionDirOnionsSeen = 0.0;
+ try {
+ extrapolatedRendRelayedCells = secondParts[1].equals("") ? 0.0
+ : Double.parseDouble(secondParts[1]);
+ fractionRendRelayedCells = secondParts[2].equals("") ? 0.0
+ : Double.parseDouble(secondParts[2]);
+ extrapolatedDirOnionsSeen = secondParts[3].equals("") ? 0.0
+ : Double.parseDouble(secondParts[3]);
+ fractionDirOnionsSeen = secondParts[4].equals("") ? 0.0
+ : Double.parseDouble(secondParts[4]);
+ } catch (NumberFormatException e) {
+ return false;
+ }
+ this.statsDateMillis = statsDateMillis;
+ this.fingerprint = fingerprint;
+ this.extrapolatedRendRelayedCells = extrapolatedRendRelayedCells;
+ this.fractionRendRelayedCells = fractionRendRelayedCells;
+ this.extrapolatedDirOnionsSeen = extrapolatedDirOnionsSeen;
+ this.fractionDirOnionsSeen = fractionDirOnionsSeen;
+ return true;
+ }
+}
+
diff --git a/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Extrapolator.java b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Extrapolator.java
new file mode 100644
index 0000000..262720a
--- /dev/null
+++ b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Extrapolator.java
@@ -0,0 +1,253 @@
+/* Copyright 2016--2017 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.hidserv;
+
+import java.io.File;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+/** Extrapolate hidden-service statistics reported by single relays by
+ * dividing them by the computed fraction of hidden-service activity
+ * observed by the relay. */
+public class Extrapolator {
+
+ /** Document file containing previously parsed reported hidden-service
+ * statistics. */
+ private File reportedHidServStatsFile;
+
+ /** Document store for storing and retrieving reported hidden-service
+ * statistics. */
+ private DocumentStore<ReportedHidServStats> reportedHidServStatsStore;
+
+ /** Directory containing document files with previously computed network
+ * fractions. */
+ private File computedNetworkFractionsDirectory;
+
+ /** Document store for storing and retrieving computed network
+ * fractions. */
+ private DocumentStore<ComputedNetworkFractions>
+ computedNetworkFractionsStore;
+
+ /** Document file containing extrapolated hidden-service statistics. */
+ private File extrapolatedHidServStatsFile;
+
+ /** Document store for storing and retrieving extrapolated hidden-service
+ * statistics. */
+ private DocumentStore<ExtrapolatedHidServStats>
+ extrapolatedHidServStatsStore;
+
+ /** Initializes a new extrapolator object using the given directory and
+ * document stores. */
+ public Extrapolator(File statusDirectory,
+ DocumentStore<ReportedHidServStats> reportedHidServStatsStore,
+ DocumentStore<ComputedNetworkFractions>
+ computedNetworkFractionsStore,
+ DocumentStore<ExtrapolatedHidServStats>
+ extrapolatedHidServStatsStore) {
+
+ /* Create File instances for the files and directories in the provided
+ * status directory. */
+ this.reportedHidServStatsFile = new File(statusDirectory,
+ "reported-hidserv-stats");
+ this.computedNetworkFractionsDirectory =
+ new File(statusDirectory, "computed-network-fractions");
+ this.extrapolatedHidServStatsFile = new File(statusDirectory,
+ "extrapolated-hidserv-stats");
+
+ /* Store references to the provided document stores. */
+ this.reportedHidServStatsStore = reportedHidServStatsStore;
+ this.computedNetworkFractionsStore = computedNetworkFractionsStore;
+ this.extrapolatedHidServStatsStore = extrapolatedHidServStatsStore;
+ }
+
+  /** Iterates over all reported stats and extrapolates network totals for
+ * those that have not been extrapolated before. */
+ public boolean extrapolateHidServStats() {
+
+ /* Retrieve previously extrapolated stats to avoid extrapolating them
+ * again. */
+ Set<ExtrapolatedHidServStats> extrapolatedStats =
+ this.extrapolatedHidServStatsStore.retrieve(
+ this.extrapolatedHidServStatsFile);
+
+ /* Retrieve all reported stats, even including those that have already
+ * been extrapolated. */
+ Set<ReportedHidServStats> reportedStats =
+ this.reportedHidServStatsStore.retrieve(
+ this.reportedHidServStatsFile);
+
+ /* Make sure that all documents could be retrieved correctly. */
+ if (extrapolatedStats == null || reportedStats == null) {
+ System.err.printf("Could not read previously parsed or "
+ + "extrapolated hidserv-stats. Skipping.");
+ return false;
+ }
+
+ /* Re-arrange reported stats by fingerprint. */
+ SortedMap<String, Set<ReportedHidServStats>> parsedStatsByFingerprint =
+ new TreeMap<>();
+ for (ReportedHidServStats stat : reportedStats) {
+ String fingerprint = stat.getFingerprint();
+ if (!parsedStatsByFingerprint.containsKey(fingerprint)) {
+ parsedStatsByFingerprint.put(fingerprint,
+ new HashSet<ReportedHidServStats>());
+ }
+ parsedStatsByFingerprint.get(fingerprint).add(stat);
+ }
+
+ /* Go through reported stats by fingerprint. */
+ for (Map.Entry<String, Set<ReportedHidServStats>> e
+ : parsedStatsByFingerprint.entrySet()) {
+ String fingerprint = e.getKey();
+
+ /* Iterate over all stats reported by this relay and make a list of
+ * those that still need to be extrapolated. Also make a list of
+ * all dates for which we need to retrieve computed network
+ * fractions. */
+ Set<ReportedHidServStats> newReportedStats = new HashSet<>();
+ SortedSet<String> retrieveFractionDates = new TreeSet<>();
+ for (ReportedHidServStats stats : e.getValue()) {
+
+ /* Check whether extrapolated stats already contain an object with
+ * the same statistics interval end date and fingerprint. */
+ long statsDateMillis = (stats.getStatsEndMillis()
+ / DateTimeHelper.ONE_DAY) * DateTimeHelper.ONE_DAY;
+ if (extrapolatedStats.contains(
+ new ExtrapolatedHidServStats(statsDateMillis, fingerprint))) {
+ continue;
+ }
+
+ /* Add the reported stats to the list of stats we still need to
+ * extrapolate. */
+ newReportedStats.add(stats);
+
+ /* Add all dates between statistics interval start and end to a
+ * list. */
+ long statsEndMillis = stats.getStatsEndMillis();
+ long statsStartMillis = statsEndMillis
+ - stats.getStatsIntervalSeconds() * DateTimeHelper.ONE_SECOND;
+ for (long millis = statsStartMillis; millis <= statsEndMillis;
+ millis += DateTimeHelper.ONE_DAY) {
+ String date = DateTimeHelper.format(millis,
+ DateTimeHelper.ISO_DATE_FORMAT);
+ retrieveFractionDates.add(date);
+ }
+ }
+
+ /* Retrieve all computed network fractions that might be needed to
+ * extrapolate new statistics. Keep a list of all known consensus
+ * valid-after times, and keep a map of fractions also by consensus
+ * valid-after time. (It's not sufficient to only keep the latter,
+ * because we need to count known consensuses even if the relay was
+ * not contained in a consensus or had a network fraction of exactly
+ * zero.) */
+ SortedSet<Long> knownConsensuses = new TreeSet<>();
+ SortedMap<Long, ComputedNetworkFractions> computedNetworkFractions =
+ new TreeMap<>();
+ for (String date : retrieveFractionDates) {
+ File documentFile = new File(
+ this.computedNetworkFractionsDirectory, date);
+ Set<ComputedNetworkFractions> fractions
+ = this.computedNetworkFractionsStore.retrieve(documentFile,
+ fingerprint);
+ for (ComputedNetworkFractions fraction : fractions) {
+ knownConsensuses.add(fraction.getValidAfterMillis());
+ if (fraction.getFingerprint().equals(fingerprint)) {
+ computedNetworkFractions.put(fraction.getValidAfterMillis(),
+ fraction);
+ }
+ }
+ }
+
+ /* Go through newly reported stats, match them with computed network
+ * fractions, and extrapolate network totals. */
+ for (ReportedHidServStats stats : newReportedStats) {
+ long statsEndMillis = stats.getStatsEndMillis();
+ long statsDateMillis = (statsEndMillis / DateTimeHelper.ONE_DAY)
+ * DateTimeHelper.ONE_DAY;
+ long statsStartMillis = statsEndMillis
+ - stats.getStatsIntervalSeconds() * DateTimeHelper.ONE_SECOND;
+
+        /* Sum up computed network fractions and count known consensuses in
+ * the relevant interval, so that we can later compute means of
+ * network fractions. */
+ double sumFractionRendRelayedCells = 0.0;
+ double sumFractionDirOnionsSeen = 0.0;
+ int consensuses = 0;
+ for (long validAfterMillis : knownConsensuses) {
+ if (statsStartMillis <= validAfterMillis
+ && validAfterMillis < statsEndMillis) {
+ if (computedNetworkFractions.containsKey(validAfterMillis)) {
+ ComputedNetworkFractions frac =
+ computedNetworkFractions.get(validAfterMillis);
+ sumFractionRendRelayedCells +=
+ frac.getFractionRendRelayedCells();
+ sumFractionDirOnionsSeen +=
+ frac.getFractionDirOnionsSeen();
+ }
+ consensuses++;
+ }
+ }
+
+ /* If we don't know a single consensus with valid-after time in
+ * the statistics interval, skip this stat. */
+ if (consensuses == 0) {
+ continue;
+ }
+
+ /* Compute means of network fractions. */
+ double fractionRendRelayedCells =
+ sumFractionRendRelayedCells / consensuses;
+ double fractionDirOnionsSeen =
+ sumFractionDirOnionsSeen / consensuses;
+
+ /* If at least one fraction is positive, extrapolate network
+ * totals. */
+ if (fractionRendRelayedCells > 0.0
+ || fractionDirOnionsSeen > 0.0) {
+ ExtrapolatedHidServStats extrapolated =
+ new ExtrapolatedHidServStats(
+ statsDateMillis, fingerprint);
+ if (fractionRendRelayedCells > 0.0) {
+ extrapolated.setFractionRendRelayedCells(
+ fractionRendRelayedCells);
+ /* Extrapolating cells on rendezvous circuits is as easy as
+ * dividing the reported number by the computed network
+ * fraction. */
+ double extrapolatedRendRelayedCells =
+ stats.getRendRelayedCells() / fractionRendRelayedCells;
+ extrapolated.setExtrapolatedRendRelayedCells(
+ extrapolatedRendRelayedCells);
+ }
+ if (fractionDirOnionsSeen > 0.0) {
+ extrapolated.setFractionDirOnionsSeen(
+ fractionDirOnionsSeen);
+ /* Extrapolating reported unique .onion addresses to the
+ * total number in the network is more difficult. In short,
+ * each descriptor is stored to 12 (likely) different
+ * directories, so we'll have to divide the reported number by
+ * 12 and then by the computed network fraction of this
+ * directory. */
+ double extrapolatedDirOnionsSeen =
+ stats.getDirOnionsSeen() / (12.0 * fractionDirOnionsSeen);
+ extrapolated.setExtrapolatedDirOnionsSeen(
+ extrapolatedDirOnionsSeen);
+ }
+ extrapolatedStats.add(extrapolated);
+ }
+ }
+ }
+
+ /* Store all extrapolated network totals to disk with help of the
+ * document store. */
+ return this.extrapolatedHidServStatsStore.store(
+ this.extrapolatedHidServStatsFile, extrapolatedStats);
+ }
+}
+
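
The arithmetic of the extrapolation step itself is simple; a minimal sketch
with invented numbers:

    public class ExtrapolationSketch {
      public static void main(String[] args) {
        /* Values reported by a single relay for one statistics interval
         * (numbers invented). */
        long rendRelayedCells = 10240000L;
        long dirOnionsSeen = 176L;

        /* Mean network fractions of that relay over the same interval,
         * averaged over all consensuses covering the interval. */
        double fractionRendRelayedCells = 0.004;
        double fractionDirOnionsSeen = 0.002;

        /* Cells on rendezvous circuits: divide the reported number by the
         * relay's computed network fraction. */
        double extrapolatedRendRelayedCells =
            rendRelayedCells / fractionRendRelayedCells;

        /* Unique .onion addresses: each descriptor is published to roughly
         * twelve directories, so divide by twelve times the directory's
         * fraction. */
        double extrapolatedDirOnionsSeen =
            dirOnionsSeen / (12.0 * fractionDirOnionsSeen);

        System.out.printf("%.0f cells, %.0f onions%n",
            extrapolatedRendRelayedCells, extrapolatedDirOnionsSeen);
      }
    }
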
diff --git a/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Main.java b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Main.java
new file mode 100644
index 0000000..ad0b415
--- /dev/null
+++ b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Main.java
@@ -0,0 +1,88 @@
+/* Copyright 2016--2017 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.hidserv;
+
+import java.io.File;
+import java.util.HashSet;
+import java.util.Set;
+
+/** Main class for updating extrapolated network totals of hidden-service
+ * statistics. The main method of this class can be executed as often as
+ * new statistics are needed, though callers must ensure that executions
+ * do not overlap. */
+public class Main {
+
+  /** Parses new descriptors, extrapolates contained statistics using
+   * computed network fractions, aggregates results, and writes results to
+ * disk. */
+ public static void main(String[] args) {
+
+ /* Initialize directories and file paths. */
+ Set<File> inDirectories = new HashSet<>();
+ inDirectories.add(
+ new File("../../shared/in/recent/relay-descriptors/consensuses"));
+ inDirectories.add(
+ new File("../../shared/in/recent/relay-descriptors/extra-infos"));
+ File statusDirectory = new File("status");
+
+ /* Initialize parser and read parse history to avoid parsing
+ * descriptor files that haven't changed since the last execution. */
+ System.out.println("Initializing parser and reading parse "
+ + "history...");
+ DocumentStore<ReportedHidServStats> reportedHidServStatsStore =
+ new DocumentStore<>(ReportedHidServStats.class);
+ DocumentStore<ComputedNetworkFractions>
+ computedNetworkFractionsStore = new DocumentStore<>(
+ ComputedNetworkFractions.class);
+ Parser parser = new Parser(inDirectories, statusDirectory,
+ reportedHidServStatsStore, computedNetworkFractionsStore);
+ parser.readParseHistory();
+
+ /* Parse new descriptors and store their contents using the document
+ * stores. */
+ System.out.println("Parsing descriptors...");
+ if (!parser.parseDescriptors()) {
+ System.err.println("Could not store parsed descriptors. "
+ + "Terminating.");
+ return;
+ }
+
+ /* Write the parse history to avoid parsing descriptor files again
+ * next time. It's okay to do this now and not at the end of the
+ * execution, because even if something breaks apart below, it's safe
+ * not to parse descriptor files again. */
+ System.out.println("Writing parse history...");
+ parser.writeParseHistory();
+
+ /* Extrapolate reported statistics using computed network fractions
+ * and write the result to disk using a document store. The result is
+ * a single file with extrapolated network totals based on reports by
+ * single relays. */
+ System.out.println("Extrapolating statistics...");
+ DocumentStore<ExtrapolatedHidServStats> extrapolatedHidServStatsStore
+ = new DocumentStore<>(ExtrapolatedHidServStats.class);
+ Extrapolator extrapolator = new Extrapolator(statusDirectory,
+ reportedHidServStatsStore, computedNetworkFractionsStore,
+ extrapolatedHidServStatsStore);
+ if (!extrapolator.extrapolateHidServStats()) {
+ System.err.println("Could not extrapolate statistics. "
+ + "Terminating.");
+ return;
+ }
+
+ /* Go through all extrapolated network totals and aggregate them.
+ * This includes calculating daily weighted interquartile means, among
+ * other statistics. Write the result to a .csv file that can be
+ * processed by other tools. */
+ System.out.println("Aggregating statistics...");
+ File hidservStatsExtrapolatedCsvFile = new File("stats/hidserv.csv");
+ Aggregator aggregator = new Aggregator(statusDirectory,
+ extrapolatedHidServStatsStore, hidservStatsExtrapolatedCsvFile);
+ aggregator.aggregateHidServStats();
+
+ /* End this execution. */
+ System.out.println("Terminating.");
+ }
+}
+
diff --git a/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Parser.java b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Parser.java
new file mode 100644
index 0000000..eccb0c0
--- /dev/null
+++ b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Parser.java
@@ -0,0 +1,440 @@
+/* Copyright 2016--2017 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.hidserv;
+
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorFile;
+import org.torproject.descriptor.DescriptorReader;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.ExtraInfoDescriptor;
+import org.torproject.descriptor.NetworkStatusEntry;
+import org.torproject.descriptor.RelayNetworkStatusConsensus;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.math.BigInteger;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+/** Parse hidden-service statistics from extra-info descriptors, compute
+ * network fractions from consensuses, and write parsed contents to
+ * document files for later use. */
+public class Parser {
+
+ /** File containing tuples of last-modified times and file names of
+ * descriptor files parsed in the previous execution. */
+ private File parseHistoryFile;
+
+ /** Descriptor reader to provide parsed extra-info descriptors and
+ * consensuses. */
+ private DescriptorReader descriptorReader;
+
+ /** Document file containing previously parsed reported hidden-service
+ * statistics. */
+ private File reportedHidServStatsFile;
+
+ /** Document store for storing and retrieving reported hidden-service
+ * statistics. */
+ private DocumentStore<ReportedHidServStats> reportedHidServStatsStore;
+
+ /** Directory containing document files with previously computed network
+ * fractions. */
+ private File computedNetworkFractionsDirectory;
+
+ /** Document store for storing and retrieving computed network
+ * fractions. */
+ private DocumentStore<ComputedNetworkFractions>
+ computedNetworkFractionsStore;
+
+ /** Initializes a new parser object using the given directories and
+ * document stores. */
+ public Parser(Set<File> inDirectories, File statusDirectory,
+ DocumentStore<ReportedHidServStats> reportedHidServStatsStore,
+ DocumentStore<ComputedNetworkFractions>
+ computedNetworkFractionsStore) {
+
+ /* Create a new descriptor reader for reading descriptors in the given
+ * in directory. Configure the reader to avoid having more than five
+ * parsed descriptors in the queue, rather than the default one
+ * hundred. Five is a compromise between very large consensuses and
+ * rather small extra-info descriptors. */
+ this.descriptorReader =
+ DescriptorSourceFactory.createDescriptorReader();
+ for (File inDirectory : inDirectories) {
+ this.descriptorReader.addDirectory(inDirectory);
+ }
+ this.descriptorReader.setMaxDescriptorFilesInQueue(5);
+
+ /* Create File instances for the files and directories in the provided
+ * status directory. */
+ this.parseHistoryFile = new File(statusDirectory, "parse-history");
+ this.reportedHidServStatsFile = new File(statusDirectory,
+ "reported-hidserv-stats");
+ this.computedNetworkFractionsDirectory =
+ new File(statusDirectory, "computed-network-fractions");
+
+ /* Store references to the provided document stores. */
+ this.reportedHidServStatsStore = reportedHidServStatsStore;
+ this.computedNetworkFractionsStore = computedNetworkFractionsStore;
+ }
+
+ /** Reads the parse history file to avoid parsing descriptor files that
+ * have not changed since the previous execution. */
+ public void readParseHistory() {
+ if (this.parseHistoryFile.exists()
+ && this.parseHistoryFile.isFile()) {
+ SortedMap<String, Long> excludedFiles = new TreeMap<>();
+ try {
+ BufferedReader br = new BufferedReader(new FileReader(
+ this.parseHistoryFile));
+ String line;
+ while ((line = br.readLine()) != null) {
+ try {
+ /* Each line is supposed to contain the last-modified time and
+ * absolute path of a descriptor file. */
+ String[] parts = line.split(" ", 2);
+ excludedFiles.put(parts[1], Long.parseLong(parts[0]));
+ } catch (NumberFormatException e) {
+ System.err.printf("Illegal line '%s' in parse history. "
+ + "Skipping line.%n", line);
+ }
+ }
+ br.close();
+ } catch (IOException e) {
+ System.err.printf("Could not read history file '%s'. Not "
+ + "excluding descriptors in this execution.",
+ this.parseHistoryFile.getAbsolutePath());
+ }
+
+ /* Tell the descriptor reader to exclude the files contained in the
+ * parse history file. */
+ this.descriptorReader.setExcludedFiles(excludedFiles);
+ }
+ }
+
+ /** Writes parsed or skipped descriptor files with last-modified times
+ * and absolute paths to the parse history file to avoid parsing these
+   * files again, unless they change before the next execution. */
+ public void writeParseHistory() {
+
+ /* Obtain the list of descriptor files that were either parsed now or
+ * that were skipped in this execution from the descriptor reader. */
+ SortedMap<String, Long> excludedAndParsedFiles = new TreeMap<>();
+ excludedAndParsedFiles.putAll(
+ this.descriptorReader.getExcludedFiles());
+ excludedAndParsedFiles.putAll(this.descriptorReader.getParsedFiles());
+ try {
+ this.parseHistoryFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ this.parseHistoryFile));
+ for (Map.Entry<String, Long> e
+ : excludedAndParsedFiles.entrySet()) {
+ /* Each line starts with the last-modified time of the descriptor
+ * file, followed by its absolute path. */
+ String absolutePath = e.getKey();
+ long lastModifiedMillis = e.getValue();
+ bw.write(String.valueOf(lastModifiedMillis) + " " + absolutePath
+ + "\n");
+ }
+ bw.close();
+ } catch (IOException e) {
+ System.err.printf("Could not write history file '%s'. Not "
+ + "excluding descriptors in next execution.",
+ this.parseHistoryFile.getAbsolutePath());
+ }
+ }
+
+ /** Set of all reported hidden-service statistics.
+ *
+ * <p>To date, these objects are small, and keeping them all in memory
+ * is easy. But if this ever changes, e.g., when more and more
+ * statistics are added, this may not scale.</p> */
+ private Set<ReportedHidServStats> reportedHidServStats = new HashSet<>();
+
+ /** Instructs the descriptor reader to parse descriptor files, and
+ * handles the resulting parsed descriptors if they are either
+ * extra-info descriptors or consensuses. */
+ public boolean parseDescriptors() {
+ Iterator<DescriptorFile> descriptorFiles =
+ this.descriptorReader.readDescriptors();
+ while (descriptorFiles.hasNext()) {
+ DescriptorFile descriptorFile = descriptorFiles.next();
+ for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+ if (descriptor instanceof ExtraInfoDescriptor) {
+ this.parseExtraInfoDescriptor((ExtraInfoDescriptor) descriptor);
+ } else if (descriptor instanceof RelayNetworkStatusConsensus) {
+ if (!this.parseRelayNetworkStatusConsensus(
+ (RelayNetworkStatusConsensus) descriptor)) {
+ return false;
+ }
+ }
+ }
+ }
+
+ /* Store reported hidden-service statistics to their document file.
+ * It's more efficient to only do this once after processing all
+ * descriptors. In contrast, sets of computed network fractions are
+ * stored immediately after processing the consensus they are based
+ * on. */
+ return this.reportedHidServStatsStore.store(
+ this.reportedHidServStatsFile, this.reportedHidServStats);
+ }
+
+ private static final String BIN_SIZE = "bin_size";
+
+ /** Parses the given extra-info descriptor by extracting its fingerprint
+ * and contained hidserv-* lines.
+ *
+ * <p>If a valid set of hidserv-stats can be extracted, create a new
+ * stats object that will later be stored to a document file.</p> */
+ private void parseExtraInfoDescriptor(
+ ExtraInfoDescriptor extraInfoDescriptor) {
+
+ /* Extract the fingerprint from the parsed descriptor. */
+ String fingerprint = extraInfoDescriptor.getFingerprint();
+
+ /* If the descriptor did not contain any of the expected hidserv-*
+ * lines, don't do anything. This applies to the majority of
+ * descriptors, at least as long as only a minority of relays reports
+ * these statistics. */
+ if (extraInfoDescriptor.getHidservStatsEndMillis() < 0L
+ && extraInfoDescriptor.getHidservRendRelayedCells() == null
+ && extraInfoDescriptor.getHidservDirOnionsSeen() == null) {
+ return;
+
+ /* If the descriptor contained all expected hidserv-* lines, create a
+     * new stats object and put it in the local set, so that it will later
+ * be written to a document file. */
+ } else if (extraInfoDescriptor.getHidservStatsEndMillis() >= 0L
+ && extraInfoDescriptor.getHidservStatsIntervalLength() >= 0L
+ && extraInfoDescriptor.getHidservRendRelayedCells() != null
+ && extraInfoDescriptor.getHidservRendRelayedCellsParameters() != null
+ && extraInfoDescriptor.getHidservRendRelayedCellsParameters()
+ .containsKey(BIN_SIZE)
+ && extraInfoDescriptor.getHidservDirOnionsSeen() != null
+ && extraInfoDescriptor.getHidservDirOnionsSeenParameters() != null
+ && extraInfoDescriptor.getHidservDirOnionsSeenParameters()
+ .containsKey(BIN_SIZE)) {
+ ReportedHidServStats reportedStats = new ReportedHidServStats(
+ fingerprint, extraInfoDescriptor.getHidservStatsEndMillis());
+ reportedStats.setStatsIntervalSeconds(extraInfoDescriptor
+ .getHidservStatsIntervalLength());
+ reportedStats.setRendRelayedCells(this.removeNoise(extraInfoDescriptor
+ .getHidservRendRelayedCells().longValue(), extraInfoDescriptor
+ .getHidservRendRelayedCellsParameters().get(BIN_SIZE).longValue()));
+ reportedStats.setDirOnionsSeen(this.removeNoise(extraInfoDescriptor
+ .getHidservDirOnionsSeen().longValue(), extraInfoDescriptor
+ .getHidservDirOnionsSeenParameters().get(BIN_SIZE).longValue()));
+ this.reportedHidServStats.add(reportedStats);
+
+ /* If the descriptor contained some but not all hidserv-* lines, print
+ * out a warning. This case does not warrant any further action,
+ * because relays can in theory write anything in their extra-info
+ * descriptors. But maybe we'll want to know. */
+ } else {
+ System.err.println("Relay " + fingerprint + " published "
+ + "incomplete hidserv-stats. Ignoring.");
+ }
+ }
+
+ /** Removes noise from a reported stats value by rounding to the nearest
+ * right side of a bin and subtracting half of the bin size. */
+ private long removeNoise(long reportedNumber, long binSize) {
+ long roundedToNearestRightSideOfTheBin =
+ ((reportedNumber + binSize / 2) / binSize) * binSize;
+ long subtractedHalfOfBinSize =
+ roundedToNearestRightSideOfTheBin - binSize / 2;
+ return subtractedHalfOfBinSize;
+ }
+
+ /** Parses the given consensus. */
+ public boolean parseRelayNetworkStatusConsensus(
+ RelayNetworkStatusConsensus consensus) {
+
+ /* Make sure that the consensus contains Wxx weights. */
+ SortedMap<String, Integer> bandwidthWeights =
+ consensus.getBandwidthWeights();
+ if (bandwidthWeights == null) {
+ System.err.printf("Consensus with valid-after time %s doesn't "
+ + "contain any Wxx weights. Skipping.%n",
+ DateTimeHelper.format(consensus.getValidAfterMillis()));
+ return false;
+ }
+
+ /* More precisely, make sure that it contains Wmx weights, and then
+ * parse them. */
+ SortedSet<String> expectedWeightKeys =
+ new TreeSet<String>(Arrays.asList("Wmg,Wmm,Wme,Wmd".split(",")));
+ expectedWeightKeys.removeAll(bandwidthWeights.keySet());
+ if (!expectedWeightKeys.isEmpty()) {
+ System.err.printf("Consensus with valid-after time %s doesn't "
+ + "contain expected Wmx weights. Skipping.%n",
+ DateTimeHelper.format(consensus.getValidAfterMillis()));
+ return false;
+ }
+ double wmg = ((double) bandwidthWeights.get("Wmg")) / 10000.0;
+ double wmm = ((double) bandwidthWeights.get("Wmm")) / 10000.0;
+ double wme = ((double) bandwidthWeights.get("Wme")) / 10000.0;
+ double wmd = ((double) bandwidthWeights.get("Wmd")) / 10000.0;
+
+ /* Keep a sorted set with the fingerprints of all hidden-service
+ * directories, in reverse order, so that we can later determine the
+ * fingerprint distance between a directory and the directory
+ * preceding it by three positions in the descriptor ring. */
+ SortedSet<String> hsDirs = new TreeSet<>(Collections.reverseOrder());
+
+    /* Prepare for computing the weights of all relays with the Fast flag
+     * for being selected as rendezvous point, which uses the consensus
+     * weights for the middle position. */
+ double totalWeightsRendezvousPoint = 0.0;
+ SortedMap<String, Double> weightsRendezvousPoint = new TreeMap<>();
+
+ /* Go through all status entries contained in the consensus. */
+ for (Map.Entry<String, NetworkStatusEntry> e
+ : consensus.getStatusEntries().entrySet()) {
+ String fingerprint = e.getKey();
+ NetworkStatusEntry statusEntry = e.getValue();
+ SortedSet<String> flags = statusEntry.getFlags();
+
+ /* Add the relay to the set of hidden-service directories if it has
+ * the HSDir flag. */
+ if (flags.contains("HSDir")) {
+ hsDirs.add(statusEntry.getFingerprint());
+ }
+
+ /* Compute the probability for being selected as rendezvous point.
+ * If the relay has the Fast flag, multiply its consensus weight
+ * with the correct Wmx weight, depending on whether the relay has
+ * the Guard and/or Exit flag. */
+ double weightRendezvousPoint = 0.0;
+ if (flags.contains("Fast")) {
+ weightRendezvousPoint = (double) statusEntry.getBandwidth();
+ if (flags.contains("Guard") && flags.contains("Exit")) {
+ weightRendezvousPoint *= wmd;
+ } else if (flags.contains("Guard")) {
+ weightRendezvousPoint *= wmg;
+ } else if (flags.contains("Exit")) {
+ weightRendezvousPoint *= wme;
+ } else {
+ weightRendezvousPoint *= wmm;
+ }
+ }
+ weightsRendezvousPoint.put(fingerprint, weightRendezvousPoint);
+ totalWeightsRendezvousPoint += weightRendezvousPoint;
+ }
+
+ /* Store all computed network fractions based on this consensus in a
+ * set, which will then be written to disk in a single store
+ * operation. */
+ Set<ComputedNetworkFractions> computedNetworkFractions = new HashSet<>();
+
+ /* Remove all previously added directory fingerprints and re-add them
+ * twice, once with a leading "0" and once with a leading "1". The
+ * purpose is to simplify the logic for moving from one fingerprint to
+ * the previous one, even if that would mean traversing the ring
+ * start. For example, the fingerprint preceding "1""00..0000" with
+ * the first "1" being added here could be "0""FF..FFFF". */
+ SortedSet<String> hsDirsCopy = new TreeSet<>(hsDirs);
+ hsDirs.clear();
+ for (String fingerprint : hsDirsCopy) {
+ hsDirs.add("0" + fingerprint);
+ hsDirs.add("1" + fingerprint);
+ }
+
+ /* Define the total ring size to compute fractions below. This is
+ * 16^40 or 2^160. */
+ final double ringSize = new BigInteger(
+ "10000000000000000000000000000000000000000",
+ 16).doubleValue();
+
+ /* Go through all status entries again, this time computing network
+ * fractions. */
+ for (Map.Entry<String, NetworkStatusEntry> e
+ : consensus.getStatusEntries().entrySet()) {
+ String fingerprint = e.getKey();
+ NetworkStatusEntry statusEntry = e.getValue();
+ double fractionRendRelayedCells = 0.0;
+ double fractionDirOnionsSeen = 0.0;
+ if (statusEntry != null) {
+
+ /* Check if the relay is a hidden-service directory by looking up
+ * its fingerprint, preceded by "1", in the sorted set that we
+ * populated above. */
+ String fingerprintPrecededByOne = "1" + fingerprint;
+ if (hsDirs.contains(fingerprintPrecededByOne)) {
+
+ /* Move three positions in the sorted set, which is in reverse
+ * order, to learn the fingerprint of the directory preceding
+ * this directory by three positions. */
+ String startResponsible = fingerprint;
+ int positionsToGo = 3;
+ for (String hsDirFingerprint
+ : hsDirs.tailSet(fingerprintPrecededByOne)) {
+ startResponsible = hsDirFingerprint;
+ if (positionsToGo-- <= 0) {
+ break;
+ }
+ }
+
+ /* Compute the fraction of descriptor space that this relay is
+ * responsible for as difference between the two fingerprints
+ * divided by the ring size. */
+ fractionDirOnionsSeen =
+ new BigInteger(fingerprintPrecededByOne, 16).subtract(
+ new BigInteger(startResponsible, 16)).doubleValue()
+ / ringSize;
+
+ /* Divide this fraction by three to obtain the fraction of
+ * descriptors that this directory has seen. This step is
+ * necessary, because each descriptor that is published to this
+ * directory is also published to two other directories. */
+ fractionDirOnionsSeen /= 3.0;
+ }
+
+ /* Compute the fraction of cells on rendezvous circuits that this
+ * relay has seen by dividing its previously calculated weight by
+ * the sum of all such weights. */
+ fractionRendRelayedCells = weightsRendezvousPoint.get(fingerprint)
+ / totalWeightsRendezvousPoint;
+ }
+
+ /* If at least one of the computed fractions is non-zero, create a
+ * new fractions object. */
+ if (fractionRendRelayedCells > 0.0 || fractionDirOnionsSeen > 0.0) {
+ ComputedNetworkFractions fractions = new ComputedNetworkFractions(
+ fingerprint, consensus.getValidAfterMillis());
+ fractions.setFractionRendRelayedCells(fractionRendRelayedCells);
+ fractions.setFractionDirOnionsSeen(fractionDirOnionsSeen);
+ computedNetworkFractions.add(fractions);
+ }
+ }
+
+    /* Store all newly computed network fractions to a documents file.
+     * The same file also contains computed network fractions from other
+     * consensuses that were valid on the same day. This is in contrast
+     * to the other document types, which are each stored in a single
+     * file; that approach would not scale for computed network
+     * fractions. */
+ String date = DateTimeHelper.format(consensus.getValidAfterMillis(),
+ DateTimeHelper.ISO_DATE_FORMAT);
+ File documentFile = new File(this.computedNetworkFractionsDirectory,
+ date);
+ if (!this.computedNetworkFractionsStore.store(documentFile,
+ computedNetworkFractions)) {
+ return false;
+ }
+ return true;
+ }
+}
+
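
The two computations at the core of the parser above, undoing the binning/Laplace obfuscation and turning fingerprint distances into descriptor-space fractions, can be illustrated in isolation. The following is a minimal standalone sketch with hypothetical inputs (the class name, fingerprints, and counts are made up; only the arithmetic mirrors the code above, and String.repeat() assumes Java 11 or later):

    import java.math.BigInteger;

    public class ParserSketch {

      /* Same arithmetic as Parser.removeNoise(): round to the nearest
       * right side of a bin, then subtract half of the bin size. */
      static long removeNoise(long reportedNumber, long binSize) {
        long roundedToRightSideOfBin =
            ((reportedNumber + binSize / 2) / binSize) * binSize;
        return roundedToRightSideOfBin - binSize / 2;
      }

      public static void main(String[] args) {
        /* A relay might report 10241 cells after binning to multiples of
         * 1024 and adding noise; removing the noise yields the midpoint
         * of the containing bin. */
        System.out.println(removeNoise(10241L, 1024L)); /* prints 9728 */

        /* Fraction of the 2^160 descriptor ring between two hypothetical
         * fingerprints three HSDir positions apart, divided by 3 because
         * each descriptor is published to three directories. */
        double ringSize = new BigInteger(
            "1" + "0".repeat(40), 16).doubleValue();
        BigInteger current = new BigInteger("8" + "0".repeat(39), 16);
        BigInteger threeBefore = new BigInteger("7" + "0".repeat(39), 16);
        double fractionDirOnionsSeen =
            current.subtract(threeBefore).doubleValue() / ringSize / 3.0;
        System.out.println(fractionDirOnionsSeen); /* 1/48, about 0.0208 */
      }
    }
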
diff --git a/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/ReportedHidServStats.java b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/ReportedHidServStats.java
new file mode 100644
index 0000000..6d305d0
--- /dev/null
+++ b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/ReportedHidServStats.java
@@ -0,0 +1,141 @@
+/* Copyright 2016--2017 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.hidserv;
+
+/* Hidden-service statistics reported by a single relay covering a single
+ * statistics interval of usually 24 hours. These statistics are reported
+ * by the relay in the "hidserv-" lines of its extra-info descriptor. */
+public class ReportedHidServStats implements Document {
+
+ /* Relay fingerprint consisting of 40 upper-case hex characters. */
+ private String fingerprint;
+
+ public String getFingerprint() {
+ return this.fingerprint;
+ }
+
+ /* Hidden-service statistics end timestamp in milliseconds. */
+ private long statsEndMillis;
+
+ public long getStatsEndMillis() {
+ return this.statsEndMillis;
+ }
+
+ /* Statistics interval length in seconds. */
+ private long statsIntervalSeconds;
+
+ public void setStatsIntervalSeconds(long statsIntervalSeconds) {
+ this.statsIntervalSeconds = statsIntervalSeconds;
+ }
+
+ public long getStatsIntervalSeconds() {
+ return this.statsIntervalSeconds;
+ }
+
+ /* Number of relayed cells on rendezvous circuits as reported by the
+ * relay and adjusted by rounding to the nearest right side of a bin and
+ * subtracting half of the bin size. */
+ private long rendRelayedCells;
+
+ public void setRendRelayedCells(long rendRelayedCells) {
+ this.rendRelayedCells = rendRelayedCells;
+ }
+
+ public long getRendRelayedCells() {
+ return this.rendRelayedCells;
+ }
+
+ /* Number of distinct .onion addresses as reported by the relay and
+ * adjusted by rounding to the nearest right side of a bin and
+ * subtracting half of the bin size. */
+ private long dirOnionsSeen;
+
+ public void setDirOnionsSeen(long dirOnionsSeen) {
+ this.dirOnionsSeen = dirOnionsSeen;
+ }
+
+ public long getDirOnionsSeen() {
+ return this.dirOnionsSeen;
+ }
+
+ /* Instantiate a new stats object using fingerprint and stats interval
+ * end which together uniquely identify the object. */
+ public ReportedHidServStats(String fingerprint, long statsEndMillis) {
+ this.fingerprint = fingerprint;
+ this.statsEndMillis = statsEndMillis;
+ }
+
+ /* Return whether this object contains the same fingerprint and stats
+ * interval end as the passed object. */
+ @Override
+ public boolean equals(Object otherObject) {
+ if (!(otherObject instanceof ReportedHidServStats)) {
+ return false;
+ }
+ ReportedHidServStats other = (ReportedHidServStats) otherObject;
+ return this.fingerprint.equals(other.fingerprint)
+ && this.statsEndMillis == other.statsEndMillis;
+ }
+
+ /* Return a (hopefully unique) hash code based on this object's
+ * fingerprint and stats interval end. */
+ @Override
+ public int hashCode() {
+ return this.fingerprint.hashCode() + (int) this.statsEndMillis;
+ }
+
+ /* Return a string representation of this object, consisting of
+ * fingerprint and the concatenation of all other attributes. */
+ @Override
+ public String[] format() {
+ String first = this.fingerprint;
+ String second = String.format("%s,%d,%d,%d",
+ DateTimeHelper.format(this.statsEndMillis),
+ this.statsIntervalSeconds, this.rendRelayedCells,
+ this.dirOnionsSeen);
+ return new String[] { first, second };
+ }
+
+ /* Instantiate an empty stats object that will be initialized more by
+ * the parse method. */
+ ReportedHidServStats() {
+ }
+
+ /* Initialize this stats object using the two provided strings that have
+ * been produced by the format method earlier. Return whether this
+ * operation was successful. */
+ @Override
+ public boolean parse(String[] formattedStrings) {
+ if (formattedStrings.length != 2) {
+ System.err.printf("Invalid number of formatted strings. "
+ + "Skipping.%n", formattedStrings.length);
+ return false;
+ }
+ String[] secondParts = formattedStrings[1].split(",", 4);
+ if (secondParts.length != 4) {
+ return false;
+ }
+ long statsEndMillis = DateTimeHelper.parse(secondParts[0]);
+ if (statsEndMillis == DateTimeHelper.NO_TIME_AVAILABLE) {
+ return false;
+ }
+ long statsIntervalSeconds = -1L;
+ long rendRelayedCells = -1L;
+ long dirOnionsSeen = -1L;
+ try {
+ statsIntervalSeconds = Long.parseLong(secondParts[1]);
+ rendRelayedCells = Long.parseLong(secondParts[2]);
+ dirOnionsSeen = Long.parseLong(secondParts[3]);
+ } catch (NumberFormatException e) {
+ return false;
+ }
+ this.fingerprint = formattedStrings[0];
+ this.statsEndMillis = statsEndMillis;
+ this.statsIntervalSeconds = statsIntervalSeconds;
+ this.rendRelayedCells = rendRelayedCells;
+ this.dirOnionsSeen = dirOnionsSeen;
+ return true;
+ }
+}
+
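
The serialization used by this document class can be shown with a quick round trip. The following is a minimal sketch, assuming it is compiled in the same package (the no-argument constructor is package-private); the fingerprint and reported values are hypothetical:

    package org.torproject.metrics.hidserv;

    public class ReportedHidServStatsDemo {

      public static void main(String[] args) {
        /* Hypothetical relay fingerprint and reported values. */
        ReportedHidServStats stats = new ReportedHidServStats(
            "0123456789ABCDEF0123456789ABCDEF01234567", 1456704000000L);
        stats.setStatsIntervalSeconds(86400L);
        stats.setRendRelayedCells(123456L);
        stats.setDirOnionsSeen(88L);

        /* format() returns the group key (the fingerprint) and the value
         * line, here "2016-02-29 00:00:00,86400,123456,88". */
        String[] formatted = stats.format();
        System.out.println(formatted[0]);
        System.out.println(formatted[1]);

        /* parse() restores an equal object from the same two strings. */
        ReportedHidServStats restored = new ReportedHidServStats();
        System.out.println(restored.parse(formatted)
            && restored.equals(stats)); /* prints true */
      }
    }
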
diff --git a/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Simulate.java b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Simulate.java
new file mode 100644
index 0000000..207b4aa
--- /dev/null
+++ b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Simulate.java
@@ -0,0 +1,365 @@
+/* Copyright 2016--2017 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.hidserv;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+/* NOTE: This class is not required for running the Main class! (It
+ * contains its own main method.) */
+public class Simulate {
+ private static File simCellsCsvFile =
+ new File("out/csv/sim-cells.csv");
+
+ private static File simOnionsCsvFile =
+ new File("out/csv/sim-onions.csv");
+
+ /** Runs two simulations to evaluate this data-processing module. */
+ public static void main(String[] args) throws Exception {
+ System.out.print("Simulating extrapolation of rendezvous cells");
+ simulateManyCells();
+ System.out.print("\nSimulating extrapolation of .onions");
+ simulateManyOnions();
+ System.out.println("\nTerminating.");
+ }
+
+ private static Random rnd = new Random();
+
+ private static void simulateManyCells() throws Exception {
+ simCellsCsvFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ simCellsCsvFile));
+ bw.write("run,frac,wmean,wmedian,wiqm\n");
+ final int numberOfExtrapolations = 1000;
+ for (int i = 0; i < numberOfExtrapolations; i++) {
+ bw.write(simulateCells(i));
+ System.out.print(".");
+ }
+ bw.close();
+ }
+
+ private static void simulateManyOnions() throws Exception {
+ simOnionsCsvFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ simOnionsCsvFile));
+ bw.write("run,frac,wmean,wmedian,wiqm\n");
+ final int numberOfExtrapolations = 1000;
+ for (int i = 0; i < numberOfExtrapolations; i++) {
+ bw.write(simulateOnions(i));
+ System.out.print(".");
+ }
+ bw.close();
+ }
+
+ private static String simulateCells(int run) {
+
+ /* Generate consensus weights following an exponential distribution
+ * with lambda = 1 for 3000 potential rendezvous points. */
+ final int numberRendPoints = 3000;
+ double[] consensusWeights = new double[numberRendPoints];
+ double totalConsensusWeight = 0.0;
+ for (int i = 0; i < numberRendPoints; i++) {
+ double consensusWeight = -Math.log(1.0 - rnd.nextDouble());
+ consensusWeights[i] = consensusWeight;
+ totalConsensusWeight += consensusWeight;
+ }
+
+ /* Compute probabilities for being selected as rendezvous point. */
+ double[] probRendPoint = new double[numberRendPoints];
+ for (int i = 0; i < numberRendPoints; i++) {
+ probRendPoint[i] = consensusWeights[i] / totalConsensusWeight;
+ }
+
+ /* Generate 10,000,000,000 cells (474 Mbit/s) in chunks following an
+ * exponential distribution with lambda = 0.0001, so on average
+ * 10,000 cells per chunk, and randomly assign them to a rendezvous
+ * point to report them later. */
+ long cellsLeft = 10000000000L;
+ final double cellsLambda = 0.0001;
+ long[] observedCells = new long[numberRendPoints];
+ while (cellsLeft > 0) {
+ long cells = Math.min(cellsLeft,
+ (long) (-Math.log(1.0 - rnd.nextDouble()) / cellsLambda));
+ double selectRendPoint = rnd.nextDouble();
+ for (int i = 0; i < probRendPoint.length; i++) {
+ selectRendPoint -= probRendPoint[i];
+ if (selectRendPoint <= 0.0) {
+ observedCells[i] += cells;
+ break;
+ }
+ }
+ cellsLeft -= cells;
+ }
+
+ /* Obfuscate reports using binning and Laplace noise, and then attempt
+ * to remove noise again. */
+ final long binSize = 1024L;
+ final double b = 2048.0 / 0.3;
+ long[] reportedCells = new long[numberRendPoints];
+ long[] removedNoiseCells = new long[numberRendPoints];
+ for (int i = 0; i < numberRendPoints; i++) {
+ long observed = observedCells[i];
+ long afterBinning = ((observed + binSize - 1L) / binSize) * binSize;
+ double randomDouble = rnd.nextDouble();
+ double laplaceNoise = -b * (randomDouble > 0.5 ? 1.0 : -1.0)
+ * Math.log(1.0 - 2.0 * Math.abs(randomDouble - 0.5));
+ long reported = afterBinning + (long) laplaceNoise;
+ reportedCells[i] = reported;
+ long roundedToNearestRightSideOfTheBin =
+ ((reported + binSize / 2) / binSize) * binSize;
+ long subtractedHalfOfBinSize =
+ roundedToNearestRightSideOfTheBin - binSize / 2;
+ removedNoiseCells[i] = subtractedHalfOfBinSize;
+ }
+
+    /* Perform extrapolations from random subsets of reports whose total
+     * probability of being selected as rendezvous point approximates
+     * each target fraction. */
+ StringBuilder sb = new StringBuilder();
+ double[] fractions = new double[] { 0.01, 0.02, 0.03, 0.04, 0.05, 0.1,
+ 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99 };
+ for (double fraction : fractions) {
+ SortedSet<Integer> nonReportingRelays = new TreeSet<>();
+ for (int j = 0; j < numberRendPoints; j++) {
+ nonReportingRelays.add(j);
+ }
+ List<Integer> shuffledRelays = new ArrayList<>(nonReportingRelays);
+ Collections.shuffle(shuffledRelays);
+ SortedSet<Integer> reportingRelays = new TreeSet<>();
+ for (int j = 0; j < (int) ((double) numberRendPoints * fraction);
+ j++) {
+ reportingRelays.add(shuffledRelays.get(j));
+ nonReportingRelays.remove(shuffledRelays.get(j));
+ }
+ List<double[]> singleRelayExtrapolations;
+ double totalReportingProbability;
+ do {
+ singleRelayExtrapolations = new ArrayList<>();
+ totalReportingProbability = 0.0;
+ for (int reportingRelay : reportingRelays) {
+ double probability = probRendPoint[reportingRelay];
+ if (probability > 0.0) {
+ singleRelayExtrapolations.add(
+ new double[] {
+ removedNoiseCells[reportingRelay] / probability,
+ removedNoiseCells[reportingRelay],
+ probability });
+ }
+ totalReportingProbability += probability;
+ }
+ if (totalReportingProbability < fraction - 0.001) {
+ int addRelay = new ArrayList<>(nonReportingRelays).get(
+ rnd.nextInt(nonReportingRelays.size()));
+ nonReportingRelays.remove(addRelay);
+ reportingRelays.add(addRelay);
+ } else if (totalReportingProbability > fraction + 0.001) {
+ int removeRelay = new ArrayList<>(reportingRelays).get(
+ rnd.nextInt(reportingRelays.size()));
+ reportingRelays.remove(removeRelay);
+ nonReportingRelays.add(removeRelay);
+ }
+ } while (totalReportingProbability < fraction - 0.001
+ || totalReportingProbability > fraction + 0.001);
+ Collections.sort(singleRelayExtrapolations,
+ new Comparator<double[]>() {
+ public int compare(double[] o1, double[] o2) {
+ return o1[0] < o2[0] ? -1 : o1[0] > o2[0] ? 1 : 0;
+ }
+ }
+ );
+ double totalProbability = 0.0;
+ double totalValues = 0.0;
+ double totalInterquartileProbability = 0.0;
+ double totalInterquartileValues = 0.0;
+ Double weightedMedian = null;
+ for (double[] extrapolation : singleRelayExtrapolations) {
+ totalValues += extrapolation[1];
+ totalProbability += extrapolation[2];
+ if (weightedMedian == null
+ && totalProbability > totalReportingProbability * 0.5) {
+ weightedMedian = extrapolation[0];
+ }
+ if (totalProbability > totalReportingProbability * 0.25
+ && totalProbability < totalReportingProbability * 0.75) {
+ totalInterquartileValues += extrapolation[1];
+ totalInterquartileProbability += extrapolation[2];
+ }
+ }
+ sb.append(String.format("%d,%.2f,%.0f,%.0f,%.0f%n", run, fraction,
+ totalValues / totalProbability, weightedMedian,
+ totalInterquartileValues / totalInterquartileProbability));
+ }
+ return sb.toString();
+ }
+
+ private static String simulateOnions(final int run) {
+
+ /* Generate 3000 HSDirs with "fingerprints" between 0.0 and 1.0. */
+ final int numberHsDirs = 3000;
+ SortedSet<Double> hsDirFingerprints = new TreeSet<>();
+ for (int i = 0; i < numberHsDirs; i++) {
+ hsDirFingerprints.add(rnd.nextDouble());
+ }
+
+ /* Compute fractions of observed descriptor space. */
+ SortedSet<Double> ring =
+ new TreeSet<>(Collections.reverseOrder());
+ for (double fingerprint : hsDirFingerprints) {
+ ring.add(fingerprint);
+ ring.add(fingerprint - 1.0);
+ }
+ SortedMap<Double, Double> hsDirFractions = new TreeMap<>();
+ for (double fingerprint : hsDirFingerprints) {
+ double start = fingerprint;
+ int positionsToGo = 3;
+ for (double prev : ring.tailSet(fingerprint)) {
+ start = prev;
+ if (positionsToGo-- <= 0) {
+ break;
+ }
+ }
+ hsDirFractions.put(fingerprint, fingerprint - start);
+ }
+
+ /* Generate 40000 .onions with 4 HSDesc IDs, store them on HSDirs. */
+ final int numberOnions = 40000;
+ final int replicas = 4;
+ final int storeOnDirs = 3;
+ SortedMap<Double, SortedSet<Integer>> storedDescs = new TreeMap<>();
+ for (double fingerprint : hsDirFingerprints) {
+ storedDescs.put(fingerprint, new TreeSet<Integer>());
+ }
+ for (int i = 0; i < numberOnions; i++) {
+ for (int j = 0; j < replicas; j++) {
+ int leftToStore = storeOnDirs;
+ for (double fingerprint
+ : hsDirFingerprints.tailSet(rnd.nextDouble())) {
+ storedDescs.get(fingerprint).add(i);
+ if (--leftToStore <= 0) {
+ break;
+ }
+ }
+ if (leftToStore > 0) {
+ for (double fingerprint : hsDirFingerprints) {
+ storedDescs.get(fingerprint).add(i);
+ if (--leftToStore <= 0) {
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ /* Obfuscate reports using binning and Laplace noise, and then attempt
+ * to remove noise again. */
+ final long binSize = 8L;
+ final double b = 8.0 / 0.3;
+ SortedMap<Double, Long> reportedOnions = new TreeMap<>();
+ SortedMap<Double, Long> removedNoiseOnions = new TreeMap<>();
+ for (Map.Entry<Double, SortedSet<Integer>> e
+ : storedDescs.entrySet()) {
+ double fingerprint = e.getKey();
+ long observed = (long) e.getValue().size();
+ long afterBinning = ((observed + binSize - 1L) / binSize) * binSize;
+ double randomDouble = rnd.nextDouble();
+ double laplaceNoise = -b * (randomDouble > 0.5 ? 1.0 : -1.0)
+ * Math.log(1.0 - 2.0 * Math.abs(randomDouble - 0.5));
+ long reported = afterBinning + (long) laplaceNoise;
+ reportedOnions.put(fingerprint, reported);
+ long roundedToNearestRightSideOfTheBin =
+ ((reported + binSize / 2) / binSize) * binSize;
+ long subtractedHalfOfBinSize =
+ roundedToNearestRightSideOfTheBin - binSize / 2;
+ removedNoiseOnions.put(fingerprint, subtractedHalfOfBinSize);
+ }
+
+    /* Perform extrapolations from random subsets of reports whose total
+     * fraction of observed descriptor space approximates each target
+     * fraction. */
+ StringBuilder sb = new StringBuilder();
+ double[] fractions = new double[] { 0.01, 0.02, 0.03, 0.04, 0.05, 0.1,
+ 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99 };
+ for (double fraction : fractions) {
+ SortedSet<Double> nonReportingRelays =
+ new TreeSet<>(hsDirFractions.keySet());
+ List<Double> shuffledRelays = new ArrayList<>(
+ nonReportingRelays);
+ Collections.shuffle(shuffledRelays);
+ SortedSet<Double> reportingRelays = new TreeSet<>();
+ for (int j = 0; j < (int) ((double) hsDirFractions.size()
+ * fraction); j++) {
+ reportingRelays.add(shuffledRelays.get(j));
+ nonReportingRelays.remove(shuffledRelays.get(j));
+ }
+ List<double[]> singleRelayExtrapolations;
+ double totalReportingProbability;
+ do {
+ singleRelayExtrapolations = new ArrayList<>();
+ totalReportingProbability = 0.0;
+ for (double reportingRelay : reportingRelays) {
+ double probability = hsDirFractions.get(reportingRelay) / 3.0;
+ if (probability > 0.0) {
+ singleRelayExtrapolations.add(
+ new double[] { removedNoiseOnions.get(reportingRelay)
+ / probability, removedNoiseOnions.get(reportingRelay),
+ probability });
+ }
+ totalReportingProbability += probability;
+ }
+ if (totalReportingProbability < fraction - 0.001) {
+ double addRelay =
+ new ArrayList<>(nonReportingRelays).get(
+ rnd.nextInt(nonReportingRelays.size()));
+ nonReportingRelays.remove(addRelay);
+ reportingRelays.add(addRelay);
+ } else if (totalReportingProbability > fraction + 0.001) {
+ double removeRelay =
+ new ArrayList<>(reportingRelays).get(
+ rnd.nextInt(reportingRelays.size()));
+ reportingRelays.remove(removeRelay);
+ nonReportingRelays.add(removeRelay);
+ }
+ } while (totalReportingProbability < fraction - 0.001
+ || totalReportingProbability > fraction + 0.001);
+ Collections.sort(singleRelayExtrapolations,
+ new Comparator<double[]>() {
+ public int compare(double[] first, double[] second) {
+ return first[0] < second[0] ? -1 : first[0] > second[0] ? 1 : 0;
+ }
+ }
+ );
+ double totalProbability = 0.0;
+ double totalValues = 0.0;
+ double totalInterquartileProbability = 0.0;
+ double totalInterquartileValues = 0.0;
+ Double weightedMedian = null;
+ for (double[] extrapolation : singleRelayExtrapolations) {
+ totalValues += extrapolation[1];
+ totalProbability += extrapolation[2];
+ if (weightedMedian == null
+ && totalProbability > totalReportingProbability * 0.5) {
+ weightedMedian = extrapolation[0];
+ }
+ if (totalProbability > totalReportingProbability * 0.25
+ && totalProbability < totalReportingProbability * 0.75) {
+ totalInterquartileValues += extrapolation[1];
+ totalInterquartileProbability += extrapolation[2];
+ }
+ }
+ sb.append(String.format("%d,%.2f,%.0f,%.0f,%.0f%n", run, fraction,
+ totalValues / totalProbability, weightedMedian,
+ totalInterquartileValues / totalInterquartileProbability));
+ }
+ return sb.toString();
+ }
+}
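
Both simulations obfuscate observed values the same way relays do: round up to the next multiple of the bin size, then add Laplace noise sampled by inverting the CDF of a uniform draw. The following minimal standalone sketch shows that step and the attempt to undo it; the observed value and random seed are hypothetical, while the bin size and scale parameter match the cells simulation above:

    import java.util.Random;

    public class LaplaceObfuscationDemo {

      public static void main(String[] args) {
        Random rnd = new Random(1L);
        final long binSize = 1024L;
        final double b = 2048.0 / 0.3; /* scale of the Laplace noise */
        long observed = 9500L;

        /* Bin up to the next multiple of the bin size. */
        long afterBinning = ((observed + binSize - 1L) / binSize) * binSize;

        /* Sample Laplace(0, b) noise from a single uniform draw. */
        double u = rnd.nextDouble();
        double laplaceNoise = -b * (u > 0.5 ? 1.0 : -1.0)
            * Math.log(1.0 - 2.0 * Math.abs(u - 0.5));
        long reported = afterBinning + (long) laplaceNoise;

        /* Undo the obfuscation as far as possible: round to the nearest
         * right side of a bin and subtract half of the bin size. */
        long removedNoise =
            ((reported + binSize / 2) / binSize) * binSize - binSize / 2;

        System.out.printf("observed=%d binned=%d reported=%d denoised=%d%n",
            observed, afterBinning, reported, removedNoise);
      }
    }
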
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/Aggregator.java b/modules/hidserv/src/org/torproject/metrics/hidserv/Aggregator.java
deleted file mode 100644
index ea09a78..0000000
--- a/modules/hidserv/src/org/torproject/metrics/hidserv/Aggregator.java
+++ /dev/null
@@ -1,198 +0,0 @@
-/* Copyright 2016--2017 The Tor Project
- * See LICENSE for licensing information */
-
-package org.torproject.metrics.hidserv;
-
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.SortedMap;
-import java.util.TreeMap;
-
-/** Aggregate extrapolated network totals of hidden-service statistics by
- * calculating statistics like the daily weighted interquartile mean.
- * Also calculate simpler statistics like the number of reported
- * statistics and the total network fraction of reporting relays. */
-public class Aggregator {
-
- /** Document file containing extrapolated hidden-service statistics. */
- private File extrapolatedHidServStatsFile;
-
- /** Document store for storing and retrieving extrapolated hidden-service
- * statistics. */
- private DocumentStore<ExtrapolatedHidServStats>
- extrapolatedHidServStatsStore;
-
- /** Output file for writing aggregated statistics. */
- private File hidservStatsCsvFile;
-
- /** Initializes a new aggregator object using the given directory,
- * document store, and output file for results. */
- public Aggregator(File statusDirectory,
- DocumentStore<ExtrapolatedHidServStats>
- extrapolatedHidServStatsStore, File hidservStatsCsvFile) {
-
- /* Create a File instance for the document file containing
- * extrapolated network totals. */
- this.extrapolatedHidServStatsFile = new File(statusDirectory,
- "extrapolated-hidserv-stats");
-
- /* Store references to the provided document store and output file. */
- this.extrapolatedHidServStatsStore = extrapolatedHidServStatsStore;
- this.hidservStatsCsvFile = hidservStatsCsvFile;
- }
-
- /** Calculates aggregates for all extrapolated hidden-service statistics
- * and writes them to the output file. */
- public void aggregateHidServStats() {
-
- /* Retrieve previously extrapolated network totals. */
- Set<ExtrapolatedHidServStats> extrapolatedStats =
- this.extrapolatedHidServStatsStore.retrieve(
- this.extrapolatedHidServStatsFile);
- if (extrapolatedStats == null) {
- System.err.printf("Unable to retrieve extrapolated hidden-service "
- + "statistics from file %s. Skipping aggregation step.%n",
- this.extrapolatedHidServStatsFile.getAbsolutePath());
- return;
- }
-
- /* Re-arrange extrapolated network totals by statistics interval end
- * date, and include the computed network total as weight for the
- * extrapolated value. More precisely, map keys are ISO-formatted
- * dates, map values are double[] arrays with the extrapolated network
- * total as first element and the corresponding computed network
- * fraction as second element. */
- SortedMap<String, List<double[]>> extrapolatedCells = new TreeMap<>();
- SortedMap<String, List<double[]>> extrapolatedOnions = new TreeMap<>();
- for (ExtrapolatedHidServStats extrapolated : extrapolatedStats) {
- String date = DateTimeHelper.format(
- extrapolated.getStatsDateMillis(),
- DateTimeHelper.ISO_DATE_FORMAT);
- if (extrapolated.getFractionRendRelayedCells() > 0.0) {
- if (!extrapolatedCells.containsKey(date)) {
- extrapolatedCells.put(date, new ArrayList<double[]>());
- }
- extrapolatedCells.get(date).add(new double[] {
- extrapolated.getExtrapolatedRendRelayedCells(),
- extrapolated.getFractionRendRelayedCells() });
- }
- if (extrapolated.getFractionDirOnionsSeen() > 0.0) {
- if (!extrapolatedOnions.containsKey(date)) {
- extrapolatedOnions.put(date, new ArrayList<double[]>());
- }
- extrapolatedOnions.get(date).add(new double[] {
- extrapolated.getExtrapolatedDirOnionsSeen(),
- extrapolated.getFractionDirOnionsSeen() });
- }
- }
-
- /* Write all results to a string builder that will later be written to
- * the output file. Each line contains an ISO-formatted "date", a
- * string identifier for the "type" of statistic, the weighted mean
- * ("wmean"), weighted median ("wmedian"), weighted interquartile mean
- * ("wiqm"), the total network "frac"tion, and the number of reported
- * "stats" with non-zero computed network fraction. */
- StringBuilder sb = new StringBuilder();
- sb.append("date,type,wmean,wmedian,wiqm,frac,stats\n");
-
- /* Repeat all aggregation steps for both types of statistics. */
- for (int i = 0; i < 2; i++) {
- String type = i == 0 ? "rend-relayed-cells" : "dir-onions-seen";
- SortedMap<String, List<double[]>> extrapolated = i == 0
- ? extrapolatedCells : extrapolatedOnions;
-
- /* Go through all dates. */
- for (Map.Entry<String, List<double[]>> e
- : extrapolated.entrySet()) {
- List<double[]> weightedValues = e.getValue();
-
- /* Sort extrapolated network totals contained in the first array
- * element. (The second array element contains the computed
- * network fraction as weight.) */
- Collections.sort(weightedValues,
- new Comparator<double[]>() {
- public int compare(double[] first, double[] second) {
- return first[0] < second[0] ? -1
- : first[0] > second[0] ? 1
- : 0;
- }
- }
- );
-
- /* For the weighted mean, sum up all previously extrapolated
- * values weighted with their network fractions (which happens to
- * be the values that relays reported), and sum up all network
- * fractions. Once we have those two sums, we can divide the sum
- * of weighted extrapolated values by the sum of network fractions
- * to obtain the weighted mean of extrapolated values. */
- double sumReported = 0.0;
- double sumFraction = 0.0;
- for (double[] d : weightedValues) {
- sumReported += d[0] * d[1];
- sumFraction += d[1];
- }
- double weightedMean = sumReported / sumFraction;
-
- /* For the weighted median and weighted interquartile mean, go
- * through all values once again. The weighted median is the
- * first extrapolated value with weight interval end greater than
- * 50% of reported network fractions. For the weighted
- * interquartile mean, sum up extrapolated values multiplied with
- * network fractions and network fractions falling into the 25% to
- * 75% range and later compute the weighted mean of those. */
- double weightIntervalEnd = 0.0;
- Double weightedMedian = null;
- double sumFractionInterquartile = 0.0;
- double sumReportedInterquartile = 0.0;
- for (double[] d : weightedValues) {
- double extrapolatedValue = d[0];
- double computedFraction = d[1];
- double weightIntervalStart = weightIntervalEnd;
- weightIntervalEnd += computedFraction;
- if (weightedMedian == null
- && weightIntervalEnd > sumFraction * 0.5) {
- weightedMedian = extrapolatedValue;
- }
- if (weightIntervalEnd >= sumFraction * 0.25
- && weightIntervalStart <= sumFraction * 0.75) {
- double fractionBetweenQuartiles =
- Math.min(weightIntervalEnd, sumFraction * 0.75)
- - Math.max(weightIntervalStart, sumFraction * 0.25);
- sumReportedInterquartile += extrapolatedValue
- * fractionBetweenQuartiles;
- sumFractionInterquartile += fractionBetweenQuartiles;
- }
- }
- double weightedInterquartileMean =
- sumReportedInterquartile / sumFractionInterquartile;
-
- /* Put together all aggregated values in a single line. */
- String date = e.getKey();
- int numStats = weightedValues.size();
- sb.append(String.format("%s,%s,%.0f,%.0f,%.0f,%.8f,%d%n", date,
- type, weightedMean, weightedMedian, weightedInterquartileMean,
- sumFraction, numStats));
- }
- }
-
- /* Write all aggregated results to the output file. */
- try {
- this.hidservStatsCsvFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- this.hidservStatsCsvFile));
- bw.write(sb.toString());
- bw.close();
- } catch (IOException e) {
- System.err.printf("Unable to write results to %s. Ignoring.");
- }
- }
-}
-
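
The weighted mean, weighted median, and weighted interquartile mean computed in aggregateHidServStats() can be traced with a small standalone example. This is only a sketch with hypothetical extrapolated values and network fractions, reproducing the aggregation arithmetic above:

    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.Comparator;
    import java.util.List;

    public class WeightedStatsDemo {

      public static void main(String[] args) {
        /* Hypothetical (extrapolated value, network fraction) pairs. */
        List<double[]> weightedValues = new ArrayList<>();
        weightedValues.add(new double[] { 900000.0, 0.02 });
        weightedValues.add(new double[] { 1100000.0, 0.03 });
        weightedValues.add(new double[] { 1500000.0, 0.01 });
        Collections.sort(weightedValues, new Comparator<double[]>() {
          public int compare(double[] first, double[] second) {
            return Double.compare(first[0], second[0]);
          }
        });

        /* Weighted mean: values weighted by their network fractions. */
        double sumReported = 0.0;
        double sumFraction = 0.0;
        for (double[] d : weightedValues) {
          sumReported += d[0] * d[1];
          sumFraction += d[1];
        }
        double weightedMean = sumReported / sumFraction;

        /* Weighted median: first value whose cumulative weight exceeds
         * half of the total weight.  Weighted interquartile mean: mean
         * of the weight mass falling between the 25% and 75% marks. */
        double weightIntervalEnd = 0.0;
        Double weightedMedian = null;
        double sumFractionInterquartile = 0.0;
        double sumReportedInterquartile = 0.0;
        for (double[] d : weightedValues) {
          double weightIntervalStart = weightIntervalEnd;
          weightIntervalEnd += d[1];
          if (weightedMedian == null
              && weightIntervalEnd > sumFraction * 0.5) {
            weightedMedian = d[0];
          }
          if (weightIntervalEnd >= sumFraction * 0.25
              && weightIntervalStart <= sumFraction * 0.75) {
            double fractionBetweenQuartiles =
                Math.min(weightIntervalEnd, sumFraction * 0.75)
                - Math.max(weightIntervalStart, sumFraction * 0.25);
            sumReportedInterquartile += d[0] * fractionBetweenQuartiles;
            sumFractionInterquartile += fractionBetweenQuartiles;
          }
        }
        System.out.printf("wmean=%.0f wmedian=%.0f wiqm=%.0f%n",
            weightedMean, weightedMedian,
            sumReportedInterquartile / sumFractionInterquartile);
      }
    }
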
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/ComputedNetworkFractions.java b/modules/hidserv/src/org/torproject/metrics/hidserv/ComputedNetworkFractions.java
deleted file mode 100644
index a403e48..0000000
--- a/modules/hidserv/src/org/torproject/metrics/hidserv/ComputedNetworkFractions.java
+++ /dev/null
@@ -1,183 +0,0 @@
-/* Copyright 2016--2017 The Tor Project
- * See LICENSE for licensing information */
-
-package org.torproject.metrics.hidserv;
-
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
-
-/** Computed fraction of hidden-service activity that a single relay is
- * assumed to observe in the network. These fractions are computed from
- * status entries and bandwidth weights in a network status consensus. */
-public class ComputedNetworkFractions implements Document {
-
- /** Relay fingerprint consisting of 40 upper-case hex characters. */
- private String fingerprint;
-
- public String getFingerprint() {
- return this.fingerprint;
- }
-
- /** Valid-after timestamp of the consensus in milliseconds. */
- private long validAfterMillis;
-
- public long getValidAfterMillis() {
- return this.validAfterMillis;
- }
-
- /** Fraction of cells on rendezvous circuits that this relay is assumed
- * to observe in the network. */
- private double fractionRendRelayedCells;
-
- public void setFractionRendRelayedCells(
- double fractionRendRelayedCells) {
- this.fractionRendRelayedCells = fractionRendRelayedCells;
- }
-
- public double getFractionRendRelayedCells() {
- return this.fractionRendRelayedCells;
- }
-
- /** Fraction of descriptors that this relay is assumed to observe in the
- * network. This is calculated as the fraction of descriptors
- * identifiers that this relay was responsible for, divided by 3,
- * because each descriptor that is published to this directory is also
- * published to two other directories. */
- private double fractionDirOnionsSeen;
-
- public void setFractionDirOnionsSeen(double fractionDirOnionsSeen) {
- this.fractionDirOnionsSeen = fractionDirOnionsSeen;
- }
-
- public double getFractionDirOnionsSeen() {
- return this.fractionDirOnionsSeen;
- }
-
- /** Instantiates a new fractions object using fingerprint and consensus
- * valid-after time which together uniquely identify the object. */
- public ComputedNetworkFractions(String fingerprint,
- long validAfterMillis) {
- this.fingerprint = fingerprint;
- this.validAfterMillis = validAfterMillis;
- }
-
- /** Returns whether this object contains the same fingerprint and
- * consensus valid-after time as the passed object. */
- @Override
- public boolean equals(Object otherObject) {
- if (!(otherObject instanceof ComputedNetworkFractions)) {
- return false;
- }
- ComputedNetworkFractions other =
- (ComputedNetworkFractions) otherObject;
- return this.fingerprint.equals(other.fingerprint)
- && this.validAfterMillis == other.validAfterMillis;
- }
-
- /** Returns a (hopefully unique) hash code based on this object's
- * fingerprint and consensus valid-after time. */
- @Override
- public int hashCode() {
- return this.fingerprint.hashCode()
- + (int) this.validAfterMillis;
- }
-
- private static Map<Long, String> previouslyFormattedDates =
- Collections.synchronizedMap(new HashMap<Long, String>());
-
- /** Returns a string representation of this object, consisting of two
- * strings: the first string contains fingerprint and valid-after date,
- * the second string contains the concatenation of all other
- * attributes. */
- @Override
- public String[] format() {
- long validAfterDateMillis = (this.validAfterMillis
- / DateTimeHelper.ONE_DAY) * DateTimeHelper.ONE_DAY;
- String validAfterDate;
- if (previouslyFormattedDates.containsKey(validAfterDateMillis)) {
- validAfterDate = previouslyFormattedDates.get(validAfterDateMillis);
- } else {
- validAfterDate = DateTimeHelper.format(validAfterDateMillis,
- DateTimeHelper.ISO_DATE_FORMAT);
- previouslyFormattedDates.put(validAfterDateMillis, validAfterDate);
- }
- long validAfterHourMillis = this.validAfterMillis
- % DateTimeHelper.ONE_DAY;
- String validAfterHour = String.format("%02d",
- validAfterHourMillis / DateTimeHelper.ONE_HOUR);
- String first = String.format("%s,%s", this.fingerprint,
- validAfterDate);
- String second = validAfterHour
- + (this.fractionRendRelayedCells == 0.0 ? ","
- : String.format(",%f", this.fractionRendRelayedCells))
- + (this.fractionDirOnionsSeen == 0.0 ? ","
- : String.format(",%f", this.fractionDirOnionsSeen));
- return new String[] { first, second };
- }
-
- /** Instantiates an empty fractions object that will be initialized more
- * by the parse method. */
- ComputedNetworkFractions() {
- }
-
- private static Map<String, Long> previouslyParsedDates =
- Collections.synchronizedMap(new HashMap<String, Long>());
-
- /** Initializes this fractions object using the two provided strings
- * that have been produced by the format method earlier and returns
- * whether this operation was successful. */
- @Override
- public boolean parse(String[] formattedStrings) {
- if (formattedStrings.length != 2) {
- System.err.printf("Invalid number of formatted strings. "
- + "Skipping.%n", formattedStrings.length);
- return false;
- }
- String[] firstParts = formattedStrings[0].split(",", 2);
- if (firstParts.length != 2) {
- System.err.printf("Invalid number of comma-separated values. "
- + "Skipping.%n");
- return false;
- }
- String fingerprint = firstParts[0];
- String[] secondParts = formattedStrings[1].split(",", 3);
- if (secondParts.length != 3) {
- System.err.printf("Invalid number of comma-separated values. "
- + "Skipping.%n");
- return false;
- }
- String validAfterDate = firstParts[1];
- String validAfterHour = secondParts[0];
- long validAfterDateMillis;
- if (previouslyParsedDates.containsKey(validAfterDate)) {
- validAfterDateMillis = previouslyParsedDates.get(validAfterDate);
- } else {
- validAfterDateMillis = DateTimeHelper.parse(validAfterDate,
- DateTimeHelper.ISO_DATE_FORMAT);
- previouslyParsedDates.put(validAfterDate, validAfterDateMillis);
- }
- long validAfterTimeMillis = Long.parseLong(validAfterHour)
- * DateTimeHelper.ONE_HOUR;
- if (validAfterDateMillis == DateTimeHelper.NO_TIME_AVAILABLE
- || validAfterTimeMillis < 0L
- || validAfterTimeMillis >= DateTimeHelper.ONE_DAY) {
- System.err.printf("Invalid date/hour format. Skipping.%n");
- return false;
- }
- long validAfterMillis = validAfterDateMillis + validAfterTimeMillis;
- try {
- this.fingerprint = fingerprint;
- this.validAfterMillis = validAfterMillis;
- this.fractionRendRelayedCells = secondParts[1].equals("")
- ? 0.0 : Double.parseDouble(secondParts[1]);
- this.fractionDirOnionsSeen = secondParts[2].equals("")
- ? 0.0 : Double.parseDouble(secondParts[2]);
- return true;
- } catch (NumberFormatException e) {
- System.err.printf("Invalid number format. Skipping.%n");
- return false;
- }
- }
-}
-
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/DateTimeHelper.java b/modules/hidserv/src/org/torproject/metrics/hidserv/DateTimeHelper.java
deleted file mode 100644
index 5be6800..0000000
--- a/modules/hidserv/src/org/torproject/metrics/hidserv/DateTimeHelper.java
+++ /dev/null
@@ -1,107 +0,0 @@
-/* Copyright 2016--2017 The Tor Project
- * See LICENSE for licensing information */
-
-package org.torproject.metrics.hidserv;
-
-import java.text.DateFormat;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.TimeZone;
-
-/** Utility class to format and parse dates and timestamps. */
-public class DateTimeHelper {
-
- /** This class is not supposed to be instantiated, which is why its
- * constructor has private visibility. */
- private DateTimeHelper() {
- }
-
- /* Some useful time constant. */
- public static final long ONE_SECOND = 1000L;
-
- public static final long ONE_MINUTE = 60L * ONE_SECOND;
-
- public static final long ONE_HOUR = 60L * ONE_MINUTE;
-
- public static final long ONE_DAY = 24L * ONE_HOUR;
-
- /* Some useful date/time formats. */
- public static final String ISO_DATETIME_FORMAT = "yyyy-MM-dd HH:mm:ss";
-
- public static final String ISO_DATE_HOUR_FORMAT = "yyyy-MM-dd HH";
-
- public static final String ISO_DATE_FORMAT = "yyyy-MM-dd";
-
- public static final String ISO_HOUR_FORMAT = "HH";
-
- /** Map of DateFormat instances for parsing and formatting dates and
- * timestamps, protected using ThreadLocal to ensure that each thread
- * uses its own instances. */
- private static ThreadLocal<Map<String, DateFormat>> dateFormats =
- new ThreadLocal<Map<String, DateFormat>>() {
-
- public Map<String, DateFormat> get() {
- return super.get();
- }
-
- protected Map<String, DateFormat> initialValue() {
- return new HashMap<>();
- }
-
- public void remove() {
- super.remove();
- }
-
- public void set(Map<String, DateFormat> value) {
- super.set(value);
- }
- };
-
- /** Returns an instance of DateFormat for the given format, and if no
- * such instance exists, creates one and puts it in the map. */
- private static DateFormat getDateFormat(String format) {
- Map<String, DateFormat> threadDateFormats = dateFormats.get();
- if (!threadDateFormats.containsKey(format)) {
- DateFormat dateFormat = new SimpleDateFormat(format);
- dateFormat.setLenient(false);
- dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- threadDateFormats.put(format, dateFormat);
- }
- return threadDateFormats.get(format);
- }
-
- /** Formats the given time in milliseconds using the given format. */
- public static String format(long millis, String format) {
- return getDateFormat(format).format(millis);
- }
-
- /** Formats the given time in milliseconds using ISO date/time
- * format. */
- public static String format(long millis) {
- return format(millis, ISO_DATETIME_FORMAT);
- }
-
- /** Default result of the parse methods if the provided time could not
- * be parsed. */
- public static final long NO_TIME_AVAILABLE = -1L;
-
- /** Parses the given string using the given format. */
- public static long parse(String string, String format) {
- if (null == string) {
- return NO_TIME_AVAILABLE;
- }
- try {
- return getDateFormat(format).parse(string).getTime();
- } catch (ParseException e) {
- return NO_TIME_AVAILABLE;
- }
- }
-
- /** Parses the given string using ISO date/time format. */
- public static long parse(String string) {
- return parse(string, ISO_DATETIME_FORMAT);
- }
-}
-
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/Document.java b/modules/hidserv/src/org/torproject/metrics/hidserv/Document.java
deleted file mode 100644
index 46ce40d..0000000
--- a/modules/hidserv/src/org/torproject/metrics/hidserv/Document.java
+++ /dev/null
@@ -1,26 +0,0 @@
-/* Copyright 2016--2017 The Tor Project
- * See LICENSE for licensing information */
-
-package org.torproject.metrics.hidserv;
-
-/** Common interface of documents that are supposed to be serialized and
- * stored in document files and later retrieved and de-serialized. */
-public interface Document {
-
- /** Returns an array of two strings with a string representation of this
- * document.
- *
- * <p>The first string will be used to start a group of documents, the
- * second string will be used to represent a single document in that
- * group. Ideally, the first string is equivalent for many documents
- * stored in the same file, and the second string is different for those
- * documents.</p> */
- public String[] format();
-
- /** Initializes an object using the given array of two strings.
- *
- * <p>These are the same two strings that the format method
- * provides.</p> */
- public boolean parse(String[] formattedStrings);
-}
-
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/DocumentStore.java b/modules/hidserv/src/org/torproject/metrics/hidserv/DocumentStore.java
deleted file mode 100644
index 2670cf4..0000000
--- a/modules/hidserv/src/org/torproject/metrics/hidserv/DocumentStore.java
+++ /dev/null
@@ -1,176 +0,0 @@
-/* Copyright 2016--2017 The Tor Project
- * See LICENSE for licensing information */
-
-package org.torproject.metrics.hidserv;
-
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.LineNumberReader;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Set;
-import java.util.SortedMap;
-import java.util.SortedSet;
-import java.util.TreeMap;
-import java.util.TreeSet;
-
-/** Utility class to store serialized objects implementing the Document
- * interface to a file and later to retrieve them. */
-public class DocumentStore<T extends Document> {
-
- /** Document class, needed to create new instances when retrieving
- * documents. */
- private Class<T> clazz;
-
- /** Initializes a new store object for the given type of documents. */
- DocumentStore(Class<T> clazz) {
- this.clazz = clazz;
- }
-
- /** Stores the provided documents in the given file and returns whether
- * the storage operation was successful.
- *
- * <p>If the file already existed and if it contains documents, merge
- * the new documents with the existing ones.</p> */
- public boolean store(File documentFile, Set<T> documentsToStore) {
-
- /* Retrieve existing documents. */
- Set<T> retrievedDocuments = this.retrieve(documentFile);
- if (retrievedDocuments == null) {
- System.err.printf("Unable to read and update %s. Not storing "
- + "documents.%n", documentFile.getAbsoluteFile());
- return false;
- }
-
- /* Merge new documents with existing ones. */
- retrievedDocuments.addAll(documentsToStore);
-
- /* Serialize documents. */
- SortedMap<String, SortedSet<String>> formattedDocuments = new TreeMap<>();
- for (T retrieveDocument : retrievedDocuments) {
- String[] formattedDocument = retrieveDocument.format();
- if (!formattedDocuments.containsKey(formattedDocument[0])) {
- formattedDocuments.put(formattedDocument[0],
- new TreeSet<String>());
- }
- formattedDocuments.get(formattedDocument[0]).add(
- formattedDocument[1]);
- }
-
- /* Check if a temporary file exists from the previous execution. */
- File documentTempFile = new File(documentFile.getAbsoluteFile()
- + ".tmp");
- if (documentTempFile.exists()) {
- System.err.printf("Temporary document file %s still exists, "
- + "indicating that a previous execution did not terminate "
- + "cleanly. Not storing documents.%n",
- documentTempFile.getAbsoluteFile());
- return false;
- }
-
- /* Write to a new temporary file, then move it into place, possibly
- * overwriting an existing file. */
- try {
- documentTempFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- documentTempFile));
- for (Map.Entry<String, SortedSet<String>> e
- : formattedDocuments.entrySet()) {
- bw.write(e.getKey() + "\n");
- for (String s : e.getValue()) {
- bw.write(" " + s + "\n");
- }
- }
- bw.close();
- documentFile.delete();
- documentTempFile.renameTo(documentFile);
- } catch (IOException e) {
- System.err.printf("Unable to write %s. Not storing documents.%n",
- documentFile.getAbsolutePath());
- return false;
- }
-
- /* Return success. */
- return true;
- }
-
- /** Retrieves all previously stored documents from the given file. */
- public Set<T> retrieve(File documentFile) {
- return this.retrieve(documentFile, "");
- }
-
- /** Retrieves previously stored documents from the given file that start
- * with the given prefix. */
- public Set<T> retrieve(File documentFile, String prefix) {
-
- /* Check if the document file exists, and if not, return an empty set.
- * This is not an error case. */
- Set<T> result = new HashSet<>();
- if (!documentFile.exists()) {
- return result;
- }
-
- /* Parse the document file line by line and de-serialize contained
- * documents. */
- try {
- LineNumberReader lnr = new LineNumberReader(new BufferedReader(
- new FileReader(documentFile)));
- String line;
- String formattedString0 = null;
- while ((line = lnr.readLine()) != null) {
- if (!line.startsWith(" ")) {
- formattedString0 = line;
- } else if (formattedString0 == null) {
- System.err.printf("First line in %s must not start with a "
- + "space. Not retrieving any previously stored "
- + "documents.%n", documentFile.getAbsolutePath());
- lnr.close();
- return null;
- } else if (prefix.length() > formattedString0.length()
- && !(formattedString0 + line.substring(1))
- .startsWith(prefix)) {
- /* Skip combined line not starting with prefix. */
- continue;
- } else if (prefix.length() > 0
- && !formattedString0.startsWith(prefix)) {
- /* Skip line not starting with prefix. */
- continue;
- } else {
- T document = this.clazz.newInstance();
- if (!document.parse(new String[] { formattedString0,
- line.substring(1) })) {
- System.err.printf("Unable to read line %d from %s. Not "
- + "retrieving any previously stored documents.%n",
- lnr.getLineNumber(), documentFile.getAbsolutePath());
- lnr.close();
- return null;
- }
- result.add(document);
- }
- }
- lnr.close();
- } catch (IOException e) {
- System.err.printf("Unable to read %s. Not retrieving any "
- + "previously stored documents.%n",
- documentFile.getAbsolutePath());
- e.printStackTrace();
- return null;
- } catch (InstantiationException e) {
- System.err.printf("Unable to read %s. Cannot instantiate document "
- + "object.%n", documentFile.getAbsolutePath());
- e.printStackTrace();
- return null;
- } catch (IllegalAccessException e) {
- System.err.printf("Unable to read %s. Cannot instantiate document "
- + "object.%n", documentFile.getAbsolutePath());
- e.printStackTrace();
- return null;
- }
- return result;
- }
-}
-
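
The document files written by DocumentStore.store() group documents by their first format string: one unindented group line followed by one space-indented line per document in that group. The following minimal sketch mirrors the write loop above with hypothetical hidserv-stats content, printing the resulting layout instead of writing a file:

    import java.util.Map;
    import java.util.SortedMap;
    import java.util.SortedSet;
    import java.util.TreeMap;
    import java.util.TreeSet;

    public class DocumentFileLayoutDemo {

      public static void main(String[] args) {
        /* Hypothetical formatted documents: group key (fingerprint)
         * mapped to serialized value lines as produced by format(). */
        String fingerprint = "0123456789ABCDEF0123456789ABCDEF01234567";
        SortedMap<String, SortedSet<String>> formattedDocuments =
            new TreeMap<>();
        formattedDocuments.put(fingerprint, new TreeSet<String>());
        formattedDocuments.get(fingerprint)
            .add("2016-02-29 00:00:00,86400,123456,88");
        formattedDocuments.get(fingerprint)
            .add("2016-03-01 00:00:00,86400,131072,96");

        /* Same write loop as in store(): a group line, then one line per
         * document in that group indented by a single space. */
        StringBuilder sb = new StringBuilder();
        for (Map.Entry<String, SortedSet<String>> e
            : formattedDocuments.entrySet()) {
          sb.append(e.getKey()).append("\n");
          for (String s : e.getValue()) {
            sb.append(" ").append(s).append("\n");
          }
        }
        System.out.print(sb);
      }
    }
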
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/ExtrapolatedHidServStats.java b/modules/hidserv/src/org/torproject/metrics/hidserv/ExtrapolatedHidServStats.java
deleted file mode 100644
index 53bef71..0000000
--- a/modules/hidserv/src/org/torproject/metrics/hidserv/ExtrapolatedHidServStats.java
+++ /dev/null
@@ -1,170 +0,0 @@
-/* Copyright 2016--2017 The Tor Project
- * See LICENSE for licensing information */
-
-package org.torproject.metrics.hidserv;
-
-/** Extrapolated network totals of hidden-service statistics reported by a
- * single relay. Extrapolated values are based on reported statistics and
- * computed network fractions in the statistics interval. */
-public class ExtrapolatedHidServStats implements Document {
-
- /** Date of statistics interval end in milliseconds. */
- private long statsDateMillis;
-
- public long getStatsDateMillis() {
- return this.statsDateMillis;
- }
-
- /** Relay fingerprint consisting of 40 upper-case hex characters. */
- private String fingerprint;
-
- public String getFingerprint() {
- return this.fingerprint;
- }
-
- /** Extrapolated number of cells on rendezvous circuits in the
- * network. */
- private double extrapolatedRendRelayedCells;
-
- public void setExtrapolatedRendRelayedCells(
- double extrapolatedRendRelayedCells) {
- this.extrapolatedRendRelayedCells = extrapolatedRendRelayedCells;
- }
-
- public double getExtrapolatedRendRelayedCells() {
- return this.extrapolatedRendRelayedCells;
- }
-
- /** Computed fraction of observed cells on rendezvous circuits in the
- * network, used to weight this relay's extrapolated network total in
- * the aggregation step. */
- private double fractionRendRelayedCells;
-
- public void setFractionRendRelayedCells(
- double fractionRendRelayedCells) {
- this.fractionRendRelayedCells = fractionRendRelayedCells;
- }
-
- public double getFractionRendRelayedCells() {
- return this.fractionRendRelayedCells;
- }
-
- /** Extrapolated number of .onions in the network. */
- private double extrapolatedDirOnionsSeen;
-
- public void setExtrapolatedDirOnionsSeen(
- double extrapolatedDirOnionsSeen) {
- this.extrapolatedDirOnionsSeen = extrapolatedDirOnionsSeen;
- }
-
- public double getExtrapolatedDirOnionsSeen() {
- return this.extrapolatedDirOnionsSeen;
- }
-
- /** Computed fraction of observed .onions in the network, used to weight
- * this relay's extrapolated network total in the aggregation step. */
- private double fractionDirOnionsSeen;
-
- public void setFractionDirOnionsSeen(double fractionDirOnionsSeen) {
- this.fractionDirOnionsSeen = fractionDirOnionsSeen;
- }
-
- public double getFractionDirOnionsSeen() {
- return this.fractionDirOnionsSeen;
- }
-
- /** Instantiates a new stats object using fingerprint and statistics
- * interval end date which together uniquely identify the object. */
- public ExtrapolatedHidServStats(long statsDateMillis,
- String fingerprint) {
- this.statsDateMillis = statsDateMillis;
- this.fingerprint = fingerprint;
- }
-
- /** Returns whether this object contains the same fingerprint and
- * statistics interval end date as the passed object. */
- @Override
- public boolean equals(Object otherObject) {
- if (!(otherObject instanceof ExtrapolatedHidServStats)) {
- return false;
- }
- ExtrapolatedHidServStats other =
- (ExtrapolatedHidServStats) otherObject;
- return this.fingerprint.equals(other.fingerprint)
- && this.statsDateMillis == other.statsDateMillis;
- }
-
- /** Returns a (hopefully unique) hash code based on this object's
- * fingerprint and statistics interval end date. */
- @Override
- public int hashCode() {
- return this.fingerprint.hashCode() + (int) this.statsDateMillis;
- }
-
- /** Returns a string representation of this object, consisting of the
- * statistics interval end date and the concatenation of all other
- * attributes. */
- @Override
- public String[] format() {
- String first = DateTimeHelper.format(this.statsDateMillis,
- DateTimeHelper.ISO_DATE_FORMAT);
- String second = this.fingerprint
- + (this.fractionRendRelayedCells == 0.0 ? ",,"
- : String.format(",%.0f,%f", this.extrapolatedRendRelayedCells,
- this.fractionRendRelayedCells))
- + (this.fractionDirOnionsSeen == 0.0 ? ",,"
- : String.format(",%.0f,%f", this.extrapolatedDirOnionsSeen,
- this.fractionDirOnionsSeen));
- return new String[] { first, second };
- }
-
- /** Instantiates an empty stats object that will be initialized more by
- * the parse method. */
- ExtrapolatedHidServStats() {
- }
-
- /** Initializes this stats object using the two provided strings that
- * have been produced by the format method earlier and returns whether
- * this operation was successful. */
- @Override
- public boolean parse(String[] formattedStrings) {
- if (formattedStrings.length != 2) {
- System.err.printf("Invalid number of formatted strings. "
- + "Skipping.%n", formattedStrings.length);
- return false;
- }
- long statsDateMillis = DateTimeHelper.parse(formattedStrings[0],
- DateTimeHelper.ISO_DATE_FORMAT);
- String[] secondParts = formattedStrings[1].split(",", 5);
- if (secondParts.length != 5) {
- System.err.printf("Invalid number of comma-separated values. "
- + "Skipping.%n");
- return false;
- }
- String fingerprint = secondParts[0];
- double extrapolatedRendRelayedCells = 0.0;
- double fractionRendRelayedCells = 0.0;
- double extrapolatedDirOnionsSeen = 0.0;
- double fractionDirOnionsSeen = 0.0;
- try {
- extrapolatedRendRelayedCells = secondParts[1].equals("") ? 0.0
- : Double.parseDouble(secondParts[1]);
- fractionRendRelayedCells = secondParts[2].equals("") ? 0.0
- : Double.parseDouble(secondParts[2]);
- extrapolatedDirOnionsSeen = secondParts[3].equals("") ? 0.0
- : Double.parseDouble(secondParts[3]);
- fractionDirOnionsSeen = secondParts[4].equals("") ? 0.0
- : Double.parseDouble(secondParts[4]);
- } catch (NumberFormatException e) {
- return false;
- }
- this.statsDateMillis = statsDateMillis;
- this.fingerprint = fingerprint;
- this.extrapolatedRendRelayedCells = extrapolatedRendRelayedCells;
- this.fractionRendRelayedCells = fractionRendRelayedCells;
- this.extrapolatedDirOnionsSeen = extrapolatedDirOnionsSeen;
- this.fractionDirOnionsSeen = fractionDirOnionsSeen;
- return true;
- }
-}
-
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/Extrapolator.java b/modules/hidserv/src/org/torproject/metrics/hidserv/Extrapolator.java
deleted file mode 100644
index 262720a..0000000
--- a/modules/hidserv/src/org/torproject/metrics/hidserv/Extrapolator.java
+++ /dev/null
@@ -1,253 +0,0 @@
-/* Copyright 2016--2017 The Tor Project
- * See LICENSE for licensing information */
-
-package org.torproject.metrics.hidserv;
-
-import java.io.File;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Set;
-import java.util.SortedMap;
-import java.util.SortedSet;
-import java.util.TreeMap;
-import java.util.TreeSet;
-
-/** Extrapolate hidden-service statistics reported by single relays by
- * dividing them by the computed fraction of hidden-service activity
- * observed by the relay. */
-public class Extrapolator {
-
- /** Document file containing previously parsed reported hidden-service
- * statistics. */
- private File reportedHidServStatsFile;
-
- /** Document store for storing and retrieving reported hidden-service
- * statistics. */
- private DocumentStore<ReportedHidServStats> reportedHidServStatsStore;
-
- /** Directory containing document files with previously computed network
- * fractions. */
- private File computedNetworkFractionsDirectory;
-
- /** Document store for storing and retrieving computed network
- * fractions. */
- private DocumentStore<ComputedNetworkFractions>
- computedNetworkFractionsStore;
-
- /** Document file containing extrapolated hidden-service statistics. */
- private File extrapolatedHidServStatsFile;
-
- /** Document store for storing and retrieving extrapolated hidden-service
- * statistics. */
- private DocumentStore<ExtrapolatedHidServStats>
- extrapolatedHidServStatsStore;
-
- /** Initializes a new extrapolator object using the given directory and
- * document stores. */
- public Extrapolator(File statusDirectory,
- DocumentStore<ReportedHidServStats> reportedHidServStatsStore,
- DocumentStore<ComputedNetworkFractions>
- computedNetworkFractionsStore,
- DocumentStore<ExtrapolatedHidServStats>
- extrapolatedHidServStatsStore) {
-
- /* Create File instances for the files and directories in the provided
- * status directory. */
- this.reportedHidServStatsFile = new File(statusDirectory,
- "reported-hidserv-stats");
- this.computedNetworkFractionsDirectory =
- new File(statusDirectory, "computed-network-fractions");
- this.extrapolatedHidServStatsFile = new File(statusDirectory,
- "extrapolated-hidserv-stats");
-
- /* Store references to the provided document stores. */
- this.reportedHidServStatsStore = reportedHidServStatsStore;
- this.computedNetworkFractionsStore = computedNetworkFractionsStore;
- this.extrapolatedHidServStatsStore = extrapolatedHidServStatsStore;
- }
-
-  /** Iterates over all reported stats and extrapolates network totals
-   * for those that have not been extrapolated before. */
- public boolean extrapolateHidServStats() {
-
- /* Retrieve previously extrapolated stats to avoid extrapolating them
- * again. */
- Set<ExtrapolatedHidServStats> extrapolatedStats =
- this.extrapolatedHidServStatsStore.retrieve(
- this.extrapolatedHidServStatsFile);
-
- /* Retrieve all reported stats, even including those that have already
- * been extrapolated. */
- Set<ReportedHidServStats> reportedStats =
- this.reportedHidServStatsStore.retrieve(
- this.reportedHidServStatsFile);
-
- /* Make sure that all documents could be retrieved correctly. */
- if (extrapolatedStats == null || reportedStats == null) {
- System.err.printf("Could not read previously parsed or "
- + "extrapolated hidserv-stats. Skipping.");
- return false;
- }
-
- /* Re-arrange reported stats by fingerprint. */
- SortedMap<String, Set<ReportedHidServStats>> parsedStatsByFingerprint =
- new TreeMap<>();
- for (ReportedHidServStats stat : reportedStats) {
- String fingerprint = stat.getFingerprint();
- if (!parsedStatsByFingerprint.containsKey(fingerprint)) {
- parsedStatsByFingerprint.put(fingerprint,
- new HashSet<ReportedHidServStats>());
- }
- parsedStatsByFingerprint.get(fingerprint).add(stat);
- }
-
- /* Go through reported stats by fingerprint. */
- for (Map.Entry<String, Set<ReportedHidServStats>> e
- : parsedStatsByFingerprint.entrySet()) {
- String fingerprint = e.getKey();
-
- /* Iterate over all stats reported by this relay and make a list of
- * those that still need to be extrapolated. Also make a list of
- * all dates for which we need to retrieve computed network
- * fractions. */
- Set<ReportedHidServStats> newReportedStats = new HashSet<>();
- SortedSet<String> retrieveFractionDates = new TreeSet<>();
- for (ReportedHidServStats stats : e.getValue()) {
-
- /* Check whether extrapolated stats already contain an object with
- * the same statistics interval end date and fingerprint. */
- long statsDateMillis = (stats.getStatsEndMillis()
- / DateTimeHelper.ONE_DAY) * DateTimeHelper.ONE_DAY;
- if (extrapolatedStats.contains(
- new ExtrapolatedHidServStats(statsDateMillis, fingerprint))) {
- continue;
- }
-
- /* Add the reported stats to the list of stats we still need to
- * extrapolate. */
- newReportedStats.add(stats);
-
- /* Add all dates between statistics interval start and end to a
- * list. */
- long statsEndMillis = stats.getStatsEndMillis();
- long statsStartMillis = statsEndMillis
- - stats.getStatsIntervalSeconds() * DateTimeHelper.ONE_SECOND;
- for (long millis = statsStartMillis; millis <= statsEndMillis;
- millis += DateTimeHelper.ONE_DAY) {
- String date = DateTimeHelper.format(millis,
- DateTimeHelper.ISO_DATE_FORMAT);
- retrieveFractionDates.add(date);
- }
- }
-
- /* Retrieve all computed network fractions that might be needed to
- * extrapolate new statistics. Keep a list of all known consensus
- * valid-after times, and keep a map of fractions also by consensus
- * valid-after time. (It's not sufficient to only keep the latter,
- * because we need to count known consensuses even if the relay was
- * not contained in a consensus or had a network fraction of exactly
- * zero.) */
- SortedSet<Long> knownConsensuses = new TreeSet<>();
- SortedMap<Long, ComputedNetworkFractions> computedNetworkFractions =
- new TreeMap<>();
- for (String date : retrieveFractionDates) {
- File documentFile = new File(
- this.computedNetworkFractionsDirectory, date);
- Set<ComputedNetworkFractions> fractions
- = this.computedNetworkFractionsStore.retrieve(documentFile,
- fingerprint);
- for (ComputedNetworkFractions fraction : fractions) {
- knownConsensuses.add(fraction.getValidAfterMillis());
- if (fraction.getFingerprint().equals(fingerprint)) {
- computedNetworkFractions.put(fraction.getValidAfterMillis(),
- fraction);
- }
- }
- }
-
- /* Go through newly reported stats, match them with computed network
- * fractions, and extrapolate network totals. */
- for (ReportedHidServStats stats : newReportedStats) {
- long statsEndMillis = stats.getStatsEndMillis();
- long statsDateMillis = (statsEndMillis / DateTimeHelper.ONE_DAY)
- * DateTimeHelper.ONE_DAY;
- long statsStartMillis = statsEndMillis
- - stats.getStatsIntervalSeconds() * DateTimeHelper.ONE_SECOND;
-
-      /* Sum up computed network fractions and count known consensuses in
- * the relevant interval, so that we can later compute means of
- * network fractions. */
- double sumFractionRendRelayedCells = 0.0;
- double sumFractionDirOnionsSeen = 0.0;
- int consensuses = 0;
- for (long validAfterMillis : knownConsensuses) {
- if (statsStartMillis <= validAfterMillis
- && validAfterMillis < statsEndMillis) {
- if (computedNetworkFractions.containsKey(validAfterMillis)) {
- ComputedNetworkFractions frac =
- computedNetworkFractions.get(validAfterMillis);
- sumFractionRendRelayedCells +=
- frac.getFractionRendRelayedCells();
- sumFractionDirOnionsSeen +=
- frac.getFractionDirOnionsSeen();
- }
- consensuses++;
- }
- }
-
- /* If we don't know a single consensus with valid-after time in
- * the statistics interval, skip this stat. */
- if (consensuses == 0) {
- continue;
- }
-
- /* Compute means of network fractions. */
- double fractionRendRelayedCells =
- sumFractionRendRelayedCells / consensuses;
- double fractionDirOnionsSeen =
- sumFractionDirOnionsSeen / consensuses;
-
- /* If at least one fraction is positive, extrapolate network
- * totals. */
- if (fractionRendRelayedCells > 0.0
- || fractionDirOnionsSeen > 0.0) {
- ExtrapolatedHidServStats extrapolated =
- new ExtrapolatedHidServStats(
- statsDateMillis, fingerprint);
- if (fractionRendRelayedCells > 0.0) {
- extrapolated.setFractionRendRelayedCells(
- fractionRendRelayedCells);
- /* Extrapolating cells on rendezvous circuits is as easy as
- * dividing the reported number by the computed network
- * fraction. */
- double extrapolatedRendRelayedCells =
- stats.getRendRelayedCells() / fractionRendRelayedCells;
- extrapolated.setExtrapolatedRendRelayedCells(
- extrapolatedRendRelayedCells);
- }
- if (fractionDirOnionsSeen > 0.0) {
- extrapolated.setFractionDirOnionsSeen(
- fractionDirOnionsSeen);
- /* Extrapolating reported unique .onion addresses to the
- * total number in the network is more difficult. In short,
- * each descriptor is stored to 12 (likely) different
- * directories, so we'll have to divide the reported number by
- * 12 and then by the computed network fraction of this
- * directory. */
- double extrapolatedDirOnionsSeen =
- stats.getDirOnionsSeen() / (12.0 * fractionDirOnionsSeen);
- extrapolated.setExtrapolatedDirOnionsSeen(
- extrapolatedDirOnionsSeen);
- }
- extrapolatedStats.add(extrapolated);
- }
- }
- }
-
- /* Store all extrapolated network totals to disk with help of the
- * document store. */
- return this.extrapolatedHidServStatsStore.store(
- this.extrapolatedHidServStatsFile, extrapolatedStats);
- }
-}
-
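
For illustration, here is a minimal standalone sketch of the two
extrapolation formulas implemented above, using made-up numbers: cells on
rendezvous circuits are divided by the relay's computed network fraction,
and reported .onion addresses are additionally divided by 12 because each
descriptor is published to 12 (likely) different directories. This sketch
is not part of the patch.

/* Illustrative only; all numbers are hypothetical. */
public class ExtrapolationSketch {

  public static void main(String[] args) {
    long reportedRendRelayedCells = 1000000L;  /* hypothetical report */
    double fractionRendRelayedCells = 0.005;   /* hypothetical fraction */
    long reportedDirOnionsSeen = 240L;         /* hypothetical report */
    double fractionDirOnionsSeen = 0.004;      /* hypothetical fraction */

    /* Cells on rendezvous circuits: divide by the network fraction. */
    double extrapolatedRendRelayedCells =
        reportedRendRelayedCells / fractionRendRelayedCells;

    /* Unique .onion addresses: divide by 12 times the fraction. */
    double extrapolatedDirOnionsSeen =
        reportedDirOnionsSeen / (12.0 * fractionDirOnionsSeen);

    /* Prints cells=200000000 onions=5000 for the values above. */
    System.out.printf("cells=%.0f onions=%.0f%n",
        extrapolatedRendRelayedCells, extrapolatedDirOnionsSeen);
  }
}
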
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/Main.java b/modules/hidserv/src/org/torproject/metrics/hidserv/Main.java
deleted file mode 100644
index ad0b415..0000000
--- a/modules/hidserv/src/org/torproject/metrics/hidserv/Main.java
+++ /dev/null
@@ -1,88 +0,0 @@
-/* Copyright 2016--2017 The Tor Project
- * See LICENSE for licensing information */
-
-package org.torproject.metrics.hidserv;
-
-import java.io.File;
-import java.util.HashSet;
-import java.util.Set;
-
-/** Main class for updating extrapolated network totals of hidden-service
- * statistics. The main method of this class can be executed as often as
- * new statistics are needed, though callers must ensure that executions
- * do not overlap. */
-public class Main {
-
-  /** Parses new descriptors, extrapolates contained statistics using
-   * computed network fractions, aggregates results, and writes them to
-   * disk. */
- public static void main(String[] args) {
-
- /* Initialize directories and file paths. */
- Set<File> inDirectories = new HashSet<>();
- inDirectories.add(
- new File("../../shared/in/recent/relay-descriptors/consensuses"));
- inDirectories.add(
- new File("../../shared/in/recent/relay-descriptors/extra-infos"));
- File statusDirectory = new File("status");
-
- /* Initialize parser and read parse history to avoid parsing
- * descriptor files that haven't changed since the last execution. */
- System.out.println("Initializing parser and reading parse "
- + "history...");
- DocumentStore<ReportedHidServStats> reportedHidServStatsStore =
- new DocumentStore<>(ReportedHidServStats.class);
- DocumentStore<ComputedNetworkFractions>
- computedNetworkFractionsStore = new DocumentStore<>(
- ComputedNetworkFractions.class);
- Parser parser = new Parser(inDirectories, statusDirectory,
- reportedHidServStatsStore, computedNetworkFractionsStore);
- parser.readParseHistory();
-
- /* Parse new descriptors and store their contents using the document
- * stores. */
- System.out.println("Parsing descriptors...");
- if (!parser.parseDescriptors()) {
- System.err.println("Could not store parsed descriptors. "
- + "Terminating.");
- return;
- }
-
- /* Write the parse history to avoid parsing descriptor files again
- * next time. It's okay to do this now and not at the end of the
- * execution, because even if something breaks apart below, it's safe
- * not to parse descriptor files again. */
- System.out.println("Writing parse history...");
- parser.writeParseHistory();
-
- /* Extrapolate reported statistics using computed network fractions
- * and write the result to disk using a document store. The result is
- * a single file with extrapolated network totals based on reports by
- * single relays. */
- System.out.println("Extrapolating statistics...");
- DocumentStore<ExtrapolatedHidServStats> extrapolatedHidServStatsStore
- = new DocumentStore<>(ExtrapolatedHidServStats.class);
- Extrapolator extrapolator = new Extrapolator(statusDirectory,
- reportedHidServStatsStore, computedNetworkFractionsStore,
- extrapolatedHidServStatsStore);
- if (!extrapolator.extrapolateHidServStats()) {
- System.err.println("Could not extrapolate statistics. "
- + "Terminating.");
- return;
- }
-
- /* Go through all extrapolated network totals and aggregate them.
- * This includes calculating daily weighted interquartile means, among
- * other statistics. Write the result to a .csv file that can be
- * processed by other tools. */
- System.out.println("Aggregating statistics...");
- File hidservStatsExtrapolatedCsvFile = new File("stats/hidserv.csv");
- Aggregator aggregator = new Aggregator(statusDirectory,
- extrapolatedHidServStatsStore, hidservStatsExtrapolatedCsvFile);
- aggregator.aggregateHidServStats();
-
- /* End this execution. */
- System.out.println("Terminating.");
- }
-}
-
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/Parser.java b/modules/hidserv/src/org/torproject/metrics/hidserv/Parser.java
deleted file mode 100644
index eccb0c0..0000000
--- a/modules/hidserv/src/org/torproject/metrics/hidserv/Parser.java
+++ /dev/null
@@ -1,440 +0,0 @@
-/* Copyright 2016--2017 The Tor Project
- * See LICENSE for licensing information */
-
-package org.torproject.metrics.hidserv;
-
-import org.torproject.descriptor.Descriptor;
-import org.torproject.descriptor.DescriptorFile;
-import org.torproject.descriptor.DescriptorReader;
-import org.torproject.descriptor.DescriptorSourceFactory;
-import org.torproject.descriptor.ExtraInfoDescriptor;
-import org.torproject.descriptor.NetworkStatusEntry;
-import org.torproject.descriptor.RelayNetworkStatusConsensus;
-
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.math.BigInteger;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.Set;
-import java.util.SortedMap;
-import java.util.SortedSet;
-import java.util.TreeMap;
-import java.util.TreeSet;
-
-/** Parse hidden-service statistics from extra-info descriptors, compute
- * network fractions from consensuses, and write parsed contents to
- * document files for later use. */
-public class Parser {
-
- /** File containing tuples of last-modified times and file names of
- * descriptor files parsed in the previous execution. */
- private File parseHistoryFile;
-
- /** Descriptor reader to provide parsed extra-info descriptors and
- * consensuses. */
- private DescriptorReader descriptorReader;
-
- /** Document file containing previously parsed reported hidden-service
- * statistics. */
- private File reportedHidServStatsFile;
-
- /** Document store for storing and retrieving reported hidden-service
- * statistics. */
- private DocumentStore<ReportedHidServStats> reportedHidServStatsStore;
-
- /** Directory containing document files with previously computed network
- * fractions. */
- private File computedNetworkFractionsDirectory;
-
- /** Document store for storing and retrieving computed network
- * fractions. */
- private DocumentStore<ComputedNetworkFractions>
- computedNetworkFractionsStore;
-
- /** Initializes a new parser object using the given directories and
- * document stores. */
- public Parser(Set<File> inDirectories, File statusDirectory,
- DocumentStore<ReportedHidServStats> reportedHidServStatsStore,
- DocumentStore<ComputedNetworkFractions>
- computedNetworkFractionsStore) {
-
- /* Create a new descriptor reader for reading descriptors in the given
- * in directory. Configure the reader to avoid having more than five
-   * parsed descriptor files in the queue, rather than the default one
- * hundred. Five is a compromise between very large consensuses and
- * rather small extra-info descriptors. */
- this.descriptorReader =
- DescriptorSourceFactory.createDescriptorReader();
- for (File inDirectory : inDirectories) {
- this.descriptorReader.addDirectory(inDirectory);
- }
- this.descriptorReader.setMaxDescriptorFilesInQueue(5);
-
- /* Create File instances for the files and directories in the provided
- * status directory. */
- this.parseHistoryFile = new File(statusDirectory, "parse-history");
- this.reportedHidServStatsFile = new File(statusDirectory,
- "reported-hidserv-stats");
- this.computedNetworkFractionsDirectory =
- new File(statusDirectory, "computed-network-fractions");
-
- /* Store references to the provided document stores. */
- this.reportedHidServStatsStore = reportedHidServStatsStore;
- this.computedNetworkFractionsStore = computedNetworkFractionsStore;
- }
-
- /** Reads the parse history file to avoid parsing descriptor files that
- * have not changed since the previous execution. */
- public void readParseHistory() {
- if (this.parseHistoryFile.exists()
- && this.parseHistoryFile.isFile()) {
- SortedMap<String, Long> excludedFiles = new TreeMap<>();
- try {
- BufferedReader br = new BufferedReader(new FileReader(
- this.parseHistoryFile));
- String line;
- while ((line = br.readLine()) != null) {
- try {
- /* Each line is supposed to contain the last-modified time and
- * absolute path of a descriptor file. */
- String[] parts = line.split(" ", 2);
- excludedFiles.put(parts[1], Long.parseLong(parts[0]));
- } catch (NumberFormatException e) {
- System.err.printf("Illegal line '%s' in parse history. "
- + "Skipping line.%n", line);
- }
- }
- br.close();
- } catch (IOException e) {
- System.err.printf("Could not read history file '%s'. Not "
- + "excluding descriptors in this execution.",
- this.parseHistoryFile.getAbsolutePath());
- }
-
- /* Tell the descriptor reader to exclude the files contained in the
- * parse history file. */
- this.descriptorReader.setExcludedFiles(excludedFiles);
- }
- }
-
- /** Writes parsed or skipped descriptor files with last-modified times
- * and absolute paths to the parse history file to avoid parsing these
-   * files again, unless they change before the next execution. */
- public void writeParseHistory() {
-
- /* Obtain the list of descriptor files that were either parsed now or
- * that were skipped in this execution from the descriptor reader. */
- SortedMap<String, Long> excludedAndParsedFiles = new TreeMap<>();
- excludedAndParsedFiles.putAll(
- this.descriptorReader.getExcludedFiles());
- excludedAndParsedFiles.putAll(this.descriptorReader.getParsedFiles());
- try {
- this.parseHistoryFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- this.parseHistoryFile));
- for (Map.Entry<String, Long> e
- : excludedAndParsedFiles.entrySet()) {
- /* Each line starts with the last-modified time of the descriptor
- * file, followed by its absolute path. */
- String absolutePath = e.getKey();
- long lastModifiedMillis = e.getValue();
- bw.write(String.valueOf(lastModifiedMillis) + " " + absolutePath
- + "\n");
- }
- bw.close();
- } catch (IOException e) {
- System.err.printf("Could not write history file '%s'. Not "
- + "excluding descriptors in next execution.",
- this.parseHistoryFile.getAbsolutePath());
- }
- }
-
- /** Set of all reported hidden-service statistics.
- *
- * <p>To date, these objects are small, and keeping them all in memory
- * is easy. But if this ever changes, e.g., when more and more
- * statistics are added, this may not scale.</p> */
- private Set<ReportedHidServStats> reportedHidServStats = new HashSet<>();
-
- /** Instructs the descriptor reader to parse descriptor files, and
- * handles the resulting parsed descriptors if they are either
- * extra-info descriptors or consensuses. */
- public boolean parseDescriptors() {
- Iterator<DescriptorFile> descriptorFiles =
- this.descriptorReader.readDescriptors();
- while (descriptorFiles.hasNext()) {
- DescriptorFile descriptorFile = descriptorFiles.next();
- for (Descriptor descriptor : descriptorFile.getDescriptors()) {
- if (descriptor instanceof ExtraInfoDescriptor) {
- this.parseExtraInfoDescriptor((ExtraInfoDescriptor) descriptor);
- } else if (descriptor instanceof RelayNetworkStatusConsensus) {
- if (!this.parseRelayNetworkStatusConsensus(
- (RelayNetworkStatusConsensus) descriptor)) {
- return false;
- }
- }
- }
- }
-
- /* Store reported hidden-service statistics to their document file.
- * It's more efficient to only do this once after processing all
- * descriptors. In contrast, sets of computed network fractions are
- * stored immediately after processing the consensus they are based
- * on. */
- return this.reportedHidServStatsStore.store(
- this.reportedHidServStatsFile, this.reportedHidServStats);
- }
-
- private static final String BIN_SIZE = "bin_size";
-
- /** Parses the given extra-info descriptor by extracting its fingerprint
- * and contained hidserv-* lines.
- *
- * <p>If a valid set of hidserv-stats can be extracted, create a new
- * stats object that will later be stored to a document file.</p> */
- private void parseExtraInfoDescriptor(
- ExtraInfoDescriptor extraInfoDescriptor) {
-
- /* Extract the fingerprint from the parsed descriptor. */
- String fingerprint = extraInfoDescriptor.getFingerprint();
-
- /* If the descriptor did not contain any of the expected hidserv-*
- * lines, don't do anything. This applies to the majority of
- * descriptors, at least as long as only a minority of relays reports
- * these statistics. */
- if (extraInfoDescriptor.getHidservStatsEndMillis() < 0L
- && extraInfoDescriptor.getHidservRendRelayedCells() == null
- && extraInfoDescriptor.getHidservDirOnionsSeen() == null) {
- return;
-
- /* If the descriptor contained all expected hidserv-* lines, create a
- * new stats object and put it in the local map, so that it will later
- * be written to a document file. */
- } else if (extraInfoDescriptor.getHidservStatsEndMillis() >= 0L
- && extraInfoDescriptor.getHidservStatsIntervalLength() >= 0L
- && extraInfoDescriptor.getHidservRendRelayedCells() != null
- && extraInfoDescriptor.getHidservRendRelayedCellsParameters() != null
- && extraInfoDescriptor.getHidservRendRelayedCellsParameters()
- .containsKey(BIN_SIZE)
- && extraInfoDescriptor.getHidservDirOnionsSeen() != null
- && extraInfoDescriptor.getHidservDirOnionsSeenParameters() != null
- && extraInfoDescriptor.getHidservDirOnionsSeenParameters()
- .containsKey(BIN_SIZE)) {
- ReportedHidServStats reportedStats = new ReportedHidServStats(
- fingerprint, extraInfoDescriptor.getHidservStatsEndMillis());
- reportedStats.setStatsIntervalSeconds(extraInfoDescriptor
- .getHidservStatsIntervalLength());
- reportedStats.setRendRelayedCells(this.removeNoise(extraInfoDescriptor
- .getHidservRendRelayedCells().longValue(), extraInfoDescriptor
- .getHidservRendRelayedCellsParameters().get(BIN_SIZE).longValue()));
- reportedStats.setDirOnionsSeen(this.removeNoise(extraInfoDescriptor
- .getHidservDirOnionsSeen().longValue(), extraInfoDescriptor
- .getHidservDirOnionsSeenParameters().get(BIN_SIZE).longValue()));
- this.reportedHidServStats.add(reportedStats);
-
- /* If the descriptor contained some but not all hidserv-* lines, print
- * out a warning. This case does not warrant any further action,
- * because relays can in theory write anything in their extra-info
- * descriptors. But maybe we'll want to know. */
- } else {
- System.err.println("Relay " + fingerprint + " published "
- + "incomplete hidserv-stats. Ignoring.");
- }
- }
-
- /** Removes noise from a reported stats value by rounding to the nearest
- * right side of a bin and subtracting half of the bin size. */
- private long removeNoise(long reportedNumber, long binSize) {
- long roundedToNearestRightSideOfTheBin =
- ((reportedNumber + binSize / 2) / binSize) * binSize;
- long subtractedHalfOfBinSize =
- roundedToNearestRightSideOfTheBin - binSize / 2;
- return subtractedHalfOfBinSize;
- }
-
- /** Parses the given consensus. */
- public boolean parseRelayNetworkStatusConsensus(
- RelayNetworkStatusConsensus consensus) {
-
- /* Make sure that the consensus contains Wxx weights. */
- SortedMap<String, Integer> bandwidthWeights =
- consensus.getBandwidthWeights();
- if (bandwidthWeights == null) {
- System.err.printf("Consensus with valid-after time %s doesn't "
- + "contain any Wxx weights. Skipping.%n",
- DateTimeHelper.format(consensus.getValidAfterMillis()));
- return false;
- }
-
- /* More precisely, make sure that it contains Wmx weights, and then
- * parse them. */
- SortedSet<String> expectedWeightKeys =
- new TreeSet<String>(Arrays.asList("Wmg,Wmm,Wme,Wmd".split(",")));
- expectedWeightKeys.removeAll(bandwidthWeights.keySet());
- if (!expectedWeightKeys.isEmpty()) {
- System.err.printf("Consensus with valid-after time %s doesn't "
- + "contain expected Wmx weights. Skipping.%n",
- DateTimeHelper.format(consensus.getValidAfterMillis()));
- return false;
- }
- double wmg = ((double) bandwidthWeights.get("Wmg")) / 10000.0;
- double wmm = ((double) bandwidthWeights.get("Wmm")) / 10000.0;
- double wme = ((double) bandwidthWeights.get("Wme")) / 10000.0;
- double wmd = ((double) bandwidthWeights.get("Wmd")) / 10000.0;
-
- /* Keep a sorted set with the fingerprints of all hidden-service
- * directories, in reverse order, so that we can later determine the
- * fingerprint distance between a directory and the directory
- * preceding it by three positions in the descriptor ring. */
- SortedSet<String> hsDirs = new TreeSet<>(Collections.reverseOrder());
-
- /* Prepare for computing the weights of all relays with the Fast flag
- * for being selected in the middle position. */
- double totalWeightsRendezvousPoint = 0.0;
- SortedMap<String, Double> weightsRendezvousPoint = new TreeMap<>();
-
- /* Go through all status entries contained in the consensus. */
- for (Map.Entry<String, NetworkStatusEntry> e
- : consensus.getStatusEntries().entrySet()) {
- String fingerprint = e.getKey();
- NetworkStatusEntry statusEntry = e.getValue();
- SortedSet<String> flags = statusEntry.getFlags();
-
- /* Add the relay to the set of hidden-service directories if it has
- * the HSDir flag. */
- if (flags.contains("HSDir")) {
- hsDirs.add(statusEntry.getFingerprint());
- }
-
- /* Compute the probability for being selected as rendezvous point.
- * If the relay has the Fast flag, multiply its consensus weight
- * with the correct Wmx weight, depending on whether the relay has
- * the Guard and/or Exit flag. */
- double weightRendezvousPoint = 0.0;
- if (flags.contains("Fast")) {
- weightRendezvousPoint = (double) statusEntry.getBandwidth();
- if (flags.contains("Guard") && flags.contains("Exit")) {
- weightRendezvousPoint *= wmd;
- } else if (flags.contains("Guard")) {
- weightRendezvousPoint *= wmg;
- } else if (flags.contains("Exit")) {
- weightRendezvousPoint *= wme;
- } else {
- weightRendezvousPoint *= wmm;
- }
- }
- weightsRendezvousPoint.put(fingerprint, weightRendezvousPoint);
- totalWeightsRendezvousPoint += weightRendezvousPoint;
- }
-
- /* Store all computed network fractions based on this consensus in a
- * set, which will then be written to disk in a single store
- * operation. */
- Set<ComputedNetworkFractions> computedNetworkFractions = new HashSet<>();
-
- /* Remove all previously added directory fingerprints and re-add them
- * twice, once with a leading "0" and once with a leading "1". The
- * purpose is to simplify the logic for moving from one fingerprint to
- * the previous one, even if that would mean traversing the ring
- * start. For example, the fingerprint preceding "1""00..0000" with
- * the first "1" being added here could be "0""FF..FFFF". */
- SortedSet<String> hsDirsCopy = new TreeSet<>(hsDirs);
- hsDirs.clear();
- for (String fingerprint : hsDirsCopy) {
- hsDirs.add("0" + fingerprint);
- hsDirs.add("1" + fingerprint);
- }
-
- /* Define the total ring size to compute fractions below. This is
- * 16^40 or 2^160. */
- final double ringSize = new BigInteger(
- "10000000000000000000000000000000000000000",
- 16).doubleValue();
-
- /* Go through all status entries again, this time computing network
- * fractions. */
- for (Map.Entry<String, NetworkStatusEntry> e
- : consensus.getStatusEntries().entrySet()) {
- String fingerprint = e.getKey();
- NetworkStatusEntry statusEntry = e.getValue();
- double fractionRendRelayedCells = 0.0;
- double fractionDirOnionsSeen = 0.0;
- if (statusEntry != null) {
-
- /* Check if the relay is a hidden-service directory by looking up
- * its fingerprint, preceded by "1", in the sorted set that we
- * populated above. */
- String fingerprintPrecededByOne = "1" + fingerprint;
- if (hsDirs.contains(fingerprintPrecededByOne)) {
-
- /* Move three positions in the sorted set, which is in reverse
- * order, to learn the fingerprint of the directory preceding
- * this directory by three positions. */
- String startResponsible = fingerprint;
- int positionsToGo = 3;
- for (String hsDirFingerprint
- : hsDirs.tailSet(fingerprintPrecededByOne)) {
- startResponsible = hsDirFingerprint;
- if (positionsToGo-- <= 0) {
- break;
- }
- }
-
- /* Compute the fraction of descriptor space that this relay is
- * responsible for as difference between the two fingerprints
- * divided by the ring size. */
- fractionDirOnionsSeen =
- new BigInteger(fingerprintPrecededByOne, 16).subtract(
- new BigInteger(startResponsible, 16)).doubleValue()
- / ringSize;
-
- /* Divide this fraction by three to obtain the fraction of
- * descriptors that this directory has seen. This step is
- * necessary, because each descriptor that is published to this
- * directory is also published to two other directories. */
- fractionDirOnionsSeen /= 3.0;
- }
-
- /* Compute the fraction of cells on rendezvous circuits that this
- * relay has seen by dividing its previously calculated weight by
- * the sum of all such weights. */
- fractionRendRelayedCells = weightsRendezvousPoint.get(fingerprint)
- / totalWeightsRendezvousPoint;
- }
-
- /* If at least one of the computed fractions is non-zero, create a
- * new fractions object. */
- if (fractionRendRelayedCells > 0.0 || fractionDirOnionsSeen > 0.0) {
- ComputedNetworkFractions fractions = new ComputedNetworkFractions(
- fingerprint, consensus.getValidAfterMillis());
- fractions.setFractionRendRelayedCells(fractionRendRelayedCells);
- fractions.setFractionDirOnionsSeen(fractionDirOnionsSeen);
- computedNetworkFractions.add(fractions);
- }
- }
-
- /* Store all newly computed network fractions to a documents file.
- * The same file also contains computed network fractions from other
- * consensuses that were valid on the same day. This is in contrast
- * to the other documents which are all stored in a single file, which
- * would not scale for computed network fractions. */
- String date = DateTimeHelper.format(consensus.getValidAfterMillis(),
- DateTimeHelper.ISO_DATE_FORMAT);
- File documentFile = new File(this.computedNetworkFractionsDirectory,
- date);
- if (!this.computedNetworkFractionsStore.store(documentFile,
- computedNetworkFractions)) {
- return false;
- }
- return true;
- }
-}
-
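
For illustration, the noise-removal arithmetic from removeNoise above as a
standalone sketch, ignoring the Laplace noise that relays add before
reporting; the observed value and bin size are made up. This sketch is not
part of the patch.

/* Illustrative only; values are hypothetical. */
public class RemoveNoiseSketch {

  /* Same arithmetic as Parser#removeNoise above. */
  static long removeNoise(long reportedNumber, long binSize) {
    long roundedToNearestRightSideOfTheBin =
        ((reportedNumber + binSize / 2) / binSize) * binSize;
    return roundedToNearestRightSideOfTheBin - binSize / 2;
  }

  public static void main(String[] args) {
    /* A relay that observed 2600 cells and bins upwards with a bin size
     * of 1024 reports 3072 (ignoring the added Laplace noise); removing
     * the noise yields 2560, the middle of the reported bin. */
    System.out.println(removeNoise(3072L, 1024L));  /* prints 2560 */
  }
}
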
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/ReportedHidServStats.java b/modules/hidserv/src/org/torproject/metrics/hidserv/ReportedHidServStats.java
deleted file mode 100644
index 6d305d0..0000000
--- a/modules/hidserv/src/org/torproject/metrics/hidserv/ReportedHidServStats.java
+++ /dev/null
@@ -1,141 +0,0 @@
-/* Copyright 2016--2017 The Tor Project
- * See LICENSE for licensing information */
-
-package org.torproject.metrics.hidserv;
-
-/* Hidden-service statistics reported by a single relay covering a single
- * statistics interval of usually 24 hours. These statistics are reported
- * by the relay in the "hidserv-" lines of its extra-info descriptor. */
-public class ReportedHidServStats implements Document {
-
- /* Relay fingerprint consisting of 40 upper-case hex characters. */
- private String fingerprint;
-
- public String getFingerprint() {
- return this.fingerprint;
- }
-
- /* Hidden-service statistics end timestamp in milliseconds. */
- private long statsEndMillis;
-
- public long getStatsEndMillis() {
- return this.statsEndMillis;
- }
-
- /* Statistics interval length in seconds. */
- private long statsIntervalSeconds;
-
- public void setStatsIntervalSeconds(long statsIntervalSeconds) {
- this.statsIntervalSeconds = statsIntervalSeconds;
- }
-
- public long getStatsIntervalSeconds() {
- return this.statsIntervalSeconds;
- }
-
- /* Number of relayed cells on rendezvous circuits as reported by the
- * relay and adjusted by rounding to the nearest right side of a bin and
- * subtracting half of the bin size. */
- private long rendRelayedCells;
-
- public void setRendRelayedCells(long rendRelayedCells) {
- this.rendRelayedCells = rendRelayedCells;
- }
-
- public long getRendRelayedCells() {
- return this.rendRelayedCells;
- }
-
- /* Number of distinct .onion addresses as reported by the relay and
- * adjusted by rounding to the nearest right side of a bin and
- * subtracting half of the bin size. */
- private long dirOnionsSeen;
-
- public void setDirOnionsSeen(long dirOnionsSeen) {
- this.dirOnionsSeen = dirOnionsSeen;
- }
-
- public long getDirOnionsSeen() {
- return this.dirOnionsSeen;
- }
-
- /* Instantiate a new stats object using fingerprint and stats interval
- * end which together uniquely identify the object. */
- public ReportedHidServStats(String fingerprint, long statsEndMillis) {
- this.fingerprint = fingerprint;
- this.statsEndMillis = statsEndMillis;
- }
-
- /* Return whether this object contains the same fingerprint and stats
- * interval end as the passed object. */
- @Override
- public boolean equals(Object otherObject) {
- if (!(otherObject instanceof ReportedHidServStats)) {
- return false;
- }
- ReportedHidServStats other = (ReportedHidServStats) otherObject;
- return this.fingerprint.equals(other.fingerprint)
- && this.statsEndMillis == other.statsEndMillis;
- }
-
- /* Return a (hopefully unique) hash code based on this object's
- * fingerprint and stats interval end. */
- @Override
- public int hashCode() {
- return this.fingerprint.hashCode() + (int) this.statsEndMillis;
- }
-
- /* Return a string representation of this object, consisting of
- * fingerprint and the concatenation of all other attributes. */
- @Override
- public String[] format() {
- String first = this.fingerprint;
- String second = String.format("%s,%d,%d,%d",
- DateTimeHelper.format(this.statsEndMillis),
- this.statsIntervalSeconds, this.rendRelayedCells,
- this.dirOnionsSeen);
- return new String[] { first, second };
- }
-
- /* Instantiate an empty stats object that will be initialized more by
- * the parse method. */
- ReportedHidServStats() {
- }
-
- /* Initialize this stats object using the two provided strings that have
- * been produced by the format method earlier. Return whether this
- * operation was successful. */
- @Override
- public boolean parse(String[] formattedStrings) {
- if (formattedStrings.length != 2) {
-      System.err.printf("Invalid number of formatted strings (%d). "
-          + "Skipping.%n", formattedStrings.length);
- return false;
- }
- String[] secondParts = formattedStrings[1].split(",", 4);
- if (secondParts.length != 4) {
- return false;
- }
- long statsEndMillis = DateTimeHelper.parse(secondParts[0]);
- if (statsEndMillis == DateTimeHelper.NO_TIME_AVAILABLE) {
- return false;
- }
- long statsIntervalSeconds = -1L;
- long rendRelayedCells = -1L;
- long dirOnionsSeen = -1L;
- try {
- statsIntervalSeconds = Long.parseLong(secondParts[1]);
- rendRelayedCells = Long.parseLong(secondParts[2]);
- dirOnionsSeen = Long.parseLong(secondParts[3]);
- } catch (NumberFormatException e) {
- return false;
- }
- this.fingerprint = formattedStrings[0];
- this.statsEndMillis = statsEndMillis;
- this.statsIntervalSeconds = statsIntervalSeconds;
- this.rendRelayedCells = rendRelayedCells;
- this.dirOnionsSeen = dirOnionsSeen;
- return true;
- }
-}
-
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/Simulate.java b/modules/hidserv/src/org/torproject/metrics/hidserv/Simulate.java
deleted file mode 100644
index 207b4aa..0000000
--- a/modules/hidserv/src/org/torproject/metrics/hidserv/Simulate.java
+++ /dev/null
@@ -1,365 +0,0 @@
-/* Copyright 2016--2017 The Tor Project
- * See LICENSE for licensing information */
-
-package org.torproject.metrics.hidserv;
-
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileWriter;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-import java.util.SortedMap;
-import java.util.SortedSet;
-import java.util.TreeMap;
-import java.util.TreeSet;
-
-/* NOTE: This class is not required for running the Main class! (It
- * contains its own main method.) */
-public class Simulate {
- private static File simCellsCsvFile =
- new File("out/csv/sim-cells.csv");
-
- private static File simOnionsCsvFile =
- new File("out/csv/sim-onions.csv");
-
- /** Runs two simulations to evaluate this data-processing module. */
- public static void main(String[] args) throws Exception {
- System.out.print("Simulating extrapolation of rendezvous cells");
- simulateManyCells();
- System.out.print("\nSimulating extrapolation of .onions");
- simulateManyOnions();
- System.out.println("\nTerminating.");
- }
-
- private static Random rnd = new Random();
-
- private static void simulateManyCells() throws Exception {
- simCellsCsvFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- simCellsCsvFile));
- bw.write("run,frac,wmean,wmedian,wiqm\n");
- final int numberOfExtrapolations = 1000;
- for (int i = 0; i < numberOfExtrapolations; i++) {
- bw.write(simulateCells(i));
- System.out.print(".");
- }
- bw.close();
- }
-
- private static void simulateManyOnions() throws Exception {
- simOnionsCsvFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- simOnionsCsvFile));
- bw.write("run,frac,wmean,wmedian,wiqm\n");
- final int numberOfExtrapolations = 1000;
- for (int i = 0; i < numberOfExtrapolations; i++) {
- bw.write(simulateOnions(i));
- System.out.print(".");
- }
- bw.close();
- }
-
- private static String simulateCells(int run) {
-
- /* Generate consensus weights following an exponential distribution
- * with lambda = 1 for 3000 potential rendezvous points. */
- final int numberRendPoints = 3000;
- double[] consensusWeights = new double[numberRendPoints];
- double totalConsensusWeight = 0.0;
- for (int i = 0; i < numberRendPoints; i++) {
- double consensusWeight = -Math.log(1.0 - rnd.nextDouble());
- consensusWeights[i] = consensusWeight;
- totalConsensusWeight += consensusWeight;
- }
-
- /* Compute probabilities for being selected as rendezvous point. */
- double[] probRendPoint = new double[numberRendPoints];
- for (int i = 0; i < numberRendPoints; i++) {
- probRendPoint[i] = consensusWeights[i] / totalConsensusWeight;
- }
-
- /* Generate 10,000,000,000 cells (474 Mbit/s) in chunks following an
- * exponential distribution with lambda = 0.0001, so on average
- * 10,000 cells per chunk, and randomly assign them to a rendezvous
- * point to report them later. */
- long cellsLeft = 10000000000L;
- final double cellsLambda = 0.0001;
- long[] observedCells = new long[numberRendPoints];
- while (cellsLeft > 0) {
- long cells = Math.min(cellsLeft,
- (long) (-Math.log(1.0 - rnd.nextDouble()) / cellsLambda));
- double selectRendPoint = rnd.nextDouble();
- for (int i = 0; i < probRendPoint.length; i++) {
- selectRendPoint -= probRendPoint[i];
- if (selectRendPoint <= 0.0) {
- observedCells[i] += cells;
- break;
- }
- }
- cellsLeft -= cells;
- }
-
- /* Obfuscate reports using binning and Laplace noise, and then attempt
- * to remove noise again. */
- final long binSize = 1024L;
- final double b = 2048.0 / 0.3;
- long[] reportedCells = new long[numberRendPoints];
- long[] removedNoiseCells = new long[numberRendPoints];
- for (int i = 0; i < numberRendPoints; i++) {
- long observed = observedCells[i];
- long afterBinning = ((observed + binSize - 1L) / binSize) * binSize;
- double randomDouble = rnd.nextDouble();
- double laplaceNoise = -b * (randomDouble > 0.5 ? 1.0 : -1.0)
- * Math.log(1.0 - 2.0 * Math.abs(randomDouble - 0.5));
- long reported = afterBinning + (long) laplaceNoise;
- reportedCells[i] = reported;
- long roundedToNearestRightSideOfTheBin =
- ((reported + binSize / 2) / binSize) * binSize;
- long subtractedHalfOfBinSize =
- roundedToNearestRightSideOfTheBin - binSize / 2;
- removedNoiseCells[i] = subtractedHalfOfBinSize;
- }
-
- /* Perform extrapolations from random fractions of reports by
- * probability to be selected as rendezvous point. */
- StringBuilder sb = new StringBuilder();
- double[] fractions = new double[] { 0.01, 0.02, 0.03, 0.04, 0.05, 0.1,
- 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99 };
- for (double fraction : fractions) {
- SortedSet<Integer> nonReportingRelays = new TreeSet<>();
- for (int j = 0; j < numberRendPoints; j++) {
- nonReportingRelays.add(j);
- }
- List<Integer> shuffledRelays = new ArrayList<>(nonReportingRelays);
- Collections.shuffle(shuffledRelays);
- SortedSet<Integer> reportingRelays = new TreeSet<>();
- for (int j = 0; j < (int) ((double) numberRendPoints * fraction);
- j++) {
- reportingRelays.add(shuffledRelays.get(j));
- nonReportingRelays.remove(shuffledRelays.get(j));
- }
- List<double[]> singleRelayExtrapolations;
- double totalReportingProbability;
- do {
- singleRelayExtrapolations = new ArrayList<>();
- totalReportingProbability = 0.0;
- for (int reportingRelay : reportingRelays) {
- double probability = probRendPoint[reportingRelay];
- if (probability > 0.0) {
- singleRelayExtrapolations.add(
- new double[] {
- removedNoiseCells[reportingRelay] / probability,
- removedNoiseCells[reportingRelay],
- probability });
- }
- totalReportingProbability += probability;
- }
- if (totalReportingProbability < fraction - 0.001) {
- int addRelay = new ArrayList<>(nonReportingRelays).get(
- rnd.nextInt(nonReportingRelays.size()));
- nonReportingRelays.remove(addRelay);
- reportingRelays.add(addRelay);
- } else if (totalReportingProbability > fraction + 0.001) {
- int removeRelay = new ArrayList<>(reportingRelays).get(
- rnd.nextInt(reportingRelays.size()));
- reportingRelays.remove(removeRelay);
- nonReportingRelays.add(removeRelay);
- }
- } while (totalReportingProbability < fraction - 0.001
- || totalReportingProbability > fraction + 0.001);
- Collections.sort(singleRelayExtrapolations,
- new Comparator<double[]>() {
- public int compare(double[] o1, double[] o2) {
- return o1[0] < o2[0] ? -1 : o1[0] > o2[0] ? 1 : 0;
- }
- }
- );
- double totalProbability = 0.0;
- double totalValues = 0.0;
- double totalInterquartileProbability = 0.0;
- double totalInterquartileValues = 0.0;
- Double weightedMedian = null;
- for (double[] extrapolation : singleRelayExtrapolations) {
- totalValues += extrapolation[1];
- totalProbability += extrapolation[2];
- if (weightedMedian == null
- && totalProbability > totalReportingProbability * 0.5) {
- weightedMedian = extrapolation[0];
- }
- if (totalProbability > totalReportingProbability * 0.25
- && totalProbability < totalReportingProbability * 0.75) {
- totalInterquartileValues += extrapolation[1];
- totalInterquartileProbability += extrapolation[2];
- }
- }
- sb.append(String.format("%d,%.2f,%.0f,%.0f,%.0f%n", run, fraction,
- totalValues / totalProbability, weightedMedian,
- totalInterquartileValues / totalInterquartileProbability));
- }
- return sb.toString();
- }
-
- private static String simulateOnions(final int run) {
-
- /* Generate 3000 HSDirs with "fingerprints" between 0.0 and 1.0. */
- final int numberHsDirs = 3000;
- SortedSet<Double> hsDirFingerprints = new TreeSet<>();
- for (int i = 0; i < numberHsDirs; i++) {
- hsDirFingerprints.add(rnd.nextDouble());
- }
-
- /* Compute fractions of observed descriptor space. */
- SortedSet<Double> ring =
- new TreeSet<>(Collections.reverseOrder());
- for (double fingerprint : hsDirFingerprints) {
- ring.add(fingerprint);
- ring.add(fingerprint - 1.0);
- }
- SortedMap<Double, Double> hsDirFractions = new TreeMap<>();
- for (double fingerprint : hsDirFingerprints) {
- double start = fingerprint;
- int positionsToGo = 3;
- for (double prev : ring.tailSet(fingerprint)) {
- start = prev;
- if (positionsToGo-- <= 0) {
- break;
- }
- }
- hsDirFractions.put(fingerprint, fingerprint - start);
- }
-
- /* Generate 40000 .onions with 4 HSDesc IDs, store them on HSDirs. */
- final int numberOnions = 40000;
- final int replicas = 4;
- final int storeOnDirs = 3;
- SortedMap<Double, SortedSet<Integer>> storedDescs = new TreeMap<>();
- for (double fingerprint : hsDirFingerprints) {
- storedDescs.put(fingerprint, new TreeSet<Integer>());
- }
- for (int i = 0; i < numberOnions; i++) {
- for (int j = 0; j < replicas; j++) {
- int leftToStore = storeOnDirs;
- for (double fingerprint
- : hsDirFingerprints.tailSet(rnd.nextDouble())) {
- storedDescs.get(fingerprint).add(i);
- if (--leftToStore <= 0) {
- break;
- }
- }
- if (leftToStore > 0) {
- for (double fingerprint : hsDirFingerprints) {
- storedDescs.get(fingerprint).add(i);
- if (--leftToStore <= 0) {
- break;
- }
- }
- }
- }
- }
-
- /* Obfuscate reports using binning and Laplace noise, and then attempt
- * to remove noise again. */
- final long binSize = 8L;
- final double b = 8.0 / 0.3;
- SortedMap<Double, Long> reportedOnions = new TreeMap<>();
- SortedMap<Double, Long> removedNoiseOnions = new TreeMap<>();
- for (Map.Entry<Double, SortedSet<Integer>> e
- : storedDescs.entrySet()) {
- double fingerprint = e.getKey();
- long observed = (long) e.getValue().size();
- long afterBinning = ((observed + binSize - 1L) / binSize) * binSize;
- double randomDouble = rnd.nextDouble();
- double laplaceNoise = -b * (randomDouble > 0.5 ? 1.0 : -1.0)
- * Math.log(1.0 - 2.0 * Math.abs(randomDouble - 0.5));
- long reported = afterBinning + (long) laplaceNoise;
- reportedOnions.put(fingerprint, reported);
- long roundedToNearestRightSideOfTheBin =
- ((reported + binSize / 2) / binSize) * binSize;
- long subtractedHalfOfBinSize =
- roundedToNearestRightSideOfTheBin - binSize / 2;
- removedNoiseOnions.put(fingerprint, subtractedHalfOfBinSize);
- }
-
- /* Perform extrapolations from random fractions of reports by
-     * probability to be selected as hidden-service directory. */
- StringBuilder sb = new StringBuilder();
- double[] fractions = new double[] { 0.01, 0.02, 0.03, 0.04, 0.05, 0.1,
- 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99 };
- for (double fraction : fractions) {
- SortedSet<Double> nonReportingRelays =
- new TreeSet<>(hsDirFractions.keySet());
- List<Double> shuffledRelays = new ArrayList<>(
- nonReportingRelays);
- Collections.shuffle(shuffledRelays);
- SortedSet<Double> reportingRelays = new TreeSet<>();
- for (int j = 0; j < (int) ((double) hsDirFractions.size()
- * fraction); j++) {
- reportingRelays.add(shuffledRelays.get(j));
- nonReportingRelays.remove(shuffledRelays.get(j));
- }
- List<double[]> singleRelayExtrapolations;
- double totalReportingProbability;
- do {
- singleRelayExtrapolations = new ArrayList<>();
- totalReportingProbability = 0.0;
- for (double reportingRelay : reportingRelays) {
- double probability = hsDirFractions.get(reportingRelay) / 3.0;
- if (probability > 0.0) {
- singleRelayExtrapolations.add(
- new double[] { removedNoiseOnions.get(reportingRelay)
- / probability, removedNoiseOnions.get(reportingRelay),
- probability });
- }
- totalReportingProbability += probability;
- }
- if (totalReportingProbability < fraction - 0.001) {
- double addRelay =
- new ArrayList<>(nonReportingRelays).get(
- rnd.nextInt(nonReportingRelays.size()));
- nonReportingRelays.remove(addRelay);
- reportingRelays.add(addRelay);
- } else if (totalReportingProbability > fraction + 0.001) {
- double removeRelay =
- new ArrayList<>(reportingRelays).get(
- rnd.nextInt(reportingRelays.size()));
- reportingRelays.remove(removeRelay);
- nonReportingRelays.add(removeRelay);
- }
- } while (totalReportingProbability < fraction - 0.001
- || totalReportingProbability > fraction + 0.001);
- Collections.sort(singleRelayExtrapolations,
- new Comparator<double[]>() {
- public int compare(double[] first, double[] second) {
- return first[0] < second[0] ? -1 : first[0] > second[0] ? 1 : 0;
- }
- }
- );
- double totalProbability = 0.0;
- double totalValues = 0.0;
- double totalInterquartileProbability = 0.0;
- double totalInterquartileValues = 0.0;
- Double weightedMedian = null;
- for (double[] extrapolation : singleRelayExtrapolations) {
- totalValues += extrapolation[1];
- totalProbability += extrapolation[2];
- if (weightedMedian == null
- && totalProbability > totalReportingProbability * 0.5) {
- weightedMedian = extrapolation[0];
- }
- if (totalProbability > totalReportingProbability * 0.25
- && totalProbability < totalReportingProbability * 0.75) {
- totalInterquartileValues += extrapolation[1];
- totalInterquartileProbability += extrapolation[2];
- }
- }
- sb.append(String.format("%d,%.2f,%.0f,%.0f,%.0f%n", run, fraction,
- totalValues / totalProbability, weightedMedian,
- totalInterquartileValues / totalInterquartileProbability));
- }
- return sb.toString();
- }
-}
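
For illustration, the weighted mean, weighted median, and weighted
interquartile mean computed per fraction in the two simulations above,
extracted into a standalone sketch. Each row holds { extrapolated network
total, reported value, reporting probability }; the three rows are made-up
numbers. This sketch is not part of the patch.

import java.util.Arrays;
import java.util.Comparator;
import java.util.List;

/* Illustrative only; the input rows are hypothetical. */
public class WeightedStatsSketch {

  public static void main(String[] args) {
    List<double[]> rows = Arrays.asList(
        new double[] { 900000.0, 1800.0, 0.002 },
        new double[] { 1000000.0, 5000.0, 0.005 },
        new double[] { 1200000.0, 3600.0, 0.003 });

    /* Sort by extrapolated network total, as in the simulations. */
    rows.sort(Comparator.comparingDouble(row -> row[0]));
    double totalReportingProbability = 0.0;
    for (double[] row : rows) {
      totalReportingProbability += row[2];
    }

    double totalProbability = 0.0;
    double totalValues = 0.0;
    double totalInterquartileProbability = 0.0;
    double totalInterquartileValues = 0.0;
    Double weightedMedian = null;
    for (double[] row : rows) {
      totalValues += row[1];
      totalProbability += row[2];
      if (weightedMedian == null
          && totalProbability > totalReportingProbability * 0.5) {
        weightedMedian = row[0];
      }
      if (totalProbability > totalReportingProbability * 0.25
          && totalProbability < totalReportingProbability * 0.75) {
        totalInterquartileValues += row[1];
        totalInterquartileProbability += row[2];
      }
    }

    /* Prints wmean=1040000 wmedian=1000000 wiqm=1000000 for the rows
     * above. */
    System.out.printf("wmean=%.0f wmedian=%.0f wiqm=%.0f%n",
        totalValues / totalProbability, weightedMedian,
        totalInterquartileValues / totalInterquartileProbability);
  }
}
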
diff --git a/modules/legacy/build.xml b/modules/legacy/build.xml
index 252a712..f4ef8e7 100644
--- a/modules/legacy/build.xml
+++ b/modules/legacy/build.xml
@@ -8,7 +8,6 @@
<pathelement path="${classes}"/>
<path refid="base.classpath" />
<fileset dir="${libs}">
- <include name="commons-codec-1.9.jar"/>
<include name="postgresql-jdbc3-9.2.jar"/>
</fileset>
</path>
diff --git a/modules/legacy/src/main/java/org/torproject/ernie/cron/Configuration.java b/modules/legacy/src/main/java/org/torproject/ernie/cron/Configuration.java
new file mode 100644
index 0000000..e0d753f
--- /dev/null
+++ b/modules/legacy/src/main/java/org/torproject/ernie/cron/Configuration.java
@@ -0,0 +1,206 @@
+/* Copyright 2011--2017 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.ernie.cron;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * Initialize configuration with hard-coded defaults, overwrite it with
+ * the configuration file, if it exists, and answer Main.java's questions
+ * about our configuration.
+ */
+public class Configuration {
+
+ private boolean importDirectoryArchives = false;
+
+ private List<String> directoryArchivesDirectories = new ArrayList<>();
+
+ private boolean keepDirectoryArchiveImportHistory = false;
+
+ private boolean importSanitizedBridges = false;
+
+ private String sanitizedBridgesDirectory = "in/bridge-descriptors/";
+
+ private boolean keepSanitizedBridgesImportHistory = false;
+
+ private boolean writeRelayDescriptorDatabase = false;
+
+ private String relayDescriptorDatabaseJdbc =
+ "jdbc:postgresql://localhost/tordir?user=metrics&password=password";
+
+ private boolean writeRelayDescriptorsRawFiles = false;
+
+ private String relayDescriptorRawFilesDirectory = "pg-import/";
+
+ private boolean writeBridgeStats = false;
+
+ private boolean importWriteTorperfStats = false;
+
+ private String torperfDirectory = "in/torperf/";
+
+ private String exoneraTorDatabaseJdbc = "jdbc:postgresql:"
+ + "//localhost/exonerator?user=metrics&password=password";
+
+ private String exoneraTorImportDirectory = "exonerator-import/";
+
+ /** Initializes this configuration class. */
+ public Configuration() {
+
+ /* Initialize logger. */
+ Logger logger = Logger.getLogger(Configuration.class.getName());
+
+ /* Read config file, if present. */
+ File configFile = new File("config");
+ if (!configFile.exists()) {
+ logger.warning("Could not find config file.");
+ return;
+ }
+ String line = null;
+ try {
+ BufferedReader br = new BufferedReader(new FileReader(configFile));
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("#") || line.length() < 1) {
+ continue;
+ } else if (line.startsWith("ImportDirectoryArchives")) {
+ this.importDirectoryArchives = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("DirectoryArchivesDirectory")) {
+ this.directoryArchivesDirectories.add(line.split(" ")[1]);
+ } else if (line.startsWith("KeepDirectoryArchiveImportHistory")) {
+ this.keepDirectoryArchiveImportHistory = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("ImportSanitizedBridges")) {
+ this.importSanitizedBridges = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("SanitizedBridgesDirectory")) {
+ this.sanitizedBridgesDirectory = line.split(" ")[1];
+ } else if (line.startsWith("KeepSanitizedBridgesImportHistory")) {
+ this.keepSanitizedBridgesImportHistory = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("WriteRelayDescriptorDatabase")) {
+ this.writeRelayDescriptorDatabase = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("RelayDescriptorDatabaseJDBC")) {
+ this.relayDescriptorDatabaseJdbc = line.split(" ")[1];
+ } else if (line.startsWith("WriteRelayDescriptorsRawFiles")) {
+ this.writeRelayDescriptorsRawFiles = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("RelayDescriptorRawFilesDirectory")) {
+ this.relayDescriptorRawFilesDirectory = line.split(" ")[1];
+ } else if (line.startsWith("WriteBridgeStats")) {
+ this.writeBridgeStats = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("ImportWriteTorperfStats")) {
+ this.importWriteTorperfStats = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("TorperfDirectory")) {
+ this.torperfDirectory = line.split(" ")[1];
+ } else if (line.startsWith("ExoneraTorDatabaseJdbc")) {
+ this.exoneraTorDatabaseJdbc = line.split(" ")[1];
+ } else if (line.startsWith("ExoneraTorImportDirectory")) {
+ this.exoneraTorImportDirectory = line.split(" ")[1];
+ } else {
+ logger.severe("Configuration file contains unrecognized "
+ + "configuration key in line '" + line + "'! Exiting!");
+ System.exit(1);
+ }
+ }
+ br.close();
+ } catch (ArrayIndexOutOfBoundsException e) {
+ logger.severe("Configuration file contains configuration key "
+ + "without value in line '" + line + "'. Exiting!");
+ System.exit(1);
+ } catch (MalformedURLException e) {
+ logger.severe("Configuration file contains illegal URL or IP:port "
+ + "pair in line '" + line + "'. Exiting!");
+ System.exit(1);
+ } catch (NumberFormatException e) {
+ logger.severe("Configuration file contains illegal value in line '"
+ + line + "' with legal values being 0 or 1. Exiting!");
+ System.exit(1);
+ } catch (IOException e) {
+ logger.log(Level.SEVERE, "Unknown problem while reading config "
+ + "file! Exiting!", e);
+ System.exit(1);
+ }
+ }
+
+ public boolean getImportDirectoryArchives() {
+ return this.importDirectoryArchives;
+ }
+
+ /** Returns directories containing archived descriptors. */
+ public List<String> getDirectoryArchivesDirectories() {
+ if (this.directoryArchivesDirectories.isEmpty()) {
+ String prefix = "../../shared/in/recent/relay-descriptors/";
+ return Arrays.asList(
+ (prefix + "consensuses/," + prefix + "server-descriptors/,"
+ + prefix + "extra-infos/").split(","));
+ } else {
+ return this.directoryArchivesDirectories;
+ }
+ }
+
+ public boolean getKeepDirectoryArchiveImportHistory() {
+ return this.keepDirectoryArchiveImportHistory;
+ }
+
+ public boolean getWriteRelayDescriptorDatabase() {
+ return this.writeRelayDescriptorDatabase;
+ }
+
+ public boolean getImportSanitizedBridges() {
+ return this.importSanitizedBridges;
+ }
+
+ public String getSanitizedBridgesDirectory() {
+ return this.sanitizedBridgesDirectory;
+ }
+
+ public boolean getKeepSanitizedBridgesImportHistory() {
+ return this.keepSanitizedBridgesImportHistory;
+ }
+
+ public String getRelayDescriptorDatabaseJdbc() {
+ return this.relayDescriptorDatabaseJdbc;
+ }
+
+ public boolean getWriteRelayDescriptorsRawFiles() {
+ return this.writeRelayDescriptorsRawFiles;
+ }
+
+ public String getRelayDescriptorRawFilesDirectory() {
+ return this.relayDescriptorRawFilesDirectory;
+ }
+
+ public boolean getWriteBridgeStats() {
+ return this.writeBridgeStats;
+ }
+
+ public boolean getImportWriteTorperfStats() {
+ return this.importWriteTorperfStats;
+ }
+
+ public String getTorperfDirectory() {
+ return this.torperfDirectory;
+ }
+
+ public String getExoneraTorDatabaseJdbc() {
+ return this.exoneraTorDatabaseJdbc;
+ }
+
+ public String getExoneraTorImportDirectory() {
+ return this.exoneraTorImportDirectory;
+ }
+}
+
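For reference, the parser above expects one space-separated key/value pair
per line; a minimal excerpt of such a config file could look like this
(the JDBC URL and the Torperf directory are made-up placeholders, not
values taken from this commit):

  WriteRelayDescriptorDatabase 1
  RelayDescriptorDatabaseJDBC jdbc:postgresql://localhost/tordir?user=metrics
  WriteBridgeStats 1
  ImportWriteTorperfStats 1
  TorperfDirectory ../../shared/in/torperf/
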
diff --git a/modules/legacy/src/main/java/org/torproject/ernie/cron/LockFile.java b/modules/legacy/src/main/java/org/torproject/ernie/cron/LockFile.java
new file mode 100644
index 0000000..48eb83d
--- /dev/null
+++ b/modules/legacy/src/main/java/org/torproject/ernie/cron/LockFile.java
@@ -0,0 +1,58 @@
+/* Copyright 2011--2017 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.ernie.cron;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.logging.Logger;
+
+public class LockFile {
+
+ private File lockFile;
+ private Logger logger;
+
+ public LockFile() {
+ this.lockFile = new File("lock");
+ this.logger = Logger.getLogger(LockFile.class.getName());
+ }
+
+  /** Acquires the lock by checking whether a lock file already exists
+   * and was created less than 23 hours ago, and if not, by writing a
+   * new lock file with the current system time as content. */
+ public boolean acquireLock() {
+ this.logger.fine("Trying to acquire lock...");
+ try {
+ if (this.lockFile.exists()) {
+ BufferedReader br = new BufferedReader(new FileReader("lock"));
+ long runStarted = Long.parseLong(br.readLine());
+ br.close();
+ if (System.currentTimeMillis() - runStarted
+ < 23L * 60L * 60L * 1000L) {
+ return false;
+ }
+ }
+ BufferedWriter bw = new BufferedWriter(new FileWriter("lock"));
+ bw.append("" + System.currentTimeMillis() + "\n");
+ bw.close();
+ this.logger.fine("Acquired lock.");
+ return true;
+ } catch (IOException e) {
+ this.logger.warning("Caught exception while trying to acquire "
+ + "lock!");
+ return false;
+ }
+ }
+
+ /** Releases the lock by deleting the lock file, if present. */
+ public void releaseLock() {
+ this.logger.fine("Releasing lock...");
+ this.lockFile.delete();
+ this.logger.fine("Released lock.");
+ }
+}
+
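A minimal usage sketch for this class (the calls mirror what Main below
does; the error message and the try/finally wrapper are illustrative
only):

  LockFile lf = new LockFile();
  if (!lf.acquireLock()) {
    System.err.println("Lock file exists and is less than 23 hours old.");
    System.exit(1);
  }
  try {
    /* ... do the actual work ... */
  } finally {
    lf.releaseLock();
  }
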
diff --git a/modules/legacy/src/main/java/org/torproject/ernie/cron/LoggingConfiguration.java b/modules/legacy/src/main/java/org/torproject/ernie/cron/LoggingConfiguration.java
new file mode 100644
index 0000000..f6658c5
--- /dev/null
+++ b/modules/legacy/src/main/java/org/torproject/ernie/cron/LoggingConfiguration.java
@@ -0,0 +1,100 @@
+/* Copyright 2011--2017 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.ernie.cron;
+
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.TimeZone;
+import java.util.logging.ConsoleHandler;
+import java.util.logging.FileHandler;
+import java.util.logging.Formatter;
+import java.util.logging.Handler;
+import java.util.logging.Level;
+import java.util.logging.LogRecord;
+import java.util.logging.Logger;
+
+/**
+ * Initialize logging configuration.
+ *
+ * <p>Log levels used by ERNIE:</p>
+ *
+ * <p>
+ * <ul>
+ * <li>SEVERE: An event made it impossible to continue program
+ *     execution.</li>
+ * <li>WARNING: A potential problem occurred that requires the operator
+ *     to look after the otherwise unattended setup.</li>
+ * <li>INFO: Messages on INFO level are meant to help the operator in
+ * making sure that operation works as expected.</li>
+ * <li>FINE: Debug messages that are used to identify problems and which
+ * are turned on by default.</li>
+ * <li>FINER: More detailed debug messages to investigate problems in more
+ * detail. Not turned on by default. Increase log file limit when
+ * using FINER.</li>
+ * <li>FINEST: Most detailed debug messages. Not used.</li>
+ * </ul>
+ * </p>
+ */
+public class LoggingConfiguration {
+
+ /** Initializes the logging configuration. */
+ public LoggingConfiguration() {
+
+ /* Remove default console handler. */
+ for (Handler h : Logger.getLogger("").getHandlers()) {
+ Logger.getLogger("").removeHandler(h);
+ }
+
+ /* Disable logging of internal Sun classes. */
+ Logger.getLogger("sun").setLevel(Level.OFF);
+
+ /* Set minimum log level we care about from INFO to FINER. */
+ Logger.getLogger("").setLevel(Level.FINER);
+
+ /* Create log handler that writes messages on WARNING or higher to the
+ * console. */
+ final SimpleDateFormat dateTimeFormat =
+ new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ Formatter cf = new Formatter() {
+ public String format(LogRecord record) {
+ return dateTimeFormat.format(new Date(record.getMillis())) + " "
+ + record.getMessage() + "\n";
+ }
+ };
+ Handler ch = new ConsoleHandler();
+ ch.setFormatter(cf);
+ ch.setLevel(Level.WARNING);
+ Logger.getLogger("").addHandler(ch);
+
+ /* Initialize own logger for this class. */
+ Logger logger = Logger.getLogger(
+ LoggingConfiguration.class.getName());
+
+ /* Create log handler that writes all messages on FINE or higher to a
+ * local file. */
+ Formatter ff = new Formatter() {
+ public String format(LogRecord record) {
+ return dateTimeFormat.format(new Date(record.getMillis())) + " "
+ + record.getLevel() + " " + record.getSourceClassName() + " "
+ + record.getSourceMethodName() + " " + record.getMessage()
+ + (record.getThrown() != null ? " " + record.getThrown() : "")
+ + "\n";
+ }
+ };
+ try {
+ FileHandler fh = new FileHandler("log", 5000000, 5, true);
+ fh.setFormatter(ff);
+ fh.setLevel(Level.FINE);
+ Logger.getLogger("").addHandler(fh);
+ } catch (SecurityException e) {
+ logger.log(Level.WARNING, "No permission to create log file. "
+ + "Logging to file is disabled.", e);
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Could not write to log file. Logging to "
+ + "file is disabled.", e);
+ }
+ }
+}
+
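A short sketch of how a module would pick this configuration up (the
class name SomeModule is a placeholder and the messages are examples;
the pattern itself matches Main below):

  new LoggingConfiguration();
  Logger logger = Logger.getLogger(SomeModule.class.getName());
  logger.fine("Written to the local log file (FINE and above).");
  logger.warning("Also printed to the console (WARNING and above).");
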
diff --git a/modules/legacy/src/main/java/org/torproject/ernie/cron/Main.java b/modules/legacy/src/main/java/org/torproject/ernie/cron/Main.java
new file mode 100644
index 0000000..0eab86f
--- /dev/null
+++ b/modules/legacy/src/main/java/org/torproject/ernie/cron/Main.java
@@ -0,0 +1,90 @@
+/* Copyright 2011--2017 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.ernie.cron;
+
+import org.torproject.ernie.cron.network.ConsensusStatsFileHandler;
+import org.torproject.ernie.cron.performance.TorperfProcessor;
+
+import java.io.File;
+import java.util.logging.Logger;
+
+/**
+ * Coordinate downloading and parsing of descriptors and extraction of
+ * statistically relevant data for later processing with R.
+ */
+public class Main {
+
+ /** Executes this data-processing module. */
+ public static void main(String[] args) {
+
+ /* Initialize logging configuration. */
+ new LoggingConfiguration();
+
+ Logger logger = Logger.getLogger(Main.class.getName());
+ logger.info("Starting ERNIE.");
+
+ // Initialize configuration
+ Configuration config = new Configuration();
+
+ // Use lock file to avoid overlapping runs
+ LockFile lf = new LockFile();
+ if (!lf.acquireLock()) {
+ logger.severe("Warning: ERNIE is already running or has not exited "
+ + "cleanly! Exiting!");
+ System.exit(1);
+ }
+
+ // Define stats directory for temporary files
+ File statsDirectory = new File("stats");
+
+ // Import relay descriptors
+ if (config.getImportDirectoryArchives()) {
+ RelayDescriptorDatabaseImporter rddi =
+ config.getWriteRelayDescriptorDatabase()
+ || config.getWriteRelayDescriptorsRawFiles()
+ ? new RelayDescriptorDatabaseImporter(
+ config.getWriteRelayDescriptorDatabase()
+ ? config.getRelayDescriptorDatabaseJdbc() : null,
+ config.getWriteRelayDescriptorsRawFiles()
+ ? config.getRelayDescriptorRawFilesDirectory() : null,
+ config.getDirectoryArchivesDirectories(),
+ statsDirectory,
+ config.getKeepDirectoryArchiveImportHistory()) : null;
+      if (rddi != null) {
+        rddi.importRelayDescriptors();
+        rddi.closeConnection();
+      }
+ }
+
+ // Prepare consensus stats file handler (used for stats on running
+ // bridges only)
+ ConsensusStatsFileHandler csfh = config.getWriteBridgeStats()
+ ? new ConsensusStatsFileHandler(
+ config.getRelayDescriptorDatabaseJdbc(),
+ new File(config.getSanitizedBridgesDirectory()),
+ statsDirectory, config.getKeepSanitizedBridgesImportHistory())
+ : null;
+
+ // Import sanitized bridges and write updated stats files to disk
+ if (csfh != null) {
+ if (config.getImportSanitizedBridges()) {
+ csfh.importSanitizedBridges();
+ }
+ csfh.writeFiles();
+ csfh = null;
+ }
+
+ // Import and process torperf stats
+ if (config.getImportWriteTorperfStats()) {
+ new TorperfProcessor(new File(config.getTorperfDirectory()),
+ statsDirectory);
+ }
+
+ // Remove lock file
+ lf.releaseLock();
+
+ logger.info("Terminating ERNIE.");
+ }
+}
+
diff --git a/modules/legacy/src/main/java/org/torproject/ernie/cron/RelayDescriptorDatabaseImporter.java b/modules/legacy/src/main/java/org/torproject/ernie/cron/RelayDescriptorDatabaseImporter.java
new file mode 100644
index 0000000..97a330e
--- /dev/null
+++ b/modules/legacy/src/main/java/org/torproject/ernie/cron/RelayDescriptorDatabaseImporter.java
@@ -0,0 +1,995 @@
+/* Copyright 2011--2017 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.ernie.cron;
+
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorFile;
+import org.torproject.descriptor.DescriptorReader;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.ExtraInfoDescriptor;
+import org.torproject.descriptor.NetworkStatusEntry;
+import org.torproject.descriptor.RelayNetworkStatusConsensus;
+import org.torproject.descriptor.ServerDescriptor;
+
+import org.postgresql.util.PGbytea;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.sql.CallableStatement;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Timestamp;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TimeZone;
+import java.util.TreeSet;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * Parse directory data.
+ */
+
+/* TODO Split up this class and move its parts to cron.network,
+ * cron.users, and status.relaysearch packages. Requires extensive
+ * changes to the database schema though. */
+public final class RelayDescriptorDatabaseImporter {
+
+ /**
+ * How many records to commit with each database transaction.
+ */
+ private final long autoCommitCount = 500;
+
+ /* Counters to keep track of the number of records committed before
+ * each transaction. */
+
+ private int rdsCount = 0;
+
+ private int resCount = 0;
+
+ private int rhsCount = 0;
+
+ private int rrsCount = 0;
+
+ private int rcsCount = 0;
+
+ private int rvsCount = 0;
+
+ private int rqsCount = 0;
+
+ /**
+ * Relay descriptor database connection.
+ */
+ private Connection conn;
+
+ /**
+ * Prepared statement to check whether any network status consensus
+ * entries matching a given valid-after time have been imported into the
+ * database before.
+ */
+ private PreparedStatement psSs;
+
+ /**
+ * Prepared statement to check whether a given server descriptor has
+ * been imported into the database before.
+ */
+ private PreparedStatement psDs;
+
+ /**
+ * Prepared statement to check whether a given network status consensus
+ * has been imported into the database before.
+ */
+ private PreparedStatement psCs;
+
+ /**
+ * Set of dates that have been inserted into the database for being
+ * included in the next refresh run.
+ */
+ private Set<Long> scheduledUpdates;
+
+ /**
+ * Prepared statement to insert a date into the database that shall be
+ * included in the next refresh run.
+ */
+ private PreparedStatement psU;
+
+ /**
+ * Prepared statement to insert a network status consensus entry into
+ * the database.
+ */
+ private PreparedStatement psR;
+
+ /**
+ * Prepared statement to insert a server descriptor into the database.
+ */
+ private PreparedStatement psD;
+
+ /**
+ * Callable statement to insert the bandwidth history of an extra-info
+ * descriptor into the database.
+ */
+ private CallableStatement csH;
+
+ /**
+ * Prepared statement to insert a network status consensus into the
+ * database.
+ */
+ private PreparedStatement psC;
+
+ /**
+ * Logger for this class.
+ */
+ private Logger logger;
+
+ /**
+ * Directory for writing raw import files.
+ */
+ private String rawFilesDirectory;
+
+ /**
+ * Raw import file containing status entries.
+ */
+ private BufferedWriter statusentryOut;
+
+ /**
+ * Raw import file containing server descriptors.
+ */
+ private BufferedWriter descriptorOut;
+
+ /**
+ * Raw import file containing bandwidth histories.
+ */
+ private BufferedWriter bwhistOut;
+
+ /**
+ * Raw import file containing consensuses.
+ */
+ private BufferedWriter consensusOut;
+
+ /**
+ * Date format to parse timestamps.
+ */
+ private SimpleDateFormat dateTimeFormat;
+
+ /**
+   * The last valid-after time for which we checked whether there have
+   * been any network status entries in the database.
+ */
+ private long lastCheckedStatusEntries;
+
+ /**
+ * Set of fingerprints that we imported for the valid-after time in
+ * <code>lastCheckedStatusEntries</code>.
+ */
+ private Set<String> insertedStatusEntries = new HashSet<>();
+
+ private boolean importIntoDatabase;
+
+ private boolean writeRawImportFiles;
+
+ private List<String> archivesDirectories;
+
+ private File statsDirectory;
+
+ private boolean keepImportHistory;
+
+ /**
+ * Initialize database importer by connecting to the database and
+ * preparing statements.
+ */
+ public RelayDescriptorDatabaseImporter(String connectionUrl,
+ String rawFilesDirectory, List<String> archivesDirectories,
+ File statsDirectory, boolean keepImportHistory) {
+
+ if (archivesDirectories == null || statsDirectory == null) {
+ throw new IllegalArgumentException();
+ }
+ this.archivesDirectories = archivesDirectories;
+ this.statsDirectory = statsDirectory;
+ this.keepImportHistory = keepImportHistory;
+
+ /* Initialize logger. */
+ this.logger = Logger.getLogger(
+ RelayDescriptorDatabaseImporter.class.getName());
+
+ if (connectionUrl != null) {
+ try {
+ /* Connect to database. */
+ this.conn = DriverManager.getConnection(connectionUrl);
+
+ /* Turn autocommit off */
+ this.conn.setAutoCommit(false);
+
+ /* Prepare statements. */
+ this.psSs = conn.prepareStatement("SELECT fingerprint "
+ + "FROM statusentry WHERE validafter = ?");
+ this.psDs = conn.prepareStatement("SELECT COUNT(*) "
+ + "FROM descriptor WHERE descriptor = ?");
+ this.psCs = conn.prepareStatement("SELECT COUNT(*) "
+ + "FROM consensus WHERE validafter = ?");
+ this.psR = conn.prepareStatement("INSERT INTO statusentry "
+ + "(validafter, nickname, fingerprint, descriptor, "
+ + "published, address, orport, dirport, isauthority, "
+ + "isbadexit, isbaddirectory, isexit, isfast, isguard, "
+ + "ishsdir, isnamed, isstable, isrunning, isunnamed, "
+ + "isvalid, isv2dir, isv3dir, version, bandwidth, ports, "
+ + "rawdesc) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, "
+ + "?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)");
+ this.psD = conn.prepareStatement("INSERT INTO descriptor "
+ + "(descriptor, nickname, address, orport, dirport, "
+ + "fingerprint, bandwidthavg, bandwidthburst, "
+ + "bandwidthobserved, platform, published, uptime, "
+ + "extrainfo) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, "
+ + "?)");
+ this.csH = conn.prepareCall("{call insert_bwhist(?, ?, ?, ?, ?, "
+ + "?)}");
+ this.psC = conn.prepareStatement("INSERT INTO consensus "
+ + "(validafter) VALUES (?)");
+ this.psU = conn.prepareStatement("INSERT INTO scheduled_updates "
+ + "(date) VALUES (?)");
+ this.scheduledUpdates = new HashSet<>();
+ this.importIntoDatabase = true;
+ } catch (SQLException e) {
+ this.logger.log(Level.WARNING, "Could not connect to database or "
+ + "prepare statements.", e);
+ }
+ }
+
+ /* Remember where we want to write raw import files. */
+ if (rawFilesDirectory != null) {
+ this.rawFilesDirectory = rawFilesDirectory;
+ this.writeRawImportFiles = true;
+ }
+
+ /* Initialize date format, so that we can format timestamps. */
+ this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ }
+
+ private void addDateToScheduledUpdates(long timestamp)
+ throws SQLException {
+ if (!this.importIntoDatabase) {
+ return;
+ }
+ long dateMillis = 0L;
+ try {
+ dateMillis = this.dateTimeFormat.parse(
+ this.dateTimeFormat.format(timestamp).substring(0, 10)
+ + " 00:00:00").getTime();
+ } catch (ParseException e) {
+ this.logger.log(Level.WARNING, "Internal parsing error.", e);
+ return;
+ }
+ if (!this.scheduledUpdates.contains(dateMillis)) {
+ this.psU.setDate(1, new java.sql.Date(dateMillis));
+ this.psU.execute();
+ this.scheduledUpdates.add(dateMillis);
+ }
+ }
+
+ /**
+ * Insert network status consensus entry into database.
+ */
+ public void addStatusEntryContents(long validAfter, String nickname,
+ String fingerprint, String descriptor, long published,
+ String address, long orPort, long dirPort,
+ SortedSet<String> flags, String version, long bandwidth,
+ String ports, byte[] rawDescriptor) {
+ if (this.importIntoDatabase) {
+ try {
+ this.addDateToScheduledUpdates(validAfter);
+ Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
+ Timestamp validAfterTimestamp = new Timestamp(validAfter);
+ if (lastCheckedStatusEntries != validAfter) {
+ insertedStatusEntries.clear();
+ this.psSs.setTimestamp(1, validAfterTimestamp, cal);
+ ResultSet rs = psSs.executeQuery();
+ while (rs.next()) {
+ String insertedFingerprint = rs.getString(1);
+ insertedStatusEntries.add(insertedFingerprint);
+ }
+ rs.close();
+ lastCheckedStatusEntries = validAfter;
+ }
+ if (!insertedStatusEntries.contains(fingerprint)) {
+ this.psR.clearParameters();
+ this.psR.setTimestamp(1, validAfterTimestamp, cal);
+ this.psR.setString(2, nickname);
+ this.psR.setString(3, fingerprint);
+ this.psR.setString(4, descriptor);
+ this.psR.setTimestamp(5, new Timestamp(published), cal);
+ this.psR.setString(6, address);
+ this.psR.setLong(7, orPort);
+ this.psR.setLong(8, dirPort);
+ this.psR.setBoolean(9, flags.contains("Authority"));
+ this.psR.setBoolean(10, flags.contains("BadExit"));
+ this.psR.setBoolean(11, flags.contains("BadDirectory"));
+ this.psR.setBoolean(12, flags.contains("Exit"));
+ this.psR.setBoolean(13, flags.contains("Fast"));
+ this.psR.setBoolean(14, flags.contains("Guard"));
+ this.psR.setBoolean(15, flags.contains("HSDir"));
+ this.psR.setBoolean(16, flags.contains("Named"));
+ this.psR.setBoolean(17, flags.contains("Stable"));
+ this.psR.setBoolean(18, flags.contains("Running"));
+ this.psR.setBoolean(19, flags.contains("Unnamed"));
+ this.psR.setBoolean(20, flags.contains("Valid"));
+ this.psR.setBoolean(21, flags.contains("V2Dir"));
+ this.psR.setBoolean(22, flags.contains("V3Dir"));
+ this.psR.setString(23, version);
+ this.psR.setLong(24, bandwidth);
+ this.psR.setString(25, ports);
+ this.psR.setBytes(26, rawDescriptor);
+ this.psR.executeUpdate();
+ rrsCount++;
+ if (rrsCount % autoCommitCount == 0) {
+ this.conn.commit();
+ }
+ insertedStatusEntries.add(fingerprint);
+ }
+ } catch (SQLException e) {
+ this.logger.log(Level.WARNING, "Could not add network status "
+ + "consensus entry. We won't make any further SQL requests "
+ + "in this execution.", e);
+ this.importIntoDatabase = false;
+ }
+ }
+ if (this.writeRawImportFiles) {
+ try {
+ if (this.statusentryOut == null) {
+ new File(rawFilesDirectory).mkdirs();
+ this.statusentryOut = new BufferedWriter(new FileWriter(
+ rawFilesDirectory + "/statusentry.sql"));
+ this.statusentryOut.write(" COPY statusentry (validafter, "
+ + "nickname, fingerprint, descriptor, published, address, "
+ + "orport, dirport, isauthority, isbadExit, "
+ + "isbaddirectory, isexit, isfast, isguard, ishsdir, "
+ + "isnamed, isstable, isrunning, isunnamed, isvalid, "
+ + "isv2dir, isv3dir, version, bandwidth, ports, rawdesc) "
+ + "FROM stdin;\n");
+ }
+ this.statusentryOut.write(
+ this.dateTimeFormat.format(validAfter) + "\t" + nickname
+ + "\t" + fingerprint.toLowerCase() + "\t"
+ + descriptor.toLowerCase() + "\t"
+ + this.dateTimeFormat.format(published) + "\t" + address
+ + "\t" + orPort + "\t" + dirPort + "\t"
+ + (flags.contains("Authority") ? "t" : "f") + "\t"
+ + (flags.contains("BadExit") ? "t" : "f") + "\t"
+ + (flags.contains("BadDirectory") ? "t" : "f") + "\t"
+ + (flags.contains("Exit") ? "t" : "f") + "\t"
+ + (flags.contains("Fast") ? "t" : "f") + "\t"
+ + (flags.contains("Guard") ? "t" : "f") + "\t"
+ + (flags.contains("HSDir") ? "t" : "f") + "\t"
+ + (flags.contains("Named") ? "t" : "f") + "\t"
+ + (flags.contains("Stable") ? "t" : "f") + "\t"
+ + (flags.contains("Running") ? "t" : "f") + "\t"
+ + (flags.contains("Unnamed") ? "t" : "f") + "\t"
+ + (flags.contains("Valid") ? "t" : "f") + "\t"
+ + (flags.contains("V2Dir") ? "t" : "f") + "\t"
+ + (flags.contains("V3Dir") ? "t" : "f") + "\t"
+ + (version != null ? version : "\\N") + "\t"
+ + (bandwidth >= 0 ? bandwidth : "\\N") + "\t"
+ + (ports != null ? ports : "\\N") + "\t");
+ this.statusentryOut.write(PGbytea.toPGString(rawDescriptor)
+ .replaceAll("\\\\", "\\\\\\\\") + "\n");
+ } catch (SQLException e) {
+ this.logger.log(Level.WARNING, "Could not write network status "
+ + "consensus entry to raw database import file. We won't "
+ + "make any further attempts to write raw import files in "
+ + "this execution.", e);
+ this.writeRawImportFiles = false;
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not write network status "
+ + "consensus entry to raw database import file. We won't "
+ + "make any further attempts to write raw import files in "
+ + "this execution.", e);
+ this.writeRawImportFiles = false;
+ }
+ }
+ }
+
+ /**
+ * Insert server descriptor into database.
+ */
+ public void addServerDescriptorContents(String descriptor,
+ String nickname, String address, int orPort, int dirPort,
+ String relayIdentifier, long bandwidthAvg, long bandwidthBurst,
+ long bandwidthObserved, String platform, long published,
+ long uptime, String extraInfoDigest) {
+ if (this.importIntoDatabase) {
+ try {
+ this.addDateToScheduledUpdates(published);
+ this.addDateToScheduledUpdates(
+ published + 24L * 60L * 60L * 1000L);
+ Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
+ this.psDs.setString(1, descriptor);
+ ResultSet rs = psDs.executeQuery();
+ rs.next();
+ if (rs.getInt(1) == 0) {
+ this.psD.clearParameters();
+ this.psD.setString(1, descriptor);
+ this.psD.setString(2, nickname);
+ this.psD.setString(3, address);
+ this.psD.setInt(4, orPort);
+ this.psD.setInt(5, dirPort);
+ this.psD.setString(6, relayIdentifier);
+ this.psD.setLong(7, bandwidthAvg);
+ this.psD.setLong(8, bandwidthBurst);
+ this.psD.setLong(9, bandwidthObserved);
+ /* Remove all non-ASCII characters from the platform string, or
+ * we'll make Postgres unhappy. Sun's JDK and OpenJDK behave
+ * differently when creating a new String with a given encoding.
+ * That's what the regexp below is for. */
+ this.psD.setString(10, new String(platform.getBytes(),
+ "US-ASCII").replaceAll("[^\\p{ASCII}]",""));
+ this.psD.setTimestamp(11, new Timestamp(published), cal);
+ this.psD.setLong(12, uptime);
+ this.psD.setString(13, extraInfoDigest);
+ this.psD.executeUpdate();
+ rdsCount++;
+ if (rdsCount % autoCommitCount == 0) {
+ this.conn.commit();
+ }
+ }
+ } catch (UnsupportedEncodingException e) {
+ // US-ASCII is supported for sure
+ } catch (SQLException e) {
+ this.logger.log(Level.WARNING, "Could not add server "
+ + "descriptor. We won't make any further SQL requests in "
+ + "this execution.", e);
+ this.importIntoDatabase = false;
+ }
+ }
+ if (this.writeRawImportFiles) {
+ try {
+ if (this.descriptorOut == null) {
+ new File(rawFilesDirectory).mkdirs();
+ this.descriptorOut = new BufferedWriter(new FileWriter(
+ rawFilesDirectory + "/descriptor.sql"));
+ this.descriptorOut.write(" COPY descriptor (descriptor, "
+ + "nickname, address, orport, dirport, fingerprint, "
+ + "bandwidthavg, bandwidthburst, bandwidthobserved, "
+ + "platform, published, uptime, extrainfo) FROM stdin;\n");
+ }
+ this.descriptorOut.write(descriptor.toLowerCase() + "\t"
+ + nickname + "\t" + address + "\t" + orPort + "\t" + dirPort
+ + "\t" + relayIdentifier + "\t" + bandwidthAvg + "\t"
+ + bandwidthBurst + "\t" + bandwidthObserved + "\t"
+ + (platform != null && platform.length() > 0
+ ? new String(platform.getBytes(), "US-ASCII") : "\\N")
+ + "\t" + this.dateTimeFormat.format(published) + "\t"
+ + (uptime >= 0 ? uptime : "\\N") + "\t"
+ + (extraInfoDigest != null ? extraInfoDigest : "\\N")
+ + "\n");
+ } catch (UnsupportedEncodingException e) {
+ // US-ASCII is supported for sure
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not write server "
+ + "descriptor to raw database import file. We won't make "
+ + "any further attempts to write raw import files in this "
+ + "execution.", e);
+ this.writeRawImportFiles = false;
+ }
+ }
+ }
+
+ /**
+ * Insert extra-info descriptor into database.
+ */
+ public void addExtraInfoDescriptorContents(String extraInfoDigest,
+ String nickname, String fingerprint, long published,
+ List<String> bandwidthHistoryLines) {
+ if (!bandwidthHistoryLines.isEmpty()) {
+ this.addBandwidthHistory(fingerprint.toLowerCase(), published,
+ bandwidthHistoryLines);
+ }
+ }
+
+ private static class BigIntArray implements java.sql.Array {
+
+ private final String stringValue;
+
+ public BigIntArray(long[] array, int offset) {
+ if (array == null) {
+ this.stringValue = "[-1:-1]={0}";
+ } else {
+ StringBuilder sb = new StringBuilder("[" + offset + ":"
+ + (offset + array.length - 1) + "]={");
+ for (int i = 0; i < array.length; i++) {
+ sb.append((i > 0 ? "," : "") + array[i]);
+ }
+ sb.append('}');
+ this.stringValue = sb.toString();
+ }
+ }
+
+ public String toString() {
+ return stringValue;
+ }
+
+ public String getBaseTypeName() {
+ return "int8";
+ }
+
+ /* The other methods are never called; no need to implement them. */
+ public void free() {
+ throw new UnsupportedOperationException();
+ }
+
+ public Object getArray() {
+ throw new UnsupportedOperationException();
+ }
+
+ public Object getArray(long index, int count) {
+ throw new UnsupportedOperationException();
+ }
+
+ public Object getArray(long index, int count,
+ Map<String, Class<?>> map) {
+ throw new UnsupportedOperationException();
+ }
+
+ public Object getArray(Map<String, Class<?>> map) {
+ throw new UnsupportedOperationException();
+ }
+
+ public int getBaseType() {
+ throw new UnsupportedOperationException();
+ }
+
+ public ResultSet getResultSet() {
+ throw new UnsupportedOperationException();
+ }
+
+ public ResultSet getResultSet(long index, int count) {
+ throw new UnsupportedOperationException();
+ }
+
+ public ResultSet getResultSet(long index, int count,
+ Map<String, Class<?>> map) {
+ throw new UnsupportedOperationException();
+ }
+
+ public ResultSet getResultSet(Map<String, Class<?>> map) {
+ throw new UnsupportedOperationException();
+ }
+ }
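+
+  /* Illustration with hypothetical values: new BigIntArray(new long[] {
+   * 100L, 200L, 300L }, 5).toString() yields "[5:7]={100,200,300}",
+   * i.e. a one-dimensional Postgres array literal with explicit index
+   * bounds; a null array yields "[-1:-1]={0}". */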
+
+ /** Inserts a bandwidth history into database. */
+ public void addBandwidthHistory(String fingerprint, long published,
+ List<String> bandwidthHistoryStrings) {
+
+ /* Split history lines by date and rewrite them so that the date
+ * comes first. */
+ SortedSet<String> historyLinesByDate = new TreeSet<>();
+ for (String bandwidthHistoryString : bandwidthHistoryStrings) {
+ String[] parts = bandwidthHistoryString.split(" ");
+ if (parts.length != 6) {
+        this.logger.finer("Bandwidth history line does not have the "
+            + "expected number of elements. Ignoring this line.");
+ continue;
+ }
+ long intervalLength = 0L;
+ try {
+ intervalLength = Long.parseLong(parts[3].substring(1));
+ } catch (NumberFormatException e) {
+ this.logger.fine("Bandwidth history line does not have valid "
+ + "interval length '" + parts[3] + " " + parts[4] + "'. "
+ + "Ignoring this line.");
+ continue;
+ }
+ String[] values = parts[5].split(",");
+ if (intervalLength % 900L != 0L) {
+ this.logger.fine("Bandwidth history line does not contain "
+ + "multiples of 15-minute intervals. Ignoring this line.");
+ continue;
+ } else if (intervalLength != 900L) {
+ /* This is a really dirty hack to support bandwidth history
+ * intervals that are longer than 15 minutes by linearly
+ * distributing reported bytes to 15 minute intervals. The
+ * alternative would have been to modify the database schema. */
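+          /* Example with hypothetical numbers: a single 3600-second
+           * interval reporting "4000" becomes four 900-second intervals
+           * of "1000" each. */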
+ try {
+ long factor = intervalLength / 900L;
+ String[] newValues = new String[values.length * (int) factor];
+ for (int i = 0; i < newValues.length; i++) {
+ newValues[i] = String.valueOf(
+ Long.parseLong(values[i / (int) factor]) / factor);
+ }
+ values = newValues;
+ intervalLength = 900L;
+ } catch (NumberFormatException e) {
+ this.logger.fine("Number format exception while parsing "
+ + "bandwidth history line. Ignoring this line.");
+ continue;
+ }
+ }
+ String type = parts[0];
+ String intervalEndTime = parts[1] + " " + parts[2];
+ long intervalEnd;
+ long dateStart;
+ try {
+ intervalEnd = dateTimeFormat.parse(intervalEndTime).getTime();
+ dateStart = dateTimeFormat.parse(parts[1] + " 00:00:00")
+ .getTime();
+ } catch (ParseException e) {
+ this.logger.fine("Parse exception while parsing timestamp in "
+ + "bandwidth history line. Ignoring this line.");
+ continue;
+ }
+ if (Math.abs(published - intervalEnd)
+ > 7L * 24L * 60L * 60L * 1000L) {
+ this.logger.fine("Extra-info descriptor publication time "
+ + dateTimeFormat.format(published) + " and last interval "
+ + "time " + intervalEndTime + " in " + type + " line differ "
+ + "by more than 7 days! Not adding this line!");
+ continue;
+ }
+ long currentIntervalEnd = intervalEnd;
+ StringBuilder sb = new StringBuilder();
+ SortedSet<String> newHistoryLines = new TreeSet<>();
+ try {
+ for (int i = values.length - 1; i >= -1; i--) {
+ if (i == -1 || currentIntervalEnd < dateStart) {
+ sb.insert(0, intervalEndTime + " " + type + " ("
+ + intervalLength + " s) ");
+ sb.setLength(sb.length() - 1);
+ String historyLine = sb.toString();
+ newHistoryLines.add(historyLine);
+ sb = new StringBuilder();
+ dateStart -= 24L * 60L * 60L * 1000L;
+ intervalEndTime = dateTimeFormat.format(currentIntervalEnd);
+ }
+ if (i == -1) {
+ break;
+ }
+ Long.parseLong(values[i]);
+ sb.insert(0, values[i] + ",");
+ currentIntervalEnd -= intervalLength * 1000L;
+ }
+ } catch (NumberFormatException e) {
+ this.logger.fine("Number format exception while parsing "
+ + "bandwidth history line. Ignoring this line.");
+ continue;
+ }
+ historyLinesByDate.addAll(newHistoryLines);
+ }
+
+ /* Add split history lines to database. */
+ String lastDate = null;
+ historyLinesByDate.add("EOL");
+ long[] readArray = null;
+ long[] writtenArray = null;
+ long[] dirreadArray = null;
+ long[] dirwrittenArray = null;
+ int readOffset = 0;
+ int writtenOffset = 0;
+ int dirreadOffset = 0;
+ int dirwrittenOffset = 0;
+ for (String historyLine : historyLinesByDate) {
+ String[] parts = historyLine.split(" ");
+ String currentDate = parts[0];
+ if (lastDate != null && (historyLine.equals("EOL")
+ || !currentDate.equals(lastDate))) {
+ BigIntArray readIntArray = new BigIntArray(readArray,
+ readOffset);
+ BigIntArray writtenIntArray = new BigIntArray(writtenArray,
+ writtenOffset);
+ BigIntArray dirreadIntArray = new BigIntArray(dirreadArray,
+ dirreadOffset);
+ BigIntArray dirwrittenIntArray = new BigIntArray(dirwrittenArray,
+ dirwrittenOffset);
+ if (this.importIntoDatabase) {
+ try {
+ long dateMillis = dateTimeFormat.parse(lastDate
+ + " 00:00:00").getTime();
+ this.addDateToScheduledUpdates(dateMillis);
+ this.csH.setString(1, fingerprint);
+ this.csH.setDate(2, new java.sql.Date(dateMillis));
+ this.csH.setArray(3, readIntArray);
+ this.csH.setArray(4, writtenIntArray);
+ this.csH.setArray(5, dirreadIntArray);
+ this.csH.setArray(6, dirwrittenIntArray);
+ this.csH.addBatch();
+ rhsCount++;
+ if (rhsCount % autoCommitCount == 0) {
+ this.csH.executeBatch();
+ }
+ } catch (SQLException e) {
+ this.logger.log(Level.WARNING, "Could not insert bandwidth "
+ + "history line into database. We won't make any "
+ + "further SQL requests in this execution.", e);
+ this.importIntoDatabase = false;
+ } catch (ParseException e) {
+ this.logger.log(Level.WARNING, "Could not insert bandwidth "
+ + "history line into database. We won't make any "
+ + "further SQL requests in this execution.", e);
+ this.importIntoDatabase = false;
+ }
+ }
+ if (this.writeRawImportFiles) {
+ try {
+ if (this.bwhistOut == null) {
+ new File(rawFilesDirectory).mkdirs();
+ this.bwhistOut = new BufferedWriter(new FileWriter(
+ rawFilesDirectory + "/bwhist.sql"));
+ }
+ this.bwhistOut.write("SELECT insert_bwhist('" + fingerprint
+ + "','" + lastDate + "','" + readIntArray.toString()
+ + "','" + writtenIntArray.toString() + "','"
+ + dirreadIntArray.toString() + "','"
+ + dirwrittenIntArray.toString() + "');\n");
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not write bandwidth "
+ + "history to raw database import file. We won't make "
+ + "any further attempts to write raw import files in "
+ + "this execution.", e);
+ this.writeRawImportFiles = false;
+ }
+ }
+ readArray = writtenArray = dirreadArray = dirwrittenArray = null;
+ }
+ if (historyLine.equals("EOL")) {
+ break;
+ }
+ long lastIntervalTime;
+ try {
+ lastIntervalTime = dateTimeFormat.parse(parts[0] + " "
+ + parts[1]).getTime() - dateTimeFormat.parse(parts[0]
+ + " 00:00:00").getTime();
+ } catch (ParseException e) {
+ continue;
+ }
+ String[] stringValues = parts[5].split(",");
+ long[] longValues = new long[stringValues.length];
+ for (int i = 0; i < longValues.length; i++) {
+ longValues[i] = Long.parseLong(stringValues[i]);
+ }
+
+ int offset = (int) (lastIntervalTime / (15L * 60L * 1000L))
+ - longValues.length + 1;
+ String type = parts[2];
+ if (type.equals("read-history")) {
+ readArray = longValues;
+ readOffset = offset;
+ } else if (type.equals("write-history")) {
+ writtenArray = longValues;
+ writtenOffset = offset;
+ } else if (type.equals("dirreq-read-history")) {
+ dirreadArray = longValues;
+ dirreadOffset = offset;
+ } else if (type.equals("dirreq-write-history")) {
+ dirwrittenArray = longValues;
+ dirwrittenOffset = offset;
+ }
+ lastDate = currentDate;
+ }
+ }
+
+ /**
+ * Insert network status consensus into database.
+ */
+ public void addConsensus(long validAfter) {
+ if (this.importIntoDatabase) {
+ try {
+ this.addDateToScheduledUpdates(validAfter);
+ Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
+ Timestamp validAfterTimestamp = new Timestamp(validAfter);
+ this.psCs.setTimestamp(1, validAfterTimestamp, cal);
+ ResultSet rs = psCs.executeQuery();
+ rs.next();
+ if (rs.getInt(1) == 0) {
+ this.psC.clearParameters();
+ this.psC.setTimestamp(1, validAfterTimestamp, cal);
+ this.psC.executeUpdate();
+ rcsCount++;
+ if (rcsCount % autoCommitCount == 0) {
+ this.conn.commit();
+ }
+ }
+ } catch (SQLException e) {
+ this.logger.log(Level.WARNING, "Could not add network status "
+ + "consensus. We won't make any further SQL requests in "
+ + "this execution.", e);
+ this.importIntoDatabase = false;
+ }
+ }
+ if (this.writeRawImportFiles) {
+ try {
+ if (this.consensusOut == null) {
+ new File(rawFilesDirectory).mkdirs();
+ this.consensusOut = new BufferedWriter(new FileWriter(
+ rawFilesDirectory + "/consensus.sql"));
+ this.consensusOut.write(" COPY consensus (validafter) "
+ + "FROM stdin;\n");
+ }
+ String validAfterString = this.dateTimeFormat.format(validAfter);
+ this.consensusOut.write(validAfterString + "\n");
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not write network status "
+ + "consensus to raw database import file. We won't make "
+ + "any further attempts to write raw import files in this "
+ + "execution.", e);
+ this.writeRawImportFiles = false;
+ }
+ }
+ }
+
+ /** Imports relay descriptors into the database. */
+ public void importRelayDescriptors() {
+ logger.fine("Importing files in directories " + archivesDirectories
+ + "/...");
+ if (!this.archivesDirectories.isEmpty()) {
+ DescriptorReader reader =
+ DescriptorSourceFactory.createDescriptorReader();
+ reader.setMaxDescriptorFilesInQueue(10);
+ for (String archivesPath : this.archivesDirectories) {
+ File archivesDirectory = new File(archivesPath);
+ if (archivesDirectory.exists()) {
+ reader.addDirectory(archivesDirectory);
+ }
+ }
+ if (keepImportHistory) {
+ reader.setExcludeFiles(new File(statsDirectory,
+ "database-importer-relay-descriptor-history"));
+ }
+ Iterator<DescriptorFile> descriptorFiles = reader.readDescriptors();
+ while (descriptorFiles.hasNext()) {
+ DescriptorFile descriptorFile = descriptorFiles.next();
+ if (descriptorFile.getDescriptors() != null) {
+ for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+ if (descriptor instanceof RelayNetworkStatusConsensus) {
+ this.addRelayNetworkStatusConsensus(
+ (RelayNetworkStatusConsensus) descriptor);
+ } else if (descriptor instanceof ServerDescriptor) {
+ this.addServerDescriptor((ServerDescriptor) descriptor);
+ } else if (descriptor instanceof ExtraInfoDescriptor) {
+ this.addExtraInfoDescriptor(
+ (ExtraInfoDescriptor) descriptor);
+ }
+ }
+ }
+ }
+ }
+
+ logger.info("Finished importing relay descriptors.");
+ }
+
+ private void addRelayNetworkStatusConsensus(
+ RelayNetworkStatusConsensus consensus) {
+ for (NetworkStatusEntry statusEntry
+ : consensus.getStatusEntries().values()) {
+ this.addStatusEntryContents(consensus.getValidAfterMillis(),
+ statusEntry.getNickname(),
+ statusEntry.getFingerprint().toLowerCase(),
+ statusEntry.getDescriptor().toLowerCase(),
+ statusEntry.getPublishedMillis(), statusEntry.getAddress(),
+ statusEntry.getOrPort(), statusEntry.getDirPort(),
+ statusEntry.getFlags(), statusEntry.getVersion(),
+ statusEntry.getBandwidth(), statusEntry.getPortList(),
+ statusEntry.getStatusEntryBytes());
+ }
+ this.addConsensus(consensus.getValidAfterMillis());
+ }
+
+ private void addServerDescriptor(ServerDescriptor descriptor) {
+ this.addServerDescriptorContents(
+ descriptor.getServerDescriptorDigest(), descriptor.getNickname(),
+ descriptor.getAddress(), descriptor.getOrPort(),
+ descriptor.getDirPort(), descriptor.getFingerprint(),
+ descriptor.getBandwidthRate(), descriptor.getBandwidthBurst(),
+ descriptor.getBandwidthObserved(), descriptor.getPlatform(),
+ descriptor.getPublishedMillis(), descriptor.getUptime(),
+ descriptor.getExtraInfoDigest());
+ }
+
+ private void addExtraInfoDescriptor(ExtraInfoDescriptor descriptor) {
+ List<String> bandwidthHistoryLines = new ArrayList<>();
+ if (descriptor.getWriteHistory() != null) {
+ bandwidthHistoryLines.add(descriptor.getWriteHistory().getLine());
+ }
+ if (descriptor.getReadHistory() != null) {
+ bandwidthHistoryLines.add(descriptor.getReadHistory().getLine());
+ }
+ if (descriptor.getDirreqWriteHistory() != null) {
+ bandwidthHistoryLines.add(
+ descriptor.getDirreqWriteHistory().getLine());
+ }
+ if (descriptor.getDirreqReadHistory() != null) {
+ bandwidthHistoryLines.add(
+ descriptor.getDirreqReadHistory().getLine());
+ }
+ this.addExtraInfoDescriptorContents(descriptor.getExtraInfoDigest(),
+ descriptor.getNickname(),
+ descriptor.getFingerprint().toLowerCase(),
+ descriptor.getPublishedMillis(), bandwidthHistoryLines);
+ }
+
+ /**
+ * Close the relay descriptor database connection.
+ */
+ public void closeConnection() {
+
+ /* Log stats about imported descriptors. */
+ this.logger.info(String.format("Finished importing relay "
+ + "descriptors: %d consensuses, %d network status entries, %d "
+ + "votes, %d server descriptors, %d extra-info descriptors, %d "
+ + "bandwidth history elements, and %d dirreq stats elements",
+ rcsCount, rrsCount, rvsCount, rdsCount, resCount, rhsCount,
+ rqsCount));
+
+ /* Insert scheduled updates a second time, just in case the refresh
+ * run has started since inserting them the first time in which case
+ * it will miss the data inserted afterwards. We cannot, however,
+ * insert them only now, because if a Java execution fails at a random
+ * point, we might have added data, but not the corresponding dates to
+ * update statistics. */
+ if (this.importIntoDatabase) {
+ try {
+ for (long dateMillis : this.scheduledUpdates) {
+ this.psU.setDate(1, new java.sql.Date(dateMillis));
+ this.psU.execute();
+ }
+ } catch (SQLException e) {
+ this.logger.log(Level.WARNING, "Could not add scheduled dates "
+ + "for the next refresh run.", e);
+ }
+ }
+
+ /* Commit any stragglers before closing. */
+ if (this.conn != null) {
+ try {
+ this.csH.executeBatch();
+
+ this.conn.commit();
+ } catch (SQLException e) {
+ this.logger.log(Level.WARNING, "Could not commit final records "
+ + "to database", e);
+ }
+ try {
+ this.conn.close();
+ } catch (SQLException e) {
+ this.logger.log(Level.WARNING, "Could not close database "
+ + "connection.", e);
+ }
+ }
+
+ /* Close raw import files. */
+ try {
+ if (this.statusentryOut != null) {
+ this.statusentryOut.write("\\.\n");
+ this.statusentryOut.close();
+ }
+ if (this.descriptorOut != null) {
+ this.descriptorOut.write("\\.\n");
+ this.descriptorOut.close();
+ }
+ if (this.bwhistOut != null) {
+ this.bwhistOut.write("\\.\n");
+ this.bwhistOut.close();
+ }
+ if (this.consensusOut != null) {
+ this.consensusOut.write("\\.\n");
+ this.consensusOut.close();
+ }
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not close one or more raw "
+ + "database import files.", e);
+ }
+ }
+}
+
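When writing raw import files is enabled, the simplest of them,
consensus.sql, ends up as a psql COPY block along the following lines
(the timestamps are invented for illustration):

  COPY consensus (validafter) FROM stdin;
  2017-02-23 12:00:00
  2017-02-23 13:00:00
  \.
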
diff --git a/modules/legacy/src/main/java/org/torproject/ernie/cron/network/ConsensusStatsFileHandler.java b/modules/legacy/src/main/java/org/torproject/ernie/cron/network/ConsensusStatsFileHandler.java
new file mode 100644
index 0000000..aa9469e
--- /dev/null
+++ b/modules/legacy/src/main/java/org/torproject/ernie/cron/network/ConsensusStatsFileHandler.java
@@ -0,0 +1,412 @@
+/* Copyright 2011--2017 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.ernie.cron.network;
+
+import org.torproject.descriptor.BridgeNetworkStatus;
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorFile;
+import org.torproject.descriptor.DescriptorReader;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.NetworkStatusEntry;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.TimeZone;
+import java.util.TreeMap;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * Generates statistics on the average number of relays and bridges per
+ * day. Accepts parse results from <code>RelayDescriptorParser</code> and
+ * <code>BridgeDescriptorParser</code> and stores them in intermediate
+ * result files <code>stats/consensus-stats-raw</code> and
+ * <code>stats/bridge-consensus-stats-raw</code>. Writes final results to
+ * <code>stats/consensus-stats</code> for all days for which at least half
+ * of the expected consensuses or statuses are known.
+ */
+public class ConsensusStatsFileHandler {
+
+ /**
+ * Intermediate results file holding the number of running bridges per
+ * bridge status.
+ */
+ private File bridgeConsensusStatsRawFile;
+
+ /**
+   * Number of running bridges in a given bridge status. Map keys are the
+   * bridge status time formatted as "yyyy-MM-dd HH:mm:ss", a comma, and
+   * the bridge authority nickname; map values are lines as read from
+   * <code>stats/bridge-consensus-stats-raw</code>.
+ */
+ private SortedMap<String, String> bridgesRaw;
+
+ /**
+   * Average number of running bridges per day. Map keys are dates
+   * formatted as "yyyy-MM-dd"; map values are the remaining columns as
+   * written to <code>stats/consensus-stats</code>.
+ */
+ private SortedMap<String, String> bridgesPerDay;
+
+ /**
+ * Logger for this class.
+ */
+ private Logger logger;
+
+ private int bridgeResultsAdded = 0;
+
+ /* Database connection string. */
+ private String connectionUrl = null;
+
+ private SimpleDateFormat dateTimeFormat;
+
+ private File bridgesDir;
+
+ private File statsDirectory;
+
+ private boolean keepImportHistory;
+
+ /**
+ * Initializes this class, including reading in intermediate results
+ * files <code>stats/consensus-stats-raw</code> and
+ * <code>stats/bridge-consensus-stats-raw</code> and final results file
+ * <code>stats/consensus-stats</code>.
+ */
+ public ConsensusStatsFileHandler(String connectionUrl,
+ File bridgesDir, File statsDirectory,
+ boolean keepImportHistory) {
+
+ if (bridgesDir == null || statsDirectory == null) {
+ throw new IllegalArgumentException();
+ }
+ this.bridgesDir = bridgesDir;
+ this.statsDirectory = statsDirectory;
+ this.keepImportHistory = keepImportHistory;
+
+ /* Initialize local data structures to hold intermediate and final
+ * results. */
+ this.bridgesPerDay = new TreeMap<>();
+ this.bridgesRaw = new TreeMap<>();
+
+ /* Initialize file names for intermediate and final results files. */
+ this.bridgeConsensusStatsRawFile = new File(
+ "stats/bridge-consensus-stats-raw");
+
+ /* Initialize database connection string. */
+ this.connectionUrl = connectionUrl;
+
+ this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+
+ /* Initialize logger. */
+ this.logger = Logger.getLogger(
+ ConsensusStatsFileHandler.class.getName());
+
+ /* Read in number of running bridges per bridge status. */
+ if (this.bridgeConsensusStatsRawFile.exists()) {
+ try {
+ this.logger.fine("Reading file "
+ + this.bridgeConsensusStatsRawFile.getAbsolutePath() + "...");
+ BufferedReader br = new BufferedReader(new FileReader(
+ this.bridgeConsensusStatsRawFile));
+ String line = null;
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("date")) {
+ /* Skip headers. */
+ continue;
+ }
+ String[] parts = line.split(",");
+ if (parts.length < 2 || parts.length > 4) {
+ this.logger.warning("Corrupt line '" + line + "' in file "
+ + this.bridgeConsensusStatsRawFile.getAbsolutePath()
+ + "! Aborting to read this file!");
+ break;
+ }
+ /* Assume that all lines without authority nickname are based on
+ * Tonga's network status, not Bifroest's. */
+ String key = parts[0] + "," + (parts.length < 4 ? "Tonga" : parts[1]);
+ String value = null;
+ if (parts.length == 2) {
+ value = key + "," + parts[1] + ",0";
+ } else if (parts.length == 3) {
+ value = key + "," + parts[1] + "," + parts[2];
+ } else if (parts.length == 4) {
+ value = key + "," + parts[2] + "," + parts[3];
+ } /* No more cases as we already checked the range above. */
+ this.bridgesRaw.put(key, value);
+ }
+ br.close();
+ this.logger.fine("Finished reading file "
+ + this.bridgeConsensusStatsRawFile.getAbsolutePath() + ".");
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed to read file "
+ + this.bridgeConsensusStatsRawFile.getAbsolutePath() + "!",
+ e);
+ }
+ }
+ }
+
+ /**
+ * Adds the intermediate results of the number of running bridges in a
+ * given bridge status to the existing observations.
+ */
+ public void addBridgeConsensusResults(long publishedMillis,
+ String authorityNickname, int running, int runningEc2Bridges) {
+ String publishedAuthority = dateTimeFormat.format(publishedMillis) + ","
+ + authorityNickname;
+ String line = publishedAuthority + "," + running + "," + runningEc2Bridges;
+ if (!this.bridgesRaw.containsKey(publishedAuthority)) {
+ this.logger.finer("Adding new bridge numbers: " + line);
+ this.bridgesRaw.put(publishedAuthority, line);
+ this.bridgeResultsAdded++;
+ } else if (!line.equals(this.bridgesRaw.get(publishedAuthority))) {
+ this.logger.warning("The numbers of running bridges we were just "
+ + "given (" + line + ") are different from what we learned "
+ + "before (" + this.bridgesRaw.get(publishedAuthority) + ")! "
+ + "Overwriting!");
+ this.bridgesRaw.put(publishedAuthority, line);
+ }
+ }
+
+ /** Imports sanitized bridge descriptors. */
+ public void importSanitizedBridges() {
+ if (bridgesDir.exists()) {
+ logger.fine("Importing files in directory " + bridgesDir + "/...");
+ DescriptorReader reader =
+ DescriptorSourceFactory.createDescriptorReader();
+ reader.addDirectory(bridgesDir);
+ if (keepImportHistory) {
+ reader.setExcludeFiles(new File(statsDirectory,
+ "consensus-stats-bridge-descriptor-history"));
+ }
+ Iterator<DescriptorFile> descriptorFiles = reader.readDescriptors();
+ while (descriptorFiles.hasNext()) {
+ DescriptorFile descriptorFile = descriptorFiles.next();
+ if (descriptorFile.getDescriptors() != null) {
+ String authority = null;
+ if (descriptorFile.getFileName().contains(
+ "4A0CCD2DDC7995083D73F5D667100C8A5831F16D")) {
+ authority = "Tonga";
+ } else if (descriptorFile.getFileName().contains(
+ "1D8F3A91C37C5D1C4C19B1AD1D0CFBE8BF72D8E1")) {
+ authority = "Bifroest";
+ }
+ for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+ if (descriptor instanceof BridgeNetworkStatus) {
+ if (authority == null) {
+ this.logger.warning("Did not recognize the bridge authority "
+ + "that generated " + descriptorFile.getFileName()
+ + ". Skipping.");
+ continue;
+ }
+ this.addBridgeNetworkStatus(
+ (BridgeNetworkStatus) descriptor, authority);
+ }
+ }
+ }
+ }
+ logger.info("Finished importing bridge descriptors.");
+ }
+ }
+
+ private void addBridgeNetworkStatus(BridgeNetworkStatus status,
+ String authority) {
+ int runningBridges = 0;
+ int runningEc2Bridges = 0;
+ for (NetworkStatusEntry statusEntry
+ : status.getStatusEntries().values()) {
+ if (statusEntry.getFlags().contains("Running")) {
+ runningBridges++;
+ if (statusEntry.getNickname().startsWith("ec2bridge")) {
+ runningEc2Bridges++;
+ }
+ }
+ }
+ this.addBridgeConsensusResults(status.getPublishedMillis(), authority,
+ runningBridges, runningEc2Bridges);
+ }
+
+ /**
+ * Aggregates the raw observations on relay and bridge numbers and
+ * writes both raw and aggregate observations to disk.
+ */
+ public void writeFiles() {
+
+ /* Go through raw observations and put everything into nested maps by day
+ * and bridge authority. */
+ Map<String, Map<String, int[]>> bridgesPerDayAndAuthority = new HashMap<>();
+ for (String bridgesRawLine : this.bridgesRaw.values()) {
+ String date = bridgesRawLine.substring(0, 10);
+ if (!bridgesPerDayAndAuthority.containsKey(date)) {
+ bridgesPerDayAndAuthority.put(date, new TreeMap<String, int[]>());
+ }
+ String[] parts = bridgesRawLine.split(",");
+ String authority = parts[1];
+ if (!bridgesPerDayAndAuthority.get(date).containsKey(authority)) {
+ bridgesPerDayAndAuthority.get(date).put(authority, new int[3]);
+ }
+ int[] bridges = bridgesPerDayAndAuthority.get(date).get(authority);
+ bridges[0] += Integer.parseInt(parts[2]);
+ bridges[1] += Integer.parseInt(parts[3]);
+ bridges[2]++;
+ }
+
+ /* Sum up average numbers of running bridges per day reported by all bridge
+ * authorities and add these averages to final results. */
+ for (Map.Entry<String, Map<String, int[]>> perDay
+ : bridgesPerDayAndAuthority.entrySet()) {
+ String date = perDay.getKey();
+ int brunning = 0;
+ int brunningEc2 = 0;
+ for (int[] perAuthority : perDay.getValue().values()) {
+ int statuses = perAuthority[2];
+ if (statuses < 12) {
+ /* Only write results if we have seen at least a dozen statuses. */
+ continue;
+ }
+ brunning += perAuthority[0] / statuses;
+ brunningEc2 += perAuthority[1] / statuses;
+ }
+ String line = "," + brunning + "," + brunningEc2;
+ /* Are our results new? */
+ if (!this.bridgesPerDay.containsKey(date)) {
+ this.logger.finer("Adding new average bridge numbers: " + date + line);
+ this.bridgesPerDay.put(date, line);
+ } else if (!line.equals(this.bridgesPerDay.get(date))) {
+        this.logger.finer("Replacing existing average bridge numbers ("
+            + this.bridgesPerDay.get(date) + ") with new numbers: " + line);
+ this.bridgesPerDay.put(date, line);
+ }
+ }
+
+ /* Write raw numbers of running bridges to disk. */
+ try {
+ this.logger.fine("Writing file "
+ + this.bridgeConsensusStatsRawFile.getAbsolutePath() + "...");
+ this.bridgeConsensusStatsRawFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(
+ new FileWriter(this.bridgeConsensusStatsRawFile));
+ bw.append("datetime,authority,brunning,brunningec2");
+ bw.newLine();
+ for (String line : this.bridgesRaw.values()) {
+ bw.append(line);
+ bw.newLine();
+ }
+ bw.close();
+ this.logger.fine("Finished writing file "
+ + this.bridgeConsensusStatsRawFile.getAbsolutePath() + ".");
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed to write file "
+ + this.bridgeConsensusStatsRawFile.getAbsolutePath() + "!",
+ e);
+ }
+
+ /* Add average number of bridges per day to the database. */
+ if (connectionUrl != null) {
+ try {
+ Map<String, String> insertRows = new HashMap<>();
+ Map<String, String> updateRows = new HashMap<>();
+ insertRows.putAll(this.bridgesPerDay);
+ Connection conn = DriverManager.getConnection(connectionUrl);
+ conn.setAutoCommit(false);
+ Statement statement = conn.createStatement();
+ ResultSet rs = statement.executeQuery(
+ "SELECT date, avg_running, avg_running_ec2 "
+ + "FROM bridge_network_size");
+ while (rs.next()) {
+ String date = rs.getDate(1).toString();
+ if (insertRows.containsKey(date)) {
+ String insertRow = insertRows.remove(date);
+ String[] parts = insertRow.substring(1).split(",");
+ long newAvgRunning = Long.parseLong(parts[0]);
+ long newAvgRunningEc2 = Long.parseLong(parts[1]);
+ long oldAvgRunning = rs.getLong(2);
+ long oldAvgRunningEc2 = rs.getLong(3);
+ if (newAvgRunning != oldAvgRunning
+ || newAvgRunningEc2 != oldAvgRunningEc2) {
+ updateRows.put(date, insertRow);
+ }
+ }
+ }
+ rs.close();
+ PreparedStatement psU = conn.prepareStatement(
+ "UPDATE bridge_network_size SET avg_running = ?, "
+ + "avg_running_ec2 = ? WHERE date = ?");
+ for (Map.Entry<String, String> e : updateRows.entrySet()) {
+ java.sql.Date date = java.sql.Date.valueOf(e.getKey());
+ String[] parts = e.getValue().substring(1).split(",");
+ long avgRunning = Long.parseLong(parts[0]);
+ long avgRunningEc2 = Long.parseLong(parts[1]);
+ psU.clearParameters();
+ psU.setLong(1, avgRunning);
+ psU.setLong(2, avgRunningEc2);
+ psU.setDate(3, date);
+ psU.executeUpdate();
+ }
+ PreparedStatement psI = conn.prepareStatement(
+ "INSERT INTO bridge_network_size (avg_running, "
+ + "avg_running_ec2, date) VALUES (?, ?, ?)");
+ for (Map.Entry<String, String> e : insertRows.entrySet()) {
+ java.sql.Date date = java.sql.Date.valueOf(e.getKey());
+ String[] parts = e.getValue().substring(1).split(",");
+ long avgRunning = Long.parseLong(parts[0]);
+ long avgRunningEc2 = Long.parseLong(parts[1]);
+ psI.clearParameters();
+ psI.setLong(1, avgRunning);
+ psI.setLong(2, avgRunningEc2);
+ psI.setDate(3, date);
+ psI.executeUpdate();
+ }
+ conn.commit();
+ conn.close();
+ } catch (SQLException e) {
+ logger.log(Level.WARNING, "Failed to add average bridge numbers "
+ + "to database.", e);
+ }
+ }
+
+ /* Write stats. */
+ StringBuilder dumpStats = new StringBuilder("Finished writing "
+ + "statistics on bridge network statuses to disk.\nAdded "
+ + this.bridgeResultsAdded + " bridge network status(es) in this "
+ + "execution.");
+ long now = System.currentTimeMillis();
+ SimpleDateFormat dateTimeFormat =
+ new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ if (this.bridgesRaw.isEmpty()) {
+ dumpStats.append("\nNo bridge status known yet.");
+ } else {
+ dumpStats.append("\nLast known bridge status was published "
+ + this.bridgesRaw.lastKey() + ".");
+ try {
+ if (now - 6L * 60L * 60L * 1000L > dateTimeFormat.parse(
+ this.bridgesRaw.lastKey()).getTime()) {
+ logger.warning("Last known bridge status is more than 6 hours "
+ + "old: " + this.bridgesRaw.lastKey());
+ }
+ } catch (ParseException e) {
+        logger.warning("Could not parse the timestamp of the last known "
+            + "bridge status: " + e);
+ }
+ }
+ logger.info(dumpStats.toString());
+ }
+}
+
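The intermediate file stats/bridge-consensus-stats-raw written above thus
contains a header line followed by one line per known bridge status, for
example (the counts are invented for illustration):

  datetime,authority,brunning,brunningec2
  2017-02-23 12:00:00,Bifroest,1722,12
  2017-02-23 13:00:00,Bifroest,1731,12
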
diff --git a/modules/legacy/src/main/java/org/torproject/ernie/cron/performance/TorperfProcessor.java b/modules/legacy/src/main/java/org/torproject/ernie/cron/performance/TorperfProcessor.java
new file mode 100644
index 0000000..2883299
--- /dev/null
+++ b/modules/legacy/src/main/java/org/torproject/ernie/cron/performance/TorperfProcessor.java
@@ -0,0 +1,292 @@
+/* Copyright 2011--2017 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.ernie.cron.performance;
+
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorFile;
+import org.torproject.descriptor.DescriptorReader;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.TorperfResult;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.TimeZone;
+import java.util.TreeMap;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+public class TorperfProcessor {
+
+ /** Processes Torperf data from the given directory and writes
+ * aggregate statistics to the given stats directory. */
+ public TorperfProcessor(File torperfDirectory, File statsDirectory) {
+
+ if (torperfDirectory == null || statsDirectory == null) {
+ throw new IllegalArgumentException();
+ }
+
+ Logger logger = Logger.getLogger(TorperfProcessor.class.getName());
+ File rawFile = new File(statsDirectory, "torperf-raw");
+ File statsFile = new File(statsDirectory, "torperf.csv");
+ SortedMap<String, String> rawObs = new TreeMap<>();
+ SortedMap<String, String> stats = new TreeMap<>();
+ int addedRawObs = 0;
+ SimpleDateFormat formatter =
+ new SimpleDateFormat("yyyy-MM-dd,HH:mm:ss");
+ formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+ try {
+ if (rawFile.exists()) {
+ logger.fine("Reading file " + rawFile.getAbsolutePath() + "...");
+ BufferedReader br = new BufferedReader(new FileReader(rawFile));
+ String line = br.readLine(); // ignore header
+ while ((line = br.readLine()) != null) {
+ if (line.split(",").length != 4) {
+ logger.warning("Corrupt line in " + rawFile.getAbsolutePath()
+ + "!");
+ break;
+ }
+ String key = line.substring(0, line.lastIndexOf(","));
+ rawObs.put(key, line);
+ }
+ br.close();
+ logger.fine("Finished reading file " + rawFile.getAbsolutePath()
+ + ".");
+ }
+ if (statsFile.exists()) {
+ logger.fine("Reading file " + statsFile.getAbsolutePath()
+ + "...");
+ BufferedReader br = new BufferedReader(new FileReader(statsFile));
+ String line = br.readLine(); // ignore header
+ while ((line = br.readLine()) != null) {
+ String[] parts = line.split(",");
+ String key = String.format("%s,%s,%s", parts[0], parts[1],
+ parts[2]);
+ stats.put(key, line);
+ }
+ br.close();
+ logger.fine("Finished reading file " + statsFile.getAbsolutePath()
+ + ".");
+ }
+ if (torperfDirectory.exists()) {
+ logger.fine("Importing files in " + torperfDirectory + "/...");
+ DescriptorReader descriptorReader =
+ DescriptorSourceFactory.createDescriptorReader();
+ descriptorReader.addDirectory(torperfDirectory);
+ descriptorReader.setExcludeFiles(new File(statsDirectory,
+ "torperf-history"));
+ Iterator<DescriptorFile> descriptorFiles =
+ descriptorReader.readDescriptors();
+ while (descriptorFiles.hasNext()) {
+ DescriptorFile descriptorFile = descriptorFiles.next();
+ if (descriptorFile.getException() != null) {
+ logger.log(Level.FINE, "Error parsing file.",
+ descriptorFile.getException());
+ continue;
+ }
+ for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+ if (!(descriptor instanceof TorperfResult)) {
+ continue;
+ }
+ TorperfResult result = (TorperfResult) descriptor;
+ String source = result.getSource();
+ long fileSize = result.getFileSize();
+ if (fileSize == 51200) {
+ source += "-50kb";
+ } else if (fileSize == 1048576) {
+ source += "-1mb";
+ } else if (fileSize == 5242880) {
+ source += "-5mb";
+ } else {
+ logger.fine("Unexpected file size '" + fileSize
+ + "'. Skipping.");
+ continue;
+ }
+ String dateTime = formatter.format(result.getStartMillis());
+ long completeMillis = result.getDataCompleteMillis()
+ - result.getStartMillis();
+ String key = source + "," + dateTime;
+ String value = key;
+ if ((result.didTimeout() == null
+ && result.getDataCompleteMillis() < 1)
+ || (result.didTimeout() != null && result.didTimeout())) {
+ value += ",-2"; // -2 for timeout
+ } else if (result.getReadBytes() < fileSize) {
+ value += ",-1"; // -1 for failure
+ } else {
+ value += "," + completeMillis;
+ }
+ if (!rawObs.containsKey(key)) {
+ rawObs.put(key, value);
+ addedRawObs++;
+ }
+ }
+ }
+ logger.fine("Finished importing files in " + torperfDirectory
+ + "/.");
+ }
+ if (rawObs.size() > 0) {
+ logger.fine("Writing file " + rawFile.getAbsolutePath() + "...");
+ rawFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(rawFile));
+ bw.append("source,date,start,completemillis\n");
+ String tempSourceDate = null;
+ Iterator<Map.Entry<String, String>> it =
+ rawObs.entrySet().iterator();
+ List<Long> dlTimes = new ArrayList<>();
+ boolean haveWrittenFinalLine = false;
+ SortedMap<String, List<Long>> dlTimesAllSources = new TreeMap<>();
+ SortedMap<String, long[]> statusesAllSources = new TreeMap<>();
+ long failures = 0;
+ long timeouts = 0;
+ long requests = 0;
+ while (it.hasNext() || !haveWrittenFinalLine) {
+ Map.Entry<String, String> next =
+ it.hasNext() ? it.next() : null;
+ if (tempSourceDate != null
+ && (next == null || !(next.getValue().split(",")[0] + ","
+ + next.getValue().split(",")[1]).equals(tempSourceDate))) {
+ if (dlTimes.size() > 4) {
+ Collections.sort(dlTimes);
+ long q1 = dlTimes.get(dlTimes.size() / 4 - 1);
+ long md = dlTimes.get(dlTimes.size() / 2 - 1);
+ long q3 = dlTimes.get(dlTimes.size() * 3 / 4 - 1);
+ String[] tempParts = tempSourceDate.split("[-,]", 3);
+ String tempDate = tempParts[2];
+ int tempSize = Integer.parseInt(
+ tempParts[1].substring(0, tempParts[1].length() - 2))
+ * 1024 * (tempParts[1].endsWith("mb") ? 1024 : 1);
+ String tempSource = tempParts[0];
+ String tempDateSizeSource = String.format("%s,%d,%s",
+ tempDate, tempSize, tempSource);
+ stats.put(tempDateSizeSource,
+ String.format("%s,%s,%s,%s,%s,%s,%s",
+ tempDateSizeSource, q1, md, q3, timeouts, failures,
+ requests));
+ String allDateSizeSource = String.format("%s,%d,",
+ tempDate, tempSize);
+ if (dlTimesAllSources.containsKey(allDateSizeSource)) {
+ dlTimesAllSources.get(allDateSizeSource).addAll(dlTimes);
+ } else {
+ dlTimesAllSources.put(allDateSizeSource, dlTimes);
+ }
+ if (statusesAllSources.containsKey(allDateSizeSource)) {
+ long[] status = statusesAllSources.get(allDateSizeSource);
+ status[0] += timeouts;
+ status[1] += failures;
+ status[2] += requests;
+ } else {
+ long[] status = new long[3];
+ status[0] = timeouts;
+ status[1] = failures;
+ status[2] = requests;
+ statusesAllSources.put(allDateSizeSource, status);
+ }
+ }
+ dlTimes = new ArrayList<>();
+ failures = timeouts = requests = 0;
+ if (next == null) {
+ haveWrittenFinalLine = true;
+ }
+ }
+ if (next != null) {
+ bw.append(next.getValue() + "\n");
+ String[] parts = next.getValue().split(",");
+ tempSourceDate = parts[0] + "," + parts[1];
+ long completeMillis = Long.parseLong(parts[3]);
+ if (completeMillis == -2L) {
+ timeouts++;
+ } else if (completeMillis == -1L) {
+ failures++;
+ } else {
+ dlTimes.add(Long.parseLong(parts[3]));
+ }
+ requests++;
+ }
+ }
+ bw.close();
+ for (Map.Entry<String, List<Long>> e
+ : dlTimesAllSources.entrySet()) {
+ String allDateSizeSource = e.getKey();
+ dlTimes = e.getValue();
+ Collections.sort(dlTimes);
+ long[] status = statusesAllSources.get(allDateSizeSource);
+ timeouts = status[0];
+ failures = status[1];
+ requests = status[2];
+ long q1 = dlTimes.get(dlTimes.size() / 4 - 1);
+ long md = dlTimes.get(dlTimes.size() / 2 - 1);
+ long q3 = dlTimes.get(dlTimes.size() * 3 / 4 - 1);
+ stats.put(allDateSizeSource,
+ String.format("%s,%s,%s,%s,%s,%s,%s",
+ allDateSizeSource, q1, md, q3, timeouts, failures,
+ requests));
+ }
+ logger.fine("Finished writing file " + rawFile.getAbsolutePath()
+ + ".");
+ }
+ if (stats.size() > 0) {
+ logger.fine("Writing file " + statsFile.getAbsolutePath()
+ + "...");
+ SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
+ dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ String yesterday = dateFormat.format(System.currentTimeMillis()
+ - 86400000L);
+ statsFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(statsFile));
+ bw.append("date,size,source,q1,md,q3,timeouts,failures,"
+ + "requests\n");
+ for (String s : stats.values()) {
+ if (s.compareTo(yesterday) < 0) {
+ bw.append(s + "\n");
+ }
+ }
+ bw.close();
+ logger.fine("Finished writing file " + statsFile.getAbsolutePath()
+ + ".");
+ }
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Failed writing "
+ + rawFile.getAbsolutePath() + " or "
+ + statsFile.getAbsolutePath() + "!", e);
+ }
+
+ /* Write stats. */
+ StringBuilder dumpStats = new StringBuilder("Finished writing "
+ + "statistics on torperf results.\nAdded " + addedRawObs
+ + " new observations in this execution.\n"
+ + "Last known obserations by source and file size are:");
+ String lastSource = null;
+ String lastLine = null;
+ for (String s : rawObs.keySet()) {
+ String[] parts = s.split(",");
+ if (lastSource == null) {
+ lastSource = parts[0];
+ } else if (!parts[0].equals(lastSource)) {
+ String lastKnownObservation = lastLine.split(",")[1] + " "
+ + lastLine.split(",")[2];
+ dumpStats.append("\n" + lastSource + " " + lastKnownObservation);
+ lastSource = parts[0];
+ }
+ lastLine = s;
+ }
+ if (lastSource != null) {
+ String lastKnownObservation = lastLine.split(",")[1] + " "
+ + lastLine.split(",")[2];
+ dumpStats.append("\n" + lastSource + " " + lastKnownObservation);
+ }
+ logger.info(dumpStats.toString());
+ }
+}
+
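TorperfProcessor aggregates the raw download times per source, date, and file size into the first quartile, median, and third quartile, taken directly from the sorted list at the lower-rounded positions size/4 - 1, size/2 - 1, and 3*size/4 - 1, which is why a group needs at least five observations before a stats line is emitted. A short sketch of that index arithmetic on hypothetical sample data:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class QuartileSketch {
  public static void main(String[] args) {
    /* Hypothetical download times in milliseconds. */
    List<Long> dlTimes = new ArrayList<>(Arrays.asList(
        1200L, 800L, 950L, 2100L, 700L, 1600L, 1300L, 900L));
    Collections.sort(dlTimes);
    /* Same positions as in TorperfProcessor above; requires size > 4. */
    long q1 = dlTimes.get(dlTimes.size() / 4 - 1);
    long md = dlTimes.get(dlTimes.size() / 2 - 1);
    long q3 = dlTimes.get(dlTimes.size() * 3 / 4 - 1);
    System.out.println(q1 + "," + md + "," + q3);  /* prints 800,950,1300 */
  }
}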
diff --git a/modules/legacy/src/org/torproject/ernie/cron/Configuration.java b/modules/legacy/src/org/torproject/ernie/cron/Configuration.java
deleted file mode 100644
index e0d753f..0000000
--- a/modules/legacy/src/org/torproject/ernie/cron/Configuration.java
+++ /dev/null
@@ -1,206 +0,0 @@
-/* Copyright 2011--2017 The Tor Project
- * See LICENSE for licensing information */
-
-package org.torproject.ernie.cron;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
-import java.io.IOException;
-import java.net.MalformedURLException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-/**
- * Initialize configuration with hard-coded defaults, overwrite it with
- * the configuration file if one exists, and answer Main.java's questions
- * about our configuration.
- */
-public class Configuration {
-
- private boolean importDirectoryArchives = false;
-
- private List<String> directoryArchivesDirectories = new ArrayList<>();
-
- private boolean keepDirectoryArchiveImportHistory = false;
-
- private boolean importSanitizedBridges = false;
-
- private String sanitizedBridgesDirectory = "in/bridge-descriptors/";
-
- private boolean keepSanitizedBridgesImportHistory = false;
-
- private boolean writeRelayDescriptorDatabase = false;
-
- private String relayDescriptorDatabaseJdbc =
- "jdbc:postgresql://localhost/tordir?user=metrics&password=password";
-
- private boolean writeRelayDescriptorsRawFiles = false;
-
- private String relayDescriptorRawFilesDirectory = "pg-import/";
-
- private boolean writeBridgeStats = false;
-
- private boolean importWriteTorperfStats = false;
-
- private String torperfDirectory = "in/torperf/";
-
- private String exoneraTorDatabaseJdbc = "jdbc:postgresql:"
- + "//localhost/exonerator?user=metrics&password=password";
-
- private String exoneraTorImportDirectory = "exonerator-import/";
-
- /** Initializes this configuration class. */
- public Configuration() {
-
- /* Initialize logger. */
- Logger logger = Logger.getLogger(Configuration.class.getName());
-
- /* Read config file, if present. */
- File configFile = new File("config");
- if (!configFile.exists()) {
- logger.warning("Could not find config file.");
- return;
- }
- String line = null;
- try {
- BufferedReader br = new BufferedReader(new FileReader(configFile));
- while ((line = br.readLine()) != null) {
- if (line.startsWith("#") || line.length() < 1) {
- continue;
- } else if (line.startsWith("ImportDirectoryArchives")) {
- this.importDirectoryArchives = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("DirectoryArchivesDirectory")) {
- this.directoryArchivesDirectories.add(line.split(" ")[1]);
- } else if (line.startsWith("KeepDirectoryArchiveImportHistory")) {
- this.keepDirectoryArchiveImportHistory = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("ImportSanitizedBridges")) {
- this.importSanitizedBridges = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("SanitizedBridgesDirectory")) {
- this.sanitizedBridgesDirectory = line.split(" ")[1];
- } else if (line.startsWith("KeepSanitizedBridgesImportHistory")) {
- this.keepSanitizedBridgesImportHistory = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("WriteRelayDescriptorDatabase")) {
- this.writeRelayDescriptorDatabase = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("RelayDescriptorDatabaseJDBC")) {
- this.relayDescriptorDatabaseJdbc = line.split(" ")[1];
- } else if (line.startsWith("WriteRelayDescriptorsRawFiles")) {
- this.writeRelayDescriptorsRawFiles = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("RelayDescriptorRawFilesDirectory")) {
- this.relayDescriptorRawFilesDirectory = line.split(" ")[1];
- } else if (line.startsWith("WriteBridgeStats")) {
- this.writeBridgeStats = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("ImportWriteTorperfStats")) {
- this.importWriteTorperfStats = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("TorperfDirectory")) {
- this.torperfDirectory = line.split(" ")[1];
- } else if (line.startsWith("ExoneraTorDatabaseJdbc")) {
- this.exoneraTorDatabaseJdbc = line.split(" ")[1];
- } else if (line.startsWith("ExoneraTorImportDirectory")) {
- this.exoneraTorImportDirectory = line.split(" ")[1];
- } else {
- logger.severe("Configuration file contains unrecognized "
- + "configuration key in line '" + line + "'! Exiting!");
- System.exit(1);
- }
- }
- br.close();
- } catch (ArrayIndexOutOfBoundsException e) {
- logger.severe("Configuration file contains configuration key "
- + "without value in line '" + line + "'. Exiting!");
- System.exit(1);
- } catch (MalformedURLException e) {
- logger.severe("Configuration file contains illegal URL or IP:port "
- + "pair in line '" + line + "'. Exiting!");
- System.exit(1);
- } catch (NumberFormatException e) {
- logger.severe("Configuration file contains illegal value in line '"
- + line + "' with legal values being 0 or 1. Exiting!");
- System.exit(1);
- } catch (IOException e) {
- logger.log(Level.SEVERE, "Unknown problem while reading config "
- + "file! Exiting!", e);
- System.exit(1);
- }
- }
-
- public boolean getImportDirectoryArchives() {
- return this.importDirectoryArchives;
- }
-
- /** Returns directories containing archived descriptors. */
- public List<String> getDirectoryArchivesDirectories() {
- if (this.directoryArchivesDirectories.isEmpty()) {
- String prefix = "../../shared/in/recent/relay-descriptors/";
- return Arrays.asList(
- (prefix + "consensuses/," + prefix + "server-descriptors/,"
- + prefix + "extra-infos/").split(","));
- } else {
- return this.directoryArchivesDirectories;
- }
- }
-
- public boolean getKeepDirectoryArchiveImportHistory() {
- return this.keepDirectoryArchiveImportHistory;
- }
-
- public boolean getWriteRelayDescriptorDatabase() {
- return this.writeRelayDescriptorDatabase;
- }
-
- public boolean getImportSanitizedBridges() {
- return this.importSanitizedBridges;
- }
-
- public String getSanitizedBridgesDirectory() {
- return this.sanitizedBridgesDirectory;
- }
-
- public boolean getKeepSanitizedBridgesImportHistory() {
- return this.keepSanitizedBridgesImportHistory;
- }
-
- public String getRelayDescriptorDatabaseJdbc() {
- return this.relayDescriptorDatabaseJdbc;
- }
-
- public boolean getWriteRelayDescriptorsRawFiles() {
- return this.writeRelayDescriptorsRawFiles;
- }
-
- public String getRelayDescriptorRawFilesDirectory() {
- return this.relayDescriptorRawFilesDirectory;
- }
-
- public boolean getWriteBridgeStats() {
- return this.writeBridgeStats;
- }
-
- public boolean getImportWriteTorperfStats() {
- return this.importWriteTorperfStats;
- }
-
- public String getTorperfDirectory() {
- return this.torperfDirectory;
- }
-
- public String getExoneraTorDatabaseJdbc() {
- return this.exoneraTorDatabaseJdbc;
- }
-
- public String getExoneraTorImportDirectory() {
- return this.exoneraTorImportDirectory;
- }
-}
-
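The Configuration class removed from the old src/ location here reads a plain-text config file with one space-separated key and value per line; boolean options are encoded as 0 or 1, lines starting with # are comments, and unknown keys abort the run. A reduced sketch of that convention, with hypothetical keys that are not part of the real config file:

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;

public class ConfigSketch {

  /* Hard-coded defaults, overridden by the config file if present. */
  private boolean importDescriptors = false;
  private String inputDirectory = "in/";

  /** Reads lines of the form "Key value"; 0/1 values toggle booleans. */
  public void readConfigFile(String path) throws IOException {
    try (BufferedReader br = new BufferedReader(new FileReader(path))) {
      String line;
      while ((line = br.readLine()) != null) {
        if (line.isEmpty() || line.startsWith("#")) {
          continue;
        }
        String[] parts = line.split(" ");
        if (parts[0].equals("ImportDescriptors")) {      /* hypothetical key */
          this.importDescriptors = Integer.parseInt(parts[1]) != 0;
        } else if (parts[0].equals("InputDirectory")) {  /* hypothetical key */
          this.inputDirectory = parts[1];
        } else {
          throw new IllegalArgumentException(
              "Unrecognized configuration key in line '" + line + "'");
        }
      }
    }
  }
}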
diff --git a/modules/legacy/src/org/torproject/ernie/cron/LockFile.java b/modules/legacy/src/org/torproject/ernie/cron/LockFile.java
deleted file mode 100644
index 48eb83d..0000000
--- a/modules/legacy/src/org/torproject/ernie/cron/LockFile.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/* Copyright 2011--2017 The Tor Project
- * See LICENSE for licensing information */
-
-package org.torproject.ernie.cron;
-
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.util.logging.Logger;
-
-public class LockFile {
-
- private File lockFile;
- private Logger logger;
-
- public LockFile() {
- this.lockFile = new File("lock");
- this.logger = Logger.getLogger(LockFile.class.getName());
- }
-
- /** Acquires the lock by checking whether a lock file already exists
-  * and was written less than 23 hours ago; if not, (re)creates the lock
-  * file with the current system time as content. */
- public boolean acquireLock() {
- this.logger.fine("Trying to acquire lock...");
- try {
- if (this.lockFile.exists()) {
- BufferedReader br = new BufferedReader(new FileReader("lock"));
- long runStarted = Long.parseLong(br.readLine());
- br.close();
- if (System.currentTimeMillis() - runStarted
- < 23L * 60L * 60L * 1000L) {
- return false;
- }
- }
- BufferedWriter bw = new BufferedWriter(new FileWriter("lock"));
- bw.append("" + System.currentTimeMillis() + "\n");
- bw.close();
- this.logger.fine("Acquired lock.");
- return true;
- } catch (IOException e) {
- this.logger.warning("Caught exception while trying to acquire "
- + "lock!");
- return false;
- }
- }
-
- /** Releases the lock by deleting the lock file, if present. */
- public void releaseLock() {
- this.logger.fine("Releasing lock...");
- this.lockFile.delete();
- this.logger.fine("Released lock.");
- }
-}
-
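LockFile guards against overlapping cron runs: acquireLock() refuses to proceed if a lock file exists and was written less than 23 hours ago, otherwise it (re)writes the file with the current time, and releaseLock() simply deletes the file. A small usage sketch follows; it relies on the LockFile class shown above, the processing step is only a placeholder, and wrapping the work in try/finally is a slightly more defensive variant than the straight-line calls in the Main class further down.

public class LockedRunSketch {
  public static void main(String[] args) {
    LockFile lf = new LockFile();
    if (!lf.acquireLock()) {
      System.err.println("Another run appears to be in progress. Exiting.");
      System.exit(1);
    }
    try {
      /* ... actual data-processing work would go here ... */
    } finally {
      lf.releaseLock();    /* release even if processing fails */
    }
  }
}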
diff --git a/modules/legacy/src/org/torproject/ernie/cron/LoggingConfiguration.java b/modules/legacy/src/org/torproject/ernie/cron/LoggingConfiguration.java
deleted file mode 100644
index f6658c5..0000000
--- a/modules/legacy/src/org/torproject/ernie/cron/LoggingConfiguration.java
+++ /dev/null
@@ -1,100 +0,0 @@
-/* Copyright 2011--2017 The Tor Project
- * See LICENSE for licensing information */
-
-package org.torproject.ernie.cron;
-
-import java.io.IOException;
-import java.text.SimpleDateFormat;
-import java.util.Date;
-import java.util.TimeZone;
-import java.util.logging.ConsoleHandler;
-import java.util.logging.FileHandler;
-import java.util.logging.Formatter;
-import java.util.logging.Handler;
-import java.util.logging.Level;
-import java.util.logging.LogRecord;
-import java.util.logging.Logger;
-
-/**
- * Initialize logging configuration.
- *
- * <p>Log levels used by ERNIE:</p>
- *
- * <p>
- * <ul>
- * <li>SEVERE: An event made it impossible to continue program
- * execution.</li>
- * <li>WARNING: A potential problem occurred that requires the operator to
- * look after the otherwise unattended setup.</li>
- * <li>INFO: Messages on INFO level are meant to help the operator in
- * making sure that operation works as expected.</li>
- * <li>FINE: Debug messages that are used to identify problems and which
- * are turned on by default.</li>
- * <li>FINER: More detailed debug messages to investigate problems in more
- * detail. Not turned on by default. Increase log file limit when
- * using FINER.</li>
- * <li>FINEST: Most detailed debug messages. Not used.</li>
- * </ul>
- * </p>
- */
-public class LoggingConfiguration {
-
- /** Initializes the logging configuration. */
- public LoggingConfiguration() {
-
- /* Remove default console handler. */
- for (Handler h : Logger.getLogger("").getHandlers()) {
- Logger.getLogger("").removeHandler(h);
- }
-
- /* Disable logging of internal Sun classes. */
- Logger.getLogger("sun").setLevel(Level.OFF);
-
- /* Set minimum log level we care about from INFO to FINER. */
- Logger.getLogger("").setLevel(Level.FINER);
-
- /* Create log handler that writes messages on WARNING or higher to the
- * console. */
- final SimpleDateFormat dateTimeFormat =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- Formatter cf = new Formatter() {
- public String format(LogRecord record) {
- return dateTimeFormat.format(new Date(record.getMillis())) + " "
- + record.getMessage() + "\n";
- }
- };
- Handler ch = new ConsoleHandler();
- ch.setFormatter(cf);
- ch.setLevel(Level.WARNING);
- Logger.getLogger("").addHandler(ch);
-
- /* Initialize own logger for this class. */
- Logger logger = Logger.getLogger(
- LoggingConfiguration.class.getName());
-
- /* Create log handler that writes all messages on FINE or higher to a
- * local file. */
- Formatter ff = new Formatter() {
- public String format(LogRecord record) {
- return dateTimeFormat.format(new Date(record.getMillis())) + " "
- + record.getLevel() + " " + record.getSourceClassName() + " "
- + record.getSourceMethodName() + " " + record.getMessage()
- + (record.getThrown() != null ? " " + record.getThrown() : "")
- + "\n";
- }
- };
- try {
- FileHandler fh = new FileHandler("log", 5000000, 5, true);
- fh.setFormatter(ff);
- fh.setLevel(Level.FINE);
- Logger.getLogger("").addHandler(fh);
- } catch (SecurityException e) {
- logger.log(Level.WARNING, "No permission to create log file. "
- + "Logging to file is disabled.", e);
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not write to log file. Logging to "
- + "file is disabled.", e);
- }
- }
-}
-
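LoggingConfiguration sets up two java.util.logging handlers with different thresholds: after removing the default console handler, it attaches a console handler that only shows WARNING and above in a short timestamped format and a rotating file handler that keeps FINE-level detail on disk. A condensed sketch of that two-handler pattern, using the stock SimpleFormatter in place of the custom formatters above:

import java.io.IOException;
import java.util.logging.ConsoleHandler;
import java.util.logging.FileHandler;
import java.util.logging.Handler;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.logging.SimpleFormatter;

public class TwoHandlerLoggingSketch {

  /** Configures a quiet console handler and a detailed rotating file log. */
  public static void configure() throws IOException {
    Logger root = Logger.getLogger("");
    for (Handler h : root.getHandlers()) {
      root.removeHandler(h);            /* drop the default console handler */
    }
    root.setLevel(Level.FINER);         /* let the handlers filter instead */
    ConsoleHandler console = new ConsoleHandler();
    console.setLevel(Level.WARNING);    /* operators only see problems */
    console.setFormatter(new SimpleFormatter());
    root.addHandler(console);
    /* 5 MB per file, 5 rotated files, append to existing logs. */
    FileHandler file = new FileHandler("log", 5000000, 5, true);
    file.setLevel(Level.FINE);
    file.setFormatter(new SimpleFormatter());
    root.addHandler(file);
  }
}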
diff --git a/modules/legacy/src/org/torproject/ernie/cron/Main.java b/modules/legacy/src/org/torproject/ernie/cron/Main.java
deleted file mode 100644
index 0eab86f..0000000
--- a/modules/legacy/src/org/torproject/ernie/cron/Main.java
+++ /dev/null
@@ -1,90 +0,0 @@
-/* Copyright 2011--2017 The Tor Project
- * See LICENSE for licensing information */
-
-package org.torproject.ernie.cron;
-
-import org.torproject.ernie.cron.network.ConsensusStatsFileHandler;
-import org.torproject.ernie.cron.performance.TorperfProcessor;
-
-import java.io.File;
-import java.util.logging.Logger;
-
-/**
- * Coordinate downloading and parsing of descriptors and extraction of
- * statistically relevant data for later processing with R.
- */
-public class Main {
-
- /** Executes this data-processing module. */
- public static void main(String[] args) {
-
- /* Initialize logging configuration. */
- new LoggingConfiguration();
-
- Logger logger = Logger.getLogger(Main.class.getName());
- logger.info("Starting ERNIE.");
-
- // Initialize configuration
- Configuration config = new Configuration();
-
- // Use lock file to avoid overlapping runs
- LockFile lf = new LockFile();
- if (!lf.acquireLock()) {
- logger.severe("Warning: ERNIE is already running or has not exited "
- + "cleanly! Exiting!");
- System.exit(1);
- }
-
- // Define stats directory for temporary files
- File statsDirectory = new File("stats");
-
- // Import relay descriptors
- if (config.getImportDirectoryArchives()) {
- RelayDescriptorDatabaseImporter rddi =
- config.getWriteRelayDescriptorDatabase()
- || config.getWriteRelayDescriptorsRawFiles()
- ? new RelayDescriptorDatabaseImporter(
- config.getWriteRelayDescriptorDatabase()
- ? config.getRelayDescriptorDatabaseJdbc() : null,
- config.getWriteRelayDescriptorsRawFiles()
- ? config.getRelayDescriptorRawFilesDirectory() : null,
- config.getDirectoryArchivesDirectories(),
- statsDirectory,
- config.getKeepDirectoryArchiveImportHistory()) : null;
- if (rddi != null) {
- rddi.importRelayDescriptors();
- rddi.closeConnection();
- }
- }
-
- // Prepare consensus stats file handler (used for stats on running
- // bridges only)
- ConsensusStatsFileHandler csfh = config.getWriteBridgeStats()
- ? new ConsensusStatsFileHandler(
- config.getRelayDescriptorDatabaseJdbc(),
- new File(config.getSanitizedBridgesDirectory()),
- statsDirectory, config.getKeepSanitizedBridgesImportHistory())
- : null;
-
- // Import sanitized bridges and write updated stats files to disk
- if (csfh != null) {
- if (config.getImportSanitizedBridges()) {
- csfh.importSanitizedBridges();
- }
- csfh.writeFiles();
- csfh = null;
- }
-
- // Import and process torperf stats
- if (config.getImportWriteTorperfStats()) {
- new TorperfProcessor(new File(config.getTorperfDirectory()),
- statsDirectory);
- }
-
- // Remove lock file
- lf.releaseLock();
-
- logger.info("Terminating ERNIE.");
- }
-}
-
diff --git a/modules/legacy/src/org/torproject/ernie/cron/RelayDescriptorDatabaseImporter.java b/modules/legacy/src/org/torproject/ernie/cron/RelayDescriptorDatabaseImporter.java
deleted file mode 100644
index 97a330e..0000000
--- a/modules/legacy/src/org/torproject/ernie/cron/RelayDescriptorDatabaseImporter.java
+++ /dev/null
@@ -1,995 +0,0 @@
-/* Copyright 2011--2017 The Tor Project
- * See LICENSE for licensing information */
-
-package org.torproject.ernie.cron;
-
-import org.torproject.descriptor.Descriptor;
-import org.torproject.descriptor.DescriptorFile;
-import org.torproject.descriptor.DescriptorReader;
-import org.torproject.descriptor.DescriptorSourceFactory;
-import org.torproject.descriptor.ExtraInfoDescriptor;
-import org.torproject.descriptor.NetworkStatusEntry;
-import org.torproject.descriptor.RelayNetworkStatusConsensus;
-import org.torproject.descriptor.ServerDescriptor;
-
-import org.postgresql.util.PGbytea;
-
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.sql.CallableStatement;
-import java.sql.Connection;
-import java.sql.DriverManager;
-import java.sql.PreparedStatement;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.sql.Timestamp;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Calendar;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.SortedSet;
-import java.util.TimeZone;
-import java.util.TreeSet;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-/**
- * Parse directory data.
- */
-
-/* TODO Split up this class and move its parts to cron.network,
- * cron.users, and status.relaysearch packages. Requires extensive
- * changes to the database schema though. */
-public final class RelayDescriptorDatabaseImporter {
-
- /**
- * How many records to commit with each database transaction.
- */
- private final long autoCommitCount = 500;
-
- /* Counters to keep track of the number of records committed before
- * each transaction. */
-
- private int rdsCount = 0;
-
- private int resCount = 0;
-
- private int rhsCount = 0;
-
- private int rrsCount = 0;
-
- private int rcsCount = 0;
-
- private int rvsCount = 0;
-
- private int rqsCount = 0;
-
- /**
- * Relay descriptor database connection.
- */
- private Connection conn;
-
- /**
- * Prepared statement to check whether any network status consensus
- * entries matching a given valid-after time have been imported into the
- * database before.
- */
- private PreparedStatement psSs;
-
- /**
- * Prepared statement to check whether a given server descriptor has
- * been imported into the database before.
- */
- private PreparedStatement psDs;
-
- /**
- * Prepared statement to check whether a given network status consensus
- * has been imported into the database before.
- */
- private PreparedStatement psCs;
-
- /**
- * Set of dates that have been inserted into the database to be
- * included in the next refresh run.
- */
- private Set<Long> scheduledUpdates;
-
- /**
- * Prepared statement to insert a date into the database that shall be
- * included in the next refresh run.
- */
- private PreparedStatement psU;
-
- /**
- * Prepared statement to insert a network status consensus entry into
- * the database.
- */
- private PreparedStatement psR;
-
- /**
- * Prepared statement to insert a server descriptor into the database.
- */
- private PreparedStatement psD;
-
- /**
- * Callable statement to insert the bandwidth history of an extra-info
- * descriptor into the database.
- */
- private CallableStatement csH;
-
- /**
- * Prepared statement to insert a network status consensus into the
- * database.
- */
- private PreparedStatement psC;
-
- /**
- * Logger for this class.
- */
- private Logger logger;
-
- /**
- * Directory for writing raw import files.
- */
- private String rawFilesDirectory;
-
- /**
- * Raw import file containing status entries.
- */
- private BufferedWriter statusentryOut;
-
- /**
- * Raw import file containing server descriptors.
- */
- private BufferedWriter descriptorOut;
-
- /**
- * Raw import file containing bandwidth histories.
- */
- private BufferedWriter bwhistOut;
-
- /**
- * Raw import file containing consensuses.
- */
- private BufferedWriter consensusOut;
-
- /**
- * Date format to parse timestamps.
- */
- private SimpleDateFormat dateTimeFormat;
-
- /**
- * The last valid-after time for which we checked whether there have
- * been any network status entries in the database.
- */
- private long lastCheckedStatusEntries;
-
- /**
- * Set of fingerprints that we imported for the valid-after time in
- * <code>lastCheckedStatusEntries</code>.
- */
- private Set<String> insertedStatusEntries = new HashSet<>();
-
- private boolean importIntoDatabase;
-
- private boolean writeRawImportFiles;
-
- private List<String> archivesDirectories;
-
- private File statsDirectory;
-
- private boolean keepImportHistory;
-
- /**
- * Initialize database importer by connecting to the database and
- * preparing statements.
- */
- public RelayDescriptorDatabaseImporter(String connectionUrl,
- String rawFilesDirectory, List<String> archivesDirectories,
- File statsDirectory, boolean keepImportHistory) {
-
- if (archivesDirectories == null || statsDirectory == null) {
- throw new IllegalArgumentException();
- }
- this.archivesDirectories = archivesDirectories;
- this.statsDirectory = statsDirectory;
- this.keepImportHistory = keepImportHistory;
-
- /* Initialize logger. */
- this.logger = Logger.getLogger(
- RelayDescriptorDatabaseImporter.class.getName());
-
- if (connectionUrl != null) {
- try {
- /* Connect to database. */
- this.conn = DriverManager.getConnection(connectionUrl);
-
- /* Turn autocommit off */
- this.conn.setAutoCommit(false);
-
- /* Prepare statements. */
- this.psSs = conn.prepareStatement("SELECT fingerprint "
- + "FROM statusentry WHERE validafter = ?");
- this.psDs = conn.prepareStatement("SELECT COUNT(*) "
- + "FROM descriptor WHERE descriptor = ?");
- this.psCs = conn.prepareStatement("SELECT COUNT(*) "
- + "FROM consensus WHERE validafter = ?");
- this.psR = conn.prepareStatement("INSERT INTO statusentry "
- + "(validafter, nickname, fingerprint, descriptor, "
- + "published, address, orport, dirport, isauthority, "
- + "isbadexit, isbaddirectory, isexit, isfast, isguard, "
- + "ishsdir, isnamed, isstable, isrunning, isunnamed, "
- + "isvalid, isv2dir, isv3dir, version, bandwidth, ports, "
- + "rawdesc) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, "
- + "?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)");
- this.psD = conn.prepareStatement("INSERT INTO descriptor "
- + "(descriptor, nickname, address, orport, dirport, "
- + "fingerprint, bandwidthavg, bandwidthburst, "
- + "bandwidthobserved, platform, published, uptime, "
- + "extrainfo) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, "
- + "?)");
- this.csH = conn.prepareCall("{call insert_bwhist(?, ?, ?, ?, ?, "
- + "?)}");
- this.psC = conn.prepareStatement("INSERT INTO consensus "
- + "(validafter) VALUES (?)");
- this.psU = conn.prepareStatement("INSERT INTO scheduled_updates "
- + "(date) VALUES (?)");
- this.scheduledUpdates = new HashSet<>();
- this.importIntoDatabase = true;
- } catch (SQLException e) {
- this.logger.log(Level.WARNING, "Could not connect to database or "
- + "prepare statements.", e);
- }
- }
-
- /* Remember where we want to write raw import files. */
- if (rawFilesDirectory != null) {
- this.rawFilesDirectory = rawFilesDirectory;
- this.writeRawImportFiles = true;
- }
-
- /* Initialize date format, so that we can format timestamps. */
- this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- }
-
- private void addDateToScheduledUpdates(long timestamp)
- throws SQLException {
- if (!this.importIntoDatabase) {
- return;
- }
- long dateMillis = 0L;
- try {
- dateMillis = this.dateTimeFormat.parse(
- this.dateTimeFormat.format(timestamp).substring(0, 10)
- + " 00:00:00").getTime();
- } catch (ParseException e) {
- this.logger.log(Level.WARNING, "Internal parsing error.", e);
- return;
- }
- if (!this.scheduledUpdates.contains(dateMillis)) {
- this.psU.setDate(1, new java.sql.Date(dateMillis));
- this.psU.execute();
- this.scheduledUpdates.add(dateMillis);
- }
- }
-
- /**
- * Insert network status consensus entry into database.
- */
- public void addStatusEntryContents(long validAfter, String nickname,
- String fingerprint, String descriptor, long published,
- String address, long orPort, long dirPort,
- SortedSet<String> flags, String version, long bandwidth,
- String ports, byte[] rawDescriptor) {
- if (this.importIntoDatabase) {
- try {
- this.addDateToScheduledUpdates(validAfter);
- Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
- Timestamp validAfterTimestamp = new Timestamp(validAfter);
- if (lastCheckedStatusEntries != validAfter) {
- insertedStatusEntries.clear();
- this.psSs.setTimestamp(1, validAfterTimestamp, cal);
- ResultSet rs = psSs.executeQuery();
- while (rs.next()) {
- String insertedFingerprint = rs.getString(1);
- insertedStatusEntries.add(insertedFingerprint);
- }
- rs.close();
- lastCheckedStatusEntries = validAfter;
- }
- if (!insertedStatusEntries.contains(fingerprint)) {
- this.psR.clearParameters();
- this.psR.setTimestamp(1, validAfterTimestamp, cal);
- this.psR.setString(2, nickname);
- this.psR.setString(3, fingerprint);
- this.psR.setString(4, descriptor);
- this.psR.setTimestamp(5, new Timestamp(published), cal);
- this.psR.setString(6, address);
- this.psR.setLong(7, orPort);
- this.psR.setLong(8, dirPort);
- this.psR.setBoolean(9, flags.contains("Authority"));
- this.psR.setBoolean(10, flags.contains("BadExit"));
- this.psR.setBoolean(11, flags.contains("BadDirectory"));
- this.psR.setBoolean(12, flags.contains("Exit"));
- this.psR.setBoolean(13, flags.contains("Fast"));
- this.psR.setBoolean(14, flags.contains("Guard"));
- this.psR.setBoolean(15, flags.contains("HSDir"));
- this.psR.setBoolean(16, flags.contains("Named"));
- this.psR.setBoolean(17, flags.contains("Stable"));
- this.psR.setBoolean(18, flags.contains("Running"));
- this.psR.setBoolean(19, flags.contains("Unnamed"));
- this.psR.setBoolean(20, flags.contains("Valid"));
- this.psR.setBoolean(21, flags.contains("V2Dir"));
- this.psR.setBoolean(22, flags.contains("V3Dir"));
- this.psR.setString(23, version);
- this.psR.setLong(24, bandwidth);
- this.psR.setString(25, ports);
- this.psR.setBytes(26, rawDescriptor);
- this.psR.executeUpdate();
- rrsCount++;
- if (rrsCount % autoCommitCount == 0) {
- this.conn.commit();
- }
- insertedStatusEntries.add(fingerprint);
- }
- } catch (SQLException e) {
- this.logger.log(Level.WARNING, "Could not add network status "
- + "consensus entry. We won't make any further SQL requests "
- + "in this execution.", e);
- this.importIntoDatabase = false;
- }
- }
- if (this.writeRawImportFiles) {
- try {
- if (this.statusentryOut == null) {
- new File(rawFilesDirectory).mkdirs();
- this.statusentryOut = new BufferedWriter(new FileWriter(
- rawFilesDirectory + "/statusentry.sql"));
- this.statusentryOut.write(" COPY statusentry (validafter, "
- + "nickname, fingerprint, descriptor, published, address, "
- + "orport, dirport, isauthority, isbadExit, "
- + "isbaddirectory, isexit, isfast, isguard, ishsdir, "
- + "isnamed, isstable, isrunning, isunnamed, isvalid, "
- + "isv2dir, isv3dir, version, bandwidth, ports, rawdesc) "
- + "FROM stdin;\n");
- }
- this.statusentryOut.write(
- this.dateTimeFormat.format(validAfter) + "\t" + nickname
- + "\t" + fingerprint.toLowerCase() + "\t"
- + descriptor.toLowerCase() + "\t"
- + this.dateTimeFormat.format(published) + "\t" + address
- + "\t" + orPort + "\t" + dirPort + "\t"
- + (flags.contains("Authority") ? "t" : "f") + "\t"
- + (flags.contains("BadExit") ? "t" : "f") + "\t"
- + (flags.contains("BadDirectory") ? "t" : "f") + "\t"
- + (flags.contains("Exit") ? "t" : "f") + "\t"
- + (flags.contains("Fast") ? "t" : "f") + "\t"
- + (flags.contains("Guard") ? "t" : "f") + "\t"
- + (flags.contains("HSDir") ? "t" : "f") + "\t"
- + (flags.contains("Named") ? "t" : "f") + "\t"
- + (flags.contains("Stable") ? "t" : "f") + "\t"
- + (flags.contains("Running") ? "t" : "f") + "\t"
- + (flags.contains("Unnamed") ? "t" : "f") + "\t"
- + (flags.contains("Valid") ? "t" : "f") + "\t"
- + (flags.contains("V2Dir") ? "t" : "f") + "\t"
- + (flags.contains("V3Dir") ? "t" : "f") + "\t"
- + (version != null ? version : "\\N") + "\t"
- + (bandwidth >= 0 ? bandwidth : "\\N") + "\t"
- + (ports != null ? ports : "\\N") + "\t");
- this.statusentryOut.write(PGbytea.toPGString(rawDescriptor)
- .replaceAll("\\\\", "\\\\\\\\") + "\n");
- } catch (SQLException e) {
- this.logger.log(Level.WARNING, "Could not write network status "
- + "consensus entry to raw database import file. We won't "
- + "make any further attempts to write raw import files in "
- + "this execution.", e);
- this.writeRawImportFiles = false;
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not write network status "
- + "consensus entry to raw database import file. We won't "
- + "make any further attempts to write raw import files in "
- + "this execution.", e);
- this.writeRawImportFiles = false;
- }
- }
- }
-
- /**
- * Insert server descriptor into database.
- */
- public void addServerDescriptorContents(String descriptor,
- String nickname, String address, int orPort, int dirPort,
- String relayIdentifier, long bandwidthAvg, long bandwidthBurst,
- long bandwidthObserved, String platform, long published,
- long uptime, String extraInfoDigest) {
- if (this.importIntoDatabase) {
- try {
- this.addDateToScheduledUpdates(published);
- this.addDateToScheduledUpdates(
- published + 24L * 60L * 60L * 1000L);
- Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
- this.psDs.setString(1, descriptor);
- ResultSet rs = psDs.executeQuery();
- rs.next();
- if (rs.getInt(1) == 0) {
- this.psD.clearParameters();
- this.psD.setString(1, descriptor);
- this.psD.setString(2, nickname);
- this.psD.setString(3, address);
- this.psD.setInt(4, orPort);
- this.psD.setInt(5, dirPort);
- this.psD.setString(6, relayIdentifier);
- this.psD.setLong(7, bandwidthAvg);
- this.psD.setLong(8, bandwidthBurst);
- this.psD.setLong(9, bandwidthObserved);
- /* Remove all non-ASCII characters from the platform string, or
- * we'll make Postgres unhappy. Sun's JDK and OpenJDK behave
- * differently when creating a new String with a given encoding.
- * That's what the regexp below is for. */
- this.psD.setString(10, new String(platform.getBytes(),
- "US-ASCII").replaceAll("[^\\p{ASCII}]",""));
- this.psD.setTimestamp(11, new Timestamp(published), cal);
- this.psD.setLong(12, uptime);
- this.psD.setString(13, extraInfoDigest);
- this.psD.executeUpdate();
- rdsCount++;
- if (rdsCount % autoCommitCount == 0) {
- this.conn.commit();
- }
- }
- } catch (UnsupportedEncodingException e) {
- // US-ASCII is supported for sure
- } catch (SQLException e) {
- this.logger.log(Level.WARNING, "Could not add server "
- + "descriptor. We won't make any further SQL requests in "
- + "this execution.", e);
- this.importIntoDatabase = false;
- }
- }
- if (this.writeRawImportFiles) {
- try {
- if (this.descriptorOut == null) {
- new File(rawFilesDirectory).mkdirs();
- this.descriptorOut = new BufferedWriter(new FileWriter(
- rawFilesDirectory + "/descriptor.sql"));
- this.descriptorOut.write(" COPY descriptor (descriptor, "
- + "nickname, address, orport, dirport, fingerprint, "
- + "bandwidthavg, bandwidthburst, bandwidthobserved, "
- + "platform, published, uptime, extrainfo) FROM stdin;\n");
- }
- this.descriptorOut.write(descriptor.toLowerCase() + "\t"
- + nickname + "\t" + address + "\t" + orPort + "\t" + dirPort
- + "\t" + relayIdentifier + "\t" + bandwidthAvg + "\t"
- + bandwidthBurst + "\t" + bandwidthObserved + "\t"
- + (platform != null && platform.length() > 0
- ? new String(platform.getBytes(), "US-ASCII") : "\\N")
- + "\t" + this.dateTimeFormat.format(published) + "\t"
- + (uptime >= 0 ? uptime : "\\N") + "\t"
- + (extraInfoDigest != null ? extraInfoDigest : "\\N")
- + "\n");
- } catch (UnsupportedEncodingException e) {
- // US-ASCII is supported for sure
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not write server "
- + "descriptor to raw database import file. We won't make "
- + "any further attempts to write raw import files in this "
- + "execution.", e);
- this.writeRawImportFiles = false;
- }
- }
- }
-
- /**
- * Insert extra-info descriptor into database.
- */
- public void addExtraInfoDescriptorContents(String extraInfoDigest,
- String nickname, String fingerprint, long published,
- List<String> bandwidthHistoryLines) {
- if (!bandwidthHistoryLines.isEmpty()) {
- this.addBandwidthHistory(fingerprint.toLowerCase(), published,
- bandwidthHistoryLines);
- }
- }
-
- private static class BigIntArray implements java.sql.Array {
-
- private final String stringValue;
-
- public BigIntArray(long[] array, int offset) {
- if (array == null) {
- this.stringValue = "[-1:-1]={0}";
- } else {
- StringBuilder sb = new StringBuilder("[" + offset + ":"
- + (offset + array.length - 1) + "]={");
- for (int i = 0; i < array.length; i++) {
- sb.append((i > 0 ? "," : "") + array[i]);
- }
- sb.append('}');
- this.stringValue = sb.toString();
- }
- }
-
- public String toString() {
- return stringValue;
- }
-
- public String getBaseTypeName() {
- return "int8";
- }
-
- /* The other methods are never called; no need to implement them. */
- public void free() {
- throw new UnsupportedOperationException();
- }
-
- public Object getArray() {
- throw new UnsupportedOperationException();
- }
-
- public Object getArray(long index, int count) {
- throw new UnsupportedOperationException();
- }
-
- public Object getArray(long index, int count,
- Map<String, Class<?>> map) {
- throw new UnsupportedOperationException();
- }
-
- public Object getArray(Map<String, Class<?>> map) {
- throw new UnsupportedOperationException();
- }
-
- public int getBaseType() {
- throw new UnsupportedOperationException();
- }
-
- public ResultSet getResultSet() {
- throw new UnsupportedOperationException();
- }
-
- public ResultSet getResultSet(long index, int count) {
- throw new UnsupportedOperationException();
- }
-
- public ResultSet getResultSet(long index, int count,
- Map<String, Class<?>> map) {
- throw new UnsupportedOperationException();
- }
-
- public ResultSet getResultSet(Map<String, Class<?>> map) {
- throw new UnsupportedOperationException();
- }
- }
-
- /** Inserts a bandwidth history into database. */
- public void addBandwidthHistory(String fingerprint, long published,
- List<String> bandwidthHistoryStrings) {
-
- /* Split history lines by date and rewrite them so that the date
- * comes first. */
- SortedSet<String> historyLinesByDate = new TreeSet<>();
- for (String bandwidthHistoryString : bandwidthHistoryStrings) {
- String[] parts = bandwidthHistoryString.split(" ");
- if (parts.length != 6) {
- this.logger.finer("Bandwidth history line does not have expected "
- + "number of elements. Ignoring this line.");
- continue;
- }
- long intervalLength = 0L;
- try {
- intervalLength = Long.parseLong(parts[3].substring(1));
- } catch (NumberFormatException e) {
- this.logger.fine("Bandwidth history line does not have valid "
- + "interval length '" + parts[3] + " " + parts[4] + "'. "
- + "Ignoring this line.");
- continue;
- }
- String[] values = parts[5].split(",");
- if (intervalLength % 900L != 0L) {
- this.logger.fine("Bandwidth history line does not contain "
- + "multiples of 15-minute intervals. Ignoring this line.");
- continue;
- } else if (intervalLength != 900L) {
- /* This is a really dirty hack to support bandwidth history
- * intervals that are longer than 15 minutes by linearly
- * distributing reported bytes to 15 minute intervals. The
- * alternative would have been to modify the database schema. */
- try {
- long factor = intervalLength / 900L;
- String[] newValues = new String[values.length * (int) factor];
- for (int i = 0; i < newValues.length; i++) {
- newValues[i] = String.valueOf(
- Long.parseLong(values[i / (int) factor]) / factor);
- }
- values = newValues;
- intervalLength = 900L;
- } catch (NumberFormatException e) {
- this.logger.fine("Number format exception while parsing "
- + "bandwidth history line. Ignoring this line.");
- continue;
- }
- }
- String type = parts[0];
- String intervalEndTime = parts[1] + " " + parts[2];
- long intervalEnd;
- long dateStart;
- try {
- intervalEnd = dateTimeFormat.parse(intervalEndTime).getTime();
- dateStart = dateTimeFormat.parse(parts[1] + " 00:00:00")
- .getTime();
- } catch (ParseException e) {
- this.logger.fine("Parse exception while parsing timestamp in "
- + "bandwidth history line. Ignoring this line.");
- continue;
- }
- if (Math.abs(published - intervalEnd)
- > 7L * 24L * 60L * 60L * 1000L) {
- this.logger.fine("Extra-info descriptor publication time "
- + dateTimeFormat.format(published) + " and last interval "
- + "time " + intervalEndTime + " in " + type + " line differ "
- + "by more than 7 days! Not adding this line!");
- continue;
- }
- long currentIntervalEnd = intervalEnd;
- StringBuilder sb = new StringBuilder();
- SortedSet<String> newHistoryLines = new TreeSet<>();
- try {
- for (int i = values.length - 1; i >= -1; i--) {
- if (i == -1 || currentIntervalEnd < dateStart) {
- sb.insert(0, intervalEndTime + " " + type + " ("
- + intervalLength + " s) ");
- sb.setLength(sb.length() - 1);
- String historyLine = sb.toString();
- newHistoryLines.add(historyLine);
- sb = new StringBuilder();
- dateStart -= 24L * 60L * 60L * 1000L;
- intervalEndTime = dateTimeFormat.format(currentIntervalEnd);
- }
- if (i == -1) {
- break;
- }
- Long.parseLong(values[i]);
- sb.insert(0, values[i] + ",");
- currentIntervalEnd -= intervalLength * 1000L;
- }
- } catch (NumberFormatException e) {
- this.logger.fine("Number format exception while parsing "
- + "bandwidth history line. Ignoring this line.");
- continue;
- }
- historyLinesByDate.addAll(newHistoryLines);
- }
-
- /* Add split history lines to database. */
- String lastDate = null;
- historyLinesByDate.add("EOL");
- long[] readArray = null;
- long[] writtenArray = null;
- long[] dirreadArray = null;
- long[] dirwrittenArray = null;
- int readOffset = 0;
- int writtenOffset = 0;
- int dirreadOffset = 0;
- int dirwrittenOffset = 0;
- for (String historyLine : historyLinesByDate) {
- String[] parts = historyLine.split(" ");
- String currentDate = parts[0];
- if (lastDate != null && (historyLine.equals("EOL")
- || !currentDate.equals(lastDate))) {
- BigIntArray readIntArray = new BigIntArray(readArray,
- readOffset);
- BigIntArray writtenIntArray = new BigIntArray(writtenArray,
- writtenOffset);
- BigIntArray dirreadIntArray = new BigIntArray(dirreadArray,
- dirreadOffset);
- BigIntArray dirwrittenIntArray = new BigIntArray(dirwrittenArray,
- dirwrittenOffset);
- if (this.importIntoDatabase) {
- try {
- long dateMillis = dateTimeFormat.parse(lastDate
- + " 00:00:00").getTime();
- this.addDateToScheduledUpdates(dateMillis);
- this.csH.setString(1, fingerprint);
- this.csH.setDate(2, new java.sql.Date(dateMillis));
- this.csH.setArray(3, readIntArray);
- this.csH.setArray(4, writtenIntArray);
- this.csH.setArray(5, dirreadIntArray);
- this.csH.setArray(6, dirwrittenIntArray);
- this.csH.addBatch();
- rhsCount++;
- if (rhsCount % autoCommitCount == 0) {
- this.csH.executeBatch();
- }
- } catch (SQLException e) {
- this.logger.log(Level.WARNING, "Could not insert bandwidth "
- + "history line into database. We won't make any "
- + "further SQL requests in this execution.", e);
- this.importIntoDatabase = false;
- } catch (ParseException e) {
- this.logger.log(Level.WARNING, "Could not insert bandwidth "
- + "history line into database. We won't make any "
- + "further SQL requests in this execution.", e);
- this.importIntoDatabase = false;
- }
- }
- if (this.writeRawImportFiles) {
- try {
- if (this.bwhistOut == null) {
- new File(rawFilesDirectory).mkdirs();
- this.bwhistOut = new BufferedWriter(new FileWriter(
- rawFilesDirectory + "/bwhist.sql"));
- }
- this.bwhistOut.write("SELECT insert_bwhist('" + fingerprint
- + "','" + lastDate + "','" + readIntArray.toString()
- + "','" + writtenIntArray.toString() + "','"
- + dirreadIntArray.toString() + "','"
- + dirwrittenIntArray.toString() + "');\n");
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not write bandwidth "
- + "history to raw database import file. We won't make "
- + "any further attempts to write raw import files in "
- + "this execution.", e);
- this.writeRawImportFiles = false;
- }
- }
- readArray = writtenArray = dirreadArray = dirwrittenArray = null;
- }
- if (historyLine.equals("EOL")) {
- break;
- }
- long lastIntervalTime;
- try {
- lastIntervalTime = dateTimeFormat.parse(parts[0] + " "
- + parts[1]).getTime() - dateTimeFormat.parse(parts[0]
- + " 00:00:00").getTime();
- } catch (ParseException e) {
- continue;
- }
- String[] stringValues = parts[5].split(",");
- long[] longValues = new long[stringValues.length];
- for (int i = 0; i < longValues.length; i++) {
- longValues[i] = Long.parseLong(stringValues[i]);
- }
-
- int offset = (int) (lastIntervalTime / (15L * 60L * 1000L))
- - longValues.length + 1;
- String type = parts[2];
- if (type.equals("read-history")) {
- readArray = longValues;
- readOffset = offset;
- } else if (type.equals("write-history")) {
- writtenArray = longValues;
- writtenOffset = offset;
- } else if (type.equals("dirreq-read-history")) {
- dirreadArray = longValues;
- dirreadOffset = offset;
- } else if (type.equals("dirreq-write-history")) {
- dirwrittenArray = longValues;
- dirwrittenOffset = offset;
- }
- lastDate = currentDate;
- }
- }
-
- /**
- * Insert network status consensus into database.
- */
- public void addConsensus(long validAfter) {
- if (this.importIntoDatabase) {
- try {
- this.addDateToScheduledUpdates(validAfter);
- Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
- Timestamp validAfterTimestamp = new Timestamp(validAfter);
- this.psCs.setTimestamp(1, validAfterTimestamp, cal);
- ResultSet rs = psCs.executeQuery();
- rs.next();
- if (rs.getInt(1) == 0) {
- this.psC.clearParameters();
- this.psC.setTimestamp(1, validAfterTimestamp, cal);
- this.psC.executeUpdate();
- rcsCount++;
- if (rcsCount % autoCommitCount == 0) {
- this.conn.commit();
- }
- }
- } catch (SQLException e) {
- this.logger.log(Level.WARNING, "Could not add network status "
- + "consensus. We won't make any further SQL requests in "
- + "this execution.", e);
- this.importIntoDatabase = false;
- }
- }
- if (this.writeRawImportFiles) {
- try {
- if (this.consensusOut == null) {
- new File(rawFilesDirectory).mkdirs();
- this.consensusOut = new BufferedWriter(new FileWriter(
- rawFilesDirectory + "/consensus.sql"));
- this.consensusOut.write(" COPY consensus (validafter) "
- + "FROM stdin;\n");
- }
- String validAfterString = this.dateTimeFormat.format(validAfter);
- this.consensusOut.write(validAfterString + "\n");
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not write network status "
- + "consensus to raw database import file. We won't make "
- + "any further attempts to write raw import files in this "
- + "execution.", e);
- this.writeRawImportFiles = false;
- }
- }
- }
-
- /** Imports relay descriptors into the database. */
- public void importRelayDescriptors() {
- logger.fine("Importing files in directories " + archivesDirectories
- + "/...");
- if (!this.archivesDirectories.isEmpty()) {
- DescriptorReader reader =
- DescriptorSourceFactory.createDescriptorReader();
- reader.setMaxDescriptorFilesInQueue(10);
- for (String archivesPath : this.archivesDirectories) {
- File archivesDirectory = new File(archivesPath);
- if (archivesDirectory.exists()) {
- reader.addDirectory(archivesDirectory);
- }
- }
- if (keepImportHistory) {
- reader.setExcludeFiles(new File(statsDirectory,
- "database-importer-relay-descriptor-history"));
- }
- Iterator<DescriptorFile> descriptorFiles = reader.readDescriptors();
- while (descriptorFiles.hasNext()) {
- DescriptorFile descriptorFile = descriptorFiles.next();
- if (descriptorFile.getDescriptors() != null) {
- for (Descriptor descriptor : descriptorFile.getDescriptors()) {
- if (descriptor instanceof RelayNetworkStatusConsensus) {
- this.addRelayNetworkStatusConsensus(
- (RelayNetworkStatusConsensus) descriptor);
- } else if (descriptor instanceof ServerDescriptor) {
- this.addServerDescriptor((ServerDescriptor) descriptor);
- } else if (descriptor instanceof ExtraInfoDescriptor) {
- this.addExtraInfoDescriptor(
- (ExtraInfoDescriptor) descriptor);
- }
- }
- }
- }
- }
-
- logger.info("Finished importing relay descriptors.");
- }
-
- private void addRelayNetworkStatusConsensus(
- RelayNetworkStatusConsensus consensus) {
- for (NetworkStatusEntry statusEntry
- : consensus.getStatusEntries().values()) {
- this.addStatusEntryContents(consensus.getValidAfterMillis(),
- statusEntry.getNickname(),
- statusEntry.getFingerprint().toLowerCase(),
- statusEntry.getDescriptor().toLowerCase(),
- statusEntry.getPublishedMillis(), statusEntry.getAddress(),
- statusEntry.getOrPort(), statusEntry.getDirPort(),
- statusEntry.getFlags(), statusEntry.getVersion(),
- statusEntry.getBandwidth(), statusEntry.getPortList(),
- statusEntry.getStatusEntryBytes());
- }
- this.addConsensus(consensus.getValidAfterMillis());
- }
-
- private void addServerDescriptor(ServerDescriptor descriptor) {
- this.addServerDescriptorContents(
- descriptor.getServerDescriptorDigest(), descriptor.getNickname(),
- descriptor.getAddress(), descriptor.getOrPort(),
- descriptor.getDirPort(), descriptor.getFingerprint(),
- descriptor.getBandwidthRate(), descriptor.getBandwidthBurst(),
- descriptor.getBandwidthObserved(), descriptor.getPlatform(),
- descriptor.getPublishedMillis(), descriptor.getUptime(),
- descriptor.getExtraInfoDigest());
- }
-
- private void addExtraInfoDescriptor(ExtraInfoDescriptor descriptor) {
- List<String> bandwidthHistoryLines = new ArrayList<>();
- if (descriptor.getWriteHistory() != null) {
- bandwidthHistoryLines.add(descriptor.getWriteHistory().getLine());
- }
- if (descriptor.getReadHistory() != null) {
- bandwidthHistoryLines.add(descriptor.getReadHistory().getLine());
- }
- if (descriptor.getDirreqWriteHistory() != null) {
- bandwidthHistoryLines.add(
- descriptor.getDirreqWriteHistory().getLine());
- }
- if (descriptor.getDirreqReadHistory() != null) {
- bandwidthHistoryLines.add(
- descriptor.getDirreqReadHistory().getLine());
- }
- this.addExtraInfoDescriptorContents(descriptor.getExtraInfoDigest(),
- descriptor.getNickname(),
- descriptor.getFingerprint().toLowerCase(),
- descriptor.getPublishedMillis(), bandwidthHistoryLines);
- }
-
- /**
- * Close the relay descriptor database connection.
- */
- public void closeConnection() {
-
- /* Log stats about imported descriptors. */
- this.logger.info(String.format("Finished importing relay "
- + "descriptors: %d consensuses, %d network status entries, %d "
- + "votes, %d server descriptors, %d extra-info descriptors, %d "
- + "bandwidth history elements, and %d dirreq stats elements",
- rcsCount, rrsCount, rvsCount, rdsCount, resCount, rhsCount,
- rqsCount));
-
-    /* Insert scheduled updates a second time, just in case the refresh
-     * run has started since we inserted them the first time, in which
-     * case it would miss the data inserted afterwards. We cannot,
-     * however, insert them only now, because if a Java execution fails
-     * at a random point, we might have added data but not the
-     * corresponding dates to update statistics. */
- if (this.importIntoDatabase) {
- try {
- for (long dateMillis : this.scheduledUpdates) {
- this.psU.setDate(1, new java.sql.Date(dateMillis));
- this.psU.execute();
- }
- } catch (SQLException e) {
- this.logger.log(Level.WARNING, "Could not add scheduled dates "
- + "for the next refresh run.", e);
- }
- }
-
- /* Commit any stragglers before closing. */
- if (this.conn != null) {
- try {
- this.csH.executeBatch();
-
- this.conn.commit();
- } catch (SQLException e) {
- this.logger.log(Level.WARNING, "Could not commit final records "
- + "to database", e);
- }
- try {
- this.conn.close();
- } catch (SQLException e) {
- this.logger.log(Level.WARNING, "Could not close database "
- + "connection.", e);
- }
- }
-
- /* Close raw import files. */
- try {
- if (this.statusentryOut != null) {
- this.statusentryOut.write("\\.\n");
- this.statusentryOut.close();
- }
- if (this.descriptorOut != null) {
- this.descriptorOut.write("\\.\n");
- this.descriptorOut.close();
- }
- if (this.bwhistOut != null) {
- this.bwhistOut.write("\\.\n");
- this.bwhistOut.close();
- }
- if (this.consensusOut != null) {
- this.consensusOut.write("\\.\n");
- this.consensusOut.close();
- }
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not close one or more raw "
- + "database import files.", e);
- }
- }
-}
-
diff --git a/modules/legacy/src/org/torproject/ernie/cron/network/ConsensusStatsFileHandler.java b/modules/legacy/src/org/torproject/ernie/cron/network/ConsensusStatsFileHandler.java
deleted file mode 100644
index aa9469e..0000000
--- a/modules/legacy/src/org/torproject/ernie/cron/network/ConsensusStatsFileHandler.java
+++ /dev/null
@@ -1,412 +0,0 @@
-/* Copyright 2011--2017 The Tor Project
- * See LICENSE for licensing information */
-
-package org.torproject.ernie.cron.network;
-
-import org.torproject.descriptor.BridgeNetworkStatus;
-import org.torproject.descriptor.Descriptor;
-import org.torproject.descriptor.DescriptorFile;
-import org.torproject.descriptor.DescriptorReader;
-import org.torproject.descriptor.DescriptorSourceFactory;
-import org.torproject.descriptor.NetworkStatusEntry;
-
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.sql.Connection;
-import java.sql.DriverManager;
-import java.sql.PreparedStatement;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.sql.Statement;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.SortedMap;
-import java.util.TimeZone;
-import java.util.TreeMap;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-/**
- * Generates statistics on the average number of relays and bridges per
- * day. Accepts parse results from <code>RelayDescriptorParser</code> and
- * <code>BridgeDescriptorParser</code> and stores them in intermediate
- * result files <code>stats/consensus-stats-raw</code> and
- * <code>stats/bridge-consensus-stats-raw</code>. Writes final results to
- * <code>stats/consensus-stats</code> for all days for which at least half
- * of the expected consensuses or statuses are known.
- */
-public class ConsensusStatsFileHandler {
-
- /**
- * Intermediate results file holding the number of running bridges per
- * bridge status.
- */
- private File bridgeConsensusStatsRawFile;
-
- /**
- * Number of running bridges in a given bridge status. Map keys are the bridge
- * status time formatted as "yyyy-MM-dd HH:mm:ss", a comma, and the bridge
- * authority nickname, map values are lines as read from
- * <code>stats/bridge-consensus-stats-raw</code>.
- */
- private SortedMap<String, String> bridgesRaw;
-
- /**
- * Average number of running bridges per day. Map keys are dates
- * formatted as "yyyy-MM-dd", map values are the remaining columns as written
- * to <code>stats/consensus-stats</code>.
- */
- private SortedMap<String, String> bridgesPerDay;
-
- /**
- * Logger for this class.
- */
- private Logger logger;
-
- private int bridgeResultsAdded = 0;
-
- /* Database connection string. */
- private String connectionUrl = null;
-
- private SimpleDateFormat dateTimeFormat;
-
- private File bridgesDir;
-
- private File statsDirectory;
-
- private boolean keepImportHistory;
-
- /**
- * Initializes this class, including reading in intermediate results
- * files <code>stats/consensus-stats-raw</code> and
- * <code>stats/bridge-consensus-stats-raw</code> and final results file
- * <code>stats/consensus-stats</code>.
- */
- public ConsensusStatsFileHandler(String connectionUrl,
- File bridgesDir, File statsDirectory,
- boolean keepImportHistory) {
-
- if (bridgesDir == null || statsDirectory == null) {
- throw new IllegalArgumentException();
- }
- this.bridgesDir = bridgesDir;
- this.statsDirectory = statsDirectory;
- this.keepImportHistory = keepImportHistory;
-
- /* Initialize local data structures to hold intermediate and final
- * results. */
- this.bridgesPerDay = new TreeMap<>();
- this.bridgesRaw = new TreeMap<>();
-
- /* Initialize file names for intermediate and final results files. */
- this.bridgeConsensusStatsRawFile = new File(
- "stats/bridge-consensus-stats-raw");
-
- /* Initialize database connection string. */
- this.connectionUrl = connectionUrl;
-
- this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-
- /* Initialize logger. */
- this.logger = Logger.getLogger(
- ConsensusStatsFileHandler.class.getName());
-
- /* Read in number of running bridges per bridge status. */
- if (this.bridgeConsensusStatsRawFile.exists()) {
- try {
- this.logger.fine("Reading file "
- + this.bridgeConsensusStatsRawFile.getAbsolutePath() + "...");
- BufferedReader br = new BufferedReader(new FileReader(
- this.bridgeConsensusStatsRawFile));
- String line = null;
- while ((line = br.readLine()) != null) {
- if (line.startsWith("date")) {
- /* Skip headers. */
- continue;
- }
- String[] parts = line.split(",");
- if (parts.length < 2 || parts.length > 4) {
- this.logger.warning("Corrupt line '" + line + "' in file "
- + this.bridgeConsensusStatsRawFile.getAbsolutePath()
- + "! Aborting to read this file!");
- break;
- }
- /* Assume that all lines without authority nickname are based on
- * Tonga's network status, not Bifroest's. */
- String key = parts[0] + "," + (parts.length < 4 ? "Tonga" : parts[1]);
- String value = null;
- if (parts.length == 2) {
- value = key + "," + parts[1] + ",0";
- } else if (parts.length == 3) {
- value = key + "," + parts[1] + "," + parts[2];
- } else if (parts.length == 4) {
- value = key + "," + parts[2] + "," + parts[3];
- } /* No more cases as we already checked the range above. */
- this.bridgesRaw.put(key, value);
- }
- br.close();
- this.logger.fine("Finished reading file "
- + this.bridgeConsensusStatsRawFile.getAbsolutePath() + ".");
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed to read file "
- + this.bridgeConsensusStatsRawFile.getAbsolutePath() + "!",
- e);
- }
- }
- }
-
- /**
- * Adds the intermediate results of the number of running bridges in a
- * given bridge status to the existing observations.
- */
- public void addBridgeConsensusResults(long publishedMillis,
- String authorityNickname, int running, int runningEc2Bridges) {
- String publishedAuthority = dateTimeFormat.format(publishedMillis) + ","
- + authorityNickname;
- String line = publishedAuthority + "," + running + "," + runningEc2Bridges;
- if (!this.bridgesRaw.containsKey(publishedAuthority)) {
- this.logger.finer("Adding new bridge numbers: " + line);
- this.bridgesRaw.put(publishedAuthority, line);
- this.bridgeResultsAdded++;
- } else if (!line.equals(this.bridgesRaw.get(publishedAuthority))) {
- this.logger.warning("The numbers of running bridges we were just "
- + "given (" + line + ") are different from what we learned "
- + "before (" + this.bridgesRaw.get(publishedAuthority) + ")! "
- + "Overwriting!");
- this.bridgesRaw.put(publishedAuthority, line);
- }
- }
-
- /** Imports sanitized bridge descriptors. */
- public void importSanitizedBridges() {
- if (bridgesDir.exists()) {
- logger.fine("Importing files in directory " + bridgesDir + "/...");
- DescriptorReader reader =
- DescriptorSourceFactory.createDescriptorReader();
- reader.addDirectory(bridgesDir);
- if (keepImportHistory) {
- reader.setExcludeFiles(new File(statsDirectory,
- "consensus-stats-bridge-descriptor-history"));
- }
- Iterator<DescriptorFile> descriptorFiles = reader.readDescriptors();
- while (descriptorFiles.hasNext()) {
- DescriptorFile descriptorFile = descriptorFiles.next();
- if (descriptorFile.getDescriptors() != null) {
- String authority = null;
- if (descriptorFile.getFileName().contains(
- "4A0CCD2DDC7995083D73F5D667100C8A5831F16D")) {
- authority = "Tonga";
- } else if (descriptorFile.getFileName().contains(
- "1D8F3A91C37C5D1C4C19B1AD1D0CFBE8BF72D8E1")) {
- authority = "Bifroest";
- }
- for (Descriptor descriptor : descriptorFile.getDescriptors()) {
- if (descriptor instanceof BridgeNetworkStatus) {
- if (authority == null) {
- this.logger.warning("Did not recognize the bridge authority "
- + "that generated " + descriptorFile.getFileName()
- + ". Skipping.");
- continue;
- }
- this.addBridgeNetworkStatus(
- (BridgeNetworkStatus) descriptor, authority);
- }
- }
- }
- }
- logger.info("Finished importing bridge descriptors.");
- }
- }
-
- private void addBridgeNetworkStatus(BridgeNetworkStatus status,
- String authority) {
- int runningBridges = 0;
- int runningEc2Bridges = 0;
- for (NetworkStatusEntry statusEntry
- : status.getStatusEntries().values()) {
- if (statusEntry.getFlags().contains("Running")) {
- runningBridges++;
- if (statusEntry.getNickname().startsWith("ec2bridge")) {
- runningEc2Bridges++;
- }
- }
- }
- this.addBridgeConsensusResults(status.getPublishedMillis(), authority,
- runningBridges, runningEc2Bridges);
- }
-
- /**
- * Aggregates the raw observations on relay and bridge numbers and
- * writes both raw and aggregate observations to disk.
- */
- public void writeFiles() {
-
- /* Go through raw observations and put everything into nested maps by day
- * and bridge authority. */
- Map<String, Map<String, int[]>> bridgesPerDayAndAuthority = new HashMap<>();
- for (String bridgesRawLine : this.bridgesRaw.values()) {
- String date = bridgesRawLine.substring(0, 10);
- if (!bridgesPerDayAndAuthority.containsKey(date)) {
- bridgesPerDayAndAuthority.put(date, new TreeMap<String, int[]>());
- }
- String[] parts = bridgesRawLine.split(",");
- String authority = parts[1];
- if (!bridgesPerDayAndAuthority.get(date).containsKey(authority)) {
- bridgesPerDayAndAuthority.get(date).put(authority, new int[3]);
- }
- int[] bridges = bridgesPerDayAndAuthority.get(date).get(authority);
- bridges[0] += Integer.parseInt(parts[2]);
- bridges[1] += Integer.parseInt(parts[3]);
- bridges[2]++;
- }
-
- /* Sum up average numbers of running bridges per day reported by all bridge
- * authorities and add these averages to final results. */
- for (Map.Entry<String, Map<String, int[]>> perDay
- : bridgesPerDayAndAuthority.entrySet()) {
- String date = perDay.getKey();
- int brunning = 0;
- int brunningEc2 = 0;
- for (int[] perAuthority : perDay.getValue().values()) {
- int statuses = perAuthority[2];
- if (statuses < 12) {
- /* Only write results if we have seen at least a dozen statuses. */
- continue;
- }
- brunning += perAuthority[0] / statuses;
- brunningEc2 += perAuthority[1] / statuses;
- }
- String line = "," + brunning + "," + brunningEc2;
- /* Are our results new? */
- if (!this.bridgesPerDay.containsKey(date)) {
- this.logger.finer("Adding new average bridge numbers: " + date + line);
- this.bridgesPerDay.put(date, line);
- } else if (!line.equals(this.bridgesPerDay.get(date))) {
- this.logger.finer("Replacing existing average bridge numbers ("
- + this.bridgesPerDay.get(date) + " with new numbers: " + line);
- this.bridgesPerDay.put(date, line);
- }
- }
-
- /* Write raw numbers of running bridges to disk. */
- try {
- this.logger.fine("Writing file "
- + this.bridgeConsensusStatsRawFile.getAbsolutePath() + "...");
- this.bridgeConsensusStatsRawFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(
- new FileWriter(this.bridgeConsensusStatsRawFile));
- bw.append("datetime,authority,brunning,brunningec2");
- bw.newLine();
- for (String line : this.bridgesRaw.values()) {
- bw.append(line);
- bw.newLine();
- }
- bw.close();
- this.logger.fine("Finished writing file "
- + this.bridgeConsensusStatsRawFile.getAbsolutePath() + ".");
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed to write file "
- + this.bridgeConsensusStatsRawFile.getAbsolutePath() + "!",
- e);
- }
-
- /* Add average number of bridges per day to the database. */
- if (connectionUrl != null) {
- try {
- Map<String, String> insertRows = new HashMap<>();
- Map<String, String> updateRows = new HashMap<>();
- insertRows.putAll(this.bridgesPerDay);
- Connection conn = DriverManager.getConnection(connectionUrl);
- conn.setAutoCommit(false);
- Statement statement = conn.createStatement();
- ResultSet rs = statement.executeQuery(
- "SELECT date, avg_running, avg_running_ec2 "
- + "FROM bridge_network_size");
- while (rs.next()) {
- String date = rs.getDate(1).toString();
- if (insertRows.containsKey(date)) {
- String insertRow = insertRows.remove(date);
- String[] parts = insertRow.substring(1).split(",");
- long newAvgRunning = Long.parseLong(parts[0]);
- long newAvgRunningEc2 = Long.parseLong(parts[1]);
- long oldAvgRunning = rs.getLong(2);
- long oldAvgRunningEc2 = rs.getLong(3);
- if (newAvgRunning != oldAvgRunning
- || newAvgRunningEc2 != oldAvgRunningEc2) {
- updateRows.put(date, insertRow);
- }
- }
- }
- rs.close();
- PreparedStatement psU = conn.prepareStatement(
- "UPDATE bridge_network_size SET avg_running = ?, "
- + "avg_running_ec2 = ? WHERE date = ?");
- for (Map.Entry<String, String> e : updateRows.entrySet()) {
- java.sql.Date date = java.sql.Date.valueOf(e.getKey());
- String[] parts = e.getValue().substring(1).split(",");
- long avgRunning = Long.parseLong(parts[0]);
- long avgRunningEc2 = Long.parseLong(parts[1]);
- psU.clearParameters();
- psU.setLong(1, avgRunning);
- psU.setLong(2, avgRunningEc2);
- psU.setDate(3, date);
- psU.executeUpdate();
- }
- PreparedStatement psI = conn.prepareStatement(
- "INSERT INTO bridge_network_size (avg_running, "
- + "avg_running_ec2, date) VALUES (?, ?, ?)");
- for (Map.Entry<String, String> e : insertRows.entrySet()) {
- java.sql.Date date = java.sql.Date.valueOf(e.getKey());
- String[] parts = e.getValue().substring(1).split(",");
- long avgRunning = Long.parseLong(parts[0]);
- long avgRunningEc2 = Long.parseLong(parts[1]);
- psI.clearParameters();
- psI.setLong(1, avgRunning);
- psI.setLong(2, avgRunningEc2);
- psI.setDate(3, date);
- psI.executeUpdate();
- }
- conn.commit();
- conn.close();
- } catch (SQLException e) {
- logger.log(Level.WARNING, "Failed to add average bridge numbers "
- + "to database.", e);
- }
- }
-
- /* Write stats. */
- StringBuilder dumpStats = new StringBuilder("Finished writing "
- + "statistics on bridge network statuses to disk.\nAdded "
- + this.bridgeResultsAdded + " bridge network status(es) in this "
- + "execution.");
- long now = System.currentTimeMillis();
- SimpleDateFormat dateTimeFormat =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- if (this.bridgesRaw.isEmpty()) {
- dumpStats.append("\nNo bridge status known yet.");
- } else {
- dumpStats.append("\nLast known bridge status was published "
- + this.bridgesRaw.lastKey() + ".");
- try {
- if (now - 6L * 60L * 60L * 1000L > dateTimeFormat.parse(
- this.bridgesRaw.lastKey()).getTime()) {
- logger.warning("Last known bridge status is more than 6 hours "
- + "old: " + this.bridgesRaw.lastKey());
- }
- } catch (ParseException e) {
- logger.warning("Can't parse the timestamp? Reason: " + e);
- }
- }
- logger.info(dumpStats.toString());
- }
-}
-
diff --git a/modules/legacy/src/org/torproject/ernie/cron/performance/TorperfProcessor.java b/modules/legacy/src/org/torproject/ernie/cron/performance/TorperfProcessor.java
deleted file mode 100644
index 2883299..0000000
--- a/modules/legacy/src/org/torproject/ernie/cron/performance/TorperfProcessor.java
+++ /dev/null
@@ -1,292 +0,0 @@
-/* Copyright 2011--2017 The Tor Project
- * See LICENSE for licensing information */
-
-package org.torproject.ernie.cron.performance;
-
-import org.torproject.descriptor.Descriptor;
-import org.torproject.descriptor.DescriptorFile;
-import org.torproject.descriptor.DescriptorReader;
-import org.torproject.descriptor.DescriptorSourceFactory;
-import org.torproject.descriptor.TorperfResult;
-
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.SortedMap;
-import java.util.TimeZone;
-import java.util.TreeMap;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-public class TorperfProcessor {
-
- /** Processes Torperf data from the given directory and writes
-   * aggregate statistics to the given stats directory. */
- public TorperfProcessor(File torperfDirectory, File statsDirectory) {
-
- if (torperfDirectory == null || statsDirectory == null) {
- throw new IllegalArgumentException();
- }
-
- Logger logger = Logger.getLogger(TorperfProcessor.class.getName());
- File rawFile = new File(statsDirectory, "torperf-raw");
- File statsFile = new File(statsDirectory, "torperf.csv");
- SortedMap<String, String> rawObs = new TreeMap<>();
- SortedMap<String, String> stats = new TreeMap<>();
- int addedRawObs = 0;
- SimpleDateFormat formatter =
- new SimpleDateFormat("yyyy-MM-dd,HH:mm:ss");
- formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
- try {
- if (rawFile.exists()) {
- logger.fine("Reading file " + rawFile.getAbsolutePath() + "...");
- BufferedReader br = new BufferedReader(new FileReader(rawFile));
- String line = br.readLine(); // ignore header
- while ((line = br.readLine()) != null) {
- if (line.split(",").length != 4) {
- logger.warning("Corrupt line in " + rawFile.getAbsolutePath()
- + "!");
- break;
- }
- String key = line.substring(0, line.lastIndexOf(","));
- rawObs.put(key, line);
- }
- br.close();
- logger.fine("Finished reading file " + rawFile.getAbsolutePath()
- + ".");
- }
- if (statsFile.exists()) {
- logger.fine("Reading file " + statsFile.getAbsolutePath()
- + "...");
- BufferedReader br = new BufferedReader(new FileReader(statsFile));
- String line = br.readLine(); // ignore header
- while ((line = br.readLine()) != null) {
- String[] parts = line.split(",");
- String key = String.format("%s,%s,%s", parts[0], parts[1],
- parts[2]);
- stats.put(key, line);
- }
- br.close();
- logger.fine("Finished reading file " + statsFile.getAbsolutePath()
- + ".");
- }
- if (torperfDirectory.exists()) {
- logger.fine("Importing files in " + torperfDirectory + "/...");
- DescriptorReader descriptorReader =
- DescriptorSourceFactory.createDescriptorReader();
- descriptorReader.addDirectory(torperfDirectory);
- descriptorReader.setExcludeFiles(new File(statsDirectory,
- "torperf-history"));
- Iterator<DescriptorFile> descriptorFiles =
- descriptorReader.readDescriptors();
- while (descriptorFiles.hasNext()) {
- DescriptorFile descriptorFile = descriptorFiles.next();
- if (descriptorFile.getException() != null) {
- logger.log(Level.FINE, "Error parsing file.",
- descriptorFile.getException());
- continue;
- }
- for (Descriptor descriptor : descriptorFile.getDescriptors()) {
- if (!(descriptor instanceof TorperfResult)) {
- continue;
- }
- TorperfResult result = (TorperfResult) descriptor;
- String source = result.getSource();
- long fileSize = result.getFileSize();
- if (fileSize == 51200) {
- source += "-50kb";
- } else if (fileSize == 1048576) {
- source += "-1mb";
- } else if (fileSize == 5242880) {
- source += "-5mb";
- } else {
- logger.fine("Unexpected file size '" + fileSize
- + "'. Skipping.");
- continue;
- }
- String dateTime = formatter.format(result.getStartMillis());
- long completeMillis = result.getDataCompleteMillis()
- - result.getStartMillis();
- String key = source + "," + dateTime;
- String value = key;
- if ((result.didTimeout() == null
- && result.getDataCompleteMillis() < 1)
- || (result.didTimeout() != null && result.didTimeout())) {
- value += ",-2"; // -2 for timeout
- } else if (result.getReadBytes() < fileSize) {
- value += ",-1"; // -1 for failure
- } else {
- value += "," + completeMillis;
- }
- if (!rawObs.containsKey(key)) {
- rawObs.put(key, value);
- addedRawObs++;
- }
- }
- }
- logger.fine("Finished importing files in " + torperfDirectory
- + "/.");
- }
- if (rawObs.size() > 0) {
- logger.fine("Writing file " + rawFile.getAbsolutePath() + "...");
- rawFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(rawFile));
- bw.append("source,date,start,completemillis\n");
- String tempSourceDate = null;
- Iterator<Map.Entry<String, String>> it =
- rawObs.entrySet().iterator();
- List<Long> dlTimes = new ArrayList<>();
- boolean haveWrittenFinalLine = false;
- SortedMap<String, List<Long>> dlTimesAllSources = new TreeMap<>();
- SortedMap<String, long[]> statusesAllSources = new TreeMap<>();
- long failures = 0;
- long timeouts = 0;
- long requests = 0;
- while (it.hasNext() || !haveWrittenFinalLine) {
- Map.Entry<String, String> next =
- it.hasNext() ? it.next() : null;
- if (tempSourceDate != null
- && (next == null || !(next.getValue().split(",")[0] + ","
- + next.getValue().split(",")[1]).equals(tempSourceDate))) {
- if (dlTimes.size() > 4) {
- Collections.sort(dlTimes);
- long q1 = dlTimes.get(dlTimes.size() / 4 - 1);
- long md = dlTimes.get(dlTimes.size() / 2 - 1);
- long q3 = dlTimes.get(dlTimes.size() * 3 / 4 - 1);
- String[] tempParts = tempSourceDate.split("[-,]", 3);
- String tempDate = tempParts[2];
- int tempSize = Integer.parseInt(
- tempParts[1].substring(0, tempParts[1].length() - 2))
- * 1024 * (tempParts[1].endsWith("mb") ? 1024 : 1);
- String tempSource = tempParts[0];
- String tempDateSizeSource = String.format("%s,%d,%s",
- tempDate, tempSize, tempSource);
- stats.put(tempDateSizeSource,
- String.format("%s,%s,%s,%s,%s,%s,%s",
- tempDateSizeSource, q1, md, q3, timeouts, failures,
- requests));
- String allDateSizeSource = String.format("%s,%d,",
- tempDate, tempSize);
- if (dlTimesAllSources.containsKey(allDateSizeSource)) {
- dlTimesAllSources.get(allDateSizeSource).addAll(dlTimes);
- } else {
- dlTimesAllSources.put(allDateSizeSource, dlTimes);
- }
- if (statusesAllSources.containsKey(allDateSizeSource)) {
- long[] status = statusesAllSources.get(allDateSizeSource);
- status[0] += timeouts;
- status[1] += failures;
- status[2] += requests;
- } else {
- long[] status = new long[3];
- status[0] = timeouts;
- status[1] = failures;
- status[2] = requests;
- statusesAllSources.put(allDateSizeSource, status);
- }
- }
- dlTimes = new ArrayList<>();
- failures = timeouts = requests = 0;
- if (next == null) {
- haveWrittenFinalLine = true;
- }
- }
- if (next != null) {
- bw.append(next.getValue() + "\n");
- String[] parts = next.getValue().split(",");
- tempSourceDate = parts[0] + "," + parts[1];
- long completeMillis = Long.parseLong(parts[3]);
- if (completeMillis == -2L) {
- timeouts++;
- } else if (completeMillis == -1L) {
- failures++;
- } else {
- dlTimes.add(Long.parseLong(parts[3]));
- }
- requests++;
- }
- }
- bw.close();
- for (Map.Entry<String, List<Long>> e
- : dlTimesAllSources.entrySet()) {
- String allDateSizeSource = e.getKey();
- dlTimes = e.getValue();
- Collections.sort(dlTimes);
- long[] status = statusesAllSources.get(allDateSizeSource);
- timeouts = status[0];
- failures = status[1];
- requests = status[2];
- long q1 = dlTimes.get(dlTimes.size() / 4 - 1);
- long md = dlTimes.get(dlTimes.size() / 2 - 1);
- long q3 = dlTimes.get(dlTimes.size() * 3 / 4 - 1);
- stats.put(allDateSizeSource,
- String.format("%s,%s,%s,%s,%s,%s,%s",
- allDateSizeSource, q1, md, q3, timeouts, failures,
- requests));
- }
- logger.fine("Finished writing file " + rawFile.getAbsolutePath()
- + ".");
- }
- if (stats.size() > 0) {
- logger.fine("Writing file " + statsFile.getAbsolutePath()
- + "...");
- SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
- dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- String yesterday = dateFormat.format(System.currentTimeMillis()
- - 86400000L);
- statsFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(statsFile));
- bw.append("date,size,source,q1,md,q3,timeouts,failures,"
- + "requests\n");
- for (String s : stats.values()) {
- if (s.compareTo(yesterday) < 0) {
- bw.append(s + "\n");
- }
- }
- bw.close();
- logger.fine("Finished writing file " + statsFile.getAbsolutePath()
- + ".");
- }
- } catch (IOException e) {
- logger.log(Level.WARNING, "Failed writing "
- + rawFile.getAbsolutePath() + " or "
- + statsFile.getAbsolutePath() + "!", e);
- }
-
- /* Write stats. */
- StringBuilder dumpStats = new StringBuilder("Finished writing "
- + "statistics on torperf results.\nAdded " + addedRawObs
- + " new observations in this execution.\n"
- + "Last known obserations by source and file size are:");
- String lastSource = null;
- String lastLine = null;
- for (String s : rawObs.keySet()) {
- String[] parts = s.split(",");
- if (lastSource == null) {
- lastSource = parts[0];
- } else if (!parts[0].equals(lastSource)) {
- String lastKnownObservation = lastLine.split(",")[1] + " "
- + lastLine.split(",")[2];
- dumpStats.append("\n" + lastSource + " " + lastKnownObservation);
- lastSource = parts[0];
- }
- lastLine = s;
- }
- if (lastSource != null) {
- String lastKnownObservation = lastLine.split(",")[1] + " "
- + lastLine.split(",")[2];
- dumpStats.append("\n" + lastSource + " " + lastKnownObservation);
- }
- logger.info(dumpStats.toString());
- }
-}
-
diff --git a/modules/webstats/build.xml b/modules/webstats/build.xml
index bcfe251..3c3291f 100644
--- a/modules/webstats/build.xml
+++ b/modules/webstats/build.xml
@@ -1,8 +1,5 @@
<project default="run" name="webstats" basedir=".">
- <property name="sources" value="src/main/java"/>
- <property name="testsources" value="src/test/java"/>
-
<include file="../../shared/build-base.xml" as="basetask"/>
<target name="clean" depends="basetask.clean"/>
<target name="compile" depends="basetask.compile"/>
diff --git a/shared/build-base.xml b/shared/build-base.xml
index 759e1d0..e6c09de 100644
--- a/shared/build-base.xml
+++ b/shared/build-base.xml
@@ -1,17 +1,18 @@
<project basedir=".">
- <property name="sources" value="src"/>
+ <property name="sources" value="src/main/java"/>
<property name="testsources" value="src/test/java"/>
<property name="libs" value="../../shared/lib"/>
<property name="generated" value="generated"/>
<property name="classes" value="${generated}/classes/"/>
<property name="testclasses" value="${generated}/test-classes/"/>
<property name="source-and-target-java-version" value="1.7" />
- <property name="descriptorversion" value="1.5.0" />
+ <property name="descriptorversion" value="1.6.0" />
<path id="base.classpath">
<pathelement path="${classes}"/>
<fileset dir="${libs}">
+ <include name="commons-codec-1.9.jar"/>
<include name="commons-compress-1.9.jar"/>
<include name="commons-lang-2.6.jar"/>
<include name="descriptor-${descriptorversion}.jar"/>