[tor-commits] [metrics-db/master] Rename packages to org.torproject.collector.**.
karsten at torproject.org
Wed Apr 6 19:43:55 UTC 2016
commit b04182d027833232fce75e79680c2728b6bb8c95
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Wed Apr 6 16:11:03 2016 +0200
Rename packages to org.torproject.collector.**.
Implements #18727.
---
build.xml | 11 +-
.../bridgedescs/BridgeDescriptorParser.java | 46 +
.../bridgedescs/BridgeSnapshotReader.java | 222 ++++
.../bridgedescs/SanitizedBridgesWriter.java | 1322 ++++++++++++++++++++
.../BridgePoolAssignmentsProcessor.java | 272 ++++
.../collector/exitlists/ExitListDownloader.java | 213 ++++
.../torproject/collector/main/Configuration.java | 304 +++++
src/org/torproject/collector/main/LockFile.java | 55 +
.../collector/main/LoggingConfiguration.java | 95 ++
.../collector/relaydescs/ArchiveReader.java | 281 +++++
.../collector/relaydescs/ArchiveWriter.java | 831 ++++++++++++
.../relaydescs/CachedRelayDescriptorReader.java | 250 ++++
.../collector/relaydescs/ReferenceChecker.java | 310 +++++
.../relaydescs/RelayDescriptorDownloader.java | 1090 ++++++++++++++++
.../relaydescs/RelayDescriptorParser.java | 332 +++++
.../collector/torperf/TorperfDownloader.java | 634 ++++++++++
.../db/bridgedescs/BridgeDescriptorParser.java | 46 -
.../ernie/db/bridgedescs/BridgeSnapshotReader.java | 222 ----
.../db/bridgedescs/SanitizedBridgesWriter.java | 1322 --------------------
.../BridgePoolAssignmentsProcessor.java | 272 ----
.../ernie/db/exitlists/ExitListDownloader.java | 213 ----
.../torproject/ernie/db/main/Configuration.java | 304 -----
src/org/torproject/ernie/db/main/LockFile.java | 55 -
.../ernie/db/main/LoggingConfiguration.java | 95 --
.../ernie/db/relaydescs/ArchiveReader.java | 281 -----
.../ernie/db/relaydescs/ArchiveWriter.java | 831 ------------
.../db/relaydescs/CachedRelayDescriptorReader.java | 250 ----
.../ernie/db/relaydescs/ReferenceChecker.java | 310 -----
.../db/relaydescs/RelayDescriptorDownloader.java | 1090 ----------------
.../ernie/db/relaydescs/RelayDescriptorParser.java | 332 -----
.../ernie/db/torperf/TorperfDownloader.java | 634 ----------
31 files changed, 6262 insertions(+), 6263 deletions(-)
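As the identical per-file insertion and deletion counts suggest, the change appears purely mechanical: each class moves from org.torproject.ernie.db.** to org.torproject.collector.**, so only package declarations and imports are rewritten. A minimal sketch of the pattern, using one of the moved classes (the demo class itself is hypothetical):

    // Renaming pattern applied to every moved class, e.g. Configuration:
    //   old: package org.torproject.ernie.db.main;
    //   new: package org.torproject.collector.main;
    import org.torproject.collector.main.Configuration;

    public class RenameDemo {
      public static void main(String[] args) {
        // Resolves under the new package; behavior is unchanged.
        Configuration config = new Configuration();
      }
    }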
diff --git a/build.xml b/build.xml
index 4d7b986..8e21a9b 100644
--- a/build.xml
+++ b/build.xml
@@ -25,7 +25,6 @@
<target name="compile" depends="metrics-lib,init">
<javac srcdir="${sources}"
destdir="${classes}"
- excludes="org/torproject/ernie/web/"
debug="true" debuglevel="lines,source"
includeantruntime="false">
<classpath refid="classpath"/>
@@ -41,35 +40,35 @@
<target name="bridgedescs" depends="compile">
<java fork="true"
maxmemory="2048m"
- classname="org.torproject.ernie.db.bridgedescs.SanitizedBridgesWriter">
+ classname="org.torproject.collector.bridgedescs.SanitizedBridgesWriter">
<classpath refid="classpath"/>
</java>
</target>
<target name="bridgepools" depends="compile">
<java fork="true"
maxmemory="2048m"
- classname="org.torproject.ernie.db.bridgepools.BridgePoolAssignmentsProcessor">
+ classname="org.torproject.collector.bridgepools.BridgePoolAssignmentsProcessor">
<classpath refid="classpath"/>
</java>
</target>
<target name="exitlists" depends="compile">
<java fork="true"
maxmemory="2048m"
- classname="org.torproject.ernie.db.exitlists.ExitListDownloader">
+ classname="org.torproject.collector.exitlists.ExitListDownloader">
<classpath refid="classpath"/>
</java>
</target>
<target name="relaydescs" depends="compile">
<java fork="true"
maxmemory="2048m"
- classname="org.torproject.ernie.db.relaydescs.ArchiveWriter">
+ classname="org.torproject.collector.relaydescs.ArchiveWriter">
<classpath refid="classpath"/>
</java>
</target>
<target name="torperf" depends="compile">
<java fork="true"
maxmemory="2048m"
- classname="org.torproject.ernie.db.torperf.TorperfDownloader">
+ classname="org.torproject.collector.torperf.TorperfDownloader">
<classpath refid="classpath"/>
</java>
</target>
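Note that all five Ant targets keep their names (bridgedescs, bridgepools, exitlists, relaydescs, torperf); only their classname attributes point at the new packages, so an existing invocation like "ant bridgedescs" keeps working unchanged.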
diff --git a/src/org/torproject/collector/bridgedescs/BridgeDescriptorParser.java b/src/org/torproject/collector/bridgedescs/BridgeDescriptorParser.java
new file mode 100644
index 0000000..c14875b
--- /dev/null
+++ b/src/org/torproject/collector/bridgedescs/BridgeDescriptorParser.java
@@ -0,0 +1,46 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.collector.bridgedescs;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+public class BridgeDescriptorParser {
+ private SanitizedBridgesWriter sbw;
+ private Logger logger;
+ public BridgeDescriptorParser(SanitizedBridgesWriter sbw) {
+ this.sbw = sbw;
+ this.logger =
+ Logger.getLogger(BridgeDescriptorParser.class.getName());
+ }
+ public void parse(byte[] allData, String dateTime) {
+ try {
+ BufferedReader br = new BufferedReader(new StringReader(
+ new String(allData, "US-ASCII")));
+ String line = br.readLine();
+ if (line == null) {
+ return;
+ } else if (line.startsWith("router ")) {
+ if (this.sbw != null) {
+ this.sbw.sanitizeAndStoreServerDescriptor(allData);
+ }
+ } else if (line.startsWith("extra-info ")) {
+ if (this.sbw != null) {
+ this.sbw.sanitizeAndStoreExtraInfoDescriptor(allData);
+ }
+ } else {
+ if (this.sbw != null) {
+ this.sbw.sanitizeAndStoreNetworkStatus(allData, dateTime);
+ }
+ }
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not parse bridge descriptor.",
+ e);
+ return;
+ }
+ }
+}
+
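The parser's contract is small: it dispatches on the descriptor's first keyword and hands the raw bytes to the writer. A minimal usage sketch (the descriptor line and timestamp below are made up; with a null writer the call only exercises the dispatch):

    import org.torproject.collector.bridgedescs.BridgeDescriptorParser;

    public class ParserDispatchDemo {
      public static void main(String[] args) throws Exception {
        /* No writer attached: parse() classifies but stores nothing. */
        BridgeDescriptorParser bdp = new BridgeDescriptorParser(null);
        /* "router " -> server descriptor, "extra-info " -> extra-info
         * descriptor, anything else -> network status. */
        byte[] data = "router Unnamed 10.0.0.1 9001 0 0\n".getBytes("US-ASCII");
        bdp.parse(data, "2016-04-06 19:00:00");
      }
    }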
diff --git a/src/org/torproject/collector/bridgedescs/BridgeSnapshotReader.java b/src/org/torproject/collector/bridgedescs/BridgeSnapshotReader.java
new file mode 100644
index 0000000..b16938d
--- /dev/null
+++ b/src/org/torproject/collector/bridgedescs/BridgeSnapshotReader.java
@@ -0,0 +1,222 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.collector.bridgedescs;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.Stack;
+import java.util.TreeSet;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
+import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
+
+/**
+ * Reads the half-hourly snapshots of bridge descriptors from Tonga.
+ */
+public class BridgeSnapshotReader {
+ public BridgeSnapshotReader(BridgeDescriptorParser bdp,
+ File bridgeDirectoriesDir, File statsDirectory) {
+
+ if (bdp == null || bridgeDirectoriesDir == null ||
+ statsDirectory == null) {
+ throw new IllegalArgumentException();
+ }
+
+ Logger logger =
+ Logger.getLogger(BridgeSnapshotReader.class.getName());
+ SortedSet<String> parsed = new TreeSet<String>();
+ File bdDir = bridgeDirectoriesDir;
+ File pbdFile = new File(statsDirectory, "parsed-bridge-directories");
+ boolean modified = false;
+ if (bdDir.exists()) {
+ if (pbdFile.exists()) {
+ logger.fine("Reading file " + pbdFile.getAbsolutePath() + "...");
+ try {
+ BufferedReader br = new BufferedReader(new FileReader(pbdFile));
+ String line = null;
+ while ((line = br.readLine()) != null) {
+ parsed.add(line);
+ }
+ br.close();
+ logger.fine("Finished reading file "
+ + pbdFile.getAbsolutePath() + ".");
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Failed reading file "
+ + pbdFile.getAbsolutePath() + "!", e);
+ return;
+ }
+ }
+ logger.fine("Importing files in directory " + bridgeDirectoriesDir
+ + "/...");
+ Set<String> descriptorImportHistory = new HashSet<String>();
+ int parsedFiles = 0, skippedFiles = 0, parsedStatuses = 0,
+ parsedServerDescriptors = 0, skippedServerDescriptors = 0,
+ parsedExtraInfoDescriptors = 0, skippedExtraInfoDescriptors = 0;
+ Stack<File> filesInInputDir = new Stack<File>();
+ filesInInputDir.add(bdDir);
+ while (!filesInInputDir.isEmpty()) {
+ File pop = filesInInputDir.pop();
+ if (pop.isDirectory()) {
+ for (File f : pop.listFiles()) {
+ filesInInputDir.add(f);
+ }
+ } else if (!parsed.contains(pop.getName())) {
+ try {
+ FileInputStream in = new FileInputStream(pop);
+ if (in.available() > 0) {
+ TarArchiveInputStream tais = null;
+ if (pop.getName().endsWith(".tar.gz")) {
+ GzipCompressorInputStream gcis =
+ new GzipCompressorInputStream(in);
+ tais = new TarArchiveInputStream(gcis);
+ } else if (pop.getName().endsWith(".tar")) {
+ tais = new TarArchiveInputStream(in);
+ } else {
+ continue;
+ }
+ BufferedInputStream bis = new BufferedInputStream(tais);
+ String fn = pop.getName();
+ String dateTime = fn.substring(11, 21) + " "
+ + fn.substring(22, 24) + ":" + fn.substring(24, 26)
+ + ":" + fn.substring(26, 28);
+ while ((tais.getNextTarEntry()) != null) {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ int len;
+ byte[] data = new byte[1024];
+ while ((len = bis.read(data, 0, 1024)) >= 0) {
+ baos.write(data, 0, len);
+ }
+ byte[] allData = baos.toByteArray();
+ if (allData.length == 0) {
+ continue;
+ }
+ String fileDigest = Hex.encodeHexString(DigestUtils.sha(
+ allData));
+ String ascii = new String(allData, "US-ASCII");
+ BufferedReader br3 = new BufferedReader(new StringReader(
+ ascii));
+ String firstLine = null;
+ while ((firstLine = br3.readLine()) != null) {
+ if (firstLine.startsWith("@")) {
+ continue;
+ } else {
+ break;
+ }
+ }
+ if (firstLine.startsWith("published ") ||
+ firstLine.startsWith("flag-thresholds ") ||
+ firstLine.startsWith("r ")) {
+ bdp.parse(allData, dateTime);
+ parsedStatuses++;
+ } else if (descriptorImportHistory.contains(fileDigest)) {
+ /* Skip server descriptors or extra-info descriptors if
+ * we parsed them before. */
+ skippedFiles++;
+ continue;
+ } else {
+ int start = -1, sig = -1, end = -1;
+ String startToken =
+ firstLine.startsWith("router ") ?
+ "router " : "extra-info ";
+ String sigToken = "\nrouter-signature\n";
+ String endToken = "\n-----END SIGNATURE-----\n";
+ while (end < ascii.length()) {
+ start = ascii.indexOf(startToken, end);
+ if (start < 0) {
+ break;
+ }
+ sig = ascii.indexOf(sigToken, start);
+ if (sig < 0) {
+ break;
+ }
+ sig += sigToken.length();
+ end = ascii.indexOf(endToken, sig);
+ if (end < 0) {
+ break;
+ }
+ end += endToken.length();
+ byte[] descBytes = new byte[end - start];
+ System.arraycopy(allData, start, descBytes, 0,
+ end - start);
+ String descriptorDigest = Hex.encodeHexString(
+ DigestUtils.sha(descBytes));
+ if (!descriptorImportHistory.contains(
+ descriptorDigest)) {
+ bdp.parse(descBytes, dateTime);
+ descriptorImportHistory.add(descriptorDigest);
+ if (firstLine.startsWith("router ")) {
+ parsedServerDescriptors++;
+ } else {
+ parsedExtraInfoDescriptors++;
+ }
+ } else {
+ if (firstLine.startsWith("router ")) {
+ skippedServerDescriptors++;
+ } else {
+ skippedExtraInfoDescriptors++;
+ }
+ }
+ }
+ }
+ descriptorImportHistory.add(fileDigest);
+ parsedFiles++;
+ }
+ bis.close();
+ }
+ in.close();
+
+ /* Let's give some memory back, or we'll run out of it. */
+ System.gc();
+
+ parsed.add(pop.getName());
+ modified = true;
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Could not parse bridge snapshot "
+ + pop.getName() + "!", e);
+ continue;
+ }
+ }
+ }
+ logger.fine("Finished importing files in directory "
+ + bridgeDirectoriesDir + "/. In total, we parsed "
+ + parsedFiles + " files (skipped " + skippedFiles
+ + ") containing " + parsedStatuses + " statuses, "
+ + parsedServerDescriptors + " server descriptors (skipped "
+ + skippedServerDescriptors + "), and "
+ + parsedExtraInfoDescriptors + " extra-info descriptors "
+ + "(skipped " + skippedExtraInfoDescriptors + ").");
+ if (!parsed.isEmpty() && modified) {
+ logger.fine("Writing file " + pbdFile.getAbsolutePath() + "...");
+ try {
+ pbdFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(pbdFile));
+ for (String f : parsed) {
+ bw.append(f + "\n");
+ }
+ bw.close();
+ logger.fine("Finished writing file " + pbdFile.getAbsolutePath()
+ + ".");
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Failed writing file "
+ + pbdFile.getAbsolutePath() + "!", e);
+ }
+ }
+ }
+ }
+}
+
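The dateTime string above is cut out of the snapshot file name at fixed offsets. A standalone sketch of that parse, assuming a name of the form "from-tonga-YYYY-MM-DD-HHMMSS.tar.gz", which is what the offsets imply rather than anything the code states (the concrete file name is hypothetical):

    public class SnapshotNameDemo {
      public static void main(String[] args) {
        String fn = "from-tonga-2016-04-06-194300.tar.gz";  // hypothetical name
        /* Offsets 11-20 hold the date and 22-27 the time, as in
         * BridgeSnapshotReader above. */
        String dateTime = fn.substring(11, 21) + " "
            + fn.substring(22, 24) + ":" + fn.substring(24, 26)
            + ":" + fn.substring(26, 28);
        System.out.println(dateTime);  // prints "2016-04-06 19:43:00"
      }
    }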
diff --git a/src/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java b/src/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
new file mode 100644
index 0000000..cf2a1d0
--- /dev/null
+++ b/src/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
@@ -0,0 +1,1322 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.collector.bridgedescs;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.StringReader;
+import java.io.UnsupportedEncodingException;
+import java.security.GeneralSecurityException;
+import java.security.SecureRandom;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.Stack;
+import java.util.TimeZone;
+import java.util.TreeMap;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.apache.commons.codec.DecoderException;
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.torproject.collector.main.Configuration;
+import org.torproject.collector.main.LockFile;
+import org.torproject.collector.main.LoggingConfiguration;
+
+/**
+ * Sanitizes bridge descriptors, i.e., removes all possibly sensitive
+ * information from them, and writes them to a local directory structure.
+ * During the sanitizing process, all information about the bridge
+ * identity or IP address are removed or replaced. The goal is to keep the
+ * sanitized bridge descriptors useful for statistical analysis while not
+ * making it easier for an adversary to enumerate bridges.
+ *
+ * There are three types of bridge descriptors: bridge network statuses
+ * (lists of all bridges at a given time), server descriptors (published
+ * by the bridge to advertise their capabilities), and extra-info
+ * descriptors (published by the bridge, mainly for statistical analysis).
+ */
+public class SanitizedBridgesWriter extends Thread {
+
+ public static void main(String[] args) {
+
+ /* Initialize logging configuration. */
+ new LoggingConfiguration("bridge-descriptors");
+ Logger logger = Logger.getLogger(
+ SanitizedBridgesWriter.class.getName());
+ logger.info("Starting bridge-descriptors module of ERNIE.");
+
+ // Initialize configuration
+ Configuration config = new Configuration();
+
+ // Use lock file to avoid overlapping runs
+ LockFile lf = new LockFile("bridge-descriptors");
+ if (!lf.acquireLock()) {
+ logger.severe("Warning: ERNIE is already running or has not exited "
+ + "cleanly! Exiting!");
+ System.exit(1);
+ }
+
+ // Sanitize bridge descriptors
+ new SanitizedBridgesWriter(config).run();
+
+ // Remove lock file
+ lf.releaseLock();
+
+ logger.info("Terminating bridge-descriptors module of ERNIE.");
+ }
+
+ private Configuration config;
+
+ /**
+ * Initializes this class.
+ */
+ public SanitizedBridgesWriter(Configuration config) {
+ this.config = config;
+ }
+
+ /**
+ * Logger for this class.
+ */
+ private Logger logger;
+
+ private String rsyncCatString;
+
+ private File bridgeDirectoriesDirectory;
+
+ /**
+ * Output directory for writing sanitized bridge descriptors.
+ */
+ private File sanitizedBridgesDirectory;
+
+ private boolean replaceIPAddressesWithHashes;
+
+ private boolean persistenceProblemWithSecrets;
+
+ private SortedMap<String, byte[]> secretsForHashingIPAddresses;
+
+ private String bridgeSanitizingCutOffTimestamp;
+
+ private boolean haveWarnedAboutInterval;
+
+ private File bridgeIpSecretsFile;
+
+ private SecureRandom secureRandom;
+
+ public void run() {
+
+ File bridgeDirectoriesDirectory =
+ new File(config.getBridgeSnapshotsDirectory());
+ File sanitizedBridgesDirectory =
+ new File(config.getSanitizedBridgesWriteDirectory());
+ boolean replaceIPAddressesWithHashes =
+ config.getReplaceIPAddressesWithHashes();
+ long limitBridgeSanitizingInterval =
+ config.getLimitBridgeDescriptorMappings();
+ File statsDirectory = new File("stats");
+
+ if (bridgeDirectoriesDirectory == null ||
+ sanitizedBridgesDirectory == null || statsDirectory == null) {
+ throw new IllegalArgumentException();
+ }
+
+ /* Memorize argument values. */
+ this.bridgeDirectoriesDirectory = bridgeDirectoriesDirectory;
+ this.sanitizedBridgesDirectory = sanitizedBridgesDirectory;
+ this.replaceIPAddressesWithHashes = replaceIPAddressesWithHashes;
+
+ /* Initialize logger. */
+ this.logger = Logger.getLogger(
+ SanitizedBridgesWriter.class.getName());
+
+ SimpleDateFormat rsyncCatFormat = new SimpleDateFormat(
+ "yyyy-MM-dd-HH-mm-ss");
+ rsyncCatFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ this.rsyncCatString = rsyncCatFormat.format(
+ System.currentTimeMillis());
+
+ /* Initialize secure random number generator if we need it. */
+ if (this.replaceIPAddressesWithHashes) {
+ try {
+ this.secureRandom = SecureRandom.getInstance("SHA1PRNG", "SUN");
+ } catch (GeneralSecurityException e) {
+ this.logger.log(Level.WARNING, "Could not initialize secure "
+ + "random number generator! Not calculating any IP address "
+ + "hashes in this execution!", e);
+ this.persistenceProblemWithSecrets = true;
+ }
+ }
+
+ /* Read hex-encoded secrets for replacing IP addresses with hashes
+ * from disk. */
+ this.secretsForHashingIPAddresses = new TreeMap<String, byte[]>();
+ this.bridgeIpSecretsFile = new File(statsDirectory,
+ "bridge-ip-secrets");
+ if (this.bridgeIpSecretsFile.exists()) {
+ try {
+ BufferedReader br = new BufferedReader(new FileReader(
+ this.bridgeIpSecretsFile));
+ String line;
+ while ((line = br.readLine()) != null) {
+ String[] parts = line.split(",");
+ if ((line.length() != ("yyyy-MM,".length() + 31 * 2) &&
+ line.length() != ("yyyy-MM,".length() + 50 * 2)) ||
+ parts.length != 2) {
+ this.logger.warning("Invalid line in bridge-ip-secrets file "
+ + "starting with '" + line.substring(0, 7) + "'! "
+ + "Not calculating any IP address hashes in this "
+ + "execution!");
+ this.persistenceProblemWithSecrets = true;
+ break;
+ }
+ String month = parts[0];
+ byte[] secret = Hex.decodeHex(parts[1].toCharArray());
+ this.secretsForHashingIPAddresses.put(month, secret);
+ }
+ br.close();
+ if (!this.persistenceProblemWithSecrets) {
+ this.logger.fine("Read "
+ + this.secretsForHashingIPAddresses.size() + " secrets for "
+ + "hashing bridge IP addresses.");
+ }
+ } catch (DecoderException e) {
+ this.logger.log(Level.WARNING, "Failed to decode hex string in "
+ + this.bridgeIpSecretsFile + "! Not calculating any IP "
+ + "address hashes in this execution!", e);
+ this.persistenceProblemWithSecrets = true;
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed to read "
+ + this.bridgeIpSecretsFile + "! Not calculating any IP "
+ + "address hashes in this execution!", e);
+ this.persistenceProblemWithSecrets = true;
+ }
+ }
+
+ /* If we're configured to keep secrets only for a limited time, define
+ * the cut-off day and time. */
+ if (limitBridgeSanitizingInterval >= 0L) {
+ SimpleDateFormat formatter = new SimpleDateFormat(
+ "yyyy-MM-dd HH:mm:ss");
+ formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+ this.bridgeSanitizingCutOffTimestamp = formatter.format(
+ System.currentTimeMillis() - 24L * 60L * 60L * 1000L
+ * limitBridgeSanitizingInterval);
+ } else {
+ this.bridgeSanitizingCutOffTimestamp = "1999-12-31 23:59:59";
+ }
+
+ // Prepare bridge descriptor parser
+ BridgeDescriptorParser bdp = new BridgeDescriptorParser(this);
+
+ // Import bridge descriptors
+ new BridgeSnapshotReader(bdp, this.bridgeDirectoriesDirectory,
+ statsDirectory);
+
+ // Finish writing sanitized bridge descriptors to disk
+ this.finishWriting();
+
+ this.checkStaleDescriptors();
+
+ this.cleanUpRsyncDirectory();
+ }
+
+ private String scrubOrAddress(String orAddress, byte[] fingerprintBytes,
+ String published) throws IOException {
+ if (!orAddress.contains(":")) {
+ /* Malformed or-address or "a" line. */
+ return null;
+ }
+ String addressPart = orAddress.substring(0,
+ orAddress.lastIndexOf(":"));
+ String portPart = orAddress.substring(orAddress.lastIndexOf(":") + 1);
+ String scrubbedAddressPart = null;
+ if (addressPart.startsWith("[")) {
+ scrubbedAddressPart = this.scrubIpv6Address(addressPart,
+ fingerprintBytes, published);
+ } else {
+ scrubbedAddressPart = this.scrubIpv4Address(addressPart,
+ fingerprintBytes, published);
+ }
+ return (scrubbedAddressPart == null ? null :
+ scrubbedAddressPart + ":" + portPart);
+ }
+
+ private String scrubIpv4Address(String address, byte[] fingerprintBytes,
+ String published) throws IOException {
+ if (this.replaceIPAddressesWithHashes) {
+ if (this.persistenceProblemWithSecrets) {
+ /* There's a persistence problem, so we shouldn't scrub more IP
+ * addresses in this execution. */
+ return null;
+ }
+ byte[] hashInput = new byte[4 + 20 + 31];
+ String[] ipParts = address.split("\\.");
+ for (int i = 0; i < 4; i++) {
+ hashInput[i] = (byte) Integer.parseInt(ipParts[i]);
+ }
+ System.arraycopy(fingerprintBytes, 0, hashInput, 4, 20);
+ String month = published.substring(0, "yyyy-MM".length());
+ byte[] secret = this.getSecretForMonth(month);
+ System.arraycopy(secret, 0, hashInput, 24, 31);
+ byte[] hashOutput = DigestUtils.sha256(hashInput);
+ String hashedAddress = "10."
+ + (((int) hashOutput[0] + 256) % 256) + "."
+ + (((int) hashOutput[1] + 256) % 256) + "."
+ + (((int) hashOutput[2] + 256) % 256);
+ return hashedAddress;
+ } else {
+ return "127.0.0.1";
+ }
+ }
+
+ private String scrubIpv6Address(String address, byte[] fingerprintBytes,
+ String published) throws IOException {
+ StringBuilder sb = new StringBuilder("[fd9f:2e19:3bcf::");
+ if (this.replaceIPAddressesWithHashes) {
+ if (this.persistenceProblemWithSecrets) {
+ /* There's a persistence problem, so we shouldn't scrub more IP
+ * addresses in this execution. */
+ return null;
+ }
+ byte[] hashInput = new byte[16 + 20 + 19];
+ String[] doubleColonSeparatedParts = address.substring(1,
+ address.length() - 1).split("::", -1);
+ if (doubleColonSeparatedParts.length > 2) {
+ /* Invalid IPv6 address. */
+ return null;
+ }
+ List<String> hexParts = new ArrayList<String>();
+ for (String doubleColonSeparatedPart : doubleColonSeparatedParts) {
+ StringBuilder hexPart = new StringBuilder();
+ String[] parts = doubleColonSeparatedPart.split(":", -1);
+ if (parts.length < 1 || parts.length > 8) {
+ /* Invalid IPv6 address. */
+ return null;
+ }
+ for (int i = 0; i < parts.length; i++) {
+ String part = parts[i];
+ if (part.contains(".")) {
+ String[] ipParts = part.split("\\.");
+ byte[] ipv4Bytes = new byte[4];
+ if (ipParts.length != 4) {
+ /* Invalid IPv4 part in IPv6 address. */
+ return null;
+ }
+ for (int m = 0; m < 4; m++) {
+ ipv4Bytes[m] = (byte) Integer.parseInt(ipParts[m]);
+ }
+ hexPart.append(Hex.encodeHexString(ipv4Bytes));
+ } else if (part.length() > 4) {
+ /* Invalid IPv6 address. */
+ return null;
+ } else {
+ for (int k = part.length(); k < 4; k++) {
+ hexPart.append("0");
+ }
+ hexPart.append(part);
+ }
+ }
+ hexParts.add(hexPart.toString());
+ }
+ StringBuilder hex = new StringBuilder();
+ hex.append(hexParts.get(0));
+ if (hexParts.size() == 2) {
+ for (int i = 32 - hexParts.get(0).length()
+ - hexParts.get(1).length(); i > 0; i--) {
+ hex.append("0");
+ }
+ hex.append(hexParts.get(1));
+ }
+ byte[] ipBytes = null;
+ try {
+ ipBytes = Hex.decodeHex(hex.toString().toCharArray());
+ } catch (DecoderException e) {
+ /* TODO Invalid IPv6 address. */
+ return null;
+ }
+ if (ipBytes.length != 16) {
+ /* TODO Invalid IPv6 address. */
+ return null;
+ }
+ System.arraycopy(ipBytes, 0, hashInput, 0, 16);
+ System.arraycopy(fingerprintBytes, 0, hashInput, 16, 20);
+ String month = published.substring(0, "yyyy-MM".length());
+ byte[] secret = this.getSecretForMonth(month);
+ System.arraycopy(secret, 31, hashInput, 36, 19);
+ String hashOutput = DigestUtils.sha256Hex(hashInput);
+ sb.append(hashOutput.substring(hashOutput.length() - 6,
+ hashOutput.length() - 4));
+ sb.append(":");
+ sb.append(hashOutput.substring(hashOutput.length() - 4));
+ }
+ sb.append("]");
+ return sb.toString();
+ }
+
+ private byte[] getSecretForMonth(String month) throws IOException {
+ if (!this.secretsForHashingIPAddresses.containsKey(month) ||
+ this.secretsForHashingIPAddresses.get(month).length == 31) {
+ byte[] secret = new byte[50];
+ this.secureRandom.nextBytes(secret);
+ if (this.secretsForHashingIPAddresses.containsKey(month)) {
+ System.arraycopy(this.secretsForHashingIPAddresses.get(month), 0,
+ secret, 0, 31);
+ }
+ if (month.compareTo(
+ this.bridgeSanitizingCutOffTimestamp) < 0) {
+ this.logger.warning("Generated a secret that we won't make "
+ + "persistent, because it's outside our bridge descriptor "
+ + "sanitizing interval.");
+ } else {
+ /* Append secret to file on disk immediately before using it, or
+ * we might end up with inconsistently sanitized bridges. */
+ try {
+ if (!this.bridgeIpSecretsFile.exists()) {
+ this.bridgeIpSecretsFile.getParentFile().mkdirs();
+ }
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ this.bridgeIpSecretsFile,
+ this.bridgeIpSecretsFile.exists()));
+ bw.write(month + "," + Hex.encodeHexString(secret) + "\n");
+ bw.close();
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not store new secret "
+ + "to disk! Not calculating any IP address hashes in "
+ + "this execution!", e);
+ this.persistenceProblemWithSecrets = true;
+ throw new IOException(e);
+ }
+ }
+ this.secretsForHashingIPAddresses.put(month, secret);
+ }
+ return this.secretsForHashingIPAddresses.get(month);
+ }
+
+ private String maxNetworkStatusPublishedTime = "1970-01-01 00:00:00";
+
+ /**
+ * Sanitizes a network status and writes it to disk.
+ */
+ public void sanitizeAndStoreNetworkStatus(byte[] data,
+ String publicationTime) {
+
+ if (this.persistenceProblemWithSecrets) {
+ /* There's a persistence problem, so we shouldn't scrub more IP
+ * addresses in this execution. */
+ return;
+ }
+
+ if (publicationTime.compareTo(maxNetworkStatusPublishedTime) > 0) {
+ maxNetworkStatusPublishedTime = publicationTime;
+ }
+
+ if (this.bridgeSanitizingCutOffTimestamp.
+ compareTo(publicationTime) > 0) {
+ this.logger.log(!this.haveWarnedAboutInterval ? Level.WARNING
+ : Level.FINE, "Sanitizing and storing network status with "
+ + "publication time outside our descriptor sanitizing "
+ + "interval.");
+ this.haveWarnedAboutInterval = true;
+ }
+
+ /* Parse the given network status line by line. */
+ StringBuilder header = new StringBuilder();
+ SortedMap<String, String> scrubbedLines =
+ new TreeMap<String, String>();
+ try {
+ StringBuilder scrubbed = new StringBuilder();
+ BufferedReader br = new BufferedReader(new StringReader(new String(
+ data, "US-ASCII")));
+ String line = null;
+ String mostRecentDescPublished = null;
+ byte[] fingerprintBytes = null;
+ String descPublicationTime = null;
+ String hashedBridgeIdentityHex = null;
+ while ((line = br.readLine()) != null) {
+
+ /* Use publication time from "published" line instead of the
+ * file's last-modified time. Don't copy over the line, because
+ * we're going to write a "published" line below. */
+ if (line.startsWith("published ")) {
+ publicationTime = line.substring("published ".length());
+
+ /* Additional header lines don't have to be cleaned up. */
+ } else if (line.startsWith("flag-thresholds ")) {
+ header.append(line + "\n");
+
+ /* r lines contain sensitive information that needs to be removed
+ * or replaced. */
+ } else if (line.startsWith("r ")) {
+
+ /* Clear buffer from previously scrubbed lines. */
+ if (scrubbed.length() > 0) {
+ String scrubbedLine = scrubbed.toString();
+ scrubbedLines.put(hashedBridgeIdentityHex, scrubbedLine);
+ scrubbed = new StringBuilder();
+ }
+
+ /* Parse the relevant parts of this r line. */
+ String[] parts = line.split(" ");
+ String nickname = parts[1];
+ fingerprintBytes = Base64.decodeBase64(parts[2] + "==");
+ String descriptorIdentifier = parts[3];
+ descPublicationTime = parts[4] + " " + parts[5];
+ String address = parts[6];
+ String orPort = parts[7];
+ String dirPort = parts[8];
+
+ /* Determine most recent descriptor publication time. */
+ if (descPublicationTime.compareTo(publicationTime) <= 0 &&
+ (mostRecentDescPublished == null ||
+ descPublicationTime.compareTo(
+ mostRecentDescPublished) > 0)) {
+ mostRecentDescPublished = descPublicationTime;
+ }
+
+ /* Write scrubbed r line to buffer. */
+ byte[] hashedBridgeIdentity = DigestUtils.sha(fingerprintBytes);
+ String hashedBridgeIdentityBase64 = Base64.encodeBase64String(
+ hashedBridgeIdentity).substring(0, 27);
+ hashedBridgeIdentityHex = Hex.encodeHexString(
+ hashedBridgeIdentity);
+ String hashedDescriptorIdentifier = Base64.encodeBase64String(
+ DigestUtils.sha(Base64.decodeBase64(descriptorIdentifier
+ + "=="))).substring(0, 27);
+ String scrubbedAddress = scrubIpv4Address(address,
+ fingerprintBytes,
+ descPublicationTime);
+ scrubbed.append("r " + nickname + " "
+ + hashedBridgeIdentityBase64 + " "
+ + hashedDescriptorIdentifier + " " + descPublicationTime
+ + " " + scrubbedAddress + " " + orPort + " " + dirPort
+ + "\n");
+
+ /* Sanitize any addresses in a lines using the fingerprint and
+ * descriptor publication time from the previous r line. */
+ } else if (line.startsWith("a ")) {
+ String scrubbedOrAddress = scrubOrAddress(
+ line.substring("a ".length()), fingerprintBytes,
+ descPublicationTime);
+ if (scrubbedOrAddress != null) {
+ scrubbed.append("a " + scrubbedOrAddress + "\n");
+ } else {
+ this.logger.warning("Invalid address in line '" + line
+ + "' in bridge network status. Skipping line!");
+ }
+
+ /* Nothing special about s, w, and p lines; just copy them. */
+ } else if (line.startsWith("s ") || line.equals("s") ||
+ line.startsWith("w ") || line.equals("w") ||
+ line.startsWith("p ") || line.equals("p")) {
+ scrubbed.append(line + "\n");
+
+ /* There should be nothing else but r, a, s, w, and p lines in the
+ * network status. If there is, we should probably learn about it
+ * before writing anything to the sanitized descriptors. */
+ } else {
+ this.logger.fine("Unknown line '" + line + "' in bridge "
+ + "network status. Not writing to disk!");
+ return;
+ }
+ }
+ br.close();
+ if (scrubbed.length() > 0) {
+ String scrubbedLine = scrubbed.toString();
+ scrubbedLines.put(hashedBridgeIdentityHex, scrubbedLine);
+ scrubbed = new StringBuilder();
+ }
+
+ /* Check if we can tell from the descriptor publication times
+ * whether this status is possibly stale. */
+ SimpleDateFormat formatter = new SimpleDateFormat(
+ "yyyy-MM-dd HH:mm:ss");
+ formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+ if (formatter.parse(publicationTime).getTime() -
+ formatter.parse(mostRecentDescPublished).getTime() >
+ 60L * 60L * 1000L) {
+ this.logger.warning("The most recent descriptor in the bridge "
+ + "network status published at " + publicationTime + " was "
+ + "published at " + mostRecentDescPublished + " which is "
+ + "more than 1 hour before the status. This is a sign for "
+ + "the status being stale. Please check!");
+ }
+ } catch (ParseException e) {
+ this.logger.log(Level.WARNING, "Could not parse timestamp in "
+ + "bridge network status.", e);
+ return;
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not parse bridge network "
+ + "status.", e);
+ return;
+ }
+
+ /* Write the sanitized network status to disk. */
+ try {
+ String syear = publicationTime.substring(0, 4);
+ String smonth = publicationTime.substring(5, 7);
+ String sday = publicationTime.substring(8, 10);
+ String stime = publicationTime.substring(11, 13)
+ + publicationTime.substring(14, 16)
+ + publicationTime.substring(17, 19);
+ File tarballFile = new File(
+ this.sanitizedBridgesDirectory.getAbsolutePath() + "/" + syear
+ + "/" + smonth + "/statuses/" + sday + "/" + syear + smonth
+ + sday + "-" + stime + "-"
+ + "4A0CCD2DDC7995083D73F5D667100C8A5831F16D");
+ File rsyncFile = new File("recent/bridge-descriptors/statuses/"
+ + tarballFile.getName());
+ File[] outputFiles = new File[] { tarballFile, rsyncFile };
+ for (File outputFile : outputFiles) {
+ outputFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ outputFile));
+ bw.write("@type bridge-network-status 1.0\n");
+ bw.write("published " + publicationTime + "\n");
+ bw.write(header.toString());
+ for (String scrubbed : scrubbedLines.values()) {
+ bw.write(scrubbed);
+ }
+ bw.close();
+ }
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not write sanitized bridge "
+ + "network status to disk.", e);
+ return;
+ }
+ }
+
+ private String maxServerDescriptorPublishedTime = "1970-01-01 00:00:00";
+
+ /**
+ * Sanitizes a bridge server descriptor and writes it to disk.
+ */
+ public void sanitizeAndStoreServerDescriptor(byte[] data) {
+
+ if (this.persistenceProblemWithSecrets) {
+ /* There's a persistence problem, so we shouldn't scrub more IP
+ * addresses in this execution. */
+ return;
+ }
+
+ /* Parse descriptor to generate a sanitized version. */
+ String scrubbedDesc = null, published = null,
+ masterKeyEd25519FromIdentityEd25519 = null;
+ try {
+ BufferedReader br = new BufferedReader(new StringReader(
+ new String(data, "US-ASCII")));
+ StringBuilder scrubbed = new StringBuilder();
+ String line = null, hashedBridgeIdentity = null, address = null,
+ routerLine = null, scrubbedAddress = null,
+ masterKeyEd25519 = null;
+ List<String> orAddresses = null, scrubbedOrAddresses = null;
+ boolean skipCrypto = false;
+ while ((line = br.readLine()) != null) {
+
+ /* Skip all crypto parts that might be used to derive the bridge's
+ * identity fingerprint. */
+ if (skipCrypto && !line.startsWith("-----END ")) {
+ continue;
+
+ /* Store the router line for later processing, because we may need
+ * the bridge identity fingerprint for replacing the IP address in
+ * the scrubbed version. */
+ } else if (line.startsWith("router ")) {
+ address = line.split(" ")[2];
+ routerLine = line;
+
+ /* Store or-address parts in a list and sanitize them when we have
+ * read the fingerprint. */
+ } else if (line.startsWith("or-address ")) {
+ if (orAddresses == null) {
+ orAddresses = new ArrayList<String>();
+ }
+ orAddresses.add(line.substring("or-address ".length()));
+
+ /* Parse the publication time to see if we're still inside the
+ * sanitizing interval. */
+ } else if (line.startsWith("published ")) {
+ published = line.substring("published ".length());
+ if (published.compareTo(maxServerDescriptorPublishedTime) > 0) {
+ maxServerDescriptorPublishedTime = published;
+ }
+ if (this.bridgeSanitizingCutOffTimestamp.
+ compareTo(published) > 0) {
+ this.logger.log(!this.haveWarnedAboutInterval
+ ? Level.WARNING : Level.FINE, "Sanitizing and storing "
+ + "server descriptor with publication time outside our "
+ + "descriptor sanitizing interval.");
+ this.haveWarnedAboutInterval = true;
+ }
+ scrubbed.append(line + "\n");
+
+ /* Parse the fingerprint to determine the hashed bridge
+ * identity. */
+ } else if (line.startsWith("opt fingerprint ") ||
+ line.startsWith("fingerprint ")) {
+ String fingerprint = line.substring(line.startsWith("opt ") ?
+ "opt fingerprint".length() : "fingerprint".length()).
+ replaceAll(" ", "").toLowerCase();
+ byte[] fingerprintBytes = Hex.decodeHex(
+ fingerprint.toCharArray());
+ hashedBridgeIdentity = DigestUtils.shaHex(fingerprintBytes).
+ toLowerCase();
+ try {
+ scrubbedAddress = scrubIpv4Address(address, fingerprintBytes,
+ published);
+ if (orAddresses != null) {
+ scrubbedOrAddresses = new ArrayList<String>();
+ for (String orAddress : orAddresses) {
+ String scrubbedOrAddress = scrubOrAddress(orAddress,
+ fingerprintBytes, published);
+ if (scrubbedOrAddress != null) {
+ scrubbedOrAddresses.add(scrubbedOrAddress);
+ } else {
+ this.logger.warning("Invalid address in line "
+ + "'or-address " + orAddress + "' in bridge server "
+ + "descriptor. Skipping line!");
+ }
+ }
+ }
+ } catch (IOException e) {
+ /* There's a persistence problem, so we shouldn't scrub more
+ * IP addresses in this execution. */
+ this.persistenceProblemWithSecrets = true;
+ return;
+ }
+ scrubbed.append((line.startsWith("opt ") ? "opt " : "")
+ + "fingerprint");
+ for (int i = 0; i < hashedBridgeIdentity.length() / 4; i++)
+ scrubbed.append(" " + hashedBridgeIdentity.substring(4 * i,
+ 4 * (i + 1)).toUpperCase());
+ scrubbed.append("\n");
+
+ /* Replace the contact line (if present) with a generic one. */
+ } else if (line.startsWith("contact ")) {
+ scrubbed.append("contact somebody\n");
+
+ /* When we reach the signature, we're done. Write the sanitized
+ * descriptor to disk below. */
+ } else if (line.startsWith("router-signature")) {
+ String[] routerLineParts = routerLine.split(" ");
+ scrubbedDesc = "router " + routerLineParts[1] + " "
+ + scrubbedAddress + " " + routerLineParts[3] + " "
+ + routerLineParts[4] + " " + routerLineParts[5] + "\n";
+ if (scrubbedOrAddresses != null) {
+ for (String scrubbedOrAddress : scrubbedOrAddresses) {
+ scrubbedDesc += "or-address "
+ + scrubbedOrAddress + "\n";
+ }
+ }
+ scrubbedDesc += scrubbed.toString();
+ break;
+
+ /* Replace extra-info digest with the hashed digest of the
+ * non-scrubbed descriptor. */
+ } else if (line.startsWith("opt extra-info-digest ") ||
+ line.startsWith("extra-info-digest ")) {
+ String[] parts = line.split(" ");
+ if (line.startsWith("opt ")) {
+ scrubbed.append("opt ");
+ parts = line.substring(4).split(" ");
+ }
+ scrubbed.append("extra-info-digest " + DigestUtils.shaHex(
+ Hex.decodeHex(parts[1].toCharArray())).toUpperCase());
+ if (parts.length > 2) {
+ scrubbed.append(" " + Base64.encodeBase64String(
+ DigestUtils.sha256(Base64.decodeBase64(parts[2]))).
+ replaceAll("=", ""));
+ }
+ scrubbed.append("\n");
+
+ /* Possibly sanitize reject lines if they contain the bridge's own
+ * IP address. */
+ } else if (line.startsWith("reject ")) {
+ if (address != null && line.startsWith("reject " + address)) {
+ scrubbed.append("reject " + scrubbedAddress
+ + line.substring("reject ".length() + address.length())
+ + "\n");
+ } else {
+ scrubbed.append(line + "\n");
+ }
+
+ /* Extract master-key-ed25519 from identity-ed25519. */
+ } else if (line.equals("identity-ed25519")) {
+ StringBuilder sb = new StringBuilder();
+ while ((line = br.readLine()) != null &&
+ !line.equals("-----END ED25519 CERT-----")) {
+ if (line.equals("-----BEGIN ED25519 CERT-----")) {
+ continue;
+ }
+ sb.append(line);
+ }
+ masterKeyEd25519FromIdentityEd25519 =
+ this.parseMasterKeyEd25519FromIdentityEd25519(
+ sb.toString());
+ String sha256MasterKeyEd25519 = Base64.encodeBase64String(
+ DigestUtils.sha256(Base64.decodeBase64(
+ masterKeyEd25519FromIdentityEd25519 + "="))).
+ replaceAll("=", "");
+ scrubbed.append("master-key-ed25519 " + sha256MasterKeyEd25519
+ + "\n");
+ if (masterKeyEd25519 != null && !masterKeyEd25519.equals(
+ masterKeyEd25519FromIdentityEd25519)) {
+ this.logger.warning("Mismatch between identity-ed25519 and "
+ + "master-key-ed25519. Skipping.");
+ return;
+ }
+
+ /* Verify that identity-ed25519 and master-key-ed25519 match. */
+ } else if (line.startsWith("master-key-ed25519 ")) {
+ masterKeyEd25519 = line.substring(line.indexOf(" ") + 1);
+ if (masterKeyEd25519FromIdentityEd25519 != null &&
+ !masterKeyEd25519FromIdentityEd25519.equals(
+ masterKeyEd25519)) {
+ this.logger.warning("Mismatch between identity-ed25519 and "
+ + "master-key-ed25519. Skipping.");
+ return;
+ }
+
+ /* Write the following lines unmodified to the sanitized
+ * descriptor. */
+ } else if (line.startsWith("accept ")
+ || line.startsWith("platform ")
+ || line.startsWith("opt protocols ")
+ || line.startsWith("protocols ")
+ || line.startsWith("uptime ")
+ || line.startsWith("bandwidth ")
+ || line.startsWith("opt hibernating ")
+ || line.startsWith("hibernating ")
+ || line.startsWith("ntor-onion-key ")
+ || line.equals("opt hidden-service-dir")
+ || line.equals("hidden-service-dir")
+ || line.equals("opt caches-extra-info")
+ || line.equals("caches-extra-info")
+ || line.equals("opt allow-single-hop-exits")
+ || line.equals("allow-single-hop-exits")
+ || line.startsWith("ipv6-policy ")
+ || line.equals("tunnelled-dir-server")) {
+ scrubbed.append(line + "\n");
+
+ /* Replace node fingerprints in the family line with their hashes
+ * and leave nicknames unchanged. */
+ } else if (line.startsWith("family ")) {
+ StringBuilder familyLine = new StringBuilder("family");
+ for (String s : line.substring(7).split(" ")) {
+ if (s.startsWith("$")) {
+ familyLine.append(" $" + DigestUtils.shaHex(Hex.decodeHex(
+ s.substring(1).toCharArray())).toUpperCase());
+ } else {
+ familyLine.append(" " + s);
+ }
+ }
+ scrubbed.append(familyLine.toString() + "\n");
+
+ /* Skip the purpose line that the bridge authority adds to its
+ * cached-descriptors file. */
+ } else if (line.startsWith("@purpose ")) {
+ continue;
+
+ /* Skip all crypto parts that might leak the bridge's identity
+ * fingerprint. */
+ } else if (line.startsWith("-----BEGIN ")
+ || line.equals("onion-key") || line.equals("signing-key") ||
+ line.equals("onion-key-crosscert") ||
+ line.startsWith("ntor-onion-key-crosscert ")) {
+ skipCrypto = true;
+
+ /* Stop skipping lines when the crypto parts are over. */
+ } else if (line.startsWith("-----END ")) {
+ skipCrypto = false;
+
+ /* Skip the ed25519 signature; we'll include a SHA256 digest of
+ * the SHA256 descriptor digest in router-digest-sha256. */
+ } else if (line.startsWith("router-sig-ed25519 ")) {
+ continue;
+
+ /* If we encounter an unrecognized line, stop parsing and print
+ * out a warning. We might have overlooked sensitive information
+ * that we need to remove or replace for the sanitized descriptor
+ * version. */
+ } else {
+ this.logger.warning("Unrecognized line '" + line
+ + "'. Skipping.");
+ return;
+ }
+ }
+ br.close();
+ } catch (Exception e) {
+ this.logger.log(Level.WARNING, "Could not parse server "
+ + "descriptor.", e);
+ return;
+ }
+
+ /* Determine filename of sanitized server descriptor. */
+ String descriptorDigest = null;
+ try {
+ String ascii = new String(data, "US-ASCII");
+ String startToken = "router ";
+ String sigToken = "\nrouter-signature\n";
+ int start = ascii.indexOf(startToken);
+ int sig = ascii.indexOf(sigToken) + sigToken.length();
+ if (start >= 0 && sig >= 0 && sig > start) {
+ byte[] forDigest = new byte[sig - start];
+ System.arraycopy(data, start, forDigest, 0, sig - start);
+ descriptorDigest = DigestUtils.shaHex(DigestUtils.sha(forDigest));
+ }
+ } catch (UnsupportedEncodingException e) {
+ /* Handle below. */
+ }
+ if (descriptorDigest == null) {
+ this.logger.log(Level.WARNING, "Could not calculate server "
+ + "descriptor digest.");
+ return;
+ }
+ String descriptorDigestSha256Base64 = null;
+ if (masterKeyEd25519FromIdentityEd25519 != null) {
+ try {
+ String ascii = new String(data, "US-ASCII");
+ String startToken = "router ";
+ String sigToken = "\n-----END SIGNATURE-----\n";
+ int start = ascii.indexOf(startToken);
+ int sig = ascii.indexOf(sigToken) + sigToken.length();
+ if (start >= 0 && sig >= 0 && sig > start) {
+ byte[] forDigest = new byte[sig - start];
+ System.arraycopy(data, start, forDigest, 0, sig - start);
+ descriptorDigestSha256Base64 = Base64.encodeBase64String(
+ DigestUtils.sha256(DigestUtils.sha256(forDigest))).
+ replaceAll("=", "");
+ }
+ } catch (UnsupportedEncodingException e) {
+ /* Handle below. */
+ }
+ if (descriptorDigestSha256Base64 == null) {
+ this.logger.log(Level.WARNING, "Could not calculate server "
+ + "descriptor SHA256 digest.");
+ return;
+ }
+ }
+ String dyear = published.substring(0, 4);
+ String dmonth = published.substring(5, 7);
+ File tarballFile = new File(
+ this.sanitizedBridgesDirectory.getAbsolutePath() + "/"
+ + dyear + "/" + dmonth + "/server-descriptors/"
+ + "/" + descriptorDigest.charAt(0) + "/"
+ + descriptorDigest.charAt(1) + "/"
+ + descriptorDigest);
+ File rsyncCatFile = new File("recent/bridge-descriptors/"
+ + "server-descriptors/" + this.rsyncCatString
+ + "-server-descriptors.tmp");
+ File[] outputFiles = new File[] { tarballFile, rsyncCatFile };
+ boolean[] append = new boolean[] { false, true };
+ try {
+ for (int i = 0; i < outputFiles.length; i++) {
+ File outputFile = outputFiles[i];
+ boolean appendToFile = append[i];
+ if (outputFile.exists() && !appendToFile) {
+ /* We already stored this descriptor to disk before, so let's
+ * not store it yet another time. */
+ break;
+ }
+ outputFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ outputFile, appendToFile));
+ bw.write("@type bridge-server-descriptor 1.1\n");
+ bw.write(scrubbedDesc);
+ if (descriptorDigestSha256Base64 != null) {
+ bw.write("router-digest-sha256 " + descriptorDigestSha256Base64
+ + "\n");
+ }
+ bw.write("router-digest " + descriptorDigest.toUpperCase()
+ + "\n");
+ bw.close();
+ }
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not write sanitized server "
+ + "descriptor to disk.", e);
+ return;
+ }
+ }
+
+ private String parseMasterKeyEd25519FromIdentityEd25519(
+ String identityEd25519Base64) {
+ byte[] identityEd25519 = Base64.decodeBase64(identityEd25519Base64);
+ if (identityEd25519.length < 40) {
+ this.logger.warning("Invalid length of identity-ed25519 (in "
+ + "bytes): " + identityEd25519.length);
+ } else if (identityEd25519[0] != 0x01) {
+ this.logger.warning("Unknown version in identity-ed25519: "
+ + identityEd25519[0]);
+ } else if (identityEd25519[1] != 0x04) {
+ this.logger.warning("Unknown cert type in identity-ed25519: "
+ + identityEd25519[1]);
+ } else if (identityEd25519[6] != 0x01) {
+ this.logger.warning("Unknown certified key type in "
+ + "identity-ed25519: " + identityEd25519[1]);
+ } else if (identityEd25519[39] == 0x00) {
+ this.logger.warning("No extensions in identity-ed25519 (which "
+ + "would contain the encoded master-key-ed25519): "
+ + identityEd25519[39]);
+ } else {
+ int extensionStart = 40;
+ for (int i = 0; i < (int) identityEd25519[39]; i++) {
+ if (identityEd25519.length < extensionStart + 4) {
+ this.logger.warning("Invalid extension with id " + i
+ + " in identity-ed25519.");
+ break;
+ }
+ int extensionLength = identityEd25519[extensionStart];
+ extensionLength <<= 8;
+ extensionLength += identityEd25519[extensionStart + 1];
+ int extensionType = identityEd25519[extensionStart + 2];
+ if (extensionLength == 32 && extensionType == 4) {
+ if (identityEd25519.length < extensionStart + 4 + 32) {
+ this.logger.warning("Invalid extension with id " + i
+ + " in identity-ed25519.");
+ break;
+ }
+ byte[] masterKeyEd25519 = new byte[32];
+ System.arraycopy(identityEd25519, extensionStart + 4,
+ masterKeyEd25519, 0, masterKeyEd25519.length);
+ String masterKeyEd25519Base64 = Base64.encodeBase64String(
+ masterKeyEd25519);
+ String masterKeyEd25519Base64NoTrailingEqualSigns =
+ masterKeyEd25519Base64.replaceAll("=", "");
+ return masterKeyEd25519Base64NoTrailingEqualSigns;
+ }
+ extensionStart += 4 + extensionLength;
+ }
+ }
+ this.logger.warning("Unable to locate master-key-ed25519 in "
+ + "identity-ed25519.");
+ return null;
+ }
+
+ private String maxExtraInfoDescriptorPublishedTime =
+ "1970-01-01 00:00:00";
+
+ /**
+ * Sanitizes an extra-info descriptor and writes it to disk.
+ */
+ public void sanitizeAndStoreExtraInfoDescriptor(byte[] data) {
+
+ /* Parse descriptor to generate a sanitized version. */
+ String scrubbedDesc = null, published = null,
+ masterKeyEd25519FromIdentityEd25519 = null;
+ try {
+ BufferedReader br = new BufferedReader(new StringReader(new String(
+ data, "US-ASCII")));
+ String line = null;
+ StringBuilder scrubbed = null;
+ String hashedBridgeIdentity = null, masterKeyEd25519 = null;
+ while ((line = br.readLine()) != null) {
+
+ /* Parse bridge identity from extra-info line and replace it with
+ * its hash in the sanitized descriptor. */
+ String[] parts = line.split(" ");
+ if (line.startsWith("extra-info ")) {
+ hashedBridgeIdentity = DigestUtils.shaHex(Hex.decodeHex(
+ parts[2].toCharArray())).toLowerCase();
+ scrubbed = new StringBuilder("extra-info " + parts[1] + " "
+ + hashedBridgeIdentity.toUpperCase() + "\n");
+
+ /* Parse the publication time to determine the file name. */
+ } else if (line.startsWith("published ")) {
+ scrubbed.append(line + "\n");
+ published = line.substring("published ".length());
+ if (published.compareTo(maxExtraInfoDescriptorPublishedTime)
+ > 0) {
+ maxExtraInfoDescriptorPublishedTime = published;
+ }
+
+ /* Remove everything from transport lines except the transport
+ * name. */
+ } else if (line.startsWith("transport ")) {
+ if (parts.length < 3) {
+ this.logger.fine("Illegal line in extra-info descriptor: '"
+ + line + "'. Skipping descriptor.");
+ return;
+ }
+ scrubbed.append("transport " + parts[1] + "\n");
+
+ /* Skip transport-info lines entirely. */
+ } else if (line.startsWith("transport-info ")) {
+
+ /* Extract master-key-ed25519 from identity-ed25519. */
+ } else if (line.equals("identity-ed25519")) {
+ StringBuilder sb = new StringBuilder();
+ while ((line = br.readLine()) != null &&
+ !line.equals("-----END ED25519 CERT-----")) {
+ if (line.equals("-----BEGIN ED25519 CERT-----")) {
+ continue;
+ }
+ sb.append(line);
+ }
+ masterKeyEd25519FromIdentityEd25519 =
+ this.parseMasterKeyEd25519FromIdentityEd25519(
+ sb.toString());
+ String sha256MasterKeyEd25519 = Base64.encodeBase64String(
+ DigestUtils.sha256(Base64.decodeBase64(
+ masterKeyEd25519FromIdentityEd25519 + "="))).
+ replaceAll("=", "");
+ scrubbed.append("master-key-ed25519 " + sha256MasterKeyEd25519
+ + "\n");
+ if (masterKeyEd25519 != null && !masterKeyEd25519.equals(
+ masterKeyEd25519FromIdentityEd25519)) {
+ this.logger.warning("Mismatch between identity-ed25519 and "
+ + "master-key-ed25519. Skipping.");
+ return;
+ }
+
+ /* Verify that identity-ed25519 and master-key-ed25519 match. */
+ } else if (line.startsWith("master-key-ed25519 ")) {
+ masterKeyEd25519 = line.substring(line.indexOf(" ") + 1);
+ if (masterKeyEd25519FromIdentityEd25519 != null &&
+ !masterKeyEd25519FromIdentityEd25519.equals(
+ masterKeyEd25519)) {
+ this.logger.warning("Mismatch between identity-ed25519 and "
+ + "master-key-ed25519. Skipping.");
+ return;
+ }
+
+ /* Write the following lines unmodified to the sanitized
+ * descriptor. */
+ } else if (line.startsWith("write-history ")
+ || line.startsWith("read-history ")
+ || line.startsWith("geoip-start-time ")
+ || line.startsWith("geoip-client-origins ")
+ || line.startsWith("geoip-db-digest ")
+ || line.startsWith("geoip6-db-digest ")
+ || line.startsWith("conn-bi-direct ")
+ || line.startsWith("bridge-")
+ || line.startsWith("dirreq-")
+ || line.startsWith("cell-")
+ || line.startsWith("entry-")
+ || line.startsWith("exit-")) {
+ scrubbed.append(line + "\n");
+
+ /* When we reach the signature, we're done. Write the sanitized
+ * descriptor to disk below. */
+ } else if (line.startsWith("router-signature")) {
+ scrubbedDesc = scrubbed.toString();
+ break;
+
+ /* Skip the ed25519 signature; we'll include a SHA256 digest of
+ * the SHA256 descriptor digest in router-digest-sha256. */
+ } else if (line.startsWith("router-sig-ed25519 ")) {
+ continue;
+
+ /* If we encounter an unrecognized line, stop parsing and print
+ * out a warning. We might have overlooked sensitive information
+ * that we need to remove or replace for the sanitized descriptor
+ * version. */
+ } else {
+ this.logger.warning("Unrecognized line '" + line
+ + "'. Skipping.");
+ return;
+ }
+ }
+ br.close();
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not parse extra-info "
+ + "descriptor.", e);
+ return;
+ } catch (DecoderException e) {
+ this.logger.log(Level.WARNING, "Could not parse extra-info "
+ + "descriptor.", e);
+ return;
+ }
+
+ /* Determine filename of sanitized extra-info descriptor. */
+ String descriptorDigest = null;
+ try {
+ String ascii = new String(data, "US-ASCII");
+ String startToken = "extra-info ";
+ String sigToken = "\nrouter-signature\n";
+ int start = ascii.indexOf(startToken);
+ int sig = ascii.indexOf(sigToken) + sigToken.length();
+ if (start >= 0 && sig >= 0 && sig > start) {
+ byte[] forDigest = new byte[sig - start];
+ System.arraycopy(data, start, forDigest, 0, sig - start);
+ descriptorDigest = DigestUtils.shaHex(DigestUtils.sha(forDigest));
+ }
+ } catch (UnsupportedEncodingException e) {
+ /* Handle below. */
+ }
+ if (descriptorDigest == null) {
+ this.logger.log(Level.WARNING, "Could not calculate extra-info "
+ + "descriptor digest.");
+ return;
+ }
+ String descriptorDigestSha256Base64 = null;
+ if (masterKeyEd25519FromIdentityEd25519 != null) {
+ try {
+ String ascii = new String(data, "US-ASCII");
+ String startToken = "extra-info ";
+ String sigToken = "\n-----END SIGNATURE-----\n";
+ int start = ascii.indexOf(startToken);
+ int sig = ascii.indexOf(sigToken) + sigToken.length();
+ if (start >= 0 && sig >= 0 && sig > start) {
+ byte[] forDigest = new byte[sig - start];
+ System.arraycopy(data, start, forDigest, 0, sig - start);
+ descriptorDigestSha256Base64 = Base64.encodeBase64String(
+ DigestUtils.sha256(DigestUtils.sha256(forDigest))).
+ replaceAll("=", "");
+ }
+ } catch (UnsupportedEncodingException e) {
+ /* Handle below. */
+ }
+ if (descriptorDigestSha256Base64 == null) {
+ this.logger.log(Level.WARNING, "Could not calculate extra-info "
+ + "descriptor SHA256 digest.");
+ return;
+ }
+ }
+ String dyear = published.substring(0, 4);
+ String dmonth = published.substring(5, 7);
+ File tarballFile = new File(
+ this.sanitizedBridgesDirectory.getAbsolutePath() + "/"
+ + dyear + "/" + dmonth + "/extra-infos/"
+ + descriptorDigest.charAt(0) + "/"
+ + descriptorDigest.charAt(1) + "/"
+ + descriptorDigest);
+ File rsyncCatFile = new File("recent/bridge-descriptors/"
+ + "extra-infos/" + this.rsyncCatString + "-extra-infos.tmp");
+ File[] outputFiles = new File[] { tarballFile, rsyncCatFile };
+ boolean[] append = new boolean[] { false, true };
+ try {
+ for (int i = 0; i < outputFiles.length; i++) {
+ File outputFile = outputFiles[i];
+ boolean appendToFile = append[i];
+ if (outputFile.exists() && !appendToFile) {
+ /* We already stored this descriptor to disk before, so let's
+ * not store it yet another time. */
+ break;
+ }
+ outputFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ outputFile, appendToFile));
+ bw.write("@type bridge-extra-info 1.3\n");
+ bw.write(scrubbedDesc);
+ if (descriptorDigestSha256Base64 != null) {
+ bw.write("router-digest-sha256 " + descriptorDigestSha256Base64
+ + "\n");
+ }
+ bw.write("router-digest " + descriptorDigest.toUpperCase()
+ + "\n");
+ bw.close();
+ }
+ } catch (Exception e) {
+ this.logger.log(Level.WARNING, "Could not write sanitized "
+ + "extra-info descriptor to disk.", e);
+ }
+ }
+
+ /**
+ * Rewrite all network statuses that might contain references to server
+ * descriptors we added or updated in this execution. This applies to
+ * all statuses that have been published up to 24 hours after any added
+ * or updated server descriptor.
+ */
+ public void finishWriting() {
+
+ /* Delete secrets that we don't need anymore. */
+ if (!this.secretsForHashingIPAddresses.isEmpty() &&
+ this.secretsForHashingIPAddresses.firstKey().compareTo(
+ this.bridgeSanitizingCutOffTimestamp) < 0) {
+ try {
+ int kept = 0, deleted = 0;
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ this.bridgeIpSecretsFile));
+ for (Map.Entry<String, byte[]> e :
+ this.secretsForHashingIPAddresses.entrySet()) {
+ if (e.getKey().compareTo(
+ this.bridgeSanitizingCutOffTimestamp) < 0) {
+ deleted++;
+ } else {
+ bw.write(e.getKey() + "," + Hex.encodeHexString(e.getValue())
+ + "\n");
+ kept++;
+ }
+ }
+ bw.close();
+ this.logger.info("Deleted " + deleted + " secrets that we don't "
+ + "need anymore and kept " + kept + ".");
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not store reduced set of "
+ + "secrets to disk! This is a bad sign, better check what's "
+ + "going on!", e);
+ }
+ }
+ }
+
+ private void checkStaleDescriptors() {
+ SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
+ "yyyy-MM-dd HH:mm:ss");
+ dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
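+    /* Descriptors count as stale if their newest known publication time
+     * lies more than 330 minutes, i.e., 5:30 hours, in the past. */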
+ long tooOldMillis = System.currentTimeMillis() - 330L * 60L * 1000L;
+ try {
+ long maxNetworkStatusPublishedMillis =
+ dateTimeFormat.parse(maxNetworkStatusPublishedTime).getTime();
+ if (maxNetworkStatusPublishedMillis > 0L &&
+ maxNetworkStatusPublishedMillis < tooOldMillis) {
+ this.logger.warning("The last known bridge network status was "
+ + "published " + maxNetworkStatusPublishedTime + ", which is "
+ + "more than 5:30 hours in the past.");
+ }
+ long maxServerDescriptorPublishedMillis =
+ dateTimeFormat.parse(maxServerDescriptorPublishedTime).
+ getTime();
+ if (maxServerDescriptorPublishedMillis > 0L &&
+ maxServerDescriptorPublishedMillis < tooOldMillis) {
+ this.logger.warning("The last known bridge server descriptor was "
+ + "published " + maxServerDescriptorPublishedTime + ", which "
+ + "is more than 5:30 hours in the past.");
+ }
+ long maxExtraInfoDescriptorPublishedMillis =
+ dateTimeFormat.parse(maxExtraInfoDescriptorPublishedTime).
+ getTime();
+ if (maxExtraInfoDescriptorPublishedMillis > 0L &&
+ maxExtraInfoDescriptorPublishedMillis < tooOldMillis) {
+ this.logger.warning("The last known bridge extra-info descriptor "
+ + "was published " + maxExtraInfoDescriptorPublishedTime
+ + ", which is more than 5:30 hours in the past.");
+ }
+ } catch (ParseException e) {
+ this.logger.log(Level.WARNING, "Unable to parse timestamp for "
+ + "stale check.", e);
+ }
+ }
+
+ /* Delete all files from the rsync directory that have not been modified
+ * in the last three days, and remove the .tmp extension from newly
+ * written files. */
+ public void cleanUpRsyncDirectory() {
+ long cutOffMillis = System.currentTimeMillis()
+ - 3L * 24L * 60L * 60L * 1000L;
+ Stack<File> allFiles = new Stack<File>();
+ allFiles.add(new File("recent/bridge-descriptors"));
+ while (!allFiles.isEmpty()) {
+ File file = allFiles.pop();
+ if (file.isDirectory()) {
+ allFiles.addAll(Arrays.asList(file.listFiles()));
+ } else if (file.lastModified() < cutOffMillis) {
+ file.delete();
+ } else if (file.getName().endsWith(".tmp")) {
+ file.renameTo(new File(file.getParentFile(),
+ file.getName().substring(0,
+ file.getName().lastIndexOf(".tmp"))));
+ }
+ }
+ }
+}
+
diff --git a/src/org/torproject/collector/bridgepools/BridgePoolAssignmentsProcessor.java b/src/org/torproject/collector/bridgepools/BridgePoolAssignmentsProcessor.java
new file mode 100644
index 0000000..33f28e2
--- /dev/null
+++ b/src/org/torproject/collector/bridgepools/BridgePoolAssignmentsProcessor.java
@@ -0,0 +1,272 @@
+/* Copyright 2011--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.collector.bridgepools;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.Stack;
+import java.util.TimeZone;
+import java.util.TreeSet;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.apache.commons.codec.DecoderException;
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
+import org.torproject.collector.main.Configuration;
+import org.torproject.collector.main.LockFile;
+import org.torproject.collector.main.LoggingConfiguration;
+
+public class BridgePoolAssignmentsProcessor extends Thread {
+
+ public static void main(String[] args) {
+
+ /* Initialize logging configuration. */
+ new LoggingConfiguration("bridge-pool-assignments");
+ Logger logger = Logger.getLogger(
+ BridgePoolAssignmentsProcessor.class.getName());
+ logger.info("Starting bridge-pool-assignments module of ERNIE.");
+
+ // Initialize configuration
+ Configuration config = new Configuration();
+
+ // Use lock file to avoid overlapping runs
+ LockFile lf = new LockFile("bridge-pool-assignments");
+ if (!lf.acquireLock()) {
+ logger.severe("Warning: ERNIE is already running or has not exited "
+ + "cleanly! Exiting!");
+ System.exit(1);
+ }
+
+ // Process bridge pool assignments
+ new BridgePoolAssignmentsProcessor(config).run();
+
+ // Remove lock file
+ lf.releaseLock();
+
+ logger.info("Terminating bridge-pool-assignments module of ERNIE.");
+ }
+
+ private Configuration config;
+
+ public BridgePoolAssignmentsProcessor(Configuration config) {
+ this.config = config;
+ }
+
+ public void run() {
+
+    /* Fail fast if either directory is unconfigured; constructing a
+     * File from a null path name would only throw a less helpful
+     * NullPointerException. */
+    if (config.getAssignmentsDirectory() == null ||
+        config.getSanitizedAssignmentsDirectory() == null) {
+      throw new IllegalArgumentException("Neither "
+          + "assignmentsDirectory nor sanitizedAssignmentsDirectory may "
+          + "be null!");
+    }
+    File assignmentsDirectory =
+        new File(config.getAssignmentsDirectory());
+    File sanitizedAssignmentsDirectory =
+        new File(config.getSanitizedAssignmentsDirectory());
+
+    Logger logger =
+        Logger.getLogger(BridgePoolAssignmentsProcessor.class.getName());
+
+ List<File> assignmentFiles = new ArrayList<File>();
+ Stack<File> files = new Stack<File>();
+ files.add(assignmentsDirectory);
+ while (!files.isEmpty()) {
+ File file = files.pop();
+ if (file.isDirectory()) {
+ files.addAll(Arrays.asList(file.listFiles()));
+ } else if (file.getName().equals("assignments.log")) {
+ assignmentFiles.add(file);
+ }
+ }
+
+ SimpleDateFormat assignmentFormat =
+ new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ assignmentFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ SimpleDateFormat filenameFormat =
+ new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
+ filenameFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ String duplicateFingerprint = null;
+ long maxBridgePoolAssignmentTime = 0L;
+ for (File assignmentFile : assignmentFiles) {
+ logger.info("Processing bridge pool assignment file '"
+ + assignmentFile.getAbsolutePath() + "'...");
+ try {
+ BufferedReader br = null;
+ if (assignmentFile.getName().endsWith(".gz")) {
+ br = new BufferedReader(new InputStreamReader(
+ new GzipCompressorInputStream(new FileInputStream(
+ assignmentFile))));
+ } else {
+ br = new BufferedReader(new FileReader(assignmentFile));
+ }
+ String line, bridgePoolAssignmentLine = null;
+ SortedSet<String> sanitizedAssignments = new TreeSet<String>();
+ boolean wroteLastLine = false, skipBefore20120504125947 = true;
+ Set<String> hashedFingerprints = null;
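+        /* Read the file line by line; the additional !wroteLastLine
+         * pass lets us write out the final bridge-pool-assignment
+         * block after reaching end of file. */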
+ while ((line = br.readLine()) != null || !wroteLastLine) {
+ if (line != null && line.startsWith("bridge-pool-assignment ")) {
+ String[] parts = line.split(" ");
+ if (parts.length != 3) {
+ continue;
+ }
+ /* TODO Take out this temporary hack to ignore all assignments
+ * coming from ponticum when byblos was still the official
+ * BridgeDB host. */
+ if (line.compareTo(
+ "bridge-pool-assignment 2012-05-04 12:59:47") >= 0) {
+ skipBefore20120504125947 = false;
+ }
+ }
+ if (skipBefore20120504125947) {
+ if (line == null) {
+ break;
+ } else {
+ continue;
+ }
+ }
+ if (line == null ||
+ line.startsWith("bridge-pool-assignment ")) {
+ if (bridgePoolAssignmentLine != null) {
+ try {
+ long bridgePoolAssignmentTime = assignmentFormat.parse(
+ bridgePoolAssignmentLine.substring(
+ "bridge-pool-assignment ".length())).getTime();
+ maxBridgePoolAssignmentTime = Math.max(
+ maxBridgePoolAssignmentTime,
+ bridgePoolAssignmentTime);
+ File tarballFile = new File(
+ sanitizedAssignmentsDirectory, filenameFormat.format(
+ bridgePoolAssignmentTime));
+ File rsyncFile = new File(
+ "recent/bridge-pool-assignments/"
+ + tarballFile.getName());
+ File[] outputFiles = new File[] { tarballFile,
+ rsyncFile };
+ for (File outputFile : outputFiles) {
+ if (!outputFile.exists()) {
+ outputFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ outputFile));
+ bw.write("@type bridge-pool-assignment 1.0\n");
+ bw.write(bridgePoolAssignmentLine + "\n");
+ for (String assignmentLine : sanitizedAssignments) {
+ bw.write(assignmentLine + "\n");
+ }
+ bw.close();
+ }
+ }
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Could not write sanitized "
+ + "bridge pool assignment file for line '"
+ + bridgePoolAssignmentLine + "' to disk. Skipping "
+ + "bridge pool assignment file '"
+ + assignmentFile.getAbsolutePath() + "'.", e);
+ break;
+ } catch (ParseException e) {
+ logger.log(Level.WARNING, "Could not write sanitized "
+ + "bridge pool assignment file for line '"
+ + bridgePoolAssignmentLine + "' to disk. Skipping "
+ + "bridge pool assignment file '"
+ + assignmentFile.getAbsolutePath() + "'.", e);
+ break;
+ }
+ sanitizedAssignments.clear();
+ }
+ if (line == null) {
+ wroteLastLine = true;
+ } else {
+ bridgePoolAssignmentLine = line;
+ hashedFingerprints = new HashSet<String>();
+ }
+ } else {
+ String[] parts = line.split(" ");
+ if (parts.length < 2 || parts[0].length() < 40) {
+ logger.warning("Unrecognized line '" + line
+ + "'. Aborting.");
+ break;
+ }
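+            /* Replace the original fingerprint, i.e., the first 40 hex
+             * characters of the line, with the SHA-1 hash of its byte
+             * representation, and keep the assignment details that
+             * follow. */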
+ String hashedFingerprint = null;
+ try {
+ hashedFingerprint = DigestUtils.shaHex(Hex.decodeHex(
+ line.split(" ")[0].toCharArray())).toLowerCase();
+ } catch (DecoderException e) {
+ logger.warning("Unable to decode hex fingerprint in line '"
+ + line + "'. Aborting.");
+ break;
+ }
+ if (hashedFingerprints.contains(hashedFingerprint)) {
+ duplicateFingerprint = bridgePoolAssignmentLine;
+ }
+ hashedFingerprints.add(hashedFingerprint);
+ String assignmentDetails = line.substring(40);
+ sanitizedAssignments.add(hashedFingerprint
+ + assignmentDetails);
+ }
+ }
+ br.close();
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Could not read bridge pool assignment "
+ + "file '" + assignmentFile.getAbsolutePath()
+ + "'. Skipping.", e);
+ }
+ }
+
+ if (duplicateFingerprint != null) {
+ logger.warning("At least one bridge pool assignment list contained "
+ + "duplicate fingerprints. Last found in assignment list "
+ + "starting with '" + duplicateFingerprint + "'.");
+ }
+
+ if (maxBridgePoolAssignmentTime > 0L &&
+ maxBridgePoolAssignmentTime + 330L * 60L * 1000L
+ < System.currentTimeMillis()) {
+ SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
+ "yyyy-MM-dd HH:mm:ss");
+ dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ logger.warning("The last known bridge pool assignment list was "
+ + "published at "
+ + dateTimeFormat.format(maxBridgePoolAssignmentTime)
+ + ", which is more than 5:30 hours in the past.");
+ }
+
+ this.cleanUpRsyncDirectory();
+
+ logger.info("Finished processing bridge pool assignment file(s).");
+ }
+
+ /* Delete all files from the rsync directory that have not been modified
+ * in the last three days. */
+ public void cleanUpRsyncDirectory() {
+ long cutOffMillis = System.currentTimeMillis()
+ - 3L * 24L * 60L * 60L * 1000L;
+ Stack<File> allFiles = new Stack<File>();
+ allFiles.add(new File("recent/bridge-pool-assignments"));
+ while (!allFiles.isEmpty()) {
+ File file = allFiles.pop();
+ if (file.isDirectory()) {
+ allFiles.addAll(Arrays.asList(file.listFiles()));
+ } else if (file.lastModified() < cutOffMillis) {
+ file.delete();
+ }
+ }
+ }
+}
+
diff --git a/src/org/torproject/collector/exitlists/ExitListDownloader.java b/src/org/torproject/collector/exitlists/ExitListDownloader.java
new file mode 100644
index 0000000..b5425fb
--- /dev/null
+++ b/src/org/torproject/collector/exitlists/ExitListDownloader.java
@@ -0,0 +1,213 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.collector.exitlists;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.text.SimpleDateFormat;
+import java.util.Arrays;
+import java.util.Date;
+import java.util.List;
+import java.util.SortedSet;
+import java.util.Stack;
+import java.util.TimeZone;
+import java.util.TreeSet;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorParser;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.ExitList;
+import org.torproject.descriptor.ExitListEntry;
+import org.torproject.descriptor.impl.DescriptorParseException;
+import org.torproject.collector.main.Configuration;
+import org.torproject.collector.main.LockFile;
+import org.torproject.collector.main.LoggingConfiguration;
+
+public class ExitListDownloader extends Thread {
+
+ public static void main(String[] args) {
+
+ /* Initialize logging configuration. */
+ new LoggingConfiguration("exit-lists");
+ Logger logger = Logger.getLogger(ExitListDownloader.class.getName());
+ logger.info("Starting exit-lists module of ERNIE.");
+
+ // Initialize configuration
+ Configuration config = new Configuration();
+
+ // Use lock file to avoid overlapping runs
+ LockFile lf = new LockFile("exit-lists");
+ if (!lf.acquireLock()) {
+ logger.severe("Warning: ERNIE is already running or has not exited "
+ + "cleanly! Exiting!");
+ System.exit(1);
+ }
+
+ // Download exit list and store it to disk
+ new ExitListDownloader(config).run();
+
+ // Remove lock file
+ lf.releaseLock();
+
+ logger.info("Terminating exit-lists module of ERNIE.");
+ }
+
+ public ExitListDownloader(Configuration config) {
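+    /* This module doesn't read any configuration options; all paths
+     * and URLs used in run() below are hard-coded. */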
+ }
+
+ public void run() {
+
+ Logger logger = Logger.getLogger(ExitListDownloader.class.getName());
+
+ SimpleDateFormat dateTimeFormat =
+ new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+
+ Date downloadedDate = new Date();
+ String downloadedExitList = null;
+ try {
+ logger.fine("Downloading exit list...");
+ StringBuilder sb = new StringBuilder();
+ sb.append("@type tordnsel 1.0\n");
+ sb.append("Downloaded " + dateTimeFormat.format(downloadedDate)
+ + "\n");
+ String exitAddressesUrl =
+ "http://exitlist.torproject.org/exit-addresses";
+ URL u = new URL(exitAddressesUrl);
+ HttpURLConnection huc = (HttpURLConnection) u.openConnection();
+ huc.setRequestMethod("GET");
+ huc.connect();
+ int response = huc.getResponseCode();
+ if (response != 200) {
+ logger.warning("Could not download exit list. Response code " +
+ response);
+ return;
+ }
+ BufferedInputStream in = new BufferedInputStream(
+ huc.getInputStream());
+ int len;
+ byte[] data = new byte[1024];
+ while ((len = in.read(data, 0, 1024)) >= 0) {
+ sb.append(new String(data, 0, len));
+ }
+ in.close();
+ downloadedExitList = sb.toString();
+ logger.fine("Finished downloading exit list.");
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Failed downloading exit list", e);
+ return;
+ }
+ if (downloadedExitList == null) {
+ logger.warning("Failed downloading exit list");
+ return;
+ }
+
+ SimpleDateFormat tarballFormat =
+ new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
+ tarballFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ File tarballFile = new File("out/exit-lists/" + tarballFormat.format(
+ downloadedDate));
+
+ long maxScanMillis = 0L;
+ try {
+ DescriptorParser descriptorParser =
+ DescriptorSourceFactory.createDescriptorParser();
+ List<Descriptor> parsedDescriptors =
+ descriptorParser.parseDescriptors(downloadedExitList.getBytes(),
+ tarballFile.getName());
+ if (parsedDescriptors.size() != 1 ||
+ !(parsedDescriptors.get(0) instanceof ExitList)) {
+ logger.warning("Could not parse downloaded exit list");
+ return;
+ }
+ ExitList parsedExitList = (ExitList) parsedDescriptors.get(0);
+ for (ExitListEntry entry : parsedExitList.getExitListEntries()) {
+ maxScanMillis = Math.max(maxScanMillis, entry.getScanMillis());
+ }
+ } catch (DescriptorParseException e) {
+ logger.log(Level.WARNING, "Could not parse downloaded exit list",
+ e);
+ }
+ if (maxScanMillis > 0L &&
+ maxScanMillis + 330L * 60L * 1000L < System.currentTimeMillis()) {
+ logger.warning("The last reported scan in the downloaded exit list "
+ + "took place at " + dateTimeFormat.format(maxScanMillis)
+ + ", which is more than 5:30 hours in the past.");
+ }
+
+ /* Write to disk. */
+ File rsyncFile = new File("recent/exit-lists/"
+ + tarballFile.getName());
+ File[] outputFiles = new File[] { tarballFile, rsyncFile };
+ for (File outputFile : outputFiles) {
+ try {
+ outputFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ outputFile));
+ bw.write(downloadedExitList);
+ bw.close();
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Could not write downloaded exit list "
+ + "to " + outputFile.getAbsolutePath(), e);
+ }
+ }
+
+ /* Write stats. */
+ StringBuilder dumpStats = new StringBuilder("Finished downloading "
+ + "exit list.\nLast three exit lists are:");
+ Stack<File> filesInInputDir = new Stack<File>();
+ filesInInputDir.add(new File("out/exit-lists"));
+ SortedSet<File> lastThreeExitLists = new TreeSet<File>();
+ while (!filesInInputDir.isEmpty()) {
+ File pop = filesInInputDir.pop();
+ if (pop.isDirectory()) {
+ SortedSet<File> lastThreeElements = new TreeSet<File>();
+ for (File f : pop.listFiles()) {
+ lastThreeElements.add(f);
+ }
+ while (lastThreeElements.size() > 3) {
+ lastThreeElements.remove(lastThreeElements.first());
+ }
+ for (File f : lastThreeElements) {
+ filesInInputDir.add(f);
+ }
+ } else {
+ lastThreeExitLists.add(pop);
+ while (lastThreeExitLists.size() > 3) {
+ lastThreeExitLists.remove(lastThreeExitLists.first());
+ }
+ }
+ }
+ for (File f : lastThreeExitLists) {
+ dumpStats.append("\n" + f.getName());
+ }
+ logger.info(dumpStats.toString());
+
+ this.cleanUpRsyncDirectory();
+ }
+
+ /* Delete all files from the rsync directory that have not been modified
+ * in the last three days. */
+ public void cleanUpRsyncDirectory() {
+ long cutOffMillis = System.currentTimeMillis()
+ - 3L * 24L * 60L * 60L * 1000L;
+ Stack<File> allFiles = new Stack<File>();
+ allFiles.add(new File("recent/exit-lists"));
+ while (!allFiles.isEmpty()) {
+ File file = allFiles.pop();
+ if (file.isDirectory()) {
+ allFiles.addAll(Arrays.asList(file.listFiles()));
+ } else if (file.lastModified() < cutOffMillis) {
+ file.delete();
+ }
+ }
+ }
+}
+
diff --git a/src/org/torproject/collector/main/Configuration.java b/src/org/torproject/collector/main/Configuration.java
new file mode 100644
index 0000000..eaf3217
--- /dev/null
+++ b/src/org/torproject/collector/main/Configuration.java
@@ -0,0 +1,304 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.collector.main;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.SortedMap;
+import java.util.TreeMap;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * Initialize configuration with hard-coded defaults, overwrite them
+ * with the configuration from the config file, if it exists, and
+ * provide the resulting values to the modules via getter methods.
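+ *
+ * A minimal sketch of a config file, using option names recognized by
+ * the parser below and the hard-coded defaults as example values:
+ *
+ *   ImportDirectoryArchives 1
+ *   DirectoryArchivesDirectory in/relay-descriptors/archives/
+ *   DirectoryArchivesOutputDirectory out/relay-descriptors/
+ *   DownloadRelayDescriptors 0
+ *
+ * Boolean options are written as 0 or 1, and list-valued options take
+ * comma-separated values.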
+ */
+public class Configuration {
+ private String directoryArchivesOutputDirectory =
+ "out/relay-descriptors/";
+ private boolean importCachedRelayDescriptors = false;
+ private List<String> cachedRelayDescriptorsDirectory =
+ new ArrayList<String>(Arrays.asList(
+ "in/relay-descriptors/cacheddesc/".split(",")));
+ private boolean importDirectoryArchives = false;
+ private String directoryArchivesDirectory =
+ "in/relay-descriptors/archives/";
+ private boolean keepDirectoryArchiveImportHistory = false;
+ private boolean replaceIPAddressesWithHashes = false;
+ private long limitBridgeDescriptorMappings = -1L;
+ private String sanitizedBridgesWriteDirectory =
+ "out/bridge-descriptors/";
+ private String bridgeSnapshotsDirectory = "in/bridge-descriptors/";
+ private boolean downloadRelayDescriptors = false;
+ private List<String> downloadFromDirectoryAuthorities = Arrays.asList((
+ "86.59.21.38,76.73.17.194:9030,171.25.193.9:443,"
+ + "193.23.244.244,208.83.223.34:443,128.31.0.34:9131,"
+ + "194.109.206.212,212.112.245.170,154.35.32.5").split(","));
+ private List<String> downloadVotesByFingerprint = Arrays.asList((
+ "14C131DFC5C6F93646BE72FA1401C02A8DF2E8B4,"
+ + "27B6B5996C426270A5C95488AA5BCEB6BCC86956,"
+ + "49015F787433103580E3B66A1707A00E60F2D15B,"
+ + "585769C78764D58426B8B52B6651A5A71137189A,"
+ + "80550987E1D626E3EBA5E5E75A458DE0626D088C,"
+ + "D586D18309DED4CD6D57C18FDB97EFA96D330566,"
+ + "E8A9C45EDE6D711294FADF8E7951F4DE6CA56B58,"
+ + "ED03BB616EB2F60BEC80151114BB25CEF515B226,"
+ + "EFCBE720AB3A82B99F9E953CD5BF50F7EEFC7B97").split(","));
+ private boolean downloadCurrentConsensus = true;
+ private boolean downloadCurrentMicrodescConsensus = true;
+ private boolean downloadCurrentVotes = true;
+ private boolean downloadMissingServerDescriptors = true;
+ private boolean downloadMissingExtraInfoDescriptors = true;
+ private boolean downloadMissingMicrodescriptors = true;
+ private boolean downloadAllServerDescriptors = false;
+ private boolean downloadAllExtraInfoDescriptors = false;
+  private boolean compressRelayDescriptorDownloads = false;
+ private String assignmentsDirectory = "in/bridge-pool-assignments/";
+ private String sanitizedAssignmentsDirectory =
+ "out/bridge-pool-assignments/";
+ private String torperfOutputDirectory = "out/torperf/";
+ private SortedMap<String, String> torperfSources = null;
+ private List<String> torperfFiles = null;
+ public Configuration() {
+
+ /* Initialize logger. */
+ Logger logger = Logger.getLogger(Configuration.class.getName());
+
+ /* Read config file, if present. */
+ File configFile = new File("config");
+ if (!configFile.exists()) {
+ logger.warning("Could not find config file. In the default "
+ + "configuration, we are not configured to read data from any "
+ + "data source or write data to any data sink. You need to "
+ + "create a config file (" + configFile.getAbsolutePath()
+ + ") and provide at least one data source and one data sink. "
+ + "Refer to the manual for more information.");
+ return;
+ }
+ String line = null;
+ boolean containsCachedRelayDescriptorsDirectory = false;
+ try {
+ BufferedReader br = new BufferedReader(new FileReader(configFile));
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("#") || line.length() < 1) {
+ continue;
+ } else if (line.startsWith("DirectoryArchivesOutputDirectory")) {
+ this.directoryArchivesOutputDirectory = line.split(" ")[1];
+ } else if (line.startsWith("ImportCachedRelayDescriptors")) {
+ this.importCachedRelayDescriptors = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("CachedRelayDescriptorsDirectory")) {
+ if (!containsCachedRelayDescriptorsDirectory) {
+ this.cachedRelayDescriptorsDirectory.clear();
+ containsCachedRelayDescriptorsDirectory = true;
+ }
+ this.cachedRelayDescriptorsDirectory.add(line.split(" ")[1]);
+ } else if (line.startsWith("ImportDirectoryArchives")) {
+ this.importDirectoryArchives = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("DirectoryArchivesDirectory")) {
+ this.directoryArchivesDirectory = line.split(" ")[1];
+ } else if (line.startsWith("KeepDirectoryArchiveImportHistory")) {
+ this.keepDirectoryArchiveImportHistory = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("ReplaceIPAddressesWithHashes")) {
+ this.replaceIPAddressesWithHashes = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("LimitBridgeDescriptorMappings")) {
+ this.limitBridgeDescriptorMappings = Long.parseLong(
+ line.split(" ")[1]);
+ } else if (line.startsWith("SanitizedBridgesWriteDirectory")) {
+ this.sanitizedBridgesWriteDirectory = line.split(" ")[1];
+ } else if (line.startsWith("BridgeSnapshotsDirectory")) {
+ this.bridgeSnapshotsDirectory = line.split(" ")[1];
+ } else if (line.startsWith("DownloadRelayDescriptors")) {
+ this.downloadRelayDescriptors = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("DownloadFromDirectoryAuthorities")) {
+ this.downloadFromDirectoryAuthorities = new ArrayList<String>();
+ for (String dir : line.split(" ")[1].split(",")) {
+ // test if IP:port pair has correct format
+ if (dir.length() < 1) {
+ logger.severe("Configuration file contains directory "
+ + "authority IP:port of length 0 in line '" + line
+ + "'! Exiting!");
+ System.exit(1);
+ }
+ new URL("http://" + dir + "/");
+ this.downloadFromDirectoryAuthorities.add(dir);
+ }
+ } else if (line.startsWith("DownloadVotesByFingerprint")) {
+ this.downloadVotesByFingerprint = new ArrayList<String>();
+ for (String fingerprint : line.split(" ")[1].split(",")) {
+ this.downloadVotesByFingerprint.add(fingerprint);
+ }
+ } else if (line.startsWith("DownloadCurrentConsensus")) {
+ this.downloadCurrentConsensus = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("DownloadCurrentMicrodescConsensus")) {
+ this.downloadCurrentMicrodescConsensus = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("DownloadCurrentVotes")) {
+ this.downloadCurrentVotes = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("DownloadMissingServerDescriptors")) {
+ this.downloadMissingServerDescriptors = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith(
+ "DownloadMissingExtraInfoDescriptors")) {
+ this.downloadMissingExtraInfoDescriptors = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("DownloadMissingMicrodescriptors")) {
+ this.downloadMissingMicrodescriptors = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("DownloadAllServerDescriptors")) {
+ this.downloadAllServerDescriptors = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("DownloadAllExtraInfoDescriptors")) {
+ this.downloadAllExtraInfoDescriptors = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("CompressRelayDescriptorDownloads")) {
+ this.compressRelayDescriptorDownloads = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("AssignmentsDirectory")) {
+ this.assignmentsDirectory = line.split(" ")[1];
+ } else if (line.startsWith("SanitizedAssignmentsDirectory")) {
+ this.sanitizedAssignmentsDirectory = line.split(" ")[1];
+ } else if (line.startsWith("TorperfOutputDirectory")) {
+ this.torperfOutputDirectory = line.split(" ")[1];
+ } else if (line.startsWith("TorperfSource")) {
+ if (this.torperfSources == null) {
+ this.torperfSources = new TreeMap<String, String>();
+ }
+ String[] parts = line.split(" ");
+ String sourceName = parts[1];
+ String baseUrl = parts[2];
+ this.torperfSources.put(sourceName, baseUrl);
+ } else if (line.startsWith("TorperfFiles")) {
+ if (this.torperfFiles == null) {
+ this.torperfFiles = new ArrayList<String>();
+ }
+ String[] parts = line.split(" ");
+ if (parts.length != 5) {
+ logger.severe("Configuration file contains TorperfFiles "
+ + "option with wrong number of values in line '" + line
+ + "'! Exiting!");
+ System.exit(1);
+ }
+ this.torperfFiles.add(line);
+ } else {
+ logger.severe("Configuration file contains unrecognized "
+ + "configuration key in line '" + line + "'! Exiting!");
+ System.exit(1);
+ }
+ }
+ br.close();
+ } catch (ArrayIndexOutOfBoundsException e) {
+ logger.severe("Configuration file contains configuration key "
+ + "without value in line '" + line + "'. Exiting!");
+ System.exit(1);
+ } catch (MalformedURLException e) {
+ logger.severe("Configuration file contains illegal URL or IP:port "
+ + "pair in line '" + line + "'. Exiting!");
+ System.exit(1);
+ } catch (NumberFormatException e) {
+ logger.severe("Configuration file contains illegal value in line '"
+ + line + "' with legal values being 0 or 1. Exiting!");
+ System.exit(1);
+ } catch (IOException e) {
+ logger.log(Level.SEVERE, "Unknown problem while reading config "
+ + "file! Exiting!", e);
+ System.exit(1);
+ }
+ }
+ public String getDirectoryArchivesOutputDirectory() {
+ return this.directoryArchivesOutputDirectory;
+ }
+ public boolean getImportCachedRelayDescriptors() {
+ return this.importCachedRelayDescriptors;
+ }
+ public List<String> getCachedRelayDescriptorDirectory() {
+ return this.cachedRelayDescriptorsDirectory;
+ }
+ public boolean getImportDirectoryArchives() {
+ return this.importDirectoryArchives;
+ }
+ public String getDirectoryArchivesDirectory() {
+ return this.directoryArchivesDirectory;
+ }
+ public boolean getKeepDirectoryArchiveImportHistory() {
+ return this.keepDirectoryArchiveImportHistory;
+ }
+ public boolean getReplaceIPAddressesWithHashes() {
+ return this.replaceIPAddressesWithHashes;
+ }
+ public long getLimitBridgeDescriptorMappings() {
+ return this.limitBridgeDescriptorMappings;
+ }
+ public String getSanitizedBridgesWriteDirectory() {
+ return this.sanitizedBridgesWriteDirectory;
+ }
+ public String getBridgeSnapshotsDirectory() {
+ return this.bridgeSnapshotsDirectory;
+ }
+ public boolean getDownloadRelayDescriptors() {
+ return this.downloadRelayDescriptors;
+ }
+ public List<String> getDownloadFromDirectoryAuthorities() {
+ return this.downloadFromDirectoryAuthorities;
+ }
+ public List<String> getDownloadVotesByFingerprint() {
+ return this.downloadVotesByFingerprint;
+ }
+ public boolean getDownloadCurrentConsensus() {
+ return this.downloadCurrentConsensus;
+ }
+ public boolean getDownloadCurrentMicrodescConsensus() {
+ return this.downloadCurrentMicrodescConsensus;
+ }
+ public boolean getDownloadCurrentVotes() {
+ return this.downloadCurrentVotes;
+ }
+ public boolean getDownloadMissingServerDescriptors() {
+ return this.downloadMissingServerDescriptors;
+ }
+ public boolean getDownloadMissingExtraInfoDescriptors() {
+ return this.downloadMissingExtraInfoDescriptors;
+ }
+ public boolean getDownloadMissingMicrodescriptors() {
+ return this.downloadMissingMicrodescriptors;
+ }
+ public boolean getDownloadAllServerDescriptors() {
+ return this.downloadAllServerDescriptors;
+ }
+ public boolean getDownloadAllExtraInfoDescriptors() {
+ return this.downloadAllExtraInfoDescriptors;
+ }
+ public boolean getCompressRelayDescriptorDownloads() {
+ return this.compressRelayDescriptorDownloads;
+ }
+ public String getAssignmentsDirectory() {
+ return this.assignmentsDirectory;
+ }
+ public String getSanitizedAssignmentsDirectory() {
+ return this.sanitizedAssignmentsDirectory;
+ }
+ public String getTorperfOutputDirectory() {
+ return this.torperfOutputDirectory;
+ }
+ public SortedMap<String, String> getTorperfSources() {
+ return this.torperfSources;
+ }
+ public List<String> getTorperfFiles() {
+ return this.torperfFiles;
+ }
+}
+
diff --git a/src/org/torproject/collector/main/LockFile.java b/src/org/torproject/collector/main/LockFile.java
new file mode 100644
index 0000000..cd850bd
--- /dev/null
+++ b/src/org/torproject/collector/main/LockFile.java
@@ -0,0 +1,55 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.collector.main;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.logging.Logger;
+
+public class LockFile {
+
+ private File lockFile;
+ private Logger logger;
+
+ public LockFile(String moduleName) {
+ this.lockFile = new File("lock/" + moduleName);
+ this.logger = Logger.getLogger(LockFile.class.getName());
+ }
+
+ public boolean acquireLock() {
+ this.logger.fine("Trying to acquire lock...");
+ try {
+ if (this.lockFile.exists()) {
+ BufferedReader br = new BufferedReader(new FileReader(
+ this.lockFile));
+ long runStarted = Long.parseLong(br.readLine());
+ br.close();
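+        /* Consider a lock stale and take it over if the previous run
+         * started more than 55 minutes ago. */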
+ if (System.currentTimeMillis() - runStarted < 55L * 60L * 1000L) {
+ return false;
+ }
+ }
+ this.lockFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ this.lockFile));
+ bw.append("" + System.currentTimeMillis() + "\n");
+ bw.close();
+ this.logger.fine("Acquired lock.");
+ return true;
+ } catch (IOException e) {
+ this.logger.warning("Caught exception while trying to acquire "
+ + "lock!");
+ return false;
+ }
+ }
+
+ public void releaseLock() {
+ this.logger.fine("Releasing lock...");
+ this.lockFile.delete();
+ this.logger.fine("Released lock.");
+ }
+}
+
diff --git a/src/org/torproject/collector/main/LoggingConfiguration.java b/src/org/torproject/collector/main/LoggingConfiguration.java
new file mode 100644
index 0000000..4da527b
--- /dev/null
+++ b/src/org/torproject/collector/main/LoggingConfiguration.java
@@ -0,0 +1,95 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.collector.main;
+
+import java.io.File;
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.TimeZone;
+import java.util.logging.ConsoleHandler;
+import java.util.logging.FileHandler;
+import java.util.logging.Formatter;
+import java.util.logging.Handler;
+import java.util.logging.Level;
+import java.util.logging.LogRecord;
+import java.util.logging.Logger;
+
+/**
+ * Initialize logging configuration.
+ *
+ * Log levels used by ERNIE:
+ *
+ * - SEVERE: An event made it impossible to continue program execution.
+ * - WARNING: A potential problem occurred that requires the operator to
+ *   look after the otherwise unattended setup.
+ * - INFO: Messages on INFO level are meant to help the operator in making
+ * sure that operation works as expected.
+ * - FINE: Debug messages that are used to identify problems and which are
+ * turned on by default.
+ * - FINER: More detailed debug messages to investigate problems in more
+ * detail. Not turned on by default. Increase log file limit when using
+ * FINER.
+ * - FINEST: Most detailed debug messages. Not used.
+ */
+public class LoggingConfiguration {
+ public LoggingConfiguration(String moduleName) {
+
+ /* Remove default console handler. */
+ for (Handler h : Logger.getLogger("").getHandlers()) {
+ Logger.getLogger("").removeHandler(h);
+ }
+
+ /* Disable logging of internal Sun classes. */
+ Logger.getLogger("sun").setLevel(Level.OFF);
+
+ /* Set minimum log level we care about from INFO to FINER. */
+ Logger.getLogger("").setLevel(Level.FINER);
+
+ /* Create log handler that writes messages on WARNING or higher to the
+ * console. */
+ final SimpleDateFormat dateTimeFormat =
+ new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ Formatter cf = new Formatter() {
+ public String format(LogRecord record) {
+ return dateTimeFormat.format(new Date(record.getMillis())) + " "
+ + record.getMessage() + "\n";
+ }
+ };
+ Handler ch = new ConsoleHandler();
+ ch.setFormatter(cf);
+ ch.setLevel(Level.WARNING);
+ Logger.getLogger("").addHandler(ch);
+
+ /* Initialize own logger for this class. */
+ Logger logger = Logger.getLogger(
+ LoggingConfiguration.class.getName());
+
+ /* Create log handler that writes all messages on FINE or higher to a
+ * local file. */
+ Formatter ff = new Formatter() {
+ public String format(LogRecord record) {
+ return dateTimeFormat.format(new Date(record.getMillis())) + " "
+ + record.getLevel() + " " + record.getSourceClassName() + " "
+ + record.getSourceMethodName() + " " + record.getMessage()
+ + (record.getThrown() != null ? " " + record.getThrown() : "")
+ + "\n";
+ }
+ };
+ try {
+ new File("log").mkdirs();
+ FileHandler fh = new FileHandler("log/" + moduleName, 5000000, 5,
+ true);
+ fh.setFormatter(ff);
+ fh.setLevel(Level.FINE);
+ Logger.getLogger("").addHandler(fh);
+ } catch (SecurityException e) {
+ logger.log(Level.WARNING, "No permission to create log file. "
+ + "Logging to file is disabled.", e);
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Could not write to log file. Logging to "
+ + "file is disabled.", e);
+ }
+ }
+}
diff --git a/src/org/torproject/collector/relaydescs/ArchiveReader.java b/src/org/torproject/collector/relaydescs/ArchiveReader.java
new file mode 100644
index 0000000..93bea7f
--- /dev/null
+++ b/src/org/torproject/collector/relaydescs/ArchiveReader.java
@@ -0,0 +1,281 @@
+/* Copyright 2010--2014 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.collector.relaydescs;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.StringReader;
+import java.io.UnsupportedEncodingException;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.Stack;
+import java.util.TimeZone;
+import java.util.TreeSet;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
+
+/**
+ * Read in all files in a given directory, decompressing .bz2 files on
+ * the fly, and pass their contents to the relay descriptor parser.
+ */
+public class ArchiveReader {
+ public ArchiveReader(RelayDescriptorParser rdp, File archivesDirectory,
+ File statsDirectory, boolean keepImportHistory) {
+
+ if (rdp == null || archivesDirectory == null ||
+ statsDirectory == null) {
+ throw new IllegalArgumentException();
+ }
+
+ rdp.setArchiveReader(this);
+ int parsedFiles = 0, ignoredFiles = 0;
+ Logger logger = Logger.getLogger(ArchiveReader.class.getName());
+ SortedSet<String> archivesImportHistory = new TreeSet<String>();
+ File archivesImportHistoryFile = new File(statsDirectory,
+ "archives-import-history");
+ if (keepImportHistory && archivesImportHistoryFile.exists()) {
+ try {
+ BufferedReader br = new BufferedReader(new FileReader(
+ archivesImportHistoryFile));
+ String line = null;
+ while ((line = br.readLine()) != null) {
+ archivesImportHistory.add(line);
+ }
+ br.close();
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Could not read in archives import "
+ + "history file. Skipping.");
+ }
+ }
+ if (archivesDirectory.exists()) {
+ logger.fine("Importing files in directory " + archivesDirectory
+ + "/...");
+ Stack<File> filesInInputDir = new Stack<File>();
+ filesInInputDir.add(archivesDirectory);
+ List<File> problems = new ArrayList<File>();
+ Set<File> filesToRetry = new HashSet<File>();
+ while (!filesInInputDir.isEmpty()) {
+ File pop = filesInInputDir.pop();
+ if (pop.isDirectory()) {
+ for (File f : pop.listFiles()) {
+ filesInInputDir.add(f);
+ }
+ } else {
+ if (rdp != null) {
+ try {
+ BufferedInputStream bis = null;
+ if (keepImportHistory &&
+ archivesImportHistory.contains(pop.getName())) {
+ ignoredFiles++;
+ continue;
+ } else if (pop.getName().endsWith(".tar.bz2")) {
+ logger.warning("Cannot parse compressed tarball "
+ + pop.getAbsolutePath() + ". Skipping.");
+ continue;
+ } else if (pop.getName().endsWith(".bz2")) {
+ FileInputStream fis = new FileInputStream(pop);
+ BZip2CompressorInputStream bcis =
+ new BZip2CompressorInputStream(fis);
+ bis = new BufferedInputStream(bcis);
+ } else {
+ FileInputStream fis = new FileInputStream(pop);
+ bis = new BufferedInputStream(fis);
+ }
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ int len;
+ byte[] data = new byte[1024];
+ while ((len = bis.read(data, 0, 1024)) >= 0) {
+ baos.write(data, 0, len);
+ }
+ bis.close();
+ byte[] allData = baos.toByteArray();
+ boolean stored = rdp.parse(allData);
+ if (!stored) {
+ filesToRetry.add(pop);
+ continue;
+ }
+ if (keepImportHistory) {
+ archivesImportHistory.add(pop.getName());
+ }
+ parsedFiles++;
+ } catch (IOException e) {
+ problems.add(pop);
+ if (problems.size() > 3) {
+ break;
+ }
+ }
+ }
+ }
+ }
+ for (File pop : filesToRetry) {
+ /* TODO We need to parse microdescriptors ourselves, rather than
+ * RelayDescriptorParser, because only we know the valid-after
+ * time(s) of microdesc consensus(es) containing this
+ * microdescriptor. However, this breaks functional abstraction
+ * pretty badly. */
+ if (rdp != null) {
+ try {
+ BufferedInputStream bis = null;
+ if (pop.getName().endsWith(".bz2")) {
+ FileInputStream fis = new FileInputStream(pop);
+ BZip2CompressorInputStream bcis =
+ new BZip2CompressorInputStream(fis);
+ bis = new BufferedInputStream(bcis);
+ } else {
+ FileInputStream fis = new FileInputStream(pop);
+ bis = new BufferedInputStream(fis);
+ }
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ int len;
+ byte[] data = new byte[1024];
+ while ((len = bis.read(data, 0, 1024)) >= 0) {
+ baos.write(data, 0, len);
+ }
+ bis.close();
+ byte[] allData = baos.toByteArray();
+ BufferedReader br = new BufferedReader(new StringReader(
+ new String(allData, "US-ASCII")));
+ String line;
+ do {
+ line = br.readLine();
+ } while (line != null && line.startsWith("@"));
+ br.close();
+ if (line == null) {
+ logger.fine("We were given an empty descriptor for "
+ + "parsing. Ignoring.");
+ continue;
+ }
+ if (!line.equals("onion-key")) {
+ logger.fine("Skipping non-recognized descriptor.");
+ continue;
+ }
+ SimpleDateFormat parseFormat =
+ new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ parseFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ String ascii = null;
+ try {
+ ascii = new String(allData, "US-ASCII");
+ } catch (UnsupportedEncodingException e) {
+ /* No way that US-ASCII is not supported. */
+ }
+ int start = -1, end = -1;
+ String startToken = "onion-key\n";
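+          /* Split the concatenated input at each "onion-key" line;
+           * each chunk is one microdescriptor that is digested and
+           * stored separately. */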
+ while (end < ascii.length()) {
+ start = ascii.indexOf(startToken, end);
+ if (start < 0) {
+ break;
+ }
+ end = ascii.indexOf(startToken, start + 1);
+ if (end < 0) {
+ end = ascii.length();
+ if (end <= start) {
+ break;
+ }
+ }
+ byte[] descBytes = new byte[end - start];
+ System.arraycopy(allData, start, descBytes, 0, end - start);
+ String digest256Base64 = Base64.encodeBase64String(
+ DigestUtils.sha256(descBytes)).replaceAll("=", "");
+ String digest256Hex = DigestUtils.sha256Hex(descBytes);
+ if (!this.microdescriptorValidAfterTimes.containsKey(
+ digest256Hex)) {
+ logger.fine("Could not store microdescriptor '"
+ + digest256Hex + "', which was not contained in a "
+ + "microdesc consensus.");
+ continue;
+ }
+ for (String validAfterTime :
+ this.microdescriptorValidAfterTimes.get(digest256Hex)) {
+ try {
+ long validAfter =
+ parseFormat.parse(validAfterTime).getTime();
+ rdp.storeMicrodescriptor(descBytes, digest256Hex,
+ digest256Base64, validAfter);
+ } catch (ParseException e) {
+ logger.log(Level.WARNING, "Could not parse "
+ + "valid-after time '" + validAfterTime + "'. Not "
+ + "storing microdescriptor.", e);
+ }
+ }
+ }
+ if (keepImportHistory) {
+ archivesImportHistory.add(pop.getName());
+ }
+ parsedFiles++;
+ } catch (IOException e) {
+ problems.add(pop);
+ if (problems.size() > 3) {
+ break;
+ }
+ }
+ }
+ }
+ if (problems.isEmpty()) {
+ logger.fine("Finished importing files in directory "
+ + archivesDirectory + "/.");
+ } else {
+ StringBuilder sb = new StringBuilder("Failed importing files in "
+ + "directory " + archivesDirectory + "/:");
+ int printed = 0;
+ for (File f : problems) {
+ sb.append("\n " + f.getAbsolutePath());
+ if (++printed >= 3) {
+ sb.append("\n ... more");
+ break;
+ }
+        }
+        logger.warning(sb.toString());
+      }
+ }
+ if (keepImportHistory) {
+ try {
+ archivesImportHistoryFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ archivesImportHistoryFile));
+ for (String line : archivesImportHistory) {
+ bw.write(line + "\n");
+ }
+ bw.close();
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Could not write archives import "
+ + "history file.");
+ }
+ }
+ logger.info("Finished importing relay descriptors from local "
+ + "directory:\nParsed " + parsedFiles + ", ignored "
+ + ignoredFiles + " files.");
+ }
+
+ private Map<String, Set<String>> microdescriptorValidAfterTimes =
+ new HashMap<String, Set<String>>();
+ public void haveParsedMicrodescConsensus(String validAfterTime,
+ SortedSet<String> microdescriptorDigests) {
+ for (String microdescriptor : microdescriptorDigests) {
+ if (!this.microdescriptorValidAfterTimes.containsKey(
+ microdescriptor)) {
+ this.microdescriptorValidAfterTimes.put(microdescriptor,
+ new HashSet<String>());
+ }
+ this.microdescriptorValidAfterTimes.get(microdescriptor).add(
+ validAfterTime);
+ }
+ }
+}
+
diff --git a/src/org/torproject/collector/relaydescs/ArchiveWriter.java b/src/org/torproject/collector/relaydescs/ArchiveWriter.java
new file mode 100644
index 0000000..560687c
--- /dev/null
+++ b/src/org/torproject/collector/relaydescs/ArchiveWriter.java
@@ -0,0 +1,831 @@
+/* Copyright 2010--2014 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.collector.relaydescs;
+
+import java.io.BufferedOutputStream;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Arrays;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.Stack;
+import java.util.TimeZone;
+import java.util.TreeMap;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.torproject.descriptor.DescriptorParser;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.impl.DescriptorParseException;
+import org.torproject.collector.main.Configuration;
+import org.torproject.collector.main.LockFile;
+import org.torproject.collector.main.LoggingConfiguration;
+
+public class ArchiveWriter extends Thread {
+
+ public static void main(String[] args) {
+
+ /* Initialize logging configuration. */
+ new LoggingConfiguration("relay-descriptors");
+ Logger logger = Logger.getLogger(ArchiveWriter.class.getName());
+ logger.info("Starting relay-descriptors module of ERNIE.");
+
+ // Initialize configuration
+ Configuration config = new Configuration();
+
+ // Use lock file to avoid overlapping runs
+ LockFile lf = new LockFile("relay-descriptors");
+ if (!lf.acquireLock()) {
+ logger.severe("Warning: ERNIE is already running or has not exited "
+ + "cleanly! Exiting!");
+ System.exit(1);
+ }
+
+ // Import/download relay descriptors from the various sources
+ new ArchiveWriter(config).run();
+
+ new ReferenceChecker(new File("recent/relay-descriptors"),
+ new File("stats/references"),
+ new File("stats/references-history")).check();
+
+ // Remove lock file
+ lf.releaseLock();
+
+ logger.info("Terminating relay-descriptors module of ERNIE.");
+ }
+
+ private Configuration config;
+
+ public ArchiveWriter(Configuration config) {
+ this.config = config;
+ }
+
+ private long now = System.currentTimeMillis();
+ private Logger logger;
+ private File outputDirectory;
+ private String rsyncCatString;
+ private DescriptorParser descriptorParser;
+ private int storedConsensusesCounter = 0,
+ storedMicrodescConsensusesCounter = 0, storedVotesCounter = 0,
+ storedCertsCounter = 0, storedServerDescriptorsCounter = 0,
+ storedExtraInfoDescriptorsCounter = 0,
+ storedMicrodescriptorsCounter = 0;
+
+ private SortedMap<Long, SortedSet<String>> storedConsensuses =
+ new TreeMap<Long, SortedSet<String>>();
+ private SortedMap<Long, SortedSet<String>> storedMicrodescConsensuses =
+ new TreeMap<Long, SortedSet<String>>();
+ private SortedMap<Long, Integer> expectedVotes =
+ new TreeMap<Long, Integer>();
+ private SortedMap<Long, SortedMap<String, SortedSet<String>>>
+ storedVotes =
+ new TreeMap<Long, SortedMap<String, SortedSet<String>>>();
+ private SortedMap<Long, Map<String, String>> storedServerDescriptors =
+ new TreeMap<Long, Map<String, String>>();
+ private SortedMap<Long, Set<String>> storedExtraInfoDescriptors =
+ new TreeMap<Long, Set<String>>();
+ private SortedMap<Long, Set<String>> storedMicrodescriptors =
+ new TreeMap<Long, Set<String>>();
+
+ private File storedServerDescriptorsFile = new File(
+ "stats/stored-server-descriptors");
+ private File storedExtraInfoDescriptorsFile = new File(
+ "stats/stored-extra-info-descriptors");
+ private File storedMicrodescriptorsFile = new File(
+ "stats/stored-microdescriptors");
+
+ private void loadDescriptorDigests() {
+ SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
+ "yyyy-MM-dd HH:mm:ss");
+ dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ try {
+ if (this.storedServerDescriptorsFile.exists()) {
+ BufferedReader br = new BufferedReader(new FileReader(
+ this.storedServerDescriptorsFile));
+ String line;
+ while ((line = br.readLine()) != null) {
+ String[] parts = line.split(",");
+ if (parts.length != 3) {
+ this.logger.warning("Could not load server descriptor "
+ + "digests because of illegal line '" + line + "'. We "
+ + "might not be able to correctly check descriptors for "
+ + "completeness.");
+ break;
+ }
+ long published = dateTimeFormat.parse(parts[0]).getTime();
+ if (published < this.now - 48L * 60L * 60L * 1000L) {
+ continue;
+ }
+ if (!this.storedServerDescriptors.containsKey(published)) {
+ this.storedServerDescriptors.put(published,
+ new HashMap<String, String>());
+ }
+ String serverDescriptorDigest = parts[1];
+ String extraInfoDescriptorDigest = parts[2].equals("NA") ? null
+ : parts[2];
+ this.storedServerDescriptors.get(published).put(
+ serverDescriptorDigest, extraInfoDescriptorDigest);
+ }
+ br.close();
+ }
+ if (this.storedExtraInfoDescriptorsFile.exists()) {
+ BufferedReader br = new BufferedReader(new FileReader(
+ this.storedExtraInfoDescriptorsFile));
+ String line;
+ while ((line = br.readLine()) != null) {
+ String[] parts = line.split(",");
+ if (parts.length != 2) {
+ this.logger.warning("Could not load extra-info descriptor "
+ + "digests because of illegal line '" + line + "'. We "
+ + "might not be able to correctly check descriptors for "
+ + "completeness.");
+ break;
+ }
+ long published = dateTimeFormat.parse(parts[0]).getTime();
+ if (published < this.now - 48L * 60L * 60L * 1000L) {
+ continue;
+ }
+ if (!this.storedExtraInfoDescriptors.containsKey(published)) {
+ this.storedExtraInfoDescriptors.put(published,
+ new HashSet<String>());
+ }
+ String extraInfoDescriptorDigest = parts[1];
+ this.storedExtraInfoDescriptors.get(published).add(
+ extraInfoDescriptorDigest);
+ }
+ br.close();
+ }
+ if (this.storedMicrodescriptorsFile.exists()) {
+ BufferedReader br = new BufferedReader(new FileReader(
+ this.storedMicrodescriptorsFile));
+ String line;
+ while ((line = br.readLine()) != null) {
+ String[] parts = line.split(",");
+ if (parts.length != 2) {
+ this.logger.warning("Could not load microdescriptor digests "
+ + "because of illegal line '" + line + "'. We might not "
+ + "be able to correctly check descriptors for "
+ + "completeness.");
+ break;
+ }
+ long validAfter = dateTimeFormat.parse(parts[0]).getTime();
+ if (validAfter < this.now - 40L * 24L * 60L * 60L * 1000L) {
+ continue;
+ }
+ if (!this.storedMicrodescriptors.containsKey(validAfter)) {
+ this.storedMicrodescriptors.put(validAfter,
+ new HashSet<String>());
+ }
+ String microdescriptorDigest = parts[1];
+ this.storedMicrodescriptors.get(validAfter).add(
+ microdescriptorDigest);
+ }
+ br.close();
+ }
+ } catch (ParseException e) {
+ this.logger.log(Level.WARNING, "Could not load descriptor "
+ + "digests. We might not be able to correctly check "
+ + "descriptors for completeness.", e);
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not load descriptor "
+ + "digests. We might not be able to correctly check "
+ + "descriptors for completeness.", e);
+ }
+ }
+
+ private void saveDescriptorDigests() {
+ SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
+ "yyyy-MM-dd HH:mm:ss");
+ dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ try {
+ this.storedServerDescriptorsFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ this.storedServerDescriptorsFile));
+ for (Map.Entry<Long, Map<String, String>> e :
+ this.storedServerDescriptors.entrySet()) {
+ String published = dateTimeFormat.format(e.getKey());
+ for (Map.Entry<String, String> f : e.getValue().entrySet()) {
+ String serverDescriptorDigest = f.getKey();
+ String extraInfoDescriptorDigest = f.getValue() == null ? "NA"
+ : f.getValue();
+ bw.write(String.format("%s,%s,%s%n", published,
+ serverDescriptorDigest, extraInfoDescriptorDigest));
+ }
+ }
+ bw.close();
+ this.storedExtraInfoDescriptorsFile.getParentFile().mkdirs();
+ bw = new BufferedWriter(new FileWriter(
+ this.storedExtraInfoDescriptorsFile));
+ for (Map.Entry<Long, Set<String>> e :
+ this.storedExtraInfoDescriptors.entrySet()) {
+ String published = dateTimeFormat.format(e.getKey());
+ for (String extraInfoDescriptorDigest : e.getValue()) {
+ bw.write(String.format("%s,%s%n", published,
+ extraInfoDescriptorDigest));
+ }
+ }
+ bw.close();
+ this.storedMicrodescriptorsFile.getParentFile().mkdirs();
+ bw = new BufferedWriter(new FileWriter(
+ this.storedMicrodescriptorsFile));
+ for (Map.Entry<Long, Set<String>> e :
+ this.storedMicrodescriptors.entrySet()) {
+ String validAfter = dateTimeFormat.format(e.getKey());
+ for (String microdescriptorDigest : e.getValue()) {
+ bw.write(String.format("%s,%s%n", validAfter,
+ microdescriptorDigest));
+ }
+ }
+ bw.close();
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not save descriptor "
+ + "digests. We might not be able to correctly check "
+ + "descriptors for completeness in the next run.", e);
+ }
+ }
+
+ public void run() {
+
+ File outputDirectory =
+ new File(config.getDirectoryArchivesOutputDirectory());
+ File statsDirectory = new File("stats");
+
+ this.logger = Logger.getLogger(ArchiveWriter.class.getName());
+ this.outputDirectory = outputDirectory;
+ SimpleDateFormat rsyncCatFormat = new SimpleDateFormat(
+ "yyyy-MM-dd-HH-mm-ss");
+ rsyncCatFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ this.rsyncCatString = rsyncCatFormat.format(
+ System.currentTimeMillis());
+ this.descriptorParser =
+ DescriptorSourceFactory.createDescriptorParser();
+
+ this.loadDescriptorDigests();
+
+ // Prepare relay descriptor parser
+ RelayDescriptorParser rdp = new RelayDescriptorParser(this);
+
+ RelayDescriptorDownloader rdd = null;
+ if (config.getDownloadRelayDescriptors()) {
+ List<String> dirSources =
+ config.getDownloadFromDirectoryAuthorities();
+ rdd = new RelayDescriptorDownloader(rdp, dirSources,
+ config.getDownloadVotesByFingerprint(),
+ config.getDownloadCurrentConsensus(),
+ config.getDownloadCurrentMicrodescConsensus(),
+ config.getDownloadCurrentVotes(),
+ config.getDownloadMissingServerDescriptors(),
+ config.getDownloadMissingExtraInfoDescriptors(),
+ config.getDownloadMissingMicrodescriptors(),
+ config.getDownloadAllServerDescriptors(),
+ config.getDownloadAllExtraInfoDescriptors(),
+ config.getCompressRelayDescriptorDownloads());
+ rdp.setRelayDescriptorDownloader(rdd);
+ }
+ if (config.getImportCachedRelayDescriptors()) {
+ new CachedRelayDescriptorReader(rdp,
+ config.getCachedRelayDescriptorDirectory(), statsDirectory);
+ this.intermediateStats("importing relay descriptors from local "
+ + "Tor data directories");
+ }
+ if (config.getImportDirectoryArchives()) {
+ new ArchiveReader(rdp,
+ new File(config.getDirectoryArchivesDirectory()),
+ statsDirectory,
+ config.getKeepDirectoryArchiveImportHistory());
+ this.intermediateStats("importing relay descriptors from local "
+ + "directory");
+ }
+ if (rdd != null) {
+ rdd.downloadDescriptors();
+ rdd.writeFile();
+ rdd = null;
+ this.intermediateStats("downloading relay descriptors from the "
+ + "directory authorities");
+ }
+
+ this.checkMissingDescriptors();
+
+ this.checkStaledescriptors();
+
+ this.cleanUpRsyncDirectory();
+
+ this.saveDescriptorDigests();
+ }
+
+ private boolean store(byte[] typeAnnotation, byte[] data,
+ File[] outputFiles, boolean[] append) {
+ try {
+ this.logger.finer("Storing " + outputFiles[0]);
+ if (this.descriptorParser.parseDescriptors(data,
+ outputFiles[0].getName()).size() != 1) {
+ this.logger.info("Relay descriptor file " + outputFiles[0]
+ + " doesn't contain exactly one descriptor. Not storing.");
+ return false;
+ }
+ for (int i = 0; i < outputFiles.length; i++) {
+ File outputFile = outputFiles[i];
+ boolean appendToFile = append == null ? false : append[i];
+ outputFile.getParentFile().mkdirs();
+ BufferedOutputStream bos = new BufferedOutputStream(
+ new FileOutputStream(outputFile, appendToFile));
+ if (data.length > 0 && data[0] != '@') {
+ bos.write(typeAnnotation, 0, typeAnnotation.length);
+ }
+ bos.write(data, 0, data.length);
+ bos.close();
+ }
+ return true;
+ } catch (DescriptorParseException e) {
+ this.logger.log(Level.WARNING, "Could not parse relay descriptor "
+ + outputFiles[0] + " before storing it to disk. Skipping.", e);
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not store relay descriptor "
+ + outputFiles[0], e);
+ }
+ return false;
+ }
+
+ private static final byte[] CONSENSUS_ANNOTATION =
+ "@type network-status-consensus-3 1.0\n".getBytes();
+ public void storeConsensus(byte[] data, long validAfter,
+ SortedSet<String> dirSources,
+ SortedSet<String> serverDescriptorDigests) {
+ SimpleDateFormat printFormat = new SimpleDateFormat(
+ "yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
+ printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ File tarballFile = new File(this.outputDirectory + "/consensus/"
+ + printFormat.format(new Date(validAfter)) + "-consensus");
+ boolean tarballFileExistedBefore = tarballFile.exists();
+ File rsyncFile = new File("recent/relay-descriptors/consensuses/"
+ + tarballFile.getName());
+ File[] outputFiles = new File[] { tarballFile, rsyncFile };
+ if (this.store(CONSENSUS_ANNOTATION, data, outputFiles, null)) {
+ this.storedConsensusesCounter++;
+ }
+ if (!tarballFileExistedBefore &&
+ this.now - validAfter < 3L * 60L * 60L * 1000L) {
+ this.storedConsensuses.put(validAfter, serverDescriptorDigests);
+ this.expectedVotes.put(validAfter, dirSources.size());
+ }
+ }
+
+ private static final byte[] MICRODESCCONSENSUS_ANNOTATION =
+ "@type network-status-microdesc-consensus-3 1.0\n".getBytes();
+ public void storeMicrodescConsensus(byte[] data, long validAfter,
+ SortedSet<String> microdescriptorDigests) {
+ SimpleDateFormat yearMonthDirectoryFormat = new SimpleDateFormat(
+ "yyyy/MM");
+ yearMonthDirectoryFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ SimpleDateFormat dayDirectoryFileFormat = new SimpleDateFormat(
+ "dd/yyyy-MM-dd-HH-mm-ss");
+ dayDirectoryFileFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ File tarballFile = new File(this.outputDirectory
+ + "/microdesc/" + yearMonthDirectoryFormat.format(validAfter)
+ + "/consensus-microdesc/"
+ + dayDirectoryFileFormat.format(validAfter)
+ + "-consensus-microdesc");
+ boolean tarballFileExistedBefore = tarballFile.exists();
+ File rsyncFile = new File("recent/relay-descriptors/microdescs/"
+ + "consensus-microdesc/" + tarballFile.getName());
+ File[] outputFiles = new File[] { tarballFile, rsyncFile };
+ if (this.store(MICRODESCCONSENSUS_ANNOTATION, data, outputFiles,
+ null)) {
+ this.storedMicrodescConsensusesCounter++;
+ }
+ if (!tarballFileExistedBefore &&
+ this.now - validAfter < 3L * 60L * 60L * 1000L) {
+ this.storedMicrodescConsensuses.put(validAfter,
+ microdescriptorDigests);
+ }
+ }
+
+ private static final byte[] VOTE_ANNOTATION =
+ "@type network-status-vote-3 1.0\n".getBytes();
+ public void storeVote(byte[] data, long validAfter,
+ String fingerprint, String digest,
+ SortedSet<String> serverDescriptorDigests) {
+ SimpleDateFormat printFormat = new SimpleDateFormat(
+ "yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
+ printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ File tarballFile = new File(this.outputDirectory + "/vote/"
+ + printFormat.format(new Date(validAfter)) + "-vote-"
+ + fingerprint + "-" + digest);
+ boolean tarballFileExistedBefore = tarballFile.exists();
+ File rsyncFile = new File("recent/relay-descriptors/votes/"
+ + tarballFile.getName());
+ File[] outputFiles = new File[] { tarballFile, rsyncFile };
+ if (this.store(VOTE_ANNOTATION, data, outputFiles, null)) {
+ this.storedVotesCounter++;
+ }
+ if (!tarballFileExistedBefore &&
+ this.now - validAfter < 3L * 60L * 60L * 1000L) {
+ if (!this.storedVotes.containsKey(validAfter)) {
+ this.storedVotes.put(validAfter,
+ new TreeMap<String, SortedSet<String>>());
+ }
+ this.storedVotes.get(validAfter).put(fingerprint,
+ serverDescriptorDigests);
+ }
+ }
+
+ private static final byte[] CERTIFICATE_ANNOTATION =
+ "@type dir-key-certificate-3 1.0\n".getBytes();
+ public void storeCertificate(byte[] data, String fingerprint,
+ long published) {
+ SimpleDateFormat printFormat = new SimpleDateFormat(
+ "yyyy-MM-dd-HH-mm-ss");
+ printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ File tarballFile = new File(this.outputDirectory + "/certs/"
+ + fingerprint + "-" + printFormat.format(new Date(published)));
+ File[] outputFiles = new File[] { tarballFile };
+ if (this.store(CERTIFICATE_ANNOTATION, data, outputFiles, null)) {
+ this.storedCertsCounter++;
+ }
+ }
+
+ private static final byte[] SERVER_DESCRIPTOR_ANNOTATION =
+ "@type server-descriptor 1.0\n".getBytes();
+ public void storeServerDescriptor(byte[] data, String digest,
+ long published, String extraInfoDigest) {
+ SimpleDateFormat printFormat = new SimpleDateFormat("yyyy/MM/");
+ printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ File tarballFile = new File(this.outputDirectory
+ + "/server-descriptor/" + printFormat.format(new Date(published))
+ + digest.substring(0, 1) + "/" + digest.substring(1, 2) + "/"
+ + digest);
+ boolean tarballFileExistedBefore = tarballFile.exists();
+ File rsyncCatFile = new File("recent/relay-descriptors/"
+ + "server-descriptors/" + this.rsyncCatString
+ + "-server-descriptors.tmp");
+ File[] outputFiles = new File[] { tarballFile, rsyncCatFile };
+ boolean[] append = new boolean[] { false, true };
+ if (this.store(SERVER_DESCRIPTOR_ANNOTATION, data, outputFiles,
+ append)) {
+ this.storedServerDescriptorsCounter++;
+ }
+ if (!tarballFileExistedBefore &&
+ this.now - published < 48L * 60L * 60L * 1000L) {
+ if (!this.storedServerDescriptors.containsKey(published)) {
+ this.storedServerDescriptors.put(published,
+ new HashMap<String, String>());
+ }
+ this.storedServerDescriptors.get(published).put(digest,
+ extraInfoDigest);
+ }
+ }
+
+ private static final byte[] EXTRA_INFO_ANNOTATION =
+ "@type extra-info 1.0\n".getBytes();
+ public void storeExtraInfoDescriptor(byte[] data,
+ String extraInfoDigest, long published) {
+ SimpleDateFormat descriptorFormat = new SimpleDateFormat("yyyy/MM/");
+ descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ File tarballFile = new File(this.outputDirectory + "/extra-info/"
+ + descriptorFormat.format(new Date(published))
+ + extraInfoDigest.substring(0, 1) + "/"
+ + extraInfoDigest.substring(1, 2) + "/"
+ + extraInfoDigest);
+ boolean tarballFileExistedBefore = tarballFile.exists();
+ File rsyncCatFile = new File("recent/relay-descriptors/"
+ + "extra-infos/" + this.rsyncCatString + "-extra-infos.tmp");
+ File[] outputFiles = new File[] { tarballFile, rsyncCatFile };
+ boolean[] append = new boolean[] { false, true };
+ if (this.store(EXTRA_INFO_ANNOTATION, data, outputFiles, append)) {
+ this.storedExtraInfoDescriptorsCounter++;
+ }
+ if (!tarballFileExistedBefore &&
+ this.now - published < 48L * 60L * 60L * 1000L) {
+ if (!this.storedExtraInfoDescriptors.containsKey(published)) {
+ this.storedExtraInfoDescriptors.put(published,
+ new HashSet<String>());
+ }
+ this.storedExtraInfoDescriptors.get(published).add(extraInfoDigest);
+ }
+ }
+
+ private static final byte[] MICRODESCRIPTOR_ANNOTATION =
+ "@type microdescriptor 1.0\n".getBytes();
+ public void storeMicrodescriptor(byte[] data,
+ String microdescriptorDigest, long validAfter) {
+ /* TODO We could check here whether we already stored the
+ * microdescriptor in the same valid-after month. This can happen,
+ * e.g., when two relays share the same microdescriptor. In that case
+ * this method gets called twice and the second call overwrites the
+ * file written in the first call. However, this method must be
+ * called twice to store the same microdescriptor in two different
+ * valid-after months. */
+ SimpleDateFormat descriptorFormat = new SimpleDateFormat("yyyy/MM/");
+ descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ File tarballFile = new File(this.outputDirectory + "/microdesc/"
+ + descriptorFormat.format(validAfter) + "micro/"
+ + microdescriptorDigest.substring(0, 1) + "/"
+ + microdescriptorDigest.substring(1, 2) + "/"
+ + microdescriptorDigest);
+ boolean tarballFileExistedBefore = tarballFile.exists();
+ File rsyncCatFile = new File("recent/relay-descriptors/"
+ + "microdescs/micro/" + this.rsyncCatString
+ + "-micro.tmp");
+ File[] outputFiles = new File[] { tarballFile, rsyncCatFile };
+ boolean[] append = new boolean[] { false, true };
+ if (this.store(MICRODESCRIPTOR_ANNOTATION, data, outputFiles,
+ append)) {
+ this.storedMicrodescriptorsCounter++;
+ }
+ if (!tarballFileExistedBefore &&
+ this.now - validAfter < 40L * 24L * 60L * 60L * 1000L) {
+ if (!this.storedMicrodescriptors.containsKey(validAfter)) {
+ this.storedMicrodescriptors.put(validAfter,
+ new HashSet<String>());
+ }
+ this.storedMicrodescriptors.get(validAfter).add(
+ microdescriptorDigest);
+ }
+ }
+
+ private StringBuilder intermediateStats = new StringBuilder();
+ public void intermediateStats(String event) {
+ intermediateStats.append("While " + event + ", we stored "
+ + this.storedConsensusesCounter + " consensus(es), "
+ + this.storedMicrodescConsensusesCounter + " microdesc "
+ + "consensus(es), " + this.storedVotesCounter + " vote(s), "
+ + this.storedCertsCounter + " certificate(s), "
+ + this.storedServerDescriptorsCounter + " server descriptor(s), "
+ + this.storedExtraInfoDescriptorsCounter + " extra-info "
+ + "descriptor(s), and " + this.storedMicrodescriptorsCounter
+ + " microdescriptor(s) to disk.\n");
+ this.storedConsensusesCounter = 0;
+ this.storedMicrodescConsensusesCounter = 0;
+ this.storedVotesCounter = 0;
+ this.storedCertsCounter = 0;
+ this.storedServerDescriptorsCounter = 0;
+ this.storedExtraInfoDescriptorsCounter = 0;
+ this.storedMicrodescriptorsCounter = 0;
+ }
+
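+ /* Log statistics on the completeness of stored relay descriptors,
+ * i.e., how many of the server descriptors, extra-info descriptors,
+ * and microdescriptors referenced from recently stored consensuses and
+ * votes have been stored as well. */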
+ private void checkMissingDescriptors() {
+ StringBuilder sb = new StringBuilder("Finished writing relay "
+ + "descriptors to disk.\n");
+ sb.append(intermediateStats.toString());
+ sb.append("Statistics on the completeness of written relay "
+ + "descriptors:");
+ SimpleDateFormat dateTimeFormat =
+ new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ Map<String, String> knownServerDescriptors =
+ new HashMap<String, String>();
+ for (Map<String, String> descriptors :
+ this.storedServerDescriptors.values()) {
+ knownServerDescriptors.putAll(descriptors);
+ }
+ Set<String> knownExtraInfoDescriptors = new HashSet<String>();
+ for (Set<String> descriptors :
+ this.storedExtraInfoDescriptors.values()) {
+ knownExtraInfoDescriptors.addAll(descriptors);
+ }
+ Set<String> knownMicrodescriptors = new HashSet<String>();
+ for (Set<String> descriptors : this.storedMicrodescriptors.values()) {
+ knownMicrodescriptors.addAll(descriptors);
+ }
+ boolean missingDescriptors = false, missingVotes = false,
+ missingMicrodescConsensus = false;
+ for (Map.Entry<Long, SortedSet<String>> c :
+ this.storedConsensuses.entrySet()) {
+ long validAfterMillis = c.getKey();
+ String validAfterTime = dateTimeFormat.format(validAfterMillis);
+ int allVotes = this.expectedVotes.containsKey(validAfterMillis)
+ ? this.expectedVotes.get(validAfterMillis) : 0;
+ int foundVotes = 0;
+ if (this.storedVotes.containsKey(validAfterMillis)) {
+ foundVotes = this.storedVotes.get(validAfterMillis).size();
+ for (Map.Entry<String, SortedSet<String>> v :
+ this.storedVotes.get(validAfterMillis).entrySet()) {
+ int voteFoundServerDescs = 0, voteAllServerDescs = 0,
+ voteFoundExtraInfos = 0, voteAllExtraInfos = 0;
+ for (String serverDescriptorDigest : v.getValue()) {
+ voteAllServerDescs++;
+ if (knownServerDescriptors.containsKey(
+ serverDescriptorDigest)) {
+ voteFoundServerDescs++;
+ if (knownServerDescriptors.get(serverDescriptorDigest)
+ != null) {
+ String extraInfoDescriptorDigest =
+ knownServerDescriptors.get(serverDescriptorDigest);
+ voteAllExtraInfos++;
+ if (knownExtraInfoDescriptors.contains(
+ extraInfoDescriptorDigest)) {
+ voteFoundExtraInfos++;
+ }
+ }
+ }
+ }
+ sb.append("\nV, " + validAfterTime);
+ if (voteAllServerDescs > 0) {
+ sb.append(String.format(", %d/%d S (%.1f%%)",
+ voteFoundServerDescs, voteAllServerDescs,
+ 100.0D * (double) voteFoundServerDescs /
+ (double) voteAllServerDescs));
+ } else {
+ sb.append(", 0/0 S");
+ }
+ if (voteAllExtraInfos > 0) {
+ sb.append(String.format(", %d/%d E (%.1f%%)",
+ voteFoundExtraInfos, voteAllExtraInfos,
+ 100.0D * (double) voteFoundExtraInfos /
+ (double) voteAllExtraInfos));
+ } else {
+ sb.append(", 0/0 E");
+ }
+ String fingerprint = v.getKey();
+ /* Ignore the directory authority "turtles" when warning about
+ * missing descriptors. */
+ if (!fingerprint.equalsIgnoreCase(
+ "27B6B5996C426270A5C95488AA5BCEB6BCC86956") &&
+ (voteFoundServerDescs * 1000 < voteAllServerDescs * 995 ||
+ voteFoundExtraInfos * 1000 < voteAllExtraInfos * 995)) {
+ missingDescriptors = true;
+ }
+ }
+ }
+ int foundServerDescs = 0, allServerDescs = 0, foundExtraInfos = 0,
+ allExtraInfos = 0, foundMicrodescriptors = 0,
+ allMicrodescriptors = 0;
+ for (String serverDescriptorDigest : c.getValue()) {
+ allServerDescs++;
+ if (knownServerDescriptors.containsKey(
+ serverDescriptorDigest)) {
+ foundServerDescs++;
+ if (knownServerDescriptors.get(
+ serverDescriptorDigest) != null) {
+ allExtraInfos++;
+ String extraInfoDescriptorDigest =
+ knownServerDescriptors.get(serverDescriptorDigest);
+ if (knownExtraInfoDescriptors.contains(
+ extraInfoDescriptorDigest)) {
+ foundExtraInfos++;
+ }
+ }
+ }
+ }
+ sb.append("\nC, " + validAfterTime);
+ if (allVotes > 0) {
+ sb.append(String.format(", %d/%d V (%.1f%%)", foundVotes, allVotes,
+ 100.0D * (double) foundVotes / (double) allVotes));
+ } else {
+ sb.append(", 0/0 V");
+ }
+ if (allServerDescs > 0) {
+ sb.append(String.format(", %d/%d S (%.1f%%)", foundServerDescs,
+ allServerDescs, 100.0D * (double) foundServerDescs /
+ (double) allServerDescs));
+ } else {
+ sb.append(", 0/0 S");
+ }
+ if (allExtraInfos > 0) {
+ sb.append(String.format(", %d/%d E (%.1f%%)", foundExtraInfos,
+ allExtraInfos, 100.0D * (double) foundExtraInfos /
+ (double) allExtraInfos));
+ } else {
+ sb.append(", 0/0 E");
+ }
+ if (this.storedMicrodescConsensuses.containsKey(validAfterMillis)) {
+ for (String microdescriptorDigest :
+ this.storedMicrodescConsensuses.get(validAfterMillis)) {
+ allMicrodescriptors++;
+ if (knownMicrodescriptors.contains(microdescriptorDigest)) {
+ foundMicrodescriptors++;
+ }
+ }
+ sb.append("\nM, " + validAfterTime);
+ if (allMicrodescriptors > 0) {
+ sb.append(String.format(", %d/%d M (%.1f%%)",
+ foundMicrodescriptors, allMicrodescriptors,
+ 100.0D * (double) foundMicrodescriptors /
+ (double) allMicrodescriptors));
+ } else {
+ sb.append(", 0/0 M");
+ }
+ } else {
+ missingMicrodescConsensus = true;
+ }
+ if (foundServerDescs * 1000 < allServerDescs * 995 ||
+ foundExtraInfos * 1000 < allExtraInfos * 995 ||
+ foundMicrodescriptors * 1000 < allMicrodescriptors * 995) {
+ missingDescriptors = true;
+ }
+ if (foundVotes < allVotes) {
+ missingVotes = true;
+ }
+ }
+ this.logger.info(sb.toString());
+ if (missingDescriptors) {
+ this.logger.fine("We are missing at least 0.5% of server or "
+ + "extra-info descriptors referenced from a consensus or "
+ + "vote or at least 0.5% of microdescriptors referenced from a "
+ + "microdesc consensus.");
+ }
+ if (missingVotes) {
+ /* TODO Shouldn't warn if we're not trying to archive votes at
+ * all. */
+ this.logger.fine("We are missing at least one vote that was "
+ + "referenced from a consensus.");
+ }
+ if (missingMicrodescConsensus) {
+ /* TODO Shouldn't warn if we're not trying to archive microdesc
+ * consensuses at all. */
+ this.logger.fine("We are missing at least one microdesc "
+ + "consensus that was published together with a known "
+ + "consensus.");
+ }
+ }
+
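+ /* Warn if the most recently stored descriptor of any type is older
+ * than 5:30 hours, which may indicate that we failed to download or
+ * import recent descriptors. */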
+ private void checkStaleDescriptors() {
+ SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
+ "yyyy-MM-dd HH:mm:ss");
+ dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ long tooOldMillis = this.now - 330L * 60L * 1000L;
+ if (!this.storedConsensuses.isEmpty() &&
+ this.storedConsensuses.lastKey() < tooOldMillis) {
+ this.logger.warning("The last known relay network status "
+ + "consensus was valid after "
+ + dateTimeFormat.format(this.storedConsensuses.lastKey())
+ + ", which is more than 5:30 hours in the past.");
+ }
+ if (!this.storedMicrodescConsensuses.isEmpty() &&
+ this.storedMicrodescConsensuses.lastKey() < tooOldMillis) {
+ this.logger.warning("The last known relay network status "
+ + "microdesc consensus was valid after "
+ + dateTimeFormat.format(
+ this.storedMicrodescConsensuses.lastKey())
+ + ", which is more than 5:30 hours in the past.");
+ }
+ if (!this.storedVotes.isEmpty() &&
+ this.storedVotes.lastKey() < tooOldMillis) {
+ this.logger.warning("The last known relay network status vote "
+ + "was valid after " + dateTimeFormat.format(
+ this.storedVotes.lastKey()) + ", which is more than 5:30 hours "
+ + "in the past.");
+ }
+ if (!this.storedServerDescriptors.isEmpty() &&
+ this.storedServerDescriptors.lastKey() < tooOldMillis) {
+ this.logger.warning("The last known relay server descriptor was "
+ + "published at "
+ + dateTimeFormat.format(this.storedServerDescriptors.lastKey())
+ + ", which is more than 5:30 hours in the past.");
+ }
+ if (!this.storedExtraInfoDescriptors.isEmpty() &&
+ this.storedExtraInfoDescriptors.lastKey() < tooOldMillis) {
+ this.logger.warning("The last known relay extra-info descriptor "
+ + "was published at " + dateTimeFormat.format(
+ this.storedExtraInfoDescriptors.lastKey())
+ + ", which is more than 5:30 hours in the past.");
+ }
+ if (!this.storedMicrodescriptors.isEmpty() &&
+ this.storedMicrodescriptors.lastKey() < tooOldMillis) {
+ this.logger.warning("The last known relay microdescriptor was "
+ + "contained in a microdesc consensus that was valid after "
+ + dateTimeFormat.format(this.storedMicrodescriptors.lastKey())
+ + ", which is more than 5:30 hours in the past.");
+ }
+ }
+
+ /* Delete all files from the rsync directory that have not been modified
+ * in the last three days (except for microdescriptors which are kept
+ * for up to thirty days), and remove the .tmp extension from newly
+ * written files. */
+ public void cleanUpRsyncDirectory() {
+ long cutOffMillis = System.currentTimeMillis()
+ - 3L * 24L * 60L * 60L * 1000L;
+ long cutOffMicroMillis = cutOffMillis - 27L * 24L * 60L * 60L * 1000L;
+ Stack<File> allFiles = new Stack<File>();
+ allFiles.add(new File("recent/relay-descriptors"));
+ while (!allFiles.isEmpty()) {
+ File file = allFiles.pop();
+ if (file.isDirectory()) {
+ allFiles.addAll(Arrays.asList(file.listFiles()));
+ } else if (file.getName().endsWith("-micro")) {
+ if (file.lastModified() < cutOffMicroMillis) {
+ file.delete();
+ }
+ } else if (file.lastModified() < cutOffMillis) {
+ file.delete();
+ } else if (file.getName().endsWith(".tmp")) {
+ file.renameTo(new File(file.getParentFile(),
+ file.getName().substring(0,
+ file.getName().lastIndexOf(".tmp"))));
+ }
+ }
+ }
+}
diff --git a/src/org/torproject/collector/relaydescs/CachedRelayDescriptorReader.java b/src/org/torproject/collector/relaydescs/CachedRelayDescriptorReader.java
new file mode 100644
index 0000000..de0c060
--- /dev/null
+++ b/src/org/torproject/collector/relaydescs/CachedRelayDescriptorReader.java
@@ -0,0 +1,250 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.collector.relaydescs;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.StringReader;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.Stack;
+import java.util.TimeZone;
+import java.util.TreeSet;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.codec.digest.DigestUtils;
+
+/**
+ * Parses all descriptors in the local cacheddesc/ directory and sorts
+ * them into the directory structure under directory-archive/.
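+ *
+ * <p>A minimal usage sketch (the input directory name is an
+ * assumption; it normally comes from the configuration, and rdp is the
+ * RelayDescriptorParser that receives the raw descriptors):</p>
+ *
+ * <pre>
+ * new CachedRelayDescriptorReader(rdp,
+ *     Arrays.asList("in/relay-descriptors/cacheddesc/"),
+ *     new File("stats"));
+ * </pre>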
+ */
+public class CachedRelayDescriptorReader {
+ public CachedRelayDescriptorReader(RelayDescriptorParser rdp,
+ List<String> inputDirectories, File statsDirectory) {
+
+ if (rdp == null || inputDirectories == null ||
+ inputDirectories.isEmpty() || statsDirectory == null) {
+ throw new IllegalArgumentException();
+ }
+
+ StringBuilder dumpStats = new StringBuilder("Finished importing "
+ + "relay descriptors from local Tor data directories:");
+ Logger logger = Logger.getLogger(
+ CachedRelayDescriptorReader.class.getName());
+
+ /* Read import history containing SHA-1 digests of previously parsed
+ * statuses and descriptors, so that we can skip them in this run. */
+ Set<String> lastImportHistory = new HashSet<String>(),
+ currentImportHistory = new HashSet<String>();
+ File importHistoryFile = new File(statsDirectory,
+ "cacheddesc-import-history");
+ if (importHistoryFile.exists()) {
+ try {
+ BufferedReader br = new BufferedReader(new FileReader(
+ importHistoryFile));
+ String line;
+ while ((line = br.readLine()) != null) {
+ lastImportHistory.add(line);
+ }
+ br.close();
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Could not read import history from "
+ + importHistoryFile.getAbsolutePath() + ".", e);
+ }
+ }
+
+ /* Read cached descriptors directories. */
+ for (String inputDirectory : inputDirectories) {
+ File cachedDescDir = new File(inputDirectory);
+ if (!cachedDescDir.exists()) {
+ logger.warning("Directory " + cachedDescDir.getAbsolutePath()
+ + " does not exist. Skipping.");
+ continue;
+ }
+ logger.fine("Reading " + cachedDescDir.getAbsolutePath()
+ + " directory.");
+ SortedSet<File> cachedDescFiles = new TreeSet<File>();
+ Stack<File> files = new Stack<File>();
+ files.add(cachedDescDir);
+ while (!files.isEmpty()) {
+ File file = files.pop();
+ if (file.isDirectory()) {
+ files.addAll(Arrays.asList(file.listFiles()));
+ } else {
+ cachedDescFiles.add(file);
+ }
+ }
+ for (File f : cachedDescFiles) {
+ try {
+ // descriptors may contain non-ASCII chars; read as bytes to
+ // determine digests
+ BufferedInputStream bis =
+ new BufferedInputStream(new FileInputStream(f));
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ int len;
+ byte[] data = new byte[1024];
+ while ((len = bis.read(data, 0, 1024)) >= 0) {
+ baos.write(data, 0, len);
+ }
+ bis.close();
+ byte[] allData = baos.toByteArray();
+ if (f.getName().equals("cached-consensus")) {
+ /* Check if directory information is stale. */
+ BufferedReader br = new BufferedReader(new StringReader(
+ new String(allData, "US-ASCII")));
+ String line = null;
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("valid-after ")) {
+ dumpStats.append("\n" + f.getName() + ": " + line.substring(
+ "valid-after ".length()));
+ SimpleDateFormat dateTimeFormat =
+ new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ if (dateTimeFormat.parse(line.substring("valid-after ".
+ length())).getTime() < System.currentTimeMillis()
+ - 6L * 60L * 60L * 1000L) {
+ logger.warning("Cached descriptor files in "
+ + cachedDescDir.getAbsolutePath() + " are stale. "
+ + "The valid-after line in cached-consensus is '"
+ + line + "'.");
+ dumpStats.append(" (stale!)");
+ }
+ break;
+ }
+ }
+ br.close();
+
+ /* Parse the cached consensus if we haven't parsed it before
+ * (but regardless of whether it's stale or not). */
+ if (rdp != null) {
+ String digest = Hex.encodeHexString(DigestUtils.sha(
+ allData));
+ if (!lastImportHistory.contains(digest) &&
+ !currentImportHistory.contains(digest)) {
+ rdp.parse(allData);
+ } else {
+ dumpStats.append(" (skipped)");
+ }
+ currentImportHistory.add(digest);
+ }
+ } else if (f.getName().equals("v3-status-votes")) {
+ int parsedNum = 0, skippedNum = 0;
+ String ascii = new String(allData, "US-ASCII");
+ String startToken = "network-status-version ";
+ int end = ascii.length();
+ int start = ascii.indexOf(startToken);
+ while (start >= 0 && start < end) {
+ int next = ascii.indexOf(startToken, start + 1);
+ if (next < 0) {
+ next = end;
+ }
+ if (start < next) {
+ byte[] rawNetworkStatusBytes = new byte[next - start];
+ System.arraycopy(allData, start, rawNetworkStatusBytes, 0,
+ next - start);
+ if (rdp != null) {
+ String digest = Hex.encodeHexString(DigestUtils.sha(
+ rawNetworkStatusBytes));
+ if (!lastImportHistory.contains(digest) &&
+ !currentImportHistory.contains(digest)) {
+ rdp.parse(rawNetworkStatusBytes);
+ parsedNum++;
+ } else {
+ skippedNum++;
+ }
+ currentImportHistory.add(digest);
+ }
+ }
+ start = next;
+ }
+ dumpStats.append("\n" + f.getName() + ": parsed " + parsedNum
+ + ", skipped " + skippedNum + " votes");
+ } else if (f.getName().startsWith("cached-descriptors") ||
+ f.getName().startsWith("cached-extrainfo")) {
+ String ascii = new String(allData, "US-ASCII");
+ int start = -1, sig = -1, end = -1;
+ String startToken =
+ f.getName().startsWith("cached-descriptors") ?
+ "router " : "extra-info ";
+ String sigToken = "\nrouter-signature\n";
+ String endToken = "\n-----END SIGNATURE-----\n";
+ int parsedNum = 0, skippedNum = 0;
+ while (end < ascii.length()) {
+ start = ascii.indexOf(startToken, end);
+ if (start < 0) {
+ break;
+ }
+ sig = ascii.indexOf(sigToken, start);
+ if (sig < 0) {
+ break;
+ }
+ sig += sigToken.length();
+ end = ascii.indexOf(endToken, sig);
+ if (end < 0) {
+ break;
+ }
+ end += endToken.length();
+ byte[] descBytes = new byte[end - start];
+ System.arraycopy(allData, start, descBytes, 0, end - start);
+ if (rdp != null) {
+ String digest = Hex.encodeHexString(DigestUtils.sha(
+ descBytes));
+ if (!lastImportHistory.contains(digest) &&
+ !currentImportHistory.contains(digest)) {
+ rdp.parse(descBytes);
+ parsedNum++;
+ } else {
+ skippedNum++;
+ }
+ currentImportHistory.add(digest);
+ }
+ }
+ dumpStats.append("\n" + f.getName() + ": parsed " + parsedNum
+ + ", skipped " + skippedNum + " "
+ + (f.getName().startsWith("cached-descriptors") ?
+ "server" : "extra-info") + " descriptors");
+ }
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Failed reading "
+ + cachedDescDir.getAbsolutePath() + " directory.", e);
+ } catch (ParseException e) {
+ logger.log(Level.WARNING, "Failed reading "
+ + cachedDescDir.getAbsolutePath() + " directory.", e);
+ }
+ }
+ logger.fine("Finished reading "
+ + cachedDescDir.getAbsolutePath() + " directory.");
+ }
+
+ /* Write import history containing SHA-1 digests to disk. */
+ try {
+ importHistoryFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ importHistoryFile));
+ for (String digest : currentImportHistory) {
+ bw.write(digest + "\n");
+ }
+ bw.close();
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Could not write import history to "
+ + importHistoryFile.getAbsolutePath() + ".", e);
+ }
+
+ logger.info(dumpStats.toString());
+ }
+}
+
diff --git a/src/org/torproject/collector/relaydescs/ReferenceChecker.java b/src/org/torproject/collector/relaydescs/ReferenceChecker.java
new file mode 100644
index 0000000..b9f36de
--- /dev/null
+++ b/src/org/torproject/collector/relaydescs/ReferenceChecker.java
@@ -0,0 +1,310 @@
+package org.torproject.collector.relaydescs;
+
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Locale;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TimeZone;
+import java.util.TreeSet;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorFile;
+import org.torproject.descriptor.DescriptorReader;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.DirSourceEntry;
+import org.torproject.descriptor.ExtraInfoDescriptor;
+import org.torproject.descriptor.Microdescriptor;
+import org.torproject.descriptor.NetworkStatusEntry;
+import org.torproject.descriptor.RelayNetworkStatusConsensus;
+import org.torproject.descriptor.RelayNetworkStatusVote;
+import org.torproject.descriptor.ServerDescriptor;
+
+import com.google.gson.Gson;
+
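+/**
+ * Checks whether descriptors that are referenced from recently stored
+ * descriptors, e.g., server descriptors referenced from a consensus or
+ * vote, have been stored as well, and warns if the weighted total of
+ * missing references grows too large.
+ */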
+public class ReferenceChecker {
+
+ private Logger log = Logger.getLogger(ReferenceChecker.class.getName());
+
+ private File descriptorsDir;
+
+ private File referencesFile;
+
+ private File historyFile;
+
+ public ReferenceChecker(File descriptorsDir, File referencesFile,
+ File historyFile) {
+ this.descriptorsDir = descriptorsDir;
+ this.referencesFile = referencesFile;
+ this.historyFile = historyFile;
+ }
+
+ public void check() {
+ this.getCurrentTimeMillis();
+ this.readReferencesFile();
+ this.readNewDescriptors();
+ this.dropStaleReferences();
+ this.checkReferences();
+ this.writeReferencesFile();
+ }
+
+ private long currentTimeMillis;
+
+ private void getCurrentTimeMillis() {
+ this.currentTimeMillis = System.currentTimeMillis();
+ }
+
+ private static class Reference implements Comparable<Reference> {
+
+ private String referencing;
+
+ private String referenced;
+
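+ /* Weight that this reference adds to the total of missing references
+ * when the referenced descriptor is absent; see checkReferences(). */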
+ private double weight;
+
+ private long expiresAfterMillis;
+
+ public Reference(String referencing, String referenced, double weight,
+ long expiresAfterMillis) {
+ this.referencing = referencing;
+ this.referenced = referenced;
+ this.weight = weight;
+ this.expiresAfterMillis = expiresAfterMillis;
+ }
+
+ @Override
+ public boolean equals(Object otherObject) {
+ if (!(otherObject instanceof Reference)) {
+ return false;
+ }
+ Reference other = (Reference) otherObject;
+ return this.referencing.equals(other.referencing) &&
+ this.referenced.equals(other.referenced);
+ }
+
+ @Override
+ public int hashCode() {
+ return this.referencing.hashCode() + this.referenced.hashCode();
+ }
+
+ @Override
+ public int compareTo(Reference other) {
+ int result = this.referencing.compareTo(other.referencing);
+ if (result == 0) {
+ result = this.referenced.compareTo(other.referenced);
+ }
+ return result;
+ }
+ }
+
+ private SortedSet<Reference> references = new TreeSet<Reference>();
+
+ private void addReference(String referencing, String referenced,
+ double weight, long expiresAfterMillis) {
+ this.references.add(new Reference(referencing.toUpperCase(),
+ referenced.toUpperCase(), weight, expiresAfterMillis));
+ }
+
+ private void readReferencesFile() {
+ if (!this.referencesFile.exists()) {
+ return;
+ }
+ Gson gson = new Gson();
+ try {
+ FileReader fr = new FileReader(this.referencesFile);
+ this.references.addAll(Arrays.asList(gson.fromJson(fr,
+ Reference[].class)));
+ fr.close();
+ } catch (IOException e) {
+ this.log.log(Level.WARNING, "Cannot read existing references file "
+ + "from previous run.", e);
+ }
+ }
+
+ private void readNewDescriptors() {
+ DescriptorReader descriptorReader =
+ DescriptorSourceFactory.createDescriptorReader();
+ descriptorReader.addDirectory(this.descriptorsDir);
+ descriptorReader.setExcludeFiles(this.historyFile);
+ Iterator<DescriptorFile> descriptorFiles =
+ descriptorReader.readDescriptors();
+ while (descriptorFiles.hasNext()) {
+ DescriptorFile descriptorFile = descriptorFiles.next();
+ for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+ if (descriptor instanceof RelayNetworkStatusConsensus) {
+ RelayNetworkStatusConsensus consensus =
+ (RelayNetworkStatusConsensus) descriptor;
+ String consensusFlavor = consensus.getConsensusFlavor();
+ if (consensusFlavor == null) {
+ this.readRelayNetworkStatusConsensusUnflavored(consensus);
+ } else if (consensusFlavor.equals("microdesc")) {
+ this.readRelayNetworkStatusConsensusMicrodesc(consensus);
+ } else {
+ /* Ignore unknown consensus flavors. */
+ }
+ } else if (descriptor instanceof RelayNetworkStatusVote) {
+ this.readRelayNetworkStatusVote(
+ (RelayNetworkStatusVote) descriptor);
+ } else if (descriptor instanceof ServerDescriptor) {
+ this.readServerDescriptor((ServerDescriptor) descriptor);
+ } else if (descriptor instanceof ExtraInfoDescriptor) {
+ this.readExtraInfoDescriptor((ExtraInfoDescriptor) descriptor);
+ } else if (descriptor instanceof Microdescriptor) {
+ this.readMicrodescriptor((Microdescriptor) descriptor);
+ } else {
+ /* Ignore unknown descriptors. */
+ }
+ }
+ }
+ }
+
+ private static DateFormat dateTimeFormat;
+ static {
+ dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'",
+ Locale.US);
+ dateTimeFormat.setLenient(false);
+ dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ }
+
+ private static final long ONE_HOUR = 60L * 60L * 1000L,
+ THREE_HOURS = 3L * ONE_HOUR, SIX_HOURS = 6L * ONE_HOUR,
+ ONE_DAY = 24L * ONE_HOUR, THIRTY_DAYS = 30L * ONE_DAY;
+
+ private void readRelayNetworkStatusConsensusUnflavored(
+ RelayNetworkStatusConsensus consensus) {
+ String validAfter = dateTimeFormat.format(
+ consensus.getValidAfterMillis());
+ String referencing = String.format("C-%s", validAfter);
+ this.addReference(referencing, String.format("M-%s", validAfter), 1.0,
+ consensus.getValidAfterMillis() + THREE_HOURS);
+ for (DirSourceEntry dirSourceEntry :
+ consensus.getDirSourceEntries().values()) {
+ if (!dirSourceEntry.isLegacy()) {
+ this.addReference(referencing, String.format("V-%s-%s",
+ validAfter, dirSourceEntry.getIdentity()), 1.0,
+ consensus.getValidAfterMillis() + THREE_HOURS);
+ }
+ }
+ double entryWeight = 200.0 /
+ ((double) consensus.getStatusEntries().size());
+ for (NetworkStatusEntry entry :
+ consensus.getStatusEntries().values()) {
+ this.addReference(referencing,
+ String.format("S-%s", entry.getDescriptor()), entryWeight,
+ entry.getPublishedMillis() + THREE_HOURS);
+ }
+ }
+
+ private void readRelayNetworkStatusConsensusMicrodesc(
+ RelayNetworkStatusConsensus consensus) {
+ String validAfter = dateTimeFormat.format(
+ consensus.getValidAfterMillis());
+ String referencing = String.format("M-%s", validAfter);
+ this.addReference(referencing, String.format("C-%s", validAfter), 1.0,
+ consensus.getValidAfterMillis() + THREE_HOURS);
+ double entryWeight = 200.0 /
+ ((double) consensus.getStatusEntries().size());
+ for (NetworkStatusEntry entry :
+ consensus.getStatusEntries().values()) {
+ for (String digest : entry.getMicrodescriptorDigests()) {
+ this.addReference(referencing, String.format("D-%s", digest),
+ entryWeight, entry.getPublishedMillis() + THREE_HOURS);
+ }
+ }
+ }
+
+ private void readRelayNetworkStatusVote(RelayNetworkStatusVote vote) {
+ String validAfter = dateTimeFormat.format(vote.getValidAfterMillis());
+ String referencing = String.format("V-%s-%s", validAfter,
+ vote.getIdentity());
+ double entryWeight = 200.0 /
+ ((double) vote.getStatusEntries().size());
+ for (NetworkStatusEntry entry : vote.getStatusEntries().values()) {
+ this.addReference(referencing,
+ String.format("S-%s", entry.getDescriptor()), entryWeight,
+ entry.getPublishedMillis() + SIX_HOURS);
+ }
+ }
+
+ private void readServerDescriptor(ServerDescriptor serverDescriptor) {
+ String referenced = serverDescriptor.getExtraInfoDigest() == null ? ""
+ : String.format("E-%s", serverDescriptor.getExtraInfoDigest());
+ this.addReference(String.format("S-%s",
+ serverDescriptor.getServerDescriptorDigest()), referenced, 0.01,
+ serverDescriptor.getPublishedMillis() + SIX_HOURS);
+ }
+
+ private void readExtraInfoDescriptor(
+ ExtraInfoDescriptor extraInfoDescriptor) {
+ this.addReference(String.format("E-%s",
+ extraInfoDescriptor.getExtraInfoDigest()), "", 0.005,
+ extraInfoDescriptor.getPublishedMillis() + SIX_HOURS);
+ }
+
+ private void readMicrodescriptor(Microdescriptor microdesc) {
+ this.addReference(
+ String.format("D-%s", microdesc.getMicrodescriptorDigest()), "",
+ 0.0, this.currentTimeMillis + THIRTY_DAYS);
+ }
+
+ private void dropStaleReferences() {
+ SortedSet<Reference> recentReferences = new TreeSet<Reference>();
+ for (Reference reference : this.references) {
+ if (this.currentTimeMillis <= reference.expiresAfterMillis) {
+ recentReferences.add(reference);
+ }
+ }
+ this.references = recentReferences;
+ }
+
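+ /* Sum up the weights of all distinct missing referenced descriptors
+ * and warn if the total exceeds 0.999; with status entries weighted
+ * at 200 / n, that threshold roughly corresponds to 0.5 percent of
+ * the entries in a single consensus. */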
+ private void checkReferences() {
+ Set<String> knownDescriptors = new HashSet<String>();
+ for (Reference reference : this.references) {
+ knownDescriptors.add(reference.referencing);
+ }
+ double totalMissingDescriptorsWeight = 0.0;
+ Set<String> missingDescriptors = new TreeSet<String>();
+ StringBuilder sb = new StringBuilder("Missing referenced "
+ + "descriptors:");
+ for (Reference reference : this.references) {
+ if (reference.referenced.length() > 0 &&
+ !knownDescriptors.contains(reference.referenced)) {
+ if (!missingDescriptors.contains(reference.referenced)) {
+ totalMissingDescriptorsWeight += reference.weight;
+ }
+ missingDescriptors.add(reference.referenced);
+ sb.append(String.format("%n%s -> %s (%.4f -> %.4f)",
+ reference.referencing, reference.referenced, reference.weight,
+ totalMissingDescriptorsWeight));
+ }
+ }
+ this.log.log(Level.INFO, sb.toString());
+ if (totalMissingDescriptorsWeight > 0.999) {
+ this.log.log(Level.WARNING, "Missing too many referenced "
+ + "descriptors (" + totalMissingDescriptorsWeight + ").");
+ }
+ }
+
+ private void writeReferencesFile() {
+ Gson gson = new Gson();
+ try {
+ FileWriter fw = new FileWriter(this.referencesFile);
+ gson.toJson(this.references, fw);
+ fw.close();
+ } catch (IOException e) {
+ this.log.log(Level.WARNING, "Cannot write references file for next "
+ + "run.", e);
+ }
+ }
+}
+
diff --git a/src/org/torproject/collector/relaydescs/RelayDescriptorDownloader.java b/src/org/torproject/collector/relaydescs/RelayDescriptorDownloader.java
new file mode 100644
index 0000000..8cfbf10
--- /dev/null
+++ b/src/org/torproject/collector/relaydescs/RelayDescriptorDownloader.java
@@ -0,0 +1,1090 @@
+/* Copyright 2010--2014 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.collector.relaydescs;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TimeZone;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import java.util.zip.InflaterInputStream;
+
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.codec.digest.DigestUtils;
+
+/**
+ * Downloads relay descriptors from the directory authorities via HTTP.
+ * Keeps a list of missing descriptors that gets updated by parse results
+ * from <code>RelayDescriptorParser</code> and downloads all missing
+ * descriptors that have been published in the last 24 hours. Also
+ * downloads all server and extra-info descriptors known to a directory
+ * authority at most once a day.
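+ *
+ * <p>A minimal usage sketch, mirroring how this class is wired up in
+ * <code>ArchiveWriter.run()</code> (the boolean flags shown here are
+ * assumptions; they normally come from the configuration):</p>
+ *
+ * <pre>
+ * RelayDescriptorDownloader rdd = new RelayDescriptorDownloader(rdp,
+ *     authorities, authorityFingerprints, true, true, true, true,
+ *     true, true, false, false, true);
+ * rdp.setRelayDescriptorDownloader(rdd);
+ * rdd.downloadDescriptors();
+ * rdd.writeFile();
+ * </pre>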
+ */
+public class RelayDescriptorDownloader {
+
+ /**
+ * Text file containing the descriptors that we are missing and that we
+ * want to download. Lines are formatted as:
+ *
+ * - "consensus,<validafter>,<parsed>",
+ * - "consensus-microdesc,<validafter>,<parsed>",
+ * - "vote,<validafter>,<fingerprint>,<parsed>",
+ * - "server,<published>,<relayid>,<descid>,<parsed>",
+ * - "extra,<published>,<relayid>,<descid>,<parsed>", or
+ * - "micro,<validafter>,<relayid>,<descid>,<parsed>".
+ */
+ private File missingDescriptorsFile;
+
+ /**
+ * Relay descriptors that we are missing and that we want to download
+ * either in this execution or write to disk and try next time. Map keys
+ * contain comma-separated values as in the missing descriptors files
+ * without the "parsed" column. Map values contain the "parsed" column.
+ */
+ private SortedMap<String, String> missingDescriptors;
+
+ /**
+ * Map from base64 microdescriptor digests to keys in missingDescriptors
+ * ("micro,<validafter>,<relayid>,<descid>"). We need this map, because
+ * we can't learn <validafter> or <relayid> from parsing
+ * microdescriptors, but we need to know <validafter> to store
+ * microdescriptors to disk and both <validafter> and <relayid> to
+ * remove microdescriptors from the missing list. There are potentially
+ * many matching keys in missingDescriptors for the same microdescriptor
+ * digest. Also, in rare cases relays share the same microdescriptor
+ * (which is only possible if they share the same onion key), and then
+ * we don't have to download their microdescriptor more than once.
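+ *
+ * For example (hypothetical values), the digest "dGVzdA" might map to
+ * the two keys "micro,2016-04-06 12:00:00,<relayid1>,dGVzdA" and
+ * "micro,2016-04-06 13:00:00,<relayid2>,dGVzdA".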
+ */
+ private Map<String, Set<String>> microdescriptorKeys;
+
+ /**
+ * Set of microdescriptor digests that are currently missing. Used for
+ * logging statistics instead of "micro,<validafter>,..." keys which may
+ * contain the same microdescriptor digest multiple times.
+ */
+ private Set<String> missingMicrodescriptors;
+
+ /**
+ * Text file containing the IP addresses (and Dir ports if not 80) of
+ * directory authorities and when we last downloaded all server and
+ * extra-info descriptors from them, so that we can avoid downloading
+ * them too often.
+ */
+ private File lastDownloadedAllDescriptorsFile;
+
+ /**
+ * Map of directory authorities and when we last downloaded all server
+ * and extra-info descriptors from them. Map keys are IP addresses (and
+ * Dir ports if not 80), map values are timestamps.
+ */
+ private Map<String, String> lastDownloadedAllDescriptors;
+
+ /**
+ * <code>RelayDescriptorParser</code> that we will hand over the
+ * downloaded descriptors for parsing.
+ */
+ private RelayDescriptorParser rdp;
+
+ /**
+ * Directory authorities that we will try to download missing
+ * descriptors from.
+ */
+ private List<String> authorities;
+
+ /**
+ * Fingerprints of directory authorities that we will use to download
+ * votes without requiring a successfully downloaded consensus.
+ */
+ private List<String> authorityFingerprints;
+
+ /**
+ * Should we try to download the current consensus if we don't have it?
+ */
+ private boolean downloadCurrentConsensus;
+
+ /**
+ * Should we try to download the current microdesc consensus if we don't
+ * have it?
+ */
+ private boolean downloadCurrentMicrodescConsensus;
+
+ /**
+ * Should we try to download current votes if we don't have them?
+ */
+ private boolean downloadCurrentVotes;
+
+ /**
+ * Should we try to download missing server descriptors that have been
+ * published within the past 24 hours?
+ */
+ private boolean downloadMissingServerDescriptors;
+
+ /**
+ * Should we try to download missing extra-info descriptors that have
+ * been published within the past 24 hours?
+ */
+ private boolean downloadMissingExtraInfos;
+
+ /**
+ * Should we try to download missing microdescriptors that have been
+ * published within the past 24 hours?
+ */
+ private boolean downloadMissingMicrodescriptors;
+
+ /**
+ * Should we try to download all server descriptors from the authorities
+ * once every 24 hours?
+ */
+ private boolean downloadAllServerDescriptors;
+
+ /**
+ * Should we try to download all extra-info descriptors from the
+ * authorities once every 24 hours?
+ */
+ private boolean downloadAllExtraInfos;
+
+ /**
+ * Should we download zlib-compressed versions of descriptors by adding
+ * ".z" to URLs?
+ */
+ private boolean downloadCompressed;
+
+ /**
+ * valid-after time that we expect the current consensus,
+ * microdescriptor consensus, and votes to have, formatted
+ * "yyyy-MM-dd HH:mm:ss". We only expect to find documents with this
+ * valid-after time on the directory authorities. This time is
+ * initialized as the beginning of the current hour.
+ */
+ private String currentValidAfter;
+
+ /**
+ * Cut-off time for missing server and extra-info descriptors, formatted
+ * "yyyy-MM-dd HH:mm:ss". This time is initialized as the current system
+ * time minus 24 hours.
+ */
+ private String descriptorCutOff;
+
+ /**
+ * Cut-off time for downloading all server and extra-info descriptors
+ * from the directory authorities, formatted "yyyy-MM-dd HH:mm:ss". This
+ * time is initialized as the current system time minus 23:30 hours.
+ */
+ private String downloadAllDescriptorsCutOff;
+
+ /**
+ * Directory authorities that we plan to download all server and
+ * extra-info descriptors from in this execution.
+ */
+ private Set<String> downloadAllDescriptorsFromAuthorities;
+
+ /**
+ * Current timestamp that is written to the missing list for descriptors
+ * that we parsed in this execution and for authorities that we
+ * downloaded all server and extra-info descriptors from.
+ */
+ private String currentTimestamp;
+
+ /**
+ * Logger for this class.
+ */
+ private Logger logger;
+
+ /**
+ * Number of descriptor requests sent to each directory authority, to
+ * be included in the logs.
+ */
+ private Map<String, Integer> requestsByAuthority;
+
+ /**
+ * Counters for descriptors that we had on the missing list at the
+ * beginning of the execution, that we added to the missing list,
+ * that we requested, and that we successfully downloaded in this
+ * execution.
+ */
+ private int oldMissingConsensuses = 0,
+ oldMissingMicrodescConsensuses = 0, oldMissingVotes = 0,
+ oldMissingServerDescriptors = 0, oldMissingExtraInfoDescriptors = 0,
+ oldMissingMicrodescriptors = 0, newMissingConsensuses = 0,
+ newMissingMicrodescConsensuses = 0, newMissingVotes = 0,
+ newMissingServerDescriptors = 0, newMissingExtraInfoDescriptors = 0,
+ newMissingMicrodescriptors = 0, requestedConsensuses = 0,
+ requestedMicrodescConsensuses = 0, requestedVotes = 0,
+ requestedMissingServerDescriptors = 0,
+ requestedAllServerDescriptors = 0,
+ requestedMissingExtraInfoDescriptors = 0,
+ requestedAllExtraInfoDescriptors = 0,
+ requestedMissingMicrodescriptors = 0, downloadedConsensuses = 0,
+ downloadedMicrodescConsensuses = 0, downloadedVotes = 0,
+ downloadedMissingServerDescriptors = 0,
+ downloadedAllServerDescriptors = 0,
+ downloadedMissingExtraInfoDescriptors = 0,
+ downloadedAllExtraInfoDescriptors = 0,
+ downloadedMissingMicrodescriptors = 0;
+
+ /**
+ * Initializes this class, including reading in missing descriptors from
+ * <code>stats/missing-relay-descriptors</code> and the times when we
+ * last downloaded all server and extra-info descriptors from
+ * <code>stats/last-downloaded-all-descriptors</code>.
+ */
+ public RelayDescriptorDownloader(RelayDescriptorParser rdp,
+ List<String> authorities, List<String> authorityFingerprints,
+ boolean downloadCurrentConsensus,
+ boolean downloadCurrentMicrodescConsensus,
+ boolean downloadCurrentVotes,
+ boolean downloadMissingServerDescriptors,
+ boolean downloadMissingExtraInfos,
+ boolean downloadMissingMicrodescriptors,
+ boolean downloadAllServerDescriptors, boolean downloadAllExtraInfos,
+ boolean downloadCompressed) {
+
+ /* Memorize argument values. */
+ this.rdp = rdp;
+ this.authorities = new ArrayList<String>(authorities);
+ this.authorityFingerprints = new ArrayList<String>(
+ authorityFingerprints);
+ this.downloadCurrentConsensus = downloadCurrentConsensus;
+ this.downloadCurrentMicrodescConsensus =
+ downloadCurrentMicrodescConsensus;
+ this.downloadCurrentVotes = downloadCurrentVotes;
+ this.downloadMissingServerDescriptors =
+ downloadMissingServerDescriptors;
+ this.downloadMissingExtraInfos = downloadMissingExtraInfos;
+ this.downloadMissingMicrodescriptors =
+ downloadMissingMicrodescriptors;
+ this.downloadAllServerDescriptors = downloadAllServerDescriptors;
+ this.downloadAllExtraInfos = downloadAllExtraInfos;
+ this.downloadCompressed = downloadCompressed;
+
+ /* Shuffle list of authorities for better load balancing over time. */
+ Collections.shuffle(this.authorities);
+
+ /* Initialize logger. */
+ this.logger = Logger.getLogger(
+ RelayDescriptorDownloader.class.getName());
+
+ /* Prepare cut-off times and timestamp for the missing descriptors
+ * list and the list of authorities to download all server and
+ * extra-info descriptors from. */
+ SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ format.setTimeZone(TimeZone.getTimeZone("UTC"));
+ long now = System.currentTimeMillis();
+ this.currentValidAfter = format.format((now / (60L * 60L * 1000L)) *
+ (60L * 60L * 1000L));
+ this.descriptorCutOff = format.format(now - 24L * 60L * 60L * 1000L);
+ this.currentTimestamp = format.format(now);
+ this.downloadAllDescriptorsCutOff = format.format(now
+ - 23L * 60L * 60L * 1000L - 30L * 60L * 1000L);
+
+ /* Read list of missing descriptors from disk and memorize those that
+ * we are interested in and that are likely to be found on the
+ * directory authorities. */
+ this.missingDescriptors = new TreeMap<String, String>();
+ this.microdescriptorKeys = new HashMap<String, Set<String>>();
+ this.missingMicrodescriptors = new HashSet<String>();
+ this.missingDescriptorsFile = new File(
+ "stats/missing-relay-descriptors");
+ if (this.missingDescriptorsFile.exists()) {
+ try {
+ this.logger.fine("Reading file "
+ + this.missingDescriptorsFile.getAbsolutePath() + "...");
+ BufferedReader br = new BufferedReader(new FileReader(
+ this.missingDescriptorsFile));
+ String line;
+ while ((line = br.readLine()) != null) {
+ if (line.split(",").length > 2) {
+ String published = line.split(",")[1];
+ if (((line.startsWith("consensus,") ||
+ line.startsWith("consensus-microdesc,") ||
+ line.startsWith("vote,")) &&
+ this.currentValidAfter.equals(published)) ||
+ ((line.startsWith("server,") ||
+ line.startsWith("extra,") ||
+ line.startsWith("micro,")) &&
+ this.descriptorCutOff.compareTo(published) < 0)) {
+ if (!line.endsWith("NA")) {
+ /* Not missing. */
+ } else if (line.startsWith("consensus,")) {
+ oldMissingConsensuses++;
+ } else if (line.startsWith("consensus-microdesc,")) {
+ oldMissingMicrodescConsensuses++;
+ } else if (line.startsWith("vote,")) {
+ oldMissingVotes++;
+ } else if (line.startsWith("server,")) {
+ oldMissingServerDescriptors++;
+ } else if (line.startsWith("extra,")) {
+ oldMissingExtraInfoDescriptors++;
+ }
+ int separateAt = line.lastIndexOf(",");
+ this.missingDescriptors.put(line.substring(0,
+ separateAt), line.substring(separateAt + 1));
+ if (line.startsWith("micro,")) {
+ String microdescriptorDigest = line.split(",")[3];
+ String microdescriptorKey = line.substring(0,
+ line.lastIndexOf(","));
+ if (!this.microdescriptorKeys.containsKey(
+ microdescriptorDigest)) {
+ this.microdescriptorKeys.put(
+ microdescriptorDigest, new HashSet<String>());
+ }
+ this.microdescriptorKeys.get(microdescriptorDigest).add(
+ microdescriptorKey);
+ if (line.endsWith("NA") && !this.missingMicrodescriptors.
+ contains(microdescriptorDigest)) {
+ this.missingMicrodescriptors.add(microdescriptorDigest);
+ oldMissingMicrodescriptors++;
+ }
+ }
+ }
+ } else {
+ this.logger.fine("Invalid line '" + line + "' in "
+ + this.missingDescriptorsFile.getAbsolutePath()
+ + ". Ignoring.");
+ }
+ }
+ br.close();
+ this.logger.fine("Finished reading file "
+ + this.missingDescriptorsFile.getAbsolutePath() + ".");
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed to read file "
+ + this.missingDescriptorsFile.getAbsolutePath()
+ + "! This means that we might forget to dowload relay "
+ + "descriptors we are missing.", e);
+ }
+ }
+
+ /* Read list of directory authorities and when we last downloaded all
+ * server and extra-info descriptors from them. */
+ this.lastDownloadedAllDescriptors = new HashMap<String, String>();
+ this.lastDownloadedAllDescriptorsFile = new File(
+ "stats/last-downloaded-all-descriptors");
+ if (this.lastDownloadedAllDescriptorsFile.exists()) {
+ try {
+ this.logger.fine("Reading file "
+ + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
+ + "...");
+ BufferedReader br = new BufferedReader(new FileReader(
+ this.lastDownloadedAllDescriptorsFile));
+ String line;
+ while ((line = br.readLine()) != null) {
+ if (line.split(",").length != 2) {
+ this.logger.fine("Invalid line '" + line + "' in "
+ + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
+ + ". Ignoring.");
+ } else {
+ String[] parts = line.split(",");
+ String authority = parts[0];
+ String lastDownloaded = parts[1];
+ this.lastDownloadedAllDescriptors.put(authority,
+ lastDownloaded);
+ }
+ }
+ br.close();
+ this.logger.fine("Finished reading file "
+ + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
+ + ".");
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed to read file "
+ + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
+ + "! This means that we might download all server and "
+ + "extra-info descriptors more often than we should.", e);
+ }
+ }
+
+ /* Make a list of at most two directory authorities that we want to
+ * download all server and extra-info descriptors from. */
+ this.downloadAllDescriptorsFromAuthorities = new HashSet<String>();
+ for (String authority : this.authorities) {
+ if (!this.lastDownloadedAllDescriptors.containsKey(authority) ||
+ this.lastDownloadedAllDescriptors.get(authority).compareTo(
+ this.downloadAllDescriptorsCutOff) < 0) {
+ this.downloadAllDescriptorsFromAuthorities.add(authority);
+ }
+ if (this.downloadAllDescriptorsFromAuthorities.size() >= 2) {
+ break;
+ }
+ }
+
+ /* Prepare statistics on this execution. */
+ this.requestsByAuthority = new HashMap<String, Integer>();
+ for (String authority : this.authorities) {
+ this.requestsByAuthority.put(authority, 0);
+ }
+ }
+
+ /**
+ * We have parsed a consensus. Take this consensus off the missing list
+ * and add the votes created by the given <code>authorities</code> and
+ * the <code>serverDescriptors</code> which are in the format
+ * "<published>,<relayid>,<descid>" to that list.
+ */
+ public void haveParsedConsensus(String validAfter,
+ Set<String> authorities, Set<String> serverDescriptors) {
+
+ /* Mark consensus as parsed. */
+ if (this.currentValidAfter.equals(validAfter)) {
+ String consensusKey = "consensus," + validAfter;
+ this.missingDescriptors.put(consensusKey, this.currentTimestamp);
+
+ /* Add votes to missing list. */
+ for (String authority : authorities) {
+ String voteKey = "vote," + validAfter + "," + authority;
+ if (!this.missingDescriptors.containsKey(voteKey)) {
+ this.missingDescriptors.put(voteKey, "NA");
+ this.newMissingVotes++;
+ }
+ }
+ }
+
+ /* Add server descriptors to missing list. */
+ for (String serverDescriptor : serverDescriptors) {
+ String published = serverDescriptor.split(",")[0];
+ if (this.descriptorCutOff.compareTo(published) < 0) {
+ String serverDescriptorKey = "server," + serverDescriptor;
+ if (!this.missingDescriptors.containsKey(
+ serverDescriptorKey)) {
+ this.missingDescriptors.put(serverDescriptorKey, "NA");
+ this.newMissingServerDescriptors++;
+ }
+ }
+ }
+ }
+
+ /**
+ * We have parsed a microdesc consensus. Take this microdesc consensus
+ * off the missing list and add the <code>microdescriptors</code> which
+ * are in the format "<validafter>,<relayid>,<descid>" to that
+ * list.
+ */
+ public void haveParsedMicrodescConsensus(String validAfter,
+ Set<String> microdescriptors) {
+
+ /* Mark microdesc consensus as parsed. */
+ if (this.currentValidAfter.equals(validAfter)) {
+ String microdescConsensusKey = "consensus-microdesc," + validAfter;
+ this.missingDescriptors.put(microdescConsensusKey,
+ this.currentTimestamp);
+ }
+
+ /* Add microdescriptors to missing list. Exclude those that we already
+ * downloaded this month. (We download each microdescriptor at least
+ * once per month to keep the storage logic sane; otherwise we'd have
+ * to copy microdescriptors from the earlier month to the current
+ * month, and that gets messy.) */
+ if (this.descriptorCutOff.compareTo(validAfter) < 0) {
+ String validAfterYearMonth = validAfter.substring(0,
+ "YYYY-MM".length());
+ for (String microdescriptor : microdescriptors) {
+ String microdescriptorKey = "micro," + microdescriptor;
+ String parsed = "NA";
+ String microdescriptorDigest = microdescriptor.split(",")[2];
+ if (this.microdescriptorKeys.containsKey(microdescriptorDigest)) {
+ for (String otherMicrodescriptorKey :
+ this.microdescriptorKeys.get(microdescriptorDigest)) {
+ String otherValidAfter =
+ otherMicrodescriptorKey.split(",")[1];
+ if (!otherValidAfter.startsWith(validAfterYearMonth)) {
+ continue;
+ }
+ String otherParsed = this.missingDescriptors.get(
+ otherMicrodescriptorKey);
+ if (otherParsed != null && !otherParsed.equals("NA")) {
+ parsed = otherParsed;
+ break;
+ }
+ }
+ } else {
+ this.microdescriptorKeys.put(
+ microdescriptorDigest, new HashSet<String>());
+ }
+ this.microdescriptorKeys.get(microdescriptorDigest).add(
+ microdescriptorKey);
+ this.missingDescriptors.put(microdescriptorKey, parsed);
+ if (parsed.equals("NA") &&
+ !this.missingMicrodescriptors.contains(microdescriptorDigest)) {
+ this.missingMicrodescriptors.add(microdescriptorDigest);
+ this.newMissingMicrodescriptors++;
+ }
+ }
+ }
+ }
+
+ /**
+ * We have parsed a vote. Take this vote off the missing list and add
+ * the <code>serverDescriptors</code> which are in the format
+ * "<published>,<relayid>,<descid>" to that list.
+ */
+ public void haveParsedVote(String validAfter, String fingerprint,
+ Set<String> serverDescriptors) {
+
+ /* Mark vote as parsed. */
+ if (this.currentValidAfter.equals(validAfter)) {
+ String voteKey = "vote," + validAfter + "," + fingerprint;
+ this.missingDescriptors.put(voteKey, this.currentTimestamp);
+ }
+
+ /* Add server descriptors to missing list. */
+ for (String serverDescriptor : serverDescriptors) {
+ String published = serverDescriptor.split(",")[0];
+ if (this.descriptorCutOff.compareTo(published) < 0) {
+ String serverDescriptorKey = "server," + serverDescriptor;
+ if (!this.missingDescriptors.containsKey(
+ serverDescriptorKey)) {
+ this.missingDescriptors.put(serverDescriptorKey, "NA");
+ this.newMissingServerDescriptors++;
+ }
+ }
+ }
+ }
+
+ /**
+ * We have parsed a server descriptor. Take this server descriptor off
+ * the missing list and put the extra-info descriptor digest on that
+ * list.
+ */
+ public void haveParsedServerDescriptor(String published,
+ String relayIdentity, String serverDescriptorDigest,
+ String extraInfoDigest) {
+
+ /* Mark server descriptor as parsed. */
+ if (this.descriptorCutOff.compareTo(published) < 0) {
+ String serverDescriptorKey = "server," + published + ","
+ + relayIdentity + "," + serverDescriptorDigest;
+ this.missingDescriptors.put(serverDescriptorKey,
+ this.currentTimestamp);
+
+ /* Add extra-info descriptor to missing list. */
+ if (extraInfoDigest != null) {
+ String extraInfoKey = "extra," + published + ","
+ + relayIdentity + "," + extraInfoDigest;
+ if (!this.missingDescriptors.containsKey(extraInfoKey)) {
+ this.missingDescriptors.put(extraInfoKey, "NA");
+ this.newMissingExtraInfoDescriptors++;
+ }
+ }
+ }
+ }
+
+ /**
+ * We have parsed an extra-info descriptor. Take it off the missing
+ * list.
+ */
+ public void haveParsedExtraInfoDescriptor(String published,
+ String relayIdentity, String extraInfoDigest) {
+ if (this.descriptorCutOff.compareTo(published) < 0) {
+ String extraInfoKey = "extra," + published + ","
+ + relayIdentity + "," + extraInfoDigest;
+ this.missingDescriptors.put(extraInfoKey, this.currentTimestamp);
+ }
+ }
+
+ /**
+ * We have parsed a microdescriptor. Take it off the missing list.
+ */
+ public void haveParsedMicrodescriptor(String descriptorDigest) {
+ if (this.microdescriptorKeys.containsKey(descriptorDigest)) {
+ for (String microdescriptorKey :
+ this.microdescriptorKeys.get(descriptorDigest)) {
+ String validAfter = microdescriptorKey.split(",")[1];
+ if (this.descriptorCutOff.compareTo(validAfter) < 0) {
+ this.missingDescriptors.put(microdescriptorKey,
+ this.currentTimestamp);
+ }
+ }
+ this.missingMicrodescriptors.remove(descriptorDigest);
+ }
+ }
+
+ /**
+ * Downloads missing descriptors that we think might still be available
+ * on the directory authorities as well as all server and extra-info
+ * descriptors once per day.
+ */
+ public void downloadDescriptors() {
+
+ /* Put the current consensus and votes on the missing list, unless we
+ * already have them. */
+ String consensusKey = "consensus," + this.currentValidAfter;
+ if (!this.missingDescriptors.containsKey(consensusKey)) {
+ this.missingDescriptors.put(consensusKey, "NA");
+ this.newMissingConsensuses++;
+ }
+ String microdescConsensusKey = "consensus-microdesc,"
+ + this.currentValidAfter;
+ if (!this.missingDescriptors.containsKey(microdescConsensusKey)) {
+ this.missingDescriptors.put(microdescConsensusKey, "NA");
+ this.newMissingMicrodescConsensuses++;
+ }
+ for (String authority : authorityFingerprints) {
+ String voteKey = "vote," + this.currentValidAfter + "," + authority;
+ if (!this.missingDescriptors.containsKey(voteKey)) {
+ this.missingDescriptors.put(voteKey, "NA");
+ this.newMissingVotes++;
+ }
+ }
+
+ /* Download descriptors from the authorities, which are in random
+ * order, so that we distribute the load somewhat fairly over
+ * time. */
+ for (String authority : authorities) {
+
+ /* Make all requests to an authority in a single try block. If
+ * something goes wrong with this authority, we give up on all
+ * downloads and continue with the next authority. */
+ /* TODO Some authorities provide very little bandwidth and could
+ * slow down the entire download process. Ponder adding a timeout of
+ * 3 or 5 minutes per authority to avoid getting in the way of the
+ * next execution. */
+ try {
+
+ /* Start with downloading the current consensus, unless we already
+ * have it. */
+ if (downloadCurrentConsensus) {
+ if (this.missingDescriptors.containsKey(consensusKey) &&
+ this.missingDescriptors.get(consensusKey).equals("NA")) {
+ this.requestedConsensuses++;
+ this.downloadedConsensuses +=
+ this.downloadResourceFromAuthority(authority,
+ "/tor/status-vote/current/consensus");
+ }
+ }
+
+ /* Then try to download the microdesc consensus. */
+ if (downloadCurrentMicrodescConsensus) {
+ if (this.missingDescriptors.containsKey(
+ microdescConsensusKey) &&
+ this.missingDescriptors.get(microdescConsensusKey).
+ equals("NA")) {
+ this.requestedMicrodescConsensuses++;
+ this.downloadedMicrodescConsensuses +=
+ this.downloadResourceFromAuthority(authority,
+ "/tor/status-vote/current/consensus-microdesc");
+ }
+ }
+
+ /* Next, try to download current votes that we're missing. */
+ if (downloadCurrentVotes) {
+ String voteKeyPrefix = "vote," + this.currentValidAfter;
+ SortedSet<String> fingerprints = new TreeSet<String>();
+ for (Map.Entry<String, String> e :
+ this.missingDescriptors.entrySet()) {
+ if (e.getValue().equals("NA") &&
+ e.getKey().startsWith(voteKeyPrefix)) {
+ String fingerprint = e.getKey().split(",")[2];
+ fingerprints.add(fingerprint);
+ }
+ }
+ for (String fingerprint : fingerprints) {
+ this.requestedVotes++;
+ this.downloadedVotes +=
+ this.downloadResourceFromAuthority(authority,
+ "/tor/status-vote/current/" + fingerprint);
+ }
+ }
+
+ /* Download either all server and extra-info descriptors or only
+ * those that we're missing. Start with server descriptors, then
+ * request extra-info descriptors. Finally, request missing
+ * microdescriptors. */
+ for (String type : new String[] { "server", "extra", "micro" }) {
+
+ /* Download all server or extra-info descriptors from this
+ * authority if we haven't done so for 24 hours and if we're
+ * configured to do so. */
+ if (this.downloadAllDescriptorsFromAuthorities.contains(
+ authority) && ((type.equals("server") &&
+ this.downloadAllServerDescriptors) ||
+ (type.equals("extra") && this.downloadAllExtraInfos))) {
+ int downloadedAllDescriptors =
+ this.downloadResourceFromAuthority(authority, "/tor/"
+ + type + "/all");
+ if (type.equals("server")) {
+ this.requestedAllServerDescriptors++;
+ this.downloadedAllServerDescriptors +=
+ downloadedAllDescriptors;
+ } else if (type.equals("extra")) {
+ this.requestedAllExtraInfoDescriptors++;
+ this.downloadedAllExtraInfoDescriptors +=
+ downloadedAllDescriptors;
+ }
+
+ /* Download missing server descriptors, extra-info descriptors,
+ * and microdescriptors if we're configured to do so. */
+ } else if ((type.equals("server") &&
+ this.downloadMissingServerDescriptors) ||
+ (type.equals("extra") && this.downloadMissingExtraInfos) ||
+ (type.equals("micro") &&
+ this.downloadMissingMicrodescriptors)) {
+
+ /* Go through the list of missing descriptors of this type
+ * and combine the descriptor identifiers to a URL of up to
+ * 96 server or extra-info descriptors or 92 microdescriptors
+ * that we can download at once. */
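+ /* For example (digests shortened for illustration), a combined
+ * server-descriptor resource looks like
+ * "/tor/server/d/<hex1>+<hex2>+..." and a combined
+ * microdescriptor resource like "/tor/micro/d/<b64-1>-<b64-2>-...". */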
+ SortedSet<String> descriptorIdentifiers =
+ new TreeSet<String>();
+ for (Map.Entry<String, String> e :
+ this.missingDescriptors.entrySet()) {
+ if (e.getValue().equals("NA") &&
+ e.getKey().startsWith(type + ",") &&
+ this.descriptorCutOff.compareTo(
+ e.getKey().split(",")[1]) < 0) {
+ String descriptorIdentifier = e.getKey().split(",")[3];
+ descriptorIdentifiers.add(descriptorIdentifier);
+ }
+ }
+ StringBuilder combinedResource = null;
+ int descriptorsInCombinedResource = 0,
+ requestedDescriptors = 0, downloadedDescriptors = 0;
+ int maxDescriptorsInCombinedResource =
+ type.equals("micro") ? 92 : 96;
+ String separator = type.equals("micro") ? "-" : "+";
+ for (String descriptorIdentifier : descriptorIdentifiers) {
+ if (descriptorsInCombinedResource >=
+ maxDescriptorsInCombinedResource) {
+ requestedDescriptors += descriptorsInCombinedResource;
+ downloadedDescriptors +=
+ this.downloadResourceFromAuthority(authority,
+ combinedResource.toString());
+ combinedResource = null;
+ descriptorsInCombinedResource = 0;
+ }
+ if (descriptorsInCombinedResource == 0) {
+ combinedResource = new StringBuilder("/tor/" + type
+ + "/d/" + descriptorIdentifier);
+ } else {
+ combinedResource.append(separator + descriptorIdentifier);
+ }
+ descriptorsInCombinedResource++;
+ }
+ if (descriptorsInCombinedResource > 0) {
+ requestedDescriptors += descriptorsInCombinedResource;
+ downloadedDescriptors +=
+ this.downloadResourceFromAuthority(authority,
+ combinedResource.toString());
+ }
+ if (type.equals("server")) {
+ this.requestedMissingServerDescriptors +=
+ requestedDescriptors;
+ this.downloadedMissingServerDescriptors +=
+ downloadedDescriptors;
+ } else if (type.equals("extra")) {
+ this.requestedMissingExtraInfoDescriptors +=
+ requestedDescriptors;
+ this.downloadedMissingExtraInfoDescriptors +=
+ downloadedDescriptors;
+ } else if (type.equals("micro")) {
+ this.requestedMissingMicrodescriptors +=
+ requestedDescriptors;
+ this.downloadedMissingMicrodescriptors +=
+ downloadedDescriptors;
+ }
+ }
+ }
+
+ /* If a download failed, stop requesting descriptors from this
+ * authority and move on to the next. */
+ } catch (IOException e) {
+ logger.log(Level.FINE, "Failed downloading from " + authority
+ + "!", e);
+ }
+ }
+ }
+
+ /**
+ * Attempts to download one or more descriptors identified by a resource
+ * string from a directory authority and passes the returned
+ * descriptor(s) to the <code>RelayDescriptorParser</code> upon success.
+ * Returns the number of descriptors contained in the reply. Throws an
+ * <code>IOException</code> if something goes wrong while downloading.
+ */
+ private int downloadResourceFromAuthority(String authority,
+ String resource) throws IOException {
+ byte[] allData = null;
+ this.requestsByAuthority.put(authority,
+ this.requestsByAuthority.get(authority) + 1);
+ /* TODO Disable compressed downloads for extra-info descriptors,
+ * because zlib decompression doesn't work correctly. Figure out why
+ * this is and fix it. */
+ String fullUrl = "http://" + authority + resource
+ + (this.downloadCompressed && !resource.startsWith("/tor/extra/")
+ ? ".z" : "");
+ URL u = new URL(fullUrl);
+ HttpURLConnection huc = (HttpURLConnection) u.openConnection();
+ huc.setRequestMethod("GET");
+ huc.connect();
+ int response = huc.getResponseCode();
+ if (response == 200) {
+ BufferedInputStream in = this.downloadCompressed &&
+ !resource.startsWith("/tor/extra/")
+ ? new BufferedInputStream(new InflaterInputStream(
+ huc.getInputStream()))
+ : new BufferedInputStream(huc.getInputStream());
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ int len;
+ byte[] data = new byte[1024];
+ while ((len = in.read(data, 0, 1024)) >= 0) {
+ baos.write(data, 0, len);
+ }
+ in.close();
+ allData = baos.toByteArray();
+ }
+ logger.fine("Downloaded " + fullUrl + " -> " + response + " ("
+ + (allData == null ? 0 : allData.length) + " bytes)");
+ int receivedDescriptors = 0;
+ if (allData != null) {
+ if (resource.startsWith("/tor/status-vote/current/")) {
+ this.rdp.parse(allData);
+ receivedDescriptors = 1;
+ } else if (resource.startsWith("/tor/server/") ||
+ resource.startsWith("/tor/extra/")) {
+ if (resource.equals("/tor/server/all") ||
+ resource.equals("/tor/extra/all")) {
+ this.lastDownloadedAllDescriptors.put(authority,
+ this.currentTimestamp);
+ }
+ String ascii = null;
+ try {
+ ascii = new String(allData, "US-ASCII");
+ } catch (UnsupportedEncodingException e) {
+ /* No way that US-ASCII is not supported. */
+ }
+ int start = -1, sig = -1, end = -1;
+ String startToken = resource.startsWith("/tor/server/") ?
+ "router " : "extra-info ";
+ String sigToken = "\nrouter-signature\n";
+ String endToken = "\n-----END SIGNATURE-----\n";
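+ /* The reply concatenates descriptors; each one runs from its
+ * "router "/"extra-info " line through the end of its signature,
+ * so we slice on these tokens. */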
+ while (end < ascii.length()) {
+ start = ascii.indexOf(startToken, end);
+ if (start < 0) {
+ break;
+ }
+ sig = ascii.indexOf(sigToken, start);
+ if (sig < 0) {
+ break;
+ }
+ sig += sigToken.length();
+ end = ascii.indexOf(endToken, sig);
+ if (end < 0) {
+ break;
+ }
+ end += endToken.length();
+ byte[] descBytes = new byte[end - start];
+ System.arraycopy(allData, start, descBytes, 0, end - start);
+ this.rdp.parse(descBytes);
+ receivedDescriptors++;
+ }
+ } else if (resource.startsWith("/tor/micro/")) {
+ /* TODO We need to parse microdescriptors ourselves, rather than
+ * RelayDescriptorParser, because only we know the valid-after
+ * time(s) of microdesc consensus(es) containing this
+ * microdescriptor. However, this breaks functional abstraction
+ * pretty badly. */
+ SimpleDateFormat parseFormat =
+ new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ parseFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ String ascii = null;
+ try {
+ ascii = new String(allData, "US-ASCII");
+ } catch (UnsupportedEncodingException e) {
+ /* No way that US-ASCII is not supported. */
+ }
+ int start = -1, end = -1;
+ String startToken = "onion-key\n";
+ while (end < ascii.length()) {
+ start = ascii.indexOf(startToken, end);
+ if (start < 0) {
+ break;
+ }
+ end = ascii.indexOf(startToken, start + 1);
+ if (end < 0) {
+ end = ascii.length();
+ if (end <= start) {
+ break;
+ }
+ }
+ byte[] descBytes = new byte[end - start];
+ System.arraycopy(allData, start, descBytes, 0, end - start);
+ String digest256Base64 = Base64.encodeBase64String(
+ DigestUtils.sha256(descBytes)).replaceAll("=", "");
+ if (!this.microdescriptorKeys.containsKey(digest256Base64)) {
+ continue;
+ }
+ String digest256Hex = DigestUtils.sha256Hex(descBytes);
+ for (String microdescriptorKey :
+ this.microdescriptorKeys.get(digest256Base64)) {
+ String validAfterTime = microdescriptorKey.split(",")[1];
+ try {
+ long validAfter =
+ parseFormat.parse(validAfterTime).getTime();
+ this.rdp.storeMicrodescriptor(descBytes, digest256Hex,
+ digest256Base64, validAfter);
+ } catch (ParseException e) {
+ this.logger.log(Level.WARNING, "Could not parse "
+ + "valid-after time '" + validAfterTime + "' in "
+ + "microdescriptor key. Not storing microdescriptor.",
+ e);
+ }
+ }
+ receivedDescriptors++;
+ }
+ }
+ }
+ return receivedDescriptors;
+ }
+
+ /**
+ * Writes status files to disk and logs statistics about downloading
+ * relay descriptors in this execution.
+ */
+ public void writeFile() {
+
+ /* Write missing descriptors file to disk. */
+ int missingConsensuses = 0, missingMicrodescConsensuses = 0,
+ missingVotes = 0, missingServerDescriptors = 0,
+ missingExtraInfoDescriptors = 0;
+ try {
+ this.logger.fine("Writing file "
+ + this.missingDescriptorsFile.getAbsolutePath() + "...");
+ this.missingDescriptorsFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ this.missingDescriptorsFile));
+ for (Map.Entry<String, String> e :
+ this.missingDescriptors.entrySet()) {
+ String key = e.getKey(), value = e.getValue();
+ if (!value.equals("NA")) {
+ /* Not missing. */
+ } else if (key.startsWith("consensus,")) {
+ missingConsensuses++;
+ } else if (key.startsWith("consensus-microdesc,")) {
+ missingMicrodescConsensuses++;
+ } else if (key.startsWith("vote,")) {
+ missingVotes++;
+ } else if (key.startsWith("server,")) {
+ missingServerDescriptors++;
+ } else if (key.startsWith("extra,")) {
+ missingExtraInfoDescriptors++;
+ } else if (key.startsWith("micro,")) {
+ }
+ bw.write(key + "," + value + "\n");
+ }
+ bw.close();
+ this.logger.fine("Finished writing file "
+ + this.missingDescriptorsFile.getAbsolutePath() + ".");
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed writing "
+ + this.missingDescriptorsFile.getAbsolutePath() + "!", e);
+ }
+ int missingMicrodescriptors = this.missingMicrodescriptors.size();
+
+ /* Write text file containing the directory authorities and when we
+ * last downloaded all server and extra-info descriptors from them to
+ * disk. */
+ try {
+ this.logger.fine("Writing file "
+ + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
+ + "...");
+ this.lastDownloadedAllDescriptorsFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ this.lastDownloadedAllDescriptorsFile));
+ for (Map.Entry<String, String> e :
+ this.lastDownloadedAllDescriptors.entrySet()) {
+ String authority = e.getKey();
+ String lastDownloaded = e.getValue();
+ bw.write(authority + "," + lastDownloaded + "\n");
+ }
+ bw.close();
+ this.logger.fine("Finished writing file "
+ + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
+ + ".");
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed writing "
+ + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() + "!",
+ e);
+ }
+
+ /* Log statistics about this execution. */
+ this.logger.info("Finished downloading relay descriptors from the "
+ + "directory authorities.");
+ this.logger.info("At the beginning of this execution, we were "
+ + "missing " + oldMissingConsensuses + " consensus(es), "
+ + oldMissingMicrodescConsensuses + " microdesc consensus(es), "
+ + oldMissingVotes + " vote(s), " + oldMissingServerDescriptors
+ + " server descriptor(s), " + oldMissingExtraInfoDescriptors
+ + " extra-info descriptor(s), and " + oldMissingMicrodescriptors
+ + " microdescriptor(s).");
+ this.logger.info("During this execution, we added "
+ + this.newMissingConsensuses + " consensus(es), "
+ + this.newMissingMicrodescConsensuses
+ + " microdesc consensus(es), " + this.newMissingVotes
+ + " vote(s), " + this.newMissingServerDescriptors
+ + " server descriptor(s), " + this.newMissingExtraInfoDescriptors
+ + " extra-info descriptor(s), and "
+ + this.newMissingMicrodescriptors + " microdescriptor(s) to the "
+ + "missing list, some of which we also "
+ + "requested and removed from the list again.");
+ this.logger.info("We requested " + this.requestedConsensuses
+ + " consensus(es), " + this.requestedMicrodescConsensuses
+ + " microdesc consensus(es), " + this.requestedVotes
+ + " vote(s), " + this.requestedMissingServerDescriptors
+ + " missing server descriptor(s), "
+ + this.requestedAllServerDescriptors
+ + " times all server descriptors, "
+ + this.requestedMissingExtraInfoDescriptors + " missing "
+ + "extra-info descriptor(s), "
+ + this.requestedAllExtraInfoDescriptors + " times all extra-info "
+ + "descriptors, and " + this.requestedMissingMicrodescriptors
+ + " missing microdescriptor(s) from the directory authorities.");
+ StringBuilder sb = new StringBuilder();
+ for (String authority : this.authorities) {
+ sb.append(" " + authority + "="
+ + this.requestsByAuthority.get(authority));
+ }
+ this.logger.info("We sent these numbers of requests to the directory "
+ + "authorities:" + sb.toString());
+ this.logger.info("We successfully downloaded "
+ + this.downloadedConsensuses + " consensus(es), "
+ + this.downloadedMicrodescConsensuses
+ + " microdesc consensus(es), " + this.downloadedVotes
+ + " vote(s), " + this.downloadedMissingServerDescriptors
+ + " missing server descriptor(s), "
+ + this.downloadedAllServerDescriptors
+ + " server descriptor(s) when downloading all descriptors, "
+ + this.downloadedMissingExtraInfoDescriptors + " missing "
+ + "extra-info descriptor(s), "
+ + this.downloadedAllExtraInfoDescriptors + " extra-info "
+ + "descriptor(s) when downloading all descriptors, and "
+ + this.downloadedMissingMicrodescriptors
+ + " missing microdescriptor(s).");
+ this.logger.info("At the end of this execution, we are missing "
+ + missingConsensuses + " consensus(es), "
+ + missingMicrodescConsensuses + " microdesc consensus(es), "
+ + missingVotes + " vote(s), " + missingServerDescriptors
+ + " server descriptor(s), " + missingExtraInfoDescriptors
+ + " extra-info descriptor(s), and " + missingMicrodescriptors
+ + " microdescriptor(s), some of which we may try in the next "
+ + "execution.");
+ }
+}
+
diff --git a/src/org/torproject/collector/relaydescs/RelayDescriptorParser.java b/src/org/torproject/collector/relaydescs/RelayDescriptorParser.java
new file mode 100644
index 0000000..4158d52
--- /dev/null
+++ b/src/org/torproject/collector/relaydescs/RelayDescriptorParser.java
@@ -0,0 +1,332 @@
+/* Copyright 2010--2014 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.collector.relaydescs;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.StringReader;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.SortedSet;
+import java.util.TimeZone;
+import java.util.TreeSet;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.codec.digest.DigestUtils;
+
+/**
+ * Parses relay descriptors including network status consensuses and
+ * votes, server and extra-info descriptors, and passes the results to the
+ * stats handlers, to the archive writer, or to the relay descriptor
+ * downloader.
+ */
+public class RelayDescriptorParser {
+
+ /**
+ * File writer that writes descriptor contents to files in a
+ * directory-archive directory structure.
+ */
+ private ArchiveWriter aw;
+
+ private ArchiveReader ar;
+
+ /**
+ * Missing descriptor downloader that uses the parse results to learn
+ * which descriptors we are missing and want to download.
+ */
+ private RelayDescriptorDownloader rdd;
+
+ /**
+ * Logger for this class.
+ */
+ private Logger logger;
+
+ private SimpleDateFormat dateTimeFormat;
+
+ /**
+ * Initializes this class.
+ */
+ public RelayDescriptorParser(ArchiveWriter aw) {
+ this.aw = aw;
+
+ /* Initialize logger. */
+ this.logger = Logger.getLogger(RelayDescriptorParser.class.getName());
+
+ this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ }
+
+ public void setRelayDescriptorDownloader(
+ RelayDescriptorDownloader rdd) {
+ this.rdd = rdd;
+ }
+
+ public void setArchiveReader(ArchiveReader ar) {
+ this.ar = ar;
+ }
+
+ public boolean parse(byte[] data) {
+ boolean stored = false;
+ try {
+ /* Convert descriptor to ASCII for parsing. This means we'll lose
+ * the non-ASCII chars, but we don't care about them for parsing
+ * anyway. */
+ BufferedReader br = new BufferedReader(new StringReader(new String(
+ data, "US-ASCII")));
+ String line;
+ do {
+ line = br.readLine();
+ } while (line != null && line.startsWith("@"));
+ if (line == null) {
+ this.logger.fine("We were given an empty descriptor for "
+ + "parsing. Ignoring.");
+ return false;
+ }
+ SimpleDateFormat parseFormat =
+ new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ parseFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ if (line.startsWith("network-status-version 3")) {
+ String statusType = "consensus";
+ if (line.equals("network-status-version 3 microdesc")) {
+ statusType = "consensus-microdesc";
+ }
+ String validAfterTime = null, fingerprint = null,
+ dirSource = null;
+ long validAfter = -1L, dirKeyPublished = -1L;
+ SortedSet<String> dirSources = new TreeSet<String>();
+ SortedSet<String> serverDescriptors = new TreeSet<String>();
+ SortedSet<String> serverDescriptorDigests = new TreeSet<String>();
+ SortedSet<String> microdescriptorKeys = new TreeSet<String>();
+ SortedSet<String> microdescriptorDigests = new TreeSet<String>();
+ StringBuilder certificateStringBuilder = null;
+ String certificateString = null;
+ String lastRelayIdentity = null;
+ while ((line = br.readLine()) != null) {
+ if (certificateStringBuilder != null) {
+ if (line.startsWith("r ")) {
+ certificateString = certificateStringBuilder.toString();
+ certificateStringBuilder = null;
+ } else {
+ certificateStringBuilder.append(line + "\n");
+ }
+ }
+ if (line.equals("vote-status vote")) {
+ statusType = "vote";
+ } else if (line.startsWith("valid-after ")) {
+ validAfterTime = line.substring("valid-after ".length());
+ validAfter = parseFormat.parse(validAfterTime).getTime();
+ } else if (line.startsWith("dir-source ")) {
+ dirSource = line.split(" ")[2];
+ } else if (line.startsWith("vote-digest ")) {
+ dirSources.add(dirSource);
+ } else if (line.startsWith("dir-key-certificate-version ")) {
+ certificateStringBuilder = new StringBuilder();
+ certificateStringBuilder.append(line + "\n");
+ } else if (line.startsWith("fingerprint ")) {
+ fingerprint = line.split(" ")[1];
+ } else if (line.startsWith("dir-key-published ")) {
+ String dirKeyPublishedTime = line.substring(
+ "dir-key-published ".length());
+ dirKeyPublished = parseFormat.parse(dirKeyPublishedTime).
+ getTime();
+ } else if (line.startsWith("r ")) {
+ String[] parts = line.split(" ");
+ if (parts.length == 8) {
+ lastRelayIdentity = Hex.encodeHexString(Base64.decodeBase64(
+ parts[2] + "=")).toLowerCase();
+ } else if (parts.length == 9) {
+ lastRelayIdentity = Hex.encodeHexString(Base64.decodeBase64(
+ parts[2] + "=")).toLowerCase();
+ String serverDesc = Hex.encodeHexString(Base64.decodeBase64(
+ parts[3] + "=")).toLowerCase();
+ String publishedTime = parts[4] + " " + parts[5];
+ serverDescriptors.add(publishedTime + ","
+ + lastRelayIdentity + "," + serverDesc);
+ serverDescriptorDigests.add(serverDesc);
+ } else {
+ this.logger.log(Level.WARNING, "Could not parse r line '"
+ + line + "' in descriptor. Skipping.");
+ break;
+ }
+ } else if (line.startsWith("m ")) {
+ String[] parts = line.split(" ");
+ if (parts.length == 2 && parts[1].length() == 43) {
+ String digest256Base64 = parts[1];
+ microdescriptorKeys.add(validAfterTime + ","
+ + lastRelayIdentity + "," + digest256Base64);
+ String digest256Hex = Hex.encodeHexString(
+ Base64.decodeBase64(digest256Base64 + "=")).
+ toLowerCase();
+ microdescriptorDigests.add(digest256Hex);
+ } else if (parts.length != 3 ||
+ !parts[2].startsWith("sha256=") ||
+ parts[2].length() != 50) {
+ this.logger.log(Level.WARNING, "Could not parse m line '"
+ + line + "' in descriptor. Skipping.");
+ break;
+ }
+ }
+ }
+ if (statusType.equals("consensus")) {
+ if (this.rdd != null) {
+ this.rdd.haveParsedConsensus(validAfterTime, dirSources,
+ serverDescriptors);
+ }
+ if (this.aw != null) {
+ this.aw.storeConsensus(data, validAfter, dirSources,
+ serverDescriptorDigests);
+ stored = true;
+ }
+ } else if (statusType.equals("consensus-microdesc")) {
+ if (this.rdd != null) {
+ this.rdd.haveParsedMicrodescConsensus(validAfterTime,
+ microdescriptorKeys);
+ }
+ if (this.ar != null) {
+ this.ar.haveParsedMicrodescConsensus(validAfterTime,
+ microdescriptorDigests);
+ }
+ if (this.aw != null) {
+ this.aw.storeMicrodescConsensus(data, validAfter,
+ microdescriptorDigests);
+ stored = true;
+ }
+ } else {
+ if (this.aw != null || this.rdd != null) {
+ String ascii = new String(data, "US-ASCII");
+ String startToken = "network-status-version ";
+ String sigToken = "directory-signature ";
+ int start = ascii.indexOf(startToken);
+ int sig = ascii.indexOf(sigToken);
+ if (start >= 0 && sig >= 0 && sig > start) {
+ sig += sigToken.length();
+ byte[] forDigest = new byte[sig - start];
+ System.arraycopy(data, start, forDigest, 0, sig - start);
+ String digest = DigestUtils.shaHex(forDigest).toUpperCase();
+ if (this.aw != null) {
+ this.aw.storeVote(data, validAfter, dirSource, digest,
+ serverDescriptorDigests);
+ stored = true;
+ }
+ if (this.rdd != null) {
+ this.rdd.haveParsedVote(validAfterTime, fingerprint,
+ serverDescriptors);
+ }
+ }
+ if (certificateString != null) {
+ if (this.aw != null) {
+ this.aw.storeCertificate(certificateString.getBytes(),
+ dirSource, dirKeyPublished);
+ stored = true;
+ }
+ }
+ }
+ }
+ } else if (line.startsWith("router ")) {
+ String publishedTime = null, extraInfoDigest = null,
+ relayIdentifier = null;
+ long published = -1L;
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("published ")) {
+ publishedTime = line.substring("published ".length());
+ published = parseFormat.parse(publishedTime).getTime();
+ } else if (line.startsWith("opt fingerprint") ||
+ line.startsWith("fingerprint")) {
+ relayIdentifier = line.substring(line.startsWith("opt ") ?
+ "opt fingerprint".length() : "fingerprint".length()).
+ replaceAll(" ", "").toLowerCase();
+ } else if (line.startsWith("opt extra-info-digest ") ||
+ line.startsWith("extra-info-digest ")) {
+ extraInfoDigest = line.startsWith("opt ") ?
+ line.split(" ")[2].toLowerCase() :
+ line.split(" ")[1].toLowerCase();
+ }
+ }
+ String ascii = new String(data, "US-ASCII");
+ String startToken = "router ";
+ String sigToken = "\nrouter-signature\n";
+ int start = ascii.indexOf(startToken);
+ int sig = ascii.indexOf(sigToken) + sigToken.length();
+ String digest = null;
+ if (start >= 0 && sig >= 0 && sig > start) {
+ byte[] forDigest = new byte[sig - start];
+ System.arraycopy(data, start, forDigest, 0, sig - start);
+ digest = DigestUtils.shaHex(forDigest);
+ }
+ if (this.aw != null && digest != null) {
+ this.aw.storeServerDescriptor(data, digest, published,
+ extraInfoDigest);
+ stored = true;
+ }
+ if (this.rdd != null && digest != null) {
+ this.rdd.haveParsedServerDescriptor(publishedTime,
+ relayIdentifier, digest, extraInfoDigest);
+ }
+ } else if (line.startsWith("extra-info ")) {
+ String publishedTime = null, relayIdentifier = line.split(" ")[2];
+ long published = -1L;
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("published ")) {
+ publishedTime = line.substring("published ".length());
+ published = parseFormat.parse(publishedTime).getTime();
+ }
+ }
+ String ascii = new String(data, "US-ASCII");
+ String startToken = "extra-info ";
+ String sigToken = "\nrouter-signature\n";
+ String digest = null;
+ int start = ascii.indexOf(startToken);
+ if (start > 0) {
+ /* Do not confuse "extra-info " in "@type extra-info 1.0" with
+ * "extra-info 0000...". TODO This is a hack that should be
+ * solved by using metrics-lib some day. */
+ start = ascii.indexOf("\n" + startToken);
+ if (start > 0) {
+ start++;
+ }
+ }
+ int sig = ascii.indexOf(sigToken) + sigToken.length();
+ if (start >= 0 && sig >= 0 && sig > start) {
+ byte[] forDigest = new byte[sig - start];
+ System.arraycopy(data, start, forDigest, 0, sig - start);
+ digest = DigestUtils.shaHex(forDigest);
+ }
+ if (this.aw != null && digest != null) {
+ this.aw.storeExtraInfoDescriptor(data, digest, published);
+ stored = true;
+ }
+ if (this.rdd != null && digest != null) {
+ this.rdd.haveParsedExtraInfoDescriptor(publishedTime,
+ relayIdentifier.toLowerCase(), digest);
+ }
+ } else if (line.equals("onion-key")) {
+ /* Cannot store microdescriptors without knowing valid-after
+ * time(s) of microdesc consensuses containing them, because we
+ * don't know which month directories to put them in. Have to use
+ * storeMicrodescriptor below. */
+ }
+ br.close();
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not parse descriptor. "
+ + "Skipping.", e);
+ } catch (ParseException e) {
+ this.logger.log(Level.WARNING, "Could not parse descriptor. "
+ + "Skipping.", e);
+ }
+ return stored;
+ }
+
+ public void storeMicrodescriptor(byte[] data, String digest256Hex,
+ String digest256Base64, long validAfter) {
+ if (this.aw != null) {
+ this.aw.storeMicrodescriptor(data, digest256Hex, validAfter);
+ }
+ if (this.rdd != null) {
+ this.rdd.haveParsedMicrodescriptor(digest256Base64);
+ }
+ }
+}
+
diff --git a/src/org/torproject/collector/torperf/TorperfDownloader.java b/src/org/torproject/collector/torperf/TorperfDownloader.java
new file mode 100644
index 0000000..2b65552
--- /dev/null
+++ b/src/org/torproject/collector/torperf/TorperfDownloader.java
@@ -0,0 +1,634 @@
+/* Copyright 2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.collector.torperf;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.text.SimpleDateFormat;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.Stack;
+import java.util.TimeZone;
+import java.util.TreeMap;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.torproject.collector.main.Configuration;
+import org.torproject.collector.main.LockFile;
+import org.torproject.collector.main.LoggingConfiguration;
+
+/* Download possibly truncated Torperf .data and .extradata files from
+ * configured sources, append them to the files we already have, and merge
+ * the two files into the .tpf format. */
+public class TorperfDownloader extends Thread {
+
+ public static void main(String[] args) {
+
+ /* Initialize logging configuration. */
+ new LoggingConfiguration("torperf");
+ Logger logger = Logger.getLogger(TorperfDownloader.class.getName());
+ logger.info("Starting torperf module of ERNIE.");
+
+ // Initialize configuration
+ Configuration config = new Configuration();
+
+ // Use lock file to avoid overlapping runs
+ LockFile lf = new LockFile("torperf");
+ if (!lf.acquireLock()) {
+ logger.severe("Warning: ERNIE is already running or has not exited "
+ + "cleanly! Exiting!");
+ System.exit(1);
+ }
+
+ // Process Torperf files
+ new TorperfDownloader(config).run();
+
+ // Remove lock file
+ lf.releaseLock();
+
+ logger.info("Terminating torperf module of ERNIE.");
+ }
+
+ private Configuration config;
+
+ public TorperfDownloader(Configuration config) {
+ this.config = config;
+ }
+
+ private File torperfOutputDirectory = null;
+ private SortedMap<String, String> torperfSources = null;
+ private List<String> torperfFilesLines = null;
+ private Logger logger = null;
+ private SimpleDateFormat dateFormat;
+
+ public void run() {
+
+ this.torperfOutputDirectory =
+ new File(config.getTorperfOutputDirectory());
+ this.torperfSources = config.getTorperfSources();
+ this.torperfFilesLines = config.getTorperfFiles();
+ if (!this.torperfOutputDirectory.exists()) {
+ this.torperfOutputDirectory.mkdirs();
+ }
+ this.logger = Logger.getLogger(TorperfDownloader.class.getName());
+ this.dateFormat = new SimpleDateFormat("yyyy-MM-dd");
+ this.dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ this.readLastMergedTimestamps();
+ for (String torperfFilesLine : this.torperfFilesLines) {
+ this.downloadAndMergeFiles(torperfFilesLine);
+ }
+ this.writeLastMergedTimestamps();
+
+ this.cleanUpRsyncDirectory();
+ }
+
+ private File torperfLastMergedFile =
+ new File("stats/torperf-last-merged");
+ private SortedMap<String, String> lastMergedTimestamps =
+ new TreeMap<String, String>();
+ private void readLastMergedTimestamps() {
+ if (!this.torperfLastMergedFile.exists()) {
+ return;
+ }
+ try {
+ BufferedReader br = new BufferedReader(new FileReader(
+ this.torperfLastMergedFile));
+ String line;
+ while ((line = br.readLine()) != null) {
+ String[] parts = line.split(" ");
+ String fileName = null, timestamp = null;
+ if (parts.length == 2) {
+ try {
+ Double.parseDouble(parts[1]);
+ fileName = parts[0];
+ timestamp = parts[1];
+ } catch (NumberFormatException e) {
+ /* Handle below. */
+ }
+ }
+ if (fileName == null || timestamp == null) {
+ this.logger.log(Level.WARNING, "Invalid line '" + line + "' in "
+ + this.torperfLastMergedFile.getAbsolutePath() + ". "
+ + "Ignoring past history of merging .data and .extradata "
+ + "files.");
+ this.lastMergedTimestamps.clear();
+ break;
+ }
+ this.lastMergedTimestamps.put(fileName, timestamp);
+ }
+ br.close();
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Error while reading '"
+ + this.torperfLastMergedFile.getAbsolutePath() + ". Ignoring "
+ + "past history of merging .data and .extradata files.");
+ this.lastMergedTimestamps.clear();
+ }
+ }
+
+ private void writeLastMergedTimestamps() {
+ try {
+ this.torperfLastMergedFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ this.torperfLastMergedFile));
+ for (Map.Entry<String, String> e :
+ this.lastMergedTimestamps.entrySet()) {
+ String fileName = e.getKey();
+ String timestamp = e.getValue();
+ bw.write(fileName + " " + timestamp + "\n");
+ }
+ bw.close();
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Error while writing '"
+ + this.torperfLastMergedFile.getAbsolutePath() + ". This may "
+ + "result in ignoring history of merging .data and .extradata "
+ + "files in the next execution.", e);
+ }
+ }
+
+ private void downloadAndMergeFiles(String torperfFilesLine) {
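+ /* A TorperfFiles configuration line is expected to consist of five
+ * space-separated fields, hypothetically e.g.
+ * "TorperfFiles torperf 51200 51200.data 51200.extradata":
+ * source name, file size, .data file, and .extradata file. */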
+ String[] parts = torperfFilesLine.split(" ");
+ String sourceName = parts[1];
+ int fileSize = -1;
+ try {
+ fileSize = Integer.parseInt(parts[2]);
+ } catch (NumberFormatException e) {
+ this.logger.log(Level.WARNING, "Could not parse file size in "
+ + "TorperfFiles configuration line '" + torperfFilesLine
+ + "'.");
+ return;
+ }
+
+ /* Download and append the .data file. */
+ String dataFileName = parts[3];
+ String sourceBaseUrl = torperfSources.get(sourceName);
+ String dataUrl = sourceBaseUrl + dataFileName;
+ String dataOutputFileName = sourceName + "-" + dataFileName;
+ File dataOutputFile = new File(torperfOutputDirectory,
+ dataOutputFileName);
+ boolean downloadedDataFile = this.downloadAndAppendFile(dataUrl,
+ dataOutputFile, true);
+
+ /* Download and append the .extradata file. */
+ String extradataFileName = parts[4];
+ String extradataUrl = sourceBaseUrl + extradataFileName;
+ String extradataOutputFileName = sourceName + "-" + extradataFileName;
+ File extradataOutputFile = new File(torperfOutputDirectory,
+ extradataOutputFileName);
+ boolean downloadedExtradataFile = this.downloadAndAppendFile(
+ extradataUrl, extradataOutputFile, false);
+
+ /* Merge both files into .tpf format. */
+ if (!downloadedDataFile && !downloadedExtradataFile) {
+ return;
+ }
+ String skipUntil = null;
+ if (this.lastMergedTimestamps.containsKey(dataOutputFileName)) {
+ skipUntil = this.lastMergedTimestamps.get(dataOutputFileName);
+ }
+ try {
+ skipUntil = this.mergeFiles(dataOutputFile, extradataOutputFile,
+ sourceName, fileSize, skipUntil);
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed merging " + dataOutputFile
+ + " and " + extradataOutputFile + ".", e);
+ }
+ if (skipUntil != null) {
+ this.lastMergedTimestamps.put(dataOutputFileName, skipUntil);
+ }
+ }
+
+ private boolean downloadAndAppendFile(String url, File outputFile,
+ boolean isDataFile) {
+
+ /* Read an existing output file to determine which line will be the
+ * first to append to it. */
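+ /* We remember the last timestamp line (for .extradata files, the
+ * last " LAUNCH" line) and how many lines followed it, then skip
+ * the downloaded file up to that point before appending. */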
+ String lastTimestampLine = null;
+ int linesAfterLastTimestampLine = 0;
+ if (outputFile.exists()) {
+ try {
+ BufferedReader br = new BufferedReader(new FileReader(
+ outputFile));
+ String line;
+ while ((line = br.readLine()) != null) {
+ if (isDataFile || line.contains(" LAUNCH")) {
+ lastTimestampLine = line;
+ linesAfterLastTimestampLine = 0;
+ } else {
+ linesAfterLastTimestampLine++;
+ }
+ }
+ br.close();
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed reading '"
+ + outputFile.getAbsolutePath() + "' to determine the first "
+ + "line to append to it.", e);
+ return false;
+ }
+ }
+ try {
+ this.logger.fine("Downloading " + (isDataFile ? ".data" :
+ ".extradata") + " file from '" + url + "' and merging it into "
+ + "'" + outputFile.getAbsolutePath() + "'.");
+ URL u = new URL(url);
+ HttpURLConnection huc = (HttpURLConnection) u.openConnection();
+ huc.setRequestMethod("GET");
+ huc.connect();
+ BufferedReader br = new BufferedReader(new InputStreamReader(
+ huc.getInputStream()));
+ String line;
+ BufferedWriter bw = new BufferedWriter(new FileWriter(outputFile,
+ true));
+ boolean copyLines = lastTimestampLine == null;
+ while ((line = br.readLine()) != null) {
+ if (copyLines && linesAfterLastTimestampLine == 0) {
+ if (isDataFile || line.contains(" LAUNCH")) {
+ lastTimestampLine = line;
+ }
+ bw.write(line + "\n");
+ } else if (copyLines && linesAfterLastTimestampLine > 0) {
+ linesAfterLastTimestampLine--;
+ } else if (line.equals(lastTimestampLine)) {
+ copyLines = true;
+ }
+ }
+ bw.close();
+ br.close();
+ if (!copyLines) {
+ this.logger.warning("The last timestamp line in '"
+ + outputFile.getAbsolutePath() + "' is not contained in the "
+ + "new file downloaded from '" + url + "'. Cannot append "
+ + "new lines without possibly leaving a gap. Skipping.");
+ return false;
+ }
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed downloading and/or merging '"
+ + url + "'.", e);
+ return false;
+ }
+ if (lastTimestampLine == null) {
+ this.logger.warning("'" + outputFile.getAbsolutePath()
+ + "' doesn't contain any timestamp lines. Unable to check "
+ + "whether that file is stale or not.");
+ } else {
+ long lastTimestampMillis = -1L;
+ if (isDataFile) {
+ lastTimestampMillis = Long.parseLong(lastTimestampLine.substring(
+ 0, lastTimestampLine.indexOf(" "))) * 1000L;
+ } else {
+ lastTimestampMillis = Long.parseLong(lastTimestampLine.substring(
+ lastTimestampLine.indexOf(" LAUNCH=") + " LAUNCH=".length(),
+ lastTimestampLine.indexOf(".",
+ lastTimestampLine.indexOf(" LAUNCH=")))) * 1000L;
+ }
+ if (lastTimestampMillis < System.currentTimeMillis()
+ - 330L * 60L * 1000L) {
+ this.logger.warning("The last timestamp in '"
+ + outputFile.getAbsolutePath() + "' is more than 5:30 hours "
+ + "old: " + lastTimestampMillis);
+ }
+ }
+ return true;
+ }
+
+ private String mergeFiles(File dataFile, File extradataFile,
+ String source, int fileSize, String skipUntil) throws IOException {
+ SortedMap<String, String> config = new TreeMap<String, String>();
+ config.put("SOURCE", source);
+ config.put("FILESIZE", String.valueOf(fileSize));
+ if (!dataFile.exists() || !extradataFile.exists()) {
+ this.logger.warning("File " + dataFile.getAbsolutePath() + " or "
+ + extradataFile.getAbsolutePath() + " is missing.");
+ return null;
+ }
+ this.logger.fine("Merging " + dataFile.getAbsolutePath() + " and "
+ + extradataFile.getAbsolutePath() + " into .tpf format.");
+ BufferedReader brD = new BufferedReader(new FileReader(dataFile)),
+ brE = new BufferedReader(new FileReader(extradataFile));
+ String lineD = brD.readLine(), lineE = brE.readLine();
+ int d = 1, e = 1;
+ String maxDataComplete = null, maxUsedAt = null;
+ while (lineD != null) {
+
+ /* Parse .data line. Every valid .data line will go into the .tpf
+ * format, either with additional information from the .extradata
+ * file or without it. */
+ if (lineD.isEmpty()) {
+ this.logger.finer("Skipping empty line " + dataFile.getName()
+ + ":" + d++ + ".");
+ lineD = brD.readLine();
+ continue;
+ }
+ SortedMap<String, String> data = this.parseDataLine(lineD);
+ if (data == null) {
+ this.logger.finer("Skipping illegal line " + dataFile.getName()
+ + ":" + d++ + " '" + lineD + "'.");
+ lineD = brD.readLine();
+ continue;
+ }
+ String dataComplete = data.get("DATACOMPLETE");
+ double dataCompleteSeconds = Double.parseDouble(dataComplete);
+ if (skipUntil != null && dataComplete.compareTo(skipUntil) < 0) {
+ this.logger.finer("Skipping " + dataFile.getName() + ":"
+ + d++ + " which we already processed before.");
+ lineD = brD.readLine();
+ continue;
+ }
+ maxDataComplete = dataComplete;
+
+ /* Parse .extradata line if available and try to find the one that
+ * matches the .data line. */
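+ /* A match means the .extradata line's USED_AT timestamp is within
+ * one second of the .data line's DATACOMPLETE timestamp. */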
+ SortedMap<String, String> extradata = null;
+ while (lineE != null) {
+ if (lineE.isEmpty()) {
+ this.logger.finer("Skipping " + extradataFile.getName() + ":"
+ + e++ + " which is empty.");
+ lineE = brE.readLine();
+ continue;
+ }
+ if (lineE.startsWith("BUILDTIMEOUT_SET ")) {
+ this.logger.finer("Skipping " + extradataFile.getName() + ":"
+ + e++ + " which is a BUILDTIMEOUT_SET line.");
+ lineE = brE.readLine();
+ continue;
+ } else if (lineE.startsWith("ok ") ||
+ lineE.startsWith("error ")) {
+ this.logger.finer("Skipping " + extradataFile.getName() + ":"
+ + e++ + " which is in the old format.");
+ lineE = brE.readLine();
+ continue;
+ }
+ extradata = this.parseExtradataLine(lineE);
+ if (extradata == null) {
+ this.logger.finer("Skipping Illegal line "
+ + extradataFile.getName() + ":" + e++ + " '" + lineE
+ + "'.");
+ lineE = brE.readLine();
+ continue;
+ }
+ if (!extradata.containsKey("USED_AT")) {
+ this.logger.finer("Skipping " + extradataFile.getName() + ":"
+ + e++ + " which doesn't contain a USED_AT element.");
+ lineE = brE.readLine();
+ continue;
+ }
+ String usedAt = extradata.get("USED_AT");
+ double usedAtSeconds = Double.parseDouble(usedAt);
+ if (skipUntil != null && usedAt.compareTo(skipUntil) < 0) {
+ this.logger.finer("Skipping " + extradataFile.getName() + ":"
+ + e++ + " which we already processed before.");
+ lineE = brE.readLine();
+ continue;
+ }
+ maxUsedAt = usedAt;
+ if (Math.abs(usedAtSeconds - dataCompleteSeconds) <= 1.0) {
+ this.logger.fine("Merging " + extradataFile.getName() + ":"
+ + e++ + " into the current .data line.");
+ lineE = brE.readLine();
+ break;
+ } else if (usedAtSeconds > dataCompleteSeconds) {
+ this.logger.finer("Comparing " + extradataFile.getName()
+ + " to the next .data line.");
+ extradata = null;
+ break;
+ } else {
+ this.logger.finer("Skipping " + extradataFile.getName() + ":"
+ + e++ + " which is too old to be merged with "
+ + dataFile.getName() + ":" + d + ".");
+ lineE = brE.readLine();
+ continue;
+ }
+ }
+
+ /* Write output line to .tpf file. */
+ SortedMap<String, String> keysAndValues =
+ new TreeMap<String, String>();
+ if (extradata != null) {
+ keysAndValues.putAll(extradata);
+ }
+ keysAndValues.putAll(data);
+ keysAndValues.putAll(config);
+ this.logger.fine("Writing " + dataFile.getName() + ":" + d++ + ".");
+ lineD = brD.readLine();
+ try {
+ this.writeTpfLine(source, fileSize, keysAndValues);
+ } catch (IOException ex) {
+ this.logger.log(Level.WARNING, "Error writing output line. "
+ + "Aborting to merge " + dataFile.getName() + " and "
+ + extradataFile.getName() + ".", e);
+ break;
+ }
+ }
+ brD.close();
+ brE.close();
+ this.writeCachedTpfLines();
+ if (maxDataComplete == null) {
+ return maxUsedAt;
+ } else if (maxUsedAt == null) {
+ return maxDataComplete;
+ } else if (maxDataComplete.compareTo(maxUsedAt) > 0) {
+ return maxUsedAt;
+ } else {
+ return maxDataComplete;
+ }
+ }
+
+ private SortedMap<Integer, String> dataTimestamps;
+ private SortedMap<String, String> parseDataLine(String line) {
+ String[] parts = line.trim().split(" ");
+ if (line.length() == 0 || parts.length < 20) {
+ return null;
+ }
+ if (this.dataTimestamps == null) {
+ this.dataTimestamps = new TreeMap<Integer, String>();
+ this.dataTimestamps.put(0, "START");
+ this.dataTimestamps.put(2, "SOCKET");
+ this.dataTimestamps.put(4, "CONNECT");
+ this.dataTimestamps.put(6, "NEGOTIATE");
+ this.dataTimestamps.put(8, "REQUEST");
+ this.dataTimestamps.put(10, "RESPONSE");
+ this.dataTimestamps.put(12, "DATAREQUEST");
+ this.dataTimestamps.put(14, "DATARESPONSE");
+ this.dataTimestamps.put(16, "DATACOMPLETE");
+ this.dataTimestamps.put(21, "DATAPERC10");
+ this.dataTimestamps.put(23, "DATAPERC20");
+ this.dataTimestamps.put(25, "DATAPERC30");
+ this.dataTimestamps.put(27, "DATAPERC40");
+ this.dataTimestamps.put(29, "DATAPERC50");
+ this.dataTimestamps.put(31, "DATAPERC60");
+ this.dataTimestamps.put(33, "DATAPERC70");
+ this.dataTimestamps.put(35, "DATAPERC80");
+ this.dataTimestamps.put(37, "DATAPERC90");
+ }
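+ /* Each mapped column holds seconds, followed by a microseconds
+ * column; e.g., the pair "1345601954 382781" becomes
+ * "1345601954.38" below. */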
+ SortedMap<String, String> data = new TreeMap<String, String>();
+ try {
+ for (Map.Entry<Integer, String> e : this.dataTimestamps.entrySet()) {
+ int i = e.getKey();
+ if (parts.length > i + 1) {
+ String key = e.getValue();
+ String value = String.format("%s.%02d", parts[i],
+ Integer.parseInt(parts[i + 1]) / 10000);
+ data.put(key, value);
+ }
+ }
+ } catch (NumberFormatException e) {
+ return null;
+ }
+ data.put("WRITEBYTES", parts[18]);
+ data.put("READBYTES", parts[19]);
+ if (parts.length >= 21) {
+ data.put("DIDTIMEOUT", parts[20]);
+ }
+ return data;
+ }
+
+ private SortedMap<String, String> parseExtradataLine(String line) {
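+ /* An .extradata line is a sequence of KEY=VALUE pairs; only
+ * STREAM_FAIL_REASONS values may contain spaces, which we re-join
+ * with ':' below. Hypothetical example:
+ * "CIRC_ID=8 USED_AT=1345601954.38 STREAM_FAIL_REASONS=MISC EXITPOLICY". */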
+ String[] parts = line.split(" ");
+ SortedMap<String, String> extradata = new TreeMap<String, String>();
+ String previousKey = null;
+ for (String part : parts) {
+ String[] keyAndValue = part.split("=", -1);
+ if (keyAndValue.length == 2) {
+ String key = keyAndValue[0];
+ previousKey = key;
+ String value = keyAndValue[1];
+ if (value.contains(".") && value.lastIndexOf(".") ==
+ value.length() - 2) {
+ /* Make sure that all floats have two decimal places. */
+ value += "0";
+ }
+ extradata.put(key, value);
+ } else if (keyAndValue.length == 1 && previousKey != null) {
+ String value = keyAndValue[0];
+ if (previousKey.equals("STREAM_FAIL_REASONS") &&
+ (value.equals("MISC") || value.equals("EXITPOLICY") ||
+ value.equals("RESOURCELIMIT") ||
+ value.equals("RESOLVEFAILED"))) {
+ extradata.put(previousKey, extradata.get(previousKey) + ":"
+ + value);
+ } else {
+ return null;
+ }
+ } else {
+ return null;
+ }
+ }
+ return extradata;
+ }
+
+ private String cachedSource;
+ private int cachedFileSize;
+ private String cachedStartDate;
+ private SortedMap<String, String> cachedTpfLines;
+ private void writeTpfLine(String source, int fileSize,
+ SortedMap<String, String> keysAndValues) throws IOException {
+ StringBuilder sb = new StringBuilder();
+ int written = 0;
+ for (Map.Entry<String, String> keyAndValue :
+ keysAndValues.entrySet()) {
+ String key = keyAndValue.getKey();
+ String value = keyAndValue.getValue();
+ sb.append((written++ > 0 ? " " : "") + key + "=" + value);
+ }
+ String line = sb.toString();
+ String startString = keysAndValues.get("START");
+ long startMillis = Long.parseLong(startString.substring(0,
+ startString.indexOf("."))) * 1000L;
+ String startDate = dateFormat.format(startMillis);
+ if (this.cachedTpfLines == null || !source.equals(this.cachedSource) ||
+ fileSize != this.cachedFileSize ||
+ !startDate.equals(this.cachedStartDate)) {
+ this.writeCachedTpfLines();
+ this.readTpfLinesToCache(source, fileSize, startDate);
+ }
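+ /* Keep the longer of two lines with the same START timestamp; the
+ * merged line with .extradata fields is longer than one built from
+ * the .data line alone. */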
+ if (!this.cachedTpfLines.containsKey(startString) ||
+ line.length() > this.cachedTpfLines.get(startString).length()) {
+ this.cachedTpfLines.put(startString, line);
+ }
+ }
+
+ private void readTpfLinesToCache(String source, int fileSize,
+ String startDate) throws IOException {
+ this.cachedTpfLines = new TreeMap<String, String>();
+ this.cachedSource = source;
+ this.cachedFileSize = fileSize;
+ this.cachedStartDate = startDate;
+ File tpfFile = new File(torperfOutputDirectory,
+ startDate.replaceAll("-", "/") + "/"
+ + source + "-" + String.valueOf(fileSize) + "-" + startDate
+ + ".tpf");
+ if (!tpfFile.exists()) {
+ return;
+ }
+ BufferedReader br = new BufferedReader(new FileReader(tpfFile));
+ String line;
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("@type ")) {
+ continue;
+ }
+ if (line.contains("START=")) {
+ String startString = line.substring(line.indexOf("START=")
+ + "START=".length()).split(" ")[0];
+ this.cachedTpfLines.put(startString, line);
+ }
+ }
+ br.close();
+ }
+
+ private void writeCachedTpfLines() throws IOException {
+ if (this.cachedSource == null || this.cachedFileSize == 0 ||
+ this.cachedStartDate == null || this.cachedTpfLines == null) {
+ return;
+ }
+ File tarballFile = new File(torperfOutputDirectory,
+ this.cachedStartDate.replaceAll("-", "/")
+ + "/" + this.cachedSource + "-"
+ + String.valueOf(this.cachedFileSize) + "-"
+ + this.cachedStartDate + ".tpf");
+ File rsyncFile = new File("recent/torperf/" + tarballFile.getName());
+ File[] outputFiles = new File[] { tarballFile, rsyncFile };
+ for (File outputFile : outputFiles) {
+ outputFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(outputFile));
+ bw.write("@type torperf 1.0\n");
+ for (String line : this.cachedTpfLines.values()) {
+ bw.write(line + "\n");
+ }
+ bw.close();
+ }
+ this.cachedSource = null;
+ this.cachedFileSize = 0;
+ this.cachedStartDate = null;
+ this.cachedTpfLines = null;
+ }
+
+ /* Delete all files from the rsync directory that have not been modified
+ * in the last three days. */
+ public void cleanUpRsyncDirectory() {
+ long cutOffMillis = System.currentTimeMillis()
+ - 3L * 24L * 60L * 60L * 1000L;
+ Stack<File> allFiles = new Stack<File>();
+ allFiles.add(new File("recent/torperf"));
+ while (!allFiles.isEmpty()) {
+ File file = allFiles.pop();
+ if (file.isDirectory()) {
+ allFiles.addAll(Arrays.asList(file.listFiles()));
+ } else if (file.lastModified() < cutOffMillis) {
+ file.delete();
+ }
+ }
+ }
+}
+
diff --git a/src/org/torproject/ernie/db/bridgedescs/BridgeDescriptorParser.java b/src/org/torproject/ernie/db/bridgedescs/BridgeDescriptorParser.java
deleted file mode 100644
index b78171a..0000000
--- a/src/org/torproject/ernie/db/bridgedescs/BridgeDescriptorParser.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db.bridgedescs;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.StringReader;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-public class BridgeDescriptorParser {
- private SanitizedBridgesWriter sbw;
- private Logger logger;
- public BridgeDescriptorParser(SanitizedBridgesWriter sbw) {
- this.sbw = sbw;
- this.logger =
- Logger.getLogger(BridgeDescriptorParser.class.getName());
- }
- public void parse(byte[] allData, String dateTime) {
- try {
- BufferedReader br = new BufferedReader(new StringReader(
- new String(allData, "US-ASCII")));
- String line = br.readLine();
- if (line == null) {
- return;
- } else if (line.startsWith("router ")) {
- if (this.sbw != null) {
- this.sbw.sanitizeAndStoreServerDescriptor(allData);
- }
- } else if (line.startsWith("extra-info ")) {
- if (this.sbw != null) {
- this.sbw.sanitizeAndStoreExtraInfoDescriptor(allData);
- }
- } else {
- if (this.sbw != null) {
- this.sbw.sanitizeAndStoreNetworkStatus(allData, dateTime);
- }
- }
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not parse bridge descriptor.",
- e);
- return;
- }
- }
-}
-
diff --git a/src/org/torproject/ernie/db/bridgedescs/BridgeSnapshotReader.java b/src/org/torproject/ernie/db/bridgedescs/BridgeSnapshotReader.java
deleted file mode 100644
index 3433cfe..0000000
--- a/src/org/torproject/ernie/db/bridgedescs/BridgeSnapshotReader.java
+++ /dev/null
@@ -1,222 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db.bridgedescs;
-
-import java.io.BufferedInputStream;
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.StringReader;
-import java.util.HashSet;
-import java.util.Set;
-import java.util.SortedSet;
-import java.util.Stack;
-import java.util.TreeSet;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.apache.commons.codec.binary.Hex;
-import org.apache.commons.codec.digest.DigestUtils;
-import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
-import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
-
-/**
- * Reads the half-hourly snapshots of bridge descriptors from Tonga.
- */
-public class BridgeSnapshotReader {
- public BridgeSnapshotReader(BridgeDescriptorParser bdp,
- File bridgeDirectoriesDir, File statsDirectory) {
-
- if (bdp == null || bridgeDirectoriesDir == null ||
- statsDirectory == null) {
- throw new IllegalArgumentException();
- }
-
- Logger logger =
- Logger.getLogger(BridgeSnapshotReader.class.getName());
- SortedSet<String> parsed = new TreeSet<String>();
- File bdDir = bridgeDirectoriesDir;
- File pbdFile = new File(statsDirectory, "parsed-bridge-directories");
- boolean modified = false;
- if (bdDir.exists()) {
- if (pbdFile.exists()) {
- logger.fine("Reading file " + pbdFile.getAbsolutePath() + "...");
- try {
- BufferedReader br = new BufferedReader(new FileReader(pbdFile));
- String line = null;
- while ((line = br.readLine()) != null) {
- parsed.add(line);
- }
- br.close();
- logger.fine("Finished reading file "
- + pbdFile.getAbsolutePath() + ".");
- } catch (IOException e) {
- logger.log(Level.WARNING, "Failed reading file "
- + pbdFile.getAbsolutePath() + "!", e);
- return;
- }
- }
- logger.fine("Importing files in directory " + bridgeDirectoriesDir
- + "/...");
- Set<String> descriptorImportHistory = new HashSet<String>();
- int parsedFiles = 0, skippedFiles = 0, parsedStatuses = 0,
- parsedServerDescriptors = 0, skippedServerDescriptors = 0,
- parsedExtraInfoDescriptors = 0, skippedExtraInfoDescriptors = 0;
- Stack<File> filesInInputDir = new Stack<File>();
- filesInInputDir.add(bdDir);
- while (!filesInInputDir.isEmpty()) {
- File pop = filesInInputDir.pop();
- if (pop.isDirectory()) {
- for (File f : pop.listFiles()) {
- filesInInputDir.add(f);
- }
- } else if (!parsed.contains(pop.getName())) {
- try {
- FileInputStream in = new FileInputStream(pop);
- if (in.available() > 0) {
- TarArchiveInputStream tais = null;
- if (pop.getName().endsWith(".tar.gz")) {
- GzipCompressorInputStream gcis =
- new GzipCompressorInputStream(in);
- tais = new TarArchiveInputStream(gcis);
- } else if (pop.getName().endsWith(".tar")) {
- tais = new TarArchiveInputStream(in);
- } else {
- continue;
- }
- BufferedInputStream bis = new BufferedInputStream(tais);
- String fn = pop.getName();
- String dateTime = fn.substring(11, 21) + " "
- + fn.substring(22, 24) + ":" + fn.substring(24, 26)
- + ":" + fn.substring(26, 28);
- while ((tais.getNextTarEntry()) != null) {
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- int len;
- byte[] data = new byte[1024];
- while ((len = bis.read(data, 0, 1024)) >= 0) {
- baos.write(data, 0, len);
- }
- byte[] allData = baos.toByteArray();
- if (allData.length == 0) {
- continue;
- }
- String fileDigest = Hex.encodeHexString(DigestUtils.sha(
- allData));
- String ascii = new String(allData, "US-ASCII");
- BufferedReader br3 = new BufferedReader(new StringReader(
- ascii));
- String firstLine = null;
- while ((firstLine = br3.readLine()) != null) {
- if (firstLine.startsWith("@")) {
- continue;
- } else {
- break;
- }
- }
-                    if (firstLine == null) {
-                      /* Entry contained only annotation lines; skip it. */
-                      continue;
-                    } else if (firstLine.startsWith("published ") ||
-                        firstLine.startsWith("flag-thresholds ") ||
-                        firstLine.startsWith("r ")) {
- bdp.parse(allData, dateTime);
- parsedStatuses++;
- } else if (descriptorImportHistory.contains(fileDigest)) {
- /* Skip server descriptors or extra-info descriptors if
- * we parsed them before. */
- skippedFiles++;
- continue;
- } else {
- int start = -1, sig = -1, end = -1;
- String startToken =
- firstLine.startsWith("router ") ?
- "router " : "extra-info ";
- String sigToken = "\nrouter-signature\n";
- String endToken = "\n-----END SIGNATURE-----\n";
- while (end < ascii.length()) {
- start = ascii.indexOf(startToken, end);
- if (start < 0) {
- break;
- }
- sig = ascii.indexOf(sigToken, start);
- if (sig < 0) {
- break;
- }
- sig += sigToken.length();
- end = ascii.indexOf(endToken, sig);
- if (end < 0) {
- break;
- }
- end += endToken.length();
- byte[] descBytes = new byte[end - start];
- System.arraycopy(allData, start, descBytes, 0,
- end - start);
- String descriptorDigest = Hex.encodeHexString(
- DigestUtils.sha(descBytes));
- if (!descriptorImportHistory.contains(
- descriptorDigest)) {
- bdp.parse(descBytes, dateTime);
- descriptorImportHistory.add(descriptorDigest);
- if (firstLine.startsWith("router ")) {
- parsedServerDescriptors++;
- } else {
- parsedExtraInfoDescriptors++;
- }
- } else {
- if (firstLine.startsWith("router ")) {
- skippedServerDescriptors++;
- } else {
- skippedExtraInfoDescriptors++;
- }
- }
- }
- }
- descriptorImportHistory.add(fileDigest);
- parsedFiles++;
- }
- bis.close();
- }
- in.close();
-
- /* Let's give some memory back, or we'll run out of it. */
- System.gc();
-
- parsed.add(pop.getName());
- modified = true;
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not parse bridge snapshot "
- + pop.getName() + "!", e);
- continue;
- }
- }
- }
- logger.fine("Finished importing files in directory "
- + bridgeDirectoriesDir + "/. In total, we parsed "
- + parsedFiles + " files (skipped " + skippedFiles
- + ") containing " + parsedStatuses + " statuses, "
- + parsedServerDescriptors + " server descriptors (skipped "
- + skippedServerDescriptors + "), and "
- + parsedExtraInfoDescriptors + " extra-info descriptors "
- + "(skipped " + skippedExtraInfoDescriptors + ").");
- if (!parsed.isEmpty() && modified) {
- logger.fine("Writing file " + pbdFile.getAbsolutePath() + "...");
- try {
- pbdFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(pbdFile));
- for (String f : parsed) {
- bw.append(f + "\n");
- }
- bw.close();
- logger.fine("Finished writing file " + pbdFile.getAbsolutePath()
- + ".");
- } catch (IOException e) {
- logger.log(Level.WARNING, "Failed writing file "
- + pbdFile.getAbsolutePath() + "!", e);
- }
- }
- }
- }
-}
-
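The essential I/O pattern in BridgeSnapshotReader -- iterating over the entries of a .tar or .tar.gz snapshot with Apache Commons Compress and buffering each entry into a byte array -- is easy to lose among the bookkeeping above. Reduced to a self-contained sketch (class and method names invented for illustration):

    import java.io.BufferedInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.IOException;

    import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
    import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;

    public class TarballEntrySketch {

      /* Read each entry of a .tar.gz archive into memory in turn. */
      public static void readEntries(File tarball) throws IOException {
        TarArchiveInputStream tais = new TarArchiveInputStream(
            new GzipCompressorInputStream(new FileInputStream(tarball)));
        BufferedInputStream bis = new BufferedInputStream(tais);
        while (tais.getNextTarEntry() != null) {
          ByteArrayOutputStream baos = new ByteArrayOutputStream();
          byte[] data = new byte[1024];
          int len;
          /* read() returns -1 at the end of the current entry, not at
           * the end of the archive. */
          while ((len = bis.read(data, 0, 1024)) >= 0) {
            baos.write(data, 0, len);
          }
          byte[] entryBytes = baos.toByteArray();
          /* ... hand entryBytes to a parser here ... */
        }
        bis.close();
      }
    }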
diff --git a/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java b/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
deleted file mode 100644
index a0f9dda..0000000
--- a/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
+++ /dev/null
@@ -1,1322 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db.bridgedescs;
-
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.StringReader;
-import java.io.UnsupportedEncodingException;
-import java.security.GeneralSecurityException;
-import java.security.SecureRandom;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
-import java.util.SortedMap;
-import java.util.Stack;
-import java.util.TimeZone;
-import java.util.TreeMap;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.apache.commons.codec.DecoderException;
-import org.apache.commons.codec.binary.Base64;
-import org.apache.commons.codec.binary.Hex;
-import org.apache.commons.codec.digest.DigestUtils;
-import org.torproject.ernie.db.main.Configuration;
-import org.torproject.ernie.db.main.LockFile;
-import org.torproject.ernie.db.main.LoggingConfiguration;
-
-/**
- * Sanitizes bridge descriptors, i.e., removes all possibly sensitive
- * information from them, and writes them to a local directory structure.
- * During the sanitizing process, all information about the bridge
- * identity or IP address are removed or replaced. The goal is to keep the
- * sanitized bridge descriptors useful for statistical analysis while not
- * making it easier for an adversary to enumerate bridges.
- *
- * There are three types of bridge descriptors: bridge network statuses
- * (lists of all bridges at a given time), server descriptors (published
- * by the bridge to advertise their capabilities), and extra-info
- * descriptors (published by the bridge, mainly for statistical analysis).
- */
-public class SanitizedBridgesWriter extends Thread {
-
- public static void main(String[] args) {
-
- /* Initialize logging configuration. */
- new LoggingConfiguration("bridge-descriptors");
- Logger logger = Logger.getLogger(
- SanitizedBridgesWriter.class.getName());
- logger.info("Starting bridge-descriptors module of ERNIE.");
-
- // Initialize configuration
- Configuration config = new Configuration();
-
- // Use lock file to avoid overlapping runs
- LockFile lf = new LockFile("bridge-descriptors");
- if (!lf.acquireLock()) {
- logger.severe("Warning: ERNIE is already running or has not exited "
- + "cleanly! Exiting!");
- System.exit(1);
- }
-
- // Sanitize bridge descriptors
- new SanitizedBridgesWriter(config).run();
-
- // Remove lock file
- lf.releaseLock();
-
- logger.info("Terminating bridge-descriptors module of ERNIE.");
- }
-
- private Configuration config;
-
- /**
- * Initializes this class.
- */
- public SanitizedBridgesWriter(Configuration config) {
- this.config = config;
- }
-
- /**
- * Logger for this class.
- */
- private Logger logger;
-
- private String rsyncCatString;
-
- private File bridgeDirectoriesDirectory;
-
- /**
- * Output directory for writing sanitized bridge descriptors.
- */
- private File sanitizedBridgesDirectory;
-
- private boolean replaceIPAddressesWithHashes;
-
- private boolean persistenceProblemWithSecrets;
-
- private SortedMap<String, byte[]> secretsForHashingIPAddresses;
-
- private String bridgeSanitizingCutOffTimestamp;
-
- private boolean haveWarnedAboutInterval;
-
- private File bridgeIpSecretsFile;
-
- private SecureRandom secureRandom;
-
- public void run() {
-
- File bridgeDirectoriesDirectory =
- new File(config.getBridgeSnapshotsDirectory());
- File sanitizedBridgesDirectory =
- new File(config.getSanitizedBridgesWriteDirectory());
- boolean replaceIPAddressesWithHashes =
- config.getReplaceIPAddressesWithHashes();
- long limitBridgeSanitizingInterval =
- config.getLimitBridgeDescriptorMappings();
- File statsDirectory = new File("stats");
-
- if (bridgeDirectoriesDirectory == null ||
- sanitizedBridgesDirectory == null || statsDirectory == null) {
- throw new IllegalArgumentException();
- }
-
- /* Memorize argument values. */
- this.bridgeDirectoriesDirectory = bridgeDirectoriesDirectory;
- this.sanitizedBridgesDirectory = sanitizedBridgesDirectory;
- this.replaceIPAddressesWithHashes = replaceIPAddressesWithHashes;
-
- /* Initialize logger. */
- this.logger = Logger.getLogger(
- SanitizedBridgesWriter.class.getName());
-
- SimpleDateFormat rsyncCatFormat = new SimpleDateFormat(
- "yyyy-MM-dd-HH-mm-ss");
- rsyncCatFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- this.rsyncCatString = rsyncCatFormat.format(
- System.currentTimeMillis());
-
- /* Initialize secure random number generator if we need it. */
- if (this.replaceIPAddressesWithHashes) {
- try {
- this.secureRandom = SecureRandom.getInstance("SHA1PRNG", "SUN");
- } catch (GeneralSecurityException e) {
- this.logger.log(Level.WARNING, "Could not initialize secure "
- + "random number generator! Not calculating any IP address "
- + "hashes in this execution!", e);
- this.persistenceProblemWithSecrets = true;
- }
- }
-
- /* Read hex-encoded secrets for replacing IP addresses with hashes
- * from disk. */
- this.secretsForHashingIPAddresses = new TreeMap<String, byte[]>();
- this.bridgeIpSecretsFile = new File(statsDirectory,
- "bridge-ip-secrets");
- if (this.bridgeIpSecretsFile.exists()) {
- try {
- BufferedReader br = new BufferedReader(new FileReader(
- this.bridgeIpSecretsFile));
- String line;
- while ((line = br.readLine()) != null) {
- String[] parts = line.split(",");
- if ((line.length() != ("yyyy-MM,".length() + 31 * 2) &&
- line.length() != ("yyyy-MM,".length() + 50 * 2)) ||
- parts.length != 2) {
- this.logger.warning("Invalid line in bridge-ip-secrets file "
- + "starting with '" + line.substring(0, 7) + "'! "
- + "Not calculating any IP address hashes in this "
- + "execution!");
- this.persistenceProblemWithSecrets = true;
- break;
- }
- String month = parts[0];
- byte[] secret = Hex.decodeHex(parts[1].toCharArray());
- this.secretsForHashingIPAddresses.put(month, secret);
- }
- br.close();
- if (!this.persistenceProblemWithSecrets) {
- this.logger.fine("Read "
- + this.secretsForHashingIPAddresses.size() + " secrets for "
- + "hashing bridge IP addresses.");
- }
- } catch (DecoderException e) {
- this.logger.log(Level.WARNING, "Failed to decode hex string in "
- + this.bridgeIpSecretsFile + "! Not calculating any IP "
- + "address hashes in this execution!", e);
- this.persistenceProblemWithSecrets = true;
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed to read "
- + this.bridgeIpSecretsFile + "! Not calculating any IP "
- + "address hashes in this execution!", e);
- this.persistenceProblemWithSecrets = true;
- }
- }
-
- /* If we're configured to keep secrets only for a limited time, define
- * the cut-off day and time. */
- if (limitBridgeSanitizingInterval >= 0L) {
- SimpleDateFormat formatter = new SimpleDateFormat(
- "yyyy-MM-dd HH:mm:ss");
- formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
- this.bridgeSanitizingCutOffTimestamp = formatter.format(
- System.currentTimeMillis() - 24L * 60L * 60L * 1000L
- * limitBridgeSanitizingInterval);
- } else {
- this.bridgeSanitizingCutOffTimestamp = "1999-12-31 23:59:59";
- }
-
- // Prepare bridge descriptor parser
- BridgeDescriptorParser bdp = new BridgeDescriptorParser(this);
-
- // Import bridge descriptors
- new BridgeSnapshotReader(bdp, this.bridgeDirectoriesDirectory,
- statsDirectory);
-
- // Finish writing sanitized bridge descriptors to disk
- this.finishWriting();
-
- this.checkStaleDescriptors();
-
- this.cleanUpRsyncDirectory();
- }
-
- private String scrubOrAddress(String orAddress, byte[] fingerprintBytes,
- String published) throws IOException {
- if (!orAddress.contains(":")) {
-      /* Malformed or-address or "a" line. */
- return null;
- }
- String addressPart = orAddress.substring(0,
- orAddress.lastIndexOf(":"));
- String portPart = orAddress.substring(orAddress.lastIndexOf(":") + 1);
- String scrubbedAddressPart = null;
- if (addressPart.startsWith("[")) {
- scrubbedAddressPart = this.scrubIpv6Address(addressPart,
- fingerprintBytes, published);
- } else {
- scrubbedAddressPart = this.scrubIpv4Address(addressPart,
- fingerprintBytes, published);
- }
- return (scrubbedAddressPart == null ? null :
- scrubbedAddressPart + ":" + portPart);
- }
-
- private String scrubIpv4Address(String address, byte[] fingerprintBytes,
- String published) throws IOException {
- if (this.replaceIPAddressesWithHashes) {
- if (this.persistenceProblemWithSecrets) {
- /* There's a persistence problem, so we shouldn't scrub more IP
- * addresses in this execution. */
- return null;
- }
- byte[] hashInput = new byte[4 + 20 + 31];
- String[] ipParts = address.split("\\.");
- for (int i = 0; i < 4; i++) {
- hashInput[i] = (byte) Integer.parseInt(ipParts[i]);
- }
- System.arraycopy(fingerprintBytes, 0, hashInput, 4, 20);
- String month = published.substring(0, "yyyy-MM".length());
- byte[] secret = this.getSecretForMonth(month);
- System.arraycopy(secret, 0, hashInput, 24, 31);
- byte[] hashOutput = DigestUtils.sha256(hashInput);
- String hashedAddress = "10."
- + (((int) hashOutput[0] + 256) % 256) + "."
- + (((int) hashOutput[1] + 256) % 256) + "."
- + (((int) hashOutput[2] + 256) % 256);
- return hashedAddress;
- } else {
- return "127.0.0.1";
- }
- }
-
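Put differently, a sanitized IPv4 address is SHA-256(IP || fingerprint || monthly secret) with the first three digest bytes mapped into 10.0.0.0/8, so the same bridge keeps the same 10.x.y.z address within a month without the real address being recoverable. A standalone sketch of just the hash step (names are made up; (b & 0xff) is equivalent to the ((int) b + 256) % 256 used above):

    import java.security.MessageDigest;
    import java.security.NoSuchAlgorithmException;

    public class HashedIpSketch {

      /* ipBytes: 4 bytes, fingerprintBytes: 20 bytes, secret: 31 bytes. */
      public static String hashIpv4(byte[] ipBytes,
          byte[] fingerprintBytes, byte[] secret)
          throws NoSuchAlgorithmException {
        byte[] hashInput = new byte[4 + 20 + 31];
        System.arraycopy(ipBytes, 0, hashInput, 0, 4);
        System.arraycopy(fingerprintBytes, 0, hashInput, 4, 20);
        System.arraycopy(secret, 0, hashInput, 24, 31);
        byte[] hashOutput =
            MessageDigest.getInstance("SHA-256").digest(hashInput);
        return "10." + (hashOutput[0] & 0xff) + "."
            + (hashOutput[1] & 0xff) + "." + (hashOutput[2] & 0xff);
      }
    }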
- private String scrubIpv6Address(String address, byte[] fingerprintBytes,
- String published) throws IOException {
- StringBuilder sb = new StringBuilder("[fd9f:2e19:3bcf::");
- if (this.replaceIPAddressesWithHashes) {
- if (this.persistenceProblemWithSecrets) {
- /* There's a persistence problem, so we shouldn't scrub more IP
- * addresses in this execution. */
- return null;
- }
- byte[] hashInput = new byte[16 + 20 + 19];
- String[] doubleColonSeparatedParts = address.substring(1,
- address.length() - 1).split("::", -1);
- if (doubleColonSeparatedParts.length > 2) {
- /* Invalid IPv6 address. */
- return null;
- }
- List<String> hexParts = new ArrayList<String>();
- for (String doubleColonSeparatedPart : doubleColonSeparatedParts) {
- StringBuilder hexPart = new StringBuilder();
- String[] parts = doubleColonSeparatedPart.split(":", -1);
- if (parts.length < 1 || parts.length > 8) {
- /* Invalid IPv6 address. */
- return null;
- }
- for (int i = 0; i < parts.length; i++) {
- String part = parts[i];
- if (part.contains(".")) {
- String[] ipParts = part.split("\\.");
- byte[] ipv4Bytes = new byte[4];
- if (ipParts.length != 4) {
- /* Invalid IPv4 part in IPv6 address. */
- return null;
- }
- for (int m = 0; m < 4; m++) {
- ipv4Bytes[m] = (byte) Integer.parseInt(ipParts[m]);
- }
- hexPart.append(Hex.encodeHexString(ipv4Bytes));
- } else if (part.length() > 4) {
- /* Invalid IPv6 address. */
- return null;
- } else {
- for (int k = part.length(); k < 4; k++) {
- hexPart.append("0");
- }
- hexPart.append(part);
- }
- }
- hexParts.add(hexPart.toString());
- }
- StringBuilder hex = new StringBuilder();
- hex.append(hexParts.get(0));
- if (hexParts.size() == 2) {
- for (int i = 32 - hexParts.get(0).length()
- - hexParts.get(1).length(); i > 0; i--) {
- hex.append("0");
- }
- hex.append(hexParts.get(1));
- }
- byte[] ipBytes = null;
- try {
- ipBytes = Hex.decodeHex(hex.toString().toCharArray());
- } catch (DecoderException e) {
- /* TODO Invalid IPv6 address. */
- return null;
- }
- if (ipBytes.length != 16) {
- /* TODO Invalid IPv6 address. */
- return null;
- }
- System.arraycopy(ipBytes, 0, hashInput, 0, 16);
- System.arraycopy(fingerprintBytes, 0, hashInput, 16, 20);
- String month = published.substring(0, "yyyy-MM".length());
- byte[] secret = this.getSecretForMonth(month);
- System.arraycopy(secret, 31, hashInput, 36, 19);
- String hashOutput = DigestUtils.sha256Hex(hashInput);
- sb.append(hashOutput.substring(hashOutput.length() - 6,
- hashOutput.length() - 4));
- sb.append(":");
- sb.append(hashOutput.substring(hashOutput.length() - 4));
- }
- sb.append("]");
- return sb.toString();
- }
-
- private byte[] getSecretForMonth(String month) throws IOException {
- if (!this.secretsForHashingIPAddresses.containsKey(month) ||
- this.secretsForHashingIPAddresses.get(month).length == 31) {
- byte[] secret = new byte[50];
- this.secureRandom.nextBytes(secret);
- if (this.secretsForHashingIPAddresses.containsKey(month)) {
- System.arraycopy(this.secretsForHashingIPAddresses.get(month), 0,
- secret, 0, 31);
- }
- if (month.compareTo(
- this.bridgeSanitizingCutOffTimestamp) < 0) {
- this.logger.warning("Generated a secret that we won't make "
- + "persistent, because it's outside our bridge descriptor "
- + "sanitizing interval.");
- } else {
- /* Append secret to file on disk immediately before using it, or
-         * we might end up with inconsistently sanitized bridges. */
- try {
- if (!this.bridgeIpSecretsFile.exists()) {
- this.bridgeIpSecretsFile.getParentFile().mkdirs();
- }
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- this.bridgeIpSecretsFile,
- this.bridgeIpSecretsFile.exists()));
- bw.write(month + "," + Hex.encodeHexString(secret) + "\n");
- bw.close();
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not store new secret "
- + "to disk! Not calculating any IP address hashes in "
- + "this execution!", e);
- this.persistenceProblemWithSecrets = true;
- throw new IOException(e);
- }
- }
- this.secretsForHashingIPAddresses.put(month, secret);
- }
- return this.secretsForHashingIPAddresses.get(month);
- }
-
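Each monthly secret is 50 bytes, and the two scrubbing methods use disjoint slices of it: scrubIpv4Address() mixes in bytes 0..30, while scrubIpv6Address() mixes in bytes 31..49 (hence the arraycopy from offset 31). That is also why getSecretForMonth() extends an old 31-byte secret to 50 bytes while preserving its first 31 bytes: IPv4 hashes already published for that month must not change. As a small sketch of the layout (class name invented):

    import java.util.Arrays;

    public class MonthlySecretLayout {

      /* 31-byte slice appended to IPv4 hash inputs. */
      public static byte[] ipv4Part(byte[] secret) {
        return Arrays.copyOfRange(secret, 0, 31);
      }

      /* 19-byte slice appended to IPv6 hash inputs. */
      public static byte[] ipv6Part(byte[] secret) {
        return Arrays.copyOfRange(secret, 31, 50);
      }
    }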
- private String maxNetworkStatusPublishedTime = "1970-01-01 00:00:00";
-
- /**
- * Sanitizes a network status and writes it to disk.
- */
- public void sanitizeAndStoreNetworkStatus(byte[] data,
- String publicationTime) {
-
- if (this.persistenceProblemWithSecrets) {
- /* There's a persistence problem, so we shouldn't scrub more IP
- * addresses in this execution. */
- return;
- }
-
- if (publicationTime.compareTo(maxNetworkStatusPublishedTime) > 0) {
- maxNetworkStatusPublishedTime = publicationTime;
- }
-
- if (this.bridgeSanitizingCutOffTimestamp.
- compareTo(publicationTime) > 0) {
- this.logger.log(!this.haveWarnedAboutInterval ? Level.WARNING
- : Level.FINE, "Sanitizing and storing network status with "
- + "publication time outside our descriptor sanitizing "
- + "interval.");
- this.haveWarnedAboutInterval = true;
- }
-
- /* Parse the given network status line by line. */
- StringBuilder header = new StringBuilder();
- SortedMap<String, String> scrubbedLines =
- new TreeMap<String, String>();
- try {
- StringBuilder scrubbed = new StringBuilder();
- BufferedReader br = new BufferedReader(new StringReader(new String(
- data, "US-ASCII")));
- String line = null;
- String mostRecentDescPublished = null;
- byte[] fingerprintBytes = null;
- String descPublicationTime = null;
- String hashedBridgeIdentityHex = null;
- while ((line = br.readLine()) != null) {
-
- /* Use publication time from "published" line instead of the
- * file's last-modified time. Don't copy over the line, because
- * we're going to write a "published" line below. */
- if (line.startsWith("published ")) {
- publicationTime = line.substring("published ".length());
-
- /* Additional header lines don't have to be cleaned up. */
- } else if (line.startsWith("flag-thresholds ")) {
- header.append(line + "\n");
-
- /* r lines contain sensitive information that needs to be removed
- * or replaced. */
- } else if (line.startsWith("r ")) {
-
- /* Clear buffer from previously scrubbed lines. */
- if (scrubbed.length() > 0) {
- String scrubbedLine = scrubbed.toString();
- scrubbedLines.put(hashedBridgeIdentityHex, scrubbedLine);
- scrubbed = new StringBuilder();
- }
-
- /* Parse the relevant parts of this r line. */
- String[] parts = line.split(" ");
- String nickname = parts[1];
- fingerprintBytes = Base64.decodeBase64(parts[2] + "==");
- String descriptorIdentifier = parts[3];
- descPublicationTime = parts[4] + " " + parts[5];
- String address = parts[6];
- String orPort = parts[7];
- String dirPort = parts[8];
-
- /* Determine most recent descriptor publication time. */
- if (descPublicationTime.compareTo(publicationTime) <= 0 &&
- (mostRecentDescPublished == null ||
- descPublicationTime.compareTo(
- mostRecentDescPublished) > 0)) {
- mostRecentDescPublished = descPublicationTime;
- }
-
- /* Write scrubbed r line to buffer. */
- byte[] hashedBridgeIdentity = DigestUtils.sha(fingerprintBytes);
- String hashedBridgeIdentityBase64 = Base64.encodeBase64String(
- hashedBridgeIdentity).substring(0, 27);
- hashedBridgeIdentityHex = Hex.encodeHexString(
- hashedBridgeIdentity);
- String hashedDescriptorIdentifier = Base64.encodeBase64String(
- DigestUtils.sha(Base64.decodeBase64(descriptorIdentifier
- + "=="))).substring(0, 27);
- String scrubbedAddress = scrubIpv4Address(address,
- fingerprintBytes,
- descPublicationTime);
- scrubbed.append("r " + nickname + " "
- + hashedBridgeIdentityBase64 + " "
- + hashedDescriptorIdentifier + " " + descPublicationTime
- + " " + scrubbedAddress + " " + orPort + " " + dirPort
- + "\n");
-
- /* Sanitize any addresses in a lines using the fingerprint and
- * descriptor publication time from the previous r line. */
- } else if (line.startsWith("a ")) {
- String scrubbedOrAddress = scrubOrAddress(
- line.substring("a ".length()), fingerprintBytes,
- descPublicationTime);
- if (scrubbedOrAddress != null) {
- scrubbed.append("a " + scrubbedOrAddress + "\n");
- } else {
- this.logger.warning("Invalid address in line '" + line
- + "' in bridge network status. Skipping line!");
- }
-
- /* Nothing special about s, w, and p lines; just copy them. */
- } else if (line.startsWith("s ") || line.equals("s") ||
- line.startsWith("w ") || line.equals("w") ||
- line.startsWith("p ") || line.equals("p")) {
- scrubbed.append(line + "\n");
-
-        /* There should be nothing else but r, a, s, w, and p lines in
-         * the network status.  If there is, we should probably learn
-         * about it before writing anything to the sanitized
-         * descriptors. */
- } else {
- this.logger.fine("Unknown line '" + line + "' in bridge "
- + "network status. Not writing to disk!");
- return;
- }
- }
- br.close();
- if (scrubbed.length() > 0) {
- String scrubbedLine = scrubbed.toString();
- scrubbedLines.put(hashedBridgeIdentityHex, scrubbedLine);
- scrubbed = new StringBuilder();
- }
-
- /* Check if we can tell from the descriptor publication times
- * whether this status is possibly stale. */
- SimpleDateFormat formatter = new SimpleDateFormat(
- "yyyy-MM-dd HH:mm:ss");
- formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
- if (formatter.parse(publicationTime).getTime() -
- formatter.parse(mostRecentDescPublished).getTime() >
- 60L * 60L * 1000L) {
- this.logger.warning("The most recent descriptor in the bridge "
- + "network status published at " + publicationTime + " was "
- + "published at " + mostRecentDescPublished + " which is "
- + "more than 1 hour before the status. This is a sign for "
- + "the status being stale. Please check!");
- }
- } catch (ParseException e) {
- this.logger.log(Level.WARNING, "Could not parse timestamp in "
- + "bridge network status.", e);
- return;
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not parse bridge network "
- + "status.", e);
- return;
- }
-
- /* Write the sanitized network status to disk. */
- try {
- String syear = publicationTime.substring(0, 4);
- String smonth = publicationTime.substring(5, 7);
- String sday = publicationTime.substring(8, 10);
- String stime = publicationTime.substring(11, 13)
- + publicationTime.substring(14, 16)
- + publicationTime.substring(17, 19);
- File tarballFile = new File(
- this.sanitizedBridgesDirectory.getAbsolutePath() + "/" + syear
- + "/" + smonth + "/statuses/" + sday + "/" + syear + smonth
- + sday + "-" + stime + "-"
- + "4A0CCD2DDC7995083D73F5D667100C8A5831F16D");
- File rsyncFile = new File("recent/bridge-descriptors/statuses/"
- + tarballFile.getName());
- File[] outputFiles = new File[] { tarballFile, rsyncFile };
- for (File outputFile : outputFiles) {
- outputFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- outputFile));
- bw.write("@type bridge-network-status 1.0\n");
- bw.write("published " + publicationTime + "\n");
- bw.write(header.toString());
- for (String scrubbed : scrubbedLines.values()) {
- bw.write(scrubbed);
- }
- bw.close();
- }
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not write sanitized bridge "
- + "network status to disk.", e);
- return;
- }
- }
-
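Both identifiers in a scrubbed r line are replaced by hashes of themselves: the bridge identity becomes the unpadded base64 encoding of SHA-1(identity), and the descriptor identifier is treated the same way. The substring(0, 27) calls above simply drop the single '=' padding character, since 20 digest bytes encode to 28 base64 characters. A sketch of that one transformation, using the same Commons Codec calls as the code above (class name invented):

    import org.apache.commons.codec.binary.Base64;
    import org.apache.commons.codec.digest.DigestUtils;

    public class RLineHashSketch {

      /* Hash an unpadded base64-encoded 20-byte identifier the way
       * sanitized network statuses do. */
      public static String hashIdentifier(String base64NoPadding) {
        byte[] identifierBytes =
            Base64.decodeBase64(base64NoPadding + "==");
        byte[] hashed = DigestUtils.sha(identifierBytes);
        return Base64.encodeBase64String(hashed).substring(0, 27);
      }
    }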
- private String maxServerDescriptorPublishedTime = "1970-01-01 00:00:00";
-
- /**
- * Sanitizes a bridge server descriptor and writes it to disk.
- */
- public void sanitizeAndStoreServerDescriptor(byte[] data) {
-
- if (this.persistenceProblemWithSecrets) {
- /* There's a persistence problem, so we shouldn't scrub more IP
- * addresses in this execution. */
- return;
- }
-
- /* Parse descriptor to generate a sanitized version. */
- String scrubbedDesc = null, published = null,
- masterKeyEd25519FromIdentityEd25519 = null;
- try {
- BufferedReader br = new BufferedReader(new StringReader(
- new String(data, "US-ASCII")));
- StringBuilder scrubbed = new StringBuilder();
- String line = null, hashedBridgeIdentity = null, address = null,
- routerLine = null, scrubbedAddress = null,
- masterKeyEd25519 = null;
- List<String> orAddresses = null, scrubbedOrAddresses = null;
- boolean skipCrypto = false;
- while ((line = br.readLine()) != null) {
-
- /* Skip all crypto parts that might be used to derive the bridge's
- * identity fingerprint. */
- if (skipCrypto && !line.startsWith("-----END ")) {
- continue;
-
- /* Store the router line for later processing, because we may need
- * the bridge identity fingerprint for replacing the IP address in
- * the scrubbed version. */
- } else if (line.startsWith("router ")) {
- address = line.split(" ")[2];
- routerLine = line;
-
- /* Store or-address parts in a list and sanitize them when we have
- * read the fingerprint. */
- } else if (line.startsWith("or-address ")) {
- if (orAddresses == null) {
- orAddresses = new ArrayList<String>();
- }
- orAddresses.add(line.substring("or-address ".length()));
-
- /* Parse the publication time to see if we're still inside the
- * sanitizing interval. */
- } else if (line.startsWith("published ")) {
- published = line.substring("published ".length());
- if (published.compareTo(maxServerDescriptorPublishedTime) > 0) {
- maxServerDescriptorPublishedTime = published;
- }
- if (this.bridgeSanitizingCutOffTimestamp.
- compareTo(published) > 0) {
- this.logger.log(!this.haveWarnedAboutInterval
- ? Level.WARNING : Level.FINE, "Sanitizing and storing "
- + "server descriptor with publication time outside our "
- + "descriptor sanitizing interval.");
- this.haveWarnedAboutInterval = true;
- }
- scrubbed.append(line + "\n");
-
- /* Parse the fingerprint to determine the hashed bridge
- * identity. */
- } else if (line.startsWith("opt fingerprint ") ||
- line.startsWith("fingerprint ")) {
- String fingerprint = line.substring(line.startsWith("opt ") ?
- "opt fingerprint".length() : "fingerprint".length()).
- replaceAll(" ", "").toLowerCase();
- byte[] fingerprintBytes = Hex.decodeHex(
- fingerprint.toCharArray());
- hashedBridgeIdentity = DigestUtils.shaHex(fingerprintBytes).
- toLowerCase();
- try {
- scrubbedAddress = scrubIpv4Address(address, fingerprintBytes,
- published);
- if (orAddresses != null) {
- scrubbedOrAddresses = new ArrayList<String>();
- for (String orAddress : orAddresses) {
- String scrubbedOrAddress = scrubOrAddress(orAddress,
- fingerprintBytes, published);
- if (scrubbedOrAddress != null) {
- scrubbedOrAddresses.add(scrubbedOrAddress);
- } else {
- this.logger.warning("Invalid address in line "
- + "'or-address " + orAddress + "' in bridge server "
- + "descriptor. Skipping line!");
- }
- }
- }
- } catch (IOException e) {
- /* There's a persistence problem, so we shouldn't scrub more
- * IP addresses in this execution. */
- this.persistenceProblemWithSecrets = true;
- return;
- }
- scrubbed.append((line.startsWith("opt ") ? "opt " : "")
- + "fingerprint");
- for (int i = 0; i < hashedBridgeIdentity.length() / 4; i++)
- scrubbed.append(" " + hashedBridgeIdentity.substring(4 * i,
- 4 * (i + 1)).toUpperCase());
- scrubbed.append("\n");
-
- /* Replace the contact line (if present) with a generic one. */
- } else if (line.startsWith("contact ")) {
- scrubbed.append("contact somebody\n");
-
- /* When we reach the signature, we're done. Write the sanitized
- * descriptor to disk below. */
- } else if (line.startsWith("router-signature")) {
- String[] routerLineParts = routerLine.split(" ");
- scrubbedDesc = "router " + routerLineParts[1] + " "
- + scrubbedAddress + " " + routerLineParts[3] + " "
- + routerLineParts[4] + " " + routerLineParts[5] + "\n";
- if (scrubbedOrAddresses != null) {
- for (String scrubbedOrAddress : scrubbedOrAddresses) {
- scrubbedDesc = scrubbedDesc += "or-address "
- + scrubbedOrAddress + "\n";
- }
- }
- scrubbedDesc += scrubbed.toString();
- break;
-
- /* Replace extra-info digest with the hashed digest of the
- * non-scrubbed descriptor. */
- } else if (line.startsWith("opt extra-info-digest ") ||
- line.startsWith("extra-info-digest ")) {
- String[] parts = line.split(" ");
- if (line.startsWith("opt ")) {
- scrubbed.append("opt ");
- parts = line.substring(4).split(" ");
- }
- scrubbed.append("extra-info-digest " + DigestUtils.shaHex(
- Hex.decodeHex(parts[1].toCharArray())).toUpperCase());
- if (parts.length > 2) {
- scrubbed.append(" " + Base64.encodeBase64String(
- DigestUtils.sha256(Base64.decodeBase64(parts[2]))).
- replaceAll("=", ""));
- }
- scrubbed.append("\n");
-
- /* Possibly sanitize reject lines if they contain the bridge's own
- * IP address. */
- } else if (line.startsWith("reject ")) {
- if (address != null && line.startsWith("reject " + address)) {
- scrubbed.append("reject " + scrubbedAddress
- + line.substring("reject ".length() + address.length())
- + "\n");
- } else {
- scrubbed.append(line + "\n");
- }
-
- /* Extract master-key-ed25519 from identity-ed25519. */
- } else if (line.equals("identity-ed25519")) {
- StringBuilder sb = new StringBuilder();
- while ((line = br.readLine()) != null &&
- !line.equals("-----END ED25519 CERT-----")) {
- if (line.equals("-----BEGIN ED25519 CERT-----")) {
- continue;
- }
- sb.append(line);
- }
- masterKeyEd25519FromIdentityEd25519 =
- this.parseMasterKeyEd25519FromIdentityEd25519(
- sb.toString());
- String sha256MasterKeyEd25519 = Base64.encodeBase64String(
- DigestUtils.sha256(Base64.decodeBase64(
- masterKeyEd25519FromIdentityEd25519 + "="))).
- replaceAll("=", "");
- scrubbed.append("master-key-ed25519 " + sha256MasterKeyEd25519
- + "\n");
- if (masterKeyEd25519 != null && !masterKeyEd25519.equals(
- masterKeyEd25519FromIdentityEd25519)) {
- this.logger.warning("Mismatch between identity-ed25519 and "
- + "master-key-ed25519. Skipping.");
- return;
- }
-
- /* Verify that identity-ed25519 and master-key-ed25519 match. */
- } else if (line.startsWith("master-key-ed25519 ")) {
- masterKeyEd25519 = line.substring(line.indexOf(" ") + 1);
- if (masterKeyEd25519FromIdentityEd25519 != null &&
- !masterKeyEd25519FromIdentityEd25519.equals(
- masterKeyEd25519)) {
- this.logger.warning("Mismatch between identity-ed25519 and "
- + "master-key-ed25519. Skipping.");
- return;
- }
-
- /* Write the following lines unmodified to the sanitized
- * descriptor. */
- } else if (line.startsWith("accept ")
- || line.startsWith("platform ")
- || line.startsWith("opt protocols ")
- || line.startsWith("protocols ")
- || line.startsWith("uptime ")
- || line.startsWith("bandwidth ")
- || line.startsWith("opt hibernating ")
- || line.startsWith("hibernating ")
- || line.startsWith("ntor-onion-key ")
- || line.equals("opt hidden-service-dir")
- || line.equals("hidden-service-dir")
- || line.equals("opt caches-extra-info")
- || line.equals("caches-extra-info")
- || line.equals("opt allow-single-hop-exits")
- || line.equals("allow-single-hop-exits")
- || line.startsWith("ipv6-policy ")
- || line.equals("tunnelled-dir-server")) {
- scrubbed.append(line + "\n");
-
- /* Replace node fingerprints in the family line with their hashes
- * and leave nicknames unchanged. */
- } else if (line.startsWith("family ")) {
- StringBuilder familyLine = new StringBuilder("family");
- for (String s : line.substring(7).split(" ")) {
- if (s.startsWith("$")) {
- familyLine.append(" $" + DigestUtils.shaHex(Hex.decodeHex(
- s.substring(1).toCharArray())).toUpperCase());
- } else {
- familyLine.append(" " + s);
- }
- }
- scrubbed.append(familyLine.toString() + "\n");
-
- /* Skip the purpose line that the bridge authority adds to its
- * cached-descriptors file. */
- } else if (line.startsWith("@purpose ")) {
- continue;
-
- /* Skip all crypto parts that might leak the bridge's identity
- * fingerprint. */
- } else if (line.startsWith("-----BEGIN ")
- || line.equals("onion-key") || line.equals("signing-key") ||
- line.equals("onion-key-crosscert") ||
- line.startsWith("ntor-onion-key-crosscert ")) {
- skipCrypto = true;
-
- /* Stop skipping lines when the crypto parts are over. */
- } else if (line.startsWith("-----END ")) {
- skipCrypto = false;
-
- /* Skip the ed25519 signature; we'll include a SHA256 digest of
- * the SHA256 descriptor digest in router-digest-sha256. */
- } else if (line.startsWith("router-sig-ed25519 ")) {
- continue;
-
- /* If we encounter an unrecognized line, stop parsing and print
- * out a warning. We might have overlooked sensitive information
- * that we need to remove or replace for the sanitized descriptor
- * version. */
- } else {
- this.logger.warning("Unrecognized line '" + line
- + "'. Skipping.");
- return;
- }
- }
- br.close();
- } catch (Exception e) {
- this.logger.log(Level.WARNING, "Could not parse server "
- + "descriptor.", e);
- return;
- }
-
- /* Determine filename of sanitized server descriptor. */
- String descriptorDigest = null;
- try {
- String ascii = new String(data, "US-ASCII");
- String startToken = "router ";
- String sigToken = "\nrouter-signature\n";
- int start = ascii.indexOf(startToken);
- int sig = ascii.indexOf(sigToken) + sigToken.length();
- if (start >= 0 && sig >= 0 && sig > start) {
- byte[] forDigest = new byte[sig - start];
- System.arraycopy(data, start, forDigest, 0, sig - start);
- descriptorDigest = DigestUtils.shaHex(DigestUtils.sha(forDigest));
- }
- } catch (UnsupportedEncodingException e) {
- /* Handle below. */
- }
- if (descriptorDigest == null) {
- this.logger.log(Level.WARNING, "Could not calculate server "
- + "descriptor digest.");
- return;
- }
- String descriptorDigestSha256Base64 = null;
- if (masterKeyEd25519FromIdentityEd25519 != null) {
- try {
- String ascii = new String(data, "US-ASCII");
- String startToken = "router ";
- String sigToken = "\n-----END SIGNATURE-----\n";
- int start = ascii.indexOf(startToken);
- int sig = ascii.indexOf(sigToken) + sigToken.length();
- if (start >= 0 && sig >= 0 && sig > start) {
- byte[] forDigest = new byte[sig - start];
- System.arraycopy(data, start, forDigest, 0, sig - start);
- descriptorDigestSha256Base64 = Base64.encodeBase64String(
- DigestUtils.sha256(DigestUtils.sha256(forDigest))).
- replaceAll("=", "");
- }
- } catch (UnsupportedEncodingException e) {
- /* Handle below. */
- }
- if (descriptorDigestSha256Base64 == null) {
- this.logger.log(Level.WARNING, "Could not calculate server "
- + "descriptor SHA256 digest.");
- return;
- }
- }
- String dyear = published.substring(0, 4);
- String dmonth = published.substring(5, 7);
- File tarballFile = new File(
- this.sanitizedBridgesDirectory.getAbsolutePath() + "/"
- + dyear + "/" + dmonth + "/server-descriptors/"
- + "/" + descriptorDigest.charAt(0) + "/"
- + descriptorDigest.charAt(1) + "/"
- + descriptorDigest);
- File rsyncCatFile = new File("recent/bridge-descriptors/"
- + "server-descriptors/" + this.rsyncCatString
- + "-server-descriptors.tmp");
- File[] outputFiles = new File[] { tarballFile, rsyncCatFile };
- boolean[] append = new boolean[] { false, true };
- try {
- for (int i = 0; i < outputFiles.length; i++) {
- File outputFile = outputFiles[i];
- boolean appendToFile = append[i];
- if (outputFile.exists() && !appendToFile) {
- /* We already stored this descriptor to disk before, so let's
- * not store it yet another time. */
- break;
- }
- outputFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- outputFile, appendToFile));
- bw.write("@type bridge-server-descriptor 1.1\n");
- bw.write(scrubbedDesc);
- if (descriptorDigestSha256Base64 != null) {
- bw.write("router-digest-sha256 " + descriptorDigestSha256Base64
- + "\n");
- }
- bw.write("router-digest " + descriptorDigest.toUpperCase()
- + "\n");
- bw.close();
- }
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not write sanitized server "
- + "descriptor to disk.", e);
- return;
- }
- }
-
- private String parseMasterKeyEd25519FromIdentityEd25519(
- String identityEd25519Base64) {
- byte[] identityEd25519 = Base64.decodeBase64(identityEd25519Base64);
- if (identityEd25519.length < 40) {
- this.logger.warning("Invalid length of identity-ed25519 (in "
- + "bytes): " + identityEd25519.length);
- } else if (identityEd25519[0] != 0x01) {
- this.logger.warning("Unknown version in identity-ed25519: "
- + identityEd25519[0]);
- } else if (identityEd25519[1] != 0x04) {
- this.logger.warning("Unknown cert type in identity-ed25519: "
- + identityEd25519[1]);
- } else if (identityEd25519[6] != 0x01) {
- this.logger.warning("Unknown certified key type in "
- + "identity-ed25519: " + identityEd25519[1]);
- } else if (identityEd25519[39] == 0x00) {
- this.logger.warning("No extensions in identity-ed25519 (which "
- + "would contain the encoded master-key-ed25519): "
- + identityEd25519[39]);
- } else {
- int extensionStart = 40;
- for (int i = 0; i < (int) identityEd25519[39]; i++) {
- if (identityEd25519.length < extensionStart + 4) {
- this.logger.warning("Invalid extension with id " + i
- + " in identity-ed25519.");
- break;
- }
-        int extensionLength = identityEd25519[extensionStart] & 0xff;
-        extensionLength <<= 8;
-        extensionLength += identityEd25519[extensionStart + 1] & 0xff;
- int extensionType = identityEd25519[extensionStart + 2];
- if (extensionLength == 32 && extensionType == 4) {
- if (identityEd25519.length < extensionStart + 4 + 32) {
- this.logger.warning("Invalid extension with id " + i
- + " in identity-ed25519.");
- break;
- }
- byte[] masterKeyEd25519 = new byte[32];
- System.arraycopy(identityEd25519, extensionStart + 4,
- masterKeyEd25519, 0, masterKeyEd25519.length);
- String masterKeyEd25519Base64 = Base64.encodeBase64String(
- masterKeyEd25519);
- String masterKeyEd25519Base64NoTrailingEqualSigns =
- masterKeyEd25519Base64.replaceAll("=", "");
- return masterKeyEd25519Base64NoTrailingEqualSigns;
- }
- extensionStart += 4 + extensionLength;
- }
- }
- this.logger.warning("Unable to locate master-key-ed25519 in "
- + "identity-ed25519.");
- return null;
- }
-
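The checks in parseMasterKeyEd25519FromIdentityEd25519() mirror the layout of Tor's ed25519 certificates: byte 0 is the version, byte 1 the certificate type, bytes 2..5 the expiration, byte 6 the certified key type, bytes 7..38 the certified key, and byte 39 the extension count; each extension then consists of a two-byte length, a type byte, a flags byte, and the extension data, with the master key carried in the type-4 extension. A condensed sketch of the same walk (names invented; returns null instead of logging):

    import org.apache.commons.codec.binary.Base64;

    public class Ed25519CertSketch {

      /* Extract the base64-encoded master key from a decoded
       * identity-ed25519 certificate, or null if absent. */
      public static String extractMasterKey(byte[] cert) {
        if (cert.length < 40 || cert[0] != 0x01 || cert[1] != 0x04
            || cert[6] != 0x01 || cert[39] == 0x00) {
          return null;
        }
        int offset = 40;
        for (int i = 0; i < (cert[39] & 0xff); i++) {
          if (cert.length < offset + 4) {
            return null;
          }
          int length = ((cert[offset] & 0xff) << 8)
              | (cert[offset + 1] & 0xff);
          int type = cert[offset + 2];
          if (type == 4 && length == 32
              && cert.length >= offset + 4 + 32) {
            byte[] masterKey = new byte[32];
            System.arraycopy(cert, offset + 4, masterKey, 0, 32);
            return Base64.encodeBase64String(masterKey)
                .replaceAll("=", "");
          }
          offset += 4 + length;
        }
        return null;
      }
    }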
- private String maxExtraInfoDescriptorPublishedTime =
- "1970-01-01 00:00:00";
-
- /**
- * Sanitizes an extra-info descriptor and writes it to disk.
- */
- public void sanitizeAndStoreExtraInfoDescriptor(byte[] data) {
-
- /* Parse descriptor to generate a sanitized version. */
- String scrubbedDesc = null, published = null,
- masterKeyEd25519FromIdentityEd25519 = null;
- try {
- BufferedReader br = new BufferedReader(new StringReader(new String(
- data, "US-ASCII")));
- String line = null;
- StringBuilder scrubbed = null;
- String hashedBridgeIdentity = null, masterKeyEd25519 = null;
- while ((line = br.readLine()) != null) {
-
- /* Parse bridge identity from extra-info line and replace it with
- * its hash in the sanitized descriptor. */
- String[] parts = line.split(" ");
- if (line.startsWith("extra-info ")) {
- hashedBridgeIdentity = DigestUtils.shaHex(Hex.decodeHex(
- parts[2].toCharArray())).toLowerCase();
- scrubbed = new StringBuilder("extra-info " + parts[1] + " "
- + hashedBridgeIdentity.toUpperCase() + "\n");
-
- /* Parse the publication time to determine the file name. */
- } else if (line.startsWith("published ")) {
- scrubbed.append(line + "\n");
- published = line.substring("published ".length());
- if (published.compareTo(maxExtraInfoDescriptorPublishedTime)
- > 0) {
- maxExtraInfoDescriptorPublishedTime = published;
- }
-
- /* Remove everything from transport lines except the transport
- * name. */
- } else if (line.startsWith("transport ")) {
- if (parts.length < 3) {
- this.logger.fine("Illegal line in extra-info descriptor: '"
- + line + "'. Skipping descriptor.");
- return;
- }
- scrubbed.append("transport " + parts[1] + "\n");
-
- /* Skip transport-info lines entirely. */
- } else if (line.startsWith("transport-info ")) {
-
- /* Extract master-key-ed25519 from identity-ed25519. */
- } else if (line.equals("identity-ed25519")) {
- StringBuilder sb = new StringBuilder();
- while ((line = br.readLine()) != null &&
- !line.equals("-----END ED25519 CERT-----")) {
- if (line.equals("-----BEGIN ED25519 CERT-----")) {
- continue;
- }
- sb.append(line);
- }
- masterKeyEd25519FromIdentityEd25519 =
- this.parseMasterKeyEd25519FromIdentityEd25519(
- sb.toString());
- String sha256MasterKeyEd25519 = Base64.encodeBase64String(
- DigestUtils.sha256(Base64.decodeBase64(
- masterKeyEd25519FromIdentityEd25519 + "="))).
- replaceAll("=", "");
- scrubbed.append("master-key-ed25519 " + sha256MasterKeyEd25519
- + "\n");
- if (masterKeyEd25519 != null && !masterKeyEd25519.equals(
- masterKeyEd25519FromIdentityEd25519)) {
- this.logger.warning("Mismatch between identity-ed25519 and "
- + "master-key-ed25519. Skipping.");
- return;
- }
-
- /* Verify that identity-ed25519 and master-key-ed25519 match. */
- } else if (line.startsWith("master-key-ed25519 ")) {
- masterKeyEd25519 = line.substring(line.indexOf(" ") + 1);
- if (masterKeyEd25519FromIdentityEd25519 != null &&
- !masterKeyEd25519FromIdentityEd25519.equals(
- masterKeyEd25519)) {
- this.logger.warning("Mismatch between identity-ed25519 and "
- + "master-key-ed25519. Skipping.");
- return;
- }
-
- /* Write the following lines unmodified to the sanitized
- * descriptor. */
- } else if (line.startsWith("write-history ")
- || line.startsWith("read-history ")
- || line.startsWith("geoip-start-time ")
- || line.startsWith("geoip-client-origins ")
- || line.startsWith("geoip-db-digest ")
- || line.startsWith("geoip6-db-digest ")
- || line.startsWith("conn-bi-direct ")
- || line.startsWith("bridge-")
- || line.startsWith("dirreq-")
- || line.startsWith("cell-")
- || line.startsWith("entry-")
- || line.startsWith("exit-")) {
- scrubbed.append(line + "\n");
-
- /* When we reach the signature, we're done. Write the sanitized
- * descriptor to disk below. */
- } else if (line.startsWith("router-signature")) {
- scrubbedDesc = scrubbed.toString();
- break;
-
- /* Skip the ed25519 signature; we'll include a SHA256 digest of
- * the SHA256 descriptor digest in router-digest-sha256. */
- } else if (line.startsWith("router-sig-ed25519 ")) {
- continue;
-
- /* If we encounter an unrecognized line, stop parsing and print
- * out a warning. We might have overlooked sensitive information
- * that we need to remove or replace for the sanitized descriptor
- * version. */
- } else {
- this.logger.warning("Unrecognized line '" + line
- + "'. Skipping.");
- return;
- }
- }
- br.close();
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not parse extra-info "
- + "descriptor.", e);
- return;
- } catch (DecoderException e) {
- this.logger.log(Level.WARNING, "Could not parse extra-info "
- + "descriptor.", e);
- return;
- }
-
- /* Determine filename of sanitized extra-info descriptor. */
- String descriptorDigest = null;
- try {
- String ascii = new String(data, "US-ASCII");
- String startToken = "extra-info ";
- String sigToken = "\nrouter-signature\n";
- int start = ascii.indexOf(startToken);
- int sig = ascii.indexOf(sigToken) + sigToken.length();
- if (start >= 0 && sig >= 0 && sig > start) {
- byte[] forDigest = new byte[sig - start];
- System.arraycopy(data, start, forDigest, 0, sig - start);
- descriptorDigest = DigestUtils.shaHex(DigestUtils.sha(forDigest));
- }
- } catch (UnsupportedEncodingException e) {
- /* Handle below. */
- }
- if (descriptorDigest == null) {
- this.logger.log(Level.WARNING, "Could not calculate extra-info "
- + "descriptor digest.");
- return;
- }
- String descriptorDigestSha256Base64 = null;
- if (masterKeyEd25519FromIdentityEd25519 != null) {
- try {
- String ascii = new String(data, "US-ASCII");
- String startToken = "extra-info ";
- String sigToken = "\n-----END SIGNATURE-----\n";
- int start = ascii.indexOf(startToken);
- int sig = ascii.indexOf(sigToken) + sigToken.length();
- if (start >= 0 && sig >= 0 && sig > start) {
- byte[] forDigest = new byte[sig - start];
- System.arraycopy(data, start, forDigest, 0, sig - start);
- descriptorDigestSha256Base64 = Base64.encodeBase64String(
- DigestUtils.sha256(DigestUtils.sha256(forDigest))).
- replaceAll("=", "");
- }
- } catch (UnsupportedEncodingException e) {
- /* Handle below. */
- }
- if (descriptorDigestSha256Base64 == null) {
- this.logger.log(Level.WARNING, "Could not calculate extra-info "
- + "descriptor SHA256 digest.");
- return;
- }
- }
- String dyear = published.substring(0, 4);
- String dmonth = published.substring(5, 7);
- File tarballFile = new File(
- this.sanitizedBridgesDirectory.getAbsolutePath() + "/"
- + dyear + "/" + dmonth + "/extra-infos/"
- + descriptorDigest.charAt(0) + "/"
- + descriptorDigest.charAt(1) + "/"
- + descriptorDigest);
- File rsyncCatFile = new File("recent/bridge-descriptors/"
- + "extra-infos/" + this.rsyncCatString + "-extra-infos.tmp");
- File[] outputFiles = new File[] { tarballFile, rsyncCatFile };
- boolean[] append = new boolean[] { false, true };
- try {
- for (int i = 0; i < outputFiles.length; i++) {
- File outputFile = outputFiles[i];
- boolean appendToFile = append[i];
- if (outputFile.exists() && !appendToFile) {
- /* We already stored this descriptor to disk before, so let's
- * not store it yet another time. */
- break;
- }
- outputFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- outputFile, appendToFile));
- bw.write("@type bridge-extra-info 1.3\n");
- bw.write(scrubbedDesc);
- if (descriptorDigestSha256Base64 != null) {
- bw.write("router-digest-sha256 " + descriptorDigestSha256Base64
- + "\n");
- }
- bw.write("router-digest " + descriptorDigest.toUpperCase()
- + "\n");
- bw.close();
- }
- } catch (Exception e) {
- this.logger.log(Level.WARNING, "Could not write sanitized "
- + "extra-info descriptor to disk.", e);
- }
- }
-
- /**
- * Finish writing by deleting secrets that have fallen outside the
- * bridge descriptor sanitizing interval and rewriting the secrets
- * file with only those secrets that are still needed.
- */
- public void finishWriting() {
-
- /* Delete secrets that we don't need anymore. */
- if (!this.secretsForHashingIPAddresses.isEmpty() &&
- this.secretsForHashingIPAddresses.firstKey().compareTo(
- this.bridgeSanitizingCutOffTimestamp) < 0) {
- try {
- int kept = 0, deleted = 0;
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- this.bridgeIpSecretsFile));
- for (Map.Entry<String, byte[]> e :
- this.secretsForHashingIPAddresses.entrySet()) {
- if (e.getKey().compareTo(
- this.bridgeSanitizingCutOffTimestamp) < 0) {
- deleted++;
- } else {
- bw.write(e.getKey() + "," + Hex.encodeHexString(e.getValue())
- + "\n");
- kept++;
- }
- }
- bw.close();
- this.logger.info("Deleted " + deleted + " secrets that we don't "
- + "need anymore and kept " + kept + ".");
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not store reduced set of "
- + "secrets to disk! This is a bad sign, better check what's "
- + "going on!", e);
- }
- }
- }
-
- private void checkStaleDescriptors() {
- SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
- "yyyy-MM-dd HH:mm:ss");
- dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- long tooOldMillis = System.currentTimeMillis() - 330L * 60L * 1000L;
- try {
- long maxNetworkStatusPublishedMillis =
- dateTimeFormat.parse(maxNetworkStatusPublishedTime).getTime();
- if (maxNetworkStatusPublishedMillis > 0L &&
- maxNetworkStatusPublishedMillis < tooOldMillis) {
- this.logger.warning("The last known bridge network status was "
- + "published " + maxNetworkStatusPublishedTime + ", which is "
- + "more than 5:30 hours in the past.");
- }
- long maxServerDescriptorPublishedMillis =
- dateTimeFormat.parse(maxServerDescriptorPublishedTime).
- getTime();
- if (maxServerDescriptorPublishedMillis > 0L &&
- maxServerDescriptorPublishedMillis < tooOldMillis) {
- this.logger.warning("The last known bridge server descriptor was "
- + "published " + maxServerDescriptorPublishedTime + ", which "
- + "is more than 5:30 hours in the past.");
- }
- long maxExtraInfoDescriptorPublishedMillis =
- dateTimeFormat.parse(maxExtraInfoDescriptorPublishedTime).
- getTime();
- if (maxExtraInfoDescriptorPublishedMillis > 0L &&
- maxExtraInfoDescriptorPublishedMillis < tooOldMillis) {
- this.logger.warning("The last known bridge extra-info descriptor "
- + "was published " + maxExtraInfoDescriptorPublishedTime
- + ", which is more than 5:30 hours in the past.");
- }
- } catch (ParseException e) {
- this.logger.log(Level.WARNING, "Unable to parse timestamp for "
- + "stale check.", e);
- }
- }
-
- /* Delete all files from the rsync directory that have not been modified
- * in the last three days, and remove the .tmp extension from newly
- * written files. */
- public void cleanUpRsyncDirectory() {
- long cutOffMillis = System.currentTimeMillis()
- - 3L * 24L * 60L * 60L * 1000L;
- Stack<File> allFiles = new Stack<File>();
- allFiles.add(new File("recent/bridge-descriptors"));
- while (!allFiles.isEmpty()) {
- File file = allFiles.pop();
- if (file.isDirectory()) {
- allFiles.addAll(Arrays.asList(file.listFiles()));
- } else if (file.lastModified() < cutOffMillis) {
- file.delete();
- } else if (file.getName().endsWith(".tmp")) {
- file.renameTo(new File(file.getParentFile(),
- file.getName().substring(0,
- file.getName().lastIndexOf(".tmp"))));
- }
- }
- }
-}
-
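Both sanitize methods in this class write each descriptor twice: once into the permanent tarball directory structure (skipped if the file already exists from an earlier run) and once appended to a per-run .tmp file under recent/, from which cleanUpRsyncDirectory() strips the .tmp suffix at the end of the run so that rsync clients never see half-written files. The shared pattern, as a sketch with invented names:

    import java.io.BufferedWriter;
    import java.io.File;
    import java.io.FileWriter;
    import java.io.IOException;

    public class DualOutputSketch {

      /* Write a descriptor to its tarball location (create once) and to
       * the current run's rsync .tmp file (append). */
      public static void write(File tarballFile, File rsyncTmpFile,
          String typeAnnotation, String descriptor) throws IOException {
        File[] outputFiles = new File[] { tarballFile, rsyncTmpFile };
        boolean[] append = new boolean[] { false, true };
        for (int i = 0; i < outputFiles.length; i++) {
          if (outputFiles[i].exists() && !append[i]) {
            /* Already stored in an earlier run; skip the rsync copy,
             * too, to avoid duplicates in recent/. */
            break;
          }
          outputFiles[i].getParentFile().mkdirs();
          BufferedWriter bw = new BufferedWriter(
              new FileWriter(outputFiles[i], append[i]));
          bw.write(typeAnnotation);
          bw.write(descriptor);
          bw.close();
        }
      }
    }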
diff --git a/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java b/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
deleted file mode 100644
index 77217d4..0000000
--- a/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
+++ /dev/null
@@ -1,272 +0,0 @@
-/* Copyright 2011--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db.bridgepools;
-
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-import java.util.SortedSet;
-import java.util.Stack;
-import java.util.TimeZone;
-import java.util.TreeSet;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.apache.commons.codec.DecoderException;
-import org.apache.commons.codec.binary.Hex;
-import org.apache.commons.codec.digest.DigestUtils;
-import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
-import org.torproject.ernie.db.main.Configuration;
-import org.torproject.ernie.db.main.LockFile;
-import org.torproject.ernie.db.main.LoggingConfiguration;
-
-public class BridgePoolAssignmentsProcessor extends Thread {
-
- public static void main(String[] args) {
-
- /* Initialize logging configuration. */
- new LoggingConfiguration("bridge-pool-assignments");
- Logger logger = Logger.getLogger(
- BridgePoolAssignmentsProcessor.class.getName());
- logger.info("Starting bridge-pool-assignments module of ERNIE.");
-
- // Initialize configuration
- Configuration config = new Configuration();
-
- // Use lock file to avoid overlapping runs
- LockFile lf = new LockFile("bridge-pool-assignments");
- if (!lf.acquireLock()) {
- logger.severe("Warning: ERNIE is already running or has not exited "
- + "cleanly! Exiting!");
- System.exit(1);
- }
-
- // Process bridge pool assignments
- new BridgePoolAssignmentsProcessor(config).run();
-
- // Remove lock file
- lf.releaseLock();
-
- logger.info("Terminating bridge-pool-assignments module of ERNIE.");
- }
-
- private Configuration config;
-
- public BridgePoolAssignmentsProcessor(Configuration config) {
- this.config = config;
- }
-
- public void run() {
-
- File assignmentsDirectory =
- new File(config.getAssignmentsDirectory());
- File sanitizedAssignmentsDirectory =
- new File(config.getSanitizedAssignmentsDirectory());
-
- Logger logger =
- Logger.getLogger(BridgePoolAssignmentsProcessor.class.getName());
- if (assignmentsDirectory == null ||
- sanitizedAssignmentsDirectory == null) {
- IllegalArgumentException e = new IllegalArgumentException("Neither "
- + "assignmentsDirectory nor sanitizedAssignmentsDirectory may "
- + "be null!");
- throw e;
- }
-
- List<File> assignmentFiles = new ArrayList<File>();
- Stack<File> files = new Stack<File>();
- files.add(assignmentsDirectory);
- while (!files.isEmpty()) {
- File file = files.pop();
- if (file.isDirectory()) {
- files.addAll(Arrays.asList(file.listFiles()));
- } else if (file.getName().equals("assignments.log")) {
- assignmentFiles.add(file);
- }
- }
-
- SimpleDateFormat assignmentFormat =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- assignmentFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- SimpleDateFormat filenameFormat =
- new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
- filenameFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- String duplicateFingerprint = null;
- long maxBridgePoolAssignmentTime = 0L;
- for (File assignmentFile : assignmentFiles) {
- logger.info("Processing bridge pool assignment file '"
- + assignmentFile.getAbsolutePath() + "'...");
- try {
- BufferedReader br = null;
- if (assignmentFile.getName().endsWith(".gz")) {
- br = new BufferedReader(new InputStreamReader(
- new GzipCompressorInputStream(new FileInputStream(
- assignmentFile))));
- } else {
- br = new BufferedReader(new FileReader(assignmentFile));
- }
- String line, bridgePoolAssignmentLine = null;
- SortedSet<String> sanitizedAssignments = new TreeSet<String>();
- boolean wroteLastLine = false, skipBefore20120504125947 = true;
- Set<String> hashedFingerprints = null;
- while ((line = br.readLine()) != null || !wroteLastLine) {
- if (line != null && line.startsWith("bridge-pool-assignment ")) {
- String[] parts = line.split(" ");
- if (parts.length != 3) {
- continue;
- }
- /* TODO Take out this temporary hack to ignore all assignments
- * coming from ponticum when byblos was still the official
- * BridgeDB host. */
- if (line.compareTo(
- "bridge-pool-assignment 2012-05-04 12:59:47") >= 0) {
- skipBefore20120504125947 = false;
- }
- }
- if (skipBefore20120504125947) {
- if (line == null) {
- break;
- } else {
- continue;
- }
- }
- if (line == null ||
- line.startsWith("bridge-pool-assignment ")) {
- if (bridgePoolAssignmentLine != null) {
- try {
- long bridgePoolAssignmentTime = assignmentFormat.parse(
- bridgePoolAssignmentLine.substring(
- "bridge-pool-assignment ".length())).getTime();
- maxBridgePoolAssignmentTime = Math.max(
- maxBridgePoolAssignmentTime,
- bridgePoolAssignmentTime);
- File tarballFile = new File(
- sanitizedAssignmentsDirectory, filenameFormat.format(
- bridgePoolAssignmentTime));
- File rsyncFile = new File(
- "recent/bridge-pool-assignments/"
- + tarballFile.getName());
- File[] outputFiles = new File[] { tarballFile,
- rsyncFile };
- for (File outputFile : outputFiles) {
- if (!outputFile.exists()) {
- outputFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- outputFile));
- bw.write("@type bridge-pool-assignment 1.0\n");
- bw.write(bridgePoolAssignmentLine + "\n");
- for (String assignmentLine : sanitizedAssignments) {
- bw.write(assignmentLine + "\n");
- }
- bw.close();
- }
- }
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not write sanitized "
- + "bridge pool assignment file for line '"
- + bridgePoolAssignmentLine + "' to disk. Skipping "
- + "bridge pool assignment file '"
- + assignmentFile.getAbsolutePath() + "'.", e);
- break;
- } catch (ParseException e) {
- logger.log(Level.WARNING, "Could not parse timestamp in "
- + "line '" + bridgePoolAssignmentLine + "'. Skipping "
- + "bridge pool assignment file '"
- + assignmentFile.getAbsolutePath() + "'.", e);
- break;
- }
- sanitizedAssignments.clear();
- }
- if (line == null) {
- wroteLastLine = true;
- } else {
- bridgePoolAssignmentLine = line;
- hashedFingerprints = new HashSet<String>();
- }
- } else {
- String[] parts = line.split(" ");
- if (parts.length < 2 || parts[0].length() < 40) {
- logger.warning("Unrecognized line '" + line
- + "'. Aborting.");
- break;
- }
- String hashedFingerprint = null;
- try {
- hashedFingerprint = DigestUtils.shaHex(Hex.decodeHex(
- line.split(" ")[0].toCharArray())).toLowerCase();
- } catch (DecoderException e) {
- logger.warning("Unable to decode hex fingerprint in line '"
- + line + "'. Aborting.");
- break;
- }
- if (hashedFingerprints.contains(hashedFingerprint)) {
- duplicateFingerprint = bridgePoolAssignmentLine;
- }
- hashedFingerprints.add(hashedFingerprint);
- String assignmentDetails = line.substring(40);
- sanitizedAssignments.add(hashedFingerprint
- + assignmentDetails);
- }
- }
- br.close();
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not read bridge pool assignment "
- + "file '" + assignmentFile.getAbsolutePath()
- + "'. Skipping.", e);
- }
- }
-
- if (duplicateFingerprint != null) {
- logger.warning("At least one bridge pool assignment list contained "
- + "duplicate fingerprints. Last found in assignment list "
- + "starting with '" + duplicateFingerprint + "'.");
- }
-
- if (maxBridgePoolAssignmentTime > 0L &&
- maxBridgePoolAssignmentTime + 330L * 60L * 1000L
- < System.currentTimeMillis()) {
- SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
- "yyyy-MM-dd HH:mm:ss");
- dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- logger.warning("The last known bridge pool assignment list was "
- + "published at "
- + dateTimeFormat.format(maxBridgePoolAssignmentTime)
- + ", which is more than 5:30 hours in the past.");
- }
-
- this.cleanUpRsyncDirectory();
-
- logger.info("Finished processing bridge pool assignment file(s).");
- }
-
- /* Delete all files from the rsync directory that have not been modified
- * in the last three days. */
- public void cleanUpRsyncDirectory() {
- long cutOffMillis = System.currentTimeMillis()
- - 3L * 24L * 60L * 60L * 1000L;
- Stack<File> allFiles = new Stack<File>();
- allFiles.add(new File("recent/bridge-pool-assignments"));
- while (!allFiles.isEmpty()) {
- File file = allFiles.pop();
- if (file.isDirectory()) {
- allFiles.addAll(Arrays.asList(file.listFiles()));
- } else if (file.lastModified() < cutOffMillis) {
- file.delete();
- }
- }
- }
-}
-
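
The core of the sanitizer deleted above is the fingerprint hashing step: the 40-character hex fingerprint at the start of each assignment line is replaced by the SHA-1 digest of its binary form, so sanitized assignments cannot be trivially linked back to the original bridge identities. A minimal sketch of that step (class name and sample input are illustrative; commons-codec is assumed on the classpath, as elsewhere in this tree):

    import org.apache.commons.codec.DecoderException;
    import org.apache.commons.codec.binary.Hex;
    import org.apache.commons.codec.digest.DigestUtils;

    public class SanitizeFingerprintSketch {

      /* Replace the 40-character hex fingerprint at the start of an
       * assignment line with the hex-encoded SHA-1 digest of its binary
       * form, keeping the remaining assignment details unchanged. */
      public static String sanitize(String assignmentLine)
          throws DecoderException {
        String fingerprint = assignmentLine.split(" ")[0];
        String hashedFingerprint = DigestUtils.shaHex(
            Hex.decodeHex(fingerprint.toCharArray()));
        return hashedFingerprint + assignmentLine.substring(40);
      }

      public static void main(String[] args) throws DecoderException {
        /* Illustrative input only; a real line starts with a bridge's
         * actual 40-character hex fingerprint. */
        System.out.println(sanitize(
            "0000000000000000000000000000000000000000 https ring=3"));
      }
    }
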
diff --git a/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java b/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
deleted file mode 100644
index be3070f..0000000
--- a/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
+++ /dev/null
@@ -1,213 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db.exitlists;
-
-import java.io.BufferedInputStream;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.net.HttpURLConnection;
-import java.net.URL;
-import java.text.SimpleDateFormat;
-import java.util.Arrays;
-import java.util.Date;
-import java.util.List;
-import java.util.SortedSet;
-import java.util.Stack;
-import java.util.TimeZone;
-import java.util.TreeSet;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.torproject.descriptor.Descriptor;
-import org.torproject.descriptor.DescriptorParser;
-import org.torproject.descriptor.DescriptorSourceFactory;
-import org.torproject.descriptor.ExitList;
-import org.torproject.descriptor.ExitListEntry;
-import org.torproject.descriptor.impl.DescriptorParseException;
-import org.torproject.ernie.db.main.Configuration;
-import org.torproject.ernie.db.main.LockFile;
-import org.torproject.ernie.db.main.LoggingConfiguration;
-
-public class ExitListDownloader extends Thread {
-
- public static void main(String[] args) {
-
- /* Initialize logging configuration. */
- new LoggingConfiguration("exit-lists");
- Logger logger = Logger.getLogger(ExitListDownloader.class.getName());
- logger.info("Starting exit-lists module of ERNIE.");
-
- // Initialize configuration
- Configuration config = new Configuration();
-
- // Use lock file to avoid overlapping runs
- LockFile lf = new LockFile("exit-lists");
- if (!lf.acquireLock()) {
- logger.severe("Warning: ERNIE is already running or has not exited "
- + "cleanly! Exiting!");
- System.exit(1);
- }
-
- // Download exit list and store it to disk
- new ExitListDownloader(config).run();
-
- // Remove lock file
- lf.releaseLock();
-
- logger.info("Terminating exit-lists module of ERNIE.");
- }
-
- public ExitListDownloader(Configuration config) {
- }
-
- public void run() {
-
- Logger logger = Logger.getLogger(ExitListDownloader.class.getName());
-
- SimpleDateFormat dateTimeFormat =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-
- Date downloadedDate = new Date();
- String downloadedExitList = null;
- try {
- logger.fine("Downloading exit list...");
- StringBuilder sb = new StringBuilder();
- sb.append("@type tordnsel 1.0\n");
- sb.append("Downloaded " + dateTimeFormat.format(downloadedDate)
- + "\n");
- String exitAddressesUrl =
- "http://exitlist.torproject.org/exit-addresses";
- URL u = new URL(exitAddressesUrl);
- HttpURLConnection huc = (HttpURLConnection) u.openConnection();
- huc.setRequestMethod("GET");
- huc.connect();
- int response = huc.getResponseCode();
- if (response != 200) {
- logger.warning("Could not download exit list. Response code " +
- response);
- return;
- }
- BufferedInputStream in = new BufferedInputStream(
- huc.getInputStream());
- int len;
- byte[] data = new byte[1024];
- while ((len = in.read(data, 0, 1024)) >= 0) {
- sb.append(new String(data, 0, len));
- }
- in.close();
- downloadedExitList = sb.toString();
- logger.fine("Finished downloading exit list.");
- } catch (IOException e) {
- logger.log(Level.WARNING, "Failed downloading exit list", e);
- return;
- }
- if (downloadedExitList == null) {
- logger.warning("Failed downloading exit list");
- return;
- }
-
- SimpleDateFormat tarballFormat =
- new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
- tarballFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- File tarballFile = new File("out/exit-lists/" + tarballFormat.format(
- downloadedDate));
-
- long maxScanMillis = 0L;
- try {
- DescriptorParser descriptorParser =
- DescriptorSourceFactory.createDescriptorParser();
- List<Descriptor> parsedDescriptors =
- descriptorParser.parseDescriptors(downloadedExitList.getBytes(),
- tarballFile.getName());
- if (parsedDescriptors.size() != 1 ||
- !(parsedDescriptors.get(0) instanceof ExitList)) {
- logger.warning("Could not parse downloaded exit list");
- return;
- }
- ExitList parsedExitList = (ExitList) parsedDescriptors.get(0);
- for (ExitListEntry entry : parsedExitList.getExitListEntries()) {
- maxScanMillis = Math.max(maxScanMillis, entry.getScanMillis());
- }
- } catch (DescriptorParseException e) {
- logger.log(Level.WARNING, "Could not parse downloaded exit list",
- e);
- }
- if (maxScanMillis > 0L &&
- maxScanMillis + 330L * 60L * 1000L < System.currentTimeMillis()) {
- logger.warning("The last reported scan in the downloaded exit list "
- + "took place at " + dateTimeFormat.format(maxScanMillis)
- + ", which is more than 5:30 hours in the past.");
- }
-
- /* Write to disk. */
- File rsyncFile = new File("recent/exit-lists/"
- + tarballFile.getName());
- File[] outputFiles = new File[] { tarballFile, rsyncFile };
- for (File outputFile : outputFiles) {
- try {
- outputFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- outputFile));
- bw.write(downloadedExitList);
- bw.close();
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not write downloaded exit list "
- + "to " + outputFile.getAbsolutePath(), e);
- }
- }
-
- /* Write stats. */
- StringBuilder dumpStats = new StringBuilder("Finished downloading "
- + "exit list.\nLast three exit lists are:");
- Stack<File> filesInInputDir = new Stack<File>();
- filesInInputDir.add(new File("out/exit-lists"));
- SortedSet<File> lastThreeExitLists = new TreeSet<File>();
- while (!filesInInputDir.isEmpty()) {
- File pop = filesInInputDir.pop();
- if (pop.isDirectory()) {
- SortedSet<File> lastThreeElements = new TreeSet<File>();
- for (File f : pop.listFiles()) {
- lastThreeElements.add(f);
- }
- while (lastThreeElements.size() > 3) {
- lastThreeElements.remove(lastThreeElements.first());
- }
- for (File f : lastThreeElements) {
- filesInInputDir.add(f);
- }
- } else {
- lastThreeExitLists.add(pop);
- while (lastThreeExitLists.size() > 3) {
- lastThreeExitLists.remove(lastThreeExitLists.first());
- }
- }
- }
- for (File f : lastThreeExitLists) {
- dumpStats.append("\n" + f.getName());
- }
- logger.info(dumpStats.toString());
-
- this.cleanUpRsyncDirectory();
- }
-
- /* Delete all files from the rsync directory that have not been modified
- * in the last three days. */
- public void cleanUpRsyncDirectory() {
- long cutOffMillis = System.currentTimeMillis()
- - 3L * 24L * 60L * 60L * 1000L;
- Stack<File> allFiles = new Stack<File>();
- allFiles.add(new File("recent/exit-lists"));
- while (!allFiles.isEmpty()) {
- File file = allFiles.pop();
- if (file.isDirectory()) {
- allFiles.addAll(Arrays.asList(file.listFiles()));
- } else if (file.lastModified() < cutOffMillis) {
- file.delete();
- }
- }
- }
-}
-
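
The download step above follows a common pattern: open an HttpURLConnection, insist on a 200 response, and buffer the whole body before parsing. A minimal sketch, assuming an ASCII payload like the exit-address format; collecting raw bytes and decoding once also avoids the chunk-by-chunk String decoding above, which would split characters in a multi-byte encoding:

    import java.io.BufferedInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import java.net.HttpURLConnection;
    import java.net.URL;

    public class HttpDownloadSketch {

      /* Fetch the document at the given URL into memory and decode it
       * as US-ASCII, which is safe for the exit-address format. */
      public static String download(String urlString) throws IOException {
        HttpURLConnection huc =
            (HttpURLConnection) new URL(urlString).openConnection();
        huc.setRequestMethod("GET");
        huc.connect();
        if (huc.getResponseCode() != 200) {
          throw new IOException("Response code "
              + huc.getResponseCode());
        }
        BufferedInputStream in = new BufferedInputStream(
            huc.getInputStream());
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        int len;
        byte[] data = new byte[1024];
        while ((len = in.read(data, 0, 1024)) >= 0) {
          baos.write(data, 0, len);
        }
        in.close();
        return baos.toString("US-ASCII");
      }
    }
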
diff --git a/src/org/torproject/ernie/db/main/Configuration.java b/src/org/torproject/ernie/db/main/Configuration.java
deleted file mode 100644
index eeb0d36..0000000
--- a/src/org/torproject/ernie/db/main/Configuration.java
+++ /dev/null
@@ -1,304 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db.main;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
-import java.io.IOException;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.SortedMap;
-import java.util.TreeMap;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-/**
- * Initialize configuration with hard-coded defaults, overwrite it with
- * the configuration in the config file, if one exists, and answer
- * callers' questions about our configuration.
- */
-public class Configuration {
- private String directoryArchivesOutputDirectory =
- "out/relay-descriptors/";
- private boolean importCachedRelayDescriptors = false;
- private List<String> cachedRelayDescriptorsDirectory =
- new ArrayList<String>(Arrays.asList(
- "in/relay-descriptors/cacheddesc/".split(",")));
- private boolean importDirectoryArchives = false;
- private String directoryArchivesDirectory =
- "in/relay-descriptors/archives/";
- private boolean keepDirectoryArchiveImportHistory = false;
- private boolean replaceIPAddressesWithHashes = false;
- private long limitBridgeDescriptorMappings = -1L;
- private String sanitizedBridgesWriteDirectory =
- "out/bridge-descriptors/";
- private String bridgeSnapshotsDirectory = "in/bridge-descriptors/";
- private boolean downloadRelayDescriptors = false;
- private List<String> downloadFromDirectoryAuthorities = Arrays.asList((
- "86.59.21.38,76.73.17.194:9030,171.25.193.9:443,"
- + "193.23.244.244,208.83.223.34:443,128.31.0.34:9131,"
- + "194.109.206.212,212.112.245.170,154.35.32.5").split(","));
- private List<String> downloadVotesByFingerprint = Arrays.asList((
- "14C131DFC5C6F93646BE72FA1401C02A8DF2E8B4,"
- + "27B6B5996C426270A5C95488AA5BCEB6BCC86956,"
- + "49015F787433103580E3B66A1707A00E60F2D15B,"
- + "585769C78764D58426B8B52B6651A5A71137189A,"
- + "80550987E1D626E3EBA5E5E75A458DE0626D088C,"
- + "D586D18309DED4CD6D57C18FDB97EFA96D330566,"
- + "E8A9C45EDE6D711294FADF8E7951F4DE6CA56B58,"
- + "ED03BB616EB2F60BEC80151114BB25CEF515B226,"
- + "EFCBE720AB3A82B99F9E953CD5BF50F7EEFC7B97").split(","));
- private boolean downloadCurrentConsensus = true;
- private boolean downloadCurrentMicrodescConsensus = true;
- private boolean downloadCurrentVotes = true;
- private boolean downloadMissingServerDescriptors = true;
- private boolean downloadMissingExtraInfoDescriptors = true;
- private boolean downloadMissingMicrodescriptors = true;
- private boolean downloadAllServerDescriptors = false;
- private boolean downloadAllExtraInfoDescriptors = false;
- private boolean compressRelayDescriptorDownloads;
- private String assignmentsDirectory = "in/bridge-pool-assignments/";
- private String sanitizedAssignmentsDirectory =
- "out/bridge-pool-assignments/";
- private String torperfOutputDirectory = "out/torperf/";
- private SortedMap<String, String> torperfSources = null;
- private List<String> torperfFiles = null;
- public Configuration() {
-
- /* Initialize logger. */
- Logger logger = Logger.getLogger(Configuration.class.getName());
-
- /* Read config file, if present. */
- File configFile = new File("config");
- if (!configFile.exists()) {
- logger.warning("Could not find config file. In the default "
- + "configuration, we are not configured to read data from any "
- + "data source or write data to any data sink. You need to "
- + "create a config file (" + configFile.getAbsolutePath()
- + ") and provide at least one data source and one data sink. "
- + "Refer to the manual for more information.");
- return;
- }
- String line = null;
- boolean containsCachedRelayDescriptorsDirectory = false;
- try {
- BufferedReader br = new BufferedReader(new FileReader(configFile));
- while ((line = br.readLine()) != null) {
- if (line.startsWith("#") || line.length() < 1) {
- continue;
- } else if (line.startsWith("DirectoryArchivesOutputDirectory")) {
- this.directoryArchivesOutputDirectory = line.split(" ")[1];
- } else if (line.startsWith("ImportCachedRelayDescriptors")) {
- this.importCachedRelayDescriptors = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("CachedRelayDescriptorsDirectory")) {
- if (!containsCachedRelayDescriptorsDirectory) {
- this.cachedRelayDescriptorsDirectory.clear();
- containsCachedRelayDescriptorsDirectory = true;
- }
- this.cachedRelayDescriptorsDirectory.add(line.split(" ")[1]);
- } else if (line.startsWith("ImportDirectoryArchives")) {
- this.importDirectoryArchives = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("DirectoryArchivesDirectory")) {
- this.directoryArchivesDirectory = line.split(" ")[1];
- } else if (line.startsWith("KeepDirectoryArchiveImportHistory")) {
- this.keepDirectoryArchiveImportHistory = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("ReplaceIPAddressesWithHashes")) {
- this.replaceIPAddressesWithHashes = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("LimitBridgeDescriptorMappings")) {
- this.limitBridgeDescriptorMappings = Long.parseLong(
- line.split(" ")[1]);
- } else if (line.startsWith("SanitizedBridgesWriteDirectory")) {
- this.sanitizedBridgesWriteDirectory = line.split(" ")[1];
- } else if (line.startsWith("BridgeSnapshotsDirectory")) {
- this.bridgeSnapshotsDirectory = line.split(" ")[1];
- } else if (line.startsWith("DownloadRelayDescriptors")) {
- this.downloadRelayDescriptors = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("DownloadFromDirectoryAuthorities")) {
- this.downloadFromDirectoryAuthorities = new ArrayList<String>();
- for (String dir : line.split(" ")[1].split(",")) {
- // test if IP:port pair has correct format
- if (dir.length() < 1) {
- logger.severe("Configuration file contains directory "
- + "authority IP:port of length 0 in line '" + line
- + "'! Exiting!");
- System.exit(1);
- }
- new URL("http://" + dir + "/");
- this.downloadFromDirectoryAuthorities.add(dir);
- }
- } else if (line.startsWith("DownloadVotesByFingerprint")) {
- this.downloadVotesByFingerprint = new ArrayList<String>();
- for (String fingerprint : line.split(" ")[1].split(",")) {
- this.downloadVotesByFingerprint.add(fingerprint);
- }
- } else if (line.startsWith("DownloadCurrentConsensus")) {
- this.downloadCurrentConsensus = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("DownloadCurrentMicrodescConsensus")) {
- this.downloadCurrentMicrodescConsensus = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("DownloadCurrentVotes")) {
- this.downloadCurrentVotes = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("DownloadMissingServerDescriptors")) {
- this.downloadMissingServerDescriptors = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith(
- "DownloadMissingExtraInfoDescriptors")) {
- this.downloadMissingExtraInfoDescriptors = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("DownloadMissingMicrodescriptors")) {
- this.downloadMissingMicrodescriptors = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("DownloadAllServerDescriptors")) {
- this.downloadAllServerDescriptors = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("DownloadAllExtraInfoDescriptors")) {
- this.downloadAllExtraInfoDescriptors = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("CompressRelayDescriptorDownloads")) {
- this.compressRelayDescriptorDownloads = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("AssignmentsDirectory")) {
- this.assignmentsDirectory = line.split(" ")[1];
- } else if (line.startsWith("SanitizedAssignmentsDirectory")) {
- this.sanitizedAssignmentsDirectory = line.split(" ")[1];
- } else if (line.startsWith("TorperfOutputDirectory")) {
- this.torperfOutputDirectory = line.split(" ")[1];
- } else if (line.startsWith("TorperfSource")) {
- if (this.torperfSources == null) {
- this.torperfSources = new TreeMap<String, String>();
- }
- String[] parts = line.split(" ");
- String sourceName = parts[1];
- String baseUrl = parts[2];
- this.torperfSources.put(sourceName, baseUrl);
- } else if (line.startsWith("TorperfFiles")) {
- if (this.torperfFiles == null) {
- this.torperfFiles = new ArrayList<String>();
- }
- String[] parts = line.split(" ");
- if (parts.length != 5) {
- logger.severe("Configuration file contains TorperfFiles "
- + "option with wrong number of values in line '" + line
- + "'! Exiting!");
- System.exit(1);
- }
- this.torperfFiles.add(line);
- } else {
- logger.severe("Configuration file contains unrecognized "
- + "configuration key in line '" + line + "'! Exiting!");
- System.exit(1);
- }
- }
- br.close();
- } catch (ArrayIndexOutOfBoundsException e) {
- logger.severe("Configuration file contains configuration key "
- + "without value in line '" + line + "'. Exiting!");
- System.exit(1);
- } catch (MalformedURLException e) {
- logger.severe("Configuration file contains illegal URL or IP:port "
- + "pair in line '" + line + "'. Exiting!");
- System.exit(1);
- } catch (NumberFormatException e) {
- logger.severe("Configuration file contains illegal value in line '"
- + line + "' with legal values being 0 or 1. Exiting!");
- System.exit(1);
- } catch (IOException e) {
- logger.log(Level.SEVERE, "Unknown problem while reading config "
- + "file! Exiting!", e);
- System.exit(1);
- }
- }
- public String getDirectoryArchivesOutputDirectory() {
- return this.directoryArchivesOutputDirectory;
- }
- public boolean getImportCachedRelayDescriptors() {
- return this.importCachedRelayDescriptors;
- }
- public List<String> getCachedRelayDescriptorDirectory() {
- return this.cachedRelayDescriptorsDirectory;
- }
- public boolean getImportDirectoryArchives() {
- return this.importDirectoryArchives;
- }
- public String getDirectoryArchivesDirectory() {
- return this.directoryArchivesDirectory;
- }
- public boolean getKeepDirectoryArchiveImportHistory() {
- return this.keepDirectoryArchiveImportHistory;
- }
- public boolean getReplaceIPAddressesWithHashes() {
- return this.replaceIPAddressesWithHashes;
- }
- public long getLimitBridgeDescriptorMappings() {
- return this.limitBridgeDescriptorMappings;
- }
- public String getSanitizedBridgesWriteDirectory() {
- return this.sanitizedBridgesWriteDirectory;
- }
- public String getBridgeSnapshotsDirectory() {
- return this.bridgeSnapshotsDirectory;
- }
- public boolean getDownloadRelayDescriptors() {
- return this.downloadRelayDescriptors;
- }
- public List<String> getDownloadFromDirectoryAuthorities() {
- return this.downloadFromDirectoryAuthorities;
- }
- public List<String> getDownloadVotesByFingerprint() {
- return this.downloadVotesByFingerprint;
- }
- public boolean getDownloadCurrentConsensus() {
- return this.downloadCurrentConsensus;
- }
- public boolean getDownloadCurrentMicrodescConsensus() {
- return this.downloadCurrentMicrodescConsensus;
- }
- public boolean getDownloadCurrentVotes() {
- return this.downloadCurrentVotes;
- }
- public boolean getDownloadMissingServerDescriptors() {
- return this.downloadMissingServerDescriptors;
- }
- public boolean getDownloadMissingExtraInfoDescriptors() {
- return this.downloadMissingExtraInfoDescriptors;
- }
- public boolean getDownloadMissingMicrodescriptors() {
- return this.downloadMissingMicrodescriptors;
- }
- public boolean getDownloadAllServerDescriptors() {
- return this.downloadAllServerDescriptors;
- }
- public boolean getDownloadAllExtraInfoDescriptors() {
- return this.downloadAllExtraInfoDescriptors;
- }
- public boolean getCompressRelayDescriptorDownloads() {
- return this.compressRelayDescriptorDownloads;
- }
- public String getAssignmentsDirectory() {
- return this.assignmentsDirectory;
- }
- public String getSanitizedAssignmentsDirectory() {
- return this.sanitizedAssignmentsDirectory;
- }
- public String getTorperfOutputDirectory() {
- return this.torperfOutputDirectory;
- }
- public SortedMap<String, String> getTorperfSources() {
- return this.torperfSources;
- }
- public List<String> getTorperfFiles() {
- return this.torperfFiles;
- }
-}
-
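
The configuration format read above is one "Key value" pair per line, with '#' starting comment lines and boolean options encoded as 0 or 1. A minimal sketch of that parsing convention, with illustrative keys borrowed from the options above:

    import java.io.BufferedReader;
    import java.io.FileReader;
    import java.io.IOException;

    public class ConfigParseSketch {

      private boolean importDirectoryArchives = false;
      private String directoryArchivesDirectory =
          "in/relay-descriptors/archives/";

      /* Read a config file with one "Key value" pair per line; comment
       * lines start with '#', and booleans are encoded as 0 or 1. */
      public void load(String path) throws IOException {
        BufferedReader br = new BufferedReader(new FileReader(path));
        String line;
        while ((line = br.readLine()) != null) {
          if (line.startsWith("#") || line.length() < 1) {
            continue;
          } else if (line.startsWith("ImportDirectoryArchives")) {
            this.importDirectoryArchives = Integer.parseInt(
                line.split(" ")[1]) != 0;
          } else if (line.startsWith("DirectoryArchivesDirectory")) {
            this.directoryArchivesDirectory = line.split(" ")[1];
          }
        }
        br.close();
      }
    }
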
diff --git a/src/org/torproject/ernie/db/main/LockFile.java b/src/org/torproject/ernie/db/main/LockFile.java
deleted file mode 100644
index d356d90..0000000
--- a/src/org/torproject/ernie/db/main/LockFile.java
+++ /dev/null
@@ -1,55 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db.main;
-
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.util.logging.Logger;
-
-public class LockFile {
-
- private File lockFile;
- private Logger logger;
-
- public LockFile(String moduleName) {
- this.lockFile = new File("lock/" + moduleName);
- this.logger = Logger.getLogger(LockFile.class.getName());
- }
-
- public boolean acquireLock() {
- this.logger.fine("Trying to acquire lock...");
- try {
- if (this.lockFile.exists()) {
- BufferedReader br = new BufferedReader(new FileReader(
- this.lockFile));
- long runStarted = Long.parseLong(br.readLine());
- br.close();
- if (System.currentTimeMillis() - runStarted < 55L * 60L * 1000L) {
- return false;
- }
- }
- this.lockFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- this.lockFile));
- bw.append("" + System.currentTimeMillis() + "\n");
- bw.close();
- this.logger.fine("Acquired lock.");
- return true;
- } catch (IOException e) {
- this.logger.warning("Caught exception while trying to acquire "
- + "lock!");
- return false;
- }
- }
-
- public void releaseLock() {
- this.logger.fine("Releasing lock...");
- this.lockFile.delete();
- this.logger.fine("Released lock.");
- }
-}
-
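
A usage sketch for the lock-file pattern above, assuming the class renamed to org.torproject.collector.main.LockFile by this commit; the module name is illustrative. Wrapping the work in try/finally is a small hardening over the straight-line acquire/run/release in the modules' main methods: without it, a crash mid-run leaves behind a lock that only expires after the 55-minute staleness window.

    import org.torproject.collector.main.LockFile;

    public class LockUsageSketch {
      public static void main(String[] args) {
        LockFile lf = new LockFile("example-module");
        if (!lf.acquireLock()) {
          System.err.println("Previous run still in progress or its "
              + "lock is less than 55 minutes old. Exiting.");
          System.exit(1);
        }
        try {
          /* ... run the module's processing here ... */
        } finally {
          lf.releaseLock();
        }
      }
    }
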
diff --git a/src/org/torproject/ernie/db/main/LoggingConfiguration.java b/src/org/torproject/ernie/db/main/LoggingConfiguration.java
deleted file mode 100644
index c60eb34..0000000
--- a/src/org/torproject/ernie/db/main/LoggingConfiguration.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db.main;
-
-import java.io.File;
-import java.io.IOException;
-import java.text.SimpleDateFormat;
-import java.util.Date;
-import java.util.TimeZone;
-import java.util.logging.ConsoleHandler;
-import java.util.logging.FileHandler;
-import java.util.logging.Formatter;
-import java.util.logging.Handler;
-import java.util.logging.Level;
-import java.util.logging.LogRecord;
-import java.util.logging.Logger;
-
-/**
- * Initialize logging configuration.
- *
- * Log levels used by ERNIE:
- *
- * - SEVERE: An event made it impossible to continue program execution.
- * - WARNING: A potential problem occurred that requires the operator to
- * look after the otherwise unattended setup.
- * - INFO: Messages on INFO level are meant to help the operator in making
- * sure that operation works as expected.
- * - FINE: Debug messages that are used to identify problems and which are
- * turned on by default.
- * - FINER: More detailed debug messages to investigate problems in more
- * detail. Not turned on by default. Increase log file limit when using
- * FINER.
- * - FINEST: Most detailed debug messages. Not used.
- */
-public class LoggingConfiguration {
- public LoggingConfiguration(String moduleName) {
-
- /* Remove default console handler. */
- for (Handler h : Logger.getLogger("").getHandlers()) {
- Logger.getLogger("").removeHandler(h);
- }
-
- /* Disable logging of internal Sun classes. */
- Logger.getLogger("sun").setLevel(Level.OFF);
-
- /* Set minimum log level we care about from INFO to FINER. */
- Logger.getLogger("").setLevel(Level.FINER);
-
- /* Create log handler that writes messages on WARNING or higher to the
- * console. */
- final SimpleDateFormat dateTimeFormat =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- Formatter cf = new Formatter() {
- public String format(LogRecord record) {
- return dateTimeFormat.format(new Date(record.getMillis())) + " "
- + record.getMessage() + "\n";
- }
- };
- Handler ch = new ConsoleHandler();
- ch.setFormatter(cf);
- ch.setLevel(Level.WARNING);
- Logger.getLogger("").addHandler(ch);
-
- /* Initialize own logger for this class. */
- Logger logger = Logger.getLogger(
- LoggingConfiguration.class.getName());
-
- /* Create log handler that writes all messages on FINE or higher to a
- * local file. */
- Formatter ff = new Formatter() {
- public String format(LogRecord record) {
- return dateTimeFormat.format(new Date(record.getMillis())) + " "
- + record.getLevel() + " " + record.getSourceClassName() + " "
- + record.getSourceMethodName() + " " + record.getMessage()
- + (record.getThrown() != null ? " " + record.getThrown() : "")
- + "\n";
- }
- };
- try {
- new File("log").mkdirs();
- FileHandler fh = new FileHandler("log/" + moduleName, 5000000, 5,
- true);
- fh.setFormatter(ff);
- fh.setLevel(Level.FINE);
- Logger.getLogger("").addHandler(fh);
- } catch (SecurityException e) {
- logger.log(Level.WARNING, "No permission to create log file. "
- + "Logging to file is disabled.", e);
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not write to log file. Logging to "
- + "file is disabled.", e);
- }
- }
-}
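
A usage sketch showing where messages end up with the handler setup above (console handler at WARNING, file handler at FINE), assuming the class renamed to org.torproject.collector.main.LoggingConfiguration by this commit:

    import java.util.logging.Logger;

    import org.torproject.collector.main.LoggingConfiguration;

    public class LoggingUsageSketch {
      public static void main(String[] args) {
        /* Install handlers once per process; "example-module" names
         * the log file created under log/. */
        new LoggingConfiguration("example-module");
        Logger logger = Logger.getLogger(
            LoggingUsageSketch.class.getName());
        logger.warning("Goes to the log file and to the console.");
        logger.info("Goes to the log file only.");
        logger.fine("Also log-file only; FINER and below are dropped "
            + "by the file handler.");
      }
    }
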
diff --git a/src/org/torproject/ernie/db/relaydescs/ArchiveReader.java b/src/org/torproject/ernie/db/relaydescs/ArchiveReader.java
deleted file mode 100644
index ea54874..0000000
--- a/src/org/torproject/ernie/db/relaydescs/ArchiveReader.java
+++ /dev/null
@@ -1,281 +0,0 @@
-/* Copyright 2010--2014 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db.relaydescs;
-
-import java.io.BufferedInputStream;
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.StringReader;
-import java.io.UnsupportedEncodingException;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.SortedSet;
-import java.util.Stack;
-import java.util.TimeZone;
-import java.util.TreeSet;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.apache.commons.codec.binary.Base64;
-import org.apache.commons.codec.digest.DigestUtils;
-import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
-
-/**
- * Read in all files in a given directory and pass buffered readers of
- * them to the relay descriptor parser.
- */
-public class ArchiveReader {
- public ArchiveReader(RelayDescriptorParser rdp, File archivesDirectory,
- File statsDirectory, boolean keepImportHistory) {
-
- if (rdp == null || archivesDirectory == null ||
- statsDirectory == null) {
- throw new IllegalArgumentException();
- }
-
- rdp.setArchiveReader(this);
- int parsedFiles = 0, ignoredFiles = 0;
- Logger logger = Logger.getLogger(ArchiveReader.class.getName());
- SortedSet<String> archivesImportHistory = new TreeSet<String>();
- File archivesImportHistoryFile = new File(statsDirectory,
- "archives-import-history");
- if (keepImportHistory && archivesImportHistoryFile.exists()) {
- try {
- BufferedReader br = new BufferedReader(new FileReader(
- archivesImportHistoryFile));
- String line = null;
- while ((line = br.readLine()) != null) {
- archivesImportHistory.add(line);
- }
- br.close();
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not read in archives import "
- + "history file. Skipping.");
- }
- }
- if (archivesDirectory.exists()) {
- logger.fine("Importing files in directory " + archivesDirectory
- + "/...");
- Stack<File> filesInInputDir = new Stack<File>();
- filesInInputDir.add(archivesDirectory);
- List<File> problems = new ArrayList<File>();
- Set<File> filesToRetry = new HashSet<File>();
- while (!filesInInputDir.isEmpty()) {
- File pop = filesInInputDir.pop();
- if (pop.isDirectory()) {
- for (File f : pop.listFiles()) {
- filesInInputDir.add(f);
- }
- } else {
- if (rdp != null) {
- try {
- BufferedInputStream bis = null;
- if (keepImportHistory &&
- archivesImportHistory.contains(pop.getName())) {
- ignoredFiles++;
- continue;
- } else if (pop.getName().endsWith(".tar.bz2")) {
- logger.warning("Cannot parse compressed tarball "
- + pop.getAbsolutePath() + ". Skipping.");
- continue;
- } else if (pop.getName().endsWith(".bz2")) {
- FileInputStream fis = new FileInputStream(pop);
- BZip2CompressorInputStream bcis =
- new BZip2CompressorInputStream(fis);
- bis = new BufferedInputStream(bcis);
- } else {
- FileInputStream fis = new FileInputStream(pop);
- bis = new BufferedInputStream(fis);
- }
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- int len;
- byte[] data = new byte[1024];
- while ((len = bis.read(data, 0, 1024)) >= 0) {
- baos.write(data, 0, len);
- }
- bis.close();
- byte[] allData = baos.toByteArray();
- boolean stored = rdp.parse(allData);
- if (!stored) {
- filesToRetry.add(pop);
- continue;
- }
- if (keepImportHistory) {
- archivesImportHistory.add(pop.getName());
- }
- parsedFiles++;
- } catch (IOException e) {
- problems.add(pop);
- if (problems.size() > 3) {
- break;
- }
- }
- }
- }
- }
- for (File pop : filesToRetry) {
- /* TODO We need to parse microdescriptors ourselves, rather than
- * letting RelayDescriptorParser do it, because only we know the
- * valid-after time(s) of the microdesc consensus(es) containing a
- * given microdescriptor. However, this breaks functional abstraction
- * pretty badly. */
- if (rdp != null) {
- try {
- BufferedInputStream bis = null;
- if (pop.getName().endsWith(".bz2")) {
- FileInputStream fis = new FileInputStream(pop);
- BZip2CompressorInputStream bcis =
- new BZip2CompressorInputStream(fis);
- bis = new BufferedInputStream(bcis);
- } else {
- FileInputStream fis = new FileInputStream(pop);
- bis = new BufferedInputStream(fis);
- }
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- int len;
- byte[] data = new byte[1024];
- while ((len = bis.read(data, 0, 1024)) >= 0) {
- baos.write(data, 0, len);
- }
- bis.close();
- byte[] allData = baos.toByteArray();
- BufferedReader br = new BufferedReader(new StringReader(
- new String(allData, "US-ASCII")));
- String line;
- do {
- line = br.readLine();
- } while (line != null && line.startsWith("@"));
- br.close();
- if (line == null) {
- logger.fine("We were given an empty descriptor for "
- + "parsing. Ignoring.");
- continue;
- }
- if (!line.equals("onion-key")) {
- logger.fine("Skipping non-recognized descriptor.");
- continue;
- }
- SimpleDateFormat parseFormat =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- parseFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- String ascii = null;
- try {
- ascii = new String(allData, "US-ASCII");
- } catch (UnsupportedEncodingException e) {
- /* No way that US-ASCII is not supported. */
- }
- int start = -1, end = -1;
- String startToken = "onion-key\n";
- while (end < ascii.length()) {
- start = ascii.indexOf(startToken, end);
- if (start < 0) {
- break;
- }
- end = ascii.indexOf(startToken, start + 1);
- if (end < 0) {
- end = ascii.length();
- if (end <= start) {
- break;
- }
- }
- byte[] descBytes = new byte[end - start];
- System.arraycopy(allData, start, descBytes, 0, end - start);
- String digest256Base64 = Base64.encodeBase64String(
- DigestUtils.sha256(descBytes)).replaceAll("=", "");
- String digest256Hex = DigestUtils.sha256Hex(descBytes);
- if (!this.microdescriptorValidAfterTimes.containsKey(
- digest256Hex)) {
- logger.fine("Could not store microdescriptor '"
- + digest256Hex + "', which was not contained in a "
- + "microdesc consensus.");
- continue;
- }
- for (String validAfterTime :
- this.microdescriptorValidAfterTimes.get(digest256Hex)) {
- try {
- long validAfter =
- parseFormat.parse(validAfterTime).getTime();
- rdp.storeMicrodescriptor(descBytes, digest256Hex,
- digest256Base64, validAfter);
- } catch (ParseException e) {
- logger.log(Level.WARNING, "Could not parse "
- + "valid-after time '" + validAfterTime + "'. Not "
- + "storing microdescriptor.", e);
- }
- }
- }
- if (keepImportHistory) {
- archivesImportHistory.add(pop.getName());
- }
- parsedFiles++;
- } catch (IOException e) {
- problems.add(pop);
- if (problems.size() > 3) {
- break;
- }
- }
- }
- }
- if (problems.isEmpty()) {
- logger.fine("Finished importing files in directory "
- + archivesDirectory + "/.");
- } else {
- StringBuilder sb = new StringBuilder("Failed importing files in "
- + "directory " + archivesDirectory + "/:");
- int printed = 0;
- for (File f : problems) {
- sb.append("\n " + f.getAbsolutePath());
- if (++printed >= 3) {
- sb.append("\n ... more");
- break;
- }
- }
- logger.warning(sb.toString());
- }
- }
- if (keepImportHistory) {
- try {
- archivesImportHistoryFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- archivesImportHistoryFile));
- for (String line : archivesImportHistory) {
- bw.write(line + "\n");
- }
- bw.close();
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not write archives import "
- + "history file.");
- }
- }
- logger.info("Finished importing relay descriptors from local "
- + "directory:\nParsed " + parsedFiles + ", ignored "
- + ignoredFiles + " files.");
- }
-
- private Map<String, Set<String>> microdescriptorValidAfterTimes =
- new HashMap<String, Set<String>>();
- public void haveParsedMicrodescConsensus(String validAfterTime,
- SortedSet<String> microdescriptorDigests) {
- for (String microdescriptor : microdescriptorDigests) {
- if (!this.microdescriptorValidAfterTimes.containsKey(
- microdescriptor)) {
- this.microdescriptorValidAfterTimes.put(microdescriptor,
- new HashSet<String>());
- }
- this.microdescriptorValidAfterTimes.get(microdescriptor).add(
- validAfterTime);
- }
- }
-}
-
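
The retry path above splits a file of concatenated microdescriptors on the "onion-key" line that starts each descriptor. A minimal sketch of that splitting step, assuming pure ASCII input so that byte and character offsets coincide:

    import java.io.UnsupportedEncodingException;
    import java.util.ArrayList;
    import java.util.List;

    public class MicrodescSplitSketch {

      /* Cut a concatenation of microdescriptors into individual byte
       * arrays, using the "onion-key" line that starts each descriptor
       * as the delimiter. */
      public static List<byte[]> split(byte[] allData)
          throws UnsupportedEncodingException {
        String ascii = new String(allData, "US-ASCII");
        List<byte[]> descriptors = new ArrayList<byte[]>();
        String startToken = "onion-key\n";
        int end = -1;
        while (end < ascii.length()) {
          int start = ascii.indexOf(startToken, end);
          if (start < 0) {
            break;                  /* no further descriptor start */
          }
          end = ascii.indexOf(startToken, start + 1);
          if (end < 0) {
            end = ascii.length();   /* last descriptor runs to EOF */
          }
          byte[] descBytes = new byte[end - start];
          System.arraycopy(allData, start, descBytes, 0, end - start);
          descriptors.add(descBytes);
        }
        return descriptors;
      }
    }
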
diff --git a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
deleted file mode 100644
index 60cfca9..0000000
--- a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
+++ /dev/null
@@ -1,831 +0,0 @@
-/* Copyright 2010--2014 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db.relaydescs;
-
-import java.io.BufferedOutputStream;
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.Arrays;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.SortedMap;
-import java.util.SortedSet;
-import java.util.Stack;
-import java.util.TimeZone;
-import java.util.TreeMap;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.torproject.descriptor.DescriptorParser;
-import org.torproject.descriptor.DescriptorSourceFactory;
-import org.torproject.descriptor.impl.DescriptorParseException;
-import org.torproject.ernie.db.main.Configuration;
-import org.torproject.ernie.db.main.LockFile;
-import org.torproject.ernie.db.main.LoggingConfiguration;
-
-public class ArchiveWriter extends Thread {
-
- public static void main(String[] args) {
-
- /* Initialize logging configuration. */
- new LoggingConfiguration("relay-descriptors");
- Logger logger = Logger.getLogger(ArchiveWriter.class.getName());
- logger.info("Starting relay-descriptors module of ERNIE.");
-
- // Initialize configuration
- Configuration config = new Configuration();
-
- // Use lock file to avoid overlapping runs
- LockFile lf = new LockFile("relay-descriptors");
- if (!lf.acquireLock()) {
- logger.severe("Warning: ERNIE is already running or has not exited "
- + "cleanly! Exiting!");
- System.exit(1);
- }
-
- // Import/download relay descriptors from the various sources
- new ArchiveWriter(config).run();
-
- new ReferenceChecker(new File("recent/relay-descriptors"),
- new File("stats/references"),
- new File("stats/references-history")).check();
-
- // Remove lock file
- lf.releaseLock();
-
- logger.info("Terminating relay-descriptors module of ERNIE.");
- }
-
- private Configuration config;
-
- public ArchiveWriter(Configuration config) {
- this.config = config;
- }
-
- private long now = System.currentTimeMillis();
- private Logger logger;
- private File outputDirectory;
- private String rsyncCatString;
- private DescriptorParser descriptorParser;
- private int storedConsensusesCounter = 0,
- storedMicrodescConsensusesCounter = 0, storedVotesCounter = 0,
- storedCertsCounter = 0, storedServerDescriptorsCounter = 0,
- storedExtraInfoDescriptorsCounter = 0,
- storedMicrodescriptorsCounter = 0;
-
- private SortedMap<Long, SortedSet<String>> storedConsensuses =
- new TreeMap<Long, SortedSet<String>>();
- private SortedMap<Long, SortedSet<String>> storedMicrodescConsensuses =
- new TreeMap<Long, SortedSet<String>>();
- private SortedMap<Long, Integer> expectedVotes =
- new TreeMap<Long, Integer>();
- private SortedMap<Long, SortedMap<String, SortedSet<String>>>
- storedVotes =
- new TreeMap<Long, SortedMap<String, SortedSet<String>>>();
- private SortedMap<Long, Map<String, String>> storedServerDescriptors =
- new TreeMap<Long, Map<String, String>>();
- private SortedMap<Long, Set<String>> storedExtraInfoDescriptors =
- new TreeMap<Long, Set<String>>();
- private SortedMap<Long, Set<String>> storedMicrodescriptors =
- new TreeMap<Long, Set<String>>();
-
- private File storedServerDescriptorsFile = new File(
- "stats/stored-server-descriptors");
- private File storedExtraInfoDescriptorsFile = new File(
- "stats/stored-extra-info-descriptors");
- private File storedMicrodescriptorsFile = new File(
- "stats/stored-microdescriptors");
-
- private void loadDescriptorDigests() {
- SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
- "yyyy-MM-dd HH:mm:ss");
- dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- try {
- if (this.storedServerDescriptorsFile.exists()) {
- BufferedReader br = new BufferedReader(new FileReader(
- this.storedServerDescriptorsFile));
- String line;
- while ((line = br.readLine()) != null) {
- String[] parts = line.split(",");
- if (parts.length != 3) {
- this.logger.warning("Could not load server descriptor "
- + "digests because of illegal line '" + line + "'. We "
- + "might not be able to correctly check descriptors for "
- + "completeness.");
- break;
- }
- long published = dateTimeFormat.parse(parts[0]).getTime();
- if (published < this.now - 48L * 60L * 60L * 1000L) {
- continue;
- }
- if (!this.storedServerDescriptors.containsKey(published)) {
- this.storedServerDescriptors.put(published,
- new HashMap<String, String>());
- }
- String serverDescriptorDigest = parts[1];
- String extraInfoDescriptorDigest = parts[2].equals("NA") ? null
- : parts[2];
- this.storedServerDescriptors.get(published).put(
- serverDescriptorDigest, extraInfoDescriptorDigest);
- }
- br.close();
- }
- if (this.storedExtraInfoDescriptorsFile.exists()) {
- BufferedReader br = new BufferedReader(new FileReader(
- this.storedExtraInfoDescriptorsFile));
- String line;
- while ((line = br.readLine()) != null) {
- String[] parts = line.split(",");
- if (parts.length != 2) {
- this.logger.warning("Could not load extra-info descriptor "
- + "digests because of illegal line '" + line + "'. We "
- + "might not be able to correctly check descriptors for "
- + "completeness.");
- break;
- }
- long published = dateTimeFormat.parse(parts[0]).getTime();
- if (published < this.now - 48L * 60L * 60L * 1000L) {
- continue;
- }
- if (!this.storedExtraInfoDescriptors.containsKey(published)) {
- this.storedExtraInfoDescriptors.put(published,
- new HashSet<String>());
- }
- String extraInfoDescriptorDigest = parts[1];
- this.storedExtraInfoDescriptors.get(published).add(
- extraInfoDescriptorDigest);
- }
- br.close();
- }
- if (this.storedMicrodescriptorsFile.exists()) {
- BufferedReader br = new BufferedReader(new FileReader(
- this.storedMicrodescriptorsFile));
- String line;
- while ((line = br.readLine()) != null) {
- String[] parts = line.split(",");
- if (parts.length != 2) {
- this.logger.warning("Could not load microdescriptor digests "
- + "because of illegal line '" + line + "'. We might not "
- + "be able to correctly check descriptors for "
- + "completeness.");
- break;
- }
- long validAfter = dateTimeFormat.parse(parts[0]).getTime();
- if (validAfter < this.now - 40L * 24L * 60L * 60L * 1000L) {
- continue;
- }
- if (!this.storedMicrodescriptors.containsKey(validAfter)) {
- this.storedMicrodescriptors.put(validAfter,
- new HashSet<String>());
- }
- String microdescriptorDigest = parts[1];
- this.storedMicrodescriptors.get(validAfter).add(
- microdescriptorDigest);
- }
- br.close();
- }
- } catch (ParseException e) {
- this.logger.log(Level.WARNING, "Could not load descriptor "
- + "digests. We might not be able to correctly check "
- + "descriptors for completeness.", e);
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not load descriptor "
- + "digests. We might not be able to correctly check "
- + "descriptors for completeness.", e);
- }
- }
-
- private void saveDescriptorDigests() {
- SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
- "yyyy-MM-dd HH:mm:ss");
- dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- try {
- this.storedServerDescriptorsFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- this.storedServerDescriptorsFile));
- for (Map.Entry<Long, Map<String, String>> e :
- this.storedServerDescriptors.entrySet()) {
- String published = dateTimeFormat.format(e.getKey());
- for (Map.Entry<String, String> f : e.getValue().entrySet()) {
- String serverDescriptorDigest = f.getKey();
- String extraInfoDescriptorDigest = f.getValue() == null ? "NA"
- : f.getValue();
- bw.write(String.format("%s,%s,%s%n", published,
- serverDescriptorDigest, extraInfoDescriptorDigest));
- }
- }
- bw.close();
- this.storedExtraInfoDescriptorsFile.getParentFile().mkdirs();
- bw = new BufferedWriter(new FileWriter(
- this.storedExtraInfoDescriptorsFile));
- for (Map.Entry<Long, Set<String>> e :
- this.storedExtraInfoDescriptors.entrySet()) {
- String published = dateTimeFormat.format(e.getKey());
- for (String extraInfoDescriptorDigest : e.getValue()) {
- bw.write(String.format("%s,%s%n", published,
- extraInfoDescriptorDigest));
- }
- }
- bw.close();
- this.storedMicrodescriptorsFile.getParentFile().mkdirs();
- bw = new BufferedWriter(new FileWriter(
- this.storedMicrodescriptorsFile));
- for (Map.Entry<Long, Set<String>> e :
- this.storedMicrodescriptors.entrySet()) {
- String validAfter = dateTimeFormat.format(e.getKey());
- for (String microdescriptorDigest : e.getValue()) {
- bw.write(String.format("%s,%s%n", validAfter,
- microdescriptorDigest));
- }
- }
- bw.close();
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not save descriptor "
- + "digests. We might not be able to correctly check "
- + "descriptors for completeness in the next run.", e);
- }
- }
-
- public void run() {
-
- File outputDirectory =
- new File(config.getDirectoryArchivesOutputDirectory());
- File statsDirectory = new File("stats");
-
- this.logger = Logger.getLogger(ArchiveWriter.class.getName());
- this.outputDirectory = outputDirectory;
- SimpleDateFormat rsyncCatFormat = new SimpleDateFormat(
- "yyyy-MM-dd-HH-mm-ss");
- rsyncCatFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- this.rsyncCatString = rsyncCatFormat.format(
- System.currentTimeMillis());
- this.descriptorParser =
- DescriptorSourceFactory.createDescriptorParser();
-
- this.loadDescriptorDigests();
-
- // Prepare relay descriptor parser
- RelayDescriptorParser rdp = new RelayDescriptorParser(this);
-
- RelayDescriptorDownloader rdd = null;
- if (config.getDownloadRelayDescriptors()) {
- List<String> dirSources =
- config.getDownloadFromDirectoryAuthorities();
- rdd = new RelayDescriptorDownloader(rdp, dirSources,
- config.getDownloadVotesByFingerprint(),
- config.getDownloadCurrentConsensus(),
- config.getDownloadCurrentMicrodescConsensus(),
- config.getDownloadCurrentVotes(),
- config.getDownloadMissingServerDescriptors(),
- config.getDownloadMissingExtraInfoDescriptors(),
- config.getDownloadMissingMicrodescriptors(),
- config.getDownloadAllServerDescriptors(),
- config.getDownloadAllExtraInfoDescriptors(),
- config.getCompressRelayDescriptorDownloads());
- rdp.setRelayDescriptorDownloader(rdd);
- }
- if (config.getImportCachedRelayDescriptors()) {
- new CachedRelayDescriptorReader(rdp,
- config.getCachedRelayDescriptorDirectory(), statsDirectory);
- this.intermediateStats("importing relay descriptors from local "
- + "Tor data directories");
- }
- if (config.getImportDirectoryArchives()) {
- new ArchiveReader(rdp,
- new File(config.getDirectoryArchivesDirectory()),
- statsDirectory,
- config.getKeepDirectoryArchiveImportHistory());
- this.intermediateStats("importing relay descriptors from local "
- + "directory");
- }
- if (rdd != null) {
- rdd.downloadDescriptors();
- rdd.writeFile();
- rdd = null;
- this.intermediateStats("downloading relay descriptors from the "
- + "directory authorities");
- }
-
- this.checkMissingDescriptors();
-
- this.checkStaledescriptors();
-
- this.cleanUpRsyncDirectory();
-
- this.saveDescriptorDigests();
- }
-
- private boolean store(byte[] typeAnnotation, byte[] data,
- File[] outputFiles, boolean[] append) {
- try {
- this.logger.finer("Storing " + outputFiles[0]);
- if (this.descriptorParser.parseDescriptors(data,
- outputFiles[0].getName()).size() != 1) {
- this.logger.info("Relay descriptor file " + outputFiles[0]
- + " doesn't contain exactly one descriptor. Not storing.");
- return false;
- }
- for (int i = 0; i < outputFiles.length; i++) {
- File outputFile = outputFiles[i];
- boolean appendToFile = append == null ? false : append[i];
- outputFile.getParentFile().mkdirs();
- BufferedOutputStream bos = new BufferedOutputStream(
- new FileOutputStream(outputFile, appendToFile));
- if (data.length > 0 && data[0] != '@') {
- bos.write(typeAnnotation, 0, typeAnnotation.length);
- }
- bos.write(data, 0, data.length);
- bos.close();
- }
- return true;
- } catch (DescriptorParseException e) {
- this.logger.log(Level.WARNING, "Could not parse relay descriptor "
- + outputFiles[0] + " before storing it to disk. Skipping.", e);
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not store relay descriptor "
- + outputFiles[0], e);
- }
- return false;
- }
-
- private static final byte[] CONSENSUS_ANNOTATION =
- "@type network-status-consensus-3 1.0\n".getBytes();
- public void storeConsensus(byte[] data, long validAfter,
- SortedSet<String> dirSources,
- SortedSet<String> serverDescriptorDigests) {
- SimpleDateFormat printFormat = new SimpleDateFormat(
- "yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
- printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- File tarballFile = new File(this.outputDirectory + "/consensus/"
- + printFormat.format(new Date(validAfter)) + "-consensus");
- boolean tarballFileExistedBefore = tarballFile.exists();
- File rsyncFile = new File("recent/relay-descriptors/consensuses/"
- + tarballFile.getName());
- File[] outputFiles = new File[] { tarballFile, rsyncFile };
- if (this.store(CONSENSUS_ANNOTATION, data, outputFiles, null)) {
- this.storedConsensusesCounter++;
- }
- if (!tarballFileExistedBefore &&
- this.now - validAfter < 3L * 60L * 60L * 1000L) {
- this.storedConsensuses.put(validAfter, serverDescriptorDigests);
- this.expectedVotes.put(validAfter, dirSources.size());
- }
- }
-
- private static final byte[] MICRODESCCONSENSUS_ANNOTATION =
- "@type network-status-microdesc-consensus-3 1.0\n".getBytes();
- public void storeMicrodescConsensus(byte[] data, long validAfter,
- SortedSet<String> microdescriptorDigests) {
- SimpleDateFormat yearMonthDirectoryFormat = new SimpleDateFormat(
- "yyyy/MM");
- yearMonthDirectoryFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- SimpleDateFormat dayDirectoryFileFormat = new SimpleDateFormat(
- "dd/yyyy-MM-dd-HH-mm-ss");
- dayDirectoryFileFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- File tarballFile = new File(this.outputDirectory
- + "/microdesc/" + yearMonthDirectoryFormat.format(validAfter)
- + "/consensus-microdesc/"
- + dayDirectoryFileFormat.format(validAfter)
- + "-consensus-microdesc");
- boolean tarballFileExistedBefore = tarballFile.exists();
- File rsyncFile = new File("recent/relay-descriptors/microdescs/"
- + "consensus-microdesc/" + tarballFile.getName());
- File[] outputFiles = new File[] { tarballFile, rsyncFile };
- if (this.store(MICRODESCCONSENSUS_ANNOTATION, data, outputFiles,
- null)) {
- this.storedMicrodescConsensusesCounter++;
- }
- if (!tarballFileExistedBefore &&
- this.now - validAfter < 3L * 60L * 60L * 1000L) {
- this.storedMicrodescConsensuses.put(validAfter,
- microdescriptorDigests);
- }
- }
-
- private static final byte[] VOTE_ANNOTATION =
- "@type network-status-vote-3 1.0\n".getBytes();
- public void storeVote(byte[] data, long validAfter,
- String fingerprint, String digest,
- SortedSet<String> serverDescriptorDigests) {
- SimpleDateFormat printFormat = new SimpleDateFormat(
- "yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
- printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- File tarballFile = new File(this.outputDirectory + "/vote/"
- + printFormat.format(new Date(validAfter)) + "-vote-"
- + fingerprint + "-" + digest);
- boolean tarballFileExistedBefore = tarballFile.exists();
- File rsyncFile = new File("recent/relay-descriptors/votes/"
- + tarballFile.getName());
- File[] outputFiles = new File[] { tarballFile, rsyncFile };
- if (this.store(VOTE_ANNOTATION, data, outputFiles, null)) {
- this.storedVotesCounter++;
- }
- if (!tarballFileExistedBefore &&
- this.now - validAfter < 3L * 60L * 60L * 1000L) {
- if (!this.storedVotes.containsKey(validAfter)) {
- this.storedVotes.put(validAfter,
- new TreeMap<String, SortedSet<String>>());
- }
- this.storedVotes.get(validAfter).put(fingerprint,
- serverDescriptorDigests);
- }
- }
-
- private static final byte[] CERTIFICATE_ANNOTATION =
- "@type dir-key-certificate-3 1.0\n".getBytes();
- public void storeCertificate(byte[] data, String fingerprint,
- long published) {
- SimpleDateFormat printFormat = new SimpleDateFormat(
- "yyyy-MM-dd-HH-mm-ss");
- printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- File tarballFile = new File(this.outputDirectory + "/certs/"
- + fingerprint + "-" + printFormat.format(new Date(published)));
- File[] outputFiles = new File[] { tarballFile };
- if (this.store(CERTIFICATE_ANNOTATION, data, outputFiles, null)) {
- this.storedCertsCounter++;
- }
- }
-
- private static final byte[] SERVER_DESCRIPTOR_ANNOTATION =
- "@type server-descriptor 1.0\n".getBytes();
- public void storeServerDescriptor(byte[] data, String digest,
- long published, String extraInfoDigest) {
- SimpleDateFormat printFormat = new SimpleDateFormat("yyyy/MM/");
- printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- File tarballFile = new File(this.outputDirectory
- + "/server-descriptor/" + printFormat.format(new Date(published))
- + digest.substring(0, 1) + "/" + digest.substring(1, 2) + "/"
- + digest);
- boolean tarballFileExistedBefore = tarballFile.exists();
- File rsyncCatFile = new File("recent/relay-descriptors/"
- + "server-descriptors/" + this.rsyncCatString
- + "-server-descriptors.tmp");
- File[] outputFiles = new File[] { tarballFile, rsyncCatFile };
- boolean[] append = new boolean[] { false, true };
- if (this.store(SERVER_DESCRIPTOR_ANNOTATION, data, outputFiles,
- append)) {
- this.storedServerDescriptorsCounter++;
- }
- if (!tarballFileExistedBefore &&
- this.now - published < 48L * 60L * 60L * 1000L) {
- if (!this.storedServerDescriptors.containsKey(published)) {
- this.storedServerDescriptors.put(published,
- new HashMap<String, String>());
- }
- this.storedServerDescriptors.get(published).put(digest,
- extraInfoDigest);
- }
- }
-
- private static final byte[] EXTRA_INFO_ANNOTATION =
- "@type extra-info 1.0\n".getBytes();
- public void storeExtraInfoDescriptor(byte[] data,
- String extraInfoDigest, long published) {
- SimpleDateFormat descriptorFormat = new SimpleDateFormat("yyyy/MM/");
- descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- File tarballFile = new File(this.outputDirectory + "/extra-info/"
- + descriptorFormat.format(new Date(published))
- + extraInfoDigest.substring(0, 1) + "/"
- + extraInfoDigest.substring(1, 2) + "/"
- + extraInfoDigest);
- boolean tarballFileExistedBefore = tarballFile.exists();
- File rsyncCatFile = new File("recent/relay-descriptors/"
- + "extra-infos/" + this.rsyncCatString + "-extra-infos.tmp");
- File[] outputFiles = new File[] { tarballFile, rsyncCatFile };
- boolean[] append = new boolean[] { false, true };
- if (this.store(EXTRA_INFO_ANNOTATION, data, outputFiles, append)) {
- this.storedExtraInfoDescriptorsCounter++;
- }
- if (!tarballFileExistedBefore &&
- this.now - published < 48L * 60L * 60L * 1000L) {
- if (!this.storedExtraInfoDescriptors.containsKey(published)) {
- this.storedExtraInfoDescriptors.put(published,
- new HashSet<String>());
- }
- this.storedExtraInfoDescriptors.get(published).add(extraInfoDigest);
- }
- }
-
- private static final byte[] MICRODESCRIPTOR_ANNOTATION =
- "@type microdescriptor 1.0\n".getBytes();
- public void storeMicrodescriptor(byte[] data,
- String microdescriptorDigest, long validAfter) {
- /* TODO We could check here whether we already stored the
- * microdescriptor in the same valid-after month. This can happen,
- * e.g., when two relays share the same microdescriptor. In that case
- * this method gets called twice and the second call overwrites the
- * file written in the first call. However, this method must be
- * called twice to store the same microdescriptor in two different
- * valid-after months. */
- SimpleDateFormat descriptorFormat = new SimpleDateFormat("yyyy/MM/");
- descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- File tarballFile = new File(this.outputDirectory + "/microdesc/"
- + descriptorFormat.format(validAfter) + "micro/"
- + microdescriptorDigest.substring(0, 1) + "/"
- + microdescriptorDigest.substring(1, 2) + "/"
- + microdescriptorDigest);
- boolean tarballFileExistedBefore = tarballFile.exists();
- File rsyncCatFile = new File("recent/relay-descriptors/"
- + "microdescs/micro/" + this.rsyncCatString
- + "-micro.tmp");
- File[] outputFiles = new File[] { tarballFile, rsyncCatFile };
- boolean[] append = new boolean[] { false, true };
- if (this.store(MICRODESCRIPTOR_ANNOTATION, data, outputFiles,
- append)) {
- this.storedMicrodescriptorsCounter++;
- }
- if (!tarballFileExistedBefore &&
- this.now - validAfter < 40L * 24L * 60L * 60L * 1000L) {
- if (!this.storedMicrodescriptors.containsKey(validAfter)) {
- this.storedMicrodescriptors.put(validAfter,
- new HashSet<String>());
- }
- this.storedMicrodescriptors.get(validAfter).add(
- microdescriptorDigest);
- }
- }
-
- private StringBuilder intermediateStats = new StringBuilder();
- public void intermediateStats(String event) {
- intermediateStats.append("While " + event + ", we stored "
- + this.storedConsensusesCounter + " consensus(es), "
- + this.storedMicrodescConsensusesCounter + " microdesc "
- + "consensus(es), " + this.storedVotesCounter + " vote(s), "
- + this.storedCertsCounter + " certificate(s), "
- + this.storedServerDescriptorsCounter + " server descriptor(s), "
- + this.storedExtraInfoDescriptorsCounter + " extra-info "
- + "descriptor(s), and " + this.storedMicrodescriptorsCounter
- + " microdescriptor(s) to disk.\n");
- this.storedConsensusesCounter = 0;
- this.storedMicrodescConsensusesCounter = 0;
- this.storedVotesCounter = 0;
- this.storedCertsCounter = 0;
- this.storedServerDescriptorsCounter = 0;
- this.storedExtraInfoDescriptorsCounter = 0;
- this.storedMicrodescriptorsCounter = 0;
- }
-
- private void checkMissingDescriptors() {
- StringBuilder sb = new StringBuilder("Finished writing relay "
- + "descriptors to disk.\n");
- sb.append(intermediateStats.toString());
- sb.append("Statistics on the completeness of written relay "
- + "descriptors:");
- SimpleDateFormat dateTimeFormat =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- Map<String, String> knownServerDescriptors =
- new HashMap<String, String>();
- for (Map<String, String> descriptors :
- this.storedServerDescriptors.values()) {
- knownServerDescriptors.putAll(descriptors);
- }
- Set<String> knownExtraInfoDescriptors = new HashSet<String>();
- for (Set<String> descriptors :
- this.storedExtraInfoDescriptors.values()) {
- knownExtraInfoDescriptors.addAll(descriptors);
- }
- Set<String> knownMicrodescriptors = new HashSet<String>();
- for (Set<String> descriptors : this.storedMicrodescriptors.values()) {
- knownMicrodescriptors.addAll(descriptors);
- }
- boolean missingDescriptors = false, missingVotes = false,
- missingMicrodescConsensus = false;
- for (Map.Entry<Long, SortedSet<String>> c :
- this.storedConsensuses.entrySet()) {
- long validAfterMillis = c.getKey();
- String validAfterTime = dateTimeFormat.format(validAfterMillis);
- int allVotes = this.expectedVotes.containsKey(validAfterMillis)
- ? this.expectedVotes.get(validAfterMillis) : 0;
- int foundVotes = 0;
- if (this.storedVotes.containsKey(validAfterMillis)) {
- foundVotes = this.storedVotes.get(validAfterMillis).size();
- for (Map.Entry<String, SortedSet<String>> v :
- this.storedVotes.get(validAfterMillis).entrySet()) {
- int voteFoundServerDescs = 0, voteAllServerDescs = 0,
- voteFoundExtraInfos = 0, voteAllExtraInfos = 0;
- for (String serverDescriptorDigest : v.getValue()) {
- voteAllServerDescs++;
- if (knownServerDescriptors.containsKey(
- serverDescriptorDigest)) {
- voteFoundServerDescs++;
- if (knownServerDescriptors.get(serverDescriptorDigest)
- != null) {
- String extraInfoDescriptorDigest =
- knownServerDescriptors.get(serverDescriptorDigest);
- voteAllExtraInfos++;
- if (knownExtraInfoDescriptors.contains(
- extraInfoDescriptorDigest)) {
- voteFoundExtraInfos++;
- }
- }
- }
- }
- sb.append("\nV, " + validAfterTime);
- if (voteAllServerDescs > 0) {
- sb.append(String.format(", %d/%d S (%.1f%%)",
- voteFoundServerDescs, voteAllServerDescs,
- 100.0D * (double) voteFoundServerDescs /
- (double) voteAllServerDescs));
- } else {
- sb.append(", 0/0 S");
- }
- if (voteAllExtraInfos > 0) {
- sb.append(String.format(", %d/%d E (%.1f%%)",
- voteFoundExtraInfos, voteAllExtraInfos,
- 100.0D * (double) voteFoundExtraInfos /
- (double) voteAllExtraInfos));
- } else {
- sb.append(", 0/0 E");
- }
- String fingerprint = v.getKey();
- /* Ignore turtles when warning about missing descriptors. */
- if (!fingerprint.equalsIgnoreCase(
- "27B6B5996C426270A5C95488AA5BCEB6BCC86956") &&
- (voteFoundServerDescs * 1000 < voteAllServerDescs * 995 ||
- voteFoundExtraInfos * 1000 < voteAllExtraInfos * 995)) {
- missingDescriptors = true;
- }
- }
- }
- int foundServerDescs = 0, allServerDescs = 0, foundExtraInfos = 0,
- allExtraInfos = 0, foundMicrodescriptors = 0,
- allMicrodescriptors = 0;
- for (String serverDescriptorDigest : c.getValue()) {
- allServerDescs++;
- if (knownServerDescriptors.containsKey(
- serverDescriptorDigest)) {
- foundServerDescs++;
- if (knownServerDescriptors.get(
- serverDescriptorDigest) != null) {
- allExtraInfos++;
- String extraInfoDescriptorDigest =
- knownServerDescriptors.get(serverDescriptorDigest);
- if (knownExtraInfoDescriptors.contains(
- extraInfoDescriptorDigest)) {
- foundExtraInfos++;
- }
- }
- }
- }
- sb.append("\nC, " + validAfterTime);
- if (allVotes > 0) {
- sb.append(String.format(", %d/%d V (%.1f%%)", foundVotes, allVotes,
- 100.0D * (double) foundVotes / (double) allVotes));
- } else {
- sb.append(", 0/0 V");
- }
- if (allServerDescs > 0) {
- sb.append(String.format(", %d/%d S (%.1f%%)", foundServerDescs,
- allServerDescs, 100.0D * (double) foundServerDescs /
- (double) allServerDescs));
- } else {
- sb.append(", 0/0 S");
- }
- if (allExtraInfos > 0) {
- sb.append(String.format(", %d/%d E (%.1f%%)", foundExtraInfos,
- allExtraInfos, 100.0D * (double) foundExtraInfos /
- (double) allExtraInfos));
- } else {
- sb.append(", 0/0 E");
- }
- if (this.storedMicrodescConsensuses.containsKey(validAfterMillis)) {
- for (String microdescriptorDigest :
- this.storedMicrodescConsensuses.get(validAfterMillis)) {
- allMicrodescriptors++;
- if (knownMicrodescriptors.contains(microdescriptorDigest)) {
- foundMicrodescriptors++;
- }
- }
- sb.append("\nM, " + validAfterTime);
- if (allMicrodescriptors > 0) {
- sb.append(String.format(", %d/%d M (%.1f%%)",
- foundMicrodescriptors, allMicrodescriptors,
- 100.0D * (double) foundMicrodescriptors /
- (double) allMicrodescriptors));
- } else {
- sb.append(", 0/0 M");
- }
- } else {
- missingMicrodescConsensus = true;
- }
- if (foundServerDescs * 1000 < allServerDescs * 995 ||
- foundExtraInfos * 1000 < allExtraInfos * 995 ||
- foundMicrodescriptors * 1000 < allMicrodescriptors * 995) {
- missingDescriptors = true;
- }
- if (foundVotes < allVotes) {
- missingVotes = true;
- }
- }
- this.logger.info(sb.toString());
- if (missingDescriptors) {
- this.logger.fine("We are missing at least 0.5% of server or "
- + "extra-info descriptors referenced from a consensus or "
- + "vote or at least 0.5% of microdescriptors referenced from a "
- + "microdesc consensus.");
- }
- if (missingVotes) {
- /* TODO Shouldn't warn if we're not trying to archive votes at
- * all. */
- this.logger.fine("We are missing at least one vote that was "
- + "referenced from a consensus.");
- }
- if (missingMicrodescConsensus) {
- /* TODO Shouldn't warn if we're not trying to archive microdesc
- * consensuses at all. */
- this.logger.fine("We are missing at least one microdesc "
- + "consensus that was published together with a known "
- + "consensus.");
- }
- }
-
- private void checkStaledescriptors() {
- SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
- "yyyy-MM-dd HH:mm:ss");
- dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- long tooOldMillis = this.now - 330L * 60L * 1000L;
- if (!this.storedConsensuses.isEmpty() &&
- this.storedConsensuses.lastKey() < tooOldMillis) {
- this.logger.warning("The last known relay network status "
- + "consensus was valid after "
- + dateTimeFormat.format(this.storedConsensuses.lastKey())
- + ", which is more than 5:30 hours in the past.");
- }
- if (!this.storedMicrodescConsensuses.isEmpty() &&
- this.storedMicrodescConsensuses.lastKey() < tooOldMillis) {
- this.logger.warning("The last known relay network status "
- + "microdesc consensus was valid after "
- + dateTimeFormat.format(
- this.storedMicrodescConsensuses.lastKey())
- + ", which is more than 5:30 hours in the past.");
- }
- if (!this.storedVotes.isEmpty() &&
- this.storedVotes.lastKey() < tooOldMillis) {
- this.logger.warning("The last known relay network status vote "
- + "was valid after " + dateTimeFormat.format(
- this.storedVotes.lastKey()) + ", which is more than 5:30 hours "
- + "in the past.");
- }
- if (!this.storedServerDescriptors.isEmpty() &&
- this.storedServerDescriptors.lastKey() < tooOldMillis) {
- this.logger.warning("The last known relay server descriptor was "
- + "published at "
- + dateTimeFormat.format(this.storedServerDescriptors.lastKey())
- + ", which is more than 5:30 hours in the past.");
- }
- if (!this.storedExtraInfoDescriptors.isEmpty() &&
- this.storedExtraInfoDescriptors.lastKey() < tooOldMillis) {
- this.logger.warning("The last known relay extra-info descriptor "
- + "was published at " + dateTimeFormat.format(
- this.storedExtraInfoDescriptors.lastKey())
- + ", which is more than 5:30 hours in the past.");
- }
- if (!this.storedMicrodescriptors.isEmpty() &&
- this.storedMicrodescriptors.lastKey() < tooOldMillis) {
- this.logger.warning("The last known relay microdescriptor was "
- + "contained in a microdesc consensus that was valid after "
- + dateTimeFormat.format(this.storedMicrodescriptors.lastKey())
- + ", which is more than 5:30 hours in the past.");
- }
- }
-
- /* Delete all files from the rsync directory that have not been modified
- * in the last three days (except for microdescriptors which are kept
- * for up to thirty days), and remove the .tmp extension from newly
- * written files. */
- public void cleanUpRsyncDirectory() {
- long cutOffMillis = System.currentTimeMillis()
- - 3L * 24L * 60L * 60L * 1000L;
- long cutOffMicroMillis = cutOffMillis - 27L * 24L * 60L * 60L * 1000L;
- Stack<File> allFiles = new Stack<File>();
- allFiles.add(new File("recent/relay-descriptors"));
- while (!allFiles.isEmpty()) {
- File file = allFiles.pop();
- if (file.isDirectory()) {
- allFiles.addAll(Arrays.asList(file.listFiles()));
- } else if (file.getName().endsWith("-micro")) {
- if (file.lastModified() < cutOffMicroMillis) {
- file.delete();
- }
- } else if (file.lastModified() < cutOffMillis) {
- file.delete();
- } else if (file.getName().endsWith(".tmp")) {
- file.renameTo(new File(file.getParentFile(),
- file.getName().substring(0,
- file.getName().lastIndexOf(".tmp"))));
- }
- }
- }
-}
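
A note on the completeness checks above: comparisons like
"foundServerDescs * 1000 < allServerDescs * 995" are the integer form of
"found / all < 99.5%", avoiding floating-point division while matching
the "missing at least 0.5%" log message. A minimal sketch of the same
test, with illustrative counts that are not from the original class:

    public class CompletenessThresholdSketch {

      /* True if fewer than 99.5% of the expected descriptors were found,
       * i.e., more than 0.5% are missing; integer math only. */
      static boolean missingTooMany(int found, int all) {
        return found * 1000 < all * 995;
      }

      public static void main(String[] args) {
        System.out.println(missingTooMany(995, 1000)); // false: exactly 99.5%
        System.out.println(missingTooMany(994, 1000)); // true: only 99.4%
        System.out.println(missingTooMany(0, 0));      // false: nothing expected
      }
    }
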
diff --git a/src/org/torproject/ernie/db/relaydescs/CachedRelayDescriptorReader.java b/src/org/torproject/ernie/db/relaydescs/CachedRelayDescriptorReader.java
deleted file mode 100644
index d0a2600..0000000
--- a/src/org/torproject/ernie/db/relaydescs/CachedRelayDescriptorReader.java
+++ /dev/null
@@ -1,250 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db.relaydescs;
-
-import java.io.BufferedInputStream;
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.StringReader;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-import java.util.SortedSet;
-import java.util.Stack;
-import java.util.TimeZone;
-import java.util.TreeSet;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.apache.commons.codec.binary.Hex;
-import org.apache.commons.codec.digest.DigestUtils;
-
-/**
- * Parses all descriptors in the local directory cacheddesc/ and sorts
- * them into the directory structure under directory-archive/.
- */
-public class CachedRelayDescriptorReader {
- public CachedRelayDescriptorReader(RelayDescriptorParser rdp,
- List<String> inputDirectories, File statsDirectory) {
-
- if (rdp == null || inputDirectories == null ||
- inputDirectories.isEmpty() || statsDirectory == null) {
- throw new IllegalArgumentException();
- }
-
- StringBuilder dumpStats = new StringBuilder("Finished importing "
- + "relay descriptors from local Tor data directories:");
- Logger logger = Logger.getLogger(
- CachedRelayDescriptorReader.class.getName());
-
- /* Read import history containing SHA-1 digests of previously parsed
- * statuses and descriptors, so that we can skip them in this run. */
- Set<String> lastImportHistory = new HashSet<String>(),
- currentImportHistory = new HashSet<String>();
- File importHistoryFile = new File(statsDirectory,
- "cacheddesc-import-history");
- if (importHistoryFile.exists()) {
- try {
- BufferedReader br = new BufferedReader(new FileReader(
- importHistoryFile));
- String line;
- while ((line = br.readLine()) != null) {
- lastImportHistory.add(line);
- }
- br.close();
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not read import history from "
- + importHistoryFile.getAbsolutePath() + ".", e);
- }
- }
-
- /* Read cached descriptors directories. */
- for (String inputDirectory : inputDirectories) {
- File cachedDescDir = new File(inputDirectory);
- if (!cachedDescDir.exists()) {
- logger.warning("Directory " + cachedDescDir.getAbsolutePath()
- + " does not exist. Skipping.");
- continue;
- }
- logger.fine("Reading " + cachedDescDir.getAbsolutePath()
- + " directory.");
- SortedSet<File> cachedDescFiles = new TreeSet<File>();
- Stack<File> files = new Stack<File>();
- files.add(cachedDescDir);
- while (!files.isEmpty()) {
- File file = files.pop();
- if (file.isDirectory()) {
- files.addAll(Arrays.asList(file.listFiles()));
- } else {
- cachedDescFiles.add(file);
- }
- }
- for (File f : cachedDescFiles) {
- try {
- // descriptors may contain non-ASCII chars; read as bytes to
- // determine digests
- BufferedInputStream bis =
- new BufferedInputStream(new FileInputStream(f));
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- int len;
- byte[] data = new byte[1024];
- while ((len = bis.read(data, 0, 1024)) >= 0) {
- baos.write(data, 0, len);
- }
- bis.close();
- byte[] allData = baos.toByteArray();
- if (f.getName().equals("cached-consensus")) {
- /* Check if directory information is stale. */
- BufferedReader br = new BufferedReader(new StringReader(
- new String(allData, "US-ASCII")));
- String line = null;
- while ((line = br.readLine()) != null) {
- if (line.startsWith("valid-after ")) {
- dumpStats.append("\n" + f.getName() + ": " + line.substring(
- "valid-after ".length()));
- SimpleDateFormat dateTimeFormat =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- if (dateTimeFormat.parse(line.substring("valid-after ".
- length())).getTime() < System.currentTimeMillis()
- - 6L * 60L * 60L * 1000L) {
- logger.warning("Cached descriptor files in "
- + cachedDescDir.getAbsolutePath() + " are stale. "
- + "The valid-after line in cached-consensus is '"
- + line + "'.");
- dumpStats.append(" (stale!)");
- }
- break;
- }
- }
- br.close();
-
- /* Parse the cached consensus if we haven't parsed it before
- * (but regardless of whether it's stale or not). */
- if (rdp != null) {
- String digest = Hex.encodeHexString(DigestUtils.sha(
- allData));
- if (!lastImportHistory.contains(digest) &&
- !currentImportHistory.contains(digest)) {
- rdp.parse(allData);
- } else {
- dumpStats.append(" (skipped)");
- }
- currentImportHistory.add(digest);
- }
- } else if (f.getName().equals("v3-status-votes")) {
- int parsedNum = 0, skippedNum = 0;
- String ascii = new String(allData, "US-ASCII");
- String startToken = "network-status-version ";
- int end = ascii.length();
- int start = ascii.indexOf(startToken);
- while (start >= 0 && start < end) {
- int next = ascii.indexOf(startToken, start + 1);
- if (next < 0) {
- next = end;
- }
- if (start < next) {
- byte[] rawNetworkStatusBytes = new byte[next - start];
- System.arraycopy(allData, start, rawNetworkStatusBytes, 0,
- next - start);
- if (rdp != null) {
- String digest = Hex.encodeHexString(DigestUtils.sha(
- rawNetworkStatusBytes));
- if (!lastImportHistory.contains(digest) &&
- !currentImportHistory.contains(digest)) {
- rdp.parse(rawNetworkStatusBytes);
- parsedNum++;
- } else {
- skippedNum++;
- }
- currentImportHistory.add(digest);
- }
- }
- start = next;
- }
- dumpStats.append("\n" + f.getName() + ": parsed " + parsedNum
- + ", skipped " + skippedNum + " votes");
- } else if (f.getName().startsWith("cached-descriptors") ||
- f.getName().startsWith("cached-extrainfo")) {
- String ascii = new String(allData, "US-ASCII");
- int start = -1, sig = -1, end = -1;
- String startToken =
- f.getName().startsWith("cached-descriptors") ?
- "router " : "extra-info ";
- String sigToken = "\nrouter-signature\n";
- String endToken = "\n-----END SIGNATURE-----\n";
- int parsedNum = 0, skippedNum = 0;
- while (end < ascii.length()) {
- start = ascii.indexOf(startToken, end);
- if (start < 0) {
- break;
- }
- sig = ascii.indexOf(sigToken, start);
- if (sig < 0) {
- break;
- }
- sig += sigToken.length();
- end = ascii.indexOf(endToken, sig);
- if (end < 0) {
- break;
- }
- end += endToken.length();
- byte[] descBytes = new byte[end - start];
- System.arraycopy(allData, start, descBytes, 0, end - start);
- if (rdp != null) {
- String digest = Hex.encodeHexString(DigestUtils.sha(
- descBytes));
- if (!lastImportHistory.contains(digest) &&
- !currentImportHistory.contains(digest)) {
- rdp.parse(descBytes);
- parsedNum++;
- } else {
- skippedNum++;
- }
- currentImportHistory.add(digest);
- }
- }
- dumpStats.append("\n" + f.getName() + ": parsed " + parsedNum
- + ", skipped " + skippedNum + " "
- + (f.getName().startsWith("cached-descriptors") ?
- "server" : "extra-info") + " descriptors");
- }
- } catch (IOException e) {
- logger.log(Level.WARNING, "Failed reading "
- + cachedDescDir.getAbsolutePath() + " directory.", e);
- } catch (ParseException e) {
- logger.log(Level.WARNING, "Failed reading "
- + cachedDescDir.getAbsolutePath() + " directory.", e);
- }
- }
- logger.fine("Finished reading "
- + cachedDescDir.getAbsolutePath() + " directory.");
- }
-
- /* Write import history containing SHA-1 digests to disk. */
- try {
- importHistoryFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- importHistoryFile));
- for (String digest : currentImportHistory) {
- bw.write(digest + "\n");
- }
- bw.close();
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not write import history to "
- + importHistoryFile.getAbsolutePath() + ".", e);
- }
-
- logger.info(dumpStats.toString());
- }
-}
-
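
The import-history logic above reduces to one rule: hand a status or
descriptor to the parser at most once across runs, keyed by the SHA-1
hex digest of its raw bytes. A minimal sketch of that check, assuming
commons-codec on the classpath; the class and method names here are
illustrative and not part of the original reader:

    import java.util.HashSet;
    import java.util.Set;

    import org.apache.commons.codec.binary.Hex;
    import org.apache.commons.codec.digest.DigestUtils;

    class ImportHistorySketch {
      private final Set<String> lastImportHistory =
          new HashSet<String>();    /* read from disk at startup */
      private final Set<String> currentImportHistory =
          new HashSet<String>();    /* written to disk at the end */

      /* Returns true exactly once per distinct descriptor, however often
       * the same bytes appear in cached files across runs. */
      boolean shouldParse(byte[] rawDescriptorBytes) {
        String digest = Hex.encodeHexString(
            DigestUtils.sha(rawDescriptorBytes));
        boolean unseen = !lastImportHistory.contains(digest)
            && !currentImportHistory.contains(digest);
        currentImportHistory.add(digest);
        return unseen;
      }
    }
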
diff --git a/src/org/torproject/ernie/db/relaydescs/ReferenceChecker.java b/src/org/torproject/ernie/db/relaydescs/ReferenceChecker.java
deleted file mode 100644
index 4bafa76..0000000
--- a/src/org/torproject/ernie/db/relaydescs/ReferenceChecker.java
+++ /dev/null
@@ -1,310 +0,0 @@
-package org.torproject.ernie.db.relaydescs;
-
-import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.text.DateFormat;
-import java.text.SimpleDateFormat;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.Locale;
-import java.util.Set;
-import java.util.SortedSet;
-import java.util.TimeZone;
-import java.util.TreeSet;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.torproject.descriptor.Descriptor;
-import org.torproject.descriptor.DescriptorFile;
-import org.torproject.descriptor.DescriptorReader;
-import org.torproject.descriptor.DescriptorSourceFactory;
-import org.torproject.descriptor.DirSourceEntry;
-import org.torproject.descriptor.ExtraInfoDescriptor;
-import org.torproject.descriptor.Microdescriptor;
-import org.torproject.descriptor.NetworkStatusEntry;
-import org.torproject.descriptor.RelayNetworkStatusConsensus;
-import org.torproject.descriptor.RelayNetworkStatusVote;
-import org.torproject.descriptor.ServerDescriptor;
-
-import com.google.gson.Gson;
-
-public class ReferenceChecker {
-
- private Logger log = Logger.getLogger(ReferenceChecker.class.getName());
-
- private File descriptorsDir;
-
- private File referencesFile;
-
- private File historyFile;
-
- public ReferenceChecker(File descriptorsDir, File referencesFile,
- File historyFile) {
- this.descriptorsDir = descriptorsDir;
- this.referencesFile = referencesFile;
- this.historyFile = historyFile;
- }
-
- public void check() {
- this.getCurrentTimeMillis();
- this.readReferencesFile();
- this.readNewDescriptors();
- this.dropStaleReferences();
- this.checkReferences();
- this.writeReferencesFile();
- }
-
- private long currentTimeMillis;
-
- private void getCurrentTimeMillis() {
- this.currentTimeMillis = System.currentTimeMillis();
- }
-
- private static class Reference implements Comparable<Reference> {
-
- private String referencing;
-
- private String referenced;
-
- private double weight;
-
- private long expiresAfterMillis;
-
- public Reference(String referencing, String referenced, double weight,
- long expiresAfterMillis) {
- this.referencing = referencing;
- this.referenced = referenced;
- this.weight = weight;
- this.expiresAfterMillis = expiresAfterMillis;
- }
-
- @Override
- public boolean equals(Object otherObject) {
- if (!(otherObject instanceof Reference)) {
- return false;
- }
- Reference other = (Reference) otherObject;
- return this.referencing.equals(other.referencing) &&
- this.referenced.equals(other.referenced);
- }
-
- @Override
- public int hashCode() {
- return this.referencing.hashCode() + this.referenced.hashCode();
- }
-
- @Override
- public int compareTo(Reference other) {
- int result = this.referencing.compareTo(other.referencing);
- if (result == 0) {
- result = this.referenced.compareTo(other.referenced);
- }
- return result;
- }
- }
-
- private SortedSet<Reference> references = new TreeSet<Reference>();
-
- private void addReference(String referencing, String referenced,
- double weight, long expiresAfterMillis) {
- this.references.add(new Reference(referencing.toUpperCase(),
- referenced.toUpperCase(), weight, expiresAfterMillis));
- }
-
- private void readReferencesFile() {
- if (!this.referencesFile.exists()) {
- return;
- }
- Gson gson = new Gson();
- try {
- FileReader fr = new FileReader(this.referencesFile);
- this.references.addAll(Arrays.asList(gson.fromJson(fr,
- Reference[].class)));
- fr.close();
- } catch (IOException e) {
- this.log.log(Level.WARNING, "Cannot read existing references file "
- + "from previous run.", e);
- }
- }
-
- private void readNewDescriptors() {
- DescriptorReader descriptorReader =
- DescriptorSourceFactory.createDescriptorReader();
- descriptorReader.addDirectory(this.descriptorsDir);
- descriptorReader.setExcludeFiles(this.historyFile);
- Iterator<DescriptorFile> descriptorFiles =
- descriptorReader.readDescriptors();
- while (descriptorFiles.hasNext()) {
- DescriptorFile descriptorFile = descriptorFiles.next();
- for (Descriptor descriptor : descriptorFile.getDescriptors()) {
- if (descriptor instanceof RelayNetworkStatusConsensus) {
- RelayNetworkStatusConsensus consensus =
- (RelayNetworkStatusConsensus) descriptor;
- String consensusFlavor = consensus.getConsensusFlavor();
- if (consensusFlavor == null) {
- this.readRelayNetworkStatusConsensusUnflavored(consensus);
- } else if (consensusFlavor.equals("microdesc")) {
- this.readRelayNetworkStatusConsensusMicrodesc(consensus);
- } else {
- /* Ignore unknown consensus flavors. */
- }
- } else if (descriptor instanceof RelayNetworkStatusVote) {
- this.readRelayNetworkStatusVote(
- (RelayNetworkStatusVote) descriptor);
- } else if (descriptor instanceof ServerDescriptor) {
- this.readServerDescriptor((ServerDescriptor) descriptor);
- } else if (descriptor instanceof ExtraInfoDescriptor) {
- this.readExtraInfoDescriptor((ExtraInfoDescriptor) descriptor);
- } else if (descriptor instanceof Microdescriptor) {
- readMicrodescriptor((Microdescriptor) descriptor);
- } else {
- /* Ignore unknown descriptors. */
- }
- }
- }
- }
-
- private static DateFormat dateTimeFormat;
- static {
- dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'",
- Locale.US);
- dateTimeFormat.setLenient(false);
- dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- }
-
- private static final long ONE_HOUR = 60L * 60L * 1000L,
- THREE_HOURS = 3L * ONE_HOUR, SIX_HOURS = 6L * ONE_HOUR,
- ONE_DAY = 24L * ONE_HOUR, THIRTY_DAYS = 30L * ONE_DAY;
-
- private void readRelayNetworkStatusConsensusUnflavored(
- RelayNetworkStatusConsensus consensus) {
- String validAfter = dateTimeFormat.format(
- consensus.getValidAfterMillis());
- String referencing = String.format("C-%s", validAfter);
- this.addReference(referencing, String.format("M-%s", validAfter), 1.0,
- consensus.getValidAfterMillis() + THREE_HOURS);
- for (DirSourceEntry dirSourceEntry :
- consensus.getDirSourceEntries().values()) {
- if (!dirSourceEntry.isLegacy()) {
- this.addReference(referencing, String.format("V-%s-%s",
- validAfter, dirSourceEntry.getIdentity()), 1.0,
- consensus.getValidAfterMillis() + THREE_HOURS);
- }
- }
- double entryWeight = 200.0 /
- ((double) consensus.getStatusEntries().size());
- for (NetworkStatusEntry entry :
- consensus.getStatusEntries().values()) {
- this.addReference(referencing,
- String.format("S-%s", entry.getDescriptor()), entryWeight,
- entry.getPublishedMillis() + THREE_HOURS);
- }
- }
-
-
- private void readRelayNetworkStatusConsensusMicrodesc(
- RelayNetworkStatusConsensus consensus) {
- String validAfter = dateTimeFormat.format(
- consensus.getValidAfterMillis());
- String referencing = String.format("M-%s", validAfter);
- this.addReference(referencing, String.format("C-%s", validAfter), 1.0,
- consensus.getValidAfterMillis() + THREE_HOURS);
- double entryWeight = 200.0 /
- ((double) consensus.getStatusEntries().size());
- for (NetworkStatusEntry entry :
- consensus.getStatusEntries().values()) {
- for (String digest : entry.getMicrodescriptorDigests()) {
- this.addReference(referencing, String.format("D-%s", digest),
- entryWeight, entry.getPublishedMillis() + THREE_HOURS);
- }
- }
- }
-
- private void readRelayNetworkStatusVote(RelayNetworkStatusVote vote) {
- String validAfter = dateTimeFormat.format(vote.getValidAfterMillis());
- String referencing = String.format("V-%s-%s", validAfter,
- vote.getIdentity());
- double entryWeight = 200.0 /
- ((double) vote.getStatusEntries().size());
- for (NetworkStatusEntry entry : vote.getStatusEntries().values()) {
- this.addReference(referencing,
- String.format("S-%s", entry.getDescriptor()), entryWeight,
- entry.getPublishedMillis() + SIX_HOURS);
- }
- }
-
- private void readServerDescriptor(ServerDescriptor serverDescriptor) {
- String referenced = serverDescriptor.getExtraInfoDigest() == null ? ""
- : String.format("E-%s", serverDescriptor.getExtraInfoDigest());
- this.addReference(String.format("S-%s",
- serverDescriptor.getServerDescriptorDigest()), referenced, 0.01,
- serverDescriptor.getPublishedMillis() + SIX_HOURS);
- }
-
- private void readExtraInfoDescriptor(
- ExtraInfoDescriptor extraInfoDescriptor) {
- this.addReference(String.format("E-%s",
- extraInfoDescriptor.getExtraInfoDigest()), "", 0.005,
- extraInfoDescriptor.getPublishedMillis() + SIX_HOURS);
- }
-
- private void readMicrodescriptor(Microdescriptor microdesc) {
- this.addReference(
- String.format("D-%s", microdesc.getMicrodescriptorDigest()), "",
- 0.0, this.currentTimeMillis + THIRTY_DAYS);
- }
-
- private void dropStaleReferences() {
- SortedSet<Reference> recentReferences = new TreeSet<Reference>();
- for (Reference reference : this.references) {
- if (this.currentTimeMillis <= reference.expiresAfterMillis) {
- recentReferences.add(reference);
- }
- }
- this.references = recentReferences;
- }
-
- private void checkReferences() {
- Set<String> knownDescriptors = new HashSet<String>();
- for (Reference reference : this.references) {
- knownDescriptors.add(reference.referencing);
- }
- double totalMissingDescriptorsWeight = 0.0;
- Set<String> missingDescriptors = new TreeSet<String>();
- StringBuilder sb = new StringBuilder("Missing referenced "
- + "descriptors:");
- for (Reference reference : this.references) {
- if (reference.referenced.length() > 0 &&
- !knownDescriptors.contains(reference.referenced)) {
- if (!missingDescriptors.contains(reference.referenced)) {
- totalMissingDescriptorsWeight += reference.weight;
- }
- missingDescriptors.add(reference.referenced);
- sb.append(String.format("%n%s -> %s (%.4f -> %.4f)",
- reference.referencing, reference.referenced, reference.weight,
- totalMissingDescriptorsWeight));
- }
- }
- this.log.log(Level.INFO, sb.toString());
- if (totalMissingDescriptorsWeight > 0.999) {
- this.log.log(Level.WARNING, "Missing too many referenced "
- + "descriptors (" + totalMissingDescriptorsWeight + ").");
- }
- }
-
- private void writeReferencesFile() {
- Gson gson = new Gson();
- try {
- FileWriter fw = new FileWriter(this.referencesFile);
- gson.toJson(this.references, fw);
- fw.close();
- } catch (IOException e) {
- this.log.log(Level.WARNING, "Cannot write references file for next "
- + "run.", e);
- }
- }
-}
-
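
To put the reference weights above into numbers: each consensus or vote
spreads a total weight of 200.0 over its status entries, and the checker
warns once the accumulated weight of unique missing referenced
descriptors exceeds 0.999, i.e., roughly half a percent of one
consensus. A small worked example with invented counts:

    public class MissingWeightSketch {
      public static void main(String[] args) {
        int statusEntries = 8000;  /* entries in one consensus (invented) */
        double entryWeight = 200.0 / statusEntries;      /* 0.025 each */
        int uniqueMissing = 40;    /* distinct missing server descriptors */
        double totalMissingWeight = uniqueMissing * entryWeight;
        System.out.printf("missing weight: %.3f (warn above 0.999)%n",
            totalMissingWeight);   /* prints 1.000 -> warning would fire */
      }
    }
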
diff --git a/src/org/torproject/ernie/db/relaydescs/RelayDescriptorDownloader.java b/src/org/torproject/ernie/db/relaydescs/RelayDescriptorDownloader.java
deleted file mode 100644
index 1c8a375..0000000
--- a/src/org/torproject/ernie/db/relaydescs/RelayDescriptorDownloader.java
+++ /dev/null
@@ -1,1090 +0,0 @@
-/* Copyright 2010--2014 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db.relaydescs;
-
-import java.io.BufferedInputStream;
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.net.HttpURLConnection;
-import java.net.URL;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.SortedMap;
-import java.util.SortedSet;
-import java.util.TimeZone;
-import java.util.TreeMap;
-import java.util.TreeSet;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import java.util.zip.InflaterInputStream;
-
-import org.apache.commons.codec.binary.Base64;
-import org.apache.commons.codec.digest.DigestUtils;
-
-/**
- * Downloads relay descriptors from the directory authorities via HTTP.
- * Keeps a list of missing descriptors that gets updated by parse results
- * from <code>RelayDescriptorParser</code> and downloads all missing
- * descriptors that have been published in the last 24 hours. Also
- * downloads all server and extra-info descriptors known to a directory
- * authority at most once a day.
- */
-public class RelayDescriptorDownloader {
-
- /**
- * Text file containing the descriptors that we are missing and that we
- * want to download. Lines are formatted as:
- *
- * - "consensus,<validafter>,<parsed>",
- * - "consensus-microdesc,<validafter>,<parsed>",
- * - "vote,<validafter>,<fingerprint>,<parsed>",
- * - "server,<published>,<relayid>,<descid>,<parsed>",
- * - "extra,<published>,<relayid>,<descid>,<parsed>", or
- * - "micro,<validafter>,<relayid>,<descid>,<parsed>".
- */
- private File missingDescriptorsFile;
-
- /**
- * Relay descriptors that we are missing and that we want to download
- * either in this execution or write to disk and try next time. Map keys
- * contain comma-separated values as in the missing descriptors files
- * without the "parsed" column. Map values contain the "parsed" column.
- */
- private SortedMap<String, String> missingDescriptors;
-
- /**
- * Map from base64 microdescriptor digests to keys in missingDescriptors
- * ("micro,<validafter>,<relayid>,<descid>"). We need this map, because
- * we can't learn <validafter> or <relayid> from parsing
- * microdescriptors, but we need to know <validafter> to store
- * microdescriptors to disk and both <validafter> and <relayid> to
- * remove microdescriptors from the missing list. There are potentially
- * many matching keys in missingDescriptors for the same microdescriptor
- * digest. Also, in rare cases relays share the same microdescriptor
- * (which is only possible if they share the same onion key), and then
- * we don't have to download their microdescriptor more than once.
- */
- private Map<String, Set<String>> microdescriptorKeys;
-
- /**
- * Set of microdescriptor digests that are currently missing. Used for
- * logging statistics instead of "micro,<validafter>,..." keys which may
- * contain the same microdescriptor digest multiple times.
- */
- private Set<String> missingMicrodescriptors;
-
- /**
- * Text file containing the IP addresses (and Dir ports if not 80) of
- * directory authorities and when we last downloaded all server and
- * extra-info descriptors from them, so that we can avoid downloading
- * them too often.
- */
- private File lastDownloadedAllDescriptorsFile;
-
- /**
- * Map of directory authorities and when we last downloaded all server
- * and extra-info descriptors from them. Map keys are IP addresses (and
- * Dir ports if not 80), map values are timestamps.
- */
- private Map<String, String> lastDownloadedAllDescriptors;
-
- /**
- * <code>RelayDescriptorParser</code> that we will hand over the
- * downloaded descriptors for parsing.
- */
- private RelayDescriptorParser rdp;
-
- /**
- * Directory authorities that we will try to download missing
- * descriptors from.
- */
- private List<String> authorities;
-
- /**
- * Fingerprints of directory authorities that we will use to download
- * votes without requiring a successfully downloaded consensus.
- */
- private List<String> authorityFingerprints;
-
- /**
- * Should we try to download the current consensus if we don't have it?
- */
- private boolean downloadCurrentConsensus;
-
- /**
- * Should we try to download the current microdesc consensus if we don't
- * have it?
- */
- private boolean downloadCurrentMicrodescConsensus;
-
- /**
- * Should we try to download current votes if we don't have them?
- */
- private boolean downloadCurrentVotes;
-
- /**
- * Should we try to download missing server descriptors that have been
- * published within the past 24 hours?
- */
- private boolean downloadMissingServerDescriptors;
-
- /**
- * Should we try to download missing extra-info descriptors that have
- * been published within the past 24 hours?
- */
- private boolean downloadMissingExtraInfos;
-
- /**
- * Should we try to download missing microdescriptors that have been
- * published within the past 24 hours?
- */
- private boolean downloadMissingMicrodescriptors;
-
- /**
- * Should we try to download all server descriptors from the authorities
- * once every 24 hours?
- */
- private boolean downloadAllServerDescriptors;
-
- /**
- * Should we try to download all extra-info descriptors from the
- * authorities once every 24 hours?
- */
- private boolean downloadAllExtraInfos;
-
- /**
- * Should we download zlib-compressed versions of descriptors by adding
- * ".z" to URLs?
- */
- private boolean downloadCompressed;
-
- /**
- * valid-after time that we expect the current consensus,
- * microdescriptor consensus, and votes to have, formatted
- * "yyyy-MM-dd HH:mm:ss". We only expect to find documents with this
- * valid-after time on the directory authorities. This time is
- * initialized as the beginning of the current hour.
- */
- private String currentValidAfter;
-
- /**
- * Cut-off time for missing server and extra-info descriptors, formatted
- * "yyyy-MM-dd HH:mm:ss". This time is initialized as the current system
- * time minus 24 hours.
- */
- private String descriptorCutOff;
-
- /**
- * Cut-off time for downloading all server and extra-info descriptors
- * from the directory authorities, formatted "yyyy-MM-dd HH:mm:ss". This
- * time is initialized as the current system time minus 23:30 hours.
- */
- private String downloadAllDescriptorsCutOff;
-
- /**
- * Directory authorities that we plan to download all server and
- * extra-info descriptors from in this execution.
- */
- private Set<String> downloadAllDescriptorsFromAuthorities;
-
- /**
- * Current timestamp that is written to the missing list for descriptors
- * that we parsed in this execution and for authorities that we
- * downloaded all server and extra-info descriptors from.
- */
- private String currentTimestamp;
-
- /**
- * Logger for this class.
- */
- private Logger logger;
-
- /**
-   * Number of descriptors requested per directory authority, to be
-   * included in logs.
- */
- private Map<String, Integer> requestsByAuthority;
-
- /**
- * Counters for descriptors that we had on the missing list at the
- * beginning of the execution, that we added to the missing list,
- * that we requested, and that we successfully downloaded in this
- * execution.
- */
- private int oldMissingConsensuses = 0,
- oldMissingMicrodescConsensuses = 0, oldMissingVotes = 0,
- oldMissingServerDescriptors = 0, oldMissingExtraInfoDescriptors = 0,
- oldMissingMicrodescriptors = 0, newMissingConsensuses = 0,
- newMissingMicrodescConsensuses = 0, newMissingVotes = 0,
- newMissingServerDescriptors = 0, newMissingExtraInfoDescriptors = 0,
- newMissingMicrodescriptors = 0, requestedConsensuses = 0,
- requestedMicrodescConsensuses = 0, requestedVotes = 0,
- requestedMissingServerDescriptors = 0,
- requestedAllServerDescriptors = 0,
- requestedMissingExtraInfoDescriptors = 0,
- requestedAllExtraInfoDescriptors = 0,
- requestedMissingMicrodescriptors = 0, downloadedConsensuses = 0,
- downloadedMicrodescConsensuses = 0, downloadedVotes = 0,
- downloadedMissingServerDescriptors = 0,
- downloadedAllServerDescriptors = 0,
- downloadedMissingExtraInfoDescriptors = 0,
- downloadedAllExtraInfoDescriptors = 0,
- downloadedMissingMicrodescriptors = 0;
-
- /**
- * Initializes this class, including reading in missing descriptors from
- * <code>stats/missing-relay-descriptors</code> and the times when we
- * last downloaded all server and extra-info descriptors from
- * <code>stats/last-downloaded-all-descriptors</code>.
- */
- public RelayDescriptorDownloader(RelayDescriptorParser rdp,
- List<String> authorities, List<String> authorityFingerprints,
- boolean downloadCurrentConsensus,
- boolean downloadCurrentMicrodescConsensus,
- boolean downloadCurrentVotes,
- boolean downloadMissingServerDescriptors,
- boolean downloadMissingExtraInfos,
- boolean downloadMissingMicrodescriptors,
- boolean downloadAllServerDescriptors, boolean downloadAllExtraInfos,
- boolean downloadCompressed) {
-
- /* Memorize argument values. */
- this.rdp = rdp;
- this.authorities = new ArrayList<String>(authorities);
- this.authorityFingerprints = new ArrayList<String>(
- authorityFingerprints);
- this.downloadCurrentConsensus = downloadCurrentConsensus;
- this.downloadCurrentMicrodescConsensus =
- downloadCurrentMicrodescConsensus;
- this.downloadCurrentVotes = downloadCurrentVotes;
- this.downloadMissingServerDescriptors =
- downloadMissingServerDescriptors;
- this.downloadMissingExtraInfos = downloadMissingExtraInfos;
- this.downloadMissingMicrodescriptors =
- downloadMissingMicrodescriptors;
- this.downloadAllServerDescriptors = downloadAllServerDescriptors;
- this.downloadAllExtraInfos = downloadAllExtraInfos;
- this.downloadCompressed = downloadCompressed;
-
- /* Shuffle list of authorities for better load balancing over time. */
- Collections.shuffle(this.authorities);
-
- /* Initialize logger. */
- this.logger = Logger.getLogger(
- RelayDescriptorDownloader.class.getName());
-
- /* Prepare cut-off times and timestamp for the missing descriptors
- * list and the list of authorities to download all server and
- * extra-info descriptors from. */
- SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- format.setTimeZone(TimeZone.getTimeZone("UTC"));
- long now = System.currentTimeMillis();
- this.currentValidAfter = format.format((now / (60L * 60L * 1000L)) *
- (60L * 60L * 1000L));
- this.descriptorCutOff = format.format(now - 24L * 60L * 60L * 1000L);
- this.currentTimestamp = format.format(now);
- this.downloadAllDescriptorsCutOff = format.format(now
- - 23L * 60L * 60L * 1000L - 30L * 60L * 1000L);
-
- /* Read list of missing descriptors from disk and memorize those that
- * we are interested in and that are likely to be found on the
- * directory authorities. */
- this.missingDescriptors = new TreeMap<String, String>();
- this.microdescriptorKeys = new HashMap<String, Set<String>>();
- this.missingMicrodescriptors = new HashSet<String>();
- this.missingDescriptorsFile = new File(
- "stats/missing-relay-descriptors");
- if (this.missingDescriptorsFile.exists()) {
- try {
- this.logger.fine("Reading file "
- + this.missingDescriptorsFile.getAbsolutePath() + "...");
- BufferedReader br = new BufferedReader(new FileReader(
- this.missingDescriptorsFile));
- String line;
- while ((line = br.readLine()) != null) {
- if (line.split(",").length > 2) {
- String published = line.split(",")[1];
- if (((line.startsWith("consensus,") ||
- line.startsWith("consensus-microdesc,") ||
- line.startsWith("vote,")) &&
- this.currentValidAfter.equals(published)) ||
- ((line.startsWith("server,") ||
- line.startsWith("extra,") ||
- line.startsWith("micro,")) &&
- this.descriptorCutOff.compareTo(published) < 0)) {
- if (!line.endsWith("NA")) {
- /* Not missing. */
- } else if (line.startsWith("consensus,")) {
- oldMissingConsensuses++;
- } else if (line.startsWith("consensus-microdesc,")) {
- oldMissingMicrodescConsensuses++;
- } else if (line.startsWith("vote,")) {
- oldMissingVotes++;
- } else if (line.startsWith("server,")) {
- oldMissingServerDescriptors++;
- } else if (line.startsWith("extra,")) {
- oldMissingExtraInfoDescriptors++;
- }
- int separateAt = line.lastIndexOf(",");
- this.missingDescriptors.put(line.substring(0,
- separateAt), line.substring(separateAt + 1));
- if (line.startsWith("micro,")) {
- String microdescriptorDigest = line.split(",")[3];
- String microdescriptorKey = line.substring(0,
- line.lastIndexOf(","));
- if (!this.microdescriptorKeys.containsKey(
- microdescriptorDigest)) {
- this.microdescriptorKeys.put(
- microdescriptorDigest, new HashSet<String>());
- }
- this.microdescriptorKeys.get(microdescriptorDigest).add(
- microdescriptorKey);
- if (line.endsWith("NA") && !this.missingMicrodescriptors.
- contains(microdescriptorDigest)) {
- this.missingMicrodescriptors.add(microdescriptorDigest);
- oldMissingMicrodescriptors++;
- }
- }
- }
- } else {
- this.logger.fine("Invalid line '" + line + "' in "
- + this.missingDescriptorsFile.getAbsolutePath()
- + ". Ignoring.");
- }
- }
- br.close();
- this.logger.fine("Finished reading file "
- + this.missingDescriptorsFile.getAbsolutePath() + ".");
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed to read file "
- + this.missingDescriptorsFile.getAbsolutePath()
- + "! This means that we might forget to dowload relay "
- + "descriptors we are missing.", e);
- }
- }
-
- /* Read list of directory authorities and when we last downloaded all
- * server and extra-info descriptors from them. */
- this.lastDownloadedAllDescriptors = new HashMap<String, String>();
- this.lastDownloadedAllDescriptorsFile = new File(
- "stats/last-downloaded-all-descriptors");
- if (this.lastDownloadedAllDescriptorsFile.exists()) {
- try {
- this.logger.fine("Reading file "
- + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
- + "...");
- BufferedReader br = new BufferedReader(new FileReader(
- this.lastDownloadedAllDescriptorsFile));
- String line;
- while ((line = br.readLine()) != null) {
- if (line.split(",").length != 2) {
- this.logger.fine("Invalid line '" + line + "' in "
- + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
- + ". Ignoring.");
- } else {
- String[] parts = line.split(",");
- String authority = parts[0];
- String lastDownloaded = parts[1];
- this.lastDownloadedAllDescriptors.put(authority,
- lastDownloaded);
- }
- }
- br.close();
- this.logger.fine("Finished reading file "
- + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
- + ".");
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed to read file "
- + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
- + "! This means that we might download all server and "
- + "extra-info descriptors more often than we should.", e);
- }
- }
-
- /* Make a list of at most two directory authorities that we want to
- * download all server and extra-info descriptors from. */
- this.downloadAllDescriptorsFromAuthorities = new HashSet<String>();
- for (String authority : this.authorities) {
- if (!this.lastDownloadedAllDescriptors.containsKey(authority) ||
- this.lastDownloadedAllDescriptors.get(authority).compareTo(
- this.downloadAllDescriptorsCutOff) < 0) {
- this.downloadAllDescriptorsFromAuthorities.add(authority);
- }
- if (this.downloadAllDescriptorsFromAuthorities.size() >= 2) {
- break;
- }
- }
-
- /* Prepare statistics on this execution. */
- this.requestsByAuthority = new HashMap<String, Integer>();
- for (String authority : this.authorities) {
- this.requestsByAuthority.put(authority, 0);
- }
- }
-
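
For concreteness, here is an illustrative entry from
stats/missing-relay-descriptors (the timestamp and "NA" value are
invented) together with the key/"parsed" split that the constructor
above performs on each line:

    public class MissingListSketch {
      public static void main(String[] args) {
        /* "NA" in the last column means: still missing, try to download. */
        String line = "consensus,2014-05-01 12:00:00,NA";
        int separateAt = line.lastIndexOf(",");
        String key = line.substring(0, separateAt);      /* map key */
        String parsed = line.substring(separateAt + 1);  /* "parsed" column */
        System.out.println(key + " -> " + parsed);
      }
    }
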
- /**
- * We have parsed a consensus. Take this consensus off the missing list
- * and add the votes created by the given <code>authorities</code> and
- * the <code>serverDescriptors</code> which are in the format
- * "<published>,<relayid>,<descid>" to that list.
- */
- public void haveParsedConsensus(String validAfter,
- Set<String> authorities, Set<String> serverDescriptors) {
-
- /* Mark consensus as parsed. */
- if (this.currentValidAfter.equals(validAfter)) {
- String consensusKey = "consensus," + validAfter;
- this.missingDescriptors.put(consensusKey, this.currentTimestamp);
-
- /* Add votes to missing list. */
- for (String authority : authorities) {
- String voteKey = "vote," + validAfter + "," + authority;
- if (!this.missingDescriptors.containsKey(voteKey)) {
- this.missingDescriptors.put(voteKey, "NA");
- this.newMissingVotes++;
- }
- }
- }
-
- /* Add server descriptors to missing list. */
- for (String serverDescriptor : serverDescriptors) {
- String published = serverDescriptor.split(",")[0];
- if (this.descriptorCutOff.compareTo(published) < 0) {
- String serverDescriptorKey = "server," + serverDescriptor;
- if (!this.missingDescriptors.containsKey(
- serverDescriptorKey)) {
- this.missingDescriptors.put(serverDescriptorKey, "NA");
- this.newMissingServerDescriptors++;
- }
- }
- }
- }
-
- /**
- * We have parsed a microdesc consensus. Take this microdesc consensus
- * off the missing list and add the <code>microdescriptors</code> which
- * are in the format "<validafter>,<relayid>,<descid>" to that
- * list.
- */
- public void haveParsedMicrodescConsensus(String validAfter,
- Set<String> microdescriptors) {
-
- /* Mark microdesc consensus as parsed. */
- if (this.currentValidAfter.equals(validAfter)) {
- String microdescConsensusKey = "consensus-microdesc," + validAfter;
- this.missingDescriptors.put(microdescConsensusKey,
- this.currentTimestamp);
- }
-
- /* Add microdescriptors to missing list. Exclude those that we already
- * downloaded this month. (We download each microdescriptor at least
- * once per month to keep the storage logic sane; otherwise we'd have
- * to copy microdescriptors from the earlier month to the current
- * month, and that gets messy.) */
- if (this.descriptorCutOff.compareTo(validAfter) < 0) {
- String validAfterYearMonth = validAfter.substring(0,
- "YYYY-MM".length());
- for (String microdescriptor : microdescriptors) {
- String microdescriptorKey = "micro," + microdescriptor;
- String parsed = "NA";
- String microdescriptorDigest = microdescriptor.split(",")[2];
- if (this.microdescriptorKeys.containsKey(microdescriptorDigest)) {
- for (String otherMicrodescriptorKey :
- this.microdescriptorKeys.get(microdescriptorDigest)) {
- String otherValidAfter =
- otherMicrodescriptorKey.split(",")[1];
- if (!otherValidAfter.startsWith(validAfterYearMonth)) {
- continue;
- }
- String otherParsed = this.missingDescriptors.get(
- otherMicrodescriptorKey);
- if (otherParsed != null && !otherParsed.equals("NA")) {
- parsed = otherParsed;
- break;
- }
- }
- } else {
- this.microdescriptorKeys.put(
- microdescriptorDigest, new HashSet<String>());
- }
- this.microdescriptorKeys.get(microdescriptorDigest).add(
- microdescriptorKey);
- this.missingDescriptors.put(microdescriptorKey, parsed);
- if (parsed.equals("NA") &&
- !this.missingMicrodescriptors.contains(microdescriptorDigest)) {
- this.missingMicrodescriptors.add(microdescriptorDigest);
- this.newMissingMicrodescriptors++;
- }
- }
- }
- }
-
- /**
- * We have parsed a vote. Take this vote off the missing list and add
- * the <code>serverDescriptors</code> which are in the format
- * "<published>,<relayid>,<descid>" to that list.
- */
- public void haveParsedVote(String validAfter, String fingerprint,
- Set<String> serverDescriptors) {
-
- /* Mark vote as parsed. */
- if (this.currentValidAfter.equals(validAfter)) {
- String voteKey = "vote," + validAfter + "," + fingerprint;
- this.missingDescriptors.put(voteKey, this.currentTimestamp);
- }
-
- /* Add server descriptors to missing list. */
- for (String serverDescriptor : serverDescriptors) {
- String published = serverDescriptor.split(",")[0];
- if (this.descriptorCutOff.compareTo(published) < 0) {
- String serverDescriptorKey = "server," + serverDescriptor;
- if (!this.missingDescriptors.containsKey(
- serverDescriptorKey)) {
- this.missingDescriptors.put(serverDescriptorKey, "NA");
- this.newMissingServerDescriptors++;
- }
- }
- }
- }
-
- /**
- * We have parsed a server descriptor. Take this server descriptor off
- * the missing list and put the extra-info descriptor digest on that
- * list.
- */
- public void haveParsedServerDescriptor(String published,
- String relayIdentity, String serverDescriptorDigest,
- String extraInfoDigest) {
-
- /* Mark server descriptor as parsed. */
- if (this.descriptorCutOff.compareTo(published) < 0) {
- String serverDescriptorKey = "server," + published + ","
- + relayIdentity + "," + serverDescriptorDigest;
- this.missingDescriptors.put(serverDescriptorKey,
- this.currentTimestamp);
-
- /* Add extra-info descriptor to missing list. */
- if (extraInfoDigest != null) {
- String extraInfoKey = "extra," + published + ","
- + relayIdentity + "," + extraInfoDigest;
- if (!this.missingDescriptors.containsKey(extraInfoKey)) {
- this.missingDescriptors.put(extraInfoKey, "NA");
- this.newMissingExtraInfoDescriptors++;
- }
- }
- }
- }
-
- /**
- * We have parsed an extra-info descriptor. Take it off the missing
- * list.
- */
- public void haveParsedExtraInfoDescriptor(String published,
- String relayIdentity, String extraInfoDigest) {
- if (this.descriptorCutOff.compareTo(published) < 0) {
- String extraInfoKey = "extra," + published + ","
- + relayIdentity + "," + extraInfoDigest;
- this.missingDescriptors.put(extraInfoKey, this.currentTimestamp);
- }
- }
-
- /**
- * We have parsed a microdescriptor. Take it off the missing list.
- */
- public void haveParsedMicrodescriptor(String descriptorDigest) {
- if (this.microdescriptorKeys.containsKey(descriptorDigest)) {
- for (String microdescriptorKey :
- this.microdescriptorKeys.get(descriptorDigest)) {
- String validAfter = microdescriptorKey.split(",")[0];
- if (this.descriptorCutOff.compareTo(validAfter) < 0) {
- this.missingDescriptors.put(microdescriptorKey,
- this.currentTimestamp);
- }
- }
- this.missingMicrodescriptors.remove(descriptorDigest);
- }
- }
-
- /**
- * Downloads missing descriptors that we think might still be available
- * on the directory authorities as well as all server and extra-info
- * descriptors once per day.
- */
- public void downloadDescriptors() {
-
- /* Put the current consensus and votes on the missing list, unless we
- * already have them. */
- String consensusKey = "consensus," + this.currentValidAfter;
- if (!this.missingDescriptors.containsKey(consensusKey)) {
- this.missingDescriptors.put(consensusKey, "NA");
- this.newMissingConsensuses++;
- }
- String microdescConsensusKey = "consensus-microdesc,"
- + this.currentValidAfter;
- if (!this.missingDescriptors.containsKey(microdescConsensusKey)) {
- this.missingDescriptors.put(microdescConsensusKey, "NA");
- this.newMissingMicrodescConsensuses++;
- }
- for (String authority : authorityFingerprints) {
- String voteKey = "vote," + this.currentValidAfter + "," + authority;
- if (!this.missingDescriptors.containsKey(voteKey)) {
- this.missingDescriptors.put(voteKey, "NA");
- this.newMissingVotes++;
- }
- }
-
-    /* Download descriptors from the authorities, which are in random
-     * order, so that we distribute the load somewhat fairly over time. */
- for (String authority : authorities) {
-
- /* Make all requests to an authority in a single try block. If
- * something goes wrong with this authority, we give up on all
- * downloads and continue with the next authority. */
- /* TODO Some authorities provide very little bandwidth and could
- * slow down the entire download process. Ponder adding a timeout of
- * 3 or 5 minutes per authority to avoid getting in the way of the
- * next execution. */
- try {
-
- /* Start with downloading the current consensus, unless we already
- * have it. */
- if (downloadCurrentConsensus) {
- if (this.missingDescriptors.containsKey(consensusKey) &&
- this.missingDescriptors.get(consensusKey).equals("NA")) {
- this.requestedConsensuses++;
- this.downloadedConsensuses +=
- this.downloadResourceFromAuthority(authority,
- "/tor/status-vote/current/consensus");
- }
- }
-
- /* Then try to download the microdesc consensus. */
- if (downloadCurrentMicrodescConsensus) {
- if (this.missingDescriptors.containsKey(
- microdescConsensusKey) &&
- this.missingDescriptors.get(microdescConsensusKey).
- equals("NA")) {
- this.requestedMicrodescConsensuses++;
- this.downloadedMicrodescConsensuses +=
- this.downloadResourceFromAuthority(authority,
- "/tor/status-vote/current/consensus-microdesc");
- }
- }
-
- /* Next, try to download current votes that we're missing. */
- if (downloadCurrentVotes) {
- String voteKeyPrefix = "vote," + this.currentValidAfter;
- SortedSet<String> fingerprints = new TreeSet<String>();
- for (Map.Entry<String, String> e :
- this.missingDescriptors.entrySet()) {
- if (e.getValue().equals("NA") &&
- e.getKey().startsWith(voteKeyPrefix)) {
- String fingerprint = e.getKey().split(",")[2];
- fingerprints.add(fingerprint);
- }
- }
- for (String fingerprint : fingerprints) {
- this.requestedVotes++;
- this.downloadedVotes +=
- this.downloadResourceFromAuthority(authority,
- "/tor/status-vote/current/" + fingerprint);
- }
- }
-
- /* Download either all server and extra-info descriptors or only
- * those that we're missing. Start with server descriptors, then
- * request extra-info descriptors. Finally, request missing
- * microdescriptors. */
- for (String type : new String[] { "server", "extra", "micro" }) {
-
- /* Download all server or extra-info descriptors from this
- * authority if we haven't done so for 24 hours and if we're
- * configured to do so. */
- if (this.downloadAllDescriptorsFromAuthorities.contains(
- authority) && ((type.equals("server") &&
- this.downloadAllServerDescriptors) ||
- (type.equals("extra") && this.downloadAllExtraInfos))) {
- int downloadedAllDescriptors =
- this.downloadResourceFromAuthority(authority, "/tor/"
- + type + "/all");
- if (type.equals("server")) {
- this.requestedAllServerDescriptors++;
- this.downloadedAllServerDescriptors +=
- downloadedAllDescriptors;
- } else if (type.equals("extra")) {
- this.requestedAllExtraInfoDescriptors++;
- this.downloadedAllExtraInfoDescriptors +=
- downloadedAllDescriptors;
- }
-
- /* Download missing server descriptors, extra-info descriptors,
- * and microdescriptors if we're configured to do so. */
- } else if ((type.equals("server") &&
- this.downloadMissingServerDescriptors) ||
- (type.equals("extra") && this.downloadMissingExtraInfos) ||
- (type.equals("micro") &&
- this.downloadMissingMicrodescriptors)) {
-
- /* Go through the list of missing descriptors of this type
-           * and combine the descriptor identifiers into a URL of up to
- * 96 server or extra-info descriptors or 92 microdescriptors
- * that we can download at once. */
- SortedSet<String> descriptorIdentifiers =
- new TreeSet<String>();
- for (Map.Entry<String, String> e :
- this.missingDescriptors.entrySet()) {
- if (e.getValue().equals("NA") &&
- e.getKey().startsWith(type + ",") &&
- this.descriptorCutOff.compareTo(
- e.getKey().split(",")[1]) < 0) {
- String descriptorIdentifier = e.getKey().split(",")[3];
- descriptorIdentifiers.add(descriptorIdentifier);
- }
- }
- StringBuilder combinedResource = null;
- int descriptorsInCombinedResource = 0,
- requestedDescriptors = 0, downloadedDescriptors = 0;
- int maxDescriptorsInCombinedResource =
- type.equals("micro") ? 92 : 96;
- String separator = type.equals("micro") ? "-" : "+";
- for (String descriptorIdentifier : descriptorIdentifiers) {
- if (descriptorsInCombinedResource >=
- maxDescriptorsInCombinedResource) {
- requestedDescriptors += descriptorsInCombinedResource;
- downloadedDescriptors +=
- this.downloadResourceFromAuthority(authority,
- combinedResource.toString());
- combinedResource = null;
- descriptorsInCombinedResource = 0;
- }
- if (descriptorsInCombinedResource == 0) {
- combinedResource = new StringBuilder("/tor/" + type
- + "/d/" + descriptorIdentifier);
- } else {
- combinedResource.append(separator + descriptorIdentifier);
- }
- descriptorsInCombinedResource++;
- }
- if (descriptorsInCombinedResource > 0) {
- requestedDescriptors += descriptorsInCombinedResource;
- downloadedDescriptors +=
- this.downloadResourceFromAuthority(authority,
- combinedResource.toString());
- }
- if (type.equals("server")) {
- this.requestedMissingServerDescriptors +=
- requestedDescriptors;
- this.downloadedMissingServerDescriptors +=
- downloadedDescriptors;
- } else if (type.equals("extra")) {
- this.requestedMissingExtraInfoDescriptors +=
- requestedDescriptors;
- this.downloadedMissingExtraInfoDescriptors +=
- downloadedDescriptors;
- } else if (type.equals("micro")) {
- this.requestedMissingMicrodescriptors +=
- requestedDescriptors;
- this.downloadedMissingMicrodescriptors +=
- downloadedDescriptors;
- }
- }
- }
-
- /* If a download failed, stop requesting descriptors from this
- * authority and move on to the next. */
- } catch (IOException e) {
- logger.log(Level.FINE, "Failed downloading from " + authority
- + "!", e);
- }
- }
- }
-
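
The request-batching scheme used above (up to 96 server or extra-info
descriptor digests joined by "+", up to 92 microdescriptor digests joined
by "-") can be restated as a standalone sketch; the class and method names
below are made up for illustration and are not part of this codebase:

  import java.util.ArrayList;
  import java.util.List;
  import java.util.SortedSet;

  public class BatchResourceSketch {

    /* Combine descriptor identifiers into as few "/tor/<type>/d/..."
     * resource strings as possible, respecting the per-request limits
     * used above. */
    static List<String> buildResources(String type,
        SortedSet<String> identifiers) {
      int max = type.equals("micro") ? 92 : 96;
      String separator = type.equals("micro") ? "-" : "+";
      List<String> resources = new ArrayList<String>();
      StringBuilder sb = null;
      int inResource = 0;
      for (String identifier : identifiers) {
        if (inResource == 0) {
          sb = new StringBuilder("/tor/" + type + "/d/" + identifier);
        } else {
          sb.append(separator + identifier);
        }
        if (++inResource >= max) {
          resources.add(sb.toString());
          inResource = 0;
        }
      }
      if (inResource > 0) {
        resources.add(sb.toString());
      }
      return resources;
    }
  }
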
- /**
- * Attempts to download one or more descriptors identified by a resource
- * string from a directory authority and passes the returned
- * descriptor(s) to the <code>RelayDescriptorParser</code> upon success.
- * Returns the number of descriptors contained in the reply. Throws an
- * <code>IOException</code> if something goes wrong while downloading.
- */
- private int downloadResourceFromAuthority(String authority,
- String resource) throws IOException {
- byte[] allData = null;
- this.requestsByAuthority.put(authority,
- this.requestsByAuthority.get(authority) + 1);
- /* TODO Disable compressed downloads for extra-info descriptors,
- * because zlib decompression doesn't work correctly. Figure out why
- * this is and fix it. */
- String fullUrl = "http://" + authority + resource
- + (this.downloadCompressed && !resource.startsWith("/tor/extra/")
- ? ".z" : "");
- URL u = new URL(fullUrl);
- HttpURLConnection huc = (HttpURLConnection) u.openConnection();
- huc.setRequestMethod("GET");
- huc.connect();
- int response = huc.getResponseCode();
- if (response == 200) {
- BufferedInputStream in = this.downloadCompressed &&
- !resource.startsWith("/tor/extra/")
- ? new BufferedInputStream(new InflaterInputStream(
- huc.getInputStream()))
- : new BufferedInputStream(huc.getInputStream());
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- int len;
- byte[] data = new byte[1024];
- while ((len = in.read(data, 0, 1024)) >= 0) {
- baos.write(data, 0, len);
- }
- in.close();
- allData = baos.toByteArray();
- }
- logger.fine("Downloaded " + fullUrl + " -> " + response + " ("
- + (allData == null ? 0 : allData.length) + " bytes)");
- int receivedDescriptors = 0;
- if (allData != null) {
- if (resource.startsWith("/tor/status-vote/current/")) {
- this.rdp.parse(allData);
- receivedDescriptors = 1;
- } else if (resource.startsWith("/tor/server/") ||
- resource.startsWith("/tor/extra/")) {
- if (resource.equals("/tor/server/all") ||
- resource.equals("/tor/extra/all")) {
- this.lastDownloadedAllDescriptors.put(authority,
- this.currentTimestamp);
- }
- String ascii = null;
- try {
- ascii = new String(allData, "US-ASCII");
- } catch (UnsupportedEncodingException e) {
- /* No way that US-ASCII is not supported. */
- }
- int start = -1, sig = -1, end = -1;
- String startToken = resource.startsWith("/tor/server/") ?
- "router " : "extra-info ";
- String sigToken = "\nrouter-signature\n";
- String endToken = "\n-----END SIGNATURE-----\n";
- while (end < ascii.length()) {
- start = ascii.indexOf(startToken, end);
- if (start < 0) {
- break;
- }
- sig = ascii.indexOf(sigToken, start);
- if (sig < 0) {
- break;
- }
- sig += sigToken.length();
- end = ascii.indexOf(endToken, sig);
- if (end < 0) {
- break;
- }
- end += endToken.length();
- byte[] descBytes = new byte[end - start];
- System.arraycopy(allData, start, descBytes, 0, end - start);
- this.rdp.parse(descBytes);
- receivedDescriptors++;
- }
- } else if (resource.startsWith("/tor/micro/")) {
-        /* TODO We need to parse microdescriptors ourselves, rather than
-         * leaving that to RelayDescriptorParser, because only we know
-         * the valid-after time(s) of microdesc consensus(es) containing
-         * this microdescriptor. However, this breaks functional
-         * abstraction pretty badly. */
- SimpleDateFormat parseFormat =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- parseFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- String ascii = null;
- try {
- ascii = new String(allData, "US-ASCII");
- } catch (UnsupportedEncodingException e) {
- /* No way that US-ASCII is not supported. */
- }
- int start = -1, end = -1;
- String startToken = "onion-key\n";
- while (end < ascii.length()) {
- start = ascii.indexOf(startToken, end);
- if (start < 0) {
- break;
- }
- end = ascii.indexOf(startToken, start + 1);
- if (end < 0) {
- end = ascii.length();
- if (end <= start) {
- break;
- }
- }
- byte[] descBytes = new byte[end - start];
- System.arraycopy(allData, start, descBytes, 0, end - start);
- String digest256Base64 = Base64.encodeBase64String(
- DigestUtils.sha256(descBytes)).replaceAll("=", "");
- if (!this.microdescriptorKeys.containsKey(digest256Base64)) {
- continue;
- }
- String digest256Hex = DigestUtils.sha256Hex(descBytes);
- for (String microdescriptorKey :
- this.microdescriptorKeys.get(digest256Base64)) {
- String validAfterTime = microdescriptorKey.split(",")[1];
- try {
- long validAfter =
- parseFormat.parse(validAfterTime).getTime();
- this.rdp.storeMicrodescriptor(descBytes, digest256Hex,
- digest256Base64, validAfter);
- } catch (ParseException e) {
- this.logger.log(Level.WARNING, "Could not parse "
- + "valid-after time '" + validAfterTime + "' in "
- + "microdescriptor key. Not storing microdescriptor.",
- e);
- }
- }
- receivedDescriptors++;
- }
- }
- }
- return receivedDescriptors;
- }
-
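
The token-scanning loop in downloadResourceFromAuthority above can be read
in isolation. A minimal sketch (illustrative names, not part of this
codebase) that extracts the individual descriptors from a concatenated
reply:

  import java.util.ArrayList;
  import java.util.List;

  public class DescriptorSplitSketch {

    /* Split a concatenated reply into single descriptors by scanning
     * for the start token ("router " or "extra-info "), then the
     * signature token, then the end-of-signature token. */
    static List<String> splitDescriptors(String ascii,
        String startToken) {
      String sigToken = "\nrouter-signature\n";
      String endToken = "\n-----END SIGNATURE-----\n";
      List<String> descriptors = new ArrayList<String>();
      int end = 0;
      while (end < ascii.length()) {
        int start = ascii.indexOf(startToken, end);
        if (start < 0) {
          break;
        }
        int sig = ascii.indexOf(sigToken, start);
        if (sig < 0) {
          break;
        }
        end = ascii.indexOf(endToken, sig + sigToken.length());
        if (end < 0) {
          break;
        }
        end += endToken.length();
        descriptors.add(ascii.substring(start, end));
      }
      return descriptors;
    }
  }
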
- /**
- * Writes status files to disk and logs statistics about downloading
- * relay descriptors in this execution.
- */
- public void writeFile() {
-
- /* Write missing descriptors file to disk. */
- int missingConsensuses = 0, missingMicrodescConsensuses = 0,
- missingVotes = 0, missingServerDescriptors = 0,
- missingExtraInfoDescriptors = 0;
- try {
- this.logger.fine("Writing file "
- + this.missingDescriptorsFile.getAbsolutePath() + "...");
- this.missingDescriptorsFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- this.missingDescriptorsFile));
- for (Map.Entry<String, String> e :
- this.missingDescriptors.entrySet()) {
- String key = e.getKey(), value = e.getValue();
- if (!value.equals("NA")) {
- /* Not missing. */
- } else if (key.startsWith("consensus,")) {
- missingConsensuses++;
- } else if (key.startsWith("consensus-microdesc,")) {
- missingMicrodescConsensuses++;
- } else if (key.startsWith("vote,")) {
- missingVotes++;
- } else if (key.startsWith("server,")) {
- missingServerDescriptors++;
- } else if (key.startsWith("extra,")) {
- missingExtraInfoDescriptors++;
- } else if (key.startsWith("micro,")) {
- }
- bw.write(key + "," + value + "\n");
- }
- bw.close();
- this.logger.fine("Finished writing file "
- + this.missingDescriptorsFile.getAbsolutePath() + ".");
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed writing "
- + this.missingDescriptorsFile.getAbsolutePath() + "!", e);
- }
- int missingMicrodescriptors = this.missingMicrodescriptors.size();
-
- /* Write text file containing the directory authorities and when we
- * last downloaded all server and extra-info descriptors from them to
- * disk. */
- try {
- this.logger.fine("Writing file "
- + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
- + "...");
- this.lastDownloadedAllDescriptorsFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- this.lastDownloadedAllDescriptorsFile));
- for (Map.Entry<String, String> e :
- this.lastDownloadedAllDescriptors.entrySet()) {
- String authority = e.getKey();
- String lastDownloaded = e.getValue();
- bw.write(authority + "," + lastDownloaded + "\n");
- }
- bw.close();
- this.logger.fine("Finished writing file "
- + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
- + ".");
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed writing "
- + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() + "!",
- e);
- }
-
- /* Log statistics about this execution. */
- this.logger.info("Finished downloading relay descriptors from the "
- + "directory authorities.");
- this.logger.info("At the beginning of this execution, we were "
- + "missing " + oldMissingConsensuses + " consensus(es), "
- + oldMissingMicrodescConsensuses + " microdesc consensus(es), "
- + oldMissingVotes + " vote(s), " + oldMissingServerDescriptors
- + " server descriptor(s), " + oldMissingExtraInfoDescriptors
- + " extra-info descriptor(s), and " + oldMissingMicrodescriptors
- + " microdescriptor(s).");
- this.logger.info("During this execution, we added "
- + this.newMissingConsensuses + " consensus(es), "
- + this.newMissingMicrodescConsensuses
- + " microdesc consensus(es), " + this.newMissingVotes
- + " vote(s), " + this.newMissingServerDescriptors
- + " server descriptor(s), " + this.newMissingExtraInfoDescriptors
- + " extra-info descriptor(s), and "
- + this.newMissingMicrodescriptors + " microdescriptor(s) to the "
- + "missing list, some of which we also "
- + "requested and removed from the list again.");
- this.logger.info("We requested " + this.requestedConsensuses
- + " consensus(es), " + this.requestedMicrodescConsensuses
- + " microdesc consensus(es), " + this.requestedVotes
- + " vote(s), " + this.requestedMissingServerDescriptors
- + " missing server descriptor(s), "
- + this.requestedAllServerDescriptors
- + " times all server descriptors, "
- + this.requestedMissingExtraInfoDescriptors + " missing "
- + "extra-info descriptor(s), "
- + this.requestedAllExtraInfoDescriptors + " times all extra-info "
- + "descriptors, and " + this.requestedMissingMicrodescriptors
- + " missing microdescriptor(s) from the directory authorities.");
- StringBuilder sb = new StringBuilder();
- for (String authority : this.authorities) {
- sb.append(" " + authority + "="
- + this.requestsByAuthority.get(authority));
- }
- this.logger.info("We sent these numbers of requests to the directory "
- + "authorities:" + sb.toString());
- this.logger.info("We successfully downloaded "
- + this.downloadedConsensuses + " consensus(es), "
- + this.downloadedMicrodescConsensuses
- + " microdesc consensus(es), " + this.downloadedVotes
- + " vote(s), " + this.downloadedMissingServerDescriptors
- + " missing server descriptor(s), "
- + this.downloadedAllServerDescriptors
- + " server descriptor(s) when downloading all descriptors, "
- + this.downloadedMissingExtraInfoDescriptors + " missing "
- + "extra-info descriptor(s), "
- + this.downloadedAllExtraInfoDescriptors + " extra-info "
- + "descriptor(s) when downloading all descriptors, and "
- + this.downloadedMissingMicrodescriptors
- + " missing microdescriptor(s).");
- this.logger.info("At the end of this execution, we are missing "
- + missingConsensuses + " consensus(es), "
- + missingMicrodescConsensuses + " microdesc consensus(es), "
- + missingVotes + " vote(s), " + missingServerDescriptors
- + " server descriptor(s), " + missingExtraInfoDescriptors
- + " extra-info descriptor(s), and " + missingMicrodescriptors
- + " microdescriptor(s), some of which we may try in the next "
- + "execution.");
- }
-}
-
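
For reference, the missing-descriptors status file written by writeFile()
above holds one "key,value" line per tracked descriptor, with the value
"NA" marking an entry that is still missing. A minimal sketch for reading
it back (illustrative class and method names, not part of this codebase):

  import java.io.BufferedReader;
  import java.io.FileReader;
  import java.io.IOException;

  public class MissingListReaderSketch {

    /* Count the entries in a missing-descriptors status file that are
     * still marked as missing ("NA"), as opposed to carrying the
     * timestamp recorded once the descriptor was parsed. */
    static int countMissing(String path) throws IOException {
      BufferedReader br = new BufferedReader(new FileReader(path));
      int missing = 0;
      String line;
      while ((line = br.readLine()) != null) {
        if (line.endsWith(",NA")) {
          missing++;
        }
      }
      br.close();
      return missing;
    }
  }
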
diff --git a/src/org/torproject/ernie/db/relaydescs/RelayDescriptorParser.java b/src/org/torproject/ernie/db/relaydescs/RelayDescriptorParser.java
deleted file mode 100644
index 2873909..0000000
--- a/src/org/torproject/ernie/db/relaydescs/RelayDescriptorParser.java
+++ /dev/null
@@ -1,332 +0,0 @@
-/* Copyright 2010--2014 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db.relaydescs;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.StringReader;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.SortedSet;
-import java.util.TimeZone;
-import java.util.TreeSet;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.apache.commons.codec.binary.Base64;
-import org.apache.commons.codec.binary.Hex;
-import org.apache.commons.codec.digest.DigestUtils;
-
-/**
- * Parses relay descriptors including network status consensuses and
- * votes, server and extra-info descriptors, and passes the results to the
- * stats handlers, to the archive writer, or to the relay descriptor
- * downloader.
- */
-public class RelayDescriptorParser {
-
- /**
- * File writer that writes descriptor contents to files in a
- * directory-archive directory structure.
- */
- private ArchiveWriter aw;
-
- private ArchiveReader ar;
-
- /**
- * Missing descriptor downloader that uses the parse results to learn
- * which descriptors we are missing and want to download.
- */
- private RelayDescriptorDownloader rdd;
-
- /**
- * Logger for this class.
- */
- private Logger logger;
-
- private SimpleDateFormat dateTimeFormat;
-
- /**
- * Initializes this class.
- */
- public RelayDescriptorParser(ArchiveWriter aw) {
- this.aw = aw;
-
- /* Initialize logger. */
- this.logger = Logger.getLogger(RelayDescriptorParser.class.getName());
-
- this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- }
-
- public void setRelayDescriptorDownloader(
- RelayDescriptorDownloader rdd) {
- this.rdd = rdd;
- }
-
- public void setArchiveReader(ArchiveReader ar) {
- this.ar = ar;
- }
-
- public boolean parse(byte[] data) {
- boolean stored = false;
- try {
- /* Convert descriptor to ASCII for parsing. This means we'll lose
- * the non-ASCII chars, but we don't care about them for parsing
- * anyway. */
- BufferedReader br = new BufferedReader(new StringReader(new String(
- data, "US-ASCII")));
- String line;
- do {
- line = br.readLine();
- } while (line != null && line.startsWith("@"));
- if (line == null) {
- this.logger.fine("We were given an empty descriptor for "
- + "parsing. Ignoring.");
- return false;
- }
- SimpleDateFormat parseFormat =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- parseFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- if (line.startsWith("network-status-version 3")) {
- String statusType = "consensus";
- if (line.equals("network-status-version 3 microdesc")) {
- statusType = "consensus-microdesc";
- }
- String validAfterTime = null, fingerprint = null,
- dirSource = null;
- long validAfter = -1L, dirKeyPublished = -1L;
- SortedSet<String> dirSources = new TreeSet<String>();
- SortedSet<String> serverDescriptors = new TreeSet<String>();
- SortedSet<String> serverDescriptorDigests = new TreeSet<String>();
- SortedSet<String> microdescriptorKeys = new TreeSet<String>();
- SortedSet<String> microdescriptorDigests = new TreeSet<String>();
- StringBuilder certificateStringBuilder = null;
- String certificateString = null;
- String lastRelayIdentity = null;
- while ((line = br.readLine()) != null) {
- if (certificateStringBuilder != null) {
- if (line.startsWith("r ")) {
- certificateString = certificateStringBuilder.toString();
- certificateStringBuilder = null;
- } else {
- certificateStringBuilder.append(line + "\n");
- }
- }
- if (line.equals("vote-status vote")) {
- statusType = "vote";
- } else if (line.startsWith("valid-after ")) {
- validAfterTime = line.substring("valid-after ".length());
- validAfter = parseFormat.parse(validAfterTime).getTime();
- } else if (line.startsWith("dir-source ")) {
- dirSource = line.split(" ")[2];
- } else if (line.startsWith("vote-digest ")) {
- dirSources.add(dirSource);
- } else if (line.startsWith("dir-key-certificate-version ")) {
- certificateStringBuilder = new StringBuilder();
- certificateStringBuilder.append(line + "\n");
- } else if (line.startsWith("fingerprint ")) {
- fingerprint = line.split(" ")[1];
- } else if (line.startsWith("dir-key-published ")) {
- String dirKeyPublishedTime = line.substring(
- "dir-key-published ".length());
- dirKeyPublished = parseFormat.parse(dirKeyPublishedTime).
- getTime();
- } else if (line.startsWith("r ")) {
- String[] parts = line.split(" ");
- if (parts.length == 8) {
- lastRelayIdentity = Hex.encodeHexString(Base64.decodeBase64(
- parts[2] + "=")).toLowerCase();
- } else if (parts.length == 9) {
- lastRelayIdentity = Hex.encodeHexString(Base64.decodeBase64(
- parts[2] + "=")).toLowerCase();
- String serverDesc = Hex.encodeHexString(Base64.decodeBase64(
- parts[3] + "=")).toLowerCase();
- String publishedTime = parts[4] + " " + parts[5];
- serverDescriptors.add(publishedTime + ","
- + lastRelayIdentity + "," + serverDesc);
- serverDescriptorDigests.add(serverDesc);
- } else {
- this.logger.log(Level.WARNING, "Could not parse r line '"
- + line + "' in descriptor. Skipping.");
- break;
- }
- } else if (line.startsWith("m ")) {
- String[] parts = line.split(" ");
- if (parts.length == 2 && parts[1].length() == 43) {
- String digest256Base64 = parts[1];
- microdescriptorKeys.add(validAfterTime + ","
- + lastRelayIdentity + "," + digest256Base64);
- String digest256Hex = Hex.encodeHexString(
- Base64.decodeBase64(digest256Base64 + "=")).
- toLowerCase();
- microdescriptorDigests.add(digest256Hex);
- } else if (parts.length != 3 ||
- !parts[2].startsWith("sha256=") ||
- parts[2].length() != 50) {
- this.logger.log(Level.WARNING, "Could not parse m line '"
- + line + "' in descriptor. Skipping.");
- break;
- }
- }
- }
- if (statusType.equals("consensus")) {
- if (this.rdd != null) {
- this.rdd.haveParsedConsensus(validAfterTime, dirSources,
- serverDescriptors);
- }
- if (this.aw != null) {
- this.aw.storeConsensus(data, validAfter, dirSources,
- serverDescriptorDigests);
- stored = true;
- }
- } else if (statusType.equals("consensus-microdesc")) {
- if (this.rdd != null) {
- this.rdd.haveParsedMicrodescConsensus(validAfterTime,
- microdescriptorKeys);
- }
- if (this.ar != null) {
- this.ar.haveParsedMicrodescConsensus(validAfterTime,
- microdescriptorDigests);
- }
- if (this.aw != null) {
- this.aw.storeMicrodescConsensus(data, validAfter,
- microdescriptorDigests);
- stored = true;
- }
- } else {
- if (this.aw != null || this.rdd != null) {
- String ascii = new String(data, "US-ASCII");
- String startToken = "network-status-version ";
- String sigToken = "directory-signature ";
- int start = ascii.indexOf(startToken);
- int sig = ascii.indexOf(sigToken);
- if (start >= 0 && sig >= 0 && sig > start) {
- sig += sigToken.length();
- byte[] forDigest = new byte[sig - start];
- System.arraycopy(data, start, forDigest, 0, sig - start);
- String digest = DigestUtils.shaHex(forDigest).toUpperCase();
- if (this.aw != null) {
- this.aw.storeVote(data, validAfter, dirSource, digest,
- serverDescriptorDigests);
- stored = true;
- }
- if (this.rdd != null) {
- this.rdd.haveParsedVote(validAfterTime, fingerprint,
- serverDescriptors);
- }
- }
- if (certificateString != null) {
- if (this.aw != null) {
- this.aw.storeCertificate(certificateString.getBytes(),
- dirSource, dirKeyPublished);
- stored = true;
- }
- }
- }
- }
- } else if (line.startsWith("router ")) {
- String publishedTime = null, extraInfoDigest = null,
- relayIdentifier = null;
- long published = -1L;
- while ((line = br.readLine()) != null) {
- if (line.startsWith("published ")) {
- publishedTime = line.substring("published ".length());
- published = parseFormat.parse(publishedTime).getTime();
- } else if (line.startsWith("opt fingerprint") ||
- line.startsWith("fingerprint")) {
- relayIdentifier = line.substring(line.startsWith("opt ") ?
- "opt fingerprint".length() : "fingerprint".length()).
- replaceAll(" ", "").toLowerCase();
- } else if (line.startsWith("opt extra-info-digest ") ||
- line.startsWith("extra-info-digest ")) {
- extraInfoDigest = line.startsWith("opt ") ?
- line.split(" ")[2].toLowerCase() :
- line.split(" ")[1].toLowerCase();
- }
- }
- String ascii = new String(data, "US-ASCII");
- String startToken = "router ";
- String sigToken = "\nrouter-signature\n";
- int start = ascii.indexOf(startToken);
- int sig = ascii.indexOf(sigToken) + sigToken.length();
- String digest = null;
-          if (start >= 0 && sig >= 0 && sig > start) {
- byte[] forDigest = new byte[sig - start];
- System.arraycopy(data, start, forDigest, 0, sig - start);
- digest = DigestUtils.shaHex(forDigest);
- }
- if (this.aw != null && digest != null) {
- this.aw.storeServerDescriptor(data, digest, published,
- extraInfoDigest);
- stored = true;
- }
- if (this.rdd != null && digest != null) {
- this.rdd.haveParsedServerDescriptor(publishedTime,
- relayIdentifier, digest, extraInfoDigest);
- }
- } else if (line.startsWith("extra-info ")) {
- String publishedTime = null, relayIdentifier = line.split(" ")[2];
- long published = -1L;
- while ((line = br.readLine()) != null) {
- if (line.startsWith("published ")) {
- publishedTime = line.substring("published ".length());
- published = parseFormat.parse(publishedTime).getTime();
- }
- }
- String ascii = new String(data, "US-ASCII");
- String startToken = "extra-info ";
- String sigToken = "\nrouter-signature\n";
- String digest = null;
- int start = ascii.indexOf(startToken);
- if (start > 0) {
- /* Do not confuse "extra-info " in "@type extra-info 1.0" with
- * "extra-info 0000...". TODO This is a hack that should be
- * solved by using metrics-lib some day. */
- start = ascii.indexOf("\n" + startToken);
- if (start > 0) {
- start++;
- }
- }
- int sig = ascii.indexOf(sigToken) + sigToken.length();
- if (start >= 0 && sig >= 0 && sig > start) {
- byte[] forDigest = new byte[sig - start];
- System.arraycopy(data, start, forDigest, 0, sig - start);
- digest = DigestUtils.shaHex(forDigest);
- }
- if (this.aw != null && digest != null) {
- this.aw.storeExtraInfoDescriptor(data, digest, published);
- stored = true;
- }
- if (this.rdd != null && digest != null) {
- this.rdd.haveParsedExtraInfoDescriptor(publishedTime,
- relayIdentifier.toLowerCase(), digest);
- }
- } else if (line.equals("onion-key")) {
- /* Cannot store microdescriptors without knowing valid-after
- * time(s) of microdesc consensuses containing them, because we
- * don't know which month directories to put them in. Have to use
- * storeMicrodescriptor below. */
- }
- br.close();
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not parse descriptor. "
- + "Skipping.", e);
- } catch (ParseException e) {
- this.logger.log(Level.WARNING, "Could not parse descriptor. "
- + "Skipping.", e);
- }
- return stored;
- }
-
- public void storeMicrodescriptor(byte[] data, String digest256Hex,
- String digest256Base64, long validAfter) {
- if (this.aw != null) {
- this.aw.storeMicrodescriptor(data, digest256Hex, validAfter);
- }
- if (this.rdd != null) {
- this.rdd.haveParsedMicrodescriptor(digest256Base64);
- }
- }
-}
-
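
The vote-digest computation in the parser above is compact enough to
restate on its own. A sketch, assuming commons-codec on the classpath
(class and method names are illustrative, not part of this codebase):

  import java.io.UnsupportedEncodingException;

  import org.apache.commons.codec.digest.DigestUtils;

  public class VoteDigestSketch {

    /* Compute the digest of a v3 vote: SHA-1 over the bytes from
     * "network-status-version" up to and including the token
     * "directory-signature ", hex-encoded and upper-cased. */
    static String voteDigest(byte[] data)
        throws UnsupportedEncodingException {
      String ascii = new String(data, "US-ASCII");
      String startToken = "network-status-version ";
      String sigToken = "directory-signature ";
      int start = ascii.indexOf(startToken);
      int sig = ascii.indexOf(sigToken);
      if (start < 0 || sig < 0 || sig <= start) {
        return null;
      }
      sig += sigToken.length();
      byte[] forDigest = new byte[sig - start];
      System.arraycopy(data, start, forDigest, 0, sig - start);
      return DigestUtils.shaHex(forDigest).toUpperCase();
    }
  }
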
diff --git a/src/org/torproject/ernie/db/torperf/TorperfDownloader.java b/src/org/torproject/ernie/db/torperf/TorperfDownloader.java
deleted file mode 100644
index ebca600..0000000
--- a/src/org/torproject/ernie/db/torperf/TorperfDownloader.java
+++ /dev/null
@@ -1,634 +0,0 @@
-/* Copyright 2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db.torperf;
-
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.net.HttpURLConnection;
-import java.net.URL;
-import java.text.SimpleDateFormat;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
-import java.util.SortedMap;
-import java.util.Stack;
-import java.util.TimeZone;
-import java.util.TreeMap;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.torproject.ernie.db.main.Configuration;
-import org.torproject.ernie.db.main.LockFile;
-import org.torproject.ernie.db.main.LoggingConfiguration;
-
-/* Download possibly truncated Torperf .data and .extradata files from
- * configured sources, append them to the files we already have, and merge
- * the two files into the .tpf format. */
-public class TorperfDownloader extends Thread {
-
- public static void main(String[] args) {
-
- /* Initialize logging configuration. */
- new LoggingConfiguration("torperf");
- Logger logger = Logger.getLogger(TorperfDownloader.class.getName());
- logger.info("Starting torperf module of ERNIE.");
-
- // Initialize configuration
- Configuration config = new Configuration();
-
- // Use lock file to avoid overlapping runs
- LockFile lf = new LockFile("torperf");
- if (!lf.acquireLock()) {
- logger.severe("Warning: ERNIE is already running or has not exited "
- + "cleanly! Exiting!");
- System.exit(1);
- }
-
- // Process Torperf files
- new TorperfDownloader(config).run();
-
- // Remove lock file
- lf.releaseLock();
-
- logger.info("Terminating torperf module of ERNIE.");
- }
-
- private Configuration config;
-
- public TorperfDownloader(Configuration config) {
- this.config = config;
- }
-
- private File torperfOutputDirectory = null;
- private SortedMap<String, String> torperfSources = null;
- private List<String> torperfFilesLines = null;
- private Logger logger = null;
- private SimpleDateFormat dateFormat;
-
- public void run() {
-
- File torperfOutputDirectory =
- new File(config.getTorperfOutputDirectory());
- SortedMap<String, String> torperfSources = config.getTorperfSources();
- List<String> torperfFilesLines = config.getTorperfFiles();
-
- this.torperfOutputDirectory = torperfOutputDirectory;
- this.torperfSources = torperfSources;
- this.torperfFilesLines = torperfFilesLines;
- if (!this.torperfOutputDirectory.exists()) {
- this.torperfOutputDirectory.mkdirs();
- }
- this.logger = Logger.getLogger(TorperfDownloader.class.getName());
- this.dateFormat = new SimpleDateFormat("yyyy-MM-dd");
- this.dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- this.readLastMergedTimestamps();
- for (String torperfFilesLine : this.torperfFilesLines) {
- this.downloadAndMergeFiles(torperfFilesLine);
- }
- this.writeLastMergedTimestamps();
-
- this.cleanUpRsyncDirectory();
- }
-
- private File torperfLastMergedFile =
- new File("stats/torperf-last-merged");
- SortedMap<String, String> lastMergedTimestamps =
- new TreeMap<String, String>();
- private void readLastMergedTimestamps() {
- if (!this.torperfLastMergedFile.exists()) {
- return;
- }
- try {
- BufferedReader br = new BufferedReader(new FileReader(
- this.torperfLastMergedFile));
- String line;
- while ((line = br.readLine()) != null) {
- String[] parts = line.split(" ");
- String fileName = null, timestamp = null;
- if (parts.length == 2) {
- try {
- Double.parseDouble(parts[1]);
- fileName = parts[0];
- timestamp = parts[1];
- } catch (NumberFormatException e) {
- /* Handle below. */
- }
- }
- if (fileName == null || timestamp == null) {
- this.logger.log(Level.WARNING, "Invalid line '" + line + "' in "
- + this.torperfLastMergedFile.getAbsolutePath() + ". "
- + "Ignoring past history of merging .data and .extradata "
- + "files.");
- this.lastMergedTimestamps.clear();
- break;
- }
- this.lastMergedTimestamps.put(fileName, timestamp);
- }
- br.close();
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Error while reading '"
- + this.torperfLastMergedFile.getAbsolutePath() + ". Ignoring "
- + "past history of merging .data and .extradata files.");
- this.lastMergedTimestamps.clear();
- }
- }
-
- private void writeLastMergedTimestamps() {
- try {
- this.torperfLastMergedFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- this.torperfLastMergedFile));
- for (Map.Entry<String, String> e :
- this.lastMergedTimestamps.entrySet()) {
- String fileName = e.getKey();
- String timestamp = e.getValue();
- bw.write(fileName + " " + timestamp + "\n");
- }
- bw.close();
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Error while writing '"
- + this.torperfLastMergedFile.getAbsolutePath() + ". This may "
- + "result in ignoring history of merging .data and .extradata "
- + "files in the next execution.", e);
- }
- }
-
- private void downloadAndMergeFiles(String torperfFilesLine) {
- String[] parts = torperfFilesLine.split(" ");
- String sourceName = parts[1];
- int fileSize = -1;
- try {
- fileSize = Integer.parseInt(parts[2]);
- } catch (NumberFormatException e) {
- this.logger.log(Level.WARNING, "Could not parse file size in "
- + "TorperfFiles configuration line '" + torperfFilesLine
- + "'.");
- return;
- }
-
- /* Download and append the .data file. */
- String dataFileName = parts[3];
- String sourceBaseUrl = torperfSources.get(sourceName);
- String dataUrl = sourceBaseUrl + dataFileName;
- String dataOutputFileName = sourceName + "-" + dataFileName;
- File dataOutputFile = new File(torperfOutputDirectory,
- dataOutputFileName);
- boolean downloadedDataFile = this.downloadAndAppendFile(dataUrl,
- dataOutputFile, true);
-
- /* Download and append the .extradata file. */
- String extradataFileName = parts[4];
- String extradataUrl = sourceBaseUrl + extradataFileName;
- String extradataOutputFileName = sourceName + "-" + extradataFileName;
- File extradataOutputFile = new File(torperfOutputDirectory,
- extradataOutputFileName);
- boolean downloadedExtradataFile = this.downloadAndAppendFile(
- extradataUrl, extradataOutputFile, false);
-
- /* Merge both files into .tpf format. */
- if (!downloadedDataFile && !downloadedExtradataFile) {
- return;
- }
- String skipUntil = null;
- if (this.lastMergedTimestamps.containsKey(dataOutputFileName)) {
- skipUntil = this.lastMergedTimestamps.get(dataOutputFileName);
- }
- try {
- skipUntil = this.mergeFiles(dataOutputFile, extradataOutputFile,
- sourceName, fileSize, skipUntil);
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed merging " + dataOutputFile
- + " and " + extradataOutputFile + ".", e);
- }
- if (skipUntil != null) {
- this.lastMergedTimestamps.put(dataOutputFileName, skipUntil);
- }
- }
-
- private boolean downloadAndAppendFile(String url, File outputFile,
- boolean isDataFile) {
-
- /* Read an existing output file to determine which line will be the
- * first to append to it. */
- String lastTimestampLine = null;
- int linesAfterLastTimestampLine = 0;
- if (outputFile.exists()) {
- try {
- BufferedReader br = new BufferedReader(new FileReader(
- outputFile));
- String line;
- while ((line = br.readLine()) != null) {
- if (isDataFile || line.contains(" LAUNCH")) {
- lastTimestampLine = line;
- linesAfterLastTimestampLine = 0;
- } else {
- linesAfterLastTimestampLine++;
- }
- }
- br.close();
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed reading '"
- + outputFile.getAbsolutePath() + "' to determine the first "
- + "line to append to it.", e);
- return false;
- }
- }
- try {
- this.logger.fine("Downloading " + (isDataFile ? ".data" :
- ".extradata") + " file from '" + url + "' and merging it into "
- + "'" + outputFile.getAbsolutePath() + "'.");
- URL u = new URL(url);
- HttpURLConnection huc = (HttpURLConnection) u.openConnection();
- huc.setRequestMethod("GET");
- huc.connect();
- BufferedReader br = new BufferedReader(new InputStreamReader(
- huc.getInputStream()));
- String line;
- BufferedWriter bw = new BufferedWriter(new FileWriter(outputFile,
- true));
- boolean copyLines = lastTimestampLine == null;
- while ((line = br.readLine()) != null) {
- if (copyLines && linesAfterLastTimestampLine == 0) {
- if (isDataFile || line.contains(" LAUNCH")) {
- lastTimestampLine = line;
- }
- bw.write(line + "\n");
- } else if (copyLines && linesAfterLastTimestampLine > 0) {
- linesAfterLastTimestampLine--;
- } else if (line.equals(lastTimestampLine)) {
- copyLines = true;
- }
- }
- bw.close();
- br.close();
- if (!copyLines) {
- this.logger.warning("The last timestamp line in '"
- + outputFile.getAbsolutePath() + "' is not contained in the "
- + "new file downloaded from '" + url + "'. Cannot append "
- + "new lines without possibly leaving a gap. Skipping.");
- return false;
- }
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed downloading and/or merging '"
- + url + "'.", e);
- return false;
- }
- if (lastTimestampLine == null) {
- this.logger.warning("'" + outputFile.getAbsolutePath()
- + "' doesn't contain any timestamp lines. Unable to check "
- + "whether that file is stale or not.");
- } else {
- long lastTimestampMillis = -1L;
- if (isDataFile) {
- lastTimestampMillis = Long.parseLong(lastTimestampLine.substring(
- 0, lastTimestampLine.indexOf(" "))) * 1000L;
- } else {
- lastTimestampMillis = Long.parseLong(lastTimestampLine.substring(
- lastTimestampLine.indexOf(" LAUNCH=") + " LAUNCH=".length(),
- lastTimestampLine.indexOf(".",
- lastTimestampLine.indexOf(" LAUNCH=")))) * 1000L;
- }
- if (lastTimestampMillis < System.currentTimeMillis()
- - 330L * 60L * 1000L) {
- this.logger.warning("The last timestamp in '"
- + outputFile.getAbsolutePath() + "' is more than 5:30 hours "
- + "old: " + lastTimestampMillis);
- }
- }
- return true;
- }
-
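
The staleness check at the end of downloadAndAppendFile above can also be
restated on its own. A sketch (illustrative names, not part of this
codebase), assuming .data lines start with "<seconds> <microseconds> ..."
and .extradata timestamp lines contain " LAUNCH=<seconds>.<fraction>":

  public class StaleCheckSketch {

    /* Extract the epoch milliseconds from the last timestamp line of
     * a .data file or of an .extradata file. */
    static long lastTimestampMillis(String line, boolean isDataFile) {
      if (isDataFile) {
        return Long.parseLong(line.substring(0, line.indexOf(" ")))
            * 1000L;
      }
      int launch = line.indexOf(" LAUNCH=") + " LAUNCH=".length();
      return Long.parseLong(line.substring(launch,
          line.indexOf(".", launch))) * 1000L;
    }

    /* A file is considered stale if its last timestamp is more than
     * 5:30 hours old. */
    static boolean isStale(long lastTimestampMillis) {
      return lastTimestampMillis
          < System.currentTimeMillis() - 330L * 60L * 1000L;
    }
  }
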
- private String mergeFiles(File dataFile, File extradataFile,
- String source, int fileSize, String skipUntil) throws IOException {
- SortedMap<String, String> config = new TreeMap<String, String>();
- config.put("SOURCE", source);
- config.put("FILESIZE", String.valueOf(fileSize));
- if (!dataFile.exists() || !extradataFile.exists()) {
- this.logger.warning("File " + dataFile.getAbsolutePath() + " or "
- + extradataFile.getAbsolutePath() + " is missing.");
- return null;
- }
- this.logger.fine("Merging " + dataFile.getAbsolutePath() + " and "
- + extradataFile.getAbsolutePath() + " into .tpf format.");
- BufferedReader brD = new BufferedReader(new FileReader(dataFile)),
- brE = new BufferedReader(new FileReader(extradataFile));
- String lineD = brD.readLine(), lineE = brE.readLine();
- int d = 1, e = 1;
- String maxDataComplete = null, maxUsedAt = null;
- while (lineD != null) {
-
- /* Parse .data line. Every valid .data line will go into the .tpf
- * format, either with additional information from the .extradata
- * file or without it. */
- if (lineD.isEmpty()) {
- this.logger.finer("Skipping empty line " + dataFile.getName()
- + ":" + d++ + ".");
- lineD = brD.readLine();
- continue;
- }
- SortedMap<String, String> data = this.parseDataLine(lineD);
- if (data == null) {
- this.logger.finer("Skipping illegal line " + dataFile.getName()
- + ":" + d++ + " '" + lineD + "'.");
- lineD = brD.readLine();
- continue;
- }
- String dataComplete = data.get("DATACOMPLETE");
- double dataCompleteSeconds = Double.parseDouble(dataComplete);
- if (skipUntil != null && dataComplete.compareTo(skipUntil) < 0) {
- this.logger.finer("Skipping " + dataFile.getName() + ":"
- + d++ + " which we already processed before.");
- lineD = brD.readLine();
- continue;
- }
- maxDataComplete = dataComplete;
-
- /* Parse .extradata line if available and try to find the one that
- * matches the .data line. */
- SortedMap<String, String> extradata = null;
- while (lineE != null) {
- if (lineE.isEmpty()) {
- this.logger.finer("Skipping " + extradataFile.getName() + ":"
- + e++ + " which is empty.");
- lineE = brE.readLine();
- continue;
- }
- if (lineE.startsWith("BUILDTIMEOUT_SET ")) {
- this.logger.finer("Skipping " + extradataFile.getName() + ":"
- + e++ + " which is a BUILDTIMEOUT_SET line.");
- lineE = brE.readLine();
- continue;
- } else if (lineE.startsWith("ok ") ||
- lineE.startsWith("error ")) {
- this.logger.finer("Skipping " + extradataFile.getName() + ":"
- + e++ + " which is in the old format.");
- lineE = brE.readLine();
- continue;
- }
- extradata = this.parseExtradataLine(lineE);
- if (extradata == null) {
- this.logger.finer("Skipping Illegal line "
- + extradataFile.getName() + ":" + e++ + " '" + lineE
- + "'.");
- lineE = brE.readLine();
- continue;
- }
- if (!extradata.containsKey("USED_AT")) {
- this.logger.finer("Skipping " + extradataFile.getName() + ":"
- + e++ + " which doesn't contain a USED_AT element.");
- lineE = brE.readLine();
- continue;
- }
- String usedAt = extradata.get("USED_AT");
- double usedAtSeconds = Double.parseDouble(usedAt);
- if (skipUntil != null && usedAt.compareTo(skipUntil) < 0) {
- this.logger.finer("Skipping " + extradataFile.getName() + ":"
- + e++ + " which we already processed before.");
- lineE = brE.readLine();
- continue;
- }
- maxUsedAt = usedAt;
- if (Math.abs(usedAtSeconds - dataCompleteSeconds) <= 1.0) {
- this.logger.fine("Merging " + extradataFile.getName() + ":"
- + e++ + " into the current .data line.");
- lineE = brE.readLine();
- break;
- } else if (usedAtSeconds > dataCompleteSeconds) {
- this.logger.finer("Comparing " + extradataFile.getName()
- + " to the next .data line.");
- extradata = null;
- break;
- } else {
- this.logger.finer("Skipping " + extradataFile.getName() + ":"
- + e++ + " which is too old to be merged with "
- + dataFile.getName() + ":" + d + ".");
- lineE = brE.readLine();
- continue;
- }
- }
-
- /* Write output line to .tpf file. */
- SortedMap<String, String> keysAndValues =
- new TreeMap<String, String>();
- if (extradata != null) {
- keysAndValues.putAll(extradata);
- }
- keysAndValues.putAll(data);
- keysAndValues.putAll(config);
- this.logger.fine("Writing " + dataFile.getName() + ":" + d++ + ".");
- lineD = brD.readLine();
- try {
- this.writeTpfLine(source, fileSize, keysAndValues);
- } catch (IOException ex) {
- this.logger.log(Level.WARNING, "Error writing output line. "
- + "Aborting to merge " + dataFile.getName() + " and "
- + extradataFile.getName() + ".", e);
- break;
- }
- }
- brD.close();
- brE.close();
- this.writeCachedTpfLines();
- if (maxDataComplete == null) {
- return maxUsedAt;
- } else if (maxUsedAt == null) {
- return maxDataComplete;
- } else if (maxDataComplete.compareTo(maxUsedAt) > 0) {
- return maxUsedAt;
- } else {
- return maxDataComplete;
- }
- }
-
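
The rule by which mergeFiles above pairs lines from the two files is worth
stating explicitly; a minimal sketch (illustrative names, not part of this
codebase):

  public class PairingRuleSketch {

    /* A .data line and an .extradata line describe the same request
     * if and only if their DATACOMPLETE and USED_AT timestamps lie
     * within one second of each other. */
    static boolean matches(String dataComplete, String usedAt) {
      return Math.abs(Double.parseDouble(usedAt)
          - Double.parseDouble(dataComplete)) <= 1.0;
    }
  }
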
- private SortedMap<Integer, String> dataTimestamps;
- private SortedMap<String, String> parseDataLine(String line) {
- String[] parts = line.trim().split(" ");
- if (line.length() == 0 || parts.length < 20) {
- return null;
- }
- if (this.dataTimestamps == null) {
- this.dataTimestamps = new TreeMap<Integer, String>();
- this.dataTimestamps.put(0, "START");
- this.dataTimestamps.put(2, "SOCKET");
- this.dataTimestamps.put(4, "CONNECT");
- this.dataTimestamps.put(6, "NEGOTIATE");
- this.dataTimestamps.put(8, "REQUEST");
- this.dataTimestamps.put(10, "RESPONSE");
- this.dataTimestamps.put(12, "DATAREQUEST");
- this.dataTimestamps.put(14, "DATARESPONSE");
- this.dataTimestamps.put(16, "DATACOMPLETE");
- this.dataTimestamps.put(21, "DATAPERC10");
- this.dataTimestamps.put(23, "DATAPERC20");
- this.dataTimestamps.put(25, "DATAPERC30");
- this.dataTimestamps.put(27, "DATAPERC40");
- this.dataTimestamps.put(29, "DATAPERC50");
- this.dataTimestamps.put(31, "DATAPERC60");
- this.dataTimestamps.put(33, "DATAPERC70");
- this.dataTimestamps.put(35, "DATAPERC80");
- this.dataTimestamps.put(37, "DATAPERC90");
- }
- SortedMap<String, String> data = new TreeMap<String, String>();
- try {
- for (Map.Entry<Integer, String> e : this.dataTimestamps.entrySet()) {
- int i = e.getKey();
- if (parts.length > i + 1) {
- String key = e.getValue();
- String value = String.format("%s.%02d", parts[i],
- Integer.parseInt(parts[i + 1]) / 10000);
- data.put(key, value);
- }
- }
- } catch (NumberFormatException e) {
- return null;
- }
- data.put("WRITEBYTES", parts[18]);
- data.put("READBYTES", parts[19]);
- if (parts.length >= 21) {
- data.put("DIDTIMEOUT", parts[20]);
- }
- return data;
- }
-
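
A worked example of the timestamp formatting in parseDataLine above, using
a made-up column pair of seconds and microseconds:

  public class DataTimestampSketch {
    public static void main(String[] args) {
      String seconds = "1330535248";
      String microseconds = "954320";
      /* Integer division by 10000 turns microseconds into
       * centiseconds, so the formatted value has exactly two decimal
       * places. */
      String value = String.format("%s.%02d", seconds,
          Integer.parseInt(microseconds) / 10000);
      System.out.println(value);  // prints 1330535248.95
    }
  }
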
- private SortedMap<String, String> parseExtradataLine(String line) {
- String[] parts = line.split(" ");
- SortedMap<String, String> extradata = new TreeMap<String, String>();
- String previousKey = null;
- for (String part : parts) {
- String[] keyAndValue = part.split("=", -1);
- if (keyAndValue.length == 2) {
- String key = keyAndValue[0];
- previousKey = key;
- String value = keyAndValue[1];
- if (value.contains(".") && value.lastIndexOf(".") ==
- value.length() - 2) {
-            /* Make sure that all floats have two decimal places. */
- value += "0";
- }
- extradata.put(key, value);
- } else if (keyAndValue.length == 1 && previousKey != null) {
- String value = keyAndValue[0];
- if (previousKey.equals("STREAM_FAIL_REASONS") &&
- (value.equals("MISC") || value.equals("EXITPOLICY") ||
- value.equals("RESOURCELIMIT") ||
- value.equals("RESOLVEFAILED"))) {
- extradata.put(previousKey, extradata.get(previousKey) + ":"
- + value);
- } else {
- return null;
- }
- } else {
- return null;
- }
- }
- return extradata;
- }
-
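
Two worked examples for parseExtradataLine above, on simplified, made-up
input lines:

  "USED_AT=1330535266.61 PATH=$A,$B,$C DIDTIMEOUT=0"
    -> {DIDTIMEOUT=0, PATH=$A,$B,$C, USED_AT=1330535266.61}

  "STREAM_FAIL_REASONS=TIMEOUT MISC"
    -> {STREAM_FAIL_REASONS=TIMEOUT:MISC}

The bare token "MISC" in the second line is only accepted because the
previous key was STREAM_FAIL_REASONS; any other bare token makes the
method return null, and the line is skipped as illegal.
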
- private String cachedSource;
- private int cachedFileSize;
- private String cachedStartDate;
- private SortedMap<String, String> cachedTpfLines;
- private void writeTpfLine(String source, int fileSize,
- SortedMap<String, String> keysAndValues) throws IOException {
- StringBuilder sb = new StringBuilder();
- int written = 0;
- for (Map.Entry<String, String> keyAndValue :
- keysAndValues.entrySet()) {
- String key = keyAndValue.getKey();
- String value = keyAndValue.getValue();
- sb.append((written++ > 0 ? " " : "") + key + "=" + value);
- }
- String line = sb.toString();
- String startString = keysAndValues.get("START");
- long startMillis = Long.parseLong(startString.substring(0,
- startString.indexOf("."))) * 1000L;
- String startDate = dateFormat.format(startMillis);
- if (this.cachedTpfLines == null || !source.equals(this.cachedSource) ||
- fileSize != this.cachedFileSize ||
- !startDate.equals(this.cachedStartDate)) {
- this.writeCachedTpfLines();
- this.readTpfLinesToCache(source, fileSize, startDate);
- }
- if (!this.cachedTpfLines.containsKey(startString) ||
- line.length() > this.cachedTpfLines.get(startString).length()) {
- this.cachedTpfLines.put(startString, line);
- }
- }
-
- private void readTpfLinesToCache(String source, int fileSize,
- String startDate) throws IOException {
- this.cachedTpfLines = new TreeMap<String, String>();
- this.cachedSource = source;
- this.cachedFileSize = fileSize;
- this.cachedStartDate = startDate;
- File tpfFile = new File(torperfOutputDirectory,
- startDate.replaceAll("-", "/") + "/"
- + source + "-" + String.valueOf(fileSize) + "-" + startDate
- + ".tpf");
- if (!tpfFile.exists()) {
- return;
- }
- BufferedReader br = new BufferedReader(new FileReader(tpfFile));
- String line;
- while ((line = br.readLine()) != null) {
- if (line.startsWith("@type ")) {
- continue;
- }
- if (line.contains("START=")) {
- String startString = line.substring(line.indexOf("START=")
- + "START=".length()).split(" ")[0];
- this.cachedTpfLines.put(startString, line);
- }
- }
- br.close();
- }
-
- private void writeCachedTpfLines() throws IOException {
- if (this.cachedSource == null || this.cachedFileSize == 0 ||
- this.cachedStartDate == null || this.cachedTpfLines == null) {
- return;
- }
- File tarballFile = new File(torperfOutputDirectory,
- this.cachedStartDate.replaceAll("-", "/")
- + "/" + this.cachedSource + "-"
- + String.valueOf(this.cachedFileSize) + "-"
- + this.cachedStartDate + ".tpf");
- File rsyncFile = new File("recent/torperf/" + tarballFile.getName());
- File[] outputFiles = new File[] { tarballFile, rsyncFile };
- for (File outputFile : outputFiles) {
- outputFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(outputFile));
- for (String line : this.cachedTpfLines.values()) {
- bw.write("@type torperf 1.0\n");
- bw.write(line + "\n");
- }
- bw.close();
- }
- this.cachedSource = null;
- this.cachedFileSize = 0;
- this.cachedStartDate = null;
- this.cachedTpfLines = null;
- }
-
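
The .tpf path layout used by readTpfLinesToCache and writeCachedTpfLines
above, restated as a sketch (illustrative names, not part of this
codebase; startDate is formatted as "yyyy-MM-dd"):

  import java.io.File;

  public class TpfPathSketch {

    /* One .tpf file per source, file size, and UTC start date, nested
     * in year/month/day directories under the output directory. */
    static File tpfFile(File outputDirectory, String source,
        int fileSize, String startDate) {
      return new File(outputDirectory,
          startDate.replaceAll("-", "/") + "/" + source + "-" + fileSize
          + "-" + startDate + ".tpf");
    }
  }
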
- /* Delete all files from the rsync directory that have not been modified
- * in the last three days. */
- public void cleanUpRsyncDirectory() {
- long cutOffMillis = System.currentTimeMillis()
- - 3L * 24L * 60L * 60L * 1000L;
- Stack<File> allFiles = new Stack<File>();
- allFiles.add(new File("recent/torperf"));
- while (!allFiles.isEmpty()) {
- File file = allFiles.pop();
- if (file.isDirectory()) {
- allFiles.addAll(Arrays.asList(file.listFiles()));
- } else if (file.lastModified() < cutOffMillis) {
- file.delete();
- }
- }
- }
-}
-