[tor-commits] [metrics-db/master] Group classes by kind of processed metrics data.
karsten at torproject.org
karsten at torproject.org
Sat Oct 27 20:07:46 UTC 2012
commit 8746badd1bfd8cb05983159be2336f1cf72cbd44
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Fri Oct 26 12:44:42 2012 -0400
Group classes by kind of processed metrics data.
Also remove unused unit tests.
---
build.xml | 24 +-
src/org/torproject/ernie/db/ArchiveReader.java | 146 ----
src/org/torproject/ernie/db/ArchiveWriter.java | 339 --------
.../ernie/db/BridgeDescriptorParser.java | 46 -
.../ernie/db/BridgePoolAssignmentsProcessor.java | 174 ----
.../torproject/ernie/db/BridgeSnapshotReader.java | 220 -----
.../ernie/db/CachedRelayDescriptorReader.java | 235 -----
src/org/torproject/ernie/db/Configuration.java | 359 --------
.../torproject/ernie/db/ExitListDownloader.java | 100 ---
src/org/torproject/ernie/db/LockFile.java | 52 --
.../torproject/ernie/db/LoggingConfiguration.java | 93 --
src/org/torproject/ernie/db/Main.java | 160 ----
.../ernie/db/RelayDescriptorDownloader.java | 821 ------------------
.../torproject/ernie/db/RelayDescriptorParser.java | 265 ------
src/org/torproject/ernie/db/RsyncDataProvider.java | 217 -----
.../ernie/db/SanitizedBridgesWriter.java | 911 --------------------
src/org/torproject/ernie/db/TorperfDownloader.java | 573 ------------
.../db/bridgedescs/BridgeDescriptorParser.java | 46 +
.../ernie/db/bridgedescs/BridgeSnapshotReader.java | 220 +++++
.../db/bridgedescs/SanitizedBridgesWriter.java | 911 ++++++++++++++++++++
.../BridgePoolAssignmentsProcessor.java | 174 ++++
.../ernie/db/exitlists/ExitListDownloader.java | 100 +++
.../torproject/ernie/db/main/Configuration.java | 359 ++++++++
src/org/torproject/ernie/db/main/LockFile.java | 52 ++
.../ernie/db/main/LoggingConfiguration.java | 93 ++
src/org/torproject/ernie/db/main/Main.java | 172 ++++
.../ernie/db/main/RsyncDataProvider.java | 217 +++++
.../ernie/db/relaydescs/ArchiveReader.java | 146 ++++
.../ernie/db/relaydescs/ArchiveWriter.java | 339 ++++++++
.../db/relaydescs/CachedRelayDescriptorReader.java | 235 +++++
.../db/relaydescs/RelayDescriptorDownloader.java | 821 ++++++++++++++++++
.../ernie/db/relaydescs/RelayDescriptorParser.java | 265 ++++++
.../ernie/db/torperf/TorperfDownloader.java | 573 ++++++++++++
.../org/torproject/ernie/db/ArchiveReaderTest.java | 32 -
.../org/torproject/ernie/db/ArchiveWriterTest.java | 19 -
.../ernie/db/BridgeSnapshotReaderTest.java | 32 -
.../ernie/db/CachedRelayDescriptorReaderTest.java | 31 -
.../ernie/db/SanitizedBridgesWriterTest.java | 38 -
38 files changed, 4724 insertions(+), 4886 deletions(-)
diff --git a/build.xml b/build.xml
index ce3e337..7cba58c 100644
--- a/build.xml
+++ b/build.xml
@@ -1,7 +1,6 @@
<project default="run" name="ERNIE" basedir=".">
<property name="sources" value="src/"/>
<property name="classes" value="classes/"/>
- <property name="tests" value="test"/>
<property name="docs" value="javadoc/"/>
<property name="name" value="ERNIE"/>
<path id="classpath">
@@ -27,7 +26,7 @@
<target name="run" depends="compile">
<java fork="true"
maxmemory="2048m"
- classname="org.torproject.ernie.db.Main">
+ classname="org.torproject.ernie.db.main.Main">
<classpath refid="classpath"/>
</java>
</target>
@@ -38,26 +37,5 @@
<fileset dir="${sources}/" includes="**/*.java" />
</javadoc>
</target>
- <target name="test" depends="compile">
- <javac destdir="${classes}"
- srcdir="${tests}"
- source="1.5"
- target="1.5"
- debug="true"
- deprecation="true"
- optimize="false"
- failonerror="true"
- includeantruntime="false">
- <classpath refid="classpath"/>
- </javac>
- <junit haltonfailure="true" printsummary="off">
- <classpath refid="classpath"/>
- <formatter type="plain" usefile="false"/>
- <batchtest>
- <fileset dir="${classes}"
- includes="**/*Test.class"/>
- </batchtest>
- </junit>
- </target>
</project>
diff --git a/src/org/torproject/ernie/db/ArchiveReader.java b/src/org/torproject/ernie/db/ArchiveReader.java
deleted file mode 100644
index 06abf6c..0000000
--- a/src/org/torproject/ernie/db/ArchiveReader.java
+++ /dev/null
@@ -1,146 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.BufferedInputStream;
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.SortedSet;
-import java.util.Stack;
-import java.util.TreeSet;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
-
-/**
- * Read in all files in a given directory and pass buffered readers of
- * them to the relay descriptor parser.
- */
-public class ArchiveReader {
- public ArchiveReader(RelayDescriptorParser rdp, File archivesDirectory,
- File statsDirectory, boolean keepImportHistory) {
-
- if (rdp == null || archivesDirectory == null ||
- statsDirectory == null) {
- throw new IllegalArgumentException();
- }
-
- int parsedFiles = 0, ignoredFiles = 0;
- Logger logger = Logger.getLogger(ArchiveReader.class.getName());
- SortedSet<String> archivesImportHistory = new TreeSet<String>();
- File archivesImportHistoryFile = new File(statsDirectory,
- "archives-import-history");
- if (keepImportHistory && archivesImportHistoryFile.exists()) {
- try {
- BufferedReader br = new BufferedReader(new FileReader(
- archivesImportHistoryFile));
- String line = null;
- while ((line = br.readLine()) != null) {
- archivesImportHistory.add(line);
- }
- br.close();
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not read in archives import "
- + "history file. Skipping.");
- }
- }
- if (archivesDirectory.exists()) {
- logger.fine("Importing files in directory " + archivesDirectory
- + "/...");
- Stack<File> filesInInputDir = new Stack<File>();
- filesInInputDir.add(archivesDirectory);
- List<File> problems = new ArrayList<File>();
- while (!filesInInputDir.isEmpty()) {
- File pop = filesInInputDir.pop();
- if (pop.isDirectory()) {
- for (File f : pop.listFiles()) {
- filesInInputDir.add(f);
- }
- } else {
- if (rdp != null) {
- try {
- BufferedInputStream bis = null;
- if (keepImportHistory &&
- archivesImportHistory.contains(pop.getName())) {
- ignoredFiles++;
- continue;
- } else if (pop.getName().endsWith(".tar.bz2")) {
- logger.warning("Cannot parse compressed tarball "
- + pop.getAbsolutePath() + ". Skipping.");
- continue;
- } else if (pop.getName().endsWith(".bz2")) {
- FileInputStream fis = new FileInputStream(pop);
- BZip2CompressorInputStream bcis =
- new BZip2CompressorInputStream(fis);
- bis = new BufferedInputStream(bcis);
- } else {
- FileInputStream fis = new FileInputStream(pop);
- bis = new BufferedInputStream(fis);
- }
- if (keepImportHistory) {
- archivesImportHistory.add(pop.getName());
- }
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- int len;
- byte[] data = new byte[1024];
- while ((len = bis.read(data, 0, 1024)) >= 0) {
- baos.write(data, 0, len);
- }
- bis.close();
- byte[] allData = baos.toByteArray();
- rdp.parse(allData);
- parsedFiles++;
- } catch (IOException e) {
- problems.add(pop);
- if (problems.size() > 3) {
- break;
- }
- }
- }
- }
- }
- if (problems.isEmpty()) {
- logger.fine("Finished importing files in directory "
- + archivesDirectory + "/.");
- } else {
- StringBuilder sb = new StringBuilder("Failed importing files in "
- + "directory " + archivesDirectory + "/:");
- int printed = 0;
- for (File f : problems) {
- sb.append("\n " + f.getAbsolutePath());
- if (++printed >= 3) {
- sb.append("\n ... more");
- break;
- }
- }
- }
- }
- if (keepImportHistory) {
- try {
- archivesImportHistoryFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- archivesImportHistoryFile));
- for (String line : archivesImportHistory) {
- bw.write(line + "\n");
- }
- bw.close();
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not write archives import "
- + "history file.");
- }
- }
- logger.info("Finished importing relay descriptors from local "
- + "directory:\nParsed " + parsedFiles + ", ignored "
- + ignoredFiles + " files.");
- }
-}
-
diff --git a/src/org/torproject/ernie/db/ArchiveWriter.java b/src/org/torproject/ernie/db/ArchiveWriter.java
deleted file mode 100644
index d1b9499..0000000
--- a/src/org/torproject/ernie/db/ArchiveWriter.java
+++ /dev/null
@@ -1,339 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.BufferedOutputStream;
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.FileReader;
-import java.io.IOException;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.Date;
-import java.util.SortedSet;
-import java.util.Stack;
-import java.util.TimeZone;
-import java.util.TreeSet;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.apache.commons.codec.binary.Base64;
-import org.apache.commons.codec.binary.Hex;
-import org.torproject.descriptor.DescriptorParser;
-import org.torproject.descriptor.DescriptorSourceFactory;
-import org.torproject.descriptor.impl.DescriptorParseException;
-
-public class ArchiveWriter {
- private Logger logger;
- private File outputDirectory;
- private DescriptorParser descriptorParser;
- private int storedConsensuses = 0, storedVotes = 0, storedCerts = 0,
- storedServerDescriptors = 0, storedExtraInfoDescriptors = 0;
-
- public ArchiveWriter(File outputDirectory) {
-
- if (outputDirectory == null) {
- throw new IllegalArgumentException();
- }
-
- this.logger = Logger.getLogger(ArchiveWriter.class.getName());
- this.outputDirectory = outputDirectory;
- this.descriptorParser =
- DescriptorSourceFactory.createDescriptorParser();
- }
-
- private boolean store(byte[] typeAnnotation, byte[] data,
- String filename) {
- try {
- File file = new File(filename);
- if (!file.exists()) {
- this.logger.finer("Storing " + filename);
- if (this.descriptorParser.parseDescriptors(data, filename).size()
- != 1) {
- this.logger.info("Relay descriptor file " + filename
- + " doesn't contain exactly one descriptor. Not storing.");
- return false;
- }
- file.getParentFile().mkdirs();
- BufferedOutputStream bos = new BufferedOutputStream(
- new FileOutputStream(file));
- if (data.length > 0 && data[0] != '@') {
- bos.write(typeAnnotation, 0, typeAnnotation.length);
- }
- bos.write(data, 0, data.length);
- bos.close();
- return true;
- }
- } catch (DescriptorParseException e) {
- this.logger.log(Level.WARNING, "Could not parse relay descriptor "
- + filename + " before storing it to disk. Skipping.", e);
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not store relay descriptor "
- + filename, e);
- }
- return false;
- }
-
- private static final byte[] CONSENSUS_ANNOTATION =
- "@type network-status-consensus-3 1.0\n".getBytes();
- public void storeConsensus(byte[] data, long validAfter) {
- SimpleDateFormat printFormat = new SimpleDateFormat(
- "yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
- printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- String filename = outputDirectory + "/consensus/"
- + printFormat.format(new Date(validAfter)) + "-consensus";
- if (this.store(CONSENSUS_ANNOTATION, data, filename)) {
- this.storedConsensuses++;
- }
- }
-
- private static final byte[] VOTE_ANNOTATION =
- "@type network-status-vote-3 1.0\n".getBytes();
- public void storeVote(byte[] data, long validAfter,
- String fingerprint, String digest) {
- SimpleDateFormat printFormat = new SimpleDateFormat(
- "yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
- printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- String filename = outputDirectory + "/vote/"
- + printFormat.format(new Date(validAfter)) + "-vote-"
- + fingerprint + "-" + digest;
- if (this.store(VOTE_ANNOTATION, data, filename)) {
- this.storedVotes++;
- }
- }
-
- private static final byte[] CERTIFICATE_ANNOTATION =
- "@type dir-key-certificate-3 1.0\n".getBytes();
- public void storeCertificate(byte[] data, String fingerprint,
- long published) {
- SimpleDateFormat printFormat = new SimpleDateFormat(
- "yyyy-MM-dd-HH-mm-ss");
- printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- String filename = outputDirectory + "/certs/"
- + fingerprint + "-" + printFormat.format(new Date(published));
- if (this.store(CERTIFICATE_ANNOTATION, data, filename)) {
- this.storedCerts++;
- }
- }
-
- private static final byte[] SERVER_DESCRIPTOR_ANNOTATION =
- "@type server-descriptor 1.0\n".getBytes();
- public void storeServerDescriptor(byte[] data, String digest,
- long published) {
- SimpleDateFormat printFormat = new SimpleDateFormat("yyyy/MM/");
- printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- String filename = outputDirectory + "/server-descriptor/"
- + printFormat.format(new Date(published))
- + digest.substring(0, 1) + "/" + digest.substring(1, 2) + "/"
- + digest;
- if (this.store(SERVER_DESCRIPTOR_ANNOTATION, data, filename)) {
- this.storedServerDescriptors++;
- }
- }
-
- private static final byte[] EXTRA_INFO_ANNOTATION =
- "@type extra-info 1.0\n".getBytes();
- public void storeExtraInfoDescriptor(byte[] data,
- String extraInfoDigest, long published) {
- SimpleDateFormat descriptorFormat = new SimpleDateFormat("yyyy/MM/");
- descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- String filename = outputDirectory + "/extra-info/"
- + descriptorFormat.format(new Date(published))
- + extraInfoDigest.substring(0, 1) + "/"
- + extraInfoDigest.substring(1, 2) + "/"
- + extraInfoDigest;
- if (this.store(EXTRA_INFO_ANNOTATION, data, filename)) {
- this.storedExtraInfoDescriptors++;
- }
- }
-
- private StringBuilder intermediateStats = new StringBuilder();
- public void intermediateStats(String event) {
- intermediateStats.append("While " + event + ", we stored "
- + this.storedConsensuses + " consensus(es), " + this.storedVotes
- + " vote(s), " + this.storedCerts + " certificate(s), "
- + this.storedServerDescriptors + " server descriptor(s), and "
- + this.storedExtraInfoDescriptors
- + " extra-info descriptor(s) to disk.\n");
- this.storedConsensuses = 0;
- this.storedVotes = 0;
- this.storedCerts = 0;
- this.storedServerDescriptors = 0;
- this.storedExtraInfoDescriptors = 0;
- }
- /**
- * Dump some statistics on the completeness of descriptors to the logs
- * on level INFO.
- */
- public void dumpStats() {
- StringBuilder sb = new StringBuilder("Finished writing relay "
- + "descriptors to disk.\n");
- sb.append(intermediateStats.toString());
- sb.append("Statistics on the completeness of written relay "
- + "descriptors of the last 3 consensuses (Consensus/Vote, "
- + "valid-after, votes, server descriptors, extra-infos):");
- try {
- SimpleDateFormat validAfterFormat =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- validAfterFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- SimpleDateFormat consensusVoteFormat =
- new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
- consensusVoteFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- SimpleDateFormat descriptorFormat =
- new SimpleDateFormat("yyyy/MM/");
- descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-
- SortedSet<File> consensuses = new TreeSet<File>();
- Stack<File> leftToParse = new Stack<File>();
- leftToParse.add(new File(outputDirectory + "/consensus"));
- while (!leftToParse.isEmpty()) {
- File pop = leftToParse.pop();
- if (pop.isDirectory()) {
- for (File f : pop.listFiles()) {
- leftToParse.add(f);
- }
- } else if (pop.length() > 0) {
- consensuses.add(pop);
- }
- while (consensuses.size() > 3) {
- consensuses.remove(consensuses.first());
- }
- }
- for (File f : consensuses) {
- BufferedReader br = new BufferedReader(new FileReader(f));
- String line = null, validAfterTime = null,
- voteFilenamePrefix = null, dirSource = null;
- int allVotes = 0, foundVotes = 0,
- allServerDescs = 0, foundServerDescs = 0,
- allExtraInfos = 0, foundExtraInfos = 0;
- while ((line = br.readLine()) != null) {
- if (line.startsWith("valid-after ")) {
- validAfterTime = line.substring("valid-after ".length());
- long validAfter = validAfterFormat.parse(
- validAfterTime).getTime();
- voteFilenamePrefix = outputDirectory + "/vote/"
- + consensusVoteFormat.format(new Date(validAfter))
- + "-vote-";
- } else if (line.startsWith("dir-source ")) {
- dirSource = line.split(" ")[2];
- } else if (line.startsWith("vote-digest ")) {
- allVotes++;
- File voteFile = new File(voteFilenamePrefix + dirSource + "-"
- + line.split(" ")[1]);
- if (voteFile.exists()) {
- foundVotes++;
- BufferedReader vbr = new BufferedReader(new FileReader(
- voteFile));
- String line3 = null;
- int voteAllServerDescs = 0, voteFoundServerDescs = 0,
- voteAllExtraInfos = 0, voteFoundExtraInfos = 0;
- while ((line3 = vbr.readLine()) != null) {
- if (line3.startsWith("r ")) {
- voteAllServerDescs++;
- String digest = Hex.encodeHexString(Base64.decodeBase64(
- line3.split(" ")[3] + "=")).toLowerCase();
- long published = validAfterFormat.parse(
- line3.split(" ")[4] + " "
- + line3.split(" ")[5]).getTime();
- String filename = outputDirectory
- + "/server-descriptor/"
- + descriptorFormat.format(new Date(published))
- + digest.substring(0, 1) + "/"
- + digest.substring(1, 2) + "/" + digest;
- if (new File(filename).exists()) {
- BufferedReader sbr = new BufferedReader(new FileReader(
- new File(filename)));
- String line2 = null;
- while ((line2 = sbr.readLine()) != null) {
- if (line2.startsWith("opt extra-info-digest ") ||
- line2.startsWith("extra-info-digest ")) {
- voteAllExtraInfos++;
- String extraInfoDigest = line2.startsWith("opt ") ?
- line2.split(" ")[2].toLowerCase() :
- line2.split(" ")[1].toLowerCase();
- String filename2 =
- outputDirectory.getAbsolutePath()
- + "/extra-info/"
- + descriptorFormat.format(new Date(published))
- + extraInfoDigest.substring(0, 1) + "/"
- + extraInfoDigest.substring(1, 2) + "/"
- + extraInfoDigest;
- if (new File(filename2).exists()) {
- voteFoundExtraInfos++;
- }
- }
- }
- sbr.close();
- voteFoundServerDescs++;
- }
- }
- }
- vbr.close();
- sb.append(String.format("%nV, %s, NA, %d/%d (%.1f%%), "
- + "%d/%d (%.1f%%)", validAfterTime,
- voteFoundServerDescs, voteAllServerDescs,
- 100.0D * (double) voteFoundServerDescs /
- (double) voteAllServerDescs,
- voteFoundExtraInfos, voteAllExtraInfos,
- 100.0D * (double) voteFoundExtraInfos /
- (double) voteAllExtraInfos));
- }
- } else if (line.startsWith("r ")) {
- allServerDescs++;
- String digest = Hex.encodeHexString(Base64.decodeBase64(
- line.split(" ")[3] + "=")).toLowerCase();
- long published = validAfterFormat.parse(
- line.split(" ")[4] + " " + line.split(" ")[5]).getTime();
- String filename = outputDirectory.getAbsolutePath()
- + "/server-descriptor/"
- + descriptorFormat.format(new Date(published))
- + digest.substring(0, 1) + "/"
- + digest.substring(1, 2) + "/" + digest;
- if (new File (filename).exists()) {
- BufferedReader sbr = new BufferedReader(new FileReader(
- new File(filename)));
- String line2 = null;
- while ((line2 = sbr.readLine()) != null) {
- if (line2.startsWith("opt extra-info-digest ") ||
- line2.startsWith("extra-info-digest ")) {
- allExtraInfos++;
- String extraInfoDigest = line2.startsWith("opt ") ?
- line2.split(" ")[2].toLowerCase() :
- line2.split(" ")[1].toLowerCase();
- String filename2 = outputDirectory.getAbsolutePath()
- + "/extra-info/"
- + descriptorFormat.format(new Date(published))
- + extraInfoDigest.substring(0, 1) + "/"
- + extraInfoDigest.substring(1, 2) + "/"
- + extraInfoDigest;
- if (new File (filename2).exists()) {
- foundExtraInfos++;
- }
- }
- }
- sbr.close();
- foundServerDescs++;
- }
- }
- }
- br.close();
- sb.append(String.format("%nC, %s, %d/%d (%.1f%%), "
- + "%d/%d (%.1f%%), %d/%d (%.1f%%)",
- validAfterTime, foundVotes, allVotes,
- 100.0D * (double) foundVotes / (double) allVotes,
- foundServerDescs, allServerDescs,
- 100.0D * (double) foundServerDescs / (double) allServerDescs,
- foundExtraInfos, allExtraInfos,
- 100.0D * (double) foundExtraInfos / (double) allExtraInfos));
- }
- this.logger.info(sb.toString());
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not dump statistics to disk.",
- e);
- } catch (ParseException e) {
- this.logger.log(Level.WARNING, "Could not dump statistics to disk.",
- e);
- }
- }
-}
diff --git a/src/org/torproject/ernie/db/BridgeDescriptorParser.java b/src/org/torproject/ernie/db/BridgeDescriptorParser.java
deleted file mode 100644
index 7773525..0000000
--- a/src/org/torproject/ernie/db/BridgeDescriptorParser.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.StringReader;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-public class BridgeDescriptorParser {
- private SanitizedBridgesWriter sbw;
- private Logger logger;
- public BridgeDescriptorParser(SanitizedBridgesWriter sbw) {
- this.sbw = sbw;
- this.logger =
- Logger.getLogger(BridgeDescriptorParser.class.getName());
- }
- public void parse(byte[] allData, String dateTime) {
- try {
- BufferedReader br = new BufferedReader(new StringReader(
- new String(allData, "US-ASCII")));
- String line = br.readLine();
- if (line == null) {
- return;
- } else if (line.startsWith("r ")) {
- if (this.sbw != null) {
- this.sbw.sanitizeAndStoreNetworkStatus(allData, dateTime);
- }
- } else if (line.startsWith("router ")) {
- if (this.sbw != null) {
- this.sbw.sanitizeAndStoreServerDescriptor(allData);
- }
- } else if (line.startsWith("extra-info ")) {
- if (this.sbw != null) {
- this.sbw.sanitizeAndStoreExtraInfoDescriptor(allData);
- }
- }
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not parse bridge descriptor.",
- e);
- return;
- }
- }
-}
-
diff --git a/src/org/torproject/ernie/db/BridgePoolAssignmentsProcessor.java b/src/org/torproject/ernie/db/BridgePoolAssignmentsProcessor.java
deleted file mode 100644
index d03dcaf..0000000
--- a/src/org/torproject/ernie/db/BridgePoolAssignmentsProcessor.java
+++ /dev/null
@@ -1,174 +0,0 @@
-/* Copyright 2011--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.SortedSet;
-import java.util.Stack;
-import java.util.TimeZone;
-import java.util.TreeSet;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.apache.commons.codec.DecoderException;
-import org.apache.commons.codec.binary.Hex;
-import org.apache.commons.codec.digest.DigestUtils;
-import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
-
-public class BridgePoolAssignmentsProcessor {
-
- public BridgePoolAssignmentsProcessor(File assignmentsDirectory,
- File sanitizedAssignmentsDirectory) {
-
- Logger logger =
- Logger.getLogger(BridgePoolAssignmentsProcessor.class.getName());
- if (assignmentsDirectory == null ||
- sanitizedAssignmentsDirectory == null) {
- IllegalArgumentException e = new IllegalArgumentException("Neither "
- + "assignmentsDirectory nor sanitizedAssignmentsDirectory may "
- + "be null!");
- throw e;
- }
-
- List<File> assignmentFiles = new ArrayList<File>();
- Stack<File> files = new Stack<File>();
- files.add(assignmentsDirectory);
- while (!files.isEmpty()) {
- File file = files.pop();
- if (file.isDirectory()) {
- files.addAll(Arrays.asList(file.listFiles()));
- } else if (!file.getName().endsWith(".gz")) {
- assignmentFiles.add(file);
- }
- }
-
- SimpleDateFormat assignmentFormat =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- assignmentFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- SimpleDateFormat filenameFormat =
- new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
- filenameFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- for (File assignmentFile : assignmentFiles) {
- logger.info("Processing bridge pool assignment file '"
- + assignmentFile.getAbsolutePath() + "'...");
- try {
- BufferedReader br = null;
- if (assignmentFile.getName().endsWith(".gz")) {
- br = new BufferedReader(new InputStreamReader(
- new GzipCompressorInputStream(new FileInputStream(
- assignmentFile))));
- } else {
- br = new BufferedReader(new FileReader(assignmentFile));
- }
- String line, bridgePoolAssignmentLine = null;
- SortedSet<String> sanitizedAssignments = new TreeSet<String>();
- boolean wroteLastLine = false, skipBefore20120504125947 = true;
- while ((line = br.readLine()) != null || !wroteLastLine) {
- if (line != null && line.startsWith("bridge-pool-assignment ")) {
- String[] parts = line.split(" ");
- if (parts.length != 3) {
- continue;
- }
- /* TODO Take out this temporary hack to ignore all assignments
- * coming from ponticum when byblos was still the official
- * BridgeDB host. */
- if (line.compareTo(
- "bridge-pool-assignment 2012-05-04 12:59:47") >= 0) {
- skipBefore20120504125947 = false;
- }
- }
- if (skipBefore20120504125947) {
- if (line == null) {
- break;
- } else {
- continue;
- }
- }
- if (line == null ||
- line.startsWith("bridge-pool-assignment ")) {
- if (bridgePoolAssignmentLine != null) {
- try {
- long bridgePoolAssignmentTime = assignmentFormat.parse(
- bridgePoolAssignmentLine.substring(
- "bridge-pool-assignment ".length())).getTime();
- File sanitizedAssignmentsFile = new File(
- sanitizedAssignmentsDirectory, filenameFormat.format(
- bridgePoolAssignmentTime));
- if (!sanitizedAssignmentsFile.exists()) {
- sanitizedAssignmentsFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- sanitizedAssignmentsFile));
- bw.write("@type bridge-pool-assignment 1.0\n");
- bw.write(bridgePoolAssignmentLine + "\n");
- for (String assignmentLine : sanitizedAssignments) {
- bw.write(assignmentLine + "\n");
- }
- bw.close();
- }
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not write sanitized "
- + "bridge pool assignment file for line '"
- + bridgePoolAssignmentLine + "' to disk. Skipping "
- + "bridge pool assignment file '"
- + assignmentFile.getAbsolutePath() + "'.", e);
- break;
- } catch (ParseException e) {
- logger.log(Level.WARNING, "Could not write sanitized "
- + "bridge pool assignment file for line '"
- + bridgePoolAssignmentLine + "' to disk. Skipping "
- + "bridge pool assignment file '"
- + assignmentFile.getAbsolutePath() + "'.", e);
- break;
- }
- sanitizedAssignments.clear();
- }
- if (line == null) {
- wroteLastLine = true;
- } else {
- bridgePoolAssignmentLine = line;
- }
- } else {
- String[] parts = line.split(" ");
- if (parts.length < 2 || parts[0].length() < 40) {
- logger.warning("Unrecognized line '" + line
- + "'. Aborting.");
- break;
- }
- String hashedFingerprint = null;
- try {
- hashedFingerprint = DigestUtils.shaHex(Hex.decodeHex(
- line.split(" ")[0].toCharArray())).toLowerCase();
- } catch (DecoderException e) {
- logger.warning("Unable to decode hex fingerprint in line '"
- + line + "'. Aborting.");
- break;
- }
- String assignmentDetails = line.substring(40);
- sanitizedAssignments.add(hashedFingerprint
- + assignmentDetails);
- }
- }
- br.close();
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not read bridge pool assignment "
- + "file '" + assignmentFile.getAbsolutePath()
- + "'. Skipping.", e);
- }
- }
-
- logger.info("Finished processing bridge pool assignment file(s).");
- }
-}
-
diff --git a/src/org/torproject/ernie/db/BridgeSnapshotReader.java b/src/org/torproject/ernie/db/BridgeSnapshotReader.java
deleted file mode 100644
index f21794d..0000000
--- a/src/org/torproject/ernie/db/BridgeSnapshotReader.java
+++ /dev/null
@@ -1,220 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.BufferedInputStream;
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.StringReader;
-import java.util.HashSet;
-import java.util.Set;
-import java.util.SortedSet;
-import java.util.Stack;
-import java.util.TreeSet;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.apache.commons.codec.binary.Hex;
-import org.apache.commons.codec.digest.DigestUtils;
-import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
-import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
-
-/**
- * Reads the half-hourly snapshots of bridge descriptors from Tonga.
- */
-public class BridgeSnapshotReader {
- public BridgeSnapshotReader(BridgeDescriptorParser bdp,
- File bridgeDirectoriesDir, File statsDirectory) {
-
- if (bdp == null || bridgeDirectoriesDir == null ||
- statsDirectory == null) {
- throw new IllegalArgumentException();
- }
-
- Logger logger =
- Logger.getLogger(BridgeSnapshotReader.class.getName());
- SortedSet<String> parsed = new TreeSet<String>();
- File bdDir = bridgeDirectoriesDir;
- File pbdFile = new File(statsDirectory, "parsed-bridge-directories");
- boolean modified = false;
- if (bdDir.exists()) {
- if (pbdFile.exists()) {
- logger.fine("Reading file " + pbdFile.getAbsolutePath() + "...");
- try {
- BufferedReader br = new BufferedReader(new FileReader(pbdFile));
- String line = null;
- while ((line = br.readLine()) != null) {
- parsed.add(line);
- }
- br.close();
- logger.fine("Finished reading file "
- + pbdFile.getAbsolutePath() + ".");
- } catch (IOException e) {
- logger.log(Level.WARNING, "Failed reading file "
- + pbdFile.getAbsolutePath() + "!", e);
- return;
- }
- }
- logger.fine("Importing files in directory " + bridgeDirectoriesDir
- + "/...");
- Set<String> descriptorImportHistory = new HashSet<String>();
- int parsedFiles = 0, skippedFiles = 0, parsedStatuses = 0,
- parsedServerDescriptors = 0, skippedServerDescriptors = 0,
- parsedExtraInfoDescriptors = 0, skippedExtraInfoDescriptors = 0;
- Stack<File> filesInInputDir = new Stack<File>();
- filesInInputDir.add(bdDir);
- while (!filesInInputDir.isEmpty()) {
- File pop = filesInInputDir.pop();
- if (pop.isDirectory()) {
- for (File f : pop.listFiles()) {
- filesInInputDir.add(f);
- }
- } else if (!parsed.contains(pop.getName())) {
- try {
- FileInputStream in = new FileInputStream(pop);
- if (in.available() > 0) {
- TarArchiveInputStream tais = null;
- if (pop.getName().endsWith(".tar.gz")) {
- GzipCompressorInputStream gcis =
- new GzipCompressorInputStream(in);
- tais = new TarArchiveInputStream(gcis);
- } else if (pop.getName().endsWith(".tar")) {
- tais = new TarArchiveInputStream(in);
- } else {
- continue;
- }
- BufferedInputStream bis = new BufferedInputStream(tais);
- String fn = pop.getName();
- String dateTime = fn.substring(11, 21) + " "
- + fn.substring(22, 24) + ":" + fn.substring(24, 26)
- + ":" + fn.substring(26, 28);
- while ((tais.getNextTarEntry()) != null) {
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- int len;
- byte[] data = new byte[1024];
- while ((len = bis.read(data, 0, 1024)) >= 0) {
- baos.write(data, 0, len);
- }
- byte[] allData = baos.toByteArray();
- if (allData.length == 0) {
- continue;
- }
- String fileDigest = Hex.encodeHexString(DigestUtils.sha(
- allData));
- String ascii = new String(allData, "US-ASCII");
- BufferedReader br3 = new BufferedReader(new StringReader(
- ascii));
- String firstLine = null;
- while ((firstLine = br3.readLine()) != null) {
- if (firstLine.startsWith("@")) {
- continue;
- } else {
- break;
- }
- }
- if (firstLine.startsWith("r ")) {
- bdp.parse(allData, dateTime);
- parsedStatuses++;
- } else if (descriptorImportHistory.contains(fileDigest)) {
- /* Skip server descriptors or extra-info descriptors if
- * we parsed them before. */
- skippedFiles++;
- continue;
- } else {
- int start = -1, sig = -1, end = -1;
- String startToken =
- firstLine.startsWith("router ") ?
- "router " : "extra-info ";
- String sigToken = "\nrouter-signature\n";
- String endToken = "\n-----END SIGNATURE-----\n";
- while (end < ascii.length()) {
- start = ascii.indexOf(startToken, end);
- if (start < 0) {
- break;
- }
- sig = ascii.indexOf(sigToken, start);
- if (sig < 0) {
- break;
- }
- sig += sigToken.length();
- end = ascii.indexOf(endToken, sig);
- if (end < 0) {
- break;
- }
- end += endToken.length();
- byte[] descBytes = new byte[end - start];
- System.arraycopy(allData, start, descBytes, 0,
- end - start);
- String descriptorDigest = Hex.encodeHexString(
- DigestUtils.sha(descBytes));
- if (!descriptorImportHistory.contains(
- descriptorDigest)) {
- bdp.parse(descBytes, dateTime);
- descriptorImportHistory.add(descriptorDigest);
- if (firstLine.startsWith("router ")) {
- parsedServerDescriptors++;
- } else {
- parsedExtraInfoDescriptors++;
- }
- } else {
- if (firstLine.startsWith("router ")) {
- skippedServerDescriptors++;
- } else {
- skippedExtraInfoDescriptors++;
- }
- }
- }
- }
- descriptorImportHistory.add(fileDigest);
- parsedFiles++;
- }
- bis.close();
- }
- in.close();
-
- /* Let's give some memory back, or we'll run out of it. */
- System.gc();
-
- parsed.add(pop.getName());
- modified = true;
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not parse bridge snapshot "
- + pop.getName() + "!", e);
- continue;
- }
- }
- }
- logger.fine("Finished importing files in directory "
- + bridgeDirectoriesDir + "/. In total, we parsed "
- + parsedFiles + " files (skipped " + skippedFiles
- + ") containing " + parsedStatuses + " statuses, "
- + parsedServerDescriptors + " server descriptors (skipped "
- + skippedServerDescriptors + "), and "
- + parsedExtraInfoDescriptors + " extra-info descriptors "
- + "(skipped " + skippedExtraInfoDescriptors + ").");
- if (!parsed.isEmpty() && modified) {
- logger.fine("Writing file " + pbdFile.getAbsolutePath() + "...");
- try {
- pbdFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(pbdFile));
- for (String f : parsed) {
- bw.append(f + "\n");
- }
- bw.close();
- logger.fine("Finished writing file " + pbdFile.getAbsolutePath()
- + ".");
- } catch (IOException e) {
- logger.log(Level.WARNING, "Failed writing file "
- + pbdFile.getAbsolutePath() + "!", e);
- }
- }
- }
- }
-}
-
diff --git a/src/org/torproject/ernie/db/CachedRelayDescriptorReader.java b/src/org/torproject/ernie/db/CachedRelayDescriptorReader.java
deleted file mode 100644
index 4da3e44..0000000
--- a/src/org/torproject/ernie/db/CachedRelayDescriptorReader.java
+++ /dev/null
@@ -1,235 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.BufferedInputStream;
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.StringReader;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-import java.util.TimeZone;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.apache.commons.codec.binary.Hex;
-import org.apache.commons.codec.digest.DigestUtils;
-
-/**
- * Parses all descriptors in local directory cacheddesc/ and sorts them
- * into directory structure in directory-archive/.
- */
-public class CachedRelayDescriptorReader {
- public CachedRelayDescriptorReader(RelayDescriptorParser rdp,
- List<String> inputDirectories, File statsDirectory) {
-
- if (rdp == null || inputDirectories == null ||
- inputDirectories.isEmpty() || statsDirectory == null) {
- throw new IllegalArgumentException();
- }
-
- StringBuilder dumpStats = new StringBuilder("Finished importing "
- + "relay descriptors from local Tor data directories:");
- Logger logger = Logger.getLogger(
- CachedRelayDescriptorReader.class.getName());
-
- /* Read import history containing SHA-1 digests of previously parsed
- * statuses and descriptors, so that we can skip them in this run. */
- Set<String> lastImportHistory = new HashSet<String>(),
- currentImportHistory = new HashSet<String>();
- File importHistoryFile = new File(statsDirectory,
- "cacheddesc-import-history");
- if (importHistoryFile.exists()) {
- try {
- BufferedReader br = new BufferedReader(new FileReader(
- importHistoryFile));
- String line;
- while ((line = br.readLine()) != null) {
- lastImportHistory.add(line);
- }
- br.close();
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not read import history from "
- + importHistoryFile.getAbsolutePath() + ".", e);
- }
- }
-
- /* Read cached descriptors directories. */
- for (String inputDirectory : inputDirectories) {
- File cachedDescDir = new File(inputDirectory);
- if (!cachedDescDir.exists()) {
- logger.warning("Directory " + cachedDescDir.getAbsolutePath()
- + " does not exist. Skipping.");
- continue;
- }
- logger.fine("Reading " + cachedDescDir.getAbsolutePath()
- + " directory.");
- for (File f : cachedDescDir.listFiles()) {
- try {
- // descriptors may contain non-ASCII chars; read as bytes to
- // determine digests
- BufferedInputStream bis =
- new BufferedInputStream(new FileInputStream(f));
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- int len;
- byte[] data = new byte[1024];
- while ((len = bis.read(data, 0, 1024)) >= 0) {
- baos.write(data, 0, len);
- }
- bis.close();
- byte[] allData = baos.toByteArray();
- if (f.getName().equals("cached-consensus")) {
- /* Check if directory information is stale. */
- BufferedReader br = new BufferedReader(new StringReader(
- new String(allData, "US-ASCII")));
- String line = null;
- while ((line = br.readLine()) != null) {
- if (line.startsWith("valid-after ")) {
- dumpStats.append("\n" + f.getName() + ": " + line.substring(
- "valid-after ".length()));
- SimpleDateFormat dateTimeFormat =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- if (dateTimeFormat.parse(line.substring("valid-after ".
- length())).getTime() < System.currentTimeMillis()
- - 6L * 60L * 60L * 1000L) {
- logger.warning("Cached descriptor files in "
- + cachedDescDir.getAbsolutePath() + " are stale. "
- + "The valid-after line in cached-consensus is '"
- + line + "'.");
- dumpStats.append(" (stale!)");
- }
- break;
- }
- }
- br.close();
-
- /* Parse the cached consensus if we haven't parsed it before
- * (but regardless of whether it's stale or not). */
- if (rdp != null) {
- String digest = Hex.encodeHexString(DigestUtils.sha(
- allData));
- if (!lastImportHistory.contains(digest) &&
- !currentImportHistory.contains(digest)) {
- rdp.parse(allData);
- } else {
- dumpStats.append(" (skipped)");
- }
- currentImportHistory.add(digest);
- }
- } else if (f.getName().equals("v3-status-votes")) {
- int parsedNum = 0, skippedNum = 0;
- String ascii = new String(allData, "US-ASCII");
- String startToken = "network-status-version ";
- int end = ascii.length();
- int start = ascii.indexOf(startToken);
- while (start >= 0 && start < end) {
- int next = ascii.indexOf(startToken, start + 1);
- if (next < 0) {
- next = end;
- }
- if (start < next) {
- byte[] rawNetworkStatusBytes = new byte[next - start];
- System.arraycopy(allData, start, rawNetworkStatusBytes, 0,
- next - start);
- if (rdp != null) {
- String digest = Hex.encodeHexString(DigestUtils.sha(
- rawNetworkStatusBytes));
- if (!lastImportHistory.contains(digest) &&
- !currentImportHistory.contains(digest)) {
- rdp.parse(rawNetworkStatusBytes);
- parsedNum++;
- } else {
- skippedNum++;
- }
- currentImportHistory.add(digest);
- }
- }
- start = next;
- }
- dumpStats.append("\n" + f.getName() + ": parsed " + parsedNum
- + ", skipped " + skippedNum + " votes");
- } else if (f.getName().startsWith("cached-descriptors") ||
- f.getName().startsWith("cached-extrainfo")) {
- String ascii = new String(allData, "US-ASCII");
- int start = -1, sig = -1, end = -1;
- String startToken =
- f.getName().startsWith("cached-descriptors") ?
- "router " : "extra-info ";
- String sigToken = "\nrouter-signature\n";
- String endToken = "\n-----END SIGNATURE-----\n";
- int parsedNum = 0, skippedNum = 0;
- while (end < ascii.length()) {
- start = ascii.indexOf(startToken, end);
- if (start < 0) {
- break;
- }
- sig = ascii.indexOf(sigToken, start);
- if (sig < 0) {
- break;
- }
- sig += sigToken.length();
- end = ascii.indexOf(endToken, sig);
- if (end < 0) {
- break;
- }
- end += endToken.length();
- byte[] descBytes = new byte[end - start];
- System.arraycopy(allData, start, descBytes, 0, end - start);
- if (rdp != null) {
- String digest = Hex.encodeHexString(DigestUtils.sha(
- descBytes));
- if (!lastImportHistory.contains(digest) &&
- !currentImportHistory.contains(digest)) {
- rdp.parse(descBytes);
- parsedNum++;
- } else {
- skippedNum++;
- }
- currentImportHistory.add(digest);
- }
- }
- dumpStats.append("\n" + f.getName() + ": parsed " + parsedNum
- + ", skipped " + skippedNum + " "
- + (f.getName().startsWith("cached-descriptors") ?
- "server" : "extra-info") + " descriptors");
- }
- } catch (IOException e) {
- logger.log(Level.WARNING, "Failed reading "
- + cachedDescDir.getAbsolutePath() + " directory.", e);
- } catch (ParseException e) {
- logger.log(Level.WARNING, "Failed reading "
- + cachedDescDir.getAbsolutePath() + " directory.", e);
- }
- }
- logger.fine("Finished reading "
- + cachedDescDir.getAbsolutePath() + " directory.");
- }
-
- /* Write import history containing SHA-1 digests to disk. */
- try {
- importHistoryFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- importHistoryFile));
- for (String digest : currentImportHistory) {
- bw.write(digest + "\n");
- }
- bw.close();
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not write import history to "
- + importHistoryFile.getAbsolutePath() + ".", e);
- }
-
- logger.info(dumpStats.toString());
- }
-}
-
diff --git a/src/org/torproject/ernie/db/Configuration.java b/src/org/torproject/ernie/db/Configuration.java
deleted file mode 100644
index e130dab..0000000
--- a/src/org/torproject/ernie/db/Configuration.java
+++ /dev/null
@@ -1,359 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
-import java.io.IOException;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.SortedMap;
-import java.util.TreeMap;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-/**
- * Initialize configuration with hard-coded defaults, overwrite with
- * configuration in config file, if exists, and answer Main.java about our
- * configuration.
- */
-public class Configuration {
- private boolean writeDirectoryArchives = false;
- private String directoryArchivesOutputDirectory = "directory-archive/";
- private boolean importCachedRelayDescriptors = false;
- private List<String> cachedRelayDescriptorsDirectory =
- new ArrayList<String>(Arrays.asList("cacheddesc/".split(",")));
- private boolean importDirectoryArchives = false;
- private String directoryArchivesDirectory = "archives/";
- private boolean keepDirectoryArchiveImportHistory = false;
- private boolean writeSanitizedBridges = false;
- private boolean replaceIPAddressesWithHashes = false;
- private long limitBridgeDescriptorMappings = -1L;
- private String sanitizedBridgesWriteDirectory = "sanitized-bridges/";
- private boolean importBridgeSnapshots = false;
- private String bridgeSnapshotsDirectory = "bridge-directories/";
- private boolean downloadRelayDescriptors = false;
- private List<String> downloadFromDirectoryAuthorities = Arrays.asList((
- "86.59.21.38,76.73.17.194:9030,213.115.239.118:443,"
- + "193.23.244.244,208.83.223.34:443,128.31.0.34:9131,"
- + "194.109.206.212,212.112.245.170").split(","));
- private boolean downloadCurrentConsensus = true;
- private boolean downloadCurrentVotes = true;
- private boolean downloadMissingServerDescriptors = true;
- private boolean downloadMissingExtraInfoDescriptors = true;
- private boolean downloadAllServerDescriptors = false;
- private boolean downloadAllExtraInfoDescriptors = false;
- private boolean compressRelayDescriptorDownloads;
- private boolean downloadExitList = false;
- private boolean processBridgePoolAssignments = false;
- private String assignmentsDirectory = "assignments/";
- private String sanitizedAssignmentsDirectory = "sanitized-assignments/";
- private boolean processTorperfFiles = false;
- private String torperfOutputDirectory = "torperf/";
- private SortedMap<String, String> torperfSources = null;
- private List<String> torperfFiles = null;
- private boolean provideFilesViaRsync = false;
- private String rsyncDirectory = "rsync";
- public Configuration() {
-
- /* Initialize logger. */
- Logger logger = Logger.getLogger(Configuration.class.getName());
-
- /* Read config file, if present. */
- File configFile = new File("config");
- if (!configFile.exists()) {
- logger.warning("Could not find config file. In the default "
- + "configuration, we are not configured to read data from any "
- + "data source or write data to any data sink. You need to "
- + "create a config file (" + configFile.getAbsolutePath()
- + ") and provide at least one data source and one data sink. "
- + "Refer to the manual for more information.");
- return;
- }
- String line = null;
- boolean containsCachedRelayDescriptorsDirectory = false;
- try {
- BufferedReader br = new BufferedReader(new FileReader(configFile));
- while ((line = br.readLine()) != null) {
- if (line.startsWith("#") || line.length() < 1) {
- continue;
- } else if (line.startsWith("WriteDirectoryArchives")) {
- this.writeDirectoryArchives = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("DirectoryArchivesOutputDirectory")) {
- this.directoryArchivesOutputDirectory = line.split(" ")[1];
- } else if (line.startsWith("ImportCachedRelayDescriptors")) {
- this.importCachedRelayDescriptors = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("CachedRelayDescriptorsDirectory")) {
- if (!containsCachedRelayDescriptorsDirectory) {
- this.cachedRelayDescriptorsDirectory.clear();
- containsCachedRelayDescriptorsDirectory = true;
- }
- this.cachedRelayDescriptorsDirectory.add(line.split(" ")[1]);
- } else if (line.startsWith("ImportDirectoryArchives")) {
- this.importDirectoryArchives = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("DirectoryArchivesDirectory")) {
- this.directoryArchivesDirectory = line.split(" ")[1];
- } else if (line.startsWith("KeepDirectoryArchiveImportHistory")) {
- this.keepDirectoryArchiveImportHistory = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("WriteSanitizedBridges")) {
- this.writeSanitizedBridges = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("ReplaceIPAddressesWithHashes")) {
- this.replaceIPAddressesWithHashes = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("LimitBridgeDescriptorMappings")) {
- this.limitBridgeDescriptorMappings = Long.parseLong(
- line.split(" ")[1]);
- } else if (line.startsWith("SanitizedBridgesWriteDirectory")) {
- this.sanitizedBridgesWriteDirectory = line.split(" ")[1];
- } else if (line.startsWith("ImportBridgeSnapshots")) {
- this.importBridgeSnapshots = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("BridgeSnapshotsDirectory")) {
- this.bridgeSnapshotsDirectory = line.split(" ")[1];
- } else if (line.startsWith("DownloadRelayDescriptors")) {
- this.downloadRelayDescriptors = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("DownloadFromDirectoryAuthorities")) {
- this.downloadFromDirectoryAuthorities = new ArrayList<String>();
- for (String dir : line.split(" ")[1].split(",")) {
- // test if IP:port pair has correct format
- if (dir.length() < 1) {
- logger.severe("Configuration file contains directory "
- + "authority IP:port of length 0 in line '" + line
- + "'! Exiting!");
- System.exit(1);
- }
- new URL("http://" + dir + "/");
- this.downloadFromDirectoryAuthorities.add(dir);
- }
- } else if (line.startsWith("DownloadCurrentConsensus")) {
- this.downloadCurrentConsensus = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("DownloadCurrentVotes")) {
- this.downloadCurrentVotes = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("DownloadMissingServerDescriptors")) {
- this.downloadMissingServerDescriptors = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith(
- "DownloadMissingExtraInfoDescriptors")) {
- this.downloadMissingExtraInfoDescriptors = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("DownloadAllServerDescriptors")) {
- this.downloadAllServerDescriptors = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("DownloadAllExtraInfoDescriptors")) {
- this.downloadAllExtraInfoDescriptors = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("CompressRelayDescriptorDownloads")) {
- this.compressRelayDescriptorDownloads = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("DownloadExitList")) {
- this.downloadExitList = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("ProcessBridgePoolAssignments")) {
- this.processBridgePoolAssignments = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("AssignmentsDirectory")) {
- this.assignmentsDirectory = line.split(" ")[1];
- } else if (line.startsWith("SanitizedAssignmentsDirectory")) {
- this.sanitizedAssignmentsDirectory = line.split(" ")[1];
- } else if (line.startsWith("ProcessTorperfFiles")) {
- this.processTorperfFiles = Integer.parseInt(line.split(" ")[1])
- != 0;
- } else if (line.startsWith("TorperfOutputDirectory")) {
- } else if (line.startsWith("TorperfSource")) {
- if (this.torperfSources == null) {
- this.torperfSources = new TreeMap<String, String>();
- }
- String[] parts = line.split(" ");
- String sourceName = parts[1];
- String baseUrl = parts[2];
- this.torperfSources.put(sourceName, baseUrl);
- } else if (line.startsWith("TorperfFiles")) {
- if (this.torperfFiles == null) {
- this.torperfFiles = new ArrayList<String>();
- }
- String[] parts = line.split(" ");
- if (parts.length != 5) {
- logger.severe("Configuration file contains TorperfFiles "
- + "option with wrong number of values in line '" + line
- + "'! Exiting!");
- System.exit(1);
- }
- this.torperfFiles.add(line);
- } else if (line.startsWith("ProvideFilesViaRsync")) {
- this.provideFilesViaRsync = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("RsyncDirectory")) {
- this.rsyncDirectory = line.split(" ")[1];
- } else {
- logger.severe("Configuration file contains unrecognized "
- + "configuration key in line '" + line + "'! Exiting!");
- System.exit(1);
- }
- }
- br.close();
- } catch (ArrayIndexOutOfBoundsException e) {
- logger.severe("Configuration file contains configuration key "
- + "without value in line '" + line + "'. Exiting!");
- System.exit(1);
- } catch (MalformedURLException e) {
- logger.severe("Configuration file contains illegal URL or IP:port "
- + "pair in line '" + line + "'. Exiting!");
- System.exit(1);
- } catch (NumberFormatException e) {
- logger.severe("Configuration file contains illegal value in line '"
- + line + "' with legal values being 0 or 1. Exiting!");
- System.exit(1);
- } catch (IOException e) {
- logger.log(Level.SEVERE, "Unknown problem while reading config "
- + "file! Exiting!", e);
- System.exit(1);
- }
-
- /** Make some checks if configuration is valid. */
- if (!this.importCachedRelayDescriptors &&
- !this.importDirectoryArchives && !this.downloadRelayDescriptors &&
- !this.importBridgeSnapshots &&
- !this.downloadExitList && !this.processBridgePoolAssignments &&
- !this.writeDirectoryArchives && !this.writeSanitizedBridges &&
- !this.processTorperfFiles) {
- logger.warning("We have not been configured to read data from any "
- + "data source or write data to any data sink. You need to "
- + "edit your config file (" + configFile.getAbsolutePath()
- + ") and provide at least one data source and one data sink. "
- + "Refer to the manual for more information.");
- }
- if ((this.importCachedRelayDescriptors ||
- this.importDirectoryArchives || this.downloadRelayDescriptors) &&
- !this.writeDirectoryArchives) {
- logger.warning("We are configured to import/download relay "
- + "descriptors, but we don't have a single data sink to write "
- + "relay descriptors to.");
- }
- if (!(this.importCachedRelayDescriptors ||
- this.importDirectoryArchives || this.downloadRelayDescriptors) &&
- this.writeDirectoryArchives) {
- logger.warning("We are configured to write relay descriptor to at "
- + "least one data sink, but we don't have a single data source "
- + "containing relay descriptors.");
- }
- if (this.importBridgeSnapshots && !this.writeSanitizedBridges) {
- logger.warning("We are configured to import/download bridge "
- + "descriptors, but we don't have a single data sink to write "
- + "bridge descriptors to.");
- }
- if (!this.importBridgeSnapshots && this.writeSanitizedBridges) {
- logger.warning("We are configured to write bridge descriptor to at "
- + "least one data sink, but we don't have a single data source "
- + "containing bridge descriptors.");
- }
- }
- public boolean getWriteDirectoryArchives() {
- return this.writeDirectoryArchives;
- }
- public String getDirectoryArchivesOutputDirectory() {
- return this.directoryArchivesOutputDirectory;
- }
- public boolean getImportCachedRelayDescriptors() {
- return this.importCachedRelayDescriptors;
- }
- public List<String> getCachedRelayDescriptorDirectory() {
- return this.cachedRelayDescriptorsDirectory;
- }
- public boolean getImportDirectoryArchives() {
- return this.importDirectoryArchives;
- }
- public String getDirectoryArchivesDirectory() {
- return this.directoryArchivesDirectory;
- }
- public boolean getKeepDirectoryArchiveImportHistory() {
- return this.keepDirectoryArchiveImportHistory;
- }
- public boolean getWriteSanitizedBridges() {
- return this.writeSanitizedBridges;
- }
- public boolean getReplaceIPAddressesWithHashes() {
- return this.replaceIPAddressesWithHashes;
- }
- public long getLimitBridgeDescriptorMappings() {
- return this.limitBridgeDescriptorMappings;
- }
- public String getSanitizedBridgesWriteDirectory() {
- return this.sanitizedBridgesWriteDirectory;
- }
- public boolean getImportBridgeSnapshots() {
- return this.importBridgeSnapshots;
- }
- public String getBridgeSnapshotsDirectory() {
- return this.bridgeSnapshotsDirectory;
- }
- public boolean getDownloadRelayDescriptors() {
- return this.downloadRelayDescriptors;
- }
- public List<String> getDownloadFromDirectoryAuthorities() {
- return this.downloadFromDirectoryAuthorities;
- }
- public boolean getDownloadCurrentConsensus() {
- return this.downloadCurrentConsensus;
- }
- public boolean getDownloadCurrentVotes() {
- return this.downloadCurrentVotes;
- }
- public boolean getDownloadMissingServerDescriptors() {
- return this.downloadMissingServerDescriptors;
- }
- public boolean getDownloadMissingExtraInfoDescriptors() {
- return this.downloadMissingExtraInfoDescriptors;
- }
- public boolean getDownloadAllServerDescriptors() {
- return this.downloadAllServerDescriptors;
- }
- public boolean getDownloadAllExtraInfoDescriptors() {
- return this.downloadAllExtraInfoDescriptors;
- }
- public boolean getCompressRelayDescriptorDownloads() {
- return this.compressRelayDescriptorDownloads;
- }
- public boolean getDownloadExitList() {
- return this.downloadExitList;
- }
- public boolean getProcessBridgePoolAssignments() {
- return processBridgePoolAssignments;
- }
- public String getAssignmentsDirectory() {
- return assignmentsDirectory;
- }
- public String getSanitizedAssignmentsDirectory() {
- return sanitizedAssignmentsDirectory;
- }
- public boolean getProcessTorperfFiles() {
- return this.processTorperfFiles;
- }
- public String getTorperfOutputDirectory() {
- return this.torperfOutputDirectory;
- }
- public SortedMap<String, String> getTorperfSources() {
- return this.torperfSources;
- }
- public List<String> getTorperfFiles() {
- return this.torperfFiles;
- }
- public boolean getProvideFilesViaRsync() {
- return this.provideFilesViaRsync;
- }
- public String getRsyncDirectory() {
- return this.rsyncDirectory;
- }
-}
-
diff --git a/src/org/torproject/ernie/db/ExitListDownloader.java b/src/org/torproject/ernie/db/ExitListDownloader.java
deleted file mode 100644
index 01a554f..0000000
--- a/src/org/torproject/ernie/db/ExitListDownloader.java
+++ /dev/null
@@ -1,100 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.BufferedInputStream;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.net.HttpURLConnection;
-import java.net.URL;
-import java.text.SimpleDateFormat;
-import java.util.Date;
-import java.util.SortedSet;
-import java.util.Stack;
-import java.util.TimeZone;
-import java.util.TreeSet;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-public class ExitListDownloader {
- public ExitListDownloader() {
- Logger logger = Logger.getLogger(ExitListDownloader.class.getName());
- try {
- logger.fine("Downloading exit list...");
- String exitAddressesUrl =
- "http://exitlist.torproject.org/exit-addresses";
- URL u = new URL(exitAddressesUrl);
- HttpURLConnection huc = (HttpURLConnection) u.openConnection();
- huc.setRequestMethod("GET");
- huc.connect();
- int response = huc.getResponseCode();
- if (response != 200) {
- logger.warning("Could not download exit list. Response code " +
- response);
- return;
- }
- BufferedInputStream in = new BufferedInputStream(
- huc.getInputStream());
- SimpleDateFormat printFormat =
- new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
- printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- Date downloadedDate = new Date();
- File exitListFile = new File("exitlist/" + printFormat.format(
- downloadedDate));
- exitListFile.getParentFile().mkdirs();
- SimpleDateFormat dateTimeFormat =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- exitListFile));
- bw.write("@type tordnsel 1.0\n");
- bw.write("Downloaded " + dateTimeFormat.format(downloadedDate)
- + "\n");
- int len;
- byte[] data = new byte[1024];
- while ((len = in.read(data, 0, 1024)) >= 0) {
- bw.write(new String(data, 0, len));
- }
- in.close();
- bw.close();
- logger.fine("Finished downloading exit list.");
- } catch (IOException e) {
- logger.log(Level.WARNING, "Failed downloading exit list", e);
- return;
- }
-
- /* Write stats. */
- StringBuilder dumpStats = new StringBuilder("Finished downloading "
- + "exit list.\nLast three exit lists are:");
- Stack<File> filesInInputDir = new Stack<File>();
- filesInInputDir.add(new File("exitlist"));
- SortedSet<File> lastThreeExitLists = new TreeSet<File>();
- while (!filesInInputDir.isEmpty()) {
- File pop = filesInInputDir.pop();
- if (pop.isDirectory()) {
- SortedSet<File> lastThreeElements = new TreeSet<File>();
- for (File f : pop.listFiles()) {
- lastThreeElements.add(f);
- }
- while (lastThreeElements.size() > 3) {
- lastThreeElements.remove(lastThreeElements.first());
- }
- for (File f : lastThreeElements) {
- filesInInputDir.add(f);
- }
- } else {
- lastThreeExitLists.add(pop);
- while (lastThreeExitLists.size() > 3) {
- lastThreeExitLists.remove(lastThreeExitLists.first());
- }
- }
- }
- for (File f : lastThreeExitLists) {
- dumpStats.append("\n" + f.getName());
- }
- logger.info(dumpStats.toString());
- }
-}
-
diff --git a/src/org/torproject/ernie/db/LockFile.java b/src/org/torproject/ernie/db/LockFile.java
deleted file mode 100644
index 3255620..0000000
--- a/src/org/torproject/ernie/db/LockFile.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.util.logging.Logger;
-
-public class LockFile {
-
- private File lockFile;
- private Logger logger;
-
- public LockFile() {
- this.lockFile = new File("lock");
- this.logger = Logger.getLogger(LockFile.class.getName());
- }
-
- public boolean acquireLock() {
- this.logger.fine("Trying to acquire lock...");
- try {
- if (this.lockFile.exists()) {
- BufferedReader br = new BufferedReader(new FileReader("lock"));
- long runStarted = Long.parseLong(br.readLine());
- br.close();
- if (System.currentTimeMillis() - runStarted < 55L * 60L * 1000L) {
- return false;
- }
- }
- BufferedWriter bw = new BufferedWriter(new FileWriter("lock"));
- bw.append("" + System.currentTimeMillis() + "\n");
- bw.close();
- this.logger.fine("Acquired lock.");
- return true;
- } catch (IOException e) {
- this.logger.warning("Caught exception while trying to acquire "
- + "lock!");
- return false;
- }
- }
-
- public void releaseLock() {
- this.logger.fine("Releasing lock...");
- this.lockFile.delete();
- this.logger.fine("Released lock.");
- }
-}
-
diff --git a/src/org/torproject/ernie/db/LoggingConfiguration.java b/src/org/torproject/ernie/db/LoggingConfiguration.java
deleted file mode 100644
index b83ef53..0000000
--- a/src/org/torproject/ernie/db/LoggingConfiguration.java
+++ /dev/null
@@ -1,93 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.BufferedWriter;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.text.SimpleDateFormat;
-import java.util.Date;
-import java.util.TimeZone;
-import java.util.logging.ConsoleHandler;
-import java.util.logging.FileHandler;
-import java.util.logging.Formatter;
-import java.util.logging.Handler;
-import java.util.logging.Level;
-import java.util.logging.LogRecord;
-import java.util.logging.Logger;
-/**
- * Initialize logging configuration.
- *
- * Log levels used by ERNIE:
- *
- * - SEVERE: An event made it impossible to continue program execution.
- * - WARNING: A potential problem occurred that requires the operator to
- * look after the otherwise unattended setup
- * - INFO: Messages on INFO level are meant to help the operator in making
- * sure that operation works as expected.
- * - FINE: Debug messages that are used to identify problems and which are
- * turned on by default.
- * - FINER: More detailed debug messages to investigate problems in more
- * detail. Not turned on by default. Increase log file limit when using
- * FINER.
- * - FINEST: Most detailed debug messages. Not used.
- */
-public class LoggingConfiguration {
- public LoggingConfiguration() {
-
- /* Remove default console handler. */
- for (Handler h : Logger.getLogger("").getHandlers()) {
- Logger.getLogger("").removeHandler(h);
- }
-
- /* Disable logging of internal Sun classes. */
- Logger.getLogger("sun").setLevel(Level.OFF);
-
- /* Set minimum log level we care about from INFO to FINER. */
- Logger.getLogger("").setLevel(Level.FINER);
-
- /* Create log handler that writes messages on WARNING or higher to the
- * console. */
- final SimpleDateFormat dateTimeFormat =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- Formatter cf = new Formatter() {
- public String format(LogRecord record) {
- return dateTimeFormat.format(new Date(record.getMillis())) + " "
- + record.getMessage() + "\n";
- }
- };
- Handler ch = new ConsoleHandler();
- ch.setFormatter(cf);
- ch.setLevel(Level.WARNING);
- Logger.getLogger("").addHandler(ch);
-
- /* Initialize own logger for this class. */
- Logger logger = Logger.getLogger(
- LoggingConfiguration.class.getName());
-
- /* Create log handler that writes all messages on FINE or higher to a
- * local file. */
- Formatter ff = new Formatter() {
- public String format(LogRecord record) {
- return dateTimeFormat.format(new Date(record.getMillis())) + " "
- + record.getLevel() + " " + record.getSourceClassName() + " "
- + record.getSourceMethodName() + " " + record.getMessage()
- + (record.getThrown() != null ? " " + record.getThrown() : "")
- + "\n";
- }
- };
- try {
- FileHandler fh = new FileHandler("log", 5000000, 5, true);
- fh.setFormatter(ff);
- fh.setLevel(Level.FINE);
- Logger.getLogger("").addHandler(fh);
- } catch (SecurityException e) {
- logger.log(Level.WARNING, "No permission to create log file. "
- + "Logging to file is disabled.", e);
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not write to log file. Logging to "
- + "file is disabled.", e);
- }
- }
-}
diff --git a/src/org/torproject/ernie/db/Main.java b/src/org/torproject/ernie/db/Main.java
deleted file mode 100644
index 04cc868..0000000
--- a/src/org/torproject/ernie/db/Main.java
+++ /dev/null
@@ -1,160 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.File;
-import java.util.List;
-import java.util.logging.Logger;
-
-/**
- * Coordinate downloading and parsing of descriptors and extraction of
- * statistically relevant data for later processing with R.
- */
-public class Main {
- public static void main(String[] args) {
-
- /* Initialize logging configuration. */
- new LoggingConfiguration();
-
- Logger logger = Logger.getLogger(Main.class.getName());
- logger.info("Starting ERNIE.");
-
- // Initialize configuration
- Configuration config = new Configuration();
-
- // Use lock file to avoid overlapping runs
- LockFile lf = new LockFile();
- if (!lf.acquireLock()) {
- logger.severe("Warning: ERNIE is already running or has not exited "
- + "cleanly! Exiting!");
- System.exit(1);
- }
-
- // Define stats directory for temporary files
- File statsDirectory = new File("stats");
-
- // Prepare writing relay descriptor archive to disk
- ArchiveWriter aw = config.getWriteDirectoryArchives() ?
- new ArchiveWriter(
- new File(config.getDirectoryArchivesOutputDirectory())) : null;
-
- // Prepare relay descriptor parser (only if we are writing stats or
- // directory archives to disk)
- RelayDescriptorParser rdp = aw != null ?
- new RelayDescriptorParser(aw) : null;
-
- // Import/download relay descriptors from the various sources
- if (rdp != null) {
- RelayDescriptorDownloader rdd = null;
- if (config.getDownloadRelayDescriptors()) {
- List<String> dirSources =
- config.getDownloadFromDirectoryAuthorities();
- rdd = new RelayDescriptorDownloader(rdp, dirSources,
- config.getDownloadCurrentConsensus(),
- config.getDownloadCurrentVotes(),
- config.getDownloadMissingServerDescriptors(),
- config.getDownloadMissingExtraInfoDescriptors(),
- config.getDownloadAllServerDescriptors(),
- config.getDownloadAllExtraInfoDescriptors(),
- config.getCompressRelayDescriptorDownloads());
- rdp.setRelayDescriptorDownloader(rdd);
- }
- if (config.getImportCachedRelayDescriptors()) {
- new CachedRelayDescriptorReader(rdp,
- config.getCachedRelayDescriptorDirectory(), statsDirectory);
- if (aw != null) {
- aw.intermediateStats("importing relay descriptors from local "
- + "Tor data directories");
- }
- }
- if (config.getImportDirectoryArchives()) {
- new ArchiveReader(rdp,
- new File(config.getDirectoryArchivesDirectory()),
- statsDirectory,
- config.getKeepDirectoryArchiveImportHistory());
- if (aw != null) {
- aw.intermediateStats("importing relay descriptors from local "
- + "directory");
- }
- }
- if (rdd != null) {
- rdd.downloadDescriptors();
- rdd.writeFile();
- rdd = null;
- if (aw != null) {
- aw.intermediateStats("downloading relay descriptors from the "
- + "directory authorities");
- }
- }
- }
-
- // Write output to disk that only depends on relay descriptors
- if (aw != null) {
- aw.dumpStats();
- aw = null;
- }
-
- // Prepare sanitized bridge descriptor writer
- SanitizedBridgesWriter sbw = config.getWriteSanitizedBridges() ?
- new SanitizedBridgesWriter(
- new File(config.getSanitizedBridgesWriteDirectory()),
- statsDirectory, config.getReplaceIPAddressesWithHashes(),
- config.getLimitBridgeDescriptorMappings()) : null;
-
- // Prepare bridge descriptor parser
- BridgeDescriptorParser bdp = config.getWriteSanitizedBridges()
- ? new BridgeDescriptorParser(sbw) : null;
-
- // Import bridge descriptors
- if (bdp != null && config.getImportBridgeSnapshots()) {
- new BridgeSnapshotReader(bdp,
- new File(config.getBridgeSnapshotsDirectory()),
- statsDirectory);
- }
-
- // Finish writing sanitized bridge descriptors to disk
- if (sbw != null) {
- sbw.finishWriting();
- sbw = null;
- }
-
- // Download exit list and store it to disk
- if (config.getDownloadExitList()) {
- new ExitListDownloader();
- }
-
- // Process bridge pool assignments
- if (config.getProcessBridgePoolAssignments()) {
- new BridgePoolAssignmentsProcessor(
- new File(config.getAssignmentsDirectory()),
- new File(config.getSanitizedAssignmentsDirectory()));
- }
-
- // Process Torperf files
- if (config.getProcessTorperfFiles()) {
- new TorperfDownloader(new File(config.getTorperfOutputDirectory()),
- config.getTorperfSources(), config.getTorperfFiles());
- }
-
- // Copy recently published files to a local directory that can then
- // be served via rsync.
- if (config.getProvideFilesViaRsync()) {
- new RsyncDataProvider(
- !config.getWriteDirectoryArchives() ? null :
- new File(config.getDirectoryArchivesOutputDirectory()),
- !config.getWriteSanitizedBridges() ? null :
- new File(config.getSanitizedBridgesWriteDirectory()),
- !config.getProcessBridgePoolAssignments() ? null :
- new File(config.getSanitizedAssignmentsDirectory()),
- config.getDownloadExitList(),
- !config.getProcessTorperfFiles() ? null :
- new File(config.getTorperfOutputDirectory()),
- new File(config.getRsyncDirectory()));
- }
-
- // Remove lock file
- lf.releaseLock();
-
- logger.info("Terminating ERNIE.");
- }
-}
diff --git a/src/org/torproject/ernie/db/RelayDescriptorDownloader.java b/src/org/torproject/ernie/db/RelayDescriptorDownloader.java
deleted file mode 100644
index f7e9468..0000000
--- a/src/org/torproject/ernie/db/RelayDescriptorDownloader.java
+++ /dev/null
@@ -1,821 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.BufferedInputStream;
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.net.HttpURLConnection;
-import java.net.URL;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.SortedMap;
-import java.util.SortedSet;
-import java.util.TimeZone;
-import java.util.TreeMap;
-import java.util.TreeSet;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import java.util.zip.InflaterInputStream;
-
-/**
- * Downloads relay descriptors from the directory authorities via HTTP.
- * Keeps a list of missing descriptors that gets updated by parse results
- * from <code>RelayDescriptorParser</code> and downloads all missing
- * descriptors that have been published in the last 24 hours. Also
- * downloads all server and extra-info descriptors known to a directory
- * authority at most once a day.
- */
-public class RelayDescriptorDownloader {
-
- /**
- * Text file containing the descriptors that we are missing and that we
- * want to download. Lines are formatted as:
- *
- * - "consensus,<validafter>,<parsed>",
- * - "vote,<validafter>,<fingerprint>,<parsed>",
- * - "server,<published>,<relayid>,<descid>,<parsed>", or
- * - "extra,<published>,<relayid>,<descid>,<parsed>".
- */
- private File missingDescriptorsFile;
-
- /**
- * Relay descriptors that we are missing and that we want to download
- * either in this execution or write to disk and try next time. Map keys
- * contain comma-separated values as in the missing descriptors files
- * without the "parsed" column. Map values contain the "parsed" column.
- */
- private SortedMap<String, String> missingDescriptors;
-
- /**
- * Text file containing the IP addresses (and Dir ports if not 80) of
- * directory authorities and when we last downloaded all server and
- * extra-info descriptors from them, so that we can avoid downloading
- * them too often.
- */
- private File lastDownloadedAllDescriptorsFile;
-
- /**
- * Map of directory authorities and when we last downloaded all server
- * and extra-info descriptors from them. Map keys are IP addresses (and
- * Dir ports if not 80), map values are timestamps.
- */
- private Map<String, String> lastDownloadedAllDescriptors;
-
- /**
- * <code>RelayDescriptorParser</code> that we will hand over the
- * downloaded descriptors for parsing.
- */
- private RelayDescriptorParser rdp;
-
- /**
- * Directory authorities that we will try to download missing
- * descriptors from.
- */
- private List<String> authorities;
-
- /**
- * Should we try to download the current consensus if we don't have it?
- */
- private boolean downloadCurrentConsensus;
-
- /**
- * Should we try to download current votes if we don't have them?
- */
- private boolean downloadCurrentVotes;
-
- /**
- * Should we try to download missing server descriptors that have been
- * published within the past 24 hours?
- */
- private boolean downloadMissingServerDescriptors;
-
- /**
- * Should we try to download missing extra-info descriptors that have
- * been published within the past 24 hours?
- */
- private boolean downloadMissingExtraInfos;
-
- /**
- * Should we try to download all server descriptors from the authorities
- * once every 24 hours?
- */
- private boolean downloadAllServerDescriptors;
-
- /**
- * Should we try to download all extra-info descriptors from the
- * authorities once every 24 hours?
- */
- private boolean downloadAllExtraInfos;
-
- /**
- * Should we download zlib-compressed versions of descriptors by adding
- * ".z" to URLs?
- */
- private boolean downloadCompressed;
-
- /**
- * valid-after time that we expect the current consensus and votes to
- * have, formatted "yyyy-MM-dd HH:mm:ss". We only expect to find
- * consensuses and votes with this valid-after time on the directory
- * authorities. This time is initialized as the beginning of the current
- * hour.
- */
- private String currentValidAfter;
-
- /**
- * Cut-off time for missing server and extra-info descriptors, formatted
- * "yyyy-MM-dd HH:mm:ss". This time is initialized as the current system
- * time minus 24 hours.
- */
- private String descriptorCutOff;
-
- /**
- * Cut-off time for downloading all server and extra-info descriptors
- * from the directory authorities, formatted "yyyy-MM-dd HH:mm:ss". This
- * time is initialized as the current system time minus 23:30 hours.
- */
- private String downloadAllDescriptorsCutOff;
-
- /**
- * Directory authorities that we plan to download all server and
- * extra-info descriptors from in this execution.
- */
- private Set<String> downloadAllDescriptorsFromAuthorities;
-
- /**
- * Current timestamp that is written to the missing list for descriptors
- * that we parsed in this execution and for authorities that we
- * downloaded all server and extra-info descriptors from.
- */
- private String currentTimestamp;
-
- /**
- * Logger for this class.
- */
- private Logger logger;
-
- /**
- * Number of descriptors requested by directory authority to be included
- * in logs.
- */
- private Map<String, Integer> requestsByAuthority;
-
- /**
- * Counters for descriptors that we had on the missing list at the
- * beginning of the execution, that we added to the missing list,
- * that we requested, and that we successfully downloaded in this
- * execution.
- */
- private int oldMissingConsensuses = 0, oldMissingVotes = 0,
- oldMissingServerDescriptors = 0, oldMissingExtraInfoDescriptors = 0,
- newMissingConsensuses = 0, newMissingVotes = 0,
- newMissingServerDescriptors = 0, newMissingExtraInfoDescriptors = 0,
- requestedConsensuses = 0, requestedVotes = 0,
- requestedMissingServerDescriptors = 0,
- requestedAllServerDescriptors = 0,
- requestedMissingExtraInfoDescriptors = 0,
- requestedAllExtraInfoDescriptors = 0, downloadedConsensuses = 0,
- downloadedVotes = 0, downloadedMissingServerDescriptors = 0,
- downloadedAllServerDescriptors = 0,
- downloadedMissingExtraInfoDescriptors = 0,
- downloadedAllExtraInfoDescriptors = 0;
-
- /**
- * Initializes this class, including reading in missing descriptors from
- * <code>stats/missing-relay-descriptors</code> and the times when we
- * last downloaded all server and extra-info descriptors from
- * <code>stats/last-downloaded-all-descriptors</code>.
- */
- public RelayDescriptorDownloader(RelayDescriptorParser rdp,
- List<String> authorities, boolean downloadCurrentConsensus,
- boolean downloadCurrentVotes,
- boolean downloadMissingServerDescriptors,
- boolean downloadMissingExtraInfos,
- boolean downloadAllServerDescriptors, boolean downloadAllExtraInfos,
- boolean downloadCompressed) {
-
- /* Memorize argument values. */
- this.rdp = rdp;
- this.authorities = new ArrayList<String>(authorities);
- this.downloadCurrentConsensus = downloadCurrentConsensus;
- this.downloadCurrentVotes = downloadCurrentVotes;
- this.downloadMissingServerDescriptors =
- downloadMissingServerDescriptors;
- this.downloadMissingExtraInfos = downloadMissingExtraInfos;
- this.downloadAllServerDescriptors = downloadAllServerDescriptors;
- this.downloadAllExtraInfos = downloadAllExtraInfos;
- this.downloadCompressed = downloadCompressed;
-
- /* Shuffle list of authorities for better load balancing over time. */
- Collections.shuffle(this.authorities);
-
- /* Initialize logger. */
- this.logger = Logger.getLogger(
- RelayDescriptorDownloader.class.getName());
-
- /* Prepare cut-off times and timestamp for the missing descriptors
- * list and the list of authorities to download all server and
- * extra-info descriptors from. */
- SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- format.setTimeZone(TimeZone.getTimeZone("UTC"));
- long now = System.currentTimeMillis();
- this.currentValidAfter = format.format((now / (60L * 60L * 1000L)) *
- (60L * 60L * 1000L));
- this.descriptorCutOff = format.format(now - 24L * 60L * 60L * 1000L);
- this.currentTimestamp = format.format(now);
- this.downloadAllDescriptorsCutOff = format.format(now
- - 23L * 60L * 60L * 1000L - 30L * 60L * 1000L);
-
- /* Read list of missing descriptors from disk and memorize those that
- * we are interested in and that are likely to be found on the
- * directory authorities. */
- this.missingDescriptors = new TreeMap<String, String>();
- this.missingDescriptorsFile = new File(
- "stats/missing-relay-descriptors");
- if (this.missingDescriptorsFile.exists()) {
- try {
- this.logger.fine("Reading file "
- + this.missingDescriptorsFile.getAbsolutePath() + "...");
- BufferedReader br = new BufferedReader(new FileReader(
- this.missingDescriptorsFile));
- String line;
- while ((line = br.readLine()) != null) {
- if (line.split(",").length > 2) {
- String published = line.split(",")[1];
- if (((line.startsWith("consensus,") ||
- line.startsWith("vote,")) &&
- this.currentValidAfter.equals(published)) ||
- ((line.startsWith("server,") ||
- line.startsWith("extra,")) &&
- this.descriptorCutOff.compareTo(published) < 0)) {
- if (!line.endsWith("NA")) {
- /* Not missing. */
- } else if (line.startsWith("consensus,")) {
- oldMissingConsensuses++;
- } else if (line.startsWith("vote,")) {
- oldMissingVotes++;
- } else if (line.startsWith("server,")) {
- oldMissingServerDescriptors++;
- } else if (line.startsWith("extra,")) {
- oldMissingExtraInfoDescriptors++;
- }
- int separateAt = line.lastIndexOf(",");
- this.missingDescriptors.put(line.substring(0,
- separateAt), line.substring(separateAt + 1));
- }
- } else {
- this.logger.fine("Invalid line '" + line + "' in "
- + this.missingDescriptorsFile.getAbsolutePath()
- + ". Ignoring.");
- }
- }
- br.close();
- this.logger.fine("Finished reading file "
- + this.missingDescriptorsFile.getAbsolutePath() + ".");
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed to read file "
- + this.missingDescriptorsFile.getAbsolutePath()
- + "! This means that we might forget to dowload relay "
- + "descriptors we are missing.", e);
- }
- }
-
- /* Read list of directory authorities and when we last downloaded all
- * server and extra-info descriptors from them. */
- this.lastDownloadedAllDescriptors = new HashMap<String, String>();
- this.lastDownloadedAllDescriptorsFile = new File(
- "stats/last-downloaded-all-descriptors");
- if (this.lastDownloadedAllDescriptorsFile.exists()) {
- try {
- this.logger.fine("Reading file "
- + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
- + "...");
- BufferedReader br = new BufferedReader(new FileReader(
- this.lastDownloadedAllDescriptorsFile));
- String line;
- while ((line = br.readLine()) != null) {
- if (line.split(",").length != 2) {
- this.logger.fine("Invalid line '" + line + "' in "
- + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
- + ". Ignoring.");
- } else {
- String[] parts = line.split(",");
- String authority = parts[0];
- String lastDownloaded = parts[1];
- this.lastDownloadedAllDescriptors.put(authority,
- lastDownloaded);
- }
- }
- br.close();
- this.logger.fine("Finished reading file "
- + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
- + ".");
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed to read file "
- + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
- + "! This means that we might download all server and "
- + "extra-info descriptors more often than we should.", e);
- }
- }
-
- /* Make a list of at most two directory authorities that we want to
- * download all server and extra-info descriptors from. */
- this.downloadAllDescriptorsFromAuthorities = new HashSet<String>();
- for (String authority : this.authorities) {
- if (!this.lastDownloadedAllDescriptors.containsKey(authority) ||
- this.lastDownloadedAllDescriptors.get(authority).compareTo(
- this.downloadAllDescriptorsCutOff) < 0) {
- this.downloadAllDescriptorsFromAuthorities.add(authority);
- }
- if (this.downloadAllDescriptorsFromAuthorities.size() >= 2) {
- break;
- }
- }
-
- /* Prepare statistics on this execution. */
- this.requestsByAuthority = new HashMap<String, Integer>();
- for (String authority : this.authorities) {
- this.requestsByAuthority.put(authority, 0);
- }
- }
-
- /**
- * We have parsed a consensus. Take this consensus off the missing list
- * and add the votes created by the given <code>authorities</code> and
- * the <code>serverDescriptors</code> which are in the format
- * "<published>,<relayid>,<descid>" to that list.
- */
- public void haveParsedConsensus(String validAfter,
- Set<String> authorities, Set<String> serverDescriptors) {
-
- /* Mark consensus as parsed. */
- if (this.currentValidAfter.equals(validAfter)) {
- String consensusKey = "consensus," + validAfter;
- this.missingDescriptors.put(consensusKey, this.currentTimestamp);
-
- /* Add votes to missing list. */
- for (String authority : authorities) {
- String voteKey = "vote," + validAfter + "," + authority;
- if (!this.missingDescriptors.containsKey(voteKey)) {
- this.missingDescriptors.put(voteKey, "NA");
- this.newMissingVotes++;
- }
- }
- }
-
- /* Add server descriptors to missing list. */
- for (String serverDescriptor : serverDescriptors) {
- String published = serverDescriptor.split(",")[0];
- if (this.descriptorCutOff.compareTo(published) < 0) {
- String serverDescriptorKey = "server," + serverDescriptor;
- if (!this.missingDescriptors.containsKey(
- serverDescriptorKey)) {
- this.missingDescriptors.put(serverDescriptorKey, "NA");
- this.newMissingServerDescriptors++;
- }
- }
- }
- }
-
- /**
- * We have parsed a vote. Take this vote off the missing list and add
- * the <code>serverDescriptors</code> which are in the format
- * "<published>,<relayid>,<descid>" to that list.
- */
- public void haveParsedVote(String validAfter, String fingerprint,
- Set<String> serverDescriptors) {
-
- /* Mark vote as parsed. */
- if (this.currentValidAfter.equals(validAfter)) {
- String voteKey = "vote," + validAfter + "," + fingerprint;
- this.missingDescriptors.put(voteKey, this.currentTimestamp);
- }
-
- /* Add server descriptors to missing list. */
- for (String serverDescriptor : serverDescriptors) {
- String published = serverDescriptor.split(",")[0];
- if (this.descriptorCutOff.compareTo(published) < 0) {
- String serverDescriptorKey = "server," + serverDescriptor;
- if (!this.missingDescriptors.containsKey(
- serverDescriptorKey)) {
- this.missingDescriptors.put(serverDescriptorKey, "NA");
- this.newMissingServerDescriptors++;
- }
- }
- }
- }
-
- /**
- * We have parsed a server descriptor. Take this server descriptor off
- * the missing list and put the extra-info descriptor digest on that
- * list.
- */
- public void haveParsedServerDescriptor(String published,
- String relayIdentity, String serverDescriptorDigest,
- String extraInfoDigest) {
-
- /* Mark server descriptor as parsed. */
- if (this.descriptorCutOff.compareTo(published) < 0) {
- String serverDescriptorKey = "server," + published + ","
- + relayIdentity + "," + serverDescriptorDigest;
- this.missingDescriptors.put(serverDescriptorKey,
- this.currentTimestamp);
-
- /* Add extra-info descriptor to missing list. */
- if (extraInfoDigest != null) {
- String extraInfoKey = "extra," + published + ","
- + relayIdentity + "," + extraInfoDigest;
- if (!this.missingDescriptors.containsKey(extraInfoKey)) {
- this.missingDescriptors.put(extraInfoKey, "NA");
- this.newMissingExtraInfoDescriptors++;
- }
- }
- }
- }
-
- /**
- * We have parsed an extra-info descriptor. Take it off the missing
- * list.
- */
- public void haveParsedExtraInfoDescriptor(String published,
- String relayIdentity, String extraInfoDigest) {
- if (this.descriptorCutOff.compareTo(published) < 0) {
- String extraInfoKey = "extra," + published + ","
- + relayIdentity + "," + extraInfoDigest;
- this.missingDescriptors.put(extraInfoKey, this.currentTimestamp);
- }
- }
-
- /**
- * Downloads missing descriptors that we think might still be available
- * on the directory authorities as well as all server and extra-info
- * descriptors once per day.
- */
- public void downloadDescriptors() {
-
- /* Put the current consensus on the missing list, unless we already
- * have it. */
- String consensusKey = "consensus," + this.currentValidAfter;
- if (!this.missingDescriptors.containsKey(consensusKey)) {
- this.missingDescriptors.put(consensusKey, "NA");
- this.newMissingConsensuses++;
- }
-
- /* Download descriptors from authorities which are in random order, so
- * that we distribute the load somewhat fairly over time. */
- for (String authority : authorities) {
-
- /* Make all requests to an authority in a single try block. If
- * something goes wrong with this authority, we give up on all
- * downloads and continue with the next authority. */
- /* TODO Some authorities provide very little bandwidth and could
- * slow down the entire download process. Ponder adding a timeout of
- * 3 or 5 minutes per authority to avoid getting in the way of the
- * next execution. */
- try {
-
- /* Start with downloading the current consensus, unless we already
- * have it. */
- if (downloadCurrentConsensus) {
- if (this.missingDescriptors.containsKey(consensusKey) &&
- this.missingDescriptors.get(consensusKey).equals("NA")) {
- this.requestedConsensuses++;
- this.downloadedConsensuses +=
- this.downloadResourceFromAuthority(authority,
- "/tor/status-vote/current/consensus");
- }
- }
-
- /* Next, try to download current votes that we're missing. */
- if (downloadCurrentVotes) {
- String voteKeyPrefix = "vote," + this.currentValidAfter;
- SortedSet<String> fingerprints = new TreeSet<String>();
- for (Map.Entry<String, String> e :
- this.missingDescriptors.entrySet()) {
- if (e.getValue().equals("NA") &&
- e.getKey().startsWith(voteKeyPrefix)) {
- String fingerprint = e.getKey().split(",")[2];
- fingerprints.add(fingerprint);
- }
- }
- for (String fingerprint : fingerprints) {
- this.requestedVotes++;
- this.downloadedVotes +=
- this.downloadResourceFromAuthority(authority,
- "/tor/status-vote/current/" + fingerprint);
- }
- }
-
- /* Download either all server and extra-info descriptors or only
- * those that we're missing. Start with server descriptors, then
- * request extra-info descriptors. */
- List<String> types = new ArrayList<String>(Arrays.asList(
- "server,extra".split(",")));
- for (String type : types) {
-
- /* Download all server or extra-info descriptors from this
- * authority if we haven't done so for 24 hours and if we're
- * configured to do so. */
- if (this.downloadAllDescriptorsFromAuthorities.contains(
- authority) && ((type.equals("server") &&
- this.downloadAllServerDescriptors) ||
- (type.equals("extra") && this.downloadAllExtraInfos))) {
- int downloadedAllDescriptors =
- this.downloadResourceFromAuthority(authority, "/tor/"
- + type + "/all");
- if (type.equals("server")) {
- this.requestedAllServerDescriptors++;
- this.downloadedAllServerDescriptors +=
- downloadedAllDescriptors;
- } else {
- this.requestedAllExtraInfoDescriptors++;
- this.downloadedAllExtraInfoDescriptors +=
- downloadedAllDescriptors;
- }
-
- /* Download missing server or extra-info descriptors if we're
- * configured to do so. */
- } else if ((type.equals("server") &&
- this.downloadMissingServerDescriptors) ||
- (type.equals("extra") && this.downloadMissingExtraInfos)) {
-
- /* Go through the list of missing descriptors of this type
- * and combine the descriptor identifiers to a URL of up to
- * 96 descriptors that we can download at once. */
- SortedSet<String> descriptorIdentifiers =
- new TreeSet<String>();
- for (Map.Entry<String, String> e :
- this.missingDescriptors.entrySet()) {
- if (e.getValue().equals("NA") &&
- e.getKey().startsWith(type + ",") &&
- this.descriptorCutOff.compareTo(
- e.getKey().split(",")[1]) < 0) {
- String descriptorIdentifier = e.getKey().split(",")[3];
- descriptorIdentifiers.add(descriptorIdentifier);
- }
- }
- StringBuilder combinedResource = null;
- int descriptorsInCombinedResource = 0,
- requestedDescriptors = 0, downloadedDescriptors = 0;
- for (String descriptorIdentifier : descriptorIdentifiers) {
- if (descriptorsInCombinedResource >= 96) {
- requestedDescriptors += descriptorsInCombinedResource;
- downloadedDescriptors +=
- this.downloadResourceFromAuthority(authority,
- combinedResource.toString());
- combinedResource = null;
- descriptorsInCombinedResource = 0;
- }
- if (descriptorsInCombinedResource == 0) {
- combinedResource = new StringBuilder("/tor/" + type
- + "/d/" + descriptorIdentifier);
- } else {
- combinedResource.append("+" + descriptorIdentifier);
- }
- descriptorsInCombinedResource++;
- }
- if (descriptorsInCombinedResource > 0) {
- requestedDescriptors += descriptorsInCombinedResource;
- downloadedDescriptors +=
- this.downloadResourceFromAuthority(authority,
- combinedResource.toString());
- }
- if (type.equals("server")) {
- this.requestedMissingServerDescriptors +=
- requestedDescriptors;
- this.downloadedMissingServerDescriptors +=
- downloadedDescriptors;
- } else {
- this.requestedMissingExtraInfoDescriptors +=
- requestedDescriptors;
- this.downloadedMissingExtraInfoDescriptors +=
- downloadedDescriptors;
- }
- }
- }
-
- /* If a download failed, stop requesting descriptors from this
- * authority and move on to the next. */
- } catch (IOException e) {
- logger.log(Level.FINE, "Failed downloading from " + authority
- + "!", e);
- }
- }
- }
-
- /**
- * Attempts to download one or more descriptors identified by a resource
- * string from a directory authority and passes the returned
- * descriptor(s) to the <code>RelayDescriptorParser</code> upon success.
- * Returns the number of descriptors contained in the reply. Throws an
- * <code>IOException</code> if something goes wrong while downloading.
- */
- private int downloadResourceFromAuthority(String authority,
- String resource) throws IOException {
- byte[] allData = null;
- this.requestsByAuthority.put(authority,
- this.requestsByAuthority.get(authority) + 1);
- /* TODO Disable compressed downloads for extra-info descriptors,
- * because zlib decompression doesn't work correctly. Figure out why
- * this is and fix it. */
- String fullUrl = "http://" + authority + resource
- + (this.downloadCompressed && !resource.startsWith("/tor/extra/")
- ? ".z" : "");
- URL u = new URL(fullUrl);
- HttpURLConnection huc = (HttpURLConnection) u.openConnection();
- huc.setRequestMethod("GET");
- huc.connect();
- int response = huc.getResponseCode();
- if (response == 200) {
- BufferedInputStream in = this.downloadCompressed &&
- !resource.startsWith("/tor/extra/")
- ? new BufferedInputStream(new InflaterInputStream(
- huc.getInputStream()))
- : new BufferedInputStream(huc.getInputStream());
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- int len;
- byte[] data = new byte[1024];
- while ((len = in.read(data, 0, 1024)) >= 0) {
- baos.write(data, 0, len);
- }
- in.close();
- allData = baos.toByteArray();
- }
- logger.fine("Downloaded " + fullUrl + " -> " + response + " ("
- + (allData == null ? 0 : allData.length) + " bytes)");
- int receivedDescriptors = 0;
- if (allData != null) {
- if (resource.startsWith("/tor/status-vote/current/")) {
- this.rdp.parse(allData);
- receivedDescriptors = 1;
- } else if (resource.startsWith("/tor/server/") ||
- resource.startsWith("/tor/extra/")) {
- if (resource.equals("/tor/server/all")) {
- this.lastDownloadedAllDescriptors.put(authority,
- this.currentTimestamp);
- }
- String ascii = null;
- try {
- ascii = new String(allData, "US-ASCII");
- } catch (UnsupportedEncodingException e) {
- /* No way that US-ASCII is not supported. */
- }
- int start = -1, sig = -1, end = -1;
- String startToken = resource.startsWith("/tor/server/") ?
- "router " : "extra-info ";
- String sigToken = "\nrouter-signature\n";
- String endToken = "\n-----END SIGNATURE-----\n";
- while (end < ascii.length()) {
- start = ascii.indexOf(startToken, end);
- if (start < 0) {
- break;
- }
- sig = ascii.indexOf(sigToken, start);
- if (sig < 0) {
- break;
- }
- sig += sigToken.length();
- end = ascii.indexOf(endToken, sig);
- if (end < 0) {
- break;
- }
- end += endToken.length();
- byte[] descBytes = new byte[end - start];
- System.arraycopy(allData, start, descBytes, 0, end - start);
- this.rdp.parse(descBytes);
- receivedDescriptors++;
- }
- }
- }
- return receivedDescriptors;
- }
-
- /**
- * Writes status files to disk and logs statistics about downloading
- * relay descriptors in this execution.
- */
- public void writeFile() {
-
- /* Write missing descriptors file to disk. */
- int missingConsensuses = 0, missingVotes = 0,
- missingServerDescriptors = 0, missingExtraInfoDescriptors = 0;
- try {
- this.logger.fine("Writing file "
- + this.missingDescriptorsFile.getAbsolutePath() + "...");
- this.missingDescriptorsFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- this.missingDescriptorsFile));
- for (Map.Entry<String, String> e :
- this.missingDescriptors.entrySet()) {
- String key = e.getKey(), value = e.getValue();
- if (!value.equals("NA")) {
- /* Not missing. */
- } else if (key.startsWith("consensus,")) {
- missingConsensuses++;
- } else if (key.startsWith("vote,")) {
- missingVotes++;
- } else if (key.startsWith("server,")) {
- missingServerDescriptors++;
- } else if (key.startsWith("extra,")) {
- missingExtraInfoDescriptors++;
- }
- bw.write(key + "," + value + "\n");
- }
- bw.close();
- this.logger.fine("Finished writing file "
- + this.missingDescriptorsFile.getAbsolutePath() + ".");
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed writing "
- + this.missingDescriptorsFile.getAbsolutePath() + "!", e);
- }
-
- /* Write text file containing the directory authorities and when we
- * last downloaded all server and extra-info descriptors from them to
- * disk. */
- try {
- this.logger.fine("Writing file "
- + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
- + "...");
- this.lastDownloadedAllDescriptorsFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- this.lastDownloadedAllDescriptorsFile));
- for (Map.Entry<String, String> e :
- this.lastDownloadedAllDescriptors.entrySet()) {
- String authority = e.getKey();
- String lastDownloaded = e.getValue();
- bw.write(authority + "," + lastDownloaded + "\n");
- }
- bw.close();
- this.logger.fine("Finished writing file "
- + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
- + ".");
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed writing "
- + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() + "!",
- e);
- }
-
- /* Log statistics about this execution. */
- this.logger.info("Finished downloading relay descriptors from the "
- + "directory authorities.");
- this.logger.info("At the beginning of this execution, we were "
- + "missing " + oldMissingConsensuses + " consensus(es), "
- + oldMissingVotes + " vote(s), " + oldMissingServerDescriptors
- + " server descriptor(s), and " + oldMissingExtraInfoDescriptors
- + " extra-info descriptor(s).");
- this.logger.info("During this execution, we added "
- + this.newMissingConsensuses + " consensus(es), "
- + this.newMissingVotes + " vote(s), "
- + this.newMissingServerDescriptors + " server descriptor(s), and "
- + this.newMissingExtraInfoDescriptors + " extra-info "
- + "descriptor(s) to the missing list, some of which we also "
- + "requested and removed from the list again.");
- this.logger.info("We requested " + this.requestedConsensuses
- + " consensus(es), " + this.requestedVotes + " vote(s), "
- + this.requestedMissingServerDescriptors + " missing server "
- + "descriptor(s), " + this.requestedAllServerDescriptors
- + " times all server descriptors, "
- + this.requestedMissingExtraInfoDescriptors + " missing "
- + "extra-info descriptor(s), and "
- + this.requestedAllExtraInfoDescriptors + " times all extra-info "
- + "descriptors from the directory authorities.");
- StringBuilder sb = new StringBuilder();
- for (String authority : this.authorities) {
- sb.append(" " + authority + "="
- + this.requestsByAuthority.get(authority));
- }
- this.logger.info("We sent these numbers of requests to the directory "
- + "authorities:" + sb.toString());
- this.logger.info("We successfully downloaded "
- + this.downloadedConsensuses + " consensus(es), "
- + this.downloadedVotes + " vote(s), "
- + this.downloadedMissingServerDescriptors + " missing server "
- + "descriptor(s), " + this.downloadedAllServerDescriptors
- + " server descriptor(s) when downloading all descriptors, "
- + this.downloadedMissingExtraInfoDescriptors + " missing "
- + "extra-info descriptor(s) and "
- + this.downloadedAllExtraInfoDescriptors + " extra-info "
- + "descriptor(s) when downloading all descriptors.");
- this.logger.info("At the end of this execution, we are missing "
- + missingConsensuses + " consensus(es), " + missingVotes
- + " vote(s), " + missingServerDescriptors + " server "
- + "descriptor(s), and " + missingExtraInfoDescriptors
- + " extra-info descriptor(s), some of which we may try in the next "
- + "execution.");
- }
-}
-
diff --git a/src/org/torproject/ernie/db/RelayDescriptorParser.java b/src/org/torproject/ernie/db/RelayDescriptorParser.java
deleted file mode 100644
index be54656..0000000
--- a/src/org/torproject/ernie/db/RelayDescriptorParser.java
+++ /dev/null
@@ -1,265 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.StringReader;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.SortedSet;
-import java.util.TimeZone;
-import java.util.TreeSet;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.apache.commons.codec.binary.Base64;
-import org.apache.commons.codec.binary.Hex;
-import org.apache.commons.codec.digest.DigestUtils;
-
-/**
- * Parses relay descriptors including network status consensuses and
- * votes, server and extra-info descriptors, and passes the results to the
- * stats handlers, to the archive writer, or to the relay descriptor
- * downloader.
- */
-public class RelayDescriptorParser {
-
- /**
- * File writer that writes descriptor contents to files in a
- * directory-archive directory structure.
- */
- private ArchiveWriter aw;
-
- /**
- * Missing descriptor downloader that uses the parse results to learn
- * which descriptors we are missing and want to download.
- */
- private RelayDescriptorDownloader rdd;
-
- /**
- * Logger for this class.
- */
- private Logger logger;
-
- private SimpleDateFormat dateTimeFormat;
-
- /**
- * Initializes this class.
- */
- public RelayDescriptorParser(ArchiveWriter aw) {
- this.aw = aw;
-
- /* Initialize logger. */
- this.logger = Logger.getLogger(RelayDescriptorParser.class.getName());
-
- this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- }
-
- public void setRelayDescriptorDownloader(
- RelayDescriptorDownloader rdd) {
- this.rdd = rdd;
- }
-
- public void parse(byte[] data) {
- try {
- /* Convert descriptor to ASCII for parsing. This means we'll lose
- * the non-ASCII chars, but we don't care about them for parsing
- * anyway. */
- BufferedReader br = new BufferedReader(new StringReader(new String(
- data, "US-ASCII")));
- String line;
- do {
- line = br.readLine();
- } while (line != null && line.startsWith("@"));
- if (line == null) {
- this.logger.fine("We were given an empty descriptor for "
- + "parsing. Ignoring.");
- return;
- }
- SimpleDateFormat parseFormat =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- parseFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- if (line.equals("network-status-version 3")) {
- // TODO when parsing the current consensus, check the fresh-until
- // time to see when we switch from hourly to half-hourly
- // consensuses
- boolean isConsensus = true;
- String validAfterTime = null, fingerprint = null,
- dirSource = null;
- long validAfter = -1L, dirKeyPublished = -1L;
- SortedSet<String> dirSources = new TreeSet<String>();
- SortedSet<String> serverDescriptors = new TreeSet<String>();
- SortedSet<String> hashedRelayIdentities = new TreeSet<String>();
- StringBuilder certificateStringBuilder = null;
- String certificateString = null;
- while ((line = br.readLine()) != null) {
- if (certificateStringBuilder != null) {
- if (line.startsWith("r ")) {
- certificateString = certificateStringBuilder.toString();
- certificateStringBuilder = null;
- } else {
- certificateStringBuilder.append(line + "\n");
- }
- }
- if (line.equals("vote-status vote")) {
- isConsensus = false;
- } else if (line.startsWith("valid-after ")) {
- validAfterTime = line.substring("valid-after ".length());
- validAfter = parseFormat.parse(validAfterTime).getTime();
- } else if (line.startsWith("dir-source ")) {
- dirSource = line.split(" ")[2];
- } else if (line.startsWith("vote-digest ")) {
- dirSources.add(dirSource);
- } else if (line.startsWith("dir-key-certificate-version ")) {
- certificateStringBuilder = new StringBuilder();
- certificateStringBuilder.append(line + "\n");
- } else if (line.startsWith("fingerprint ")) {
- fingerprint = line.split(" ")[1];
- } else if (line.startsWith("dir-key-published ")) {
- String dirKeyPublishedTime = line.substring(
- "dir-key-published ".length());
- dirKeyPublished = parseFormat.parse(dirKeyPublishedTime).
- getTime();
- } else if (line.startsWith("r ")) {
- String[] parts = line.split(" ");
- if (parts.length < 9) {
- this.logger.log(Level.WARNING, "Could not parse r line '"
- + line + "' in descriptor. Skipping.");
- break;
- }
- String publishedTime = parts[4] + " " + parts[5];
- String relayIdentity = Hex.encodeHexString(
- Base64.decodeBase64(parts[2] + "=")).
- toLowerCase();
- String serverDesc = Hex.encodeHexString(Base64.decodeBase64(
- parts[3] + "=")).toLowerCase();
- serverDescriptors.add(publishedTime + "," + relayIdentity
- + "," + serverDesc);
- hashedRelayIdentities.add(DigestUtils.shaHex(
- Base64.decodeBase64(parts[2] + "=")).
- toUpperCase());
- }
- }
- if (isConsensus) {
- if (this.rdd != null) {
- this.rdd.haveParsedConsensus(validAfterTime, dirSources,
- serverDescriptors);
- }
- if (this.aw != null) {
- this.aw.storeConsensus(data, validAfter);
- }
- } else {
- if (this.aw != null || this.rdd != null) {
- String ascii = new String(data, "US-ASCII");
- String startToken = "network-status-version ";
- String sigToken = "directory-signature ";
- int start = ascii.indexOf(startToken);
- int sig = ascii.indexOf(sigToken);
- if (start >= 0 && sig >= 0 && sig > start) {
- sig += sigToken.length();
- byte[] forDigest = new byte[sig - start];
- System.arraycopy(data, start, forDigest, 0, sig - start);
- String digest = DigestUtils.shaHex(forDigest).toUpperCase();
- if (this.aw != null) {
- this.aw.storeVote(data, validAfter, dirSource, digest);
- }
- if (this.rdd != null) {
- this.rdd.haveParsedVote(validAfterTime, fingerprint,
- serverDescriptors);
- }
- }
- if (certificateString != null) {
- if (this.aw != null) {
- this.aw.storeCertificate(certificateString.getBytes(),
- dirSource, dirKeyPublished);
- }
- }
- }
- }
- } else if (line.startsWith("router ")) {
- String publishedTime = null, extraInfoDigest = null,
- relayIdentifier = null;
- long published = -1L;
- while ((line = br.readLine()) != null) {
- if (line.startsWith("published ")) {
- publishedTime = line.substring("published ".length());
- published = parseFormat.parse(publishedTime).getTime();
- } else if (line.startsWith("opt fingerprint") ||
- line.startsWith("fingerprint")) {
- relayIdentifier = line.substring(line.startsWith("opt ") ?
- "opt fingerprint".length() : "fingerprint".length()).
- replaceAll(" ", "").toLowerCase();
- } else if (line.startsWith("opt extra-info-digest ") ||
- line.startsWith("extra-info-digest ")) {
- extraInfoDigest = line.startsWith("opt ") ?
- line.split(" ")[2].toLowerCase() :
- line.split(" ")[1].toLowerCase();
- }
- }
- String ascii = new String(data, "US-ASCII");
- String startToken = "router ";
- String sigToken = "\nrouter-signature\n";
- int start = ascii.indexOf(startToken);
- int sig = ascii.indexOf(sigToken) + sigToken.length();
- String digest = null;
- if (start >= 0 || sig >= 0 || sig > start) {
- byte[] forDigest = new byte[sig - start];
- System.arraycopy(data, start, forDigest, 0, sig - start);
- digest = DigestUtils.shaHex(forDigest);
- }
- if (this.aw != null && digest != null) {
- this.aw.storeServerDescriptor(data, digest, published);
- }
- if (this.rdd != null && digest != null) {
- this.rdd.haveParsedServerDescriptor(publishedTime,
- relayIdentifier, digest, extraInfoDigest);
- }
- } else if (line.startsWith("extra-info ")) {
- String publishedTime = null, relayIdentifier = line.split(" ")[2];
- long published = -1L;
- while ((line = br.readLine()) != null) {
- if (line.startsWith("published ")) {
- publishedTime = line.substring("published ".length());
- published = parseFormat.parse(publishedTime).getTime();
- }
- }
- String ascii = new String(data, "US-ASCII");
- String startToken = "extra-info ";
- String sigToken = "\nrouter-signature\n";
- String digest = null;
- int start = ascii.indexOf(startToken);
- if (start > 0) {
- /* Do not confuse "extra-info " in "@type extra-info 1.0" with
- * "extra-info 0000...". TODO This is a hack that should be
- * solved by using metrics-lib some day. */
- start = ascii.indexOf("\n" + startToken);
- if (start > 0) {
- start++;
- }
- }
- int sig = ascii.indexOf(sigToken) + sigToken.length();
- if (start >= 0 || sig >= 0 || sig > start) {
- byte[] forDigest = new byte[sig - start];
- System.arraycopy(data, start, forDigest, 0, sig - start);
- digest = DigestUtils.shaHex(forDigest);
- }
- if (this.aw != null && digest != null) {
- this.aw.storeExtraInfoDescriptor(data, digest, published);
- }
- if (this.rdd != null && digest != null) {
- this.rdd.haveParsedExtraInfoDescriptor(publishedTime,
- relayIdentifier.toLowerCase(), digest);
- }
- }
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not parse descriptor. "
- + "Skipping.", e);
- } catch (ParseException e) {
- this.logger.log(Level.WARNING, "Could not parse descriptor. "
- + "Skipping.", e);
- }
- }
-}
-
diff --git a/src/org/torproject/ernie/db/RsyncDataProvider.java b/src/org/torproject/ernie/db/RsyncDataProvider.java
deleted file mode 100644
index 2f9632e..0000000
--- a/src/org/torproject/ernie/db/RsyncDataProvider.java
+++ /dev/null
@@ -1,217 +0,0 @@
-/* Copyright 2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.BufferedInputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.Set;
-import java.util.Stack;
-import java.util.logging.Logger;
-
-/**
- * Copy files published in the last 3 days to a local directory that can
- * then be served via rsync.
- */
-public class RsyncDataProvider {
- public RsyncDataProvider(File directoryArchivesOutputDirectory,
- File sanitizedBridgesWriteDirectory,
- File sanitizedAssignmentsDirectory,
- boolean downloadExitList,
- File torperfOutputDirectory, File rsyncDirectory) {
-
- /* Initialize logger. */
- Logger logger = Logger.getLogger(RsyncDataProvider.class.getName());
-
- /* Determine the cut-off time for files in rsync/. */
- long cutOffMillis = System.currentTimeMillis()
- - 3L * 24L * 60L * 60L * 1000L;
-
- /* Create rsync/ directory if it doesn't exist. */
- if (!rsyncDirectory.exists()) {
- rsyncDirectory.mkdirs();
- }
-
- /* Make a list of all files in the rsync/ directory to delete those
- * that we didn't copy in this run. */
- Set<String> fileNamesInRsync = new HashSet<String>();
- Stack<File> files = new Stack<File>();
- files.add(rsyncDirectory);
- while (!files.isEmpty()) {
- File pop = files.pop();
- if (pop.isDirectory()) {
- files.addAll(Arrays.asList(pop.listFiles()));
- } else {
- fileNamesInRsync.add(pop.getName());
- }
- }
- logger.info("Found " + fileNamesInRsync.size() + " files in "
- + rsyncDirectory.getAbsolutePath() + " that we're either "
- + "overwriting or deleting in this execution.");
-
- /* Copy relay descriptors from the last 3 days. */
- if (directoryArchivesOutputDirectory != null) {
- files.add(directoryArchivesOutputDirectory);
- while (!files.isEmpty()) {
- File pop = files.pop();
- if (pop.isDirectory()) {
- files.addAll(Arrays.asList(pop.listFiles()));
- } else if (pop.lastModified() >= cutOffMillis) {
- String fileName = pop.getName();
- if (pop.getAbsolutePath().contains("/consensus/")) {
- this.copyFile(pop, new File(rsyncDirectory,
- "relay-descriptors/consensuses/" + fileName));
- } else if (pop.getAbsolutePath().contains("/vote/")) {
- this.copyFile(pop, new File(rsyncDirectory,
- "relay-descriptors/votes/" + fileName));
- } else if (pop.getAbsolutePath().contains(
- "/server-descriptor/")) {
- this.copyFile(pop, new File(rsyncDirectory,
- "relay-descriptors/server-descriptors/" + fileName));
- } else if (pop.getAbsolutePath().contains("/extra-info/")) {
- this.copyFile(pop, new File(rsyncDirectory,
- "relay-descriptors/extra-infos/" + fileName));
- } else {
- continue;
- }
- fileNamesInRsync.remove(pop.getName());
- }
- }
- }
- logger.info("After copying relay descriptors, there are still "
- + fileNamesInRsync.size() + " files left in "
- + rsyncDirectory.getAbsolutePath() + ".");
-
- /* Copy sanitized bridge descriptors from the last 3 days. */
- if (sanitizedBridgesWriteDirectory != null) {
- files.add(sanitizedBridgesWriteDirectory);
- while (!files.isEmpty()) {
- File pop = files.pop();
- if (pop.isDirectory()) {
- files.addAll(Arrays.asList(pop.listFiles()));
- } else if (pop.lastModified() >= cutOffMillis) {
- String fileName = pop.getName();
- if (pop.getAbsolutePath().contains("/statuses/")) {
- this.copyFile(pop, new File(rsyncDirectory,
- "bridge-descriptors/statuses/" + fileName));
- } else if (pop.getAbsolutePath().contains(
- "/server-descriptors/")) {
- this.copyFile(pop, new File(rsyncDirectory,
- "bridge-descriptors/server-descriptors/" + fileName));
- } else if (pop.getAbsolutePath().contains("/extra-infos/")) {
- this.copyFile(pop, new File(rsyncDirectory,
- "bridge-descriptors/extra-infos/" + fileName));
- } else {
- continue;
- }
- fileNamesInRsync.remove(pop.getName());
- }
- }
- }
- logger.info("After copying sanitized bridge descriptors, there are "
- + "still " + fileNamesInRsync.size() + " files left in "
- + rsyncDirectory.getAbsolutePath() + ".");
-
- /* Copy sanitized bridge pool assignments from the last 3 days. */
- if (sanitizedAssignmentsDirectory != null) {
- files.add(sanitizedAssignmentsDirectory);
- while (!files.isEmpty()) {
- File pop = files.pop();
- if (pop.isDirectory()) {
- files.addAll(Arrays.asList(pop.listFiles()));
- } else if (pop.lastModified() >= cutOffMillis) {
- String fileName = pop.getName();
- this.copyFile(pop, new File(rsyncDirectory,
- "bridge-pool-assignments/" + fileName));
- fileNamesInRsync.remove(pop.getName());
- }
- }
- }
- logger.info("After copying sanitized bridge pool assignments, there "
- + "are still " + fileNamesInRsync.size() + " files left in "
- + rsyncDirectory.getAbsolutePath() + ".");
-
- /* Copy exit lists from the last 3 days. */
- if (downloadExitList) {
- files.add(new File("exitlist"));
- while (!files.isEmpty()) {
- File pop = files.pop();
- if (pop.isDirectory()) {
- files.addAll(Arrays.asList(pop.listFiles()));
- } else if (pop.lastModified() >= cutOffMillis) {
- String fileName = pop.getName();
- this.copyFile(pop, new File(rsyncDirectory,
- "exit-lists/" + fileName));
- fileNamesInRsync.remove(pop.getName());
- }
- }
- }
- logger.info("After copying exit lists, there are still "
- + fileNamesInRsync.size() + " files left in "
- + rsyncDirectory.getAbsolutePath() + ".");
-
- /* Copy Torperf files. */
- if (torperfOutputDirectory != null) {
- files.add(torperfOutputDirectory);
- while (!files.isEmpty()) {
- File pop = files.pop();
- if (pop.isDirectory()) {
- files.addAll(Arrays.asList(pop.listFiles()));
- } else if (pop.getName().endsWith(".tpf") &&
- pop.lastModified() >= cutOffMillis) {
- String fileName = pop.getName();
- this.copyFile(pop, new File(rsyncDirectory,
- "torperf/" + fileName));
- fileNamesInRsync.remove(pop.getName());
- }
- }
- }
- logger.info("After copying Torperf files, there are still "
- + fileNamesInRsync.size() + " files left in "
- + rsyncDirectory.getAbsolutePath() + ".");
-
- /* Delete all files that we didn't (over-)write in this run. */
- files.add(rsyncDirectory);
- while (!files.isEmpty()) {
- File pop = files.pop();
- if (pop.isDirectory()) {
- files.addAll(Arrays.asList(pop.listFiles()));
- } else if (fileNamesInRsync.contains(pop.getName())) {
- fileNamesInRsync.remove(pop.getName());
- pop.delete();
- }
- }
- logger.info("After deleting files that we didn't overwrite in this "
- + "run, there are " + fileNamesInRsync.size() + " files left in "
- + rsyncDirectory.getAbsolutePath() + ".");
- }
-
- private void copyFile(File from, File to) {
- if (from.exists() && to.exists() &&
- from.lastModified() == to.lastModified() &&
- from.length() == to.length()) {
- return;
- }
- try {
- to.getParentFile().mkdirs();
- FileInputStream fis = new FileInputStream(from);
- BufferedInputStream bis = new BufferedInputStream(fis);
- FileOutputStream fos = new FileOutputStream(to);
- int len;
- byte[] data = new byte[1024];
- while ((len = bis.read(data, 0, 1024)) >= 0) {
- fos.write(data, 0, len);
- }
- bis.close();
- fos.close();
- to.setLastModified(from.lastModified());
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
-}
-
diff --git a/src/org/torproject/ernie/db/SanitizedBridgesWriter.java b/src/org/torproject/ernie/db/SanitizedBridgesWriter.java
deleted file mode 100644
index afafe11..0000000
--- a/src/org/torproject/ernie/db/SanitizedBridgesWriter.java
+++ /dev/null
@@ -1,911 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.*;
-import java.security.*;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.*;
-import java.util.logging.*;
-
-import org.apache.commons.codec.DecoderException;
-import org.apache.commons.codec.digest.*;
-import org.apache.commons.codec.binary.*;
-
-/**
- * Sanitizes bridge descriptors, i.e., removes all possibly sensitive
- * information from them, and writes them to a local directory structure.
- * During the sanitizing process, all information about the bridge
- * identity or IP address are removed or replaced. The goal is to keep the
- * sanitized bridge descriptors useful for statistical analysis while not
- * making it easier for an adversary to enumerate bridges.
- *
- * There are three types of bridge descriptors: bridge network statuses
- * (lists of all bridges at a given time), server descriptors (published
- * by the bridge to advertise their capabilities), and extra-info
- * descriptors (published by the bridge, mainly for statistical analysis).
- */
-public class SanitizedBridgesWriter {
-
- /**
- * Logger for this class.
- */
- private Logger logger;
-
- /**
- * Output directory for writing sanitized bridge descriptors.
- */
- private File sanitizedBridgesDirectory;
-
- private boolean replaceIPAddressesWithHashes;
-
- private boolean persistenceProblemWithSecrets;
-
- private SortedMap<String, byte[]> secretsForHashingIPAddresses;
-
- private String bridgeSanitizingCutOffTimestamp;
-
- private boolean haveWarnedAboutInterval;
-
- private File bridgeIpSecretsFile;
-
- private SecureRandom secureRandom;
-
- /**
- * Initializes this class.
- */
- public SanitizedBridgesWriter(File sanitizedBridgesDirectory,
- File statsDirectory, boolean replaceIPAddressesWithHashes,
- long limitBridgeSanitizingInterval) {
-
- if (sanitizedBridgesDirectory == null || statsDirectory == null) {
- throw new IllegalArgumentException();
- }
-
- /* Memorize argument values. */
- this.sanitizedBridgesDirectory = sanitizedBridgesDirectory;
- this.replaceIPAddressesWithHashes = replaceIPAddressesWithHashes;
-
- /* Initialize logger. */
- this.logger = Logger.getLogger(
- SanitizedBridgesWriter.class.getName());
-
- /* Initialize secure random number generator if we need it. */
- if (this.replaceIPAddressesWithHashes) {
- try {
- this.secureRandom = SecureRandom.getInstance("SHA1PRNG", "SUN");
- } catch (GeneralSecurityException e) {
- this.logger.log(Level.WARNING, "Could not initialize secure "
- + "random number generator! Not calculating any IP address "
- + "hashes in this execution!", e);
- this.persistenceProblemWithSecrets = true;
- }
- }
-
- /* Read hex-encoded secrets for replacing IP addresses with hashes
- * from disk. */
- this.secretsForHashingIPAddresses = new TreeMap<String, byte[]>();
- this.bridgeIpSecretsFile = new File(statsDirectory,
- "bridge-ip-secrets");
- if (this.bridgeIpSecretsFile.exists()) {
- try {
- BufferedReader br = new BufferedReader(new FileReader(
- this.bridgeIpSecretsFile));
- String line;
- while ((line = br.readLine()) != null) {
- String[] parts = line.split(",");
- if ((line.length() != ("yyyy-MM,".length() + 31 * 2) &&
- line.length() != ("yyyy-MM,".length() + 50 * 2)) ||
- parts.length != 2) {
- this.logger.warning("Invalid line in bridge-ip-secrets file "
- + "starting with '" + line.substring(0, 7) + "'! "
- + "Not calculating any IP address hashes in this "
- + "execution!");
- this.persistenceProblemWithSecrets = true;
- break;
- }
- String month = parts[0];
- byte[] secret = Hex.decodeHex(parts[1].toCharArray());
- this.secretsForHashingIPAddresses.put(month, secret);
- }
- br.close();
- if (!this.persistenceProblemWithSecrets) {
- this.logger.fine("Read "
- + this.secretsForHashingIPAddresses.size() + " secrets for "
- + "hashing bridge IP addresses.");
- }
- } catch (DecoderException e) {
- this.logger.log(Level.WARNING, "Failed to decode hex string in "
- + this.bridgeIpSecretsFile + "! Not calculating any IP "
- + "address hashes in this execution!", e);
- this.persistenceProblemWithSecrets = true;
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed to read "
- + this.bridgeIpSecretsFile + "! Not calculating any IP "
- + "address hashes in this execution!", e);
- this.persistenceProblemWithSecrets = true;
- }
- }
-
- /* If we're configured to keep secrets only for a limited time, define
- * the cut-off day and time. */
- if (limitBridgeSanitizingInterval >= 0L) {
- SimpleDateFormat formatter = new SimpleDateFormat(
- "yyyy-MM-dd HH:mm:ss");
- formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
- this.bridgeSanitizingCutOffTimestamp = formatter.format(
- System.currentTimeMillis() - 24L * 60L * 60L * 1000L
- * limitBridgeSanitizingInterval);
- } else {
- this.bridgeSanitizingCutOffTimestamp = "1999-12-31 23:59:59";
- }
- }
-
- private String scrubOrAddress(String orAddress, byte[] fingerprintBytes,
- String published) throws IOException {
- if (!orAddress.contains(":")) {
- /* Malformed or-address or a line. */
- return null;
- }
- String addressPart = orAddress.substring(0,
- orAddress.lastIndexOf(":"));
- String portPart = orAddress.substring(orAddress.lastIndexOf(":") + 1);
- String scrubbedAddressPart = null;
- if (addressPart.startsWith("[")) {
- scrubbedAddressPart = this.scrubIpv6Address(addressPart,
- fingerprintBytes, published);
- } else {
- scrubbedAddressPart = this.scrubIpv4Address(addressPart,
- fingerprintBytes, published);
- }
- return (scrubbedAddressPart == null ? null :
- scrubbedAddressPart + ":" + portPart);
- }
-
- private String scrubIpv4Address(String address, byte[] fingerprintBytes,
- String published) throws IOException {
- if (this.replaceIPAddressesWithHashes) {
- if (this.persistenceProblemWithSecrets) {
- /* There's a persistence problem, so we shouldn't scrub more IP
- * addresses in this execution. */
- return null;
- }
- byte[] hashInput = new byte[4 + 20 + 31];
- String[] ipParts = address.split("\\.");
- for (int i = 0; i < 4; i++) {
- hashInput[i] = (byte) Integer.parseInt(ipParts[i]);
- }
- System.arraycopy(fingerprintBytes, 0, hashInput, 4, 20);
- String month = published.substring(0, "yyyy-MM".length());
- byte[] secret = this.getSecretForMonth(month);
- System.arraycopy(secret, 0, hashInput, 24, 31);
- byte[] hashOutput = DigestUtils.sha256(hashInput);
- String hashedAddress = "10."
- + (((int) hashOutput[0] + 256) % 256) + "."
- + (((int) hashOutput[1] + 256) % 256) + "."
- + (((int) hashOutput[2] + 256) % 256);
- return hashedAddress;
- } else {
- return "127.0.0.1";
- }
- }
-
- private String scrubIpv6Address(String address, byte[] fingerprintBytes,
- String published) throws IOException {
- StringBuilder sb = new StringBuilder("[fd9f:2e19:3bcf::");
- if (this.replaceIPAddressesWithHashes) {
- if (this.persistenceProblemWithSecrets) {
- /* There's a persistence problem, so we shouldn't scrub more IP
- * addresses in this execution. */
- return null;
- }
- byte[] hashInput = new byte[16 + 20 + 19];
- String[] doubleColonSeparatedParts = address.substring(1,
- address.length() - 1).split("::", -1);
- if (doubleColonSeparatedParts.length > 2) {
- /* Invalid IPv6 address. */
- return null;
- }
- List<String> hexParts = new ArrayList<String>();
- for (String doubleColonSeparatedPart : doubleColonSeparatedParts) {
- StringBuilder hexPart = new StringBuilder();
- String[] parts = doubleColonSeparatedPart.split(":", -1);
- if (parts.length < 1 || parts.length > 8) {
- /* Invalid IPv6 address. */
- return null;
- }
- for (int i = 0; i < parts.length; i++) {
- String part = parts[i];
- if (part.contains(".")) {
- String[] ipParts = part.split("\\.");
- byte[] ipv4Bytes = new byte[4];
- if (ipParts.length != 4) {
- /* Invalid IPv4 part in IPv6 address. */
- return null;
- }
- for (int m = 0; m < 4; m++) {
- ipv4Bytes[m] = (byte) Integer.parseInt(ipParts[m]);
- }
- hexPart.append(Hex.encodeHexString(ipv4Bytes));
- } else if (part.length() > 4) {
- /* Invalid IPv6 address. */
- return null;
- } else {
- for (int k = part.length(); k < 4; k++) {
- hexPart.append("0");
- }
- hexPart.append(part);
- }
- }
- hexParts.add(hexPart.toString());
- }
- StringBuilder hex = new StringBuilder();
- hex.append(hexParts.get(0));
- if (hexParts.size() == 2) {
- for (int i = 32 - hexParts.get(0).length()
- - hexParts.get(1).length(); i > 0; i--) {
- hex.append("0");
- }
- hex.append(hexParts.get(1));
- }
- byte[] ipBytes = null;
- try {
- ipBytes = Hex.decodeHex(hex.toString().toCharArray());
- } catch (DecoderException e) {
- /* TODO Invalid IPv6 address. */
- return null;
- }
- if (ipBytes.length != 16) {
- /* TODO Invalid IPv6 address. */
- return null;
- }
- System.arraycopy(ipBytes, 0, hashInput, 0, 16);
- System.arraycopy(fingerprintBytes, 0, hashInput, 16, 20);
- String month = published.substring(0, "yyyy-MM".length());
- byte[] secret = this.getSecretForMonth(month);
- System.arraycopy(secret, 31, hashInput, 36, 19);
- String hashOutput = DigestUtils.sha256Hex(hashInput);
- sb.append(hashOutput.substring(hashOutput.length() - 6,
- hashOutput.length() - 4));
- sb.append(":");
- sb.append(hashOutput.substring(hashOutput.length() - 4));
- }
- sb.append("]");
- return sb.toString();
- }
-
- private byte[] getSecretForMonth(String month) throws IOException {
- if (!this.secretsForHashingIPAddresses.containsKey(month) ||
- this.secretsForHashingIPAddresses.get(month).length == 31) {
- byte[] secret = new byte[50];
- this.secureRandom.nextBytes(secret);
- if (this.secretsForHashingIPAddresses.containsKey(month)) {
- System.arraycopy(this.secretsForHashingIPAddresses.get(month), 0,
- secret, 0, 31);
- }
- if (month.compareTo(
- this.bridgeSanitizingCutOffTimestamp) < 0) {
- this.logger.warning("Generated a secret that we won't make "
- + "persistent, because it's outside our bridge descriptor "
- + "sanitizing interval.");
- } else {
- /* Append secret to file on disk immediately before using it, or
- * we might end with inconsistently sanitized bridges. */
- try {
- if (!this.bridgeIpSecretsFile.exists()) {
- this.bridgeIpSecretsFile.getParentFile().mkdirs();
- }
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- this.bridgeIpSecretsFile,
- this.bridgeIpSecretsFile.exists()));
- bw.write(month + "," + Hex.encodeHexString(secret) + "\n");
- bw.close();
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not store new secret "
- + "to disk! Not calculating any IP address hashes in "
- + "this execution!", e);
- this.persistenceProblemWithSecrets = true;
- throw new IOException(e);
- }
- }
- this.secretsForHashingIPAddresses.put(month, secret);
- }
- return this.secretsForHashingIPAddresses.get(month);
- }
-
- /**
- * Sanitizes a network status and writes it to disk.
- */
- public void sanitizeAndStoreNetworkStatus(byte[] data,
- String publicationTime) {
-
- if (this.persistenceProblemWithSecrets) {
- /* There's a persistence problem, so we shouldn't scrub more IP
- * addresses in this execution. */
- return;
- }
-
- if (this.bridgeSanitizingCutOffTimestamp.
- compareTo(publicationTime) > 0) {
- this.logger.log(!this.haveWarnedAboutInterval ? Level.WARNING
- : Level.FINE, "Sanitizing and storing network status with "
- + "publication time outside our descriptor sanitizing "
- + "interval.");
- this.haveWarnedAboutInterval = true;
- }
-
- /* Parse the given network status line by line. */
- SortedMap<String, String> scrubbedLines =
- new TreeMap<String, String>();
- try {
- StringBuilder scrubbed = new StringBuilder();
- BufferedReader br = new BufferedReader(new StringReader(new String(
- data, "US-ASCII")));
- String line = null;
- String mostRecentDescPublished = null;
- byte[] fingerprintBytes = null;
- String descPublicationTime = null;
- String hashedBridgeIdentityHex = null;
- while ((line = br.readLine()) != null) {
-
- /* r lines contain sensitive information that needs to be removed
- * or replaced. */
- if (line.startsWith("r ")) {
-
- /* Clear buffer from previously scrubbed lines. */
- if (scrubbed.length() > 0) {
- String scrubbedLine = scrubbed.toString();
- scrubbedLines.put(hashedBridgeIdentityHex, scrubbedLine);
- scrubbed = new StringBuilder();
- }
-
- /* Parse the relevant parts of this r line. */
- String[] parts = line.split(" ");
- String nickname = parts[1];
- fingerprintBytes = Base64.decodeBase64(parts[2] + "==");
- String descriptorIdentifier = parts[3];
- descPublicationTime = parts[4] + " " + parts[5];
- String address = parts[6];
- String orPort = parts[7];
- String dirPort = parts[8];
-
- /* Determine most recent descriptor publication time. */
- if (descPublicationTime.compareTo(publicationTime) <= 0 &&
- (mostRecentDescPublished == null ||
- descPublicationTime.compareTo(
- mostRecentDescPublished) > 0)) {
- mostRecentDescPublished = descPublicationTime;
- }
-
- /* Write scrubbed r line to buffer. */
- byte[] hashedBridgeIdentity = DigestUtils.sha(fingerprintBytes);
- String hashedBridgeIdentityBase64 = Base64.encodeBase64String(
- hashedBridgeIdentity).substring(0, 27);
- hashedBridgeIdentityHex = Hex.encodeHexString(
- hashedBridgeIdentity);
- String hashedDescriptorIdentifier = Base64.encodeBase64String(
- DigestUtils.sha(Base64.decodeBase64(descriptorIdentifier
- + "=="))).substring(0, 27);
- String scrubbedAddress = scrubIpv4Address(address,
- fingerprintBytes,
- descPublicationTime);
- scrubbed.append("r " + nickname + " "
- + hashedBridgeIdentityBase64 + " "
- + hashedDescriptorIdentifier + " " + descPublicationTime
- + " " + scrubbedAddress + " " + orPort + " " + dirPort
- + "\n");
-
- /* Sanitize any addresses in a lines using the fingerprint and
- * descriptor publication time from the previous r line. */
- } else if (line.startsWith("a ")) {
- String scrubbedOrAddress = scrubOrAddress(
- line.substring("a ".length()), fingerprintBytes,
- descPublicationTime);
- if (scrubbedOrAddress != null) {
- scrubbed.append("a " + scrubbedOrAddress + "\n");
- } else {
- this.logger.warning("Invalid address in line '" + line
- + "' in bridge network status. Skipping line!");
- }
-
- /* Nothing special about s, w, and p lines; just copy them. */
- } else if (line.startsWith("s ") || line.equals("s") ||
- line.startsWith("w ") || line.equals("w") ||
- line.startsWith("p ") || line.equals("p")) {
- scrubbed.append(line + "\n");
-
- /* There should be nothing else but r, w, p, and s lines in the
- * network status. If there is, we should probably learn before
- * writing anything to the sanitized descriptors. */
- } else {
- this.logger.fine("Unknown line '" + line + "' in bridge "
- + "network status. Not writing to disk!");
- return;
- }
- }
- br.close();
- if (scrubbed.length() > 0) {
- String scrubbedLine = scrubbed.toString();
- scrubbedLines.put(hashedBridgeIdentityHex, scrubbedLine);
- scrubbed = new StringBuilder();
- }
-
- /* Check if we can tell from the descriptor publication times
- * whether this status is possibly stale. */
- SimpleDateFormat formatter = new SimpleDateFormat(
- "yyyy-MM-dd HH:mm:ss");
- formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
- if (formatter.parse(publicationTime).getTime() -
- formatter.parse(mostRecentDescPublished).getTime() >
- 60L * 60L * 1000L) {
- this.logger.warning("The most recent descriptor in the bridge "
- + "network status published at " + publicationTime + " was "
- + "published at " + mostRecentDescPublished + " which is "
- + "more than 1 hour before the status. This is a sign for "
- + "the status being stale. Please check!");
- }
- } catch (ParseException e) {
- this.logger.log(Level.WARNING, "Could not parse timestamp in "
- + "bridge network status.", e);
- return;
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not parse bridge network "
- + "status.", e);
- return;
- }
-
- /* Write the sanitized network status to disk. */
- try {
-
- /* Determine file name. */
- String syear = publicationTime.substring(0, 4);
- String smonth = publicationTime.substring(5, 7);
- String sday = publicationTime.substring(8, 10);
- String stime = publicationTime.substring(11, 13)
- + publicationTime.substring(14, 16)
- + publicationTime.substring(17, 19);
- File statusFile = new File(
- this.sanitizedBridgesDirectory.getAbsolutePath() + "/" + syear
- + "/" + smonth + "/statuses/" + sday + "/" + syear + smonth
- + sday + "-" + stime + "-"
- + "4A0CCD2DDC7995083D73F5D667100C8A5831F16D");
-
- /* Create all parent directories to write this network status. */
- statusFile.getParentFile().mkdirs();
-
- /* Write sanitized network status to disk. */
- BufferedWriter bw = new BufferedWriter(new FileWriter(statusFile));
- bw.write("@type bridge-network-status 1.0\n");
- bw.write("published " + publicationTime + "\n");
- for (String scrubbed : scrubbedLines.values()) {
- bw.write(scrubbed);
- }
- bw.close();
-
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not write sanitized bridge "
- + "network status to disk.", e);
- return;
- }
- }
-
- /**
- * Sanitizes a bridge server descriptor and writes it to disk.
- */
- public void sanitizeAndStoreServerDescriptor(byte[] data) {
-
- if (this.persistenceProblemWithSecrets) {
- /* There's a persistence problem, so we shouldn't scrub more IP
- * addresses in this execution. */
- return;
- }
-
- /* Parse descriptor to generate a sanitized version. */
- String scrubbedDesc = null, published = null;
- try {
- BufferedReader br = new BufferedReader(new StringReader(
- new String(data, "US-ASCII")));
- StringBuilder scrubbed = new StringBuilder();
- String line = null, hashedBridgeIdentity = null, address = null,
- routerLine = null, scrubbedAddress = null;
- List<String> orAddresses = null, scrubbedOrAddresses = null;
- boolean skipCrypto = false;
- while ((line = br.readLine()) != null) {
-
- /* Skip all crypto parts that might be used to derive the bridge's
- * identity fingerprint. */
- if (skipCrypto && !line.startsWith("-----END ")) {
- continue;
-
- /* Store the router line for later processing, because we may need
- * the bridge identity fingerprint for replacing the IP address in
- * the scrubbed version. */
- } else if (line.startsWith("router ")) {
- address = line.split(" ")[2];
- routerLine = line;
-
- /* Store or-address parts in a list and sanitize them when we have
- * read the fingerprint. */
- } else if (line.startsWith("or-address ")) {
- if (orAddresses == null) {
- orAddresses = new ArrayList<String>();
- }
- orAddresses.add(line.substring("or-address ".length()));
-
- /* Parse the publication time to see if we're still inside the
- * sanitizing interval. */
- } else if (line.startsWith("published ")) {
- published = line.substring("published ".length());
- if (this.bridgeSanitizingCutOffTimestamp.
- compareTo(published) > 0) {
- this.logger.log(!this.haveWarnedAboutInterval
- ? Level.WARNING : Level.FINE, "Sanitizing and storing "
- + "server descriptor with publication time outside our "
- + "descriptor sanitizing interval.");
- this.haveWarnedAboutInterval = true;
- }
- scrubbed.append(line + "\n");
-
- /* Parse the fingerprint to determine the hashed bridge
- * identity. */
- } else if (line.startsWith("opt fingerprint ") ||
- line.startsWith("fingerprint ")) {
- String fingerprint = line.substring(line.startsWith("opt ") ?
- "opt fingerprint".length() : "fingerprint".length()).
- replaceAll(" ", "").toLowerCase();
- byte[] fingerprintBytes = Hex.decodeHex(
- fingerprint.toCharArray());
- hashedBridgeIdentity = DigestUtils.shaHex(fingerprintBytes).
- toLowerCase();
- try {
- scrubbedAddress = scrubIpv4Address(address, fingerprintBytes,
- published);
- if (orAddresses != null) {
- scrubbedOrAddresses = new ArrayList<String>();
- for (String orAddress : orAddresses) {
- String scrubbedOrAddress = scrubOrAddress(orAddress,
- fingerprintBytes, published);
- if (scrubbedOrAddress != null) {
- scrubbedOrAddresses.add(scrubbedOrAddress);
- } else {
- this.logger.warning("Invalid address in line "
- + "'or-address " + orAddress + "' in bridge server "
- + "descriptor. Skipping line!");
- }
- }
- }
- } catch (IOException e) {
- /* There's a persistence problem, so we shouldn't scrub more
- * IP addresses in this execution. */
- this.persistenceProblemWithSecrets = true;
- return;
- }
- scrubbed.append((line.startsWith("opt ") ? "opt " : "")
- + "fingerprint");
- for (int i = 0; i < hashedBridgeIdentity.length() / 4; i++)
- scrubbed.append(" " + hashedBridgeIdentity.substring(4 * i,
- 4 * (i + 1)).toUpperCase());
- scrubbed.append("\n");
-
- /* Replace the contact line (if present) with a generic one. */
- } else if (line.startsWith("contact ")) {
- scrubbed.append("contact somebody\n");
-
- /* When we reach the signature, we're done. Write the sanitized
- * descriptor to disk below. */
- } else if (line.startsWith("router-signature")) {
- String[] routerLineParts = routerLine.split(" ");
- scrubbedDesc = "router " + routerLineParts[1] + " "
- + scrubbedAddress + " " + routerLineParts[3] + " "
- + routerLineParts[4] + " " + routerLineParts[5] + "\n";
- if (scrubbedOrAddresses != null) {
- for (String scrubbedOrAddress : scrubbedOrAddresses) {
- scrubbedDesc = scrubbedDesc += "or-address "
- + scrubbedOrAddress + "\n";
- }
- }
- scrubbedDesc += scrubbed.toString();
- break;
-
- /* Replace extra-info digest with the hashed digest of the
- * non-scrubbed descriptor. */
- } else if (line.startsWith("opt extra-info-digest ") ||
- line.startsWith("extra-info-digest ")) {
- String extraInfoDescriptorIdentifier = line.substring(
- line.indexOf("extra-info-digest ")
- + "extra-info-digest ".length());
- String hashedExtraInfoDescriptorIdentifier =
- DigestUtils.shaHex(Hex.decodeHex(
- extraInfoDescriptorIdentifier.toCharArray())).toUpperCase();
- scrubbed.append((line.startsWith("opt ") ? "opt " : "")
- + "extra-info-digest " + hashedExtraInfoDescriptorIdentifier
- + "\n");
-
- /* Possibly sanitize reject lines if they contain the bridge's own
- * IP address. */
- } else if (line.startsWith("reject ")) {
- if (address != null && line.startsWith("reject " + address)) {
- scrubbed.append("reject " + scrubbedAddress
- + line.substring("reject ".length() + address.length())
- + "\n");
- } else {
- scrubbed.append(line + "\n");
- }
-
- /* Write the following lines unmodified to the sanitized
- * descriptor. */
- } else if (line.startsWith("accept ")
- || line.startsWith("platform ")
- || line.startsWith("opt protocols ")
- || line.startsWith("protocols ")
- || line.startsWith("uptime ")
- || line.startsWith("bandwidth ")
- || line.startsWith("opt hibernating ")
- || line.startsWith("hibernating ")
- || line.equals("opt hidden-service-dir")
- || line.equals("hidden-service-dir")
- || line.equals("opt caches-extra-info")
- || line.equals("caches-extra-info")
- || line.equals("opt allow-single-hop-exits")
- || line.equals("allow-single-hop-exits")) {
- scrubbed.append(line + "\n");
-
- /* Replace node fingerprints in the family line with their hashes
- * and leave nicknames unchanged. */
- } else if (line.startsWith("family ")) {
- StringBuilder familyLine = new StringBuilder("family");
- for (String s : line.substring(7).split(" ")) {
- if (s.startsWith("$")) {
- familyLine.append(" $" + DigestUtils.shaHex(Hex.decodeHex(
- s.substring(1).toCharArray())).toUpperCase());
- } else {
- familyLine.append(" " + s);
- }
- }
- scrubbed.append(familyLine.toString() + "\n");
-
- /* Skip the purpose line that the bridge authority adds to its
- * cached-descriptors file. */
- } else if (line.startsWith("@purpose ")) {
- continue;
-
- /* Skip all crypto parts that might leak the bridge's identity
- * fingerprint. */
- } else if (line.startsWith("-----BEGIN ")
- || line.equals("onion-key") || line.equals("signing-key")) {
- skipCrypto = true;
-
- /* Stop skipping lines when the crypto parts are over. */
- } else if (line.startsWith("-----END ")) {
- skipCrypto = false;
-
- /* If we encounter an unrecognized line, stop parsing and print
- * out a warning. We might have overlooked sensitive information
- * that we need to remove or replace for the sanitized descriptor
- * version. */
- } else {
- this.logger.fine("Unrecognized line '" + line + "'. Skipping.");
- return;
- }
- }
- br.close();
- } catch (Exception e) {
- this.logger.log(Level.WARNING, "Could not parse server "
- + "descriptor.", e);
- return;
- }
-
- /* Determine filename of sanitized server descriptor. */
- String descriptorDigest = null;
- try {
- String ascii = new String(data, "US-ASCII");
- String startToken = "router ";
- String sigToken = "\nrouter-signature\n";
- int start = ascii.indexOf(startToken);
- int sig = ascii.indexOf(sigToken) + sigToken.length();
- if (start >= 0 && sig >= 0 && sig > start) {
- byte[] forDigest = new byte[sig - start];
- System.arraycopy(data, start, forDigest, 0, sig - start);
- descriptorDigest = DigestUtils.shaHex(DigestUtils.sha(forDigest));
- }
- } catch (UnsupportedEncodingException e) {
- /* Handle below. */
- }
- if (descriptorDigest == null) {
- this.logger.log(Level.WARNING, "Could not calculate server "
- + "descriptor digest.");
- return;
- }
- String dyear = published.substring(0, 4);
- String dmonth = published.substring(5, 7);
- File newFile = new File(
- this.sanitizedBridgesDirectory.getAbsolutePath() + "/"
- + dyear + "/" + dmonth + "/server-descriptors/"
- + "/" + descriptorDigest.charAt(0) + "/"
- + descriptorDigest.charAt(1) + "/"
- + descriptorDigest);
-
- /* Write sanitized server descriptor to disk, including all its parent
- * directories. */
- try {
- newFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(newFile));
- bw.write("@type bridge-server-descriptor 1.0\n");
- bw.write(scrubbedDesc);
- bw.write("router-digest " + descriptorDigest.toUpperCase() + "\n");
- bw.close();
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not write sanitized server "
- + "descriptor to disk.", e);
- return;
- }
- }
-
- /**
- * Sanitizes an extra-info descriptor and writes it to disk.
- */
- public void sanitizeAndStoreExtraInfoDescriptor(byte[] data) {
-
- /* Parse descriptor to generate a sanitized version. */
- String scrubbedDesc = null, published = null;
- try {
- BufferedReader br = new BufferedReader(new StringReader(new String(
- data, "US-ASCII")));
- String line = null;
- StringBuilder scrubbed = null;
- String hashedBridgeIdentity = null;
- while ((line = br.readLine()) != null) {
-
- /* Parse bridge identity from extra-info line and replace it with
- * its hash in the sanitized descriptor. */
- String[] parts = line.split(" ");
- if (line.startsWith("extra-info ")) {
- hashedBridgeIdentity = DigestUtils.shaHex(Hex.decodeHex(
- parts[2].toCharArray())).toLowerCase();
- scrubbed = new StringBuilder("extra-info " + parts[1] + " "
- + hashedBridgeIdentity.toUpperCase() + "\n");
-
- /* Parse the publication time to determine the file name. */
- } else if (line.startsWith("published ")) {
- scrubbed.append(line + "\n");
- published = line.substring("published ".length());
-
- /* Remove everything from transport lines except the transport
- * name. */
- } else if (line.startsWith("transport ")) {
- if (parts.length < 3) {
- this.logger.fine("Illegal line in extra-info descriptor: '"
- + line + "'. Skipping descriptor.");
- return;
- }
- scrubbed.append("transport " + parts[1] + "\n");
-
- /* Skip transport-info lines entirely. */
- } else if (line.startsWith("transport-info ")) {
-
- /* Write the following lines unmodified to the sanitized
- * descriptor. */
- } else if (line.startsWith("write-history ")
- || line.startsWith("read-history ")
- || line.startsWith("geoip-start-time ")
- || line.startsWith("geoip-client-origins ")
- || line.startsWith("geoip-db-digest ")
- || line.startsWith("conn-bi-direct ")
- || line.startsWith("bridge-")
- || line.startsWith("dirreq-")
- || line.startsWith("cell-")
- || line.startsWith("entry-")
- || line.startsWith("exit-")) {
- scrubbed.append(line + "\n");
-
- /* When we reach the signature, we're done. Write the sanitized
- * descriptor to disk below. */
- } else if (line.startsWith("router-signature")) {
- scrubbedDesc = scrubbed.toString();
- break;
-
- /* If we encounter an unrecognized line, stop parsing and print
- * out a warning. We might have overlooked sensitive information
- * that we need to remove or replace for the sanitized descriptor
- * version. */
- } else {
- this.logger.fine("Unrecognized line '" + line + "'. Skipping.");
- return;
- }
- }
- br.close();
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not parse extra-info "
- + "descriptor.", e);
- return;
- } catch (DecoderException e) {
- this.logger.log(Level.WARNING, "Could not parse extra-info "
- + "descriptor.", e);
- return;
- }
-
- /* Determine filename of sanitized extra-info descriptor. */
- String descriptorDigest = null;
- try {
- String ascii = new String(data, "US-ASCII");
- String startToken = "extra-info ";
- String sigToken = "\nrouter-signature\n";
- int start = ascii.indexOf(startToken);
- int sig = ascii.indexOf(sigToken) + sigToken.length();
- if (start >= 0 && sig >= 0 && sig > start) {
- byte[] forDigest = new byte[sig - start];
- System.arraycopy(data, start, forDigest, 0, sig - start);
- descriptorDigest = DigestUtils.shaHex(DigestUtils.sha(forDigest));
- }
- } catch (UnsupportedEncodingException e) {
- /* Handle below. */
- }
- if (descriptorDigest == null) {
- this.logger.log(Level.WARNING, "Could not calculate extra-info "
- + "descriptor digest.");
- return;
- }
- String dyear = published.substring(0, 4);
- String dmonth = published.substring(5, 7);
- File newFile = new File(
- this.sanitizedBridgesDirectory.getAbsolutePath() + "/"
- + dyear + "/" + dmonth + "/extra-infos/"
- + descriptorDigest.charAt(0) + "/"
- + descriptorDigest.charAt(1) + "/"
- + descriptorDigest);
-
- /* Write sanitized extra-info descriptor to disk, including all its
- * parent directories. */
- try {
- newFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(newFile));
- bw.write("@type bridge-extra-info 1.1\n");
- bw.write(scrubbedDesc);
- bw.write("router-digest " + descriptorDigest.toUpperCase() + "\n");
- bw.close();
- } catch (Exception e) {
- this.logger.log(Level.WARNING, "Could not write sanitized "
- + "extra-info descriptor to disk.", e);
- }
- }
-
- /**
- * Rewrite all network statuses that might contain references to server
- * descriptors we added or updated in this execution. This applies to
- * all statuses that have been published up to 24 hours after any added
- * or updated server descriptor.
- */
- public void finishWriting() {
-
- /* Delete secrets that we don't need anymore. */
- if (!this.secretsForHashingIPAddresses.isEmpty() &&
- this.secretsForHashingIPAddresses.firstKey().compareTo(
- this.bridgeSanitizingCutOffTimestamp) < 0) {
- try {
- int kept = 0, deleted = 0;
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- this.bridgeIpSecretsFile));
- for (Map.Entry<String, byte[]> e :
- this.secretsForHashingIPAddresses.entrySet()) {
- if (e.getKey().compareTo(
- this.bridgeSanitizingCutOffTimestamp) < 0) {
- deleted++;
- } else {
- bw.write(e.getKey() + "," + Hex.encodeHexString(e.getValue())
- + "\n");
- kept++;
- }
- }
- bw.close();
- this.logger.info("Deleted " + deleted + " secrets that we don't "
- + "need anymore and kept " + kept + ".");
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not store reduced set of "
- + "secrets to disk! This is a bad sign, better check what's "
- + "going on!", e);
- }
- }
- }
-}
-
diff --git a/src/org/torproject/ernie/db/TorperfDownloader.java b/src/org/torproject/ernie/db/TorperfDownloader.java
deleted file mode 100644
index 058e29b..0000000
--- a/src/org/torproject/ernie/db/TorperfDownloader.java
+++ /dev/null
@@ -1,573 +0,0 @@
-/* Copyright 2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.net.HttpURLConnection;
-import java.net.URL;
-import java.text.SimpleDateFormat;
-import java.util.List;
-import java.util.Map;
-import java.util.SortedMap;
-import java.util.TimeZone;
-import java.util.TreeMap;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-/* Download possibly truncated Torperf .data and .extradata files from
- * configured sources, append them to the files we already have, and merge
- * the two files into the .tpf format. */
-public class TorperfDownloader {
-
- private File torperfOutputDirectory = null;
- private SortedMap<String, String> torperfSources = null;
- private List<String> torperfFilesLines = null;
- private Logger logger = null;
- private SimpleDateFormat dateFormat;
-
- public TorperfDownloader(File torperfOutputDirectory,
- SortedMap<String, String> torperfSources,
- List<String> torperfFilesLines) {
- if (torperfOutputDirectory == null) {
- throw new IllegalArgumentException();
- }
- this.torperfOutputDirectory = torperfOutputDirectory;
- this.torperfSources = torperfSources;
- this.torperfFilesLines = torperfFilesLines;
- if (!this.torperfOutputDirectory.exists()) {
- this.torperfOutputDirectory.mkdirs();
- }
- this.logger = Logger.getLogger(TorperfDownloader.class.getName());
- this.dateFormat = new SimpleDateFormat("yyyy-MM-dd");
- this.dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- this.readLastMergedTimestamps();
- for (String torperfFilesLine : this.torperfFilesLines) {
- this.downloadAndMergeFiles(torperfFilesLine);
- }
- this.writeLastMergedTimestamps();
- }
-
- private File torperfLastMergedFile =
- new File("stats/torperf-last-merged");
- SortedMap<String, String> lastMergedTimestamps =
- new TreeMap<String, String>();
- private void readLastMergedTimestamps() {
- if (!this.torperfLastMergedFile.exists()) {
- return;
- }
- try {
- BufferedReader br = new BufferedReader(new FileReader(
- this.torperfLastMergedFile));
- String line;
- while ((line = br.readLine()) != null) {
- String[] parts = line.split(" ");
- String fileName = null, timestamp = null;
- if (parts.length == 2) {
- try {
- Double.parseDouble(parts[1]);
- fileName = parts[0];
- timestamp = parts[1];
- } catch (NumberFormatException e) {
- /* Handle below. */
- }
- }
- if (fileName == null || timestamp == null) {
- this.logger.log(Level.WARNING, "Invalid line '" + line + "' in "
- + this.torperfLastMergedFile.getAbsolutePath() + ". "
- + "Ignoring past history of merging .data and .extradata "
- + "files.");
- this.lastMergedTimestamps.clear();
- break;
- }
- this.lastMergedTimestamps.put(fileName, timestamp);
- }
- br.close();
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Error while reading '"
- + this.torperfLastMergedFile.getAbsolutePath() + ". Ignoring "
- + "past history of merging .data and .extradata files.");
- this.lastMergedTimestamps.clear();
- }
- }
-
- private void writeLastMergedTimestamps() {
- try {
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- this.torperfLastMergedFile));
- for (Map.Entry<String, String> e :
- this.lastMergedTimestamps.entrySet()) {
- String fileName = e.getKey();
- String timestamp = e.getValue();
- bw.write(fileName + " " + timestamp + "\n");
- }
- bw.close();
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Error while writing '"
- + this.torperfLastMergedFile.getAbsolutePath() + ". This may "
- + "result in ignoring history of merging .data and .extradata "
- + "files in the next execution.", e);
- }
- }
-
- private void downloadAndMergeFiles(String torperfFilesLine) {
- String[] parts = torperfFilesLine.split(" ");
- String sourceName = parts[1];
- int fileSize = -1;
- try {
- fileSize = Integer.parseInt(parts[2]);
- } catch (NumberFormatException e) {
- this.logger.log(Level.WARNING, "Could not parse file size in "
- + "TorperfFiles configuration line '" + torperfFilesLine
- + "'.");
- return;
- }
-
- /* Download and append the .data file. */
- String dataFileName = parts[3];
- String sourceBaseUrl = torperfSources.get(sourceName);
- String dataUrl = sourceBaseUrl + dataFileName;
- String dataOutputFileName = sourceName + "-" + dataFileName;
- File dataOutputFile = new File(torperfOutputDirectory,
- dataOutputFileName);
- boolean downloadedDataFile = this.downloadAndAppendFile(dataUrl,
- dataOutputFile, true);
-
- /* Download and append the .extradata file. */
- String extradataFileName = parts[4];
- String extradataUrl = sourceBaseUrl + extradataFileName;
- String extradataOutputFileName = sourceName + "-" + extradataFileName;
- File extradataOutputFile = new File(torperfOutputDirectory,
- extradataOutputFileName);
- boolean downloadedExtradataFile = this.downloadAndAppendFile(
- extradataUrl, extradataOutputFile, false);
-
- /* Merge both files into .tpf format. */
- if (!downloadedDataFile && !downloadedExtradataFile) {
- return;
- }
- String skipUntil = null;
- if (this.lastMergedTimestamps.containsKey(dataOutputFileName)) {
- skipUntil = this.lastMergedTimestamps.get(dataOutputFileName);
- }
- try {
- skipUntil = this.mergeFiles(dataOutputFile, extradataOutputFile,
- sourceName, fileSize, skipUntil);
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed merging " + dataOutputFile
- + " and " + extradataOutputFile + ".", e);
- }
- if (skipUntil != null) {
- this.lastMergedTimestamps.put(dataOutputFileName, skipUntil);
- }
- }
-
- private boolean downloadAndAppendFile(String url, File outputFile,
- boolean isDataFile) {
-
- /* Read an existing output file to determine which line will be the
- * first to append to it. */
- String lastTimestampLine = null;
- int linesAfterLastTimestampLine = 0;
- if (outputFile.exists() && outputFile.lastModified() >
- System.currentTimeMillis() - 330L * 60L * 1000L) {
- return false;
- } else if (outputFile.exists()) {
- try {
- BufferedReader br = new BufferedReader(new FileReader(
- outputFile));
- String line;
- while ((line = br.readLine()) != null) {
- if (isDataFile || line.contains(" LAUNCH")) {
- lastTimestampLine = line;
- linesAfterLastTimestampLine = 0;
- } else {
- linesAfterLastTimestampLine++;
- }
- }
- br.close();
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed reading '"
- + outputFile.getAbsolutePath() + "' to determine the first "
- + "line to append to it.", e);
- return false;
- }
- }
- try {
- this.logger.fine("Downloading " + (isDataFile ? ".data" :
- ".extradata") + " file from '" + url + "' and merging it into "
- + "'" + outputFile.getAbsolutePath() + "'.");
- URL u = new URL(url);
- HttpURLConnection huc = (HttpURLConnection) u.openConnection();
- huc.setRequestMethod("GET");
- huc.connect();
- BufferedReader br = new BufferedReader(new InputStreamReader(
- huc.getInputStream()));
- String line;
- BufferedWriter bw = new BufferedWriter(new FileWriter(outputFile,
- true));
- boolean copyLines = lastTimestampLine == null;
- while ((line = br.readLine()) != null) {
- if (copyLines && linesAfterLastTimestampLine == 0) {
- if (isDataFile || line.contains(" LAUNCH")) {
- lastTimestampLine = line;
- }
- bw.write(line + "\n");
- } else if (copyLines && linesAfterLastTimestampLine > 0) {
- linesAfterLastTimestampLine--;
- } else if (line.equals(lastTimestampLine)) {
- copyLines = true;
- }
- }
- bw.close();
- br.close();
- if (!copyLines) {
- this.logger.warning("The last timestamp line in '"
- + outputFile.getAbsolutePath() + "' is not contained in the "
- + "new file downloaded from '" + url + "'. Cannot append "
- + "new lines without possibly leaving a gap. Skipping.");
- return false;
- }
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed downloading and/or merging '"
- + url + "'.", e);
- return false;
- }
- if (lastTimestampLine == null) {
- this.logger.warning("'" + outputFile.getAbsolutePath()
- + "' doesn't contain any timestamp lines. Unable to check "
- + "whether that file is stale or not.");
- } else {
- long lastTimestampMillis = -1L;
- if (isDataFile) {
- lastTimestampMillis = Long.parseLong(lastTimestampLine.substring(
- 0, lastTimestampLine.indexOf(" "))) * 1000L;
- } else {
- lastTimestampMillis = Long.parseLong(lastTimestampLine.substring(
- lastTimestampLine.indexOf(" LAUNCH=") + " LAUNCH=".length(),
- lastTimestampLine.indexOf(".",
- lastTimestampLine.indexOf(" LAUNCH=")))) * 1000L;
- }
- if (lastTimestampMillis < System.currentTimeMillis()
- - 330L * 60L * 1000L) {
- this.logger.warning("The last timestamp in '"
- + outputFile.getAbsolutePath() + "' is more than 5:30 hours "
- + "old: " + lastTimestampMillis);
- }
- }
- return true;
- }
-
- private String mergeFiles(File dataFile, File extradataFile,
- String source, int fileSize, String skipUntil) throws IOException {
- SortedMap<String, String> config = new TreeMap<String, String>();
- config.put("SOURCE", source);
- config.put("FILESIZE", String.valueOf(fileSize));
- if (!dataFile.exists() || !extradataFile.exists()) {
- this.logger.warning("File " + dataFile.getAbsolutePath() + " or "
- + extradataFile.getAbsolutePath() + " is missing.");
- return null;
- }
- this.logger.fine("Merging " + dataFile.getAbsolutePath() + " and "
- + extradataFile.getAbsolutePath() + " into .tpf format.");
- BufferedReader brD = new BufferedReader(new FileReader(dataFile)),
- brE = new BufferedReader(new FileReader(extradataFile));
- String lineD = brD.readLine(), lineE = brE.readLine();
- int d = 1, e = 1;
- String maxDataComplete = null, maxUsedAt = null;
- while (lineD != null) {
-
- /* Parse .data line. Every valid .data line will go into the .tpf
- * format, either with additional information from the .extradata
- * file or without it. */
- if (lineD.isEmpty()) {
- this.logger.finer("Skipping empty line " + dataFile.getName()
- + ":" + d++ + ".");
- lineD = brD.readLine();
- continue;
- }
- SortedMap<String, String> data = this.parseDataLine(lineD);
- if (data == null) {
- this.logger.finer("Skipping illegal line " + dataFile.getName()
- + ":" + d++ + " '" + lineD + "'.");
- lineD = brD.readLine();
- continue;
- }
- String dataComplete = data.get("DATACOMPLETE");
- double dataCompleteSeconds = Double.parseDouble(dataComplete);
- if (skipUntil != null && dataComplete.compareTo(skipUntil) < 0) {
- this.logger.finer("Skipping " + dataFile.getName() + ":"
- + d++ + " which we already processed before.");
- lineD = brD.readLine();
- continue;
- }
- maxDataComplete = dataComplete;
-
- /* Parse .extradata line if available and try to find the one that
- * matches the .data line. */
- SortedMap<String, String> extradata = null;
- while (lineE != null) {
- if (lineE.isEmpty()) {
- this.logger.finer("Skipping " + extradataFile.getName() + ":"
- + e++ + " which is empty.");
- lineE = brE.readLine();
- continue;
- }
- if (lineE.startsWith("BUILDTIMEOUT_SET ")) {
- this.logger.finer("Skipping " + extradataFile.getName() + ":"
- + e++ + " which is a BUILDTIMEOUT_SET line.");
- lineE = brE.readLine();
- continue;
- } else if (lineE.startsWith("ok ") ||
- lineE.startsWith("error ")) {
- this.logger.finer("Skipping " + extradataFile.getName() + ":"
- + e++ + " which is in the old format.");
- lineE = brE.readLine();
- continue;
- }
- extradata = this.parseExtradataLine(lineE);
- if (extradata == null) {
- this.logger.finer("Skipping Illegal line "
- + extradataFile.getName() + ":" + e++ + " '" + lineE
- + "'.");
- lineE = brE.readLine();
- continue;
- }
- if (!extradata.containsKey("USED_AT")) {
- this.logger.finer("Skipping " + extradataFile.getName() + ":"
- + e++ + " which doesn't contain a USED_AT element.");
- lineE = brE.readLine();
- continue;
- }
- String usedAt = extradata.get("USED_AT");
- double usedAtSeconds = Double.parseDouble(usedAt);
- if (skipUntil != null && usedAt.compareTo(skipUntil) < 0) {
- this.logger.finer("Skipping " + extradataFile.getName() + ":"
- + e++ + " which we already processed before.");
- lineE = brE.readLine();
- continue;
- }
- maxUsedAt = usedAt;
- if (Math.abs(usedAtSeconds - dataCompleteSeconds) <= 1.0) {
- this.logger.fine("Merging " + extradataFile.getName() + ":"
- + e++ + " into the current .data line.");
- lineE = brE.readLine();
- break;
- } else if (usedAtSeconds > dataCompleteSeconds) {
- this.logger.finer("Comparing " + extradataFile.getName()
- + " to the next .data line.");
- extradata = null;
- break;
- } else {
- this.logger.finer("Skipping " + extradataFile.getName() + ":"
- + e++ + " which is too old to be merged with "
- + dataFile.getName() + ":" + d + ".");
- lineE = brE.readLine();
- continue;
- }
- }
-
- /* Write output line to .tpf file. */
- SortedMap<String, String> keysAndValues =
- new TreeMap<String, String>();
- if (extradata != null) {
- keysAndValues.putAll(extradata);
- }
- keysAndValues.putAll(data);
- keysAndValues.putAll(config);
- this.logger.fine("Writing " + dataFile.getName() + ":" + d++ + ".");
- lineD = brD.readLine();
- try {
- this.writeTpfLine(source, fileSize, keysAndValues);
- } catch (IOException ex) {
- this.logger.log(Level.WARNING, "Error writing output line. "
- + "Aborting to merge " + dataFile.getName() + " and "
- + extradataFile.getName() + ".", e);
- break;
- }
- }
- brD.close();
- brE.close();
- this.writeCachedTpfLines();
- if (maxDataComplete == null) {
- return maxUsedAt;
- } else if (maxUsedAt == null) {
- return maxDataComplete;
- } else if (maxDataComplete.compareTo(maxUsedAt) > 0) {
- return maxUsedAt;
- } else {
- return maxDataComplete;
- }
- }
-
- private SortedMap<Integer, String> dataTimestamps;
- private SortedMap<String, String> parseDataLine(String line) {
- String[] parts = line.trim().split(" ");
- if (line.length() == 0 || parts.length < 20) {
- return null;
- }
- if (this.dataTimestamps == null) {
- this.dataTimestamps = new TreeMap<Integer, String>();
- this.dataTimestamps.put(0, "START");
- this.dataTimestamps.put(2, "SOCKET");
- this.dataTimestamps.put(4, "CONNECT");
- this.dataTimestamps.put(6, "NEGOTIATE");
- this.dataTimestamps.put(8, "REQUEST");
- this.dataTimestamps.put(10, "RESPONSE");
- this.dataTimestamps.put(12, "DATAREQUEST");
- this.dataTimestamps.put(14, "DATARESPONSE");
- this.dataTimestamps.put(16, "DATACOMPLETE");
- this.dataTimestamps.put(21, "DATAPERC10");
- this.dataTimestamps.put(23, "DATAPERC20");
- this.dataTimestamps.put(25, "DATAPERC30");
- this.dataTimestamps.put(27, "DATAPERC40");
- this.dataTimestamps.put(29, "DATAPERC50");
- this.dataTimestamps.put(31, "DATAPERC60");
- this.dataTimestamps.put(33, "DATAPERC70");
- this.dataTimestamps.put(35, "DATAPERC80");
- this.dataTimestamps.put(37, "DATAPERC90");
- }
- SortedMap<String, String> data = new TreeMap<String, String>();
- try {
- for (Map.Entry<Integer, String> e : this.dataTimestamps.entrySet()) {
- int i = e.getKey();
- if (parts.length > i + 1) {
- String key = e.getValue();
- String value = String.format("%s.%02d", parts[i],
- Integer.parseInt(parts[i + 1]) / 10000);
- data.put(key, value);
- }
- }
- } catch (NumberFormatException e) {
- return null;
- }
- data.put("WRITEBYTES", parts[18]);
- data.put("READBYTES", parts[19]);
- if (parts.length >= 21) {
- data.put("DIDTIMEOUT", parts[20]);
- }
- return data;
- }
-
- private SortedMap<String, String> parseExtradataLine(String line) {
- String[] parts = line.split(" ");
- SortedMap<String, String> extradata = new TreeMap<String, String>();
- String previousKey = null;
- for (String part : parts) {
- String[] keyAndValue = part.split("=", -1);
- if (keyAndValue.length == 2) {
- String key = keyAndValue[0];
- previousKey = key;
- String value = keyAndValue[1];
- if (value.contains(".") && value.lastIndexOf(".") ==
- value.length() - 2) {
- /* Make sure that all floats have two trailing digits. */
- value += "0";
- }
- extradata.put(key, value);
- } else if (keyAndValue.length == 1 && previousKey != null) {
- String value = keyAndValue[0];
- if (previousKey.equals("STREAM_FAIL_REASONS") &&
- (value.equals("MISC") || value.equals("EXITPOLICY") ||
- value.equals("RESOURCELIMIT") ||
- value.equals("RESOLVEFAILED"))) {
- extradata.put(previousKey, extradata.get(previousKey) + ":"
- + value);
- } else {
- return null;
- }
- } else {
- return null;
- }
- }
- return extradata;
- }
-
- private String cachedSource;
- private int cachedFileSize;
- private String cachedStartDate;
- private SortedMap<String, String> cachedTpfLines;
- private void writeTpfLine(String source, int fileSize,
- SortedMap<String, String> keysAndValues) throws IOException {
- StringBuilder sb = new StringBuilder();
- int written = 0;
- for (Map.Entry<String, String> keyAndValue :
- keysAndValues.entrySet()) {
- String key = keyAndValue.getKey();
- String value = keyAndValue.getValue();
- sb.append((written++ > 0 ? " " : "") + key + "=" + value);
- }
- String line = sb.toString();
- String startString = keysAndValues.get("START");
- long startMillis = Long.parseLong(startString.substring(0,
- startString.indexOf("."))) * 1000L;
- String startDate = dateFormat.format(startMillis);
- if (this.cachedTpfLines == null || !source.equals(this.cachedSource) ||
- fileSize != this.cachedFileSize ||
- !startDate.equals(this.cachedStartDate)) {
- this.writeCachedTpfLines();
- this.readTpfLinesToCache(source, fileSize, startDate);
- }
- if (!this.cachedTpfLines.containsKey(startString) ||
- line.length() > this.cachedTpfLines.get(startString).length()) {
- this.cachedTpfLines.put(startString, line);
- }
- }
-
- private void readTpfLinesToCache(String source, int fileSize,
- String startDate) throws IOException {
- this.cachedTpfLines = new TreeMap<String, String>();
- this.cachedSource = source;
- this.cachedFileSize = fileSize;
- this.cachedStartDate = startDate;
- File tpfFile = new File(torperfOutputDirectory,
- startDate.replaceAll("-", "/") + "/"
- + source + "-" + String.valueOf(fileSize) + "-" + startDate
- + ".tpf");
- if (!tpfFile.exists()) {
- return;
- }
- BufferedReader br = new BufferedReader(new FileReader(tpfFile));
- String line;
- while ((line = br.readLine()) != null) {
- if (line.startsWith("@type ")) {
- continue;
- }
- if (line.contains("START=")) {
- String startString = line.substring(line.indexOf("START=")
- + "START=".length()).split(" ")[0];
- this.cachedTpfLines.put(startString, line);
- }
- }
- br.close();
- }
-
- private void writeCachedTpfLines() throws IOException {
- if (this.cachedSource == null || this.cachedFileSize == 0 ||
- this.cachedStartDate == null || this.cachedTpfLines == null) {
- return;
- }
- File tpfFile = new File(torperfOutputDirectory,
- this.cachedStartDate.replaceAll("-", "/")
- + "/" + this.cachedSource + "-"
- + String.valueOf(this.cachedFileSize) + "-"
- + this.cachedStartDate + ".tpf");
- tpfFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(tpfFile));
- bw.write("@type torperf 1.0\n");
- for (String line : this.cachedTpfLines.values()) {
- bw.write(line + "\n");
- }
- bw.close();
- this.cachedSource = null;
- this.cachedFileSize = 0;
- this.cachedStartDate = null;
- this.cachedTpfLines = null;
- }
-}
-
diff --git a/src/org/torproject/ernie/db/bridgedescs/BridgeDescriptorParser.java b/src/org/torproject/ernie/db/bridgedescs/BridgeDescriptorParser.java
new file mode 100644
index 0000000..f0b617a
--- /dev/null
+++ b/src/org/torproject/ernie/db/bridgedescs/BridgeDescriptorParser.java
@@ -0,0 +1,46 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.bridgedescs;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+public class BridgeDescriptorParser {
+ private SanitizedBridgesWriter sbw;
+ private Logger logger;
+ public BridgeDescriptorParser(SanitizedBridgesWriter sbw) {
+ this.sbw = sbw;
+ this.logger =
+ Logger.getLogger(BridgeDescriptorParser.class.getName());
+ }
+ public void parse(byte[] allData, String dateTime) {
+ try {
+ BufferedReader br = new BufferedReader(new StringReader(
+ new String(allData, "US-ASCII")));
+ String line = br.readLine();
+ if (line == null) {
+ return;
+ } else if (line.startsWith("r ")) {
+ if (this.sbw != null) {
+ this.sbw.sanitizeAndStoreNetworkStatus(allData, dateTime);
+ }
+ } else if (line.startsWith("router ")) {
+ if (this.sbw != null) {
+ this.sbw.sanitizeAndStoreServerDescriptor(allData);
+ }
+ } else if (line.startsWith("extra-info ")) {
+ if (this.sbw != null) {
+ this.sbw.sanitizeAndStoreExtraInfoDescriptor(allData);
+ }
+ }
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not parse bridge descriptor.",
+ e);
+ return;
+ }
+ }
+}
+
diff --git a/src/org/torproject/ernie/db/bridgedescs/BridgeSnapshotReader.java b/src/org/torproject/ernie/db/bridgedescs/BridgeSnapshotReader.java
new file mode 100644
index 0000000..783775c
--- /dev/null
+++ b/src/org/torproject/ernie/db/bridgedescs/BridgeSnapshotReader.java
@@ -0,0 +1,220 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.bridgedescs;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.Stack;
+import java.util.TreeSet;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
+import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
+
+/**
+ * Reads the half-hourly snapshots of bridge descriptors from Tonga.
+ */
+public class BridgeSnapshotReader {
+ public BridgeSnapshotReader(BridgeDescriptorParser bdp,
+ File bridgeDirectoriesDir, File statsDirectory) {
+
+ if (bdp == null || bridgeDirectoriesDir == null ||
+ statsDirectory == null) {
+ throw new IllegalArgumentException();
+ }
+
+ Logger logger =
+ Logger.getLogger(BridgeSnapshotReader.class.getName());
+ SortedSet<String> parsed = new TreeSet<String>();
+ File bdDir = bridgeDirectoriesDir;
+ File pbdFile = new File(statsDirectory, "parsed-bridge-directories");
+ boolean modified = false;
+ if (bdDir.exists()) {
+ if (pbdFile.exists()) {
+ logger.fine("Reading file " + pbdFile.getAbsolutePath() + "...");
+ try {
+ BufferedReader br = new BufferedReader(new FileReader(pbdFile));
+ String line = null;
+ while ((line = br.readLine()) != null) {
+ parsed.add(line);
+ }
+ br.close();
+ logger.fine("Finished reading file "
+ + pbdFile.getAbsolutePath() + ".");
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Failed reading file "
+ + pbdFile.getAbsolutePath() + "!", e);
+ return;
+ }
+ }
+ logger.fine("Importing files in directory " + bridgeDirectoriesDir
+ + "/...");
+ Set<String> descriptorImportHistory = new HashSet<String>();
+ int parsedFiles = 0, skippedFiles = 0, parsedStatuses = 0,
+ parsedServerDescriptors = 0, skippedServerDescriptors = 0,
+ parsedExtraInfoDescriptors = 0, skippedExtraInfoDescriptors = 0;
+ Stack<File> filesInInputDir = new Stack<File>();
+ filesInInputDir.add(bdDir);
+ while (!filesInInputDir.isEmpty()) {
+ File pop = filesInInputDir.pop();
+ if (pop.isDirectory()) {
+ for (File f : pop.listFiles()) {
+ filesInInputDir.add(f);
+ }
+ } else if (!parsed.contains(pop.getName())) {
+ try {
+ FileInputStream in = new FileInputStream(pop);
+ if (in.available() > 0) {
+ TarArchiveInputStream tais = null;
+ if (pop.getName().endsWith(".tar.gz")) {
+ GzipCompressorInputStream gcis =
+ new GzipCompressorInputStream(in);
+ tais = new TarArchiveInputStream(gcis);
+ } else if (pop.getName().endsWith(".tar")) {
+ tais = new TarArchiveInputStream(in);
+ } else {
+ continue;
+ }
+ BufferedInputStream bis = new BufferedInputStream(tais);
+ String fn = pop.getName();
+ String dateTime = fn.substring(11, 21) + " "
+ + fn.substring(22, 24) + ":" + fn.substring(24, 26)
+ + ":" + fn.substring(26, 28);
+ while ((tais.getNextTarEntry()) != null) {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ int len;
+ byte[] data = new byte[1024];
+ while ((len = bis.read(data, 0, 1024)) >= 0) {
+ baos.write(data, 0, len);
+ }
+ byte[] allData = baos.toByteArray();
+ if (allData.length == 0) {
+ continue;
+ }
+ String fileDigest = Hex.encodeHexString(DigestUtils.sha(
+ allData));
+ String ascii = new String(allData, "US-ASCII");
+ BufferedReader br3 = new BufferedReader(new StringReader(
+ ascii));
+ String firstLine = null;
+ while ((firstLine = br3.readLine()) != null) {
+ if (firstLine.startsWith("@")) {
+ continue;
+ } else {
+ break;
+ }
+ }
+ if (firstLine.startsWith("r ")) {
+ bdp.parse(allData, dateTime);
+ parsedStatuses++;
+ } else if (descriptorImportHistory.contains(fileDigest)) {
+ /* Skip server descriptors or extra-info descriptors if
+ * we parsed them before. */
+ skippedFiles++;
+ continue;
+ } else {
+ int start = -1, sig = -1, end = -1;
+ String startToken =
+ firstLine.startsWith("router ") ?
+ "router " : "extra-info ";
+ String sigToken = "\nrouter-signature\n";
+ String endToken = "\n-----END SIGNATURE-----\n";
+ while (end < ascii.length()) {
+ start = ascii.indexOf(startToken, end);
+ if (start < 0) {
+ break;
+ }
+ sig = ascii.indexOf(sigToken, start);
+ if (sig < 0) {
+ break;
+ }
+ sig += sigToken.length();
+ end = ascii.indexOf(endToken, sig);
+ if (end < 0) {
+ break;
+ }
+ end += endToken.length();
+ byte[] descBytes = new byte[end - start];
+ System.arraycopy(allData, start, descBytes, 0,
+ end - start);
+ String descriptorDigest = Hex.encodeHexString(
+ DigestUtils.sha(descBytes));
+ if (!descriptorImportHistory.contains(
+ descriptorDigest)) {
+ bdp.parse(descBytes, dateTime);
+ descriptorImportHistory.add(descriptorDigest);
+ if (firstLine.startsWith("router ")) {
+ parsedServerDescriptors++;
+ } else {
+ parsedExtraInfoDescriptors++;
+ }
+ } else {
+ if (firstLine.startsWith("router ")) {
+ skippedServerDescriptors++;
+ } else {
+ skippedExtraInfoDescriptors++;
+ }
+ }
+ }
+ }
+ descriptorImportHistory.add(fileDigest);
+ parsedFiles++;
+ }
+ bis.close();
+ }
+ in.close();
+
+ /* Let's give some memory back, or we'll run out of it. */
+ System.gc();
+
+ parsed.add(pop.getName());
+ modified = true;
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Could not parse bridge snapshot "
+ + pop.getName() + "!", e);
+ continue;
+ }
+ }
+ }
+ logger.fine("Finished importing files in directory "
+ + bridgeDirectoriesDir + "/. In total, we parsed "
+ + parsedFiles + " files (skipped " + skippedFiles
+ + ") containing " + parsedStatuses + " statuses, "
+ + parsedServerDescriptors + " server descriptors (skipped "
+ + skippedServerDescriptors + "), and "
+ + parsedExtraInfoDescriptors + " extra-info descriptors "
+ + "(skipped " + skippedExtraInfoDescriptors + ").");
+ if (!parsed.isEmpty() && modified) {
+ logger.fine("Writing file " + pbdFile.getAbsolutePath() + "...");
+ try {
+ pbdFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(pbdFile));
+ for (String f : parsed) {
+ bw.append(f + "\n");
+ }
+ bw.close();
+ logger.fine("Finished writing file " + pbdFile.getAbsolutePath()
+ + ".");
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Failed writing file "
+ + pbdFile.getAbsolutePath() + "!", e);
+ }
+ }
+ }
+ }
+}
+
diff --git a/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java b/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
new file mode 100644
index 0000000..ec7ad4b
--- /dev/null
+++ b/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
@@ -0,0 +1,911 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.bridgedescs;
+
+import java.io.*;
+import java.security.*;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.*;
+import java.util.logging.*;
+
+import org.apache.commons.codec.DecoderException;
+import org.apache.commons.codec.digest.*;
+import org.apache.commons.codec.binary.*;
+
+/**
+ * Sanitizes bridge descriptors, i.e., removes all possibly sensitive
+ * information from them, and writes them to a local directory structure.
+ * During the sanitizing process, all information about the bridge
+ * identity or IP address are removed or replaced. The goal is to keep the
+ * sanitized bridge descriptors useful for statistical analysis while not
+ * making it easier for an adversary to enumerate bridges.
+ *
+ * There are three types of bridge descriptors: bridge network statuses
+ * (lists of all bridges at a given time), server descriptors (published
+ * by the bridge to advertise their capabilities), and extra-info
+ * descriptors (published by the bridge, mainly for statistical analysis).
+ */
+public class SanitizedBridgesWriter {
+
+ /**
+ * Logger for this class.
+ */
+ private Logger logger;
+
+ /**
+ * Output directory for writing sanitized bridge descriptors.
+ */
+ private File sanitizedBridgesDirectory;
+
+ private boolean replaceIPAddressesWithHashes;
+
+ private boolean persistenceProblemWithSecrets;
+
+ private SortedMap<String, byte[]> secretsForHashingIPAddresses;
+
+ private String bridgeSanitizingCutOffTimestamp;
+
+ private boolean haveWarnedAboutInterval;
+
+ private File bridgeIpSecretsFile;
+
+ private SecureRandom secureRandom;
+
+ /**
+ * Initializes this class.
+ */
+ public SanitizedBridgesWriter(File sanitizedBridgesDirectory,
+ File statsDirectory, boolean replaceIPAddressesWithHashes,
+ long limitBridgeSanitizingInterval) {
+
+ if (sanitizedBridgesDirectory == null || statsDirectory == null) {
+ throw new IllegalArgumentException();
+ }
+
+ /* Memorize argument values. */
+ this.sanitizedBridgesDirectory = sanitizedBridgesDirectory;
+ this.replaceIPAddressesWithHashes = replaceIPAddressesWithHashes;
+
+ /* Initialize logger. */
+ this.logger = Logger.getLogger(
+ SanitizedBridgesWriter.class.getName());
+
+ /* Initialize secure random number generator if we need it. */
+ if (this.replaceIPAddressesWithHashes) {
+ try {
+ this.secureRandom = SecureRandom.getInstance("SHA1PRNG", "SUN");
+ } catch (GeneralSecurityException e) {
+ this.logger.log(Level.WARNING, "Could not initialize secure "
+ + "random number generator! Not calculating any IP address "
+ + "hashes in this execution!", e);
+ this.persistenceProblemWithSecrets = true;
+ }
+ }
+
+ /* Read hex-encoded secrets for replacing IP addresses with hashes
+ * from disk. */
+ this.secretsForHashingIPAddresses = new TreeMap<String, byte[]>();
+ this.bridgeIpSecretsFile = new File(statsDirectory,
+ "bridge-ip-secrets");
+ if (this.bridgeIpSecretsFile.exists()) {
+ try {
+ BufferedReader br = new BufferedReader(new FileReader(
+ this.bridgeIpSecretsFile));
+ String line;
+ while ((line = br.readLine()) != null) {
+ String[] parts = line.split(",");
+ if ((line.length() != ("yyyy-MM,".length() + 31 * 2) &&
+ line.length() != ("yyyy-MM,".length() + 50 * 2)) ||
+ parts.length != 2) {
+ this.logger.warning("Invalid line in bridge-ip-secrets file "
+ + "starting with '" + line.substring(0, 7) + "'! "
+ + "Not calculating any IP address hashes in this "
+ + "execution!");
+ this.persistenceProblemWithSecrets = true;
+ break;
+ }
+ String month = parts[0];
+ byte[] secret = Hex.decodeHex(parts[1].toCharArray());
+ this.secretsForHashingIPAddresses.put(month, secret);
+ }
+ br.close();
+ if (!this.persistenceProblemWithSecrets) {
+ this.logger.fine("Read "
+ + this.secretsForHashingIPAddresses.size() + " secrets for "
+ + "hashing bridge IP addresses.");
+ }
+ } catch (DecoderException e) {
+ this.logger.log(Level.WARNING, "Failed to decode hex string in "
+ + this.bridgeIpSecretsFile + "! Not calculating any IP "
+ + "address hashes in this execution!", e);
+ this.persistenceProblemWithSecrets = true;
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed to read "
+ + this.bridgeIpSecretsFile + "! Not calculating any IP "
+ + "address hashes in this execution!", e);
+ this.persistenceProblemWithSecrets = true;
+ }
+ }
+
+ /* If we're configured to keep secrets only for a limited time, define
+ * the cut-off day and time. */
+ if (limitBridgeSanitizingInterval >= 0L) {
+ SimpleDateFormat formatter = new SimpleDateFormat(
+ "yyyy-MM-dd HH:mm:ss");
+ formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+ this.bridgeSanitizingCutOffTimestamp = formatter.format(
+ System.currentTimeMillis() - 24L * 60L * 60L * 1000L
+ * limitBridgeSanitizingInterval);
+ } else {
+ this.bridgeSanitizingCutOffTimestamp = "1999-12-31 23:59:59";
+ }
+ }
+
+ private String scrubOrAddress(String orAddress, byte[] fingerprintBytes,
+ String published) throws IOException {
+ if (!orAddress.contains(":")) {
+ /* Malformed or-address or a line. */
+ return null;
+ }
+ String addressPart = orAddress.substring(0,
+ orAddress.lastIndexOf(":"));
+ String portPart = orAddress.substring(orAddress.lastIndexOf(":") + 1);
+ String scrubbedAddressPart = null;
+ if (addressPart.startsWith("[")) {
+ scrubbedAddressPart = this.scrubIpv6Address(addressPart,
+ fingerprintBytes, published);
+ } else {
+ scrubbedAddressPart = this.scrubIpv4Address(addressPart,
+ fingerprintBytes, published);
+ }
+ return (scrubbedAddressPart == null ? null :
+ scrubbedAddressPart + ":" + portPart);
+ }
+
+ private String scrubIpv4Address(String address, byte[] fingerprintBytes,
+ String published) throws IOException {
+ if (this.replaceIPAddressesWithHashes) {
+ if (this.persistenceProblemWithSecrets) {
+ /* There's a persistence problem, so we shouldn't scrub more IP
+ * addresses in this execution. */
+ return null;
+ }
+ byte[] hashInput = new byte[4 + 20 + 31];
+ String[] ipParts = address.split("\\.");
+ for (int i = 0; i < 4; i++) {
+ hashInput[i] = (byte) Integer.parseInt(ipParts[i]);
+ }
+ System.arraycopy(fingerprintBytes, 0, hashInput, 4, 20);
+ String month = published.substring(0, "yyyy-MM".length());
+ byte[] secret = this.getSecretForMonth(month);
+ System.arraycopy(secret, 0, hashInput, 24, 31);
+ byte[] hashOutput = DigestUtils.sha256(hashInput);
+ String hashedAddress = "10."
+ + (((int) hashOutput[0] + 256) % 256) + "."
+ + (((int) hashOutput[1] + 256) % 256) + "."
+ + (((int) hashOutput[2] + 256) % 256);
+ return hashedAddress;
+ } else {
+ return "127.0.0.1";
+ }
+ }
+
+ private String scrubIpv6Address(String address, byte[] fingerprintBytes,
+ String published) throws IOException {
+ StringBuilder sb = new StringBuilder("[fd9f:2e19:3bcf::");
+ if (this.replaceIPAddressesWithHashes) {
+ if (this.persistenceProblemWithSecrets) {
+ /* There's a persistence problem, so we shouldn't scrub more IP
+ * addresses in this execution. */
+ return null;
+ }
+ byte[] hashInput = new byte[16 + 20 + 19];
+ String[] doubleColonSeparatedParts = address.substring(1,
+ address.length() - 1).split("::", -1);
+ if (doubleColonSeparatedParts.length > 2) {
+ /* Invalid IPv6 address. */
+ return null;
+ }
+ List<String> hexParts = new ArrayList<String>();
+ for (String doubleColonSeparatedPart : doubleColonSeparatedParts) {
+ StringBuilder hexPart = new StringBuilder();
+ String[] parts = doubleColonSeparatedPart.split(":", -1);
+ if (parts.length < 1 || parts.length > 8) {
+ /* Invalid IPv6 address. */
+ return null;
+ }
+ for (int i = 0; i < parts.length; i++) {
+ String part = parts[i];
+ if (part.contains(".")) {
+ String[] ipParts = part.split("\\.");
+ byte[] ipv4Bytes = new byte[4];
+ if (ipParts.length != 4) {
+ /* Invalid IPv4 part in IPv6 address. */
+ return null;
+ }
+ for (int m = 0; m < 4; m++) {
+ ipv4Bytes[m] = (byte) Integer.parseInt(ipParts[m]);
+ }
+ hexPart.append(Hex.encodeHexString(ipv4Bytes));
+ } else if (part.length() > 4) {
+ /* Invalid IPv6 address. */
+ return null;
+ } else {
+ for (int k = part.length(); k < 4; k++) {
+ hexPart.append("0");
+ }
+ hexPart.append(part);
+ }
+ }
+ hexParts.add(hexPart.toString());
+ }
+ StringBuilder hex = new StringBuilder();
+ hex.append(hexParts.get(0));
+ if (hexParts.size() == 2) {
+ for (int i = 32 - hexParts.get(0).length()
+ - hexParts.get(1).length(); i > 0; i--) {
+ hex.append("0");
+ }
+ hex.append(hexParts.get(1));
+ }
+ byte[] ipBytes = null;
+ try {
+ ipBytes = Hex.decodeHex(hex.toString().toCharArray());
+ } catch (DecoderException e) {
+ /* TODO Invalid IPv6 address. */
+ return null;
+ }
+ if (ipBytes.length != 16) {
+ /* TODO Invalid IPv6 address. */
+ return null;
+ }
+ System.arraycopy(ipBytes, 0, hashInput, 0, 16);
+ System.arraycopy(fingerprintBytes, 0, hashInput, 16, 20);
+ String month = published.substring(0, "yyyy-MM".length());
+ byte[] secret = this.getSecretForMonth(month);
+ System.arraycopy(secret, 31, hashInput, 36, 19);
+ String hashOutput = DigestUtils.sha256Hex(hashInput);
+ sb.append(hashOutput.substring(hashOutput.length() - 6,
+ hashOutput.length() - 4));
+ sb.append(":");
+ sb.append(hashOutput.substring(hashOutput.length() - 4));
+ }
+ sb.append("]");
+ return sb.toString();
+ }
+
+ private byte[] getSecretForMonth(String month) throws IOException {
+ if (!this.secretsForHashingIPAddresses.containsKey(month) ||
+ this.secretsForHashingIPAddresses.get(month).length == 31) {
+ byte[] secret = new byte[50];
+ this.secureRandom.nextBytes(secret);
+ if (this.secretsForHashingIPAddresses.containsKey(month)) {
+ System.arraycopy(this.secretsForHashingIPAddresses.get(month), 0,
+ secret, 0, 31);
+ }
+ if (month.compareTo(
+ this.bridgeSanitizingCutOffTimestamp) < 0) {
+ this.logger.warning("Generated a secret that we won't make "
+ + "persistent, because it's outside our bridge descriptor "
+ + "sanitizing interval.");
+ } else {
+ /* Append secret to file on disk immediately before using it, or
+ * we might end with inconsistently sanitized bridges. */
+ try {
+ if (!this.bridgeIpSecretsFile.exists()) {
+ this.bridgeIpSecretsFile.getParentFile().mkdirs();
+ }
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ this.bridgeIpSecretsFile,
+ this.bridgeIpSecretsFile.exists()));
+ bw.write(month + "," + Hex.encodeHexString(secret) + "\n");
+ bw.close();
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not store new secret "
+ + "to disk! Not calculating any IP address hashes in "
+ + "this execution!", e);
+ this.persistenceProblemWithSecrets = true;
+ throw new IOException(e);
+ }
+ }
+ this.secretsForHashingIPAddresses.put(month, secret);
+ }
+ return this.secretsForHashingIPAddresses.get(month);
+ }
+
+ /**
+ * Sanitizes a network status and writes it to disk.
+ *
+ * Replaces bridge identities and descriptor identifiers in r lines
+ * with their SHA-1 hashes, scrubs IP addresses in r and a lines, and
+ * writes the result under a file name derived from the publication
+ * time. Entries are keyed by hashed bridge identity so the output is
+ * deterministically sorted.
+ *
+ * @param data raw network status bytes (expected to be US-ASCII)
+ * @param publicationTime status publication time in
+ * "yyyy-MM-dd HH:mm:ss" format, also used for the file name
+ */
+ public void sanitizeAndStoreNetworkStatus(byte[] data,
+ String publicationTime) {
+
+ if (this.persistenceProblemWithSecrets) {
+ /* There's a persistence problem, so we shouldn't scrub more IP
+ * addresses in this execution. */
+ return;
+ }
+
+ if (this.bridgeSanitizingCutOffTimestamp.
+ compareTo(publicationTime) > 0) {
+ this.logger.log(!this.haveWarnedAboutInterval ? Level.WARNING
+ : Level.FINE, "Sanitizing and storing network status with "
+ + "publication time outside our descriptor sanitizing "
+ + "interval.");
+ this.haveWarnedAboutInterval = true;
+ }
+
+ /* Parse the given network status line by line. Scrubbed entries
+ * are collected per bridge, keyed by hashed identity hex. */
+ SortedMap<String, String> scrubbedLines =
+ new TreeMap<String, String>();
+ try {
+ StringBuilder scrubbed = new StringBuilder();
+ BufferedReader br = new BufferedReader(new StringReader(new String(
+ data, "US-ASCII")));
+ String line = null;
+ String mostRecentDescPublished = null;
+ byte[] fingerprintBytes = null;
+ String descPublicationTime = null;
+ String hashedBridgeIdentityHex = null;
+ while ((line = br.readLine()) != null) {
+
+ /* r lines contain sensitive information that needs to be removed
+ * or replaced. */
+ if (line.startsWith("r ")) {
+
+ /* Clear buffer from previously scrubbed lines. */
+ if (scrubbed.length() > 0) {
+ String scrubbedLine = scrubbed.toString();
+ scrubbedLines.put(hashedBridgeIdentityHex, scrubbedLine);
+ scrubbed = new StringBuilder();
+ }
+
+ /* Parse the relevant parts of this r line. */
+ String[] parts = line.split(" ");
+ String nickname = parts[1];
+ fingerprintBytes = Base64.decodeBase64(parts[2] + "==");
+ String descriptorIdentifier = parts[3];
+ descPublicationTime = parts[4] + " " + parts[5];
+ String address = parts[6];
+ String orPort = parts[7];
+ String dirPort = parts[8];
+
+ /* Determine most recent descriptor publication time. */
+ if (descPublicationTime.compareTo(publicationTime) <= 0 &&
+ (mostRecentDescPublished == null ||
+ descPublicationTime.compareTo(
+ mostRecentDescPublished) > 0)) {
+ mostRecentDescPublished = descPublicationTime;
+ }
+
+ /* Write scrubbed r line to buffer. */
+ byte[] hashedBridgeIdentity = DigestUtils.sha(fingerprintBytes);
+ String hashedBridgeIdentityBase64 = Base64.encodeBase64String(
+ hashedBridgeIdentity).substring(0, 27);
+ hashedBridgeIdentityHex = Hex.encodeHexString(
+ hashedBridgeIdentity);
+ String hashedDescriptorIdentifier = Base64.encodeBase64String(
+ DigestUtils.sha(Base64.decodeBase64(descriptorIdentifier
+ + "=="))).substring(0, 27);
+ String scrubbedAddress = scrubIpv4Address(address,
+ fingerprintBytes,
+ descPublicationTime);
+ scrubbed.append("r " + nickname + " "
+ + hashedBridgeIdentityBase64 + " "
+ + hashedDescriptorIdentifier + " " + descPublicationTime
+ + " " + scrubbedAddress + " " + orPort + " " + dirPort
+ + "\n");
+
+ /* Sanitize any addresses in a lines using the fingerprint and
+ * descriptor publication time from the previous r line. */
+ } else if (line.startsWith("a ")) {
+ String scrubbedOrAddress = scrubOrAddress(
+ line.substring("a ".length()), fingerprintBytes,
+ descPublicationTime);
+ if (scrubbedOrAddress != null) {
+ scrubbed.append("a " + scrubbedOrAddress + "\n");
+ } else {
+ this.logger.warning("Invalid address in line '" + line
+ + "' in bridge network status. Skipping line!");
+ }
+
+ /* Nothing special about s, w, and p lines; just copy them. */
+ } else if (line.startsWith("s ") || line.equals("s") ||
+ line.startsWith("w ") || line.equals("w") ||
+ line.startsWith("p ") || line.equals("p")) {
+ scrubbed.append(line + "\n");
+
+ /* There should be nothing else but r, a, s, w, and p lines in
+ * the network status. If there is, we should probably learn
+ * before writing anything to the sanitized descriptors. */
+ } else {
+ this.logger.fine("Unknown line '" + line + "' in bridge "
+ + "network status. Not writing to disk!");
+ return;
+ }
+ }
+ br.close();
+ /* Flush the buffered entry for the last bridge in the status. */
+ if (scrubbed.length() > 0) {
+ String scrubbedLine = scrubbed.toString();
+ scrubbedLines.put(hashedBridgeIdentityHex, scrubbedLine);
+ scrubbed = new StringBuilder();
+ }
+
+ /* Check if we can tell from the descriptor publication times
+ * whether this status is possibly stale. */
+ SimpleDateFormat formatter = new SimpleDateFormat(
+ "yyyy-MM-dd HH:mm:ss");
+ formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+ if (formatter.parse(publicationTime).getTime() -
+ formatter.parse(mostRecentDescPublished).getTime() >
+ 60L * 60L * 1000L) {
+ this.logger.warning("The most recent descriptor in the bridge "
+ + "network status published at " + publicationTime + " was "
+ + "published at " + mostRecentDescPublished + " which is "
+ + "more than 1 hour before the status. This is a sign for "
+ + "the status being stale. Please check!");
+ }
+ } catch (ParseException e) {
+ this.logger.log(Level.WARNING, "Could not parse timestamp in "
+ + "bridge network status.", e);
+ return;
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not parse bridge network "
+ + "status.", e);
+ return;
+ }
+
+ /* Write the sanitized network status to disk. */
+ try {
+
+ /* Determine file name. */
+ String syear = publicationTime.substring(0, 4);
+ String smonth = publicationTime.substring(5, 7);
+ String sday = publicationTime.substring(8, 10);
+ String stime = publicationTime.substring(11, 13)
+ + publicationTime.substring(14, 16)
+ + publicationTime.substring(17, 19);
+ File statusFile = new File(
+ this.sanitizedBridgesDirectory.getAbsolutePath() + "/" + syear
+ + "/" + smonth + "/statuses/" + sday + "/" + syear + smonth
+ + sday + "-" + stime + "-"
+ + "4A0CCD2DDC7995083D73F5D667100C8A5831F16D");
+
+ /* Create all parent directories to write this network status. */
+ statusFile.getParentFile().mkdirs();
+
+ /* Write sanitized network status to disk. */
+ BufferedWriter bw = new BufferedWriter(new FileWriter(statusFile));
+ bw.write("@type bridge-network-status 1.0\n");
+ bw.write("published " + publicationTime + "\n");
+ for (String scrubbed : scrubbedLines.values()) {
+ bw.write(scrubbed);
+ }
+ bw.close();
+
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not write sanitized bridge "
+ + "network status to disk.", e);
+ return;
+ }
+ }
+
+ /**
+ * Sanitizes a bridge server descriptor and writes it to disk.
+ *
+ * Removes crypto blocks and the signature, hashes the fingerprint
+ * and extra-info digest, scrubs the IP address(es), and writes the
+ * result to a file named after the SHA-1 digest of the original
+ * descriptor (from "router " up to and including the
+ * "router-signature" line).
+ *
+ * @param data raw server descriptor bytes (expected to be US-ASCII)
+ */
+ public void sanitizeAndStoreServerDescriptor(byte[] data) {
+
+ if (this.persistenceProblemWithSecrets) {
+ /* There's a persistence problem, so we shouldn't scrub more IP
+ * addresses in this execution. */
+ return;
+ }
+
+ /* Parse descriptor to generate a sanitized version. */
+ String scrubbedDesc = null, published = null;
+ try {
+ BufferedReader br = new BufferedReader(new StringReader(
+ new String(data, "US-ASCII")));
+ StringBuilder scrubbed = new StringBuilder();
+ String line = null, hashedBridgeIdentity = null, address = null,
+ routerLine = null, scrubbedAddress = null;
+ List<String> orAddresses = null, scrubbedOrAddresses = null;
+ boolean skipCrypto = false;
+ while ((line = br.readLine()) != null) {
+
+ /* Skip all crypto parts that might be used to derive the bridge's
+ * identity fingerprint. */
+ if (skipCrypto && !line.startsWith("-----END ")) {
+ continue;
+
+ /* Store the router line for later processing, because we may need
+ * the bridge identity fingerprint for replacing the IP address in
+ * the scrubbed version. */
+ } else if (line.startsWith("router ")) {
+ address = line.split(" ")[2];
+ routerLine = line;
+
+ /* Store or-address parts in a list and sanitize them when we have
+ * read the fingerprint. */
+ } else if (line.startsWith("or-address ")) {
+ if (orAddresses == null) {
+ orAddresses = new ArrayList<String>();
+ }
+ orAddresses.add(line.substring("or-address ".length()));
+
+ /* Parse the publication time to see if we're still inside the
+ * sanitizing interval. */
+ } else if (line.startsWith("published ")) {
+ published = line.substring("published ".length());
+ if (this.bridgeSanitizingCutOffTimestamp.
+ compareTo(published) > 0) {
+ this.logger.log(!this.haveWarnedAboutInterval
+ ? Level.WARNING : Level.FINE, "Sanitizing and storing "
+ + "server descriptor with publication time outside our "
+ + "descriptor sanitizing interval.");
+ this.haveWarnedAboutInterval = true;
+ }
+ scrubbed.append(line + "\n");
+
+ /* Parse the fingerprint to determine the hashed bridge
+ * identity. */
+ } else if (line.startsWith("opt fingerprint ") ||
+ line.startsWith("fingerprint ")) {
+ String fingerprint = line.substring(line.startsWith("opt ") ?
+ "opt fingerprint".length() : "fingerprint".length()).
+ replaceAll(" ", "").toLowerCase();
+ byte[] fingerprintBytes = Hex.decodeHex(
+ fingerprint.toCharArray());
+ hashedBridgeIdentity = DigestUtils.shaHex(fingerprintBytes).
+ toLowerCase();
+ try {
+ scrubbedAddress = scrubIpv4Address(address, fingerprintBytes,
+ published);
+ if (orAddresses != null) {
+ scrubbedOrAddresses = new ArrayList<String>();
+ for (String orAddress : orAddresses) {
+ String scrubbedOrAddress = scrubOrAddress(orAddress,
+ fingerprintBytes, published);
+ if (scrubbedOrAddress != null) {
+ scrubbedOrAddresses.add(scrubbedOrAddress);
+ } else {
+ this.logger.warning("Invalid address in line "
+ + "'or-address " + orAddress + "' in bridge server "
+ + "descriptor. Skipping line!");
+ }
+ }
+ }
+ } catch (IOException e) {
+ /* There's a persistence problem, so we shouldn't scrub more
+ * IP addresses in this execution. */
+ this.persistenceProblemWithSecrets = true;
+ return;
+ }
+ /* Write the hashed fingerprint in the same 4-character groups
+ * as the original fingerprint line. */
+ scrubbed.append((line.startsWith("opt ") ? "opt " : "")
+ + "fingerprint");
+ for (int i = 0; i < hashedBridgeIdentity.length() / 4; i++)
+ scrubbed.append(" " + hashedBridgeIdentity.substring(4 * i,
+ 4 * (i + 1)).toUpperCase());
+ scrubbed.append("\n");
+
+ /* Replace the contact line (if present) with a generic one. */
+ } else if (line.startsWith("contact ")) {
+ scrubbed.append("contact somebody\n");
+
+ /* When we reach the signature, we're done. Write the sanitized
+ * descriptor to disk below. */
+ } else if (line.startsWith("router-signature")) {
+ String[] routerLineParts = routerLine.split(" ");
+ scrubbedDesc = "router " + routerLineParts[1] + " "
+ + scrubbedAddress + " " + routerLineParts[3] + " "
+ + routerLineParts[4] + " " + routerLineParts[5] + "\n";
+ if (scrubbedOrAddresses != null) {
+ for (String scrubbedOrAddress : scrubbedOrAddresses) {
+ /* NOTE(review): "scrubbedDesc = scrubbedDesc +=" is a
+ * redundant self-assignment; the += alone already updates
+ * scrubbedDesc. Harmless, but could be simplified. */
+ scrubbedDesc = scrubbedDesc += "or-address "
+ + scrubbedOrAddress + "\n";
+ }
+ }
+ scrubbedDesc += scrubbed.toString();
+ break;
+
+ /* Replace extra-info digest with the hashed digest of the
+ * non-scrubbed descriptor. */
+ } else if (line.startsWith("opt extra-info-digest ") ||
+ line.startsWith("extra-info-digest ")) {
+ String extraInfoDescriptorIdentifier = line.substring(
+ line.indexOf("extra-info-digest ")
+ + "extra-info-digest ".length());
+ String hashedExtraInfoDescriptorIdentifier =
+ DigestUtils.shaHex(Hex.decodeHex(
+ extraInfoDescriptorIdentifier.toCharArray())).toUpperCase();
+ scrubbed.append((line.startsWith("opt ") ? "opt " : "")
+ + "extra-info-digest " + hashedExtraInfoDescriptorIdentifier
+ + "\n");
+
+ /* Possibly sanitize reject lines if they contain the bridge's own
+ * IP address. */
+ } else if (line.startsWith("reject ")) {
+ if (address != null && line.startsWith("reject " + address)) {
+ scrubbed.append("reject " + scrubbedAddress
+ + line.substring("reject ".length() + address.length())
+ + "\n");
+ } else {
+ scrubbed.append(line + "\n");
+ }
+
+ /* Write the following lines unmodified to the sanitized
+ * descriptor. */
+ } else if (line.startsWith("accept ")
+ || line.startsWith("platform ")
+ || line.startsWith("opt protocols ")
+ || line.startsWith("protocols ")
+ || line.startsWith("uptime ")
+ || line.startsWith("bandwidth ")
+ || line.startsWith("opt hibernating ")
+ || line.startsWith("hibernating ")
+ || line.equals("opt hidden-service-dir")
+ || line.equals("hidden-service-dir")
+ || line.equals("opt caches-extra-info")
+ || line.equals("caches-extra-info")
+ || line.equals("opt allow-single-hop-exits")
+ || line.equals("allow-single-hop-exits")) {
+ scrubbed.append(line + "\n");
+
+ /* Replace node fingerprints in the family line with their hashes
+ * and leave nicknames unchanged. */
+ } else if (line.startsWith("family ")) {
+ StringBuilder familyLine = new StringBuilder("family");
+ for (String s : line.substring(7).split(" ")) {
+ if (s.startsWith("$")) {
+ familyLine.append(" $" + DigestUtils.shaHex(Hex.decodeHex(
+ s.substring(1).toCharArray())).toUpperCase());
+ } else {
+ familyLine.append(" " + s);
+ }
+ }
+ scrubbed.append(familyLine.toString() + "\n");
+
+ /* Skip the purpose line that the bridge authority adds to its
+ * cached-descriptors file. */
+ } else if (line.startsWith("@purpose ")) {
+ continue;
+
+ /* Skip all crypto parts that might leak the bridge's identity
+ * fingerprint. */
+ } else if (line.startsWith("-----BEGIN ")
+ || line.equals("onion-key") || line.equals("signing-key")) {
+ skipCrypto = true;
+
+ /* Stop skipping lines when the crypto parts are over. */
+ } else if (line.startsWith("-----END ")) {
+ skipCrypto = false;
+
+ /* If we encounter an unrecognized line, stop parsing and print
+ * out a warning. We might have overlooked sensitive information
+ * that we need to remove or replace for the sanitized descriptor
+ * version. */
+ } else {
+ this.logger.fine("Unrecognized line '" + line + "'. Skipping.");
+ return;
+ }
+ }
+ br.close();
+ } catch (Exception e) {
+ this.logger.log(Level.WARNING, "Could not parse server "
+ + "descriptor.", e);
+ return;
+ }
+
+ /* Determine filename of sanitized server descriptor. The digest is
+ * taken over the original bytes from "router " through the
+ * "router-signature" line, matching how descriptor digests are
+ * referenced elsewhere. */
+ String descriptorDigest = null;
+ try {
+ String ascii = new String(data, "US-ASCII");
+ String startToken = "router ";
+ String sigToken = "\nrouter-signature\n";
+ int start = ascii.indexOf(startToken);
+ int sig = ascii.indexOf(sigToken) + sigToken.length();
+ if (start >= 0 && sig >= 0 && sig > start) {
+ byte[] forDigest = new byte[sig - start];
+ System.arraycopy(data, start, forDigest, 0, sig - start);
+ descriptorDigest = DigestUtils.shaHex(DigestUtils.sha(forDigest));
+ }
+ } catch (UnsupportedEncodingException e) {
+ /* Handle below. */
+ }
+ if (descriptorDigest == null) {
+ this.logger.log(Level.WARNING, "Could not calculate server "
+ + "descriptor digest.");
+ return;
+ }
+ String dyear = published.substring(0, 4);
+ String dmonth = published.substring(5, 7);
+ File newFile = new File(
+ this.sanitizedBridgesDirectory.getAbsolutePath() + "/"
+ + dyear + "/" + dmonth + "/server-descriptors/"
+ + "/" + descriptorDigest.charAt(0) + "/"
+ + descriptorDigest.charAt(1) + "/"
+ + descriptorDigest);
+
+ /* Write sanitized server descriptor to disk, including all its parent
+ * directories. */
+ try {
+ newFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(newFile));
+ bw.write("@type bridge-server-descriptor 1.0\n");
+ bw.write(scrubbedDesc);
+ bw.write("router-digest " + descriptorDigest.toUpperCase() + "\n");
+ bw.close();
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not write sanitized server "
+ + "descriptor to disk.", e);
+ return;
+ }
+ }
+
+ /**
+ * Sanitizes an extra-info descriptor and writes it to disk.
+ *
+ * Replaces the bridge identity with its SHA-1 hash, strips transport
+ * details down to the transport name, and writes the result to a
+ * file named after the SHA-1 digest of the original descriptor.
+ *
+ * @param data raw extra-info descriptor bytes (expected US-ASCII)
+ */
+ public void sanitizeAndStoreExtraInfoDescriptor(byte[] data) {
+
+ /* Parse descriptor to generate a sanitized version. */
+ String scrubbedDesc = null, published = null;
+ try {
+ BufferedReader br = new BufferedReader(new StringReader(new String(
+ data, "US-ASCII")));
+ String line = null;
+ StringBuilder scrubbed = null;
+ String hashedBridgeIdentity = null;
+ while ((line = br.readLine()) != null) {
+
+ /* Parse bridge identity from extra-info line and replace it with
+ * its hash in the sanitized descriptor. */
+ String[] parts = line.split(" ");
+ if (line.startsWith("extra-info ")) {
+ hashedBridgeIdentity = DigestUtils.shaHex(Hex.decodeHex(
+ parts[2].toCharArray())).toLowerCase();
+ scrubbed = new StringBuilder("extra-info " + parts[1] + " "
+ + hashedBridgeIdentity.toUpperCase() + "\n");
+
+ /* Parse the publication time to determine the file name. */
+ } else if (line.startsWith("published ")) {
+ scrubbed.append(line + "\n");
+ published = line.substring("published ".length());
+
+ /* Remove everything from transport lines except the transport
+ * name. */
+ } else if (line.startsWith("transport ")) {
+ if (parts.length < 3) {
+ this.logger.fine("Illegal line in extra-info descriptor: '"
+ + line + "'. Skipping descriptor.");
+ return;
+ }
+ scrubbed.append("transport " + parts[1] + "\n");
+
+ /* Skip transport-info lines entirely. */
+ } else if (line.startsWith("transport-info ")) {
+
+ /* Write the following lines unmodified to the sanitized
+ * descriptor. */
+ } else if (line.startsWith("write-history ")
+ || line.startsWith("read-history ")
+ || line.startsWith("geoip-start-time ")
+ || line.startsWith("geoip-client-origins ")
+ || line.startsWith("geoip-db-digest ")
+ || line.startsWith("conn-bi-direct ")
+ || line.startsWith("bridge-")
+ || line.startsWith("dirreq-")
+ || line.startsWith("cell-")
+ || line.startsWith("entry-")
+ || line.startsWith("exit-")) {
+ scrubbed.append(line + "\n");
+
+ /* When we reach the signature, we're done. Write the sanitized
+ * descriptor to disk below. */
+ } else if (line.startsWith("router-signature")) {
+ scrubbedDesc = scrubbed.toString();
+ break;
+
+ /* If we encounter an unrecognized line, stop parsing and print
+ * out a warning. We might have overlooked sensitive information
+ * that we need to remove or replace for the sanitized descriptor
+ * version. */
+ } else {
+ this.logger.fine("Unrecognized line '" + line + "'. Skipping.");
+ return;
+ }
+ }
+ br.close();
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not parse extra-info "
+ + "descriptor.", e);
+ return;
+ } catch (DecoderException e) {
+ this.logger.log(Level.WARNING, "Could not parse extra-info "
+ + "descriptor.", e);
+ return;
+ }
+
+ /* Determine filename of sanitized extra-info descriptor. */
+ String descriptorDigest = null;
+ try {
+ String ascii = new String(data, "US-ASCII");
+ String startToken = "extra-info ";
+ String sigToken = "\nrouter-signature\n";
+ int start = ascii.indexOf(startToken);
+ int sig = ascii.indexOf(sigToken) + sigToken.length();
+ if (start >= 0 && sig >= 0 && sig > start) {
+ byte[] forDigest = new byte[sig - start];
+ System.arraycopy(data, start, forDigest, 0, sig - start);
+ descriptorDigest = DigestUtils.shaHex(DigestUtils.sha(forDigest));
+ }
+ } catch (UnsupportedEncodingException e) {
+ /* Handle below. */
+ }
+ if (descriptorDigest == null) {
+ this.logger.log(Level.WARNING, "Could not calculate extra-info "
+ + "descriptor digest.");
+ return;
+ }
+ /* NOTE(review): assumes the descriptor contained a published line
+ * before router-signature; otherwise published is null here and
+ * this throws a NullPointerException — confirm inputs are always
+ * well-formed or add a guard. */
+ String dyear = published.substring(0, 4);
+ String dmonth = published.substring(5, 7);
+ File newFile = new File(
+ this.sanitizedBridgesDirectory.getAbsolutePath() + "/"
+ + dyear + "/" + dmonth + "/extra-infos/"
+ + descriptorDigest.charAt(0) + "/"
+ + descriptorDigest.charAt(1) + "/"
+ + descriptorDigest);
+
+ /* Write sanitized extra-info descriptor to disk, including all its
+ * parent directories. */
+ try {
+ newFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(newFile));
+ bw.write("@type bridge-extra-info 1.1\n");
+ bw.write(scrubbedDesc);
+ bw.write("router-digest " + descriptorDigest.toUpperCase() + "\n");
+ bw.close();
+ } catch (Exception e) {
+ this.logger.log(Level.WARNING, "Could not write sanitized "
+ + "extra-info descriptor to disk.", e);
+ }
+ }
+
+ /**
+ * Finishes the sanitizing run by pruning the IP-hashing secrets: if
+ * any stored secret belongs to a month before the bridge descriptor
+ * sanitizing cut-off, the secrets file is rewritten with only the
+ * secrets that are still inside the interval, and the older ones are
+ * deleted.
+ */
+ public void finishWriting() {
+
+ /* Delete secrets that we don't need anymore. */
+ if (!this.secretsForHashingIPAddresses.isEmpty() &&
+ this.secretsForHashingIPAddresses.firstKey().compareTo(
+ this.bridgeSanitizingCutOffTimestamp) < 0) {
+ try {
+ int kept = 0, deleted = 0;
+ /* Overwrite the secrets file with the kept entries only. */
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ this.bridgeIpSecretsFile));
+ for (Map.Entry<String, byte[]> e :
+ this.secretsForHashingIPAddresses.entrySet()) {
+ if (e.getKey().compareTo(
+ this.bridgeSanitizingCutOffTimestamp) < 0) {
+ deleted++;
+ } else {
+ bw.write(e.getKey() + "," + Hex.encodeHexString(e.getValue())
+ + "\n");
+ kept++;
+ }
+ }
+ bw.close();
+ this.logger.info("Deleted " + deleted + " secrets that we don't "
+ + "need anymore and kept " + kept + ".");
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not store reduced set of "
+ + "secrets to disk! This is a bad sign, better check what's "
+ + "going on!", e);
+ }
+ }
+ }
+}
+
diff --git a/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java b/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
new file mode 100644
index 0000000..470f6ab
--- /dev/null
+++ b/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
@@ -0,0 +1,174 @@
+/* Copyright 2011--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.bridgepools;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.SortedSet;
+import java.util.Stack;
+import java.util.TimeZone;
+import java.util.TreeSet;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.apache.commons.codec.DecoderException;
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
+
+/**
+ * Processes BridgeDB's bridge pool assignment files: reads all
+ * assignment files from the given input directory, replaces bridge
+ * fingerprints with their SHA-1 hashes, and writes one sanitized
+ * assignment file per "bridge-pool-assignment" timestamp to the given
+ * output directory. All work happens in the constructor.
+ */
+public class BridgePoolAssignmentsProcessor {
+
+ public BridgePoolAssignmentsProcessor(File assignmentsDirectory,
+ File sanitizedAssignmentsDirectory) {
+
+ Logger logger =
+ Logger.getLogger(BridgePoolAssignmentsProcessor.class.getName());
+ if (assignmentsDirectory == null ||
+ sanitizedAssignmentsDirectory == null) {
+ IllegalArgumentException e = new IllegalArgumentException("Neither "
+ + "assignmentsDirectory nor sanitizedAssignmentsDirectory may "
+ + "be null!");
+ throw e;
+ }
+
+ /* Collect all assignment files, recursing into subdirectories and
+ * skipping .gz files. */
+ List<File> assignmentFiles = new ArrayList<File>();
+ Stack<File> files = new Stack<File>();
+ files.add(assignmentsDirectory);
+ while (!files.isEmpty()) {
+ File file = files.pop();
+ if (file.isDirectory()) {
+ files.addAll(Arrays.asList(file.listFiles()));
+ } else if (!file.getName().endsWith(".gz")) {
+ assignmentFiles.add(file);
+ }
+ }
+
+ SimpleDateFormat assignmentFormat =
+ new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ assignmentFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ SimpleDateFormat filenameFormat =
+ new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
+ filenameFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ for (File assignmentFile : assignmentFiles) {
+ logger.info("Processing bridge pool assignment file '"
+ + assignmentFile.getAbsolutePath() + "'...");
+ try {
+ BufferedReader br = null;
+ /* NOTE(review): this .gz branch is currently dead, because .gz
+ * files are filtered out during directory traversal above —
+ * confirm whether .gz input should be supported or the filter
+ * removed. */
+ if (assignmentFile.getName().endsWith(".gz")) {
+ br = new BufferedReader(new InputStreamReader(
+ new GzipCompressorInputStream(new FileInputStream(
+ assignmentFile))));
+ } else {
+ br = new BufferedReader(new FileReader(assignmentFile));
+ }
+ String line, bridgePoolAssignmentLine = null;
+ SortedSet<String> sanitizedAssignments = new TreeSet<String>();
+ /* Keep looping once more after the last line so that the final
+ * buffered assignment block gets written. */
+ boolean wroteLastLine = false, skipBefore20120504125947 = true;
+ while ((line = br.readLine()) != null || !wroteLastLine) {
+ if (line != null && line.startsWith("bridge-pool-assignment ")) {
+ String[] parts = line.split(" ");
+ if (parts.length != 3) {
+ continue;
+ }
+ /* TODO Take out this temporary hack to ignore all assignments
+ * coming from ponticum when byblos was still the official
+ * BridgeDB host. */
+ if (line.compareTo(
+ "bridge-pool-assignment 2012-05-04 12:59:47") >= 0) {
+ skipBefore20120504125947 = false;
+ }
+ }
+ if (skipBefore20120504125947) {
+ if (line == null) {
+ break;
+ } else {
+ continue;
+ }
+ }
+ if (line == null ||
+ line.startsWith("bridge-pool-assignment ")) {
+ /* Flush the previous assignment block to its own file. */
+ if (bridgePoolAssignmentLine != null) {
+ try {
+ long bridgePoolAssignmentTime = assignmentFormat.parse(
+ bridgePoolAssignmentLine.substring(
+ "bridge-pool-assignment ".length())).getTime();
+ File sanitizedAssignmentsFile = new File(
+ sanitizedAssignmentsDirectory, filenameFormat.format(
+ bridgePoolAssignmentTime));
+ if (!sanitizedAssignmentsFile.exists()) {
+ sanitizedAssignmentsFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ sanitizedAssignmentsFile));
+ bw.write("@type bridge-pool-assignment 1.0\n");
+ bw.write(bridgePoolAssignmentLine + "\n");
+ for (String assignmentLine : sanitizedAssignments) {
+ bw.write(assignmentLine + "\n");
+ }
+ bw.close();
+ }
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Could not write sanitized "
+ + "bridge pool assignment file for line '"
+ + bridgePoolAssignmentLine + "' to disk. Skipping "
+ + "bridge pool assignment file '"
+ + assignmentFile.getAbsolutePath() + "'.", e);
+ break;
+ } catch (ParseException e) {
+ logger.log(Level.WARNING, "Could not write sanitized "
+ + "bridge pool assignment file for line '"
+ + bridgePoolAssignmentLine + "' to disk. Skipping "
+ + "bridge pool assignment file '"
+ + assignmentFile.getAbsolutePath() + "'.", e);
+ break;
+ }
+ sanitizedAssignments.clear();
+ }
+ if (line == null) {
+ wroteLastLine = true;
+ } else {
+ bridgePoolAssignmentLine = line;
+ }
+ } else {
+ /* An assignment line: replace the fingerprint (first token,
+ * 40 hex characters) with its SHA-1 hash. */
+ String[] parts = line.split(" ");
+ if (parts.length < 2 || parts[0].length() < 40) {
+ logger.warning("Unrecognized line '" + line
+ + "'. Aborting.");
+ break;
+ }
+ String hashedFingerprint = null;
+ try {
+ hashedFingerprint = DigestUtils.shaHex(Hex.decodeHex(
+ line.split(" ")[0].toCharArray())).toLowerCase();
+ } catch (DecoderException e) {
+ logger.warning("Unable to decode hex fingerprint in line '"
+ + line + "'. Aborting.");
+ break;
+ }
+ String assignmentDetails = line.substring(40);
+ sanitizedAssignments.add(hashedFingerprint
+ + assignmentDetails);
+ }
+ }
+ br.close();
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Could not read bridge pool assignment "
+ + "file '" + assignmentFile.getAbsolutePath()
+ + "'. Skipping.", e);
+ }
+ }
+
+ logger.info("Finished processing bridge pool assignment file(s).");
+ }
+}
+
diff --git a/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java b/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
new file mode 100644
index 0000000..64f6a3b
--- /dev/null
+++ b/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
@@ -0,0 +1,100 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.exitlists;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.SortedSet;
+import java.util.Stack;
+import java.util.TimeZone;
+import java.util.TreeSet;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * Downloads the current exit list from exitlist.torproject.org, writes
+ * it to a timestamped file under exitlist/, and logs the names of the
+ * three most recent exit list files. All work happens in the
+ * constructor.
+ */
+public class ExitListDownloader {
+ public ExitListDownloader() {
+ Logger logger = Logger.getLogger(ExitListDownloader.class.getName());
+ try {
+ logger.fine("Downloading exit list...");
+ String exitAddressesUrl =
+ "http://exitlist.torproject.org/exit-addresses";
+ URL u = new URL(exitAddressesUrl);
+ HttpURLConnection huc = (HttpURLConnection) u.openConnection();
+ huc.setRequestMethod("GET");
+ huc.connect();
+ int response = huc.getResponseCode();
+ if (response != 200) {
+ logger.warning("Could not download exit list. Response code " +
+ response);
+ return;
+ }
+ BufferedInputStream in = new BufferedInputStream(
+ huc.getInputStream());
+ /* Name the output file after the download time, e.g.
+ * exitlist/2012/10/27/2012-10-27-20-07-46. */
+ SimpleDateFormat printFormat =
+ new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
+ printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ Date downloadedDate = new Date();
+ File exitListFile = new File("exitlist/" + printFormat.format(
+ downloadedDate));
+ exitListFile.getParentFile().mkdirs();
+ SimpleDateFormat dateTimeFormat =
+ new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ exitListFile));
+ bw.write("@type tordnsel 1.0\n");
+ bw.write("Downloaded " + dateTimeFormat.format(downloadedDate)
+ + "\n");
+ int len;
+ byte[] data = new byte[1024];
+ /* NOTE(review): converting raw bytes chunk-wise with the
+ * platform-default charset; fine for ASCII exit lists, but could
+ * corrupt multi-byte characters split across chunks — confirm
+ * the feed is guaranteed ASCII. */
+ while ((len = in.read(data, 0, 1024)) >= 0) {
+ bw.write(new String(data, 0, len));
+ }
+ in.close();
+ bw.close();
+ logger.fine("Finished downloading exit list.");
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Failed downloading exit list", e);
+ return;
+ }
+
+ /* Write stats: walk the exitlist/ directory, keeping only the last
+ * three entries per directory level, and log the three most recent
+ * exit list files overall. */
+ StringBuilder dumpStats = new StringBuilder("Finished downloading "
+ + "exit list.\nLast three exit lists are:");
+ Stack<File> filesInInputDir = new Stack<File>();
+ filesInInputDir.add(new File("exitlist"));
+ SortedSet<File> lastThreeExitLists = new TreeSet<File>();
+ while (!filesInInputDir.isEmpty()) {
+ File pop = filesInInputDir.pop();
+ if (pop.isDirectory()) {
+ SortedSet<File> lastThreeElements = new TreeSet<File>();
+ for (File f : pop.listFiles()) {
+ lastThreeElements.add(f);
+ }
+ while (lastThreeElements.size() > 3) {
+ lastThreeElements.remove(lastThreeElements.first());
+ }
+ for (File f : lastThreeElements) {
+ filesInInputDir.add(f);
+ }
+ } else {
+ lastThreeExitLists.add(pop);
+ while (lastThreeExitLists.size() > 3) {
+ lastThreeExitLists.remove(lastThreeExitLists.first());
+ }
+ }
+ }
+ for (File f : lastThreeExitLists) {
+ dumpStats.append("\n" + f.getName());
+ }
+ logger.info(dumpStats.toString());
+ }
+}
+
diff --git a/src/org/torproject/ernie/db/main/Configuration.java b/src/org/torproject/ernie/db/main/Configuration.java
new file mode 100644
index 0000000..adf22cc
--- /dev/null
+++ b/src/org/torproject/ernie/db/main/Configuration.java
@@ -0,0 +1,359 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.main;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.SortedMap;
+import java.util.TreeMap;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * Initialize configuration with hard-coded defaults, overwrite with
+ * configuration in config file, if exists, and answer Main.java about our
+ * configuration.
+ */
+public class Configuration {
+ private boolean writeDirectoryArchives = false;
+ private String directoryArchivesOutputDirectory = "directory-archive/";
+ private boolean importCachedRelayDescriptors = false;
+ private List<String> cachedRelayDescriptorsDirectory =
+ new ArrayList<String>(Arrays.asList("cacheddesc/".split(",")));
+ private boolean importDirectoryArchives = false;
+ private String directoryArchivesDirectory = "archives/";
+ private boolean keepDirectoryArchiveImportHistory = false;
+ private boolean writeSanitizedBridges = false;
+ private boolean replaceIPAddressesWithHashes = false;
+ private long limitBridgeDescriptorMappings = -1L;
+ private String sanitizedBridgesWriteDirectory = "sanitized-bridges/";
+ private boolean importBridgeSnapshots = false;
+ private String bridgeSnapshotsDirectory = "bridge-directories/";
+ private boolean downloadRelayDescriptors = false;
+ private List<String> downloadFromDirectoryAuthorities = Arrays.asList((
+ "86.59.21.38,76.73.17.194:9030,213.115.239.118:443,"
+ + "193.23.244.244,208.83.223.34:443,128.31.0.34:9131,"
+ + "194.109.206.212,212.112.245.170").split(","));
+ private boolean downloadCurrentConsensus = true;
+ private boolean downloadCurrentVotes = true;
+ private boolean downloadMissingServerDescriptors = true;
+ private boolean downloadMissingExtraInfoDescriptors = true;
+ private boolean downloadAllServerDescriptors = false;
+ private boolean downloadAllExtraInfoDescriptors = false;
+ private boolean compressRelayDescriptorDownloads;
+ private boolean downloadExitList = false;
+ private boolean processBridgePoolAssignments = false;
+ private String assignmentsDirectory = "assignments/";
+ private String sanitizedAssignmentsDirectory = "sanitized-assignments/";
+ private boolean processTorperfFiles = false;
+ private String torperfOutputDirectory = "torperf/";
+ private SortedMap<String, String> torperfSources = null;
+ private List<String> torperfFiles = null;
+ private boolean provideFilesViaRsync = false;
+ private String rsyncDirectory = "rsync";
+ public Configuration() {
+
+ /* Initialize logger. */
+ Logger logger = Logger.getLogger(Configuration.class.getName());
+
+ /* Read config file, if present. */
+ File configFile = new File("config");
+ if (!configFile.exists()) {
+ logger.warning("Could not find config file. In the default "
+ + "configuration, we are not configured to read data from any "
+ + "data source or write data to any data sink. You need to "
+ + "create a config file (" + configFile.getAbsolutePath()
+ + ") and provide at least one data source and one data sink. "
+ + "Refer to the manual for more information.");
+ return;
+ }
+ String line = null;
+ boolean containsCachedRelayDescriptorsDirectory = false;
+ try {
+ BufferedReader br = new BufferedReader(new FileReader(configFile));
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("#") || line.length() < 1) {
+ continue;
+ } else if (line.startsWith("WriteDirectoryArchives")) {
+ this.writeDirectoryArchives = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("DirectoryArchivesOutputDirectory")) {
+ this.directoryArchivesOutputDirectory = line.split(" ")[1];
+ } else if (line.startsWith("ImportCachedRelayDescriptors")) {
+ this.importCachedRelayDescriptors = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("CachedRelayDescriptorsDirectory")) {
+ if (!containsCachedRelayDescriptorsDirectory) {
+ this.cachedRelayDescriptorsDirectory.clear();
+ containsCachedRelayDescriptorsDirectory = true;
+ }
+ this.cachedRelayDescriptorsDirectory.add(line.split(" ")[1]);
+ } else if (line.startsWith("ImportDirectoryArchives")) {
+ this.importDirectoryArchives = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("DirectoryArchivesDirectory")) {
+ this.directoryArchivesDirectory = line.split(" ")[1];
+ } else if (line.startsWith("KeepDirectoryArchiveImportHistory")) {
+ this.keepDirectoryArchiveImportHistory = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("WriteSanitizedBridges")) {
+ this.writeSanitizedBridges = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("ReplaceIPAddressesWithHashes")) {
+ this.replaceIPAddressesWithHashes = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("LimitBridgeDescriptorMappings")) {
+ this.limitBridgeDescriptorMappings = Long.parseLong(
+ line.split(" ")[1]);
+ } else if (line.startsWith("SanitizedBridgesWriteDirectory")) {
+ this.sanitizedBridgesWriteDirectory = line.split(" ")[1];
+ } else if (line.startsWith("ImportBridgeSnapshots")) {
+ this.importBridgeSnapshots = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("BridgeSnapshotsDirectory")) {
+ this.bridgeSnapshotsDirectory = line.split(" ")[1];
+ } else if (line.startsWith("DownloadRelayDescriptors")) {
+ this.downloadRelayDescriptors = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("DownloadFromDirectoryAuthorities")) {
+ this.downloadFromDirectoryAuthorities = new ArrayList<String>();
+ for (String dir : line.split(" ")[1].split(",")) {
+ // test if IP:port pair has correct format
+ if (dir.length() < 1) {
+ logger.severe("Configuration file contains directory "
+ + "authority IP:port of length 0 in line '" + line
+ + "'! Exiting!");
+ System.exit(1);
+ }
+ new URL("http://" + dir + "/");
+ this.downloadFromDirectoryAuthorities.add(dir);
+ }
+ } else if (line.startsWith("DownloadCurrentConsensus")) {
+ this.downloadCurrentConsensus = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("DownloadCurrentVotes")) {
+ this.downloadCurrentVotes = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("DownloadMissingServerDescriptors")) {
+ this.downloadMissingServerDescriptors = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith(
+ "DownloadMissingExtraInfoDescriptors")) {
+ this.downloadMissingExtraInfoDescriptors = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("DownloadAllServerDescriptors")) {
+ this.downloadAllServerDescriptors = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("DownloadAllExtraInfoDescriptors")) {
+ this.downloadAllExtraInfoDescriptors = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("CompressRelayDescriptorDownloads")) {
+ this.compressRelayDescriptorDownloads = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("DownloadExitList")) {
+ this.downloadExitList = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("ProcessBridgePoolAssignments")) {
+ this.processBridgePoolAssignments = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("AssignmentsDirectory")) {
+ this.assignmentsDirectory = line.split(" ")[1];
+ } else if (line.startsWith("SanitizedAssignmentsDirectory")) {
+ this.sanitizedAssignmentsDirectory = line.split(" ")[1];
+ } else if (line.startsWith("ProcessTorperfFiles")) {
+ this.processTorperfFiles = Integer.parseInt(line.split(" ")[1])
+ != 0;
+ } else if (line.startsWith("TorperfOutputDirectory")) {
+ } else if (line.startsWith("TorperfSource")) {
+ if (this.torperfSources == null) {
+ this.torperfSources = new TreeMap<String, String>();
+ }
+ String[] parts = line.split(" ");
+ String sourceName = parts[1];
+ String baseUrl = parts[2];
+ this.torperfSources.put(sourceName, baseUrl);
+ } else if (line.startsWith("TorperfFiles")) {
+ if (this.torperfFiles == null) {
+ this.torperfFiles = new ArrayList<String>();
+ }
+ String[] parts = line.split(" ");
+ if (parts.length != 5) {
+ logger.severe("Configuration file contains TorperfFiles "
+ + "option with wrong number of values in line '" + line
+ + "'! Exiting!");
+ System.exit(1);
+ }
+ this.torperfFiles.add(line);
+ } else if (line.startsWith("ProvideFilesViaRsync")) {
+ this.provideFilesViaRsync = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("RsyncDirectory")) {
+ this.rsyncDirectory = line.split(" ")[1];
+ } else {
+ logger.severe("Configuration file contains unrecognized "
+ + "configuration key in line '" + line + "'! Exiting!");
+ System.exit(1);
+ }
+ }
+ br.close();
+ } catch (ArrayIndexOutOfBoundsException e) {
+ logger.severe("Configuration file contains configuration key "
+ + "without value in line '" + line + "'. Exiting!");
+ System.exit(1);
+ } catch (MalformedURLException e) {
+ logger.severe("Configuration file contains illegal URL or IP:port "
+ + "pair in line '" + line + "'. Exiting!");
+ System.exit(1);
+ } catch (NumberFormatException e) {
+ logger.severe("Configuration file contains illegal value in line '"
+ + line + "' with legal values being 0 or 1. Exiting!");
+ System.exit(1);
+ } catch (IOException e) {
+ logger.log(Level.SEVERE, "Unknown problem while reading config "
+ + "file! Exiting!", e);
+ System.exit(1);
+ }
+
+ /** Make some checks if configuration is valid. */
+ if (!this.importCachedRelayDescriptors &&
+ !this.importDirectoryArchives && !this.downloadRelayDescriptors &&
+ !this.importBridgeSnapshots &&
+ !this.downloadExitList && !this.processBridgePoolAssignments &&
+ !this.writeDirectoryArchives && !this.writeSanitizedBridges &&
+ !this.processTorperfFiles) {
+ logger.warning("We have not been configured to read data from any "
+ + "data source or write data to any data sink. You need to "
+ + "edit your config file (" + configFile.getAbsolutePath()
+ + ") and provide at least one data source and one data sink. "
+ + "Refer to the manual for more information.");
+ }
+ if ((this.importCachedRelayDescriptors ||
+ this.importDirectoryArchives || this.downloadRelayDescriptors) &&
+ !this.writeDirectoryArchives) {
+ logger.warning("We are configured to import/download relay "
+ + "descriptors, but we don't have a single data sink to write "
+ + "relay descriptors to.");
+ }
+ if (!(this.importCachedRelayDescriptors ||
+ this.importDirectoryArchives || this.downloadRelayDescriptors) &&
+ this.writeDirectoryArchives) {
+ logger.warning("We are configured to write relay descriptor to at "
+ + "least one data sink, but we don't have a single data source "
+ + "containing relay descriptors.");
+ }
+ if (this.importBridgeSnapshots && !this.writeSanitizedBridges) {
+ logger.warning("We are configured to import/download bridge "
+ + "descriptors, but we don't have a single data sink to write "
+ + "bridge descriptors to.");
+ }
+ if (!this.importBridgeSnapshots && this.writeSanitizedBridges) {
+ logger.warning("We are configured to write bridge descriptor to at "
+ + "least one data sink, but we don't have a single data source "
+ + "containing bridge descriptors.");
+ }
+ }
+ public boolean getWriteDirectoryArchives() {
+ return this.writeDirectoryArchives;
+ }
+ public String getDirectoryArchivesOutputDirectory() {
+ return this.directoryArchivesOutputDirectory;
+ }
+ public boolean getImportCachedRelayDescriptors() {
+ return this.importCachedRelayDescriptors;
+ }
+ public List<String> getCachedRelayDescriptorDirectory() {
+ return this.cachedRelayDescriptorsDirectory;
+ }
+ public boolean getImportDirectoryArchives() {
+ return this.importDirectoryArchives;
+ }
+ public String getDirectoryArchivesDirectory() {
+ return this.directoryArchivesDirectory;
+ }
+ public boolean getKeepDirectoryArchiveImportHistory() {
+ return this.keepDirectoryArchiveImportHistory;
+ }
+ public boolean getWriteSanitizedBridges() {
+ return this.writeSanitizedBridges;
+ }
+ public boolean getReplaceIPAddressesWithHashes() {
+ return this.replaceIPAddressesWithHashes;
+ }
+ public long getLimitBridgeDescriptorMappings() {
+ return this.limitBridgeDescriptorMappings;
+ }
+ public String getSanitizedBridgesWriteDirectory() {
+ return this.sanitizedBridgesWriteDirectory;
+ }
+ public boolean getImportBridgeSnapshots() {
+ return this.importBridgeSnapshots;
+ }
+ public String getBridgeSnapshotsDirectory() {
+ return this.bridgeSnapshotsDirectory;
+ }
+ public boolean getDownloadRelayDescriptors() {
+ return this.downloadRelayDescriptors;
+ }
+ public List<String> getDownloadFromDirectoryAuthorities() {
+ return this.downloadFromDirectoryAuthorities;
+ }
+ public boolean getDownloadCurrentConsensus() {
+ return this.downloadCurrentConsensus;
+ }
+ public boolean getDownloadCurrentVotes() {
+ return this.downloadCurrentVotes;
+ }
+ public boolean getDownloadMissingServerDescriptors() {
+ return this.downloadMissingServerDescriptors;
+ }
+ public boolean getDownloadMissingExtraInfoDescriptors() {
+ return this.downloadMissingExtraInfoDescriptors;
+ }
+ public boolean getDownloadAllServerDescriptors() {
+ return this.downloadAllServerDescriptors;
+ }
+ public boolean getDownloadAllExtraInfoDescriptors() {
+ return this.downloadAllExtraInfoDescriptors;
+ }
+ public boolean getCompressRelayDescriptorDownloads() {
+ return this.compressRelayDescriptorDownloads;
+ }
+ public boolean getDownloadExitList() {
+ return this.downloadExitList;
+ }
+ public boolean getProcessBridgePoolAssignments() {
+ return processBridgePoolAssignments;
+ }
+ public String getAssignmentsDirectory() {
+ return assignmentsDirectory;
+ }
+ public String getSanitizedAssignmentsDirectory() {
+ return sanitizedAssignmentsDirectory;
+ }
+ public boolean getProcessTorperfFiles() {
+ return this.processTorperfFiles;
+ }
+ public String getTorperfOutputDirectory() {
+ return this.torperfOutputDirectory;
+ }
+ public SortedMap<String, String> getTorperfSources() {
+ return this.torperfSources;
+ }
+ public List<String> getTorperfFiles() {
+ return this.torperfFiles;
+ }
+ public boolean getProvideFilesViaRsync() {
+ return this.provideFilesViaRsync;
+ }
+ public String getRsyncDirectory() {
+ return this.rsyncDirectory;
+ }
+}
+
diff --git a/src/org/torproject/ernie/db/main/LockFile.java b/src/org/torproject/ernie/db/main/LockFile.java
new file mode 100644
index 0000000..68375ec
--- /dev/null
+++ b/src/org/torproject/ernie/db/main/LockFile.java
@@ -0,0 +1,52 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.main;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.logging.Logger;
+
+/**
+ * File-based lock preventing overlapping executions.  The lock file
+ * contains the start time of the run that created it; a lock older than
+ * 55 minutes is considered stale and may be taken over.
+ */
+public class LockFile {
+
+  private File lockFile;
+  private Logger logger;
+
+  public LockFile() {
+    this.lockFile = new File("lock");
+    this.logger = Logger.getLogger(LockFile.class.getName());
+  }
+
+  /**
+   * Attempts to acquire the lock.  Returns false if another run
+   * started less than 55 minutes ago or if reading/writing the lock
+   * file fails; otherwise writes the current time to the lock file and
+   * returns true.
+   */
+  public boolean acquireLock() {
+    this.logger.fine("Trying to acquire lock...");
+    try {
+      if (this.lockFile.exists()) {
+        BufferedReader br = new BufferedReader(new FileReader(
+            this.lockFile));
+        String content = br.readLine();
+        br.close();
+        try {
+          long runStarted = Long.parseLong(content);
+          if (System.currentTimeMillis() - runStarted
+              < 55L * 60L * 1000L) {
+            return false;
+          }
+        } catch (NumberFormatException e) {
+          /* Bug fix: an empty or corrupt lock file used to crash the
+           * whole run with an uncaught NumberFormatException.  Treat
+           * unparseable content as a stale lock and overwrite it. */
+          this.logger.warning("Lock file contains unparseable "
+              + "content.  Overwriting it.");
+        }
+      }
+      BufferedWriter bw = new BufferedWriter(new FileWriter(
+          this.lockFile));
+      bw.append("" + System.currentTimeMillis() + "\n");
+      bw.close();
+      this.logger.fine("Acquired lock.");
+      return true;
+    } catch (IOException e) {
+      this.logger.warning("Caught exception while trying to acquire "
+          + "lock!");
+      return false;
+    }
+  }
+
+  /** Releases the lock by deleting the lock file. */
+  public void releaseLock() {
+    this.logger.fine("Releasing lock...");
+    this.lockFile.delete();
+    this.logger.fine("Released lock.");
+  }
+}
+
diff --git a/src/org/torproject/ernie/db/main/LoggingConfiguration.java b/src/org/torproject/ernie/db/main/LoggingConfiguration.java
new file mode 100644
index 0000000..b0ddeaa
--- /dev/null
+++ b/src/org/torproject/ernie/db/main/LoggingConfiguration.java
@@ -0,0 +1,93 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.main;
+
+import java.io.BufferedWriter;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.TimeZone;
+import java.util.logging.ConsoleHandler;
+import java.util.logging.FileHandler;
+import java.util.logging.Formatter;
+import java.util.logging.Handler;
+import java.util.logging.Level;
+import java.util.logging.LogRecord;
+import java.util.logging.Logger;
+/**
+ * Initialize logging configuration.
+ *
+ * Log levels used by ERNIE:
+ *
+ * - SEVERE: An event made it impossible to continue program execution.
+ * - WARNING: A potential problem occurred that requires the operator to
+ * look after the otherwise unattended setup
+ * - INFO: Messages on INFO level are meant to help the operator in making
+ * sure that operation works as expected.
+ * - FINE: Debug messages that are used to identify problems and which are
+ * turned on by default.
+ * - FINER: More detailed debug messages to investigate problems in more
+ * detail. Not turned on by default. Increase log file limit when using
+ * FINER.
+ * - FINEST: Most detailed debug messages. Not used.
+ */
+public class LoggingConfiguration {
+
+  /**
+   * Configures the root logger: removes the JVM's default console
+   * handler, silences internal Sun classes, and installs a terse
+   * WARNING-level console handler plus a verbose FINE-level rotating
+   * file handler writing to "log".
+   */
+  public LoggingConfiguration() {
+
+    /* The root logger comes with a default console handler; drop all
+     * pre-installed handlers so we fully control log output. */
+    Logger rootLogger = Logger.getLogger("");
+    for (Handler preInstalled : rootLogger.getHandlers()) {
+      rootLogger.removeHandler(preInstalled);
+    }
+
+    /* Disable logging of internal Sun classes. */
+    Logger.getLogger("sun").setLevel(Level.OFF);
+
+    /* Accept everything down to FINER; the handlers below filter
+     * further. */
+    rootLogger.setLevel(Level.FINER);
+
+    /* All timestamps are formatted in UTC. */
+    final SimpleDateFormat timestampFormat =
+        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    timestampFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+
+    /* Console handler: messages on WARNING or higher, one terse line
+     * per record. */
+    Handler consoleHandler = new ConsoleHandler();
+    consoleHandler.setFormatter(new Formatter() {
+      public String format(LogRecord record) {
+        return timestampFormat.format(new Date(record.getMillis())) + " "
+            + record.getMessage() + "\n";
+      }
+    });
+    consoleHandler.setLevel(Level.WARNING);
+    rootLogger.addHandler(consoleHandler);
+
+    /* Logger used below to report problems with the file handler. */
+    Logger log = Logger.getLogger(
+        LoggingConfiguration.class.getName());
+
+    /* File handler: messages on FINE or higher, verbose format,
+     * rotating over five 5 MB files. */
+    Formatter fileFormatter = new Formatter() {
+      public String format(LogRecord record) {
+        String thrownSuffix = record.getThrown() != null
+            ? " " + record.getThrown() : "";
+        return timestampFormat.format(new Date(record.getMillis())) + " "
+            + record.getLevel() + " " + record.getSourceClassName() + " "
+            + record.getSourceMethodName() + " " + record.getMessage()
+            + thrownSuffix + "\n";
+      }
+    };
+    try {
+      FileHandler fileHandler = new FileHandler("log", 5000000, 5, true);
+      fileHandler.setFormatter(fileFormatter);
+      fileHandler.setLevel(Level.FINE);
+      rootLogger.addHandler(fileHandler);
+    } catch (SecurityException e) {
+      log.log(Level.WARNING, "No permission to create log file. "
+          + "Logging to file is disabled.", e);
+    } catch (IOException e) {
+      log.log(Level.WARNING, "Could not write to log file. Logging to "
+          + "file is disabled.", e);
+    }
+  }
+}
diff --git a/src/org/torproject/ernie/db/main/Main.java b/src/org/torproject/ernie/db/main/Main.java
new file mode 100644
index 0000000..e008eca
--- /dev/null
+++ b/src/org/torproject/ernie/db/main/Main.java
@@ -0,0 +1,172 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.main;
+
+import java.io.File;
+import java.util.List;
+import java.util.logging.Logger;
+
+import org.torproject.ernie.db.bridgedescs.BridgeDescriptorParser;
+import org.torproject.ernie.db.bridgedescs.BridgeSnapshotReader;
+import org.torproject.ernie.db.bridgedescs.SanitizedBridgesWriter;
+import org.torproject.ernie.db.bridgepools.BridgePoolAssignmentsProcessor;
+import org.torproject.ernie.db.exitlists.ExitListDownloader;
+import org.torproject.ernie.db.relaydescs.ArchiveReader;
+import org.torproject.ernie.db.relaydescs.ArchiveWriter;
+import org.torproject.ernie.db.relaydescs.CachedRelayDescriptorReader;
+import org.torproject.ernie.db.relaydescs.RelayDescriptorDownloader;
+import org.torproject.ernie.db.relaydescs.RelayDescriptorParser;
+import org.torproject.ernie.db.torperf.TorperfDownloader;
+
+/**
+ * Coordinate downloading and parsing of descriptors and extraction of
+ * statistically relevant data for later processing with R.
+ */
+public class Main {
+
+  /**
+   * Entry point: sets up logging, reads the configuration, acquires a
+   * lock against overlapping runs, then runs each configured
+   * import/download/sanitize step in sequence (relay descriptors,
+   * bridge descriptors, exit lists, bridge pool assignments, Torperf,
+   * rsync staging), and finally releases the lock.  The order of the
+   * steps matters: writers are flushed before later steps read their
+   * output directories.
+   */
+  public static void main(String[] args) {
+
+    /* Initialize logging configuration. */
+    new LoggingConfiguration();
+
+    Logger logger = Logger.getLogger(Main.class.getName());
+    logger.info("Starting ERNIE.");
+
+    // Initialize configuration
+    Configuration config = new Configuration();
+
+    // Use lock file to avoid overlapping runs
+    LockFile lf = new LockFile();
+    if (!lf.acquireLock()) {
+      // NOTE(review): message text says "Warning" but is logged at
+      // SEVERE and aborts the process; consider rewording.
+      logger.severe("Warning: ERNIE is already running or has not exited "
+          + "cleanly! Exiting!");
+      System.exit(1);
+    }
+
+    // Define stats directory for temporary files
+    File statsDirectory = new File("stats");
+
+    // Prepare writing relay descriptor archive to disk
+    ArchiveWriter aw = config.getWriteDirectoryArchives() ?
+        new ArchiveWriter(
+        new File(config.getDirectoryArchivesOutputDirectory())) : null;
+
+    // Prepare relay descriptor parser (only if we are writing stats or
+    // directory archives to disk)
+    RelayDescriptorParser rdp = aw != null ?
+        new RelayDescriptorParser(aw) : null;
+
+    // Import/download relay descriptors from the various sources
+    if (rdp != null) {
+      RelayDescriptorDownloader rdd = null;
+      if (config.getDownloadRelayDescriptors()) {
+        List<String> dirSources =
+            config.getDownloadFromDirectoryAuthorities();
+        rdd = new RelayDescriptorDownloader(rdp, dirSources,
+            config.getDownloadCurrentConsensus(),
+            config.getDownloadCurrentVotes(),
+            config.getDownloadMissingServerDescriptors(),
+            config.getDownloadMissingExtraInfoDescriptors(),
+            config.getDownloadAllServerDescriptors(),
+            config.getDownloadAllExtraInfoDescriptors(),
+            config.getCompressRelayDescriptorDownloads());
+        // Parser and downloader reference each other: the parser tells
+        // the downloader which descriptors it has already seen.
+        rdp.setRelayDescriptorDownloader(rdd);
+      }
+      if (config.getImportCachedRelayDescriptors()) {
+        // Constructor runs the whole import as a side effect.
+        new CachedRelayDescriptorReader(rdp,
+            config.getCachedRelayDescriptorDirectory(), statsDirectory);
+        if (aw != null) {
+          aw.intermediateStats("importing relay descriptors from local "
+              + "Tor data directories");
+        }
+      }
+      if (config.getImportDirectoryArchives()) {
+        // Constructor runs the whole import as a side effect.
+        new ArchiveReader(rdp,
+            new File(config.getDirectoryArchivesDirectory()),
+            statsDirectory,
+            config.getKeepDirectoryArchiveImportHistory());
+        if (aw != null) {
+          aw.intermediateStats("importing relay descriptors from local "
+              + "directory");
+        }
+      }
+      if (rdd != null) {
+        // Download missing descriptors last, after the local imports
+        // have told the downloader what we already have.
+        rdd.downloadDescriptors();
+        rdd.writeFile();
+        rdd = null;
+        if (aw != null) {
+          aw.intermediateStats("downloading relay descriptors from the "
+              + "directory authorities");
+        }
+      }
+    }
+
+    // Write output to disk that only depends on relay descriptors
+    if (aw != null) {
+      aw.dumpStats();
+      // Null out the reference so the writer can be garbage-collected
+      // before the remaining steps run.
+      aw = null;
+    }
+
+    // Prepare sanitized bridge descriptor writer
+    SanitizedBridgesWriter sbw = config.getWriteSanitizedBridges() ?
+        new SanitizedBridgesWriter(
+        new File(config.getSanitizedBridgesWriteDirectory()),
+        statsDirectory, config.getReplaceIPAddressesWithHashes(),
+        config.getLimitBridgeDescriptorMappings()) : null;
+
+    // Prepare bridge descriptor parser
+    BridgeDescriptorParser bdp = config.getWriteSanitizedBridges()
+        ? new BridgeDescriptorParser(sbw) : null;
+
+    // Import bridge descriptors
+    if (bdp != null && config.getImportBridgeSnapshots()) {
+      new BridgeSnapshotReader(bdp,
+          new File(config.getBridgeSnapshotsDirectory()),
+          statsDirectory);
+    }
+
+    // Finish writing sanitized bridge descriptors to disk
+    if (sbw != null) {
+      sbw.finishWriting();
+      sbw = null;
+    }
+
+    // Download exit list and store it to disk
+    if (config.getDownloadExitList()) {
+      new ExitListDownloader();
+    }
+
+    // Process bridge pool assignments
+    if (config.getProcessBridgePoolAssignments()) {
+      new BridgePoolAssignmentsProcessor(
+          new File(config.getAssignmentsDirectory()),
+          new File(config.getSanitizedAssignmentsDirectory()));
+    }
+
+    // Process Torperf files
+    if (config.getProcessTorperfFiles()) {
+      new TorperfDownloader(new File(config.getTorperfOutputDirectory()),
+          config.getTorperfSources(), config.getTorperfFiles());
+    }
+
+    // Copy recently published files to a local directory that can then
+    // be served via rsync.  A null argument disables copying for that
+    // data type.
+    if (config.getProvideFilesViaRsync()) {
+      new RsyncDataProvider(
+          !config.getWriteDirectoryArchives() ? null :
+          new File(config.getDirectoryArchivesOutputDirectory()),
+          !config.getWriteSanitizedBridges() ? null :
+          new File(config.getSanitizedBridgesWriteDirectory()),
+          !config.getProcessBridgePoolAssignments() ? null :
+          new File(config.getSanitizedAssignmentsDirectory()),
+          config.getDownloadExitList(),
+          !config.getProcessTorperfFiles() ? null :
+          new File(config.getTorperfOutputDirectory()),
+          new File(config.getRsyncDirectory()));
+    }
+
+    // Remove lock file
+    lf.releaseLock();
+
+    logger.info("Terminating ERNIE.");
+  }
+}
diff --git a/src/org/torproject/ernie/db/main/RsyncDataProvider.java b/src/org/torproject/ernie/db/main/RsyncDataProvider.java
new file mode 100644
index 0000000..cd4a6f9
--- /dev/null
+++ b/src/org/torproject/ernie/db/main/RsyncDataProvider.java
@@ -0,0 +1,217 @@
+/* Copyright 2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.main;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.Stack;
+import java.util.logging.Logger;
+
+/**
+ * Copy files published in the last 3 days to a local directory that can
+ * then be served via rsync.
+ */
+public class RsyncDataProvider {
+
+  /* Logger for this class; also used by copyFile() below to report
+   * copy failures instead of printing to stderr. */
+  private Logger logger =
+      Logger.getLogger(RsyncDataProvider.class.getName());
+
+  /**
+   * Copies files published in the last 3 days from the given source
+   * directories (any null directory is skipped) into rsyncDirectory,
+   * then deletes files in rsyncDirectory that were not (over-)written
+   * in this run.
+   */
+  public RsyncDataProvider(File directoryArchivesOutputDirectory,
+      File sanitizedBridgesWriteDirectory,
+      File sanitizedAssignmentsDirectory,
+      boolean downloadExitList,
+      File torperfOutputDirectory, File rsyncDirectory) {
+
+    /* Determine the cut-off time for files in rsync/. */
+    long cutOffMillis = System.currentTimeMillis()
+        - 3L * 24L * 60L * 60L * 1000L;
+
+    /* Create rsync/ directory if it doesn't exist. */
+    if (!rsyncDirectory.exists()) {
+      rsyncDirectory.mkdirs();
+    }
+
+    /* Make a list of all files in the rsync/ directory to delete those
+     * that we didn't copy in this run. */
+    Set<String> fileNamesInRsync = new HashSet<String>();
+    Stack<File> files = new Stack<File>();
+    files.add(rsyncDirectory);
+    while (!files.isEmpty()) {
+      File pop = files.pop();
+      if (pop.isDirectory()) {
+        files.addAll(Arrays.asList(pop.listFiles()));
+      } else {
+        fileNamesInRsync.add(pop.getName());
+      }
+    }
+    logger.info("Found " + fileNamesInRsync.size() + " files in "
+        + rsyncDirectory.getAbsolutePath() + " that we're either "
+        + "overwriting or deleting in this execution.");
+
+    /* Copy relay descriptors from the last 3 days, routing each file
+     * into a subdirectory based on its source path. */
+    if (directoryArchivesOutputDirectory != null) {
+      files.add(directoryArchivesOutputDirectory);
+      while (!files.isEmpty()) {
+        File pop = files.pop();
+        if (pop.isDirectory()) {
+          files.addAll(Arrays.asList(pop.listFiles()));
+        } else if (pop.lastModified() >= cutOffMillis) {
+          String fileName = pop.getName();
+          if (pop.getAbsolutePath().contains("/consensus/")) {
+            this.copyFile(pop, new File(rsyncDirectory,
+                "relay-descriptors/consensuses/" + fileName));
+          } else if (pop.getAbsolutePath().contains("/vote/")) {
+            this.copyFile(pop, new File(rsyncDirectory,
+                "relay-descriptors/votes/" + fileName));
+          } else if (pop.getAbsolutePath().contains(
+              "/server-descriptor/")) {
+            this.copyFile(pop, new File(rsyncDirectory,
+                "relay-descriptors/server-descriptors/" + fileName));
+          } else if (pop.getAbsolutePath().contains("/extra-info/")) {
+            this.copyFile(pop, new File(rsyncDirectory,
+                "relay-descriptors/extra-infos/" + fileName));
+          } else {
+            /* Unknown descriptor type; don't mark it as copied. */
+            continue;
+          }
+          fileNamesInRsync.remove(pop.getName());
+        }
+      }
+    }
+    logger.info("After copying relay descriptors, there are still "
+        + fileNamesInRsync.size() + " files left in "
+        + rsyncDirectory.getAbsolutePath() + ".");
+
+    /* Copy sanitized bridge descriptors from the last 3 days. */
+    if (sanitizedBridgesWriteDirectory != null) {
+      files.add(sanitizedBridgesWriteDirectory);
+      while (!files.isEmpty()) {
+        File pop = files.pop();
+        if (pop.isDirectory()) {
+          files.addAll(Arrays.asList(pop.listFiles()));
+        } else if (pop.lastModified() >= cutOffMillis) {
+          String fileName = pop.getName();
+          if (pop.getAbsolutePath().contains("/statuses/")) {
+            this.copyFile(pop, new File(rsyncDirectory,
+                "bridge-descriptors/statuses/" + fileName));
+          } else if (pop.getAbsolutePath().contains(
+              "/server-descriptors/")) {
+            this.copyFile(pop, new File(rsyncDirectory,
+                "bridge-descriptors/server-descriptors/" + fileName));
+          } else if (pop.getAbsolutePath().contains("/extra-infos/")) {
+            this.copyFile(pop, new File(rsyncDirectory,
+                "bridge-descriptors/extra-infos/" + fileName));
+          } else {
+            continue;
+          }
+          fileNamesInRsync.remove(pop.getName());
+        }
+      }
+    }
+    logger.info("After copying sanitized bridge descriptors, there are "
+        + "still " + fileNamesInRsync.size() + " files left in "
+        + rsyncDirectory.getAbsolutePath() + ".");
+
+    /* Copy sanitized bridge pool assignments from the last 3 days. */
+    if (sanitizedAssignmentsDirectory != null) {
+      files.add(sanitizedAssignmentsDirectory);
+      while (!files.isEmpty()) {
+        File pop = files.pop();
+        if (pop.isDirectory()) {
+          files.addAll(Arrays.asList(pop.listFiles()));
+        } else if (pop.lastModified() >= cutOffMillis) {
+          String fileName = pop.getName();
+          this.copyFile(pop, new File(rsyncDirectory,
+              "bridge-pool-assignments/" + fileName));
+          fileNamesInRsync.remove(pop.getName());
+        }
+      }
+    }
+    logger.info("After copying sanitized bridge pool assignments, there "
+        + "are still " + fileNamesInRsync.size() + " files left in "
+        + rsyncDirectory.getAbsolutePath() + ".");
+
+    /* Copy exit lists from the last 3 days. */
+    if (downloadExitList) {
+      files.add(new File("exitlist"));
+      while (!files.isEmpty()) {
+        File pop = files.pop();
+        if (pop.isDirectory()) {
+          files.addAll(Arrays.asList(pop.listFiles()));
+        } else if (pop.lastModified() >= cutOffMillis) {
+          String fileName = pop.getName();
+          this.copyFile(pop, new File(rsyncDirectory,
+              "exit-lists/" + fileName));
+          fileNamesInRsync.remove(pop.getName());
+        }
+      }
+    }
+    logger.info("After copying exit lists, there are still "
+        + fileNamesInRsync.size() + " files left in "
+        + rsyncDirectory.getAbsolutePath() + ".");
+
+    /* Copy Torperf files. */
+    if (torperfOutputDirectory != null) {
+      files.add(torperfOutputDirectory);
+      while (!files.isEmpty()) {
+        File pop = files.pop();
+        if (pop.isDirectory()) {
+          files.addAll(Arrays.asList(pop.listFiles()));
+        } else if (pop.getName().endsWith(".tpf") &&
+            pop.lastModified() >= cutOffMillis) {
+          String fileName = pop.getName();
+          this.copyFile(pop, new File(rsyncDirectory,
+              "torperf/" + fileName));
+          fileNamesInRsync.remove(pop.getName());
+        }
+      }
+    }
+    logger.info("After copying Torperf files, there are still "
+        + fileNamesInRsync.size() + " files left in "
+        + rsyncDirectory.getAbsolutePath() + ".");
+
+    /* Delete all files that we didn't (over-)write in this run. */
+    files.add(rsyncDirectory);
+    while (!files.isEmpty()) {
+      File pop = files.pop();
+      if (pop.isDirectory()) {
+        files.addAll(Arrays.asList(pop.listFiles()));
+      } else if (fileNamesInRsync.contains(pop.getName())) {
+        fileNamesInRsync.remove(pop.getName());
+        pop.delete();
+      }
+    }
+    logger.info("After deleting files that we didn't overwrite in this "
+        + "run, there are " + fileNamesInRsync.size() + " files left in "
+        + rsyncDirectory.getAbsolutePath() + ".");
+  }
+
+  /**
+   * Copies a single file, preserving its last-modified time, unless an
+   * identical copy (same length and timestamp) already exists at the
+   * target.  Copy failures are logged as warnings.
+   */
+  private void copyFile(File from, File to) {
+    if (from.exists() && to.exists() &&
+        from.lastModified() == to.lastModified() &&
+        from.length() == to.length()) {
+      return;
+    }
+    boolean copied = false;
+    BufferedInputStream bis = null;
+    FileOutputStream fos = null;
+    try {
+      to.getParentFile().mkdirs();
+      bis = new BufferedInputStream(new FileInputStream(from));
+      fos = new FileOutputStream(to);
+      int len;
+      byte[] data = new byte[1024];
+      while ((len = bis.read(data, 0, 1024)) >= 0) {
+        fos.write(data, 0, len);
+      }
+      copied = true;
+    } catch (IOException e) {
+      /* Bug fix: was e.printStackTrace(); use the class logger so copy
+       * failures show up in the regular log. */
+      this.logger.warning("Could not copy file " + from + " to " + to
+          + ": " + e);
+    } finally {
+      /* Bug fix: close both streams in a finally block so a failed
+       * read or write no longer leaks file descriptors. */
+      if (bis != null) {
+        try {
+          bis.close();
+        } catch (IOException e) {
+          /* Nothing we can do here. */
+        }
+      }
+      if (fos != null) {
+        try {
+          fos.close();
+        } catch (IOException e) {
+          /* Nothing we can do here. */
+        }
+      }
+    }
+    if (copied) {
+      /* Preserve the source's timestamp after closing so the cut-off
+       * comparison and identical-copy check keep working. */
+      to.setLastModified(from.lastModified());
+    }
+  }
+}
+
diff --git a/src/org/torproject/ernie/db/relaydescs/ArchiveReader.java b/src/org/torproject/ernie/db/relaydescs/ArchiveReader.java
new file mode 100644
index 0000000..fba0a9f
--- /dev/null
+++ b/src/org/torproject/ernie/db/relaydescs/ArchiveReader.java
@@ -0,0 +1,146 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.relaydescs;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.SortedSet;
+import java.util.Stack;
+import java.util.TreeSet;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
+
+/**
+ * Read in all files in a given directory and pass buffered readers of
+ * them to the relay descriptor parser.
+ */
+public class ArchiveReader {
+ public ArchiveReader(RelayDescriptorParser rdp, File archivesDirectory,
+ File statsDirectory, boolean keepImportHistory) {
+
+ if (rdp == null || archivesDirectory == null ||
+ statsDirectory == null) {
+ throw new IllegalArgumentException();
+ }
+
+ int parsedFiles = 0, ignoredFiles = 0;
+ Logger logger = Logger.getLogger(ArchiveReader.class.getName());
+ SortedSet<String> archivesImportHistory = new TreeSet<String>();
+ File archivesImportHistoryFile = new File(statsDirectory,
+ "archives-import-history");
+ if (keepImportHistory && archivesImportHistoryFile.exists()) {
+ try {
+ BufferedReader br = new BufferedReader(new FileReader(
+ archivesImportHistoryFile));
+ String line = null;
+ while ((line = br.readLine()) != null) {
+ archivesImportHistory.add(line);
+ }
+ br.close();
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Could not read in archives import "
+ + "history file. Skipping.");
+ }
+ }
+ if (archivesDirectory.exists()) {
+ logger.fine("Importing files in directory " + archivesDirectory
+ + "/...");
+ Stack<File> filesInInputDir = new Stack<File>();
+ filesInInputDir.add(archivesDirectory);
+ List<File> problems = new ArrayList<File>();
+ while (!filesInInputDir.isEmpty()) {
+ File pop = filesInInputDir.pop();
+ if (pop.isDirectory()) {
+ for (File f : pop.listFiles()) {
+ filesInInputDir.add(f);
+ }
+ } else {
+ if (rdp != null) {
+ try {
+ BufferedInputStream bis = null;
+ if (keepImportHistory &&
+ archivesImportHistory.contains(pop.getName())) {
+ ignoredFiles++;
+ continue;
+ } else if (pop.getName().endsWith(".tar.bz2")) {
+ logger.warning("Cannot parse compressed tarball "
+ + pop.getAbsolutePath() + ". Skipping.");
+ continue;
+ } else if (pop.getName().endsWith(".bz2")) {
+ FileInputStream fis = new FileInputStream(pop);
+ BZip2CompressorInputStream bcis =
+ new BZip2CompressorInputStream(fis);
+ bis = new BufferedInputStream(bcis);
+ } else {
+ FileInputStream fis = new FileInputStream(pop);
+ bis = new BufferedInputStream(fis);
+ }
+ if (keepImportHistory) {
+ archivesImportHistory.add(pop.getName());
+ }
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ int len;
+ byte[] data = new byte[1024];
+ while ((len = bis.read(data, 0, 1024)) >= 0) {
+ baos.write(data, 0, len);
+ }
+ bis.close();
+ byte[] allData = baos.toByteArray();
+ rdp.parse(allData);
+ parsedFiles++;
+ } catch (IOException e) {
+ problems.add(pop);
+ if (problems.size() > 3) {
+ break;
+ }
+ }
+ }
+ }
+ }
+ if (problems.isEmpty()) {
+ logger.fine("Finished importing files in directory "
+ + archivesDirectory + "/.");
+ } else {
+ StringBuilder sb = new StringBuilder("Failed importing files in "
+ + "directory " + archivesDirectory + "/:");
+ int printed = 0;
+ for (File f : problems) {
+ sb.append("\n " + f.getAbsolutePath());
+ if (++printed >= 3) {
+ sb.append("\n ... more");
+ break;
+ }
+ }
+ }
+ }
+ if (keepImportHistory) {
+ try {
+ archivesImportHistoryFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ archivesImportHistoryFile));
+ for (String line : archivesImportHistory) {
+ bw.write(line + "\n");
+ }
+ bw.close();
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Could not write archives import "
+ + "history file.");
+ }
+ }
+ logger.info("Finished importing relay descriptors from local "
+ + "directory:\nParsed " + parsedFiles + ", ignored "
+ + ignoredFiles + " files.");
+ }
+}
+
diff --git a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
new file mode 100644
index 0000000..c632656
--- /dev/null
+++ b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
@@ -0,0 +1,339 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.relaydescs;
+
+import java.io.BufferedOutputStream;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.SortedSet;
+import java.util.Stack;
+import java.util.TimeZone;
+import java.util.TreeSet;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.codec.binary.Hex;
+import org.torproject.descriptor.DescriptorParser;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.impl.DescriptorParseException;
+
/**
 * Stores relay descriptors (consensuses, votes, certificates, server
 * descriptors, and extra-info descriptors) to a structured output
 * directory, prepending a descriptor type annotation to each file, and
 * keeps counters of stored descriptors for logging.
 */
public class ArchiveWriter {
  private Logger logger;
  // Root of the directory-archive output tree.
  private File outputDirectory;
  // Parser used to validate descriptors before storing them.
  private DescriptorParser descriptorParser;
  // Per-type counters, reset by intermediateStats().
  private int storedConsensuses = 0, storedVotes = 0, storedCerts = 0,
      storedServerDescriptors = 0, storedExtraInfoDescriptors = 0;

  /**
   * @throws IllegalArgumentException if outputDirectory is null
   */
  public ArchiveWriter(File outputDirectory) {

    if (outputDirectory == null) {
      throw new IllegalArgumentException();
    }

    this.logger = Logger.getLogger(ArchiveWriter.class.getName());
    this.outputDirectory = outputDirectory;
    this.descriptorParser =
        DescriptorSourceFactory.createDescriptorParser();
  }

  /**
   * Writes <code>data</code> to <code>filename</code>, prefixed with
   * <code>typeAnnotation</code>, unless the file already exists.
   * Before writing, the data is parsed to verify it contains exactly
   * one descriptor; otherwise nothing is stored.
   *
   * @return true if the file was newly written, false if it already
   *     existed or could not be parsed/stored
   */
  private boolean store(byte[] typeAnnotation, byte[] data,
      String filename) {
    try {
      File file = new File(filename);
      if (!file.exists()) {
        this.logger.finer("Storing " + filename);
        if (this.descriptorParser.parseDescriptors(data, filename).size()
            != 1) {
          this.logger.info("Relay descriptor file " + filename
              + " doesn't contain exactly one descriptor.  Not storing.");
          return false;
        }
        file.getParentFile().mkdirs();
        BufferedOutputStream bos = new BufferedOutputStream(
            new FileOutputStream(file));
        /* Only prepend the @type annotation if the data doesn't already
         * start with one. */
        if (data.length > 0 && data[0] != '@') {
          bos.write(typeAnnotation, 0, typeAnnotation.length);
        }
        bos.write(data, 0, data.length);
        bos.close();
        return true;
      }
    } catch (DescriptorParseException e) {
      this.logger.log(Level.WARNING, "Could not parse relay descriptor "
          + filename + " before storing it to disk.  Skipping.", e);
    } catch (IOException e) {
      this.logger.log(Level.WARNING, "Could not store relay descriptor "
          + filename, e);
    }
    return false;
  }

  private static final byte[] CONSENSUS_ANNOTATION =
      "@type network-status-consensus-3 1.0\n".getBytes();
  /** Stores a consensus under consensus/yyyy/MM/dd/. */
  public void storeConsensus(byte[] data, long validAfter) {
    SimpleDateFormat printFormat = new SimpleDateFormat(
        "yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
    printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
    String filename = outputDirectory + "/consensus/"
        + printFormat.format(new Date(validAfter)) + "-consensus";
    if (this.store(CONSENSUS_ANNOTATION, data, filename)) {
      this.storedConsensuses++;
    }
  }

  private static final byte[] VOTE_ANNOTATION =
      "@type network-status-vote-3 1.0\n".getBytes();
  /** Stores a vote under vote/yyyy/MM/dd/, keyed by authority
   * fingerprint and vote digest. */
  public void storeVote(byte[] data, long validAfter,
      String fingerprint, String digest) {
    SimpleDateFormat printFormat = new SimpleDateFormat(
        "yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
    printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
    String filename = outputDirectory + "/vote/"
        + printFormat.format(new Date(validAfter)) + "-vote-"
        + fingerprint + "-" + digest;
    if (this.store(VOTE_ANNOTATION, data, filename)) {
      this.storedVotes++;
    }
  }

  private static final byte[] CERTIFICATE_ANNOTATION =
      "@type dir-key-certificate-3 1.0\n".getBytes();
  /** Stores a directory key certificate under certs/. */
  public void storeCertificate(byte[] data, String fingerprint,
      long published) {
    SimpleDateFormat printFormat = new SimpleDateFormat(
        "yyyy-MM-dd-HH-mm-ss");
    printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
    String filename = outputDirectory + "/certs/"
        + fingerprint + "-" + printFormat.format(new Date(published));
    if (this.store(CERTIFICATE_ANNOTATION, data, filename)) {
      this.storedCerts++;
    }
  }

  private static final byte[] SERVER_DESCRIPTOR_ANNOTATION =
      "@type server-descriptor 1.0\n".getBytes();
  /** Stores a server descriptor under
   * server-descriptor/yyyy/MM/d/i/, sharded by the first two hex
   * characters of its digest. */
  public void storeServerDescriptor(byte[] data, String digest,
      long published) {
    SimpleDateFormat printFormat = new SimpleDateFormat("yyyy/MM/");
    printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
    String filename = outputDirectory + "/server-descriptor/"
        + printFormat.format(new Date(published))
        + digest.substring(0, 1) + "/" + digest.substring(1, 2) + "/"
        + digest;
    if (this.store(SERVER_DESCRIPTOR_ANNOTATION, data, filename)) {
      this.storedServerDescriptors++;
    }
  }

  private static final byte[] EXTRA_INFO_ANNOTATION =
      "@type extra-info 1.0\n".getBytes();
  /** Stores an extra-info descriptor under extra-info/yyyy/MM/d/i/,
   * sharded by the first two hex characters of its digest. */
  public void storeExtraInfoDescriptor(byte[] data,
      String extraInfoDigest, long published) {
    SimpleDateFormat descriptorFormat = new SimpleDateFormat("yyyy/MM/");
    descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
    String filename = outputDirectory + "/extra-info/"
        + descriptorFormat.format(new Date(published))
        + extraInfoDigest.substring(0, 1) + "/"
        + extraInfoDigest.substring(1, 2) + "/"
        + extraInfoDigest;
    if (this.store(EXTRA_INFO_ANNOTATION, data, filename)) {
      this.storedExtraInfoDescriptors++;
    }
  }

  // Accumulated per-event statistics lines; dumped by dumpStats().
  private StringBuilder intermediateStats = new StringBuilder();
  /**
   * Appends a summary line of how many descriptors were stored since
   * the last call (attributed to <code>event</code>) and resets all
   * counters.
   */
  public void intermediateStats(String event) {
    intermediateStats.append("While " + event + ", we stored "
        + this.storedConsensuses + " consensus(es), " + this.storedVotes
        + " vote(s), " + this.storedCerts + " certificate(s), "
        + this.storedServerDescriptors + " server descriptor(s), and "
        + this.storedExtraInfoDescriptors
        + " extra-info descriptor(s) to disk.\n");
    this.storedConsensuses = 0;
    this.storedVotes = 0;
    this.storedCerts = 0;
    this.storedServerDescriptors = 0;
    this.storedExtraInfoDescriptors = 0;
  }
  /**
   * Dump some statistics on the completeness of descriptors to the logs
   * on level INFO.
   *
   * Re-reads the last three stored consensuses from disk and, for each
   * referenced vote, server descriptor, and extra-info descriptor,
   * checks whether the corresponding file exists in the output
   * directory.
   *
   * NOTE(review): the percentage computations divide by counts that can
   * be zero (e.g. allVotes), which would format as NaN — confirm this
   * is acceptable for log output.
   */
  public void dumpStats() {
    StringBuilder sb = new StringBuilder("Finished writing relay "
        + "descriptors to disk.\n");
    sb.append(intermediateStats.toString());
    sb.append("Statistics on the completeness of written relay "
        + "descriptors of the last 3 consensuses (Consensus/Vote, "
        + "valid-after, votes, server descriptors, extra-infos):");
    try {
      SimpleDateFormat validAfterFormat =
          new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
      validAfterFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
      SimpleDateFormat consensusVoteFormat =
          new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
      consensusVoteFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
      SimpleDateFormat descriptorFormat =
          new SimpleDateFormat("yyyy/MM/");
      descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC"));

      /* Collect all non-empty consensus files and keep only the three
       * most recent ones (TreeSet sorts by path, which encodes the
       * valid-after time).  Note that the trimming loop runs inside the
       * traversal loop, which is harmless but slightly unusual. */
      SortedSet<File> consensuses = new TreeSet<File>();
      Stack<File> leftToParse = new Stack<File>();
      leftToParse.add(new File(outputDirectory + "/consensus"));
      while (!leftToParse.isEmpty()) {
        File pop = leftToParse.pop();
        if (pop.isDirectory()) {
          for (File f : pop.listFiles()) {
            leftToParse.add(f);
          }
        } else if (pop.length() > 0) {
          consensuses.add(pop);
        }
        while (consensuses.size() > 3) {
          consensuses.remove(consensuses.first());
        }
      }
      for (File f : consensuses) {
        BufferedReader br = new BufferedReader(new FileReader(f));
        String line = null, validAfterTime = null,
            voteFilenamePrefix = null, dirSource = null;
        int allVotes = 0, foundVotes = 0,
            allServerDescs = 0, foundServerDescs = 0,
            allExtraInfos = 0, foundExtraInfos = 0;
        while ((line = br.readLine()) != null) {
          if (line.startsWith("valid-after ")) {
            /* Remember valid-after time to derive the vote file name
             * prefix for this consensus. */
            validAfterTime = line.substring("valid-after ".length());
            long validAfter = validAfterFormat.parse(
                validAfterTime).getTime();
            voteFilenamePrefix = outputDirectory + "/vote/"
                + consensusVoteFormat.format(new Date(validAfter))
                + "-vote-";
          } else if (line.startsWith("dir-source ")) {
            /* Authority fingerprint of the next vote-digest line. */
            dirSource = line.split(" ")[2];
          } else if (line.startsWith("vote-digest ")) {
            allVotes++;
            File voteFile = new File(voteFilenamePrefix + dirSource + "-"
                + line.split(" ")[1]);
            if (voteFile.exists()) {
              foundVotes++;
              /* Walk the vote's "r " lines and check the referenced
               * server descriptors and, transitively, their extra-info
               * descriptors. */
              BufferedReader vbr = new BufferedReader(new FileReader(
                  voteFile));
              String line3 = null;
              int voteAllServerDescs = 0, voteFoundServerDescs = 0,
                  voteAllExtraInfos = 0, voteFoundExtraInfos = 0;
              while ((line3 = vbr.readLine()) != null) {
                if (line3.startsWith("r ")) {
                  voteAllServerDescs++;
                  /* The "r" line carries the descriptor digest
                   * base64-encoded without padding; re-add "=" before
                   * decoding to hex. */
                  String digest = Hex.encodeHexString(Base64.decodeBase64(
                      line3.split(" ")[3] + "=")).toLowerCase();
                  long published = validAfterFormat.parse(
                      line3.split(" ")[4] + " "
                      + line3.split(" ")[5]).getTime();
                  String filename = outputDirectory
                      + "/server-descriptor/"
                      + descriptorFormat.format(new Date(published))
                      + digest.substring(0, 1) + "/"
                      + digest.substring(1, 2) + "/" + digest;
                  if (new File(filename).exists()) {
                    BufferedReader sbr = new BufferedReader(new FileReader(
                        new File(filename)));
                    String line2 = null;
                    while ((line2 = sbr.readLine()) != null) {
                      if (line2.startsWith("opt extra-info-digest ") ||
                          line2.startsWith("extra-info-digest ")) {
                        voteAllExtraInfos++;
                        String extraInfoDigest = line2.startsWith("opt ") ?
                            line2.split(" ")[2].toLowerCase() :
                            line2.split(" ")[1].toLowerCase();
                        String filename2 =
                            outputDirectory.getAbsolutePath()
                            + "/extra-info/"
                            + descriptorFormat.format(new Date(published))
                            + extraInfoDigest.substring(0, 1) + "/"
                            + extraInfoDigest.substring(1, 2) + "/"
                            + extraInfoDigest;
                        if (new File(filename2).exists()) {
                          voteFoundExtraInfos++;
                        }
                      }
                    }
                    sbr.close();
                    voteFoundServerDescs++;
                  }
                }
              }
              vbr.close();
              sb.append(String.format("%nV, %s, NA, %d/%d (%.1f%%), "
                  + "%d/%d (%.1f%%)", validAfterTime,
                  voteFoundServerDescs, voteAllServerDescs,
                  100.0D * (double) voteFoundServerDescs /
                    (double) voteAllServerDescs,
                  voteFoundExtraInfos, voteAllExtraInfos,
                  100.0D * (double) voteFoundExtraInfos /
                    (double) voteAllExtraInfos));
            }
          } else if (line.startsWith("r ")) {
            /* Same check as above, but for the consensus's own "r "
             * lines. */
            allServerDescs++;
            String digest = Hex.encodeHexString(Base64.decodeBase64(
                line.split(" ")[3] + "=")).toLowerCase();
            long published = validAfterFormat.parse(
                line.split(" ")[4] + " " + line.split(" ")[5]).getTime();
            String filename = outputDirectory.getAbsolutePath()
                + "/server-descriptor/"
                + descriptorFormat.format(new Date(published))
                + digest.substring(0, 1) + "/"
                + digest.substring(1, 2) + "/" + digest;
            if (new File (filename).exists()) {
              BufferedReader sbr = new BufferedReader(new FileReader(
                  new File(filename)));
              String line2 = null;
              while ((line2 = sbr.readLine()) != null) {
                if (line2.startsWith("opt extra-info-digest ") ||
                    line2.startsWith("extra-info-digest ")) {
                  allExtraInfos++;
                  String extraInfoDigest = line2.startsWith("opt ") ?
                      line2.split(" ")[2].toLowerCase() :
                      line2.split(" ")[1].toLowerCase();
                  String filename2 = outputDirectory.getAbsolutePath()
                      + "/extra-info/"
                      + descriptorFormat.format(new Date(published))
                      + extraInfoDigest.substring(0, 1) + "/"
                      + extraInfoDigest.substring(1, 2) + "/"
                      + extraInfoDigest;
                  if (new File (filename2).exists()) {
                    foundExtraInfos++;
                  }
                }
              }
              sbr.close();
              foundServerDescs++;
            }
          }
        }
        br.close();
        sb.append(String.format("%nC, %s, %d/%d (%.1f%%), "
            + "%d/%d (%.1f%%), %d/%d (%.1f%%)",
            validAfterTime, foundVotes, allVotes,
            100.0D * (double) foundVotes / (double) allVotes,
            foundServerDescs, allServerDescs,
            100.0D * (double) foundServerDescs / (double) allServerDescs,
            foundExtraInfos, allExtraInfos,
            100.0D * (double) foundExtraInfos / (double) allExtraInfos));
      }
      this.logger.info(sb.toString());
    } catch (IOException e) {
      this.logger.log(Level.WARNING, "Could not dump statistics to disk.",
          e);
    } catch (ParseException e) {
      this.logger.log(Level.WARNING, "Could not dump statistics to disk.",
          e);
    }
  }
}
diff --git a/src/org/torproject/ernie/db/relaydescs/CachedRelayDescriptorReader.java b/src/org/torproject/ernie/db/relaydescs/CachedRelayDescriptorReader.java
new file mode 100644
index 0000000..194e0a2
--- /dev/null
+++ b/src/org/torproject/ernie/db/relaydescs/CachedRelayDescriptorReader.java
@@ -0,0 +1,235 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.relaydescs;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.StringReader;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.TimeZone;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.codec.digest.DigestUtils;
+
+/**
+ * Parses all descriptors in local directory cacheddesc/ and sorts them
+ * into directory structure in directory-archive/.
+ */
/**
 * Parses all descriptors in local directory cacheddesc/ and sorts them
 * into directory structure in directory-archive/.
 */
public class CachedRelayDescriptorReader {

  /**
   * Reads the cached-descriptor files (cached-consensus,
   * v3-status-votes, cached-descriptors*, cached-extrainfo*) in each of
   * <code>inputDirectories</code>, splits them into individual
   * descriptors, and passes descriptors not seen before (tracked by
   * SHA-1 digest in <code>stats/cacheddesc-import-history</code>) to
   * <code>rdp</code> for parsing.
   *
   * @throws IllegalArgumentException if rdp is null, inputDirectories
   *     is null or empty, or statsDirectory is null
   */
  public CachedRelayDescriptorReader(RelayDescriptorParser rdp,
      List<String> inputDirectories, File statsDirectory) {

    if (rdp == null || inputDirectories == null ||
        inputDirectories.isEmpty() || statsDirectory == null) {
      throw new IllegalArgumentException();
    }

    StringBuilder dumpStats = new StringBuilder("Finished importing "
        + "relay descriptors from local Tor data directories:");
    Logger logger = Logger.getLogger(
        CachedRelayDescriptorReader.class.getName());

    /* Read import history containing SHA-1 digests of previously parsed
     * statuses and descriptors, so that we can skip them in this run. */
    Set<String> lastImportHistory = new HashSet<String>(),
        currentImportHistory = new HashSet<String>();
    File importHistoryFile = new File(statsDirectory,
        "cacheddesc-import-history");
    if (importHistoryFile.exists()) {
      try {
        BufferedReader br = new BufferedReader(new FileReader(
            importHistoryFile));
        String line;
        while ((line = br.readLine()) != null) {
          lastImportHistory.add(line);
        }
        br.close();
      } catch (IOException e) {
        logger.log(Level.WARNING, "Could not read import history from "
            + importHistoryFile.getAbsolutePath() + ".", e);
      }
    }

    /* Read cached descriptors directories. */
    for (String inputDirectory : inputDirectories) {
      File cachedDescDir = new File(inputDirectory);
      if (!cachedDescDir.exists()) {
        logger.warning("Directory " + cachedDescDir.getAbsolutePath()
            + " does not exist. Skipping.");
        continue;
      }
      logger.fine("Reading " + cachedDescDir.getAbsolutePath()
          + " directory.");
      for (File f : cachedDescDir.listFiles()) {
        try {
          // descriptors may contain non-ASCII chars; read as bytes to
          // determine digests
          BufferedInputStream bis =
              new BufferedInputStream(new FileInputStream(f));
          ByteArrayOutputStream baos = new ByteArrayOutputStream();
          int len;
          byte[] data = new byte[1024];
          while ((len = bis.read(data, 0, 1024)) >= 0) {
            baos.write(data, 0, len);
          }
          bis.close();
          byte[] allData = baos.toByteArray();
          if (f.getName().equals("cached-consensus")) {
            /* Check if directory information is stale, i.e. the
             * valid-after time lies more than six hours in the past. */
            BufferedReader br = new BufferedReader(new StringReader(
                new String(allData, "US-ASCII")));
            String line = null;
            while ((line = br.readLine()) != null) {
              if (line.startsWith("valid-after ")) {
                dumpStats.append("\n" + f.getName() + ": " + line.substring(
                    "valid-after ".length()));
                SimpleDateFormat dateTimeFormat =
                    new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
                dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
                if (dateTimeFormat.parse(line.substring("valid-after ".
                    length())).getTime() < System.currentTimeMillis()
                    - 6L * 60L * 60L * 1000L) {
                  logger.warning("Cached descriptor files in "
                      + cachedDescDir.getAbsolutePath() + " are stale. "
                      + "The valid-after line in cached-consensus is '"
                      + line + "'.");
                  dumpStats.append(" (stale!)");
                }
                break;
              }
            }
            br.close();

            /* Parse the cached consensus if we haven't parsed it before
             * (but regardless of whether it's stale or not). */
            if (rdp != null) {
              // SHA-1 digest of the full file identifies the consensus.
              String digest = Hex.encodeHexString(DigestUtils.sha(
                  allData));
              if (!lastImportHistory.contains(digest) &&
                  !currentImportHistory.contains(digest)) {
                rdp.parse(allData);
              } else {
                dumpStats.append(" (skipped)");
              }
              currentImportHistory.add(digest);
            }
          } else if (f.getName().equals("v3-status-votes")) {
            /* Split the concatenated votes at each
             * "network-status-version " token and parse those whose
             * digest we haven't seen before. */
            int parsedNum = 0, skippedNum = 0;
            String ascii = new String(allData, "US-ASCII");
            String startToken = "network-status-version ";
            int end = ascii.length();
            int start = ascii.indexOf(startToken);
            while (start >= 0 && start < end) {
              int next = ascii.indexOf(startToken, start + 1);
              if (next < 0) {
                next = end;
              }
              if (start < next) {
                byte[] rawNetworkStatusBytes = new byte[next - start];
                System.arraycopy(allData, start, rawNetworkStatusBytes, 0,
                    next - start);
                if (rdp != null) {
                  String digest = Hex.encodeHexString(DigestUtils.sha(
                      rawNetworkStatusBytes));
                  if (!lastImportHistory.contains(digest) &&
                      !currentImportHistory.contains(digest)) {
                    rdp.parse(rawNetworkStatusBytes);
                    parsedNum++;
                  } else {
                    skippedNum++;
                  }
                  currentImportHistory.add(digest);
                }
              }
              start = next;
            }
            dumpStats.append("\n" + f.getName() + ": parsed " + parsedNum
                + ", skipped " + skippedNum + " votes");
          } else if (f.getName().startsWith("cached-descriptors") ||
              f.getName().startsWith("cached-extrainfo")) {
            /* Split the concatenated descriptors by locating, for each
             * one, its start token, the "router-signature" line, and
             * the end of the signature block. */
            String ascii = new String(allData, "US-ASCII");
            int start = -1, sig = -1, end = -1;
            String startToken =
                f.getName().startsWith("cached-descriptors") ?
                "router " : "extra-info ";
            String sigToken = "\nrouter-signature\n";
            String endToken = "\n-----END SIGNATURE-----\n";
            int parsedNum = 0, skippedNum = 0;
            while (end < ascii.length()) {
              start = ascii.indexOf(startToken, end);
              if (start < 0) {
                break;
              }
              sig = ascii.indexOf(sigToken, start);
              if (sig < 0) {
                break;
              }
              sig += sigToken.length();
              end = ascii.indexOf(endToken, sig);
              if (end < 0) {
                break;
              }
              end += endToken.length();
              byte[] descBytes = new byte[end - start];
              System.arraycopy(allData, start, descBytes, 0, end - start);
              if (rdp != null) {
                String digest = Hex.encodeHexString(DigestUtils.sha(
                    descBytes));
                if (!lastImportHistory.contains(digest) &&
                    !currentImportHistory.contains(digest)) {
                  rdp.parse(descBytes);
                  parsedNum++;
                } else {
                  skippedNum++;
                }
                currentImportHistory.add(digest);
              }
            }
            dumpStats.append("\n" + f.getName() + ": parsed " + parsedNum
                + ", skipped " + skippedNum + " "
                + (f.getName().startsWith("cached-descriptors") ?
                "server" : "extra-info") + " descriptors");
          }
        } catch (IOException e) {
          logger.log(Level.WARNING, "Failed reading "
              + cachedDescDir.getAbsolutePath() + " directory.", e);
        } catch (ParseException e) {
          logger.log(Level.WARNING, "Failed reading "
              + cachedDescDir.getAbsolutePath() + " directory.", e);
        }
      }
      logger.fine("Finished reading "
          + cachedDescDir.getAbsolutePath() + " directory.");
    }

    /* Write import history containing SHA-1 digests to disk.  Note that
     * only digests seen in this run are written, so descriptors absent
     * from the data directories drop out of the history. */
    try {
      importHistoryFile.getParentFile().mkdirs();
      BufferedWriter bw = new BufferedWriter(new FileWriter(
          importHistoryFile));
      for (String digest : currentImportHistory) {
        bw.write(digest + "\n");
      }
      bw.close();
    } catch (IOException e) {
      logger.log(Level.WARNING, "Could not write import history to "
          + importHistoryFile.getAbsolutePath() + ".", e);
    }

    logger.info(dumpStats.toString());
  }
}
+
diff --git a/src/org/torproject/ernie/db/relaydescs/RelayDescriptorDownloader.java b/src/org/torproject/ernie/db/relaydescs/RelayDescriptorDownloader.java
new file mode 100644
index 0000000..0bea50a
--- /dev/null
+++ b/src/org/torproject/ernie/db/relaydescs/RelayDescriptorDownloader.java
@@ -0,0 +1,821 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.relaydescs;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TimeZone;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import java.util.zip.InflaterInputStream;
+
+/**
+ * Downloads relay descriptors from the directory authorities via HTTP.
+ * Keeps a list of missing descriptors that gets updated by parse results
+ * from <code>RelayDescriptorParser</code> and downloads all missing
+ * descriptors that have been published in the last 24 hours. Also
+ * downloads all server and extra-info descriptors known to a directory
+ * authority at most once a day.
+ */
+public class RelayDescriptorDownloader {
+
+ /**
+ * Text file containing the descriptors that we are missing and that we
+ * want to download. Lines are formatted as:
+ *
+ * - "consensus,<validafter>,<parsed>",
+ * - "vote,<validafter>,<fingerprint>,<parsed>",
+ * - "server,<published>,<relayid>,<descid>,<parsed>", or
+ * - "extra,<published>,<relayid>,<descid>,<parsed>".
+ */
+ private File missingDescriptorsFile;
+
+ /**
+ * Relay descriptors that we are missing and that we want to download
+ * either in this execution or write to disk and try next time. Map keys
+ * contain comma-separated values as in the missing descriptors files
+ * without the "parsed" column. Map values contain the "parsed" column.
+ */
+ private SortedMap<String, String> missingDescriptors;
+
+ /**
+ * Text file containing the IP addresses (and Dir ports if not 80) of
+ * directory authorities and when we last downloaded all server and
+ * extra-info descriptors from them, so that we can avoid downloading
+ * them too often.
+ */
+ private File lastDownloadedAllDescriptorsFile;
+
+ /**
+ * Map of directory authorities and when we last downloaded all server
+ * and extra-info descriptors from them. Map keys are IP addresses (and
+ * Dir ports if not 80), map values are timestamps.
+ */
+ private Map<String, String> lastDownloadedAllDescriptors;
+
+ /**
+ * <code>RelayDescriptorParser</code> that we will hand over the
+ * downloaded descriptors for parsing.
+ */
+ private RelayDescriptorParser rdp;
+
+ /**
+ * Directory authorities that we will try to download missing
+ * descriptors from.
+ */
+ private List<String> authorities;
+
+ /**
+ * Should we try to download the current consensus if we don't have it?
+ */
+ private boolean downloadCurrentConsensus;
+
+ /**
+ * Should we try to download current votes if we don't have them?
+ */
+ private boolean downloadCurrentVotes;
+
+ /**
+ * Should we try to download missing server descriptors that have been
+ * published within the past 24 hours?
+ */
+ private boolean downloadMissingServerDescriptors;
+
+ /**
+ * Should we try to download missing extra-info descriptors that have
+ * been published within the past 24 hours?
+ */
+ private boolean downloadMissingExtraInfos;
+
+ /**
+ * Should we try to download all server descriptors from the authorities
+ * once every 24 hours?
+ */
+ private boolean downloadAllServerDescriptors;
+
+ /**
+ * Should we try to download all extra-info descriptors from the
+ * authorities once every 24 hours?
+ */
+ private boolean downloadAllExtraInfos;
+
+ /**
+ * Should we download zlib-compressed versions of descriptors by adding
+ * ".z" to URLs?
+ */
+ private boolean downloadCompressed;
+
+ /**
+ * valid-after time that we expect the current consensus and votes to
+ * have, formatted "yyyy-MM-dd HH:mm:ss". We only expect to find
+ * consensuses and votes with this valid-after time on the directory
+ * authorities. This time is initialized as the beginning of the current
+ * hour.
+ */
+ private String currentValidAfter;
+
+ /**
+ * Cut-off time for missing server and extra-info descriptors, formatted
+ * "yyyy-MM-dd HH:mm:ss". This time is initialized as the current system
+ * time minus 24 hours.
+ */
+ private String descriptorCutOff;
+
+ /**
+ * Cut-off time for downloading all server and extra-info descriptors
+ * from the directory authorities, formatted "yyyy-MM-dd HH:mm:ss". This
+ * time is initialized as the current system time minus 23:30 hours.
+ */
+ private String downloadAllDescriptorsCutOff;
+
+ /**
+ * Directory authorities that we plan to download all server and
+ * extra-info descriptors from in this execution.
+ */
+ private Set<String> downloadAllDescriptorsFromAuthorities;
+
+ /**
+ * Current timestamp that is written to the missing list for descriptors
+ * that we parsed in this execution and for authorities that we
+ * downloaded all server and extra-info descriptors from.
+ */
+ private String currentTimestamp;
+
+ /**
+ * Logger for this class.
+ */
+ private Logger logger;
+
+ /**
+ * Number of descriptors requested by directory authority to be included
+ * in logs.
+ */
+ private Map<String, Integer> requestsByAuthority;
+
+ /**
+ * Counters for descriptors that we had on the missing list at the
+ * beginning of the execution, that we added to the missing list,
+ * that we requested, and that we successfully downloaded in this
+ * execution.
+ */
+ private int oldMissingConsensuses = 0, oldMissingVotes = 0,
+ oldMissingServerDescriptors = 0, oldMissingExtraInfoDescriptors = 0,
+ newMissingConsensuses = 0, newMissingVotes = 0,
+ newMissingServerDescriptors = 0, newMissingExtraInfoDescriptors = 0,
+ requestedConsensuses = 0, requestedVotes = 0,
+ requestedMissingServerDescriptors = 0,
+ requestedAllServerDescriptors = 0,
+ requestedMissingExtraInfoDescriptors = 0,
+ requestedAllExtraInfoDescriptors = 0, downloadedConsensuses = 0,
+ downloadedVotes = 0, downloadedMissingServerDescriptors = 0,
+ downloadedAllServerDescriptors = 0,
+ downloadedMissingExtraInfoDescriptors = 0,
+ downloadedAllExtraInfoDescriptors = 0;
+
+ /**
+ * Initializes this class, including reading in missing descriptors from
+ * <code>stats/missing-relay-descriptors</code> and the times when we
+ * last downloaded all server and extra-info descriptors from
+ * <code>stats/last-downloaded-all-descriptors</code>.
+ */
+ public RelayDescriptorDownloader(RelayDescriptorParser rdp,
+ List<String> authorities, boolean downloadCurrentConsensus,
+ boolean downloadCurrentVotes,
+ boolean downloadMissingServerDescriptors,
+ boolean downloadMissingExtraInfos,
+ boolean downloadAllServerDescriptors, boolean downloadAllExtraInfos,
+ boolean downloadCompressed) {
+
+ /* Memorize argument values. */
+ this.rdp = rdp;
+ this.authorities = new ArrayList<String>(authorities);
+ this.downloadCurrentConsensus = downloadCurrentConsensus;
+ this.downloadCurrentVotes = downloadCurrentVotes;
+ this.downloadMissingServerDescriptors =
+ downloadMissingServerDescriptors;
+ this.downloadMissingExtraInfos = downloadMissingExtraInfos;
+ this.downloadAllServerDescriptors = downloadAllServerDescriptors;
+ this.downloadAllExtraInfos = downloadAllExtraInfos;
+ this.downloadCompressed = downloadCompressed;
+
+ /* Shuffle list of authorities for better load balancing over time. */
+ Collections.shuffle(this.authorities);
+
+ /* Initialize logger. */
+ this.logger = Logger.getLogger(
+ RelayDescriptorDownloader.class.getName());
+
+ /* Prepare cut-off times and timestamp for the missing descriptors
+ * list and the list of authorities to download all server and
+ * extra-info descriptors from. */
+ SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ format.setTimeZone(TimeZone.getTimeZone("UTC"));
+ long now = System.currentTimeMillis();
+ this.currentValidAfter = format.format((now / (60L * 60L * 1000L)) *
+ (60L * 60L * 1000L));
+ this.descriptorCutOff = format.format(now - 24L * 60L * 60L * 1000L);
+ this.currentTimestamp = format.format(now);
+ this.downloadAllDescriptorsCutOff = format.format(now
+ - 23L * 60L * 60L * 1000L - 30L * 60L * 1000L);
+
+ /* Read list of missing descriptors from disk and memorize those that
+ * we are interested in and that are likely to be found on the
+ * directory authorities. */
+ this.missingDescriptors = new TreeMap<String, String>();
+ this.missingDescriptorsFile = new File(
+ "stats/missing-relay-descriptors");
+ if (this.missingDescriptorsFile.exists()) {
+ try {
+ this.logger.fine("Reading file "
+ + this.missingDescriptorsFile.getAbsolutePath() + "...");
+ BufferedReader br = new BufferedReader(new FileReader(
+ this.missingDescriptorsFile));
+ String line;
+ while ((line = br.readLine()) != null) {
+ if (line.split(",").length > 2) {
+ String published = line.split(",")[1];
+ if (((line.startsWith("consensus,") ||
+ line.startsWith("vote,")) &&
+ this.currentValidAfter.equals(published)) ||
+ ((line.startsWith("server,") ||
+ line.startsWith("extra,")) &&
+ this.descriptorCutOff.compareTo(published) < 0)) {
+ if (!line.endsWith("NA")) {
+ /* Not missing. */
+ } else if (line.startsWith("consensus,")) {
+ oldMissingConsensuses++;
+ } else if (line.startsWith("vote,")) {
+ oldMissingVotes++;
+ } else if (line.startsWith("server,")) {
+ oldMissingServerDescriptors++;
+ } else if (line.startsWith("extra,")) {
+ oldMissingExtraInfoDescriptors++;
+ }
+ int separateAt = line.lastIndexOf(",");
+ this.missingDescriptors.put(line.substring(0,
+ separateAt), line.substring(separateAt + 1));
+ }
+ } else {
+ this.logger.fine("Invalid line '" + line + "' in "
+ + this.missingDescriptorsFile.getAbsolutePath()
+ + ". Ignoring.");
+ }
+ }
+ br.close();
+ this.logger.fine("Finished reading file "
+ + this.missingDescriptorsFile.getAbsolutePath() + ".");
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed to read file "
+ + this.missingDescriptorsFile.getAbsolutePath()
+ + "! This means that we might forget to dowload relay "
+ + "descriptors we are missing.", e);
+ }
+ }
+
+ /* Read list of directory authorities and when we last downloaded all
+ * server and extra-info descriptors from them. */
+ this.lastDownloadedAllDescriptors = new HashMap<String, String>();
+ this.lastDownloadedAllDescriptorsFile = new File(
+ "stats/last-downloaded-all-descriptors");
+ if (this.lastDownloadedAllDescriptorsFile.exists()) {
+ try {
+ this.logger.fine("Reading file "
+ + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
+ + "...");
+ BufferedReader br = new BufferedReader(new FileReader(
+ this.lastDownloadedAllDescriptorsFile));
+ String line;
+ while ((line = br.readLine()) != null) {
+ if (line.split(",").length != 2) {
+ this.logger.fine("Invalid line '" + line + "' in "
+ + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
+ + ". Ignoring.");
+ } else {
+ String[] parts = line.split(",");
+ String authority = parts[0];
+ String lastDownloaded = parts[1];
+ this.lastDownloadedAllDescriptors.put(authority,
+ lastDownloaded);
+ }
+ }
+ br.close();
+ this.logger.fine("Finished reading file "
+ + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
+ + ".");
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed to read file "
+ + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
+ + "! This means that we might download all server and "
+ + "extra-info descriptors more often than we should.", e);
+ }
+ }
+
+ /* Make a list of at most two directory authorities that we want to
+ * download all server and extra-info descriptors from. */
+ this.downloadAllDescriptorsFromAuthorities = new HashSet<String>();
+ for (String authority : this.authorities) {
+ if (!this.lastDownloadedAllDescriptors.containsKey(authority) ||
+ this.lastDownloadedAllDescriptors.get(authority).compareTo(
+ this.downloadAllDescriptorsCutOff) < 0) {
+ this.downloadAllDescriptorsFromAuthorities.add(authority);
+ }
+ if (this.downloadAllDescriptorsFromAuthorities.size() >= 2) {
+ break;
+ }
+ }
+
+ /* Prepare statistics on this execution. */
+ this.requestsByAuthority = new HashMap<String, Integer>();
+ for (String authority : this.authorities) {
+ this.requestsByAuthority.put(authority, 0);
+ }
+ }
+
+ /**
+ * We have parsed a consensus. Take this consensus off the missing list
+ * and add the votes created by the given <code>authorities</code> and
+ * the <code>serverDescriptors</code> which are in the format
+ * "<published>,<relayid>,<descid>" to that list.
+ */
+ public void haveParsedConsensus(String validAfter,
+ Set<String> authorities, Set<String> serverDescriptors) {
+
+ /* Mark consensus as parsed. */
+ if (this.currentValidAfter.equals(validAfter)) {
+ String consensusKey = "consensus," + validAfter;
+ this.missingDescriptors.put(consensusKey, this.currentTimestamp);
+
+ /* Add votes to missing list. */
+ for (String authority : authorities) {
+ String voteKey = "vote," + validAfter + "," + authority;
+ if (!this.missingDescriptors.containsKey(voteKey)) {
+ this.missingDescriptors.put(voteKey, "NA");
+ this.newMissingVotes++;
+ }
+ }
+ }
+
+ /* Add server descriptors to missing list. */
+ for (String serverDescriptor : serverDescriptors) {
+ String published = serverDescriptor.split(",")[0];
+ if (this.descriptorCutOff.compareTo(published) < 0) {
+ String serverDescriptorKey = "server," + serverDescriptor;
+ if (!this.missingDescriptors.containsKey(
+ serverDescriptorKey)) {
+ this.missingDescriptors.put(serverDescriptorKey, "NA");
+ this.newMissingServerDescriptors++;
+ }
+ }
+ }
+ }
+
+ /**
+ * We have parsed a vote. Take this vote off the missing list and add
+ * the <code>serverDescriptors</code> which are in the format
+ * "<published>,<relayid>,<descid>" to that list.
+ */
+ public void haveParsedVote(String validAfter, String fingerprint,
+ Set<String> serverDescriptors) {
+
+ /* Mark vote as parsed. */
+ if (this.currentValidAfter.equals(validAfter)) {
+ String voteKey = "vote," + validAfter + "," + fingerprint;
+ this.missingDescriptors.put(voteKey, this.currentTimestamp);
+ }
+
+ /* Add server descriptors to missing list. */
+ for (String serverDescriptor : serverDescriptors) {
+ String published = serverDescriptor.split(",")[0];
+ if (this.descriptorCutOff.compareTo(published) < 0) {
+ String serverDescriptorKey = "server," + serverDescriptor;
+ if (!this.missingDescriptors.containsKey(
+ serverDescriptorKey)) {
+ this.missingDescriptors.put(serverDescriptorKey, "NA");
+ this.newMissingServerDescriptors++;
+ }
+ }
+ }
+ }
+
+ /**
+ * We have parsed a server descriptor. Take this server descriptor off
+ * the missing list and put the extra-info descriptor digest on that
+ * list.
+ */
+ public void haveParsedServerDescriptor(String published,
+ String relayIdentity, String serverDescriptorDigest,
+ String extraInfoDigest) {
+
+ /* Mark server descriptor as parsed. */
+ if (this.descriptorCutOff.compareTo(published) < 0) {
+ String serverDescriptorKey = "server," + published + ","
+ + relayIdentity + "," + serverDescriptorDigest;
+ this.missingDescriptors.put(serverDescriptorKey,
+ this.currentTimestamp);
+
+ /* Add extra-info descriptor to missing list. */
+ if (extraInfoDigest != null) {
+ String extraInfoKey = "extra," + published + ","
+ + relayIdentity + "," + extraInfoDigest;
+ if (!this.missingDescriptors.containsKey(extraInfoKey)) {
+ this.missingDescriptors.put(extraInfoKey, "NA");
+ this.newMissingExtraInfoDescriptors++;
+ }
+ }
+ }
+ }
+
+ /**
+ * We have parsed an extra-info descriptor. Take it off the missing
+ * list.
+ */
+ public void haveParsedExtraInfoDescriptor(String published,
+ String relayIdentity, String extraInfoDigest) {
+ if (this.descriptorCutOff.compareTo(published) < 0) {
+ String extraInfoKey = "extra," + published + ","
+ + relayIdentity + "," + extraInfoDigest;
+ this.missingDescriptors.put(extraInfoKey, this.currentTimestamp);
+ }
+ }
+
  /**
   * Downloads missing descriptors that we think might still be available
   * on the directory authorities as well as all server and extra-info
   * descriptors once per day. Increments the requested*/downloaded*
   * counters as a side effect; call writeFile() afterwards to persist
   * and log the results.
   */
  public void downloadDescriptors() {

    /* Put the current consensus on the missing list, unless we already
     * have it. */
    String consensusKey = "consensus," + this.currentValidAfter;
    if (!this.missingDescriptors.containsKey(consensusKey)) {
      this.missingDescriptors.put(consensusKey, "NA");
      this.newMissingConsensuses++;
    }

    /* Download descriptors from authorities which are in random order
     * (shuffled in the constructor), so that we distribute the load
     * somewhat fairly over time. */
    for (String authority : authorities) {

      /* Make all requests to an authority in a single try block. If
       * something goes wrong with this authority, we give up on all
       * downloads and continue with the next authority. */
      /* TODO Some authorities provide very little bandwidth and could
       * slow down the entire download process. Ponder adding a timeout of
       * 3 or 5 minutes per authority to avoid getting in the way of the
       * next execution. */
      try {

        /* Start with downloading the current consensus, unless we already
         * have it. */
        if (downloadCurrentConsensus) {
          if (this.missingDescriptors.containsKey(consensusKey) &&
              this.missingDescriptors.get(consensusKey).equals("NA")) {
            this.requestedConsensuses++;
            this.downloadedConsensuses +=
                this.downloadResourceFromAuthority(authority,
                "/tor/status-vote/current/consensus");
          }
        }

        /* Next, try to download current votes that we're missing. */
        if (downloadCurrentVotes) {
          String voteKeyPrefix = "vote," + this.currentValidAfter;
          SortedSet<String> fingerprints = new TreeSet<String>();
          for (Map.Entry<String, String> e :
              this.missingDescriptors.entrySet()) {
            /* "NA" marks an entry that is still missing; the key format
             * is "vote,<valid-after>,<fingerprint>". */
            if (e.getValue().equals("NA") &&
                e.getKey().startsWith(voteKeyPrefix)) {
              String fingerprint = e.getKey().split(",")[2];
              fingerprints.add(fingerprint);
            }
          }
          for (String fingerprint : fingerprints) {
            this.requestedVotes++;
            this.downloadedVotes +=
                this.downloadResourceFromAuthority(authority,
                "/tor/status-vote/current/" + fingerprint);
          }
        }

        /* Download either all server and extra-info descriptors or only
         * those that we're missing. Start with server descriptors, then
         * request extra-info descriptors. */
        List<String> types = new ArrayList<String>(Arrays.asList(
            "server,extra".split(",")));
        for (String type : types) {

          /* Download all server or extra-info descriptors from this
           * authority if we haven't done so for 24 hours and if we're
           * configured to do so. */
          if (this.downloadAllDescriptorsFromAuthorities.contains(
              authority) && ((type.equals("server") &&
              this.downloadAllServerDescriptors) ||
              (type.equals("extra") && this.downloadAllExtraInfos))) {
            int downloadedAllDescriptors =
                this.downloadResourceFromAuthority(authority, "/tor/"
                + type + "/all");
            if (type.equals("server")) {
              this.requestedAllServerDescriptors++;
              this.downloadedAllServerDescriptors +=
                  downloadedAllDescriptors;
            } else {
              this.requestedAllExtraInfoDescriptors++;
              this.downloadedAllExtraInfoDescriptors +=
                  downloadedAllDescriptors;
            }

          /* Download missing server or extra-info descriptors if we're
           * configured to do so. */
          } else if ((type.equals("server") &&
              this.downloadMissingServerDescriptors) ||
              (type.equals("extra") && this.downloadMissingExtraInfos)) {

            /* Go through the list of missing descriptors of this type
             * and combine the descriptor identifiers to a URL of up to
             * 96 descriptors that we can download at once. */
            SortedSet<String> descriptorIdentifiers =
                new TreeSet<String>();
            for (Map.Entry<String, String> e :
                this.missingDescriptors.entrySet()) {
              /* Key format is "<type>,<published>,<relayid>,<descid>". */
              if (e.getValue().equals("NA") &&
                  e.getKey().startsWith(type + ",") &&
                  this.descriptorCutOff.compareTo(
                  e.getKey().split(",")[1]) < 0) {
                String descriptorIdentifier = e.getKey().split(",")[3];
                descriptorIdentifiers.add(descriptorIdentifier);
              }
            }
            StringBuilder combinedResource = null;
            int descriptorsInCombinedResource = 0,
                requestedDescriptors = 0, downloadedDescriptors = 0;
            for (String descriptorIdentifier : descriptorIdentifiers) {
              /* Send off a full batch of 96 before starting the next
               * one. */
              if (descriptorsInCombinedResource >= 96) {
                requestedDescriptors += descriptorsInCombinedResource;
                downloadedDescriptors +=
                    this.downloadResourceFromAuthority(authority,
                    combinedResource.toString());
                combinedResource = null;
                descriptorsInCombinedResource = 0;
              }
              if (descriptorsInCombinedResource == 0) {
                combinedResource = new StringBuilder("/tor/" + type
                    + "/d/" + descriptorIdentifier);
              } else {
                combinedResource.append("+" + descriptorIdentifier);
              }
              descriptorsInCombinedResource++;
            }
            /* Send off the last, possibly partial batch. */
            if (descriptorsInCombinedResource > 0) {
              requestedDescriptors += descriptorsInCombinedResource;
              downloadedDescriptors +=
                  this.downloadResourceFromAuthority(authority,
                  combinedResource.toString());
            }
            if (type.equals("server")) {
              this.requestedMissingServerDescriptors +=
                  requestedDescriptors;
              this.downloadedMissingServerDescriptors +=
                  downloadedDescriptors;
            } else {
              this.requestedMissingExtraInfoDescriptors +=
                  requestedDescriptors;
              this.downloadedMissingExtraInfoDescriptors +=
                  downloadedDescriptors;
            }
          }
        }

      /* If a download failed, stop requesting descriptors from this
       * authority and move on to the next. */
      } catch (IOException e) {
        logger.log(Level.FINE, "Failed downloading from " + authority
            + "!", e);
      }
    }
  }
+
+ /**
+ * Attempts to download one or more descriptors identified by a resource
+ * string from a directory authority and passes the returned
+ * descriptor(s) to the <code>RelayDescriptorParser</code> upon success.
+ * Returns the number of descriptors contained in the reply. Throws an
+ * <code>IOException</code> if something goes wrong while downloading.
+ */
+ private int downloadResourceFromAuthority(String authority,
+ String resource) throws IOException {
+ byte[] allData = null;
+ this.requestsByAuthority.put(authority,
+ this.requestsByAuthority.get(authority) + 1);
+ /* TODO Disable compressed downloads for extra-info descriptors,
+ * because zlib decompression doesn't work correctly. Figure out why
+ * this is and fix it. */
+ String fullUrl = "http://" + authority + resource
+ + (this.downloadCompressed && !resource.startsWith("/tor/extra/")
+ ? ".z" : "");
+ URL u = new URL(fullUrl);
+ HttpURLConnection huc = (HttpURLConnection) u.openConnection();
+ huc.setRequestMethod("GET");
+ huc.connect();
+ int response = huc.getResponseCode();
+ if (response == 200) {
+ BufferedInputStream in = this.downloadCompressed &&
+ !resource.startsWith("/tor/extra/")
+ ? new BufferedInputStream(new InflaterInputStream(
+ huc.getInputStream()))
+ : new BufferedInputStream(huc.getInputStream());
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ int len;
+ byte[] data = new byte[1024];
+ while ((len = in.read(data, 0, 1024)) >= 0) {
+ baos.write(data, 0, len);
+ }
+ in.close();
+ allData = baos.toByteArray();
+ }
+ logger.fine("Downloaded " + fullUrl + " -> " + response + " ("
+ + (allData == null ? 0 : allData.length) + " bytes)");
+ int receivedDescriptors = 0;
+ if (allData != null) {
+ if (resource.startsWith("/tor/status-vote/current/")) {
+ this.rdp.parse(allData);
+ receivedDescriptors = 1;
+ } else if (resource.startsWith("/tor/server/") ||
+ resource.startsWith("/tor/extra/")) {
+ if (resource.equals("/tor/server/all")) {
+ this.lastDownloadedAllDescriptors.put(authority,
+ this.currentTimestamp);
+ }
+ String ascii = null;
+ try {
+ ascii = new String(allData, "US-ASCII");
+ } catch (UnsupportedEncodingException e) {
+ /* No way that US-ASCII is not supported. */
+ }
+ int start = -1, sig = -1, end = -1;
+ String startToken = resource.startsWith("/tor/server/") ?
+ "router " : "extra-info ";
+ String sigToken = "\nrouter-signature\n";
+ String endToken = "\n-----END SIGNATURE-----\n";
+ while (end < ascii.length()) {
+ start = ascii.indexOf(startToken, end);
+ if (start < 0) {
+ break;
+ }
+ sig = ascii.indexOf(sigToken, start);
+ if (sig < 0) {
+ break;
+ }
+ sig += sigToken.length();
+ end = ascii.indexOf(endToken, sig);
+ if (end < 0) {
+ break;
+ }
+ end += endToken.length();
+ byte[] descBytes = new byte[end - start];
+ System.arraycopy(allData, start, descBytes, 0, end - start);
+ this.rdp.parse(descBytes);
+ receivedDescriptors++;
+ }
+ }
+ }
+ return receivedDescriptors;
+ }
+
+ /**
+ * Writes status files to disk and logs statistics about downloading
+ * relay descriptors in this execution.
+ */
+ public void writeFile() {
+
+ /* Write missing descriptors file to disk. */
+ int missingConsensuses = 0, missingVotes = 0,
+ missingServerDescriptors = 0, missingExtraInfoDescriptors = 0;
+ try {
+ this.logger.fine("Writing file "
+ + this.missingDescriptorsFile.getAbsolutePath() + "...");
+ this.missingDescriptorsFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ this.missingDescriptorsFile));
+ for (Map.Entry<String, String> e :
+ this.missingDescriptors.entrySet()) {
+ String key = e.getKey(), value = e.getValue();
+ if (!value.equals("NA")) {
+ /* Not missing. */
+ } else if (key.startsWith("consensus,")) {
+ missingConsensuses++;
+ } else if (key.startsWith("vote,")) {
+ missingVotes++;
+ } else if (key.startsWith("server,")) {
+ missingServerDescriptors++;
+ } else if (key.startsWith("extra,")) {
+ missingExtraInfoDescriptors++;
+ }
+ bw.write(key + "," + value + "\n");
+ }
+ bw.close();
+ this.logger.fine("Finished writing file "
+ + this.missingDescriptorsFile.getAbsolutePath() + ".");
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed writing "
+ + this.missingDescriptorsFile.getAbsolutePath() + "!", e);
+ }
+
+ /* Write text file containing the directory authorities and when we
+ * last downloaded all server and extra-info descriptors from them to
+ * disk. */
+ try {
+ this.logger.fine("Writing file "
+ + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
+ + "...");
+ this.lastDownloadedAllDescriptorsFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ this.lastDownloadedAllDescriptorsFile));
+ for (Map.Entry<String, String> e :
+ this.lastDownloadedAllDescriptors.entrySet()) {
+ String authority = e.getKey();
+ String lastDownloaded = e.getValue();
+ bw.write(authority + "," + lastDownloaded + "\n");
+ }
+ bw.close();
+ this.logger.fine("Finished writing file "
+ + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
+ + ".");
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed writing "
+ + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() + "!",
+ e);
+ }
+
+ /* Log statistics about this execution. */
+ this.logger.info("Finished downloading relay descriptors from the "
+ + "directory authorities.");
+ this.logger.info("At the beginning of this execution, we were "
+ + "missing " + oldMissingConsensuses + " consensus(es), "
+ + oldMissingVotes + " vote(s), " + oldMissingServerDescriptors
+ + " server descriptor(s), and " + oldMissingExtraInfoDescriptors
+ + " extra-info descriptor(s).");
+ this.logger.info("During this execution, we added "
+ + this.newMissingConsensuses + " consensus(es), "
+ + this.newMissingVotes + " vote(s), "
+ + this.newMissingServerDescriptors + " server descriptor(s), and "
+ + this.newMissingExtraInfoDescriptors + " extra-info "
+ + "descriptor(s) to the missing list, some of which we also "
+ + "requested and removed from the list again.");
+ this.logger.info("We requested " + this.requestedConsensuses
+ + " consensus(es), " + this.requestedVotes + " vote(s), "
+ + this.requestedMissingServerDescriptors + " missing server "
+ + "descriptor(s), " + this.requestedAllServerDescriptors
+ + " times all server descriptors, "
+ + this.requestedMissingExtraInfoDescriptors + " missing "
+ + "extra-info descriptor(s), and "
+ + this.requestedAllExtraInfoDescriptors + " times all extra-info "
+ + "descriptors from the directory authorities.");
+ StringBuilder sb = new StringBuilder();
+ for (String authority : this.authorities) {
+ sb.append(" " + authority + "="
+ + this.requestsByAuthority.get(authority));
+ }
+ this.logger.info("We sent these numbers of requests to the directory "
+ + "authorities:" + sb.toString());
+ this.logger.info("We successfully downloaded "
+ + this.downloadedConsensuses + " consensus(es), "
+ + this.downloadedVotes + " vote(s), "
+ + this.downloadedMissingServerDescriptors + " missing server "
+ + "descriptor(s), " + this.downloadedAllServerDescriptors
+ + " server descriptor(s) when downloading all descriptors, "
+ + this.downloadedMissingExtraInfoDescriptors + " missing "
+ + "extra-info descriptor(s) and "
+ + this.downloadedAllExtraInfoDescriptors + " extra-info "
+ + "descriptor(s) when downloading all descriptors.");
+ this.logger.info("At the end of this execution, we are missing "
+ + missingConsensuses + " consensus(es), " + missingVotes
+ + " vote(s), " + missingServerDescriptors + " server "
+ + "descriptor(s), and " + missingExtraInfoDescriptors
+ + " extra-info descriptor(s), some of which we may try in the next "
+ + "execution.");
+ }
+}
+
diff --git a/src/org/torproject/ernie/db/relaydescs/RelayDescriptorParser.java b/src/org/torproject/ernie/db/relaydescs/RelayDescriptorParser.java
new file mode 100644
index 0000000..6f04c20
--- /dev/null
+++ b/src/org/torproject/ernie/db/relaydescs/RelayDescriptorParser.java
@@ -0,0 +1,265 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.relaydescs;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.StringReader;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.SortedSet;
+import java.util.TimeZone;
+import java.util.TreeSet;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.codec.digest.DigestUtils;
+
+/**
+ * Parses relay descriptors including network status consensuses and
+ * votes, server and extra-info descriptors, and passes the results to the
+ * stats handlers, to the archive writer, or to the relay descriptor
+ * downloader.
+ */
+public class RelayDescriptorParser {
+
+ /**
+ * File writer that writes descriptor contents to files in a
+ * directory-archive directory structure.
+ */
+ private ArchiveWriter aw;
+
+ /**
+ * Missing descriptor downloader that uses the parse results to learn
+ * which descriptors we are missing and want to download.
+ */
+ private RelayDescriptorDownloader rdd;
+
+ /**
+ * Logger for this class.
+ */
+ private Logger logger;
+
+ private SimpleDateFormat dateTimeFormat;
+
+ /**
+ * Initializes this class.
+ */
+ public RelayDescriptorParser(ArchiveWriter aw) {
+ this.aw = aw;
+
+ /* Initialize logger. */
+ this.logger = Logger.getLogger(RelayDescriptorParser.class.getName());
+
+ this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ }
+
+ public void setRelayDescriptorDownloader(
+ RelayDescriptorDownloader rdd) {
+ this.rdd = rdd;
+ }
+
+ public void parse(byte[] data) {
+ try {
+ /* Convert descriptor to ASCII for parsing. This means we'll lose
+ * the non-ASCII chars, but we don't care about them for parsing
+ * anyway. */
+ BufferedReader br = new BufferedReader(new StringReader(new String(
+ data, "US-ASCII")));
+ String line;
+ do {
+ line = br.readLine();
+ } while (line != null && line.startsWith("@"));
+ if (line == null) {
+ this.logger.fine("We were given an empty descriptor for "
+ + "parsing. Ignoring.");
+ return;
+ }
+ SimpleDateFormat parseFormat =
+ new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ parseFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ if (line.equals("network-status-version 3")) {
+ // TODO when parsing the current consensus, check the fresh-until
+ // time to see when we switch from hourly to half-hourly
+ // consensuses
+ boolean isConsensus = true;
+ String validAfterTime = null, fingerprint = null,
+ dirSource = null;
+ long validAfter = -1L, dirKeyPublished = -1L;
+ SortedSet<String> dirSources = new TreeSet<String>();
+ SortedSet<String> serverDescriptors = new TreeSet<String>();
+ SortedSet<String> hashedRelayIdentities = new TreeSet<String>();
+ StringBuilder certificateStringBuilder = null;
+ String certificateString = null;
+ while ((line = br.readLine()) != null) {
+ if (certificateStringBuilder != null) {
+ if (line.startsWith("r ")) {
+ certificateString = certificateStringBuilder.toString();
+ certificateStringBuilder = null;
+ } else {
+ certificateStringBuilder.append(line + "\n");
+ }
+ }
+ if (line.equals("vote-status vote")) {
+ isConsensus = false;
+ } else if (line.startsWith("valid-after ")) {
+ validAfterTime = line.substring("valid-after ".length());
+ validAfter = parseFormat.parse(validAfterTime).getTime();
+ } else if (line.startsWith("dir-source ")) {
+ dirSource = line.split(" ")[2];
+ } else if (line.startsWith("vote-digest ")) {
+ dirSources.add(dirSource);
+ } else if (line.startsWith("dir-key-certificate-version ")) {
+ certificateStringBuilder = new StringBuilder();
+ certificateStringBuilder.append(line + "\n");
+ } else if (line.startsWith("fingerprint ")) {
+ fingerprint = line.split(" ")[1];
+ } else if (line.startsWith("dir-key-published ")) {
+ String dirKeyPublishedTime = line.substring(
+ "dir-key-published ".length());
+ dirKeyPublished = parseFormat.parse(dirKeyPublishedTime).
+ getTime();
+ } else if (line.startsWith("r ")) {
+ String[] parts = line.split(" ");
+ if (parts.length < 9) {
+ this.logger.log(Level.WARNING, "Could not parse r line '"
+ + line + "' in descriptor. Skipping.");
+ break;
+ }
+ String publishedTime = parts[4] + " " + parts[5];
+ String relayIdentity = Hex.encodeHexString(
+ Base64.decodeBase64(parts[2] + "=")).
+ toLowerCase();
+ String serverDesc = Hex.encodeHexString(Base64.decodeBase64(
+ parts[3] + "=")).toLowerCase();
+ serverDescriptors.add(publishedTime + "," + relayIdentity
+ + "," + serverDesc);
+ hashedRelayIdentities.add(DigestUtils.shaHex(
+ Base64.decodeBase64(parts[2] + "=")).
+ toUpperCase());
+ }
+ }
+ if (isConsensus) {
+ if (this.rdd != null) {
+ this.rdd.haveParsedConsensus(validAfterTime, dirSources,
+ serverDescriptors);
+ }
+ if (this.aw != null) {
+ this.aw.storeConsensus(data, validAfter);
+ }
+ } else {
+ if (this.aw != null || this.rdd != null) {
+ String ascii = new String(data, "US-ASCII");
+ String startToken = "network-status-version ";
+ String sigToken = "directory-signature ";
+ int start = ascii.indexOf(startToken);
+ int sig = ascii.indexOf(sigToken);
+ if (start >= 0 && sig >= 0 && sig > start) {
+ sig += sigToken.length();
+ byte[] forDigest = new byte[sig - start];
+ System.arraycopy(data, start, forDigest, 0, sig - start);
+ String digest = DigestUtils.shaHex(forDigest).toUpperCase();
+ if (this.aw != null) {
+ this.aw.storeVote(data, validAfter, dirSource, digest);
+ }
+ if (this.rdd != null) {
+ this.rdd.haveParsedVote(validAfterTime, fingerprint,
+ serverDescriptors);
+ }
+ }
+ if (certificateString != null) {
+ if (this.aw != null) {
+ this.aw.storeCertificate(certificateString.getBytes(),
+ dirSource, dirKeyPublished);
+ }
+ }
+ }
+ }
+ } else if (line.startsWith("router ")) {
+ String publishedTime = null, extraInfoDigest = null,
+ relayIdentifier = null;
+ long published = -1L;
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("published ")) {
+ publishedTime = line.substring("published ".length());
+ published = parseFormat.parse(publishedTime).getTime();
+ } else if (line.startsWith("opt fingerprint") ||
+ line.startsWith("fingerprint")) {
+ relayIdentifier = line.substring(line.startsWith("opt ") ?
+ "opt fingerprint".length() : "fingerprint".length()).
+ replaceAll(" ", "").toLowerCase();
+ } else if (line.startsWith("opt extra-info-digest ") ||
+ line.startsWith("extra-info-digest ")) {
+ extraInfoDigest = line.startsWith("opt ") ?
+ line.split(" ")[2].toLowerCase() :
+ line.split(" ")[1].toLowerCase();
+ }
+ }
+ String ascii = new String(data, "US-ASCII");
+ String startToken = "router ";
+ String sigToken = "\nrouter-signature\n";
+ int start = ascii.indexOf(startToken);
+ int sig = ascii.indexOf(sigToken) + sigToken.length();
+ String digest = null;
+ if (start >= 0 || sig >= 0 || sig > start) {
+ byte[] forDigest = new byte[sig - start];
+ System.arraycopy(data, start, forDigest, 0, sig - start);
+ digest = DigestUtils.shaHex(forDigest);
+ }
+ if (this.aw != null && digest != null) {
+ this.aw.storeServerDescriptor(data, digest, published);
+ }
+ if (this.rdd != null && digest != null) {
+ this.rdd.haveParsedServerDescriptor(publishedTime,
+ relayIdentifier, digest, extraInfoDigest);
+ }
+ } else if (line.startsWith("extra-info ")) {
+ String publishedTime = null, relayIdentifier = line.split(" ")[2];
+ long published = -1L;
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("published ")) {
+ publishedTime = line.substring("published ".length());
+ published = parseFormat.parse(publishedTime).getTime();
+ }
+ }
+ String ascii = new String(data, "US-ASCII");
+ String startToken = "extra-info ";
+ String sigToken = "\nrouter-signature\n";
+ String digest = null;
+ int start = ascii.indexOf(startToken);
+ if (start > 0) {
+ /* Do not confuse "extra-info " in "@type extra-info 1.0" with
+ * "extra-info 0000...". TODO This is a hack that should be
+ * solved by using metrics-lib some day. */
+ start = ascii.indexOf("\n" + startToken);
+ if (start > 0) {
+ start++;
+ }
+ }
+ int sig = ascii.indexOf(sigToken) + sigToken.length();
+ if (start >= 0 || sig >= 0 || sig > start) {
+ byte[] forDigest = new byte[sig - start];
+ System.arraycopy(data, start, forDigest, 0, sig - start);
+ digest = DigestUtils.shaHex(forDigest);
+ }
+ if (this.aw != null && digest != null) {
+ this.aw.storeExtraInfoDescriptor(data, digest, published);
+ }
+ if (this.rdd != null && digest != null) {
+ this.rdd.haveParsedExtraInfoDescriptor(publishedTime,
+ relayIdentifier.toLowerCase(), digest);
+ }
+ }
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not parse descriptor. "
+ + "Skipping.", e);
+ } catch (ParseException e) {
+ this.logger.log(Level.WARNING, "Could not parse descriptor. "
+ + "Skipping.", e);
+ }
+ }
+}
+
diff --git a/src/org/torproject/ernie/db/torperf/TorperfDownloader.java b/src/org/torproject/ernie/db/torperf/TorperfDownloader.java
new file mode 100644
index 0000000..1ac593a
--- /dev/null
+++ b/src/org/torproject/ernie/db/torperf/TorperfDownloader.java
@@ -0,0 +1,573 @@
+/* Copyright 2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.torperf;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.text.SimpleDateFormat;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.TimeZone;
+import java.util.TreeMap;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/* Download possibly truncated Torperf .data and .extradata files from
+ * configured sources, append them to the files we already have, and merge
+ * the two files into the .tpf format. */
+public class TorperfDownloader {
+
+ private File torperfOutputDirectory = null;
+ private SortedMap<String, String> torperfSources = null;
+ private List<String> torperfFilesLines = null;
+ private Logger logger = null;
+ private SimpleDateFormat dateFormat;
+
+ public TorperfDownloader(File torperfOutputDirectory,
+ SortedMap<String, String> torperfSources,
+ List<String> torperfFilesLines) {
+ if (torperfOutputDirectory == null) {
+ throw new IllegalArgumentException();
+ }
+ this.torperfOutputDirectory = torperfOutputDirectory;
+ this.torperfSources = torperfSources;
+ this.torperfFilesLines = torperfFilesLines;
+ if (!this.torperfOutputDirectory.exists()) {
+ this.torperfOutputDirectory.mkdirs();
+ }
+ this.logger = Logger.getLogger(TorperfDownloader.class.getName());
+ this.dateFormat = new SimpleDateFormat("yyyy-MM-dd");
+ this.dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ this.readLastMergedTimestamps();
+ for (String torperfFilesLine : this.torperfFilesLines) {
+ this.downloadAndMergeFiles(torperfFilesLine);
+ }
+ this.writeLastMergedTimestamps();
+ }
+
+ private File torperfLastMergedFile =
+ new File("stats/torperf-last-merged");
+ SortedMap<String, String> lastMergedTimestamps =
+ new TreeMap<String, String>();
+ private void readLastMergedTimestamps() {
+ if (!this.torperfLastMergedFile.exists()) {
+ return;
+ }
+ try {
+ BufferedReader br = new BufferedReader(new FileReader(
+ this.torperfLastMergedFile));
+ String line;
+ while ((line = br.readLine()) != null) {
+ String[] parts = line.split(" ");
+ String fileName = null, timestamp = null;
+ if (parts.length == 2) {
+ try {
+ Double.parseDouble(parts[1]);
+ fileName = parts[0];
+ timestamp = parts[1];
+ } catch (NumberFormatException e) {
+ /* Handle below. */
+ }
+ }
+ if (fileName == null || timestamp == null) {
+ this.logger.log(Level.WARNING, "Invalid line '" + line + "' in "
+ + this.torperfLastMergedFile.getAbsolutePath() + ". "
+ + "Ignoring past history of merging .data and .extradata "
+ + "files.");
+ this.lastMergedTimestamps.clear();
+ break;
+ }
+ this.lastMergedTimestamps.put(fileName, timestamp);
+ }
+ br.close();
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Error while reading '"
+ + this.torperfLastMergedFile.getAbsolutePath() + "'. Ignoring "
+ + "past history of merging .data and .extradata files.");
+ this.lastMergedTimestamps.clear();
+ }
+ }
+
+ private void writeLastMergedTimestamps() {
+ try {
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ this.torperfLastMergedFile));
+ for (Map.Entry<String, String> e :
+ this.lastMergedTimestamps.entrySet()) {
+ String fileName = e.getKey();
+ String timestamp = e.getValue();
+ bw.write(fileName + " " + timestamp + "\n");
+ }
+ bw.close();
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Error while writing '"
+ + this.torperfLastMergedFile.getAbsolutePath() + "'. This may "
+ + "result in ignoring history of merging .data and .extradata "
+ + "files in the next execution.", e);
+ }
+ }
+
+ private void downloadAndMergeFiles(String torperfFilesLine) {
+ String[] parts = torperfFilesLine.split(" ");
+ String sourceName = parts[1];
+ int fileSize = -1;
+ try {
+ fileSize = Integer.parseInt(parts[2]);
+ } catch (NumberFormatException e) {
+ this.logger.log(Level.WARNING, "Could not parse file size in "
+ + "TorperfFiles configuration line '" + torperfFilesLine
+ + "'.");
+ return;
+ }
+
+ /* Download and append the .data file. */
+ String dataFileName = parts[3];
+ String sourceBaseUrl = torperfSources.get(sourceName);
+ String dataUrl = sourceBaseUrl + dataFileName;
+ String dataOutputFileName = sourceName + "-" + dataFileName;
+ File dataOutputFile = new File(torperfOutputDirectory,
+ dataOutputFileName);
+ boolean downloadedDataFile = this.downloadAndAppendFile(dataUrl,
+ dataOutputFile, true);
+
+ /* Download and append the .extradata file. */
+ String extradataFileName = parts[4];
+ String extradataUrl = sourceBaseUrl + extradataFileName;
+ String extradataOutputFileName = sourceName + "-" + extradataFileName;
+ File extradataOutputFile = new File(torperfOutputDirectory,
+ extradataOutputFileName);
+ boolean downloadedExtradataFile = this.downloadAndAppendFile(
+ extradataUrl, extradataOutputFile, false);
+
+ /* Merge both files into .tpf format. */
+ if (!downloadedDataFile && !downloadedExtradataFile) {
+ return;
+ }
+ String skipUntil = null;
+ if (this.lastMergedTimestamps.containsKey(dataOutputFileName)) {
+ skipUntil = this.lastMergedTimestamps.get(dataOutputFileName);
+ }
+ try {
+ skipUntil = this.mergeFiles(dataOutputFile, extradataOutputFile,
+ sourceName, fileSize, skipUntil);
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed merging " + dataOutputFile
+ + " and " + extradataOutputFile + ".", e);
+ }
+ if (skipUntil != null) {
+ this.lastMergedTimestamps.put(dataOutputFileName, skipUntil);
+ }
+ }
+
+ private boolean downloadAndAppendFile(String url, File outputFile,
+ boolean isDataFile) {
+
+ /* Read an existing output file to determine which line will be the
+ * first to append to it. */
+ String lastTimestampLine = null;
+ int linesAfterLastTimestampLine = 0;
+ if (outputFile.exists() && outputFile.lastModified() >
+ System.currentTimeMillis() - 330L * 60L * 1000L) {
+ return false;
+ } else if (outputFile.exists()) {
+ try {
+ BufferedReader br = new BufferedReader(new FileReader(
+ outputFile));
+ String line;
+ while ((line = br.readLine()) != null) {
+ if (isDataFile || line.contains(" LAUNCH")) {
+ lastTimestampLine = line;
+ linesAfterLastTimestampLine = 0;
+ } else {
+ linesAfterLastTimestampLine++;
+ }
+ }
+ br.close();
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed reading '"
+ + outputFile.getAbsolutePath() + "' to determine the first "
+ + "line to append to it.", e);
+ return false;
+ }
+ }
+ try {
+ this.logger.fine("Downloading " + (isDataFile ? ".data" :
+ ".extradata") + " file from '" + url + "' and merging it into "
+ + "'" + outputFile.getAbsolutePath() + "'.");
+ URL u = new URL(url);
+ HttpURLConnection huc = (HttpURLConnection) u.openConnection();
+ huc.setRequestMethod("GET");
+ huc.connect();
+ BufferedReader br = new BufferedReader(new InputStreamReader(
+ huc.getInputStream()));
+ String line;
+ BufferedWriter bw = new BufferedWriter(new FileWriter(outputFile,
+ true));
+ boolean copyLines = lastTimestampLine == null;
+ while ((line = br.readLine()) != null) {
+ if (copyLines && linesAfterLastTimestampLine == 0) {
+ if (isDataFile || line.contains(" LAUNCH")) {
+ lastTimestampLine = line;
+ }
+ bw.write(line + "\n");
+ } else if (copyLines && linesAfterLastTimestampLine > 0) {
+ linesAfterLastTimestampLine--;
+ } else if (line.equals(lastTimestampLine)) {
+ copyLines = true;
+ }
+ }
+ bw.close();
+ br.close();
+ if (!copyLines) {
+ this.logger.warning("The last timestamp line in '"
+ + outputFile.getAbsolutePath() + "' is not contained in the "
+ + "new file downloaded from '" + url + "'. Cannot append "
+ + "new lines without possibly leaving a gap. Skipping.");
+ return false;
+ }
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed downloading and/or merging '"
+ + url + "'.", e);
+ return false;
+ }
+ if (lastTimestampLine == null) {
+ this.logger.warning("'" + outputFile.getAbsolutePath()
+ + "' doesn't contain any timestamp lines. Unable to check "
+ + "whether that file is stale or not.");
+ } else {
+ long lastTimestampMillis = -1L;
+ if (isDataFile) {
+ lastTimestampMillis = Long.parseLong(lastTimestampLine.substring(
+ 0, lastTimestampLine.indexOf(" "))) * 1000L;
+ } else {
+ lastTimestampMillis = Long.parseLong(lastTimestampLine.substring(
+ lastTimestampLine.indexOf(" LAUNCH=") + " LAUNCH=".length(),
+ lastTimestampLine.indexOf(".",
+ lastTimestampLine.indexOf(" LAUNCH=")))) * 1000L;
+ }
+ if (lastTimestampMillis < System.currentTimeMillis()
+ - 330L * 60L * 1000L) {
+ this.logger.warning("The last timestamp in '"
+ + outputFile.getAbsolutePath() + "' is more than 5:30 hours "
+ + "old: " + lastTimestampMillis);
+ }
+ }
+ return true;
+ }
+
+ private String mergeFiles(File dataFile, File extradataFile,
+ String source, int fileSize, String skipUntil) throws IOException {
+ SortedMap<String, String> config = new TreeMap<String, String>();
+ config.put("SOURCE", source);
+ config.put("FILESIZE", String.valueOf(fileSize));
+ if (!dataFile.exists() || !extradataFile.exists()) {
+ this.logger.warning("File " + dataFile.getAbsolutePath() + " or "
+ + extradataFile.getAbsolutePath() + " is missing.");
+ return null;
+ }
+ this.logger.fine("Merging " + dataFile.getAbsolutePath() + " and "
+ + extradataFile.getAbsolutePath() + " into .tpf format.");
+ BufferedReader brD = new BufferedReader(new FileReader(dataFile)),
+ brE = new BufferedReader(new FileReader(extradataFile));
+ String lineD = brD.readLine(), lineE = brE.readLine();
+ int d = 1, e = 1;
+ String maxDataComplete = null, maxUsedAt = null;
+ while (lineD != null) {
+
+ /* Parse .data line. Every valid .data line will go into the .tpf
+ * format, either with additional information from the .extradata
+ * file or without it. */
+ if (lineD.isEmpty()) {
+ this.logger.finer("Skipping empty line " + dataFile.getName()
+ + ":" + d++ + ".");
+ lineD = brD.readLine();
+ continue;
+ }
+ SortedMap<String, String> data = this.parseDataLine(lineD);
+ if (data == null) {
+ this.logger.finer("Skipping illegal line " + dataFile.getName()
+ + ":" + d++ + " '" + lineD + "'.");
+ lineD = brD.readLine();
+ continue;
+ }
+ String dataComplete = data.get("DATACOMPLETE");
+ double dataCompleteSeconds = Double.parseDouble(dataComplete);
+ if (skipUntil != null && dataComplete.compareTo(skipUntil) < 0) {
+ this.logger.finer("Skipping " + dataFile.getName() + ":"
+ + d++ + " which we already processed before.");
+ lineD = brD.readLine();
+ continue;
+ }
+ maxDataComplete = dataComplete;
+
+ /* Parse .extradata line if available and try to find the one that
+ * matches the .data line. */
+ SortedMap<String, String> extradata = null;
+ while (lineE != null) {
+ if (lineE.isEmpty()) {
+ this.logger.finer("Skipping " + extradataFile.getName() + ":"
+ + e++ + " which is empty.");
+ lineE = brE.readLine();
+ continue;
+ }
+ if (lineE.startsWith("BUILDTIMEOUT_SET ")) {
+ this.logger.finer("Skipping " + extradataFile.getName() + ":"
+ + e++ + " which is a BUILDTIMEOUT_SET line.");
+ lineE = brE.readLine();
+ continue;
+ } else if (lineE.startsWith("ok ") ||
+ lineE.startsWith("error ")) {
+ this.logger.finer("Skipping " + extradataFile.getName() + ":"
+ + e++ + " which is in the old format.");
+ lineE = brE.readLine();
+ continue;
+ }
+ extradata = this.parseExtradataLine(lineE);
+ if (extradata == null) {
+ this.logger.finer("Skipping illegal line "
+ + extradataFile.getName() + ":" + e++ + " '" + lineE
+ + "'.");
+ lineE = brE.readLine();
+ continue;
+ }
+ if (!extradata.containsKey("USED_AT")) {
+ this.logger.finer("Skipping " + extradataFile.getName() + ":"
+ + e++ + " which doesn't contain a USED_AT element.");
+ lineE = brE.readLine();
+ continue;
+ }
+ String usedAt = extradata.get("USED_AT");
+ double usedAtSeconds = Double.parseDouble(usedAt);
+ if (skipUntil != null && usedAt.compareTo(skipUntil) < 0) {
+ this.logger.finer("Skipping " + extradataFile.getName() + ":"
+ + e++ + " which we already processed before.");
+ lineE = brE.readLine();
+ continue;
+ }
+ maxUsedAt = usedAt;
+ if (Math.abs(usedAtSeconds - dataCompleteSeconds) <= 1.0) {
+ this.logger.fine("Merging " + extradataFile.getName() + ":"
+ + e++ + " into the current .data line.");
+ lineE = brE.readLine();
+ break;
+ } else if (usedAtSeconds > dataCompleteSeconds) {
+ this.logger.finer("Comparing " + extradataFile.getName()
+ + " to the next .data line.");
+ extradata = null;
+ break;
+ } else {
+ this.logger.finer("Skipping " + extradataFile.getName() + ":"
+ + e++ + " which is too old to be merged with "
+ + dataFile.getName() + ":" + d + ".");
+ lineE = brE.readLine();
+ continue;
+ }
+ }
+
+ /* Write output line to .tpf file. */
+ SortedMap<String, String> keysAndValues =
+ new TreeMap<String, String>();
+ if (extradata != null) {
+ keysAndValues.putAll(extradata);
+ }
+ keysAndValues.putAll(data);
+ keysAndValues.putAll(config);
+ this.logger.fine("Writing " + dataFile.getName() + ":" + d++ + ".");
+ lineD = brD.readLine();
+ try {
+ this.writeTpfLine(source, fileSize, keysAndValues);
+ } catch (IOException ex) {
+ this.logger.log(Level.WARNING, "Error writing output line. "
+ + "Aborting to merge " + dataFile.getName() + " and "
+ + extradataFile.getName() + ".", ex);
+ break;
+ }
+ }
+ brD.close();
+ brE.close();
+ this.writeCachedTpfLines();
+ if (maxDataComplete == null) {
+ return maxUsedAt;
+ } else if (maxUsedAt == null) {
+ return maxDataComplete;
+ } else if (maxDataComplete.compareTo(maxUsedAt) > 0) {
+ return maxUsedAt;
+ } else {
+ return maxDataComplete;
+ }
+ }
+
+ private SortedMap<Integer, String> dataTimestamps;
+ private SortedMap<String, String> parseDataLine(String line) {
+ String[] parts = line.trim().split(" ");
+ if (line.length() == 0 || parts.length < 20) {
+ return null;
+ }
+ if (this.dataTimestamps == null) {
+ this.dataTimestamps = new TreeMap<Integer, String>();
+ this.dataTimestamps.put(0, "START");
+ this.dataTimestamps.put(2, "SOCKET");
+ this.dataTimestamps.put(4, "CONNECT");
+ this.dataTimestamps.put(6, "NEGOTIATE");
+ this.dataTimestamps.put(8, "REQUEST");
+ this.dataTimestamps.put(10, "RESPONSE");
+ this.dataTimestamps.put(12, "DATAREQUEST");
+ this.dataTimestamps.put(14, "DATARESPONSE");
+ this.dataTimestamps.put(16, "DATACOMPLETE");
+ this.dataTimestamps.put(21, "DATAPERC10");
+ this.dataTimestamps.put(23, "DATAPERC20");
+ this.dataTimestamps.put(25, "DATAPERC30");
+ this.dataTimestamps.put(27, "DATAPERC40");
+ this.dataTimestamps.put(29, "DATAPERC50");
+ this.dataTimestamps.put(31, "DATAPERC60");
+ this.dataTimestamps.put(33, "DATAPERC70");
+ this.dataTimestamps.put(35, "DATAPERC80");
+ this.dataTimestamps.put(37, "DATAPERC90");
+ }
+ SortedMap<String, String> data = new TreeMap<String, String>();
+ try {
+ for (Map.Entry<Integer, String> e : this.dataTimestamps.entrySet()) {
+ int i = e.getKey();
+ if (parts.length > i + 1) {
+ String key = e.getValue();
+ String value = String.format("%s.%02d", parts[i],
+ Integer.parseInt(parts[i + 1]) / 10000);
+ data.put(key, value);
+ }
+ }
+ } catch (NumberFormatException e) {
+ return null;
+ }
+ data.put("WRITEBYTES", parts[18]);
+ data.put("READBYTES", parts[19]);
+ if (parts.length >= 21) {
+ data.put("DIDTIMEOUT", parts[20]);
+ }
+ return data;
+ }
+
+ private SortedMap<String, String> parseExtradataLine(String line) {
+ String[] parts = line.split(" ");
+ SortedMap<String, String> extradata = new TreeMap<String, String>();
+ String previousKey = null;
+ for (String part : parts) {
+ String[] keyAndValue = part.split("=", -1);
+ if (keyAndValue.length == 2) {
+ String key = keyAndValue[0];
+ previousKey = key;
+ String value = keyAndValue[1];
+ if (value.contains(".") && value.lastIndexOf(".") ==
+ value.length() - 2) {
+ /* Make sure that all floats have two trailing digits. */
+ value += "0";
+ }
+ extradata.put(key, value);
+ } else if (keyAndValue.length == 1 && previousKey != null) {
+ String value = keyAndValue[0];
+ if (previousKey.equals("STREAM_FAIL_REASONS") &&
+ (value.equals("MISC") || value.equals("EXITPOLICY") ||
+ value.equals("RESOURCELIMIT") ||
+ value.equals("RESOLVEFAILED"))) {
+ extradata.put(previousKey, extradata.get(previousKey) + ":"
+ + value);
+ } else {
+ return null;
+ }
+ } else {
+ return null;
+ }
+ }
+ return extradata;
+ }
+
+ private String cachedSource;
+ private int cachedFileSize;
+ private String cachedStartDate;
+ private SortedMap<String, String> cachedTpfLines;
+ private void writeTpfLine(String source, int fileSize,
+ SortedMap<String, String> keysAndValues) throws IOException {
+ StringBuilder sb = new StringBuilder();
+ int written = 0;
+ for (Map.Entry<String, String> keyAndValue :
+ keysAndValues.entrySet()) {
+ String key = keyAndValue.getKey();
+ String value = keyAndValue.getValue();
+ sb.append((written++ > 0 ? " " : "") + key + "=" + value);
+ }
+ String line = sb.toString();
+ String startString = keysAndValues.get("START");
+ long startMillis = Long.parseLong(startString.substring(0,
+ startString.indexOf("."))) * 1000L;
+ String startDate = dateFormat.format(startMillis);
+ if (this.cachedTpfLines == null || !source.equals(this.cachedSource) ||
+ fileSize != this.cachedFileSize ||
+ !startDate.equals(this.cachedStartDate)) {
+ this.writeCachedTpfLines();
+ this.readTpfLinesToCache(source, fileSize, startDate);
+ }
+ if (!this.cachedTpfLines.containsKey(startString) ||
+ line.length() > this.cachedTpfLines.get(startString).length()) {
+ this.cachedTpfLines.put(startString, line);
+ }
+ }
+
+ private void readTpfLinesToCache(String source, int fileSize,
+ String startDate) throws IOException {
+ this.cachedTpfLines = new TreeMap<String, String>();
+ this.cachedSource = source;
+ this.cachedFileSize = fileSize;
+ this.cachedStartDate = startDate;
+ File tpfFile = new File(torperfOutputDirectory,
+ startDate.replaceAll("-", "/") + "/"
+ + source + "-" + String.valueOf(fileSize) + "-" + startDate
+ + ".tpf");
+ if (!tpfFile.exists()) {
+ return;
+ }
+ BufferedReader br = new BufferedReader(new FileReader(tpfFile));
+ String line;
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("@type ")) {
+ continue;
+ }
+ if (line.contains("START=")) {
+ String startString = line.substring(line.indexOf("START=")
+ + "START=".length()).split(" ")[0];
+ this.cachedTpfLines.put(startString, line);
+ }
+ }
+ br.close();
+ }
+
+ private void writeCachedTpfLines() throws IOException {
+ if (this.cachedSource == null || this.cachedFileSize == 0 ||
+ this.cachedStartDate == null || this.cachedTpfLines == null) {
+ return;
+ }
+ File tpfFile = new File(torperfOutputDirectory,
+ this.cachedStartDate.replaceAll("-", "/")
+ + "/" + this.cachedSource + "-"
+ + String.valueOf(this.cachedFileSize) + "-"
+ + this.cachedStartDate + ".tpf");
+ tpfFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(tpfFile));
+ bw.write("@type torperf 1.0\n");
+ for (String line : this.cachedTpfLines.values()) {
+ bw.write(line + "\n");
+ }
+ bw.close();
+ this.cachedSource = null;
+ this.cachedFileSize = 0;
+ this.cachedStartDate = null;
+ this.cachedTpfLines = null;
+ }
+}
+
diff --git a/test/org/torproject/ernie/db/ArchiveReaderTest.java b/test/org/torproject/ernie/db/ArchiveReaderTest.java
deleted file mode 100644
index cb3dea7..0000000
--- a/test/org/torproject/ernie/db/ArchiveReaderTest.java
+++ /dev/null
@@ -1,32 +0,0 @@
-/* Copyright 2011 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.File;
-
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
-
-public class ArchiveReaderTest {
-
- private File tempArchivesDirectory;
- private File tempStatsDirectory;
-
- @Rule
- public TemporaryFolder folder = new TemporaryFolder();
-
- @Before
- public void createTempDirectories() {
- this.tempArchivesDirectory = folder.newFolder("sanitized-bridges");
- this.tempStatsDirectory = folder.newFolder("stats");
- }
-
- @Test(expected = IllegalArgumentException.class)
- public void testRelayDescriptorParserNull() {
- new ArchiveReader(null, this.tempArchivesDirectory,
- this.tempStatsDirectory, false);
- }
-}
-
diff --git a/test/org/torproject/ernie/db/ArchiveWriterTest.java b/test/org/torproject/ernie/db/ArchiveWriterTest.java
deleted file mode 100644
index 2a18e74..0000000
--- a/test/org/torproject/ernie/db/ArchiveWriterTest.java
+++ /dev/null
@@ -1,19 +0,0 @@
-/* Copyright 2011 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
-
-public class ArchiveWriterTest {
-
- @Rule
- public TemporaryFolder folder = new TemporaryFolder();
-
- @Test(expected = IllegalArgumentException.class)
- public void testArchivesDirectoryNull() {
- new ArchiveWriter(null);
- }
-}
-
diff --git a/test/org/torproject/ernie/db/BridgeSnapshotReaderTest.java b/test/org/torproject/ernie/db/BridgeSnapshotReaderTest.java
deleted file mode 100644
index f1ad03b..0000000
--- a/test/org/torproject/ernie/db/BridgeSnapshotReaderTest.java
+++ /dev/null
@@ -1,32 +0,0 @@
-/* Copyright 2011 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.File;
-
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
-
-public class BridgeSnapshotReaderTest {
-
- private File tempBridgeDirectoriesDirectory;
- private File tempStatsDirectory;
-
- @Rule
- public TemporaryFolder folder = new TemporaryFolder();
-
- @Before
- public void createTempDirectories() {
- this.tempBridgeDirectoriesDirectory = folder.newFolder("bridges");
- this.tempStatsDirectory = folder.newFolder("stats");
- }
-
- @Test(expected = IllegalArgumentException.class)
- public void testBridgeDescriptorParserNull() {
- new BridgeSnapshotReader(null, this.tempBridgeDirectoriesDirectory,
- this.tempStatsDirectory);
- }
-}
-
diff --git a/test/org/torproject/ernie/db/CachedRelayDescriptorReaderTest.java b/test/org/torproject/ernie/db/CachedRelayDescriptorReaderTest.java
deleted file mode 100644
index f101249..0000000
--- a/test/org/torproject/ernie/db/CachedRelayDescriptorReaderTest.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/* Copyright 2011 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.File;
-import java.util.ArrayList;
-
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
-
-public class CachedRelayDescriptorReaderTest {
-
- private File tempStatsDirectory;
-
- @Rule
- public TemporaryFolder folder = new TemporaryFolder();
-
- @Before
- public void createTempDirectories() {
- this.tempStatsDirectory = folder.newFolder("stats");
- }
-
- @Test(expected = IllegalArgumentException.class)
- public void testRelayDescriptorParserNull() {
- new CachedRelayDescriptorReader(null, new ArrayList<String>(),
- this.tempStatsDirectory);
- }
-}
-
diff --git a/test/org/torproject/ernie/db/SanitizedBridgesWriterTest.java b/test/org/torproject/ernie/db/SanitizedBridgesWriterTest.java
deleted file mode 100644
index f8b4cdd..0000000
--- a/test/org/torproject/ernie/db/SanitizedBridgesWriterTest.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/* Copyright 2011 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.File;
-
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
-
-public class SanitizedBridgesWriterTest {
-
- private File tempSanitizedBridgesDirectory;
- private File tempStatsDirectory;
-
- @Rule
- public TemporaryFolder folder = new TemporaryFolder();
-
- @Before
- public void createTempDirectories() {
- this.tempSanitizedBridgesDirectory =
- folder.newFolder("sanitized-bridges");
- this.tempStatsDirectory = folder.newFolder("stats");
- }
-
- @Test(expected = IllegalArgumentException.class)
- public void testSanitizedBridgesDirectoryNull() {
- new SanitizedBridgesWriter(null, this.tempStatsDirectory, false, -1L);
- }
-
- @Test(expected = IllegalArgumentException.class)
- public void testStatsDirectoryNull() {
- new SanitizedBridgesWriter(this.tempSanitizedBridgesDirectory, null,
- false, -1L);
- }
-}
-
More information about the tor-commits
mailing list