[tor-commits] [metrics-web/master] Prepare bridge statistics as part of metrics-web.
karsten at torproject.org
Thu Mar 3 12:50:53 UTC 2011
commit d547c8ecbadca64b046a041d94e80dddbeecc8c3
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Wed Mar 2 20:31:42 2011 +0100
Prepare bridge statistics as part of metrics-web.
---
build.xml | 1 +
config.template | 15 +
lib/junit-4.8.2.jar | Bin 0 -> 237344 bytes
.../ernie/cron/BridgeDescriptorParser.java | 141 ++++++
.../ernie/cron/BridgeStatsFileHandler.java | 519 ++++++++++++++++++++
src/org/torproject/ernie/cron/Configuration.java | 27 +
.../ernie/cron/ConsensusStatsFileHandler.java | 282 +++++++++++
src/org/torproject/ernie/cron/Main.java | 34 ++-
.../ernie/cron/RelayDescriptorParser.java | 17 +-
.../ernie/cron/SanitizedBridgesReader.java | 114 +++++
.../ernie/test/SanitizedBridgesReaderTest.java | 33 ++
11 files changed, 1181 insertions(+), 2 deletions(-)
diff --git a/build.xml b/build.xml
index 446572b..cdae4e1 100644
--- a/build.xml
+++ b/build.xml
@@ -14,6 +14,7 @@
<pathelement path="${classes}"/>
<pathelement location="lib/commons-codec-1.4.jar"/>
<pathelement location="lib/postgresql-8.4-702.jdbc3.jar"/>
+ <pathelement location="lib/junit-4.8.2.jar"/>
</path>
<target name="init">
diff --git a/config.template b/config.template
index 1d0701c..f2dc9cb 100644
--- a/config.template
+++ b/config.template
@@ -10,6 +10,18 @@
## again, but it can be confusing to users who don't know about it.
#KeepDirectoryArchiveImportHistory 0
#
+## Import sanitized bridges from disk, if available
+#ImportSanitizedBridges 0
+#
+## Relative path to directory to import sanitized bridges from
+#SanitizedBridgesDirectory bridges/
+#
+## Keep a history of imported sanitized bridge descriptors. This history
+## can be useful when importing from a changing data source to avoid
+## importing descriptors more than once, but it can be confusing to users
+## who don't know about it.
+#KeepSanitizedBridgesImportHistory 0
+#
## Write relay descriptors to the database
#WriteRelayDescriptorDatabase 0
#
@@ -27,4 +39,7 @@
## Write statistics about the current consensus and votes to the
## website
#WriteConsensusHealth 0
+#
+## Write bridge stats to disk
+#WriteBridgeStats 0
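
For context, the new options are all disabled by default; a config file that actually enables the bridge statistics pipeline would contain lines like the following (any value other than 0 enables an option, as parsed by Configuration.java below; the directory path shown just repeats the default and is only an example):

    ImportSanitizedBridges 1
    SanitizedBridgesDirectory bridges/
    KeepSanitizedBridgesImportHistory 1
    WriteBridgeStats 1
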
diff --git a/lib/junit-4.8.2.jar b/lib/junit-4.8.2.jar
new file mode 100644
index 0000000..5b4bb84
Binary files /dev/null and b/lib/junit-4.8.2.jar differ
diff --git a/src/org/torproject/ernie/cron/BridgeDescriptorParser.java b/src/org/torproject/ernie/cron/BridgeDescriptorParser.java
new file mode 100644
index 0000000..4777f58
--- /dev/null
+++ b/src/org/torproject/ernie/cron/BridgeDescriptorParser.java
@@ -0,0 +1,141 @@
+/* Copyright 2011 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.cron;
+
+import java.io.*;
+import java.text.*;
+import java.util.*;
+import java.util.logging.*;
+import org.apache.commons.codec.digest.*;
+
+public class BridgeDescriptorParser {
+ private ConsensusStatsFileHandler csfh;
+ private BridgeStatsFileHandler bsfh;
+ private Logger logger;
+ public BridgeDescriptorParser(ConsensusStatsFileHandler csfh,
+ BridgeStatsFileHandler bsfh) {
+ this.csfh = csfh;
+ this.bsfh = bsfh;
+ this.logger =
+ Logger.getLogger(BridgeDescriptorParser.class.getName());
+ }
+ public void parse(byte[] allData, String dateTime, boolean sanitized) {
+ try {
+ BufferedReader br = new BufferedReader(new StringReader(
+ new String(allData, "US-ASCII")));
+ SimpleDateFormat timeFormat = new SimpleDateFormat(
+ "yyyy-MM-dd HH:mm:ss");
+ timeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ String hashedIdentity = null, platformLine = null,
+ publishedLine = null, geoipStartTimeLine = null,
+ bridgeStatsEndLine = null;
+ boolean skip = false;
+ String line = null;
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("r ")) {
+ int runningBridges = 0;
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("s ") && line.contains(" Running")) {
+ runningBridges++;
+ }
+ }
+ if (this.csfh != null) {
+ this.csfh.addBridgeConsensusResults(dateTime, runningBridges);
+ }
+ } else if (line.startsWith("router ")) {
+ } else if (line.startsWith("extra-info ")) {
+ hashedIdentity = sanitized ? line.split(" ")[2]
+ : DigestUtils.shaHex(line.split(" ")[2]).toUpperCase();
+ if (this.bsfh != null) {
+ skip = this.bsfh.isKnownRelay(hashedIdentity);
+ }
+ } else if (!skip && line.startsWith("platform ")) {
+ platformLine = line;
+ } else if (!skip && line.startsWith("published ")) {
+ publishedLine = line;
+ } else if (line.startsWith("opt fingerprint") ||
+ line.startsWith("fingerprint")) {
+ String identity = line.substring(line.startsWith("opt ") ?
+ "opt fingerprint".length() : "fingerprint".length()).
+ replaceAll(" ", "").toLowerCase();
+ hashedIdentity = sanitized ? identity
+ : DigestUtils.shaHex(identity).toUpperCase();
+ } else if (!skip && line.startsWith("geoip-start-time ")) {
+ geoipStartTimeLine = line;
+ } else if (!skip && line.startsWith("geoip-client-origins")
+ && line.split(" ").length > 1) {
+ if (publishedLine == null ||
+ geoipStartTimeLine == null) {
+ this.logger.warning("Either published line or "
+ + "geoip-start-time line is not present in "
+ + (sanitized ? "sanitized" : "non-sanitized")
+ + " bridge descriptors from " + dateTime + ".");
+ break;
+ }
+ long published = timeFormat.parse(publishedLine.
+ substring("published ".length())).getTime();
+ long started = timeFormat.parse(geoipStartTimeLine.
+ substring("geoip-start-time ".length())).getTime();
+ long seconds = (published - started) / 1000L;
+ double allUsers = 0.0D;
+ Map<String, String> obs = new HashMap<String, String>();
+ String[] parts = line.split(" ")[1].split(",");
+ for (String p : parts) {
+ String country = p.substring(0, 2);
+ double users = ((double) Long.parseLong(p.substring(3)) - 4L)
+ * 86400.0D / ((double) seconds);
+ allUsers += users;
+ obs.put(country, String.format("%.2f", users));
+ }
+ obs.put("zy", String.format("%.2f", allUsers));
+ String date = publishedLine.split(" ")[1];
+ String time = publishedLine.split(" ")[2];
+ if (this.bsfh != null) {
+ this.bsfh.addObs(hashedIdentity, date, time, obs);
+ }
+ } else if (!skip && line.startsWith("bridge-stats-end ")) {
+ bridgeStatsEndLine = line;
+ } else if (!skip && line.startsWith("bridge-ips")
+ && line.split(" ").length > 1) {
+ if (bridgeStatsEndLine == null) {
+ this.logger.warning("bridge-ips line without preceding "
+ + "bridge-stats-end line in "
+ + (sanitized ? "sanitized" : "non-sanitized")
+ + " bridge descriptor.");
+ break;
+ }
+ double allUsers = 0.0D;
+ Map<String, String> obs = new HashMap<String, String>();
+ String[] parts = line.split(" ")[1].split(",");
+ for (String p : parts) {
+ String country = p.substring(0, 2);
+ double users = (double) Long.parseLong(p.substring(3)) - 4L;
+ allUsers += users;
+ obs.put(country, String.format("%.2f", users));
+ }
+ obs.put("zy", String.format("%.2f", allUsers));
+ String date = bridgeStatsEndLine.split(" ")[1];
+ String time = bridgeStatsEndLine.split(" ")[2];
+ if (this.bsfh != null) {
+ this.bsfh.addObs(hashedIdentity, date, time, obs);
+ }
+ }
+ }
+ if (this.bsfh != null && platformLine != null &&
+ platformLine.startsWith("platform Tor 0.2.2")) {
+ String date = publishedLine.split(" ")[1];
+ String time = publishedLine.split(" ")[2];
+ this.bsfh.addZeroTwoTwoDescriptor(hashedIdentity, date, time);
+ }
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not parse bridge descriptor.",
+ e);
+ return;
+ } catch (ParseException e) {
+ this.logger.log(Level.WARNING, "Could not parse bridge descriptor.",
+ e);
+ return;
+ }
+ }
+}
+
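
The geoip-client-origins branch above normalizes each per-country count of unique IP addresses to a 24-hour period: the observation interval is the span between geoip-start-time and the descriptor's published time, and the reported count minus 4 (presumably to compensate for the rounding applied when bridges report these counts) is scaled by 86400 / seconds. A minimal, self-contained sketch of that arithmetic with made-up numbers, not taken from a real descriptor:

    public class GeoipNormalizationSketch {
      public static void main(String[] args) {
        long seconds = 172800L;   // published minus geoip-start-time: 48 hours
        long reported = 20L;      // e.g. "us=20" from a geoip-client-origins line
        double users = ((double) reported - 4L) * 86400.0D / ((double) seconds);
        System.out.printf("%.2f%n", users);   // 16 * 86400 / 172800 = 8.00 users/day for "us"
      }
    }
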
diff --git a/src/org/torproject/ernie/cron/BridgeStatsFileHandler.java b/src/org/torproject/ernie/cron/BridgeStatsFileHandler.java
new file mode 100644
index 0000000..58ee3a2
--- /dev/null
+++ b/src/org/torproject/ernie/cron/BridgeStatsFileHandler.java
@@ -0,0 +1,519 @@
+/* Copyright 2011 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.cron;
+
+import java.io.*;
+import java.sql.*;
+import java.text.*;
+import java.util.*;
+import java.util.logging.*;
+
+/**
+ * Determines estimates of bridge users per country and day from the
+ * extra-info descriptors that bridges publish. In a first step, the
+ * number of unique IP addresses that bridges see are normalized to a
+ * 24-hour period. In the next step, all bridges are excluded that have
+ * been running as a relay. Finally, observations are summed up and
+ * written to <code>stats/bridge-stats</code>.
+ */
+public class BridgeStatsFileHandler {
+
+ /**
+ * Two-letter country codes of known countries.
+ */
+ private SortedSet<String> countries;
+
+ /**
+ * Intermediate results file containing bridge user numbers by country
+ * as seen by single bridges, normalized to 24-hour periods.
+ */
+ private File bridgeStatsRawFile;
+
+ /**
+ * Bridge user numbers by country as seen by single bridges on a given
+ * day. Map keys are bridge and date written as "bridge,date", map
+ * values are lines as read from <code>stats/bridge-stats-raw</code>.
+ */
+ private SortedMap<String, Map<String, String>> bridgeUsersRaw;
+
+ /**
+ * Helper file containing the hashed relay identities of all known
+ * relays. These hashes are compared to the bridge identity hashes to
+ * exclude bridges that have been known as relays from the statistics.
+ */
+ private File hashedRelayIdentitiesFile;
+
+ /**
+ * Known hashed relay identities used to exclude bridges that have been
+ * running as relays.
+ */
+ private SortedSet<String> hashedRelays;
+
+ /**
+ * Helper file containing extra-info descriptors published by 0.2.2.x
+ * bridges. If these descriptors contain geoip-stats, they are not
+ * included in the results, because stats are very likely broken.
+ */
+ private File zeroTwoTwoDescriptorsFile;
+
+ /**
+ * Extra-info descriptors published by 0.2.2.x bridges. If these
+ * descriptors contain geoip-stats, they are not included in the
+ * results, because stats are very likely broken.
+ */
+ private SortedSet<String> zeroTwoTwoDescriptors;
+
+ /**
+ * Final results file containing the number of bridge users per country
+ * and day. This file is not read in during initialization, but
+ * overwritten at the end of the execution.
+ */
+ private File bridgeStatsFile;
+
+ /**
+ * Logger for this class.
+ */
+ private Logger logger;
+
+ /* Database connection string. */
+ private String connectionURL = null;
+
+ /**
+ * Initializes this class, including reading in intermediate results
+ * files <code>stats/bridge-stats-raw</code>, <code>stats/hashed-relay-identities</code>,
+ * and <code>stats/v022-bridge-descriptors</code>.
+ */
+ public BridgeStatsFileHandler(String connectionURL) {
+
+ /* Initialize set of known countries. */
+ this.countries = new TreeSet<String>();
+ this.countries.add("zy");
+
+ /* Initialize local data structures to hold results. */
+ this.bridgeUsersRaw = new TreeMap<String, Map<String, String>>();
+ this.hashedRelays = new TreeSet<String>();
+ this.zeroTwoTwoDescriptors = new TreeSet<String>();
+
+ /* Initialize file names for intermediate and final results. */
+ this.bridgeStatsRawFile = new File("stats/bridge-stats-raw");
+ this.bridgeStatsFile = new File("stats/bridge-stats");
+ this.hashedRelayIdentitiesFile = new File(
+ "stats/hashed-relay-identities");
+ this.zeroTwoTwoDescriptorsFile = new File(
+ "stats/v022-bridge-descriptors");
+
+ /* Initialize database connection string. */
+ this.connectionURL = connectionURL;
+
+ /* Initialize logger. */
+ this.logger = Logger.getLogger(
+ BridgeStatsFileHandler.class.getName());
+
+ /* Read in bridge user numbers by country as seen by single bridges,
+ * normalized to 24-hour periods. */
+ if (this.bridgeStatsRawFile.exists()) {
+ try {
+ this.logger.fine("Reading file "
+ + this.bridgeStatsRawFile.getAbsolutePath() + "...");
+ BufferedReader br = new BufferedReader(new FileReader(
+ this.bridgeStatsRawFile));
+ String line = br.readLine();
+ if (line != null) {
+ /* The first line should contain headers that we need to parse
+ * in order to learn what countries we were interested in when
+ * writing this file. */
+ if (!line.startsWith("bridge,date,time,")) {
+ this.logger.warning("Incorrect first line '" + line + "' in "
+ + this.bridgeStatsRawFile.getAbsolutePath() + "! This line "
+ + "should contain headers! Aborting to read in this "
+ + "file!");
+ } else {
+ String[] headers = line.split(",");
+ for (int i = 3; i < headers.length; i++) {
+ if (!headers[i].equals("all")) {
+ this.countries.add(headers[i]);
+ }
+ }
+ /* Read in the rest of the file. */
+ while ((line = br.readLine()) != null) {
+ String[] parts = line.split(",");
+ if (parts.length != headers.length) {
+ this.logger.warning("Corrupt line '" + line + "' in file "
+ + this.bridgeStatsRawFile.getAbsolutePath()
+ + "! Aborting to read this file!");
+ break;
+ }
+ String hashedBridgeIdentity = parts[0];
+ String date = parts[1];
+ String time = parts[2];
+ SortedMap<String, String> obs =
+ new TreeMap<String, String>();
+ for (int i = 3; i < parts.length; i++) {
+ if (parts[i].equals("NA")) {
+ continue;
+ }
+ if (headers[i].equals("all")) {
+ obs.put("zy", parts[i]);
+ } else {
+ obs.put(headers[i], parts[i]);
+ }
+ }
+ this.addObs(hashedBridgeIdentity, date, time, obs);
+ }
+ }
+ }
+ br.close();
+ this.logger.fine("Finished reading file "
+ + this.bridgeStatsRawFile.getAbsolutePath() + ".");
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed to read file "
+ + this.bridgeStatsRawFile.getAbsolutePath() + "!", e);
+ }
+ }
+
+ /* Read in known hashed relay identities used to exclude bridges that
+ * have been running as relays. */
+ if (this.hashedRelayIdentitiesFile.exists()) {
+ try {
+ this.logger.fine("Reading file "
+ + this.hashedRelayIdentitiesFile.getAbsolutePath() + "...");
+ BufferedReader br = new BufferedReader(new FileReader(
+ this.hashedRelayIdentitiesFile));
+ String line = null;
+ /* Read in all lines from the file and memorize them. */
+ while ((line = br.readLine()) != null) {
+ this.hashedRelays.add(line);
+ }
+ br.close();
+ this.logger.fine("Finished reading file "
+ + this.hashedRelayIdentitiesFile.getAbsolutePath() + ".");
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed to read file "
+ + this.hashedRelayIdentitiesFile.getAbsolutePath() + "!", e);
+ }
+ }
+
+ /* Read in known extra-info descriptors published by 0.2.2.x
+ * bridges. */
+ if (this.zeroTwoTwoDescriptorsFile.exists()) {
+ try {
+ this.logger.fine("Reading file "
+ + this.zeroTwoTwoDescriptorsFile.getAbsolutePath() + "...");
+ BufferedReader br = new BufferedReader(new FileReader(
+ this.zeroTwoTwoDescriptorsFile));
+ String line = null;
+ /* Read in all lines from the file and memorize them. */
+ while ((line = br.readLine()) != null) {
+ this.zeroTwoTwoDescriptors.add(line);
+ }
+ br.close();
+ this.logger.fine("Finished reading file "
+ + this.zeroTwoTwoDescriptorsFile.getAbsolutePath() + ".");
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed to read file "
+ + this.zeroTwoTwoDescriptorsFile.getAbsolutePath() + "!", e);
+ }
+ }
+ }
+
+ /**
+ * Adds a hashed relay identity string to the list of bridges that we
+ * are going to ignore in the future. If we counted user numbers from
+ * bridges that have been running as relays, our numbers would be far
+ * higher than what we think is correct.
+ */
+ public void addHashedRelay(String hashedRelayIdentity) {
+ if (!this.hashedRelays.contains(hashedRelayIdentity)) {
+ this.logger.finer("Adding new hashed relay identity: "
+ + hashedRelayIdentity);
+ this.hashedRelays.add(hashedRelayIdentity);
+ }
+ }
+
+ /**
+ * Adds an extra-info descriptor identifier published by a 0.2.2.x
+ * bridge. If this extra-info descriptor contains geoip-stats, it is
+ * not included in the results, because its stats are very likely broken.
+ */
+ public void addZeroTwoTwoDescriptor(String hashedBridgeIdentity,
+ String date, String time) {
+ String value = hashedBridgeIdentity.toUpperCase() + "," + date + ","
+ + time;
+ if (!this.zeroTwoTwoDescriptors.contains(value)) {
+ this.logger.finer("Adding new bridge 0.2.2.x extra-info "
+ + "descriptor: " + value);
+ this.zeroTwoTwoDescriptors.add(value);
+ }
+ }
+
+ /**
+ * Returns whether the given fingerprint is a known hashed relay
+ * identity. <code>BridgeDescriptorParser</code> uses this information
+ * to decide whether to continue parsing a bridge extra-info
+ * descriptor or not.
+ */
+ public boolean isKnownRelay(String hashedBridgeIdentity) {
+ return this.hashedRelays.contains(hashedBridgeIdentity);
+ }
+
+ /**
+ * Adds bridge user numbers by country as seen by a single bridge on a
+ * given date and time. Bridges can publish statistics on unique IP
+ * addresses multiple times a day, but we only want to include one
+ * observation per day. If we already have an observation from the given
+ * bridge and day, we keep the one with the later publication time and
+ * discard the other one.
+ */
+ public void addObs(String hashedIdentity, String date, String time,
+ Map<String, String> obs) {
+ for (String country : obs.keySet()) {
+ this.countries.add(country);
+ }
+ String shortKey = hashedIdentity + "," + date;
+ String longKey = shortKey + "," + time;
+ SortedMap<String, Map<String, String>> tailMap =
+ this.bridgeUsersRaw.tailMap(shortKey);
+ String nextKey = tailMap.isEmpty() ? null : tailMap.firstKey();
+ if (nextKey == null || !nextKey.startsWith(shortKey)) {
+ this.logger.finer("Adding new bridge user numbers for key "
+ + longKey);
+ this.bridgeUsersRaw.put(longKey, obs);
+ } else if (longKey.compareTo(nextKey) > 0) {
+ this.logger.finer("Replacing existing bridge user numbers (" +
+ nextKey + ") with new numbers: " + longKey);
+ this.bridgeUsersRaw.put(longKey, obs);
+ } else {
+ this.logger.finer("Not replacing existing bridge user numbers (" +
+ nextKey + ") with new numbers (" + longKey + ").");
+ }
+ }
+
+ /**
+ * Writes the list of hashed relay identities and bridge user numbers as
+ * observed by single bridges to disk, aggregates per-day statistics for
+ * all bridges, and writes those to disk, too.
+ */
+ public void writeFiles() {
+
+ /* Write hashed relay identities to disk. */
+ try {
+ this.logger.fine("Writing file "
+ + this.hashedRelayIdentitiesFile.getAbsolutePath() + "...");
+ this.hashedRelayIdentitiesFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ this.hashedRelayIdentitiesFile));
+ for (String hashedRelay : this.hashedRelays) {
+ bw.append(hashedRelay + "\n");
+ }
+ bw.close();
+ this.logger.fine("Finished writing file "
+ + this.hashedRelayIdentitiesFile.getAbsolutePath() + ".");
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed to write "
+ + this.hashedRelayIdentitiesFile.getAbsolutePath() + "!", e);
+ }
+
+ /* Write bridge extra-info descriptor identifiers to disk. */
+ try {
+ this.logger.fine("Writing file "
+ + this.zeroTwoTwoDescriptorsFile.getAbsolutePath() + "...");
+ this.zeroTwoTwoDescriptorsFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ this.zeroTwoTwoDescriptorsFile));
+ for (String descriptorIdentifier : this.zeroTwoTwoDescriptors) {
+ bw.append(descriptorIdentifier + "\n");
+ }
+ bw.close();
+ this.logger.fine("Finished writing file "
+ + this.zeroTwoTwoDescriptorsFile.getAbsolutePath() + ".");
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed to write "
+ + this.zeroTwoTwoDescriptorsFile.getAbsolutePath() + "!", e);
+ }
+
+ /* Write observations made by single bridges to disk. */
+ try {
+ this.logger.fine("Writing file "
+ + this.bridgeStatsRawFile.getAbsolutePath() + "...");
+ this.bridgeStatsRawFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ this.bridgeStatsRawFile));
+ bw.append("bridge,date,time");
+ for (String c : this.countries) {
+ if (c.equals("zy")) {
+ bw.append(",all");
+ } else {
+ bw.append("," + c);
+ }
+ }
+ bw.append("\n");
+ for (Map.Entry<String, Map<String, String>> e :
+ this.bridgeUsersRaw.entrySet()) {
+ String longKey = e.getKey();
+ String[] parts = longKey.split(",");
+ String hashedBridgeIdentity = parts[0];
+ String date = parts[1];
+ String time = parts[2];
+ if (!this.hashedRelays.contains(hashedBridgeIdentity) &&
+ !this.zeroTwoTwoDescriptors.contains(longKey)) {
+ Map<String, String> obs = e.getValue();
+ StringBuilder sb = new StringBuilder(longKey);
+ for (String c : this.countries) {
+ sb.append("," + (obs.containsKey(c) &&
+ !obs.get(c).startsWith("-") ? obs.get(c) : "NA"));
+ }
+ String line = sb.toString();
+ bw.append(line + "\n");
+ }
+ }
+ bw.close();
+ this.logger.fine("Finished writing file "
+ + this.bridgeStatsRawFile.getAbsolutePath() + ".");
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed to write "
+ + this.bridgeStatsRawFile.getAbsolutePath() + "!", e);
+ }
+
+ /* Aggregate per-day statistics. */
+ SortedMap<String, double[]> bridgeUsersPerDay =
+ new TreeMap<String, double[]>();
+ for (Map.Entry<String, Map<String, String>> e :
+ this.bridgeUsersRaw.entrySet()) {
+ String longKey = e.getKey();
+ String[] parts = longKey.split(",");
+ String hashedBridgeIdentity = parts[0];
+ String date = parts[1];
+ String time = parts[2];
+ if (!this.hashedRelays.contains(hashedBridgeIdentity) &&
+ !this.zeroTwoTwoDescriptors.contains(longKey)) {
+ double[] users = bridgeUsersPerDay.get(date);
+ Map<String, String> obs = e.getValue();
+ if (users == null) {
+ users = new double[this.countries.size()];
+ bridgeUsersPerDay.put(date, users);
+ }
+ int i = 0;
+ for (String c : this.countries) {
+ if (obs.containsKey(c) && !obs.get(c).startsWith("-")) {
+ users[i] += Double.parseDouble(obs.get(c));
+ }
+ i++;
+ }
+ }
+ }
+
+ /* Write final results of bridge users per day and country to
+ * <code>stats/bridge-stats</code>. */
+ try {
+ this.logger.fine("Writing file "
+ + this.bridgeStatsFile.getAbsolutePath() + "...");
+ this.bridgeStatsFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ this.bridgeStatsFile));
+ bw.append("date");
+ for (String c : this.countries) {
+ if (c.equals("zy")) {
+ bw.append(",all");
+ } else {
+ bw.append("," + c);
+ }
+ }
+ bw.append("\n");
+
+ /* Write current observation. */
+ for (Map.Entry<String, double[]> e : bridgeUsersPerDay.entrySet()) {
+ String date = e.getKey();
+ bw.append(date);
+ double[] users = e.getValue();
+ for (int i = 0; i < users.length; i++) {
+ bw.append("," + String.format("%.2f", users[i]));
+ }
+ bw.append("\n");
+ }
+ bw.close();
+ this.logger.fine("Finished writing file "
+ + this.bridgeStatsFile.getAbsolutePath() + ".");
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed to write "
+ + this.bridgeStatsFile.getAbsolutePath() + "!", e);
+ }
+
+ /* Add daily bridge users to database. */
+ if (connectionURL != null) {
+ try {
+ List<String> countryList = new ArrayList<String>();
+ for (String c : this.countries) {
+ countryList.add(c);
+ }
+ Map<String, Integer> insertRows = new HashMap<String, Integer>(),
+ updateRows = new HashMap<String, Integer>();
+ for (Map.Entry<String, double[]> e :
+ bridgeUsersPerDay.entrySet()) {
+ String date = e.getKey();
+ double[] users = e.getValue();
+ for (int i = 0; i < users.length; i++) {
+ int usersInt = (int) users[i];
+ if (usersInt < 1) {
+ continue;
+ }
+ String country = countryList.get(i);
+ String key = date + "," + country;
+ insertRows.put(key, usersInt);
+ }
+ }
+ Connection conn = DriverManager.getConnection(connectionURL);
+ conn.setAutoCommit(false);
+ Statement statement = conn.createStatement();
+ ResultSet rs = statement.executeQuery(
+ "SELECT date, country, users FROM bridge_stats");
+ while (rs.next()) {
+ String date = rs.getDate(1).toString();
+ String country = rs.getString(2);
+ String key = date + "," + country;
+ if (insertRows.containsKey(key)) {
+ int insertRow = insertRows.remove(key);
+ int oldUsers = rs.getInt(3);
+ if (oldUsers != insertRow) {
+ updateRows.put(key, insertRow);
+ }
+ }
+ }
+ rs.close();
+ PreparedStatement psU = conn.prepareStatement(
+ "UPDATE bridge_stats SET users = ? "
+ + "WHERE date = ? AND country = ?");
+ for (Map.Entry<String, Integer> e : updateRows.entrySet()) {
+ String[] keyParts = e.getKey().split(",");
+ java.sql.Date date = java.sql.Date.valueOf(keyParts[0]);
+ String country = keyParts[1];
+ int users = e.getValue();
+ psU.clearParameters();
+ psU.setInt(1, users);
+ psU.setDate(2, date);
+ psU.setString(3, country);
+ psU.executeUpdate();
+ }
+ PreparedStatement psI = conn.prepareStatement(
+ "INSERT INTO bridge_stats (users, date, country) "
+ + "VALUES (?, ?, ?)");
+ for (Map.Entry<String, Integer> e : insertRows.entrySet()) {
+ String[] keyParts = e.getKey().split(",");
+ java.sql.Date date = java.sql.Date.valueOf(keyParts[0]);
+ String country = keyParts[1];
+ int users = e.getValue();
+ psI.clearParameters();
+ psI.setInt(1, users);
+ psI.setDate(2, date);
+ psI.setString(3, country);
+ psI.executeUpdate();
+ }
+ conn.commit();
+ conn.close();
+ } catch (SQLException e) {
+ logger.log(Level.WARNING, "Failed to add daily bridge users to "
+ + "database.", e);
+ }
+ }
+ }
+}
+
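
The intermediate and final files written above are plain CSV. Hypothetical sample contents (the bridge hash, dates, and numbers are made up; countries appear in sorted order, and the "zy" total is written last under the header "all"):

    stats/bridge-stats-raw:
    bridge,date,time,de,us,all
    4A5B...C3D4,2011-03-02,20:31:42,3.21,9.13,12.34

    stats/bridge-stats:
    date,de,us,all
    2011-03-02,38.01,117.77,241.52
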
diff --git a/src/org/torproject/ernie/cron/Configuration.java b/src/org/torproject/ernie/cron/Configuration.java
index 66ad778..818f9e9 100644
--- a/src/org/torproject/ernie/cron/Configuration.java
+++ b/src/org/torproject/ernie/cron/Configuration.java
@@ -16,12 +16,16 @@ public class Configuration {
private boolean importDirectoryArchives = false;
private String directoryArchivesDirectory = "archives/";
private boolean keepDirectoryArchiveImportHistory = false;
+ private boolean importSanitizedBridges = false;
+ private String sanitizedBridgesDirectory = "bridges/";
+ private boolean keepSanitizedBridgesImportHistory = false;
private boolean writeRelayDescriptorDatabase = false;
private String relayDescriptorDatabaseJdbc =
"jdbc:postgresql://localhost/tordir?user=metrics&password=password";
private boolean writeRelayDescriptorsRawFiles = false;
private String relayDescriptorRawFilesDirectory = "pg-import/";
private boolean writeConsensusHealth = false;
+ private boolean writeBridgeStats = false;
public Configuration() {
/* Initialize logger. */
@@ -47,6 +51,14 @@ public class Configuration {
} else if (line.startsWith("KeepDirectoryArchiveImportHistory")) {
this.keepDirectoryArchiveImportHistory = Integer.parseInt(
line.split(" ")[1]) != 0;
+ } else if (line.startsWith("ImportSanitizedBridges")) {
+ this.importSanitizedBridges = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("SanitizedBridgesDirectory")) {
+ this.sanitizedBridgesDirectory = line.split(" ")[1];
+ } else if (line.startsWith("KeepSanitizedBridgesImportHistory")) {
+ this.keepSanitizedBridgesImportHistory = Integer.parseInt(
+ line.split(" ")[1]) != 0;
} else if (line.startsWith("WriteRelayDescriptorDatabase")) {
this.writeRelayDescriptorDatabase = Integer.parseInt(
line.split(" ")[1]) != 0;
@@ -60,6 +72,9 @@ public class Configuration {
} else if (line.startsWith("WriteConsensusHealth")) {
this.writeConsensusHealth = Integer.parseInt(
line.split(" ")[1]) != 0;
+ } else if (line.startsWith("WriteBridgeStats")) {
+ this.writeBridgeStats = Integer.parseInt(
+ line.split(" ")[1]) != 0;
} else {
logger.severe("Configuration file contains unrecognized "
+ "configuration key in line '" + line + "'! Exiting!");
@@ -97,6 +112,15 @@ public class Configuration {
public boolean getWriteRelayDescriptorDatabase() {
return this.writeRelayDescriptorDatabase;
}
+ public boolean getImportSanitizedBridges() {
+ return this.importSanitizedBridges;
+ }
+ public String getSanitizedBridgesDirectory() {
+ return this.sanitizedBridgesDirectory;
+ }
+ public boolean getKeepSanitizedBridgesImportHistory() {
+ return this.keepSanitizedBridgesImportHistory;
+ }
public String getRelayDescriptorDatabaseJDBC() {
return this.relayDescriptorDatabaseJdbc;
}
@@ -109,5 +133,8 @@ public class Configuration {
public boolean getWriteConsensusHealth() {
return this.writeConsensusHealth;
}
+ public boolean getWriteBridgeStats() {
+ return this.writeBridgeStats;
+ }
}
diff --git a/src/org/torproject/ernie/cron/ConsensusStatsFileHandler.java b/src/org/torproject/ernie/cron/ConsensusStatsFileHandler.java
new file mode 100644
index 0000000..4ad5300
--- /dev/null
+++ b/src/org/torproject/ernie/cron/ConsensusStatsFileHandler.java
@@ -0,0 +1,282 @@
+/* Copyright 2011 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.cron;
+
+import java.io.*;
+import java.sql.*;
+import java.text.*;
+import java.util.*;
+import java.util.logging.*;
+
+/**
+ * Generates statistics on the average number of running bridges per
+ * day. Accepts parse results from <code>BridgeDescriptorParser</code>,
+ * stores them in the intermediate result file
+ * <code>stats/bridge-consensus-stats-raw</code>, and writes daily
+ * averages to the <code>bridge_network_size</code> database table for
+ * all days for which at least half of the expected bridge statuses
+ * are known.
+ */
+public class ConsensusStatsFileHandler {
+
+ /**
+ * Intermediate results file holding the number of running bridges per
+ * bridge status.
+ */
+ private File bridgeConsensusStatsRawFile;
+
+ /**
+ * Number of running bridges in a given bridge status. Map keys are
+ * bridge status times formatted as "yyyy-MM-dd HH:mm:ss", map values
+ * are lines as read from <code>stats/bridge-consensus-stats-raw</code>.
+ */
+ private SortedMap<String, String> bridgesRaw;
+
+ /**
+ * Average number of running bridges per day. Map keys are dates
+ * formatted as "yyyy-MM-dd", map values are the last column as written
+ * to <code>stats/consensus-stats</code>.
+ */
+ private SortedMap<String, String> bridgesPerDay;
+
+ /**
+ * Logger for this class.
+ */
+ private Logger logger;
+
+ private int bridgeResultsAdded = 0;
+
+ /* Database connection string. */
+ private String connectionURL = null;
+
+ /**
+ * Initializes this class, including reading in the intermediate
+ * results file <code>stats/bridge-consensus-stats-raw</code> if it
+ * exists. Daily averages are recomputed from these raw observations
+ * in every execution rather than read back in.
+ */
+ public ConsensusStatsFileHandler(String connectionURL) {
+
+ /* Initialize local data structures to hold intermediate and final
+ * results. */
+ this.bridgesPerDay = new TreeMap<String, String>();
+ this.bridgesRaw = new TreeMap<String, String>();
+
+ /* Initialize file names for intermediate and final results files. */
+ this.bridgeConsensusStatsRawFile = new File(
+ "stats/bridge-consensus-stats-raw");
+
+ /* Initialize database connection string. */
+ this.connectionURL = connectionURL;
+
+ /* Initialize logger. */
+ this.logger = Logger.getLogger(
+ ConsensusStatsFileHandler.class.getName());
+
+ /* Read in number of running bridges per bridge status. */
+ if (this.bridgeConsensusStatsRawFile.exists()) {
+ try {
+ this.logger.fine("Reading file "
+ + this.bridgeConsensusStatsRawFile.getAbsolutePath() + "...");
+ BufferedReader br = new BufferedReader(new FileReader(
+ this.bridgeConsensusStatsRawFile));
+ String line = null;
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("date")) {
+ /* Skip headers. */
+ continue;
+ }
+ String[] parts = line.split(",");
+ if (parts.length != 2) {
+ this.logger.warning("Corrupt line '" + line + "' in file "
+ + this.bridgeConsensusStatsRawFile.getAbsolutePath()
+ + "! Aborting to read this file!");
+ break;
+ }
+ String dateTime = parts[0];
+ this.bridgesRaw.put(dateTime, line);
+ }
+ br.close();
+ this.logger.fine("Finished reading file "
+ + this.bridgeConsensusStatsRawFile.getAbsolutePath() + ".");
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed to read file "
+ + this.bridgeConsensusStatsRawFile.getAbsolutePath() + "!",
+ e);
+ }
+ }
+ }
+
+ /**
+ * Adds the intermediate results of the number of running bridges in a
+ * given bridge status to the existing observations.
+ */
+ public void addBridgeConsensusResults(String published, int running) {
+ String line = published + "," + running;
+ if (!this.bridgesRaw.containsKey(published)) {
+ this.logger.finer("Adding new bridge numbers: " + line);
+ this.bridgesRaw.put(published, line);
+ this.bridgeResultsAdded++;
+ } else if (!line.equals(this.bridgesRaw.get(published))) {
+ this.logger.warning("The numbers of running bridges we were just "
+ + "given (" + line + ") are different from what we learned "
+ + "before (" + this.bridgesRaw.get(published) + ")! "
+ + "Overwriting!");
+ this.bridgesRaw.put(published, line);
+ }
+ }
+
+ /**
+ * Aggregates the raw observations on relay and bridge numbers and
+ * writes both raw and aggregate observations to disk.
+ */
+ public void writeFiles() {
+
+ /* Did we learn anything new about average relay or bridge numbers in
+ * this run? */
+ boolean writeConsensusStats = false;
+
+ /* Go through raw observations of numbers of running bridges in bridge
+ * statuses, calculate averages per day, and add these averages to
+ * final results. */
+ if (!this.bridgesRaw.isEmpty()) {
+ String tempDate = null;
+ int brunning = 0, statuses = 0;
+ Iterator<String> it = this.bridgesRaw.values().iterator();
+ boolean haveWrittenFinalLine = false;
+ while (it.hasNext() || !haveWrittenFinalLine) {
+ String next = it.hasNext() ? it.next() : null;
+ /* Finished reading a day or even all lines? */
+ if (tempDate != null && (next == null
+ || !next.substring(0, 10).equals(tempDate))) {
+ /* Only write results if we have seen at least half of all
+ * statuses. */
+ if (statuses >= 24) {
+ String line = "," + (brunning / statuses);
+ /* Are our results new? */
+ if (!this.bridgesPerDay.containsKey(tempDate)) {
+ this.logger.finer("Adding new average bridge numbers: "
+ + tempDate + line);
+ this.bridgesPerDay.put(tempDate, line);
+ writeConsensusStats = true;
+ } else if (!line.equals(this.bridgesPerDay.get(tempDate))) {
+ this.logger.finer("Replacing existing average bridge "
+ + "numbers (" + this.bridgesPerDay.get(tempDate)
+ + " with new numbers: " + line);
+ this.bridgesPerDay.put(tempDate, line);
+ writeConsensusStats = true;
+ }
+ }
+ brunning = statuses = 0;
+ haveWrittenFinalLine = (next == null);
+ }
+ /* Sum up number of running bridges. */
+ if (next != null) {
+ tempDate = next.substring(0, 10);
+ statuses++;
+ brunning += Integer.parseInt(next.split(",")[1]);
+ }
+ }
+ }
+
+ /* Write raw numbers of running bridges to disk. */
+ try {
+ this.logger.fine("Writing file "
+ + this.bridgeConsensusStatsRawFile.getAbsolutePath() + "...");
+ this.bridgeConsensusStatsRawFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(
+ new FileWriter(this.bridgeConsensusStatsRawFile));
+ bw.append("datetime,brunning\n");
+ for (String line : this.bridgesRaw.values()) {
+ bw.append(line + "\n");
+ }
+ bw.close();
+ this.logger.fine("Finished writing file "
+ + this.bridgeConsensusStatsRawFile.getAbsolutePath() + ".");
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed to write file "
+ + this.bridgeConsensusStatsRawFile.getAbsolutePath() + "!",
+ e);
+ }
+
+ /* Add average number of bridges per day to the database. */
+ if (connectionURL != null) {
+ try {
+ Map<String, String> insertRows = new HashMap<String, String>(),
+ updateRows = new HashMap<String, String>();
+ insertRows.putAll(this.bridgesPerDay);
+ Connection conn = DriverManager.getConnection(connectionURL);
+ conn.setAutoCommit(false);
+ Statement statement = conn.createStatement();
+ ResultSet rs = statement.executeQuery(
+ "SELECT date, avg_running FROM bridge_network_size");
+ while (rs.next()) {
+ String date = rs.getDate(1).toString();
+ if (insertRows.containsKey(date)) {
+ String insertRow = insertRows.remove(date);
+ long newAvgRunning = Long.parseLong(insertRow.substring(1));
+ long oldAvgRunning = rs.getLong(2);
+ if (newAvgRunning != oldAvgRunning) {
+ updateRows.put(date, insertRow);
+ }
+ }
+ }
+ rs.close();
+ PreparedStatement psU = conn.prepareStatement(
+ "UPDATE bridge_network_size SET avg_running = ? "
+ + "WHERE date = ?");
+ for (Map.Entry<String, String> e : updateRows.entrySet()) {
+ java.sql.Date date = java.sql.Date.valueOf(e.getKey());
+ long avgRunning = Long.parseLong(e.getValue().substring(1));
+ psU.clearParameters();
+ psU.setLong(1, avgRunning);
+ psU.setDate(2, date);
+ psU.executeUpdate();
+ }
+ PreparedStatement psI = conn.prepareStatement(
+ "INSERT INTO bridge_network_size (avg_running, date) "
+ + "VALUES (?, ?)");
+ for (Map.Entry<String, String> e : insertRows.entrySet()) {
+ java.sql.Date date = java.sql.Date.valueOf(e.getKey());
+ long avgRunning = Long.parseLong(e.getValue().substring(1));
+ psI.clearParameters();
+ psI.setLong(1, avgRunning);
+ psI.setDate(2, date);
+ psI.executeUpdate();
+ }
+ conn.commit();
+ conn.close();
+ } catch (SQLException e) {
+ logger.log(Level.WARNING, "Failed to add average bridge numbers "
+ + "to database.", e);
+ }
+ }
+
+ /* Write stats. */
+ StringBuilder dumpStats = new StringBuilder("Finished writing "
+ + "statistics on bridge network statuses to disk.\nAdded "
+ + this.bridgeResultsAdded + " bridge network status(es) in this "
+ + "execution.");
+ long now = System.currentTimeMillis();
+ SimpleDateFormat dateTimeFormat =
+ new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ if (this.bridgesRaw.isEmpty()) {
+ dumpStats.append("\nNo bridge status known yet.");
+ } else {
+ dumpStats.append("\nLast known bridge status was published "
+ + this.bridgesRaw.lastKey() + ".");
+ try {
+ if (now - 6L * 60L * 60L * 1000L > dateTimeFormat.parse(
+ this.bridgesRaw.lastKey()).getTime()) {
+ logger.warning("Last known bridge status is more than 6 hours "
+ + "old: " + this.bridgesRaw.lastKey());
+ }
+ } catch (ParseException e) {
+ /* Can't parse the timestamp? Whatever. */
+ }
+ }
+ logger.info(dumpStats.toString());
+ }
+}
+
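
The daily average computed above is a plain integer average over all bridge statuses seen for a date, and a date only counts if at least 24 statuses are present (half of the 48 statuses per day that half-hourly publication would produce, per the "at least half" comment in the code). A hypothetical sample input and result:

    stats/bridge-consensus-stats-raw:
    datetime,brunning
    2011-03-02 00:07:31,512
    2011-03-02 00:37:31,498
    ... (28 more statuses for 2011-03-02, 15060 running bridges in total)

With 30 statuses and 15060 running bridges in total, the day passes the >= 24 check, and avg_running for 2011-03-02 is written to the bridge_network_size table as 15060 / 30 = 502.
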
diff --git a/src/org/torproject/ernie/cron/Main.java b/src/org/torproject/ernie/cron/Main.java
index 1a125bc..0551586 100644
--- a/src/org/torproject/ernie/cron/Main.java
+++ b/src/org/torproject/ernie/cron/Main.java
@@ -33,6 +33,11 @@ public class Main {
// Define stats directory for temporary files
File statsDirectory = new File("stats");
+ // Prepare bridge stats file handler
+ BridgeStatsFileHandler bsfh = config.getWriteBridgeStats() ?
+ new BridgeStatsFileHandler(
+ config.getRelayDescriptorDatabaseJDBC()) : null;
+
// Prepare consensus health checker
ConsensusHealthChecker chc = config.getWriteConsensusHealth() ?
new ConsensusHealthChecker() : null;
@@ -50,7 +55,7 @@ public class Main {
// Prepare relay descriptor parser (only if we are writing the
// consensus-health page to disk)
RelayDescriptorParser rdp = chc != null || rddi != null ?
- new RelayDescriptorParser(chc, rddi) : null;
+ new RelayDescriptorParser(chc, rddi, bsfh) : null;
// Import relay descriptors
if (rdp != null) {
@@ -73,6 +78,33 @@ public class Main {
chc = null;
}
+ // Prepare consensus stats file handler (used for stats on running
+ // bridges only)
+ ConsensusStatsFileHandler csfh = config.getWriteBridgeStats() ?
+ new ConsensusStatsFileHandler(
+ config.getRelayDescriptorDatabaseJDBC()) : null;
+
+ // Prepare bridge descriptor parser
+ BridgeDescriptorParser bdp = config.getWriteBridgeStats() ?
+ new BridgeDescriptorParser(csfh, bsfh) : null;
+
+ // Import bridge descriptors
+ if (bdp != null && config.getImportSanitizedBridges()) {
+ new SanitizedBridgesReader(bdp,
+ new File(config.getSanitizedBridgesDirectory()),
+ statsDirectory, config.getKeepSanitizedBridgesImportHistory());
+ }
+
+ // Write updated stats files to disk
+ if (bsfh != null) {
+ bsfh.writeFiles();
+ bsfh = null;
+ }
+ if (csfh != null) {
+ csfh.writeFiles();
+ csfh = null;
+ }
+
// Remove lock file
lf.releaseLock();
diff --git a/src/org/torproject/ernie/cron/RelayDescriptorParser.java b/src/org/torproject/ernie/cron/RelayDescriptorParser.java
index d02022a..a9a0d3d 100644
--- a/src/org/torproject/ernie/cron/RelayDescriptorParser.java
+++ b/src/org/torproject/ernie/cron/RelayDescriptorParser.java
@@ -18,6 +18,11 @@ import org.apache.commons.codec.binary.*;
public class RelayDescriptorParser {
/**
+ * Stats file handler that accepts parse results for bridge statistics.
+ */
+ private BridgeStatsFileHandler bsfh;
+
+ /**
* Relay descriptor database importer that stores relay descriptor
* contents for later evaluation.
*/
@@ -36,9 +41,10 @@ public class RelayDescriptorParser {
* Initializes this class.
*/
public RelayDescriptorParser(ConsensusHealthChecker chc,
- RelayDescriptorDatabaseImporter rddi) {
+ RelayDescriptorDatabaseImporter rddi, BridgeStatsFileHandler bsfh) {
this.chc = chc;
this.rddi = rddi;
+ this.bsfh = bsfh;
/* Initialize logger. */
this.logger = Logger.getLogger(RelayDescriptorParser.class.getName());
@@ -76,6 +82,7 @@ public class RelayDescriptorParser {
orPort = 0L, dirPort = 0L;
SortedSet<String> relayFlags = null;
StringBuilder rawStatusEntry = null;
+ SortedSet<String> hashedRelayIdentities = new TreeSet<String>();
while ((line = br.readLine()) != null) {
if (line.equals("vote-status vote")) {
isConsensus = false;
@@ -111,6 +118,9 @@ public class RelayDescriptorParser {
relayIdentity = Hex.encodeHexString(
Base64.decodeBase64(parts[2] + "=")).
toLowerCase();
+ hashedRelayIdentities.add(DigestUtils.shaHex(
+ Base64.decodeBase64(parts[2] + "=")).
+ toUpperCase());
serverDesc = Hex.encodeHexString(Base64.decodeBase64(
parts[3] + "=")).toLowerCase();
published = parseFormat.parse(parts[4] + " " + parts[5]).
@@ -144,6 +154,11 @@ public class RelayDescriptorParser {
}
}
if (isConsensus) {
+ if (this.bsfh != null) {
+ for (String hashedRelayIdentity : hashedRelayIdentities) {
+ this.bsfh.addHashedRelay(hashedRelayIdentity);
+ }
+ }
if (this.chc != null) {
this.chc.processConsensus(validAfterTime, data);
}
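
The new hunk above hashes each relay's identity digest from the consensus with SHA-1 and upper-cases the hex result; BridgeStatsFileHandler then compares these hashed identities against bridge fingerprints to exclude bridges that have also been running as relays. A small sketch of that transformation (the base64 value is a placeholder, not a real relay identity):

    import org.apache.commons.codec.binary.Base64;
    import org.apache.commons.codec.digest.DigestUtils;

    public class HashedRelayIdentitySketch {
      public static void main(String[] args) {
        // parts[2] of a consensus "r" line: base64 of the 20-byte identity digest
        String base64Identity = "AAAAAAAAAAAAAAAAAAAAAAAAAAA";   // placeholder
        String hashedIdentity = DigestUtils.shaHex(
            Base64.decodeBase64(base64Identity + "=")).toUpperCase();
        System.out.println(hashedIdentity);   // 40 upper-case hex characters
      }
    }
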
diff --git a/src/org/torproject/ernie/cron/SanitizedBridgesReader.java b/src/org/torproject/ernie/cron/SanitizedBridgesReader.java
new file mode 100644
index 0000000..b7f1d44
--- /dev/null
+++ b/src/org/torproject/ernie/cron/SanitizedBridgesReader.java
@@ -0,0 +1,114 @@
+/* Copyright 2011 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.cron;
+
+import java.io.*;
+import java.util.*;
+import java.util.logging.*;
+
+public class SanitizedBridgesReader {
+ public SanitizedBridgesReader(BridgeDescriptorParser bdp,
+ File bridgesDir, File statsDirectory, boolean keepImportHistory) {
+
+ if (bdp == null || bridgesDir == null || statsDirectory == null) {
+ throw new IllegalArgumentException();
+ }
+
+ Logger logger =
+ Logger.getLogger(SanitizedBridgesReader.class.getName());
+ SortedSet<String> bridgesImportHistory = new TreeSet<String>();
+ File bridgesImportHistoryFile =
+ new File(statsDirectory, "bridges-import-history");
+ if (keepImportHistory && bridgesImportHistoryFile.exists()) {
+ try {
+ BufferedReader br = new BufferedReader(new FileReader(
+ bridgesImportHistoryFile));
+ String line = null;
+ while ((line = br.readLine()) != null) {
+ bridgesImportHistory.add(line);
+ }
+ br.close();
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Could not read in bridge descriptor "
+ + "import history file. Skipping.");
+ }
+ }
+ if (bridgesDir.exists()) {
+ logger.fine("Importing files in directory " + bridgesDir + "/...");
+ Stack<File> filesInInputDir = new Stack<File>();
+ filesInInputDir.add(bridgesDir);
+ List<File> problems = new ArrayList<File>();
+ while (!filesInInputDir.isEmpty()) {
+ File pop = filesInInputDir.pop();
+ if (pop.isDirectory()) {
+ for (File f : pop.listFiles()) {
+ filesInInputDir.add(f);
+ }
+ continue;
+ } else if (keepImportHistory && bridgesImportHistory.contains(
+ pop.getName())) {
+ continue;
+ } else {
+ try {
+ BufferedInputStream bis = new BufferedInputStream(
+ new FileInputStream(pop));
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ int len;
+ byte[] data = new byte[1024];
+ while ((len = bis.read(data, 0, 1024)) >= 0) {
+ baos.write(data, 0, len);
+ }
+ bis.close();
+ byte[] allData = baos.toByteArray();
+ String fn = pop.getName();
+ // TODO dateTime extraction doesn't work for sanitized network
+ // statuses!
+ String dateTime = fn.substring(0, 4) + "-" + fn.substring(4, 6)
+ + "-" + fn.substring(6, 8) + " " + fn.substring(9, 11)
+ + ":" + fn.substring(11, 13) + ":" + fn.substring(13, 15);
+ bdp.parse(allData, dateTime, true);
+ if (keepImportHistory) {
+ bridgesImportHistory.add(pop.getName());
+ }
+ } catch (IOException e) {
+ problems.add(pop);
+ if (problems.size() > 3) {
+ break;
+ }
+ }
+ }
+ }
+ if (problems.isEmpty()) {
+ logger.fine("Finished importing files in directory " + bridgesDir
+ + "/.");
+ } else {
+ StringBuilder sb = new StringBuilder("Failed importing files in "
+ + "directory " + bridgesDir + "/:");
+ int printed = 0;
+ for (File f : problems) {
+ sb.append("\n " + f.getAbsolutePath());
+ if (++printed >= 3) {
+ sb.append("\n ... more");
+ break;
+ }
+ }
+ logger.warning(sb.toString());
+ }
+ if (keepImportHistory) {
+ try {
+ bridgesImportHistoryFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ bridgesImportHistoryFile));
+ for (String line : bridgesImportHistory) {
+ bw.write(line + "\n");
+ }
+ bw.close();
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Could not write bridge descriptor "
+ + "import history file.");
+ }
+ }
+ }
+ }
+}
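
The dateTime passed to BridgeDescriptorParser.parse() above is extracted purely from the file name, which assumes files named like yyyyMMdd-HHmmss-...; as the TODO notes, this does not hold for sanitized network statuses. A sketch of the extraction with a made-up file name:

    public class FileNameDateTimeSketch {
      public static void main(String[] args) {
        String fn = "20110302-203142-descriptors";   // made-up file name
        String dateTime = fn.substring(0, 4) + "-" + fn.substring(4, 6)
            + "-" + fn.substring(6, 8) + " " + fn.substring(9, 11)
            + ":" + fn.substring(11, 13) + ":" + fn.substring(13, 15);
        System.out.println(dateTime);   // 2011-03-02 20:31:42
      }
    }
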
+
diff --git a/src/org/torproject/ernie/test/SanitizedBridgesReaderTest.java b/src/org/torproject/ernie/test/SanitizedBridgesReaderTest.java
new file mode 100644
index 0000000..6dd9132
--- /dev/null
+++ b/src/org/torproject/ernie/test/SanitizedBridgesReaderTest.java
@@ -0,0 +1,33 @@
+/* Copyright 2011 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.test;
+
+import org.torproject.ernie.cron.*;
+
+import java.io.*;
+
+import org.junit.*;
+import org.junit.rules.*;
+import static org.junit.Assert.*;
+
+public class SanitizedBridgesReaderTest {
+
+ private File tempSanitizedBridgesDirectory;
+ private File tempStatsDirectory;
+
+ @Rule
+ public TemporaryFolder folder = new TemporaryFolder();
+
+ @Before
+ public void createTempDirectories() {
+ this.tempSanitizedBridgesDirectory = folder.newFolder("bridges");
+ this.tempStatsDirectory = folder.newFolder("stats");
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testBridgeDescriptorParserNull() {
+ new SanitizedBridgesReader(null, this.tempSanitizedBridgesDirectory,
+ this.tempStatsDirectory, false);
+ }
+}
+
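
The build.xml change only adds junit-4.8.2.jar to the compile classpath; this commit does not add a test target. After compiling, the test can be run manually with JUnit's command-line runner, along the lines of the following (the classes/ directory and jar paths are assumptions about the local build layout):

    java -cp classes:lib/junit-4.8.2.jar:lib/commons-codec-1.4.jar \
        org.junit.runner.JUnitCore \
        org.torproject.ernie.test.SanitizedBridgesReaderTest
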