[or-cvs] [ernie/master 1/2] Clean up and document bridge-stats and dirreq-stats writers.
karsten at torproject.org
karsten at torproject.org
Sun Feb 28 15:05:58 UTC 2010
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Sun, 28 Feb 2010 15:52:14 +0100
Subject: Clean up and document bridge-stats and dirreq-stats writers.
Commit: 07bfb5dd3c078f312cec774964f17d4527a60998
---
src/BridgeStatsFileHandler.java | 386 ++++++++++++++++++++++++++----------
src/ConsensusStatsFileHandler.java | 67 ++++---
src/DirreqStatsFileHandler.java | 221 +++++++++++++++------
src/Main.java | 2 +-
4 files changed, 480 insertions(+), 196 deletions(-)
diff --git a/src/BridgeStatsFileHandler.java b/src/BridgeStatsFileHandler.java
index 2d1fa88..8bc8fac 100644
--- a/src/BridgeStatsFileHandler.java
+++ b/src/BridgeStatsFileHandler.java
@@ -1,199 +1,375 @@
import java.io.*;
+import java.text.*;
import java.util.*;
import java.util.logging.*;
/**
- *
+ * Determines estimates of bridge users per country and day from the
+ * extra-info descriptors that bridges publish. In a first step, the
+ * numbers of unique IP addresses that bridges see are normalized to a
+ * 24-hour period. In the next step, all bridges are excluded that have
+ * been running as a relay. Finally, observations are summed up and
+ * written to <code>stats/bridge-stats</code>.
*/
public class BridgeStatsFileHandler {
+
+ /**
+ * Two-letter country codes of countries that we care about.
+ */
+ private SortedSet<String> countries;
+
+ /**
+ * Intermediate results file containing bridge user numbers by country
+ * as seen by single bridges, normalized to 24-hour periods.
+ */
private File bridgeStatsRawFile;
- private File bridgeStatsFile;
+
+ /**
+ * Bridge user numbers by country as seen by single bridges on a given
+ * day. Map keys are bridge and date written as "bridge,date", map
+ * values are lines as read from <code>stats/bridge-stats-raw</code>.
+ */
+ private SortedMap<String, String> bridgeUsersRaw;
+
+ /**
+ * Modification flag for <code>bridgeUsersRaw</code>. This flag is used to
+ * decide whether the contents of <code>bridgeUsersRaw</code> need to be
+ * written to disk during <code>writeFiles</code>.
+ */
+ private boolean bridgeUsersRawModified;
+
+ /**
+ * Helper file containing the hashed relay identities of all known
+ * relays. These hashes are compared to the bridge identity hashes to
+ * exclude bridges that have been known as relays from the statistics.
+ */
private File hashedRelayIdentitiesFile;
- private SortedSet<String> countries;
- private SortedSet<String> hashedRelays = new TreeSet<String>();
- private SortedMap<String, String> observations;
+
+ /**
+ * Known hashed relay identities used to exclude bridges that have been
+ * running as relays.
+ */
+ private SortedSet<String> hashedRelays;
+
+ /**
+ * Modification flag for <code>hashedRelays</code>. This flag is used to
+ * decide whether the contents of <code>hashedRelays</code> need to be
+ * written to disk during <code>writeFiles</code>.
+ */
private boolean hashedRelaysModified;
- private boolean observationsModified;
+
+ /**
+ * Final results file containing the number of bridge users per country
+ * and day. If this file exists on disk, it is not read in during
+ * initialization, but overwritten if either <code>bridgeUsersRaw</code>
+ * or <code>hashedRelays</code> have been modified.
+ */
+ private File bridgeStatsFile;
+
+ /**
+ * Logger for this class.
+ */
private Logger logger;
+
+ /**
+ * Initializes this class, including reading in intermediate results
+ * files <code>stats/bridge-stats-raw</code> and
+ * <code>stats/hashed-relay-identities</code>.
+ */
public BridgeStatsFileHandler(SortedSet<String> countries) {
+
+ /* Memorize the set of countries we care about. */
+ this.countries = countries;
+
+ /* Initialize local data structures to hold results. */
+ this.bridgeUsersRaw = new TreeMap<String, String>();
+ this.hashedRelays = new TreeSet<String>();
+
+ /* Initialize file names for intermediate and final results. */
this.bridgeStatsRawFile = new File("stats/bridge-stats-raw");
this.bridgeStatsFile = new File("stats/bridge-stats");
this.hashedRelayIdentitiesFile = new File(
"stats/hashed-relay-identities");
- this.countries = countries;
- this.observations = new TreeMap<String, String>();
- this.logger =
- Logger.getLogger(BridgeStatsFileHandler.class.getName());
+
+ /* Initialize logger. */
+ this.logger = Logger.getLogger(
+ BridgeStatsFileHandler.class.getName());
+
+ /* Read in bridge user numbers by country as seen by single bridges,
+ * normalized to 24-hour periods. */
if (this.bridgeStatsRawFile.exists()) {
- this.logger.info("Reading file "
- + this.bridgeStatsRawFile.getAbsolutePath() + "...");
try {
+ this.logger.fine("Reading file "
+ + this.bridgeStatsRawFile.getAbsolutePath() + "...");
BufferedReader br = new BufferedReader(new FileReader(
this.bridgeStatsRawFile));
String line = br.readLine();
if (line != null) {
- String[] headers = line.split(",");
- for (int i = 3; i < headers.length; i++) {
- this.countries.add(headers[i]);
- }
- while ((line = br.readLine()) != null) {
- String[] readData = line.split(",");
- String hashedBridgeIdentity = readData[0];
- String date = readData[1];
- String time = readData[2];
- SortedMap<String, String> obs = new TreeMap<String, String>();
- for (int i = 3; i < readData.length; i++) {
- obs.put(headers[i], readData[i]);
+ /* The first line should contain headers that we need to parse
+ * in order to learn what countries we were interested in when
+ * writing this file. */
+ if (!line.startsWith("bridge,date,time,")) {
+ this.logger.warning("Incorrect first line '" + line + "' in "
+ + this.bridgeStatsRawFile.getAbsolutePath() + "! This line "
+ + "should contain headers! Aborting to read in this "
+ + "file!");
+ } else {
+ String[] headers = line.split(",");
+ for (int i = 3; i < headers.length; i++) {
+ this.countries.add(headers[i]);
+ }
+ /* Read in the rest of the file. */
+ while ((line = br.readLine()) != null) {
+ String[] parts = line.split(",");
+ if (parts.length != headers.length) {
+ this.logger.warning("Corrupt line '" + line + "' in file "
+ + this.bridgeStatsRawFile.getAbsolutePath()
+ + "! Aborting to read this file!");
+ break;
+ }
+ String hashedBridgeIdentity = parts[0];
+ String date = parts[1];
+ String time = parts[2];
+ SortedMap<String, String> obs =
+ new TreeMap<String, String>();
+ for (int i = 3; i < parts.length; i++) {
+ obs.put(headers[i], parts[i]);
+ }
+ this.addObs(hashedBridgeIdentity, date, time, obs);
}
- this.addObs(hashedBridgeIdentity, date, time, obs);
}
}
br.close();
- this.observationsModified = false;
- this.logger.info("Finished reading file "
+ this.logger.fine("Finished reading file "
+ this.bridgeStatsRawFile.getAbsolutePath() + ".");
} catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed reading file "
+ this.logger.log(Level.WARNING, "Failed to read file "
+ this.bridgeStatsRawFile.getAbsolutePath() + "!", e);
}
}
+
+ /* Read in known hashed relay identities used to exclude bridges that
+ * have been running as relays. */
if (this.hashedRelayIdentitiesFile.exists()) {
- this.logger.info("Reading file "
- + this.hashedRelayIdentitiesFile.getAbsolutePath() + "...");
try {
+ this.logger.fine("Reading file "
+ + this.hashedRelayIdentitiesFile.getAbsolutePath() + "...");
BufferedReader br = new BufferedReader(new FileReader(
this.hashedRelayIdentitiesFile));
String line = null;
+ /* Read in all lines from the file and memorize them. */
while ((line = br.readLine()) != null) {
this.hashedRelays.add(line);
}
br.close();
- this.hashedRelaysModified = false;
- this.logger.info("Finished reading file "
+ this.logger.fine("Finished reading file "
+ this.hashedRelayIdentitiesFile.getAbsolutePath() + ".");
} catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed reading file "
+ this.logger.log(Level.WARNING, "Failed to read file "
+ this.hashedRelayIdentitiesFile.getAbsolutePath() + "!", e);
}
+
+ /* Set modification flags to false. */
+ this.bridgeUsersRawModified = this.hashedRelaysModified = false;
}
}
- public void addHashedRelay(String hashedRelayIdentity)
- throws IOException {
- this.hashedRelays.add(hashedRelayIdentity);
- this.hashedRelaysModified = true;
+
+ /**
+ * Adds a hashed relay identity string to the list of bridges that we
+ * are going to ignore in the future. If we counted user numbers from
+ * bridges that have been running as relays, our numbers would be far
+ * higher than what we think is correct.
+ */
+ public void addHashedRelay(String hashedRelayIdentity) {
+ if (!this.hashedRelays.contains(hashedRelayIdentity)) {
+ this.logger.finer("Adding new hashed relay identity: "
+ + hashedRelayIdentity);
+ this.hashedRelays.add(hashedRelayIdentity);
+ this.hashedRelaysModified = true;
+ }
}
- public boolean isKnownRelay(String hashedBridgeIdentity)
- throws IOException {
+
+ /**
+ * Returns whether the given fingerprint is a known hashed relay
+ * identity. <code>BridgeDescriptorParser</code> uses this information
+ * to decide whether to continue parsing a bridge extra-info
+ * descriptor or not.
+ */
+ public boolean isKnownRelay(String hashedBridgeIdentity) {
return this.hashedRelays.contains(hashedBridgeIdentity);
}
+
+ /**
+ * Adds bridge user numbers by country as seen by a single bridge on a
+ * given date and time. Bridges can publish statistics on unique IP
+ * addresses multiple times a day, but we only want to include one
+ * observation per day. If we already have an observation from the given
+ * bridge and day, we keep the one with the later publication time and
+ * discard the other one.
+ */
public void addObs(String hashedIdentity, String date,
- String time, Map<String, String> obs) throws IOException {
+ String time, Map<String, String> obs) {
String key = hashedIdentity + "," + date;
StringBuilder sb = new StringBuilder(key + "," + time);
for (String c : countries) {
sb.append("," + (obs.containsKey(c) ? obs.get(c) : "0.0"));
}
String value = sb.toString();
- if (!this.observations.containsKey(key)
- || value.compareTo(this.observations.get(key)) > 0) {
- this.observations.put(key, value);
- this.observationsModified = true;
+ if (!this.bridgeUsersRaw.containsKey(key)) {
+ this.logger.finer("Adding new bridge user numbers: " + value);
+ this.bridgeUsersRaw.put(key, value);
+ this.bridgeUsersRawModified = true;
+ } else if (value.compareTo(this.bridgeUsersRaw.get(key)) > 0) {
+ this.logger.finer("Replacing existing bridge user numbers (" +
+ this.bridgeUsersRaw.get(key) + " with new numbers: "
+ + value);
+ this.bridgeUsersRaw.put(key, value);
+ this.bridgeUsersRawModified = true;
+ } else {
+ this.logger.finer("Not replacing existing bridge user numbers (" +
+ this.bridgeUsersRaw.get(key) + " with new numbers (" + value
+ + ").");
}
}
- public void writeFile() {
- if (!this.hashedRelays.isEmpty() && this.hashedRelaysModified) {
+ /**
+ * Writes the list of hashed relay identities and bridge user numbers as
+ * observed by single bridges to disk, aggregates per-day statistics for
+ * all bridges, and writes those to disk, too.
+ */
+ public void writeFiles() {
+
+ /* Write hashed relay identities to disk. */
+ if (this.hashedRelaysModified) {
try {
- this.logger.info("Writing file "
+ this.logger.fine("Writing file "
+ this.hashedRelayIdentitiesFile.getAbsolutePath() + "...");
this.hashedRelayIdentitiesFile.getParentFile().mkdirs();
- BufferedWriter bwRelayIdentities = new BufferedWriter(
- new FileWriter(this.hashedRelayIdentitiesFile));
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ this.hashedRelayIdentitiesFile));
for (String hashedRelay : this.hashedRelays) {
- bwRelayIdentities.append(hashedRelay + "\n");
+ bw.append(hashedRelay + "\n");
}
- bwRelayIdentities.close();
- this.logger.info("Finished writing file "
+ bw.close();
+ this.logger.fine("Finished writing file "
+ this.hashedRelayIdentitiesFile.getAbsolutePath() + ".");
} catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed writing "
+ this.logger.log(Level.WARNING, "Failed to write "
+ this.hashedRelayIdentitiesFile.getAbsolutePath() + "!", e);
}
+ } else {
+ this.logger.fine("Not writing file "
+ + this.hashedRelayIdentitiesFile.getAbsolutePath()
+ + ", because nothing has changed.");
}
- if (!this.observations.isEmpty() && this.observationsModified) {
+
+ /* Write observations made by single bridges to disk. */
+ if (this.bridgeUsersRawModified) {
try {
- this.logger.info("Writing file "
+ this.logger.fine("Writing file "
+ this.bridgeStatsRawFile.getAbsolutePath() + "...");
this.bridgeStatsRawFile.getParentFile().mkdirs();
- BufferedWriter bwBridgeStats = new BufferedWriter(
- new FileWriter(this.bridgeStatsRawFile));
- bwBridgeStats.append("bridge,date,time");
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ this.bridgeStatsRawFile));
+ bw.append("bridge,date,time");
for (String c : this.countries) {
- bwBridgeStats.append("," + c);
+ bw.append("," + c);
}
- bwBridgeStats.append("\n");
- SortedMap<String, Set<double[]>> observationsPerDay =
- new TreeMap<String, Set<double[]>>();
- for (String observation : this.observations.values()) {
- String hashedBridgeIdentity = observation.split(",")[0];
+ bw.append("\n");
+ for (String line : this.bridgeUsersRaw.values()) {
+ String hashedBridgeIdentity = line.split(",")[0];
if (!this.hashedRelays.contains(hashedBridgeIdentity)) {
- bwBridgeStats.append(observation + "\n");
- String[] parts = observation.split(",");
- String date = parts[1];
- double[] users = new double[countries.size()];
- for (int i = 3; i < parts.length; i++) {
- users[i - 3] = Double.parseDouble(parts[i]);
- }
- Set<double[]> perDay = observationsPerDay.get(date);
- if (perDay == null) {
- perDay = new HashSet<double[]>();
- observationsPerDay.put(date, perDay);
- }
- perDay.add(users);
+ bw.append(line + "\n");
}
}
- bwBridgeStats.close();
- this.logger.info("Finished writing file "
+ bw.close();
+ this.logger.fine("Finished writing file "
+ this.bridgeStatsRawFile.getAbsolutePath() + ".");
- this.logger.info("Writing file "
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed to write "
+ + this.bridgeStatsRawFile.getAbsolutePath() + "!", e);
+ }
+ } else {
+ this.logger.fine("Not writing file "
+ + this.bridgeStatsRawFile.getAbsolutePath() + ", because "
+ + "nothing has changed.");
+ }
+
+ /* Aggregate per-day statistics. */
+ if (this.hashedRelaysModified || this.bridgeUsersRawModified) {
+ SortedMap<String, double[]> bridgeUsersPerDay =
+ new TreeMap<String, double[]>();
+ for (String line : this.bridgeUsersRaw.values()) {
+ String[] parts = line.split(",");
+ String hashedBridgeIdentity = parts[0];
+ if (!this.hashedRelays.contains(hashedBridgeIdentity)) {
+ String date = parts[1];
+ double[] users = bridgeUsersPerDay.get(date);
+ if (users == null) {
+ users = new double[countries.size()];
+ bridgeUsersPerDay.put(date, users);
+ }
+ for (int i = 3; i < parts.length; i++) {
+ users[i - 3] += Double.parseDouble(parts[i]);
+ }
+ }
+ }
+ /* Write final results of bridge users per day and country to
+ * <code>stats/bridge-stats</code>. */
+ try {
+ this.logger.fine("Writing file "
+ this.bridgeStatsRawFile.getAbsolutePath() + "...");
this.bridgeStatsFile.getParentFile().mkdirs();
- BufferedWriter bwBridgeStatsDate = new BufferedWriter(
- new FileWriter(this.bridgeStatsFile));
- bwBridgeStatsDate.append("date");
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ this.bridgeStatsFile));
+ bw.append("date");
for (String c : this.countries) {
- bwBridgeStatsDate.append("," + c);
+ bw.append("," + c);
}
- bwBridgeStatsDate.append("\n");
- for (Map.Entry<String, Set<double[]>> e :
- observationsPerDay.entrySet()) {
+ bw.append("\n");
+ /* Memorize last written date to fill missing dates with NA's. */
+ long lastDateMillis = 0L;
+ SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
+ dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ for (Map.Entry<String, double[]> e : bridgeUsersPerDay.entrySet()) {
String date = e.getKey();
- double[] sums = null;
- for (double[] users : e.getValue()) {
- if (sums == null) {
- sums = users;
- } else {
- for (int i = 0; i < sums.length; i++) {
- sums[i] += users[i];
- }
- }
+ long currentDateMillis = dateFormat.parse(date).getTime();
+ while (currentDateMillis - 24L * 60L * 60L * 1000L
+ > lastDateMillis) {
+ lastDateMillis += 24L * 60L * 60L * 1000L;
+ bw.append(dateFormat.format(new Date(lastDateMillis)));
+ for (String c : this.countries) {
+ bw.append(",NA");
+ }
+ bw.append("\n");
}
- bwBridgeStatsDate.append(date);
- for (int i = 0; i < sums.length; i++) {
- bwBridgeStatsDate.append(","
- + String.format("%.2f", sums[i]));
+ lastDateMillis = currentDateMillis;
+ /* Write current observation. */
+ bw.append(date);
+ double[] users = e.getValue();
+ for (int i = 0; i < users.length; i++) {
+ bw.append("," + String.format("%.2f", users[i]));
}
- bwBridgeStatsDate.append("\n");
+ bw.append("\n");
}
- bwBridgeStatsDate.close();
- this.logger.info("Finished writing file "
+ bw.close();
+ this.logger.fine("Finished writing file "
+ this.bridgeStatsFile.getAbsolutePath() + ".");
} catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed writing "
- + this.bridgeStatsRawFile.getAbsolutePath() + " or "
+ this.logger.log(Level.WARNING, "Failed to write "
+ + this.bridgeStatsFile.getAbsolutePath() + "!", e);
+ } catch (ParseException e) {
+ this.logger.log(Level.WARNING, "Failed to write "
+ this.bridgeStatsFile.getAbsolutePath() + "!", e);
}
+ } else {
+ this.logger.fine("Not writing file "
+ + this.bridgeStatsFile.getAbsolutePath() + ", because nothing "
+ + "has changed.");
}
+
+ /* Set modification flags to false again. */
+ this.bridgeUsersRawModified = this.hashedRelaysModified = false;
}
}
diff --git a/src/ConsensusStatsFileHandler.java b/src/ConsensusStatsFileHandler.java
index 83128a3..ceb7a48 100644
--- a/src/ConsensusStatsFileHandler.java
+++ b/src/ConsensusStatsFileHandler.java
@@ -83,10 +83,10 @@ public class ConsensusStatsFileHandler {
private Logger logger;
/**
- * Initializes <code>ConsensusStatsFileHandler</code>, including reading
- * in intermediate results files <code>stats/consensus-stats-raw</code>
- * and <code>stats/bridge-consensus-stats-raw</code> and final results
- * file <code>stats/consensus-stats</code>.
+ * Initializes this class, including reading in intermediate results
+ * files <code>stats/consensus-stats-raw</code> and
+ * <code>stats/bridge-consensus-stats-raw</code> and final results file
+ * <code>stats/consensus-stats</code>.
*/
public ConsensusStatsFileHandler() {
@@ -110,13 +110,14 @@ public class ConsensusStatsFileHandler {
/* Read in number of relays with flags set per consensus. */
if (this.consensusStatsRawFile.exists()) {
try {
- this.logger.info("Reading file "
+ this.logger.fine("Reading file "
+ this.consensusStatsRawFile.getAbsolutePath() + "...");
BufferedReader br = new BufferedReader(new FileReader(
this.consensusStatsRawFile));
String line = null;
while ((line = br.readLine()) != null) {
- if (line.startsWith("#") || line.startsWith("date")) {
+ if (line.startsWith("date")) {
+ /* Skip headers. */
continue;
}
String[] parts = line.split(",");
@@ -130,7 +131,7 @@ public class ConsensusStatsFileHandler {
this.relaysRaw.put(dateTime, line);
}
br.close();
- this.logger.info("Finished reading file "
+ this.logger.fine("Finished reading file "
+ this.consensusStatsRawFile.getAbsolutePath() + ".");
} catch (IOException e) {
this.logger.log(Level.WARNING, "Failed to read file "
@@ -141,13 +142,14 @@ public class ConsensusStatsFileHandler {
/* Read in number of running bridges per bridge status. */
if (this.bridgeConsensusStatsRawFile.exists()) {
try {
- this.logger.info("Reading file "
+ this.logger.fine("Reading file "
+ this.bridgeConsensusStatsRawFile.getAbsolutePath() + "...");
BufferedReader br = new BufferedReader(new FileReader(
this.bridgeConsensusStatsRawFile));
String line = null;
while ((line = br.readLine()) != null) {
- if (line.startsWith("#") || line.startsWith("date")) {
+ if (line.startsWith("date")) {
+ /* Skip headers. */
continue;
}
String[] parts = line.split(",");
@@ -161,7 +163,7 @@ public class ConsensusStatsFileHandler {
this.bridgesRaw.put(dateTime, line);
}
br.close();
- this.logger.info("Finished reading file "
+ this.logger.fine("Finished reading file "
+ this.bridgeConsensusStatsRawFile.getAbsolutePath() + ".");
} catch (IOException e) {
this.logger.log(Level.WARNING, "Failed to read file "
@@ -174,13 +176,14 @@ public class ConsensusStatsFileHandler {
* bridges per day. */
if (this.consensusStatsFile.exists()) {
try {
- this.logger.info("Reading file "
+ this.logger.fine("Reading file "
+ this.consensusStatsFile.getAbsolutePath() + "...");
BufferedReader br = new BufferedReader(new FileReader(
this.consensusStatsFile));
String line = null;
while ((line = br.readLine()) != null) {
- if (line.startsWith("#") || line.startsWith("date")) {
+ if (line.startsWith("date")) {
+ /* Skip headers. */
continue;
}
String[] parts = line.split(",");
@@ -204,7 +207,7 @@ public class ConsensusStatsFileHandler {
}
}
br.close();
- this.logger.info("Finished reading file "
+ this.logger.fine("Finished reading file "
+ this.consensusStatsFile.getAbsolutePath() + ".");
} catch (IOException e) {
this.logger.log(Level.WARNING, "Failed to write file "
@@ -221,11 +224,11 @@ public class ConsensusStatsFileHandler {
* flags in a given consensus to the existing observations.
*/
public void addConsensusResults(String validAfter, int exit, int fast,
- int guard, int running, int stable) throws IOException {
+ int guard, int running, int stable) {
String line = validAfter + "," + exit + "," + fast + "," + guard + ","
+ running + "," + stable;
if (!this.relaysRaw.containsKey(validAfter)) {
- this.logger.fine("Adding new relay numbers: " + line);
+ this.logger.finer("Adding new relay numbers: " + line);
this.relaysRaw.put(validAfter, line);
this.relaysRawModified = true;
} else if (!line.equals(this.relaysRaw.get(validAfter))) {
@@ -242,11 +245,10 @@ public class ConsensusStatsFileHandler {
* Adds the intermediate results of the number of running bridges in a
* given bridge status to the existing observations.
*/
- public void addBridgeConsensusResults(String published, int running)
- throws IOException {
+ public void addBridgeConsensusResults(String published, int running) {
String line = published + "," + running;
if (!this.bridgesRaw.containsKey(published)) {
- this.logger.fine("Adding new bridge numbers: " + line);
+ this.logger.finer("Adding new bridge numbers: " + line);
this.bridgesRaw.put(published, line);
this.bridgesRawModified = true;
} else if (!line.equals(this.bridgesRaw.get(published))) {
@@ -291,12 +293,12 @@ public class ConsensusStatsFileHandler {
+ (running/ consensuses) + "," + (stable/ consensuses);
/* Are our results new? */
if (!this.relaysPerDay.containsKey(tempDate)) {
- this.logger.fine("Adding new average relay numbers: "
+ this.logger.finer("Adding new average relay numbers: "
+ line);
this.relaysPerDay.put(tempDate, line);
writeConsensusStats = true;
} else if (!line.equals(this.relaysPerDay.get(tempDate))) {
- this.logger.info("Replacing existing average relay numbers "
+ this.logger.finer("Replacing existing average relay numbers "
+ "(" + this.relaysPerDay.get(tempDate) + " with new "
+ "numbers: " + line);
this.relaysPerDay.put(tempDate, line);
@@ -339,12 +341,12 @@ public class ConsensusStatsFileHandler {
String line = "," + (brunning / statuses);
/* Are our results new? */
if (!this.bridgesPerDay.containsKey(tempDate)) {
- this.logger.fine("Adding new average bridge numbers: "
+ this.logger.finer("Adding new average bridge numbers: "
+ tempDate + line);
this.bridgesPerDay.put(tempDate, line);
writeConsensusStats = true;
} else if (!line.equals(this.bridgesPerDay.get(tempDate))) {
- this.logger.info("Replacing existing average bridge "
+ this.logger.finer("Replacing existing average bridge "
+ "numbers (" + this.bridgesPerDay.get(tempDate)
+ " with new numbers: " + line);
this.bridgesPerDay.put(tempDate, line);
@@ -366,7 +368,7 @@ public class ConsensusStatsFileHandler {
/* Write raw numbers of relays with flags set to disk. */
if (this.relaysRawModified) {
try {
- this.logger.info("Writing file "
+ this.logger.fine("Writing file "
+ this.consensusStatsRawFile.getAbsolutePath() + "...");
this.consensusStatsRawFile.getParentFile().mkdirs();
BufferedWriter bw = new BufferedWriter(new FileWriter(
@@ -376,14 +378,14 @@ public class ConsensusStatsFileHandler {
bw.append(line + "\n");
}
bw.close();
- this.logger.info("Finished writing file "
+ this.logger.fine("Finished writing file "
+ this.consensusStatsRawFile.getAbsolutePath() + ".");
} catch (IOException e) {
this.logger.log(Level.WARNING, "Failed to write file "
+ this.consensusStatsRawFile.getAbsolutePath() + "!", e);
}
} else {
- this.logger.info("Not writing file "
+ this.logger.fine("Not writing file "
+ this.consensusStatsRawFile.getAbsolutePath() + ", because "
+ "nothing has changed.");
}
@@ -391,7 +393,7 @@ public class ConsensusStatsFileHandler {
/* Write raw numbers of running bridges to disk. */
if (this.bridgesRawModified) {
try {
- this.logger.info("Writing file "
+ this.logger.fine("Writing file "
+ this.bridgeConsensusStatsRawFile.getAbsolutePath() + "...");
this.bridgeConsensusStatsRawFile.getParentFile().mkdirs();
BufferedWriter bw = new BufferedWriter(
@@ -401,7 +403,7 @@ public class ConsensusStatsFileHandler {
bw.append(line + "\n");
}
bw.close();
- this.logger.info("Finished writing file "
+ this.logger.fine("Finished writing file "
+ this.bridgeConsensusStatsRawFile.getAbsolutePath() + ".");
} catch (IOException e) {
this.logger.log(Level.WARNING, "Failed to write file "
@@ -409,7 +411,7 @@ public class ConsensusStatsFileHandler {
e);
}
} else {
- this.logger.info("Not writing file "
+ this.logger.fine("Not writing file "
+ this.bridgeConsensusStatsRawFile.getAbsolutePath()
+ ", because nothing has changed.");
}
@@ -418,7 +420,7 @@ public class ConsensusStatsFileHandler {
* to disk. */
if (writeConsensusStats) {
try {
- this.logger.info("Writing file "
+ this.logger.fine("Writing file "
+ this.consensusStatsFile.getAbsolutePath() + "...");
this.consensusStatsFile.getParentFile().mkdirs();
BufferedWriter bw = new BufferedWriter(new FileWriter(
@@ -453,7 +455,7 @@ public class ConsensusStatsFileHandler {
currentDateMillis += 24L * 60L * 60L * 1000L;
}
bw.close();
- this.logger.info("Finished writing file "
+ this.logger.fine("Finished writing file "
+ this.consensusStatsFile.getAbsolutePath() + ".");
} catch (IOException e) {
this.logger.log(Level.WARNING, "Failed to write file "
@@ -463,10 +465,13 @@ public class ConsensusStatsFileHandler {
+ this.consensusStatsFile.getAbsolutePath() + "!", e);
}
} else {
- this.logger.info("Not writing file "
+ this.logger.fine("Not writing file "
+ this.consensusStatsFile.getAbsolutePath()
+ ", because nothing has changed.");
}
+
+ /* Set modification flags to false again. */
+ this.relaysRawModified = this.bridgesRawModified = false;
}
}
diff --git a/src/DirreqStatsFileHandler.java b/src/DirreqStatsFileHandler.java
index a815701..347d189 100644
--- a/src/DirreqStatsFileHandler.java
+++ b/src/DirreqStatsFileHandler.java
@@ -1,115 +1,218 @@
import java.io.*;
+import java.text.*;
import java.util.*;
import java.util.logging.*;
-import java.text.*;
/**
- *
+ * Extracts statistics on v3 directory requests by country from extra-info
+ * descriptors and writes them to a CSV file that is easily parsable by R.
+ * Parse results come from <code>RelayDescriptorParser</code> and are
+ * written to <code>stats/dirreq-stats</code>.
*/
public class DirreqStatsFileHandler {
+
+ /**
+ * Two-letter country codes of countries that we care about.
+ */
private SortedSet<String> countries;
+
+ /**
+ * Results file containing v3 directory requests by country.
+ */
private File dirreqStatsFile;
- private SortedMap<String, String> observations;
- private boolean modified;
+
+ /**
+ * Directory requests by directory and date. Map keys are directory and
+ * date written as "directory,date", map values are lines as read from
+ * <code>stats/dirreq-stats</code>.
+ */
+ private SortedMap<String, String> dirreqs;
+
+ /**
+ * Modification flag for directory requests stored in memory. This flag
+ * is used to decide whether the contents of <code>dirreqs</code> need
+ * to be written to disk during <code>writeFile</code>.
+ */
+ private boolean dirreqsModified;
+
+ /**
+ * Logger for this class.
+ */
private Logger logger;
+
+ /**
+ * Initializes this class, including reading in previous results from
+ * <code>stats/dirreq-stats</code>.
+ */
public DirreqStatsFileHandler(SortedSet<String> countries) {
- this.dirreqStatsFile = new File("stats/dirreq-stats");
+
+ /* Memorize the set of countries we care about. */
this.countries = countries;
- this.observations = new TreeMap<String, String>();
- this.logger =
- Logger.getLogger(DirreqStatsFileHandler.class.getName());
+
+ /* Initialize local data structure to hold observations received from
+ * RelayDescriptorParser. */
+ this.dirreqs = new TreeMap<String, String>();
+
+ /* Initialize file name for observations file. */
+ this.dirreqStatsFile = new File("stats/dirreq-stats");
+
+ /* Initialize logger. */
+ this.logger = Logger.getLogger(
+ DirreqStatsFileHandler.class.getName());
+
+ /* Read in previously stored results. */
if (this.dirreqStatsFile.exists()) {
- this.logger.info("Reading file "
- + this.dirreqStatsFile.getAbsolutePath() + "...");
try {
+ this.logger.fine("Reading file "
+ + this.dirreqStatsFile.getAbsolutePath() + "...");
BufferedReader br = new BufferedReader(new FileReader(
this.dirreqStatsFile));
String line = br.readLine();
if (line != null) {
- String[] headers = line.split(",");
- for (int i = 2; i < headers.length - 1; i++) {
- this.countries.add(headers[i]);
- }
- while ((line = br.readLine()) != null) {
- String[] readData = line.split(",");
- String dirNickname = readData[0];
- String date = readData[1];
- if (!readData[readData.length - 1].equals("NA")) {
- Map<String, String> obs = new HashMap<String, String>();
- for (int i = 2; i < readData.length - 1; i++) {
- obs.put(headers[i], readData[i]);
+ /* The first line should contain headers that we need to parse
+ * in order to learn what countries we were interested in when
+ * writing this file. */
+ if (!line.startsWith("directory,date,")) {
+ this.logger.warning("Incorrect first line '" + line + "' in "
+ + this.dirreqStatsFile.getAbsolutePath() + "! This line "
+ + "should contain headers! Aborting to read in this "
+ + "file!");
+ } else {
+ String[] headers = line.split(",");
+ for (int i = 2; i < headers.length - 1; i++) {
+ this.countries.add(headers[i]);
+ }
+ /* Read in the rest of the file. */
+ while ((line = br.readLine()) != null) {
+ String[] parts = line.split(",");
+ if (parts.length != headers.length) {
+ this.logger.warning("Corrupt line '" + line + "' in file "
+ + this.dirreqStatsFile.getAbsolutePath() + "! This "
+ + "line has either fewer or more columns than the "
+ + "file has column headers! Aborting to read this "
+ + "file!");
+ break;
+ }
+ String directory = parts[0];
+ String date = parts[1];
+ /* If the share column contains NA, all the other columns do.
+ * We only want to read in non-NA lines here. */
+ if (!parts[parts.length - 1].equals("NA")) {
+ Map<String, String> obs = new HashMap<String, String>();
+ for (int i = 2; i < parts.length - 1; i++) {
+ obs.put(headers[i], parts[i]);
+ }
+ String share = parts[parts.length - 1];
+ this.addObs(directory, date, obs, share);
}
- String share = readData[readData.length - 1];
- this.addObs(dirNickname, date, obs, share);
}
}
}
br.close();
- this.logger.info("Finished reading file "
+ this.logger.fine("Finished reading file "
+ this.dirreqStatsFile.getAbsolutePath() + ".");
} catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed reading file "
+ this.logger.log(Level.WARNING, "Failed to read file "
+ this.dirreqStatsFile.getAbsolutePath() + "!", e);
}
}
+
+ /* Set modification flag to false. */
+ this.dirreqsModified = false;
}
- public void addObs(String dirNickname, String date,
- Map<String, String> obs, String share) throws IOException {
- String obsKey = dirNickname + "," + date;
- StringBuilder sb = new StringBuilder(obsKey);
+
+ /**
+ * Adds observations on the number of directory requests by country as
+ * seen on a directory at a given date that expected to see the given
+ * share of all directory requests in the network.
+ */
+ public void addObs(String directory, String date,
+ Map<String, String> obs, String share) {
+ String key = directory + "," + date;
+ StringBuilder sb = new StringBuilder(key);
for (String c : this.countries) {
sb.append("," + (obs.containsKey(c) ? obs.get(c) : "0"));
}
sb.append("," + share);
- this.observations.put(obsKey, sb.toString());
- this.modified = true;
+ String value = sb.toString();
+ if (!this.dirreqs.containsKey(key)) {
+ this.logger.finer("Adding new directory request numbers: " + value);
+ this.dirreqs.put(key, value);
+ this.dirreqsModified = true;
+ } else if (value.compareTo(this.dirreqs.get(key)) > 0) {
+ this.logger.warning("The directory request numbers we were just "
+ + "given (" + value + ") are different from what we learned "
+ + "before (" + this.dirreqs.get(key) + "! Overwriting!");
+ this.dirreqs.put(key, value);
+ this.dirreqsModified = true;
+ }
}
+
+ /**
+ * Writes the v3 directory request numbers from memory to
+ * <code>stats/dirreq-stats</code> if they have changed.
+ */
public void writeFile() {
- if (this.modified && !this.observations.isEmpty()) {
+
+ /* Only write file if we learned something new. */
+ if (this.dirreqsModified) {
try {
- this.logger.info("Writing file "
+ this.logger.fine("Writing file "
+ this.dirreqStatsFile.getAbsolutePath() + "...");
this.dirreqStatsFile.getParentFile().mkdirs();
- BufferedWriter bwDirreqStats = new BufferedWriter(
- new FileWriter(this.dirreqStatsFile));
- bwDirreqStats.append("directory,date");
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ this.dirreqStatsFile));
+ /* Write header. */
+ bw.append("directory,date");
for (String country : this.countries) {
- bwDirreqStats.append("," + country);
+ bw.append("," + country);
}
- bwDirreqStats.append(",share\n");
- long lastDate = 0L;
+ bw.append(",share\n");
+ /* Memorize last written date and directory to fill missing dates
+ * with NA's. */
+ long lastDateMillis = 0L;
String lastDirectory = null;
- SimpleDateFormat timeFormat = new SimpleDateFormat("yyyy-MM-dd");
- timeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- for (String observation : this.observations.values()) {
- String currentDirectory = observation.split(",")[0];
- long currentDate = timeFormat.parse(observation.split(",")[1]).
- getTime();
- while (currentDirectory.equals(lastDirectory)
- && lastDate > 0L && currentDate - 86400000L > lastDate) {
- lastDate += 86400000L;
- bwDirreqStats.append(currentDirectory + ","
- + timeFormat.format(new Date(lastDate)));
+ SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
+ dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ for (String line : this.dirreqs.values()) {
+ /* Fill missing dates with NA's. */
+ String[] parts = line.split(",");
+ String currentDirectory = parts[0];
+ long currentDateMillis = dateFormat.parse(parts[1]).getTime();
+ while (currentDirectory.equals(lastDirectory) &&
+ currentDateMillis - 24L * 60L * 60L * 1000L
+ > lastDateMillis) {
+ lastDateMillis += 24L * 60L * 60L * 1000L;
+ bw.append(currentDirectory + ","
+ + dateFormat.format(new Date(lastDateMillis)));
for (String country : this.countries) {
- bwDirreqStats.append(",NA");
+ bw.append(",NA");
}
- bwDirreqStats.append(",NA\n");
+ bw.append(",NA\n");
}
- lastDate = currentDate;
+ lastDateMillis = currentDateMillis;
lastDirectory = currentDirectory;
- bwDirreqStats.append(observation + "\n");
+ /* Write current observation. */
+ bw.append(line + "\n");
}
- bwDirreqStats.close();
- this.logger.info("Finished writing file "
+ bw.close();
+ this.logger.fine("Finished writing file "
+ this.dirreqStatsFile.getAbsolutePath() + ".");
} catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed writing file "
+ this.logger.log(Level.WARNING, "Failed to write file "
+ this.dirreqStatsFile.getAbsolutePath() + "!", e);
} catch (ParseException e) {
- this.logger.log(Level.WARNING, "Failed writing file "
+ this.logger.log(Level.WARNING, "Failed to write file "
+ this.dirreqStatsFile.getAbsolutePath() + "!", e);
}
+ } else {
+ this.logger.fine("Not writing file "
+ + this.dirreqStatsFile.getAbsolutePath() + ", because "
+ + "nothing has changed.");
}
+
+ /* Set modification flag to false again. */
+ this.dirreqsModified = false;
}
}
diff --git a/src/Main.java b/src/Main.java
index 1902116..3c02697 100644
--- a/src/Main.java
+++ b/src/Main.java
@@ -89,7 +89,7 @@ public class Main {
// Write updated stats files to disk
if (bsfh != null) {
- bsfh.writeFile();
+ bsfh.writeFiles();
bsfh = null;
}
if (csfh != null) {
--
1.6.5
More information about the tor-commits
mailing list