[tor-commits] [metrics-tasks/master] Update analysis code for #3261.
karsten at torproject.org
karsten at torproject.org
Fri Apr 27 06:52:55 UTC 2012
commit 0dddee5316aebf046db58858af81220bdf3fcb6a
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Fri Apr 27 08:50:31 2012 +0200
Update analysis code for #3261.
---
task-3261/.gitignore | 2 +
task-3261/AggregateStats.java | 122 +++++++++
task-3261/AnalyzeDescriptorParts.java | 315 ++++++++++++++++++++++
task-3261/AnalyzeStatsCoverage.java | 478 ---------------------------------
task-3261/ExtractDescriptorParts.java | 172 ++++++++++++
task-3261/README | 38 +++-
task-3261/plot.R | 65 +++++
task-3261/stats-coverage.R | 25 --
8 files changed, 711 insertions(+), 506 deletions(-)
diff --git a/task-3261/.gitignore b/task-3261/.gitignore
index 2bfd23b..5f2b4dc 100644
--- a/task-3261/.gitignore
+++ b/task-3261/.gitignore
@@ -2,6 +2,8 @@
*.png
*.pdf
*.csv
+bridge-network-statuses
+parse-history
in/
temp/
*.jar
diff --git a/task-3261/AggregateStats.java b/task-3261/AggregateStats.java
new file mode 100755
index 0000000..73f7279
--- /dev/null
+++ b/task-3261/AggregateStats.java
@@ -0,0 +1,122 @@
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.text.SimpleDateFormat;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TimeZone;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+/* Aggregate half-hourly per-bridge data to daily statistics. */
+public class AggregateStats {
+  public static void main(String[] args) throws Exception {
+
+    /* Read file containing publication times of bridge statuses and count
+     * statuses per day.  The date is the text before the first space. */
+    SortedMap<String, Long> publishedStatuses =
+        new TreeMap<String, Long>();
+    File statusFile = new File("bridge-network-statuses");
+    if (!statusFile.exists()) {
+      System.err.println(statusFile.getAbsolutePath() + " does not "
+          + "exist. Exiting.");
+      System.exit(1);
+    }
+    BufferedReader br = new BufferedReader(new FileReader(statusFile));
+    try {
+      String line;
+      while ((line = br.readLine()) != null) {
+        String date = line.split(" ")[0];
+        Long count = publishedStatuses.get(date);
+        publishedStatuses.put(date, count == null ? 1L : count + 1L);
+      }
+    } finally {
+      br.close();
+    }
+
+    /* Aggregate single observations in memory: count eval-out.csv lines
+     * per day (first column) and key (third to fifth column). */
+    SortedMap<String, Map<String, Long>> aggregatedStats =
+        new TreeMap<String, Map<String, Long>>();
+    SortedSet<String> allKeys = new TreeSet<String>();
+    File evalOutFile = new File("eval-out.csv");
+    if (!evalOutFile.exists()) {
+      System.err.println(evalOutFile.getAbsolutePath() + " does not "
+          + "exist. Exiting.");
+      System.exit(1);
+    }
+    BufferedReader ebr = new BufferedReader(new FileReader(evalOutFile));
+    try {
+      String line;
+      while ((line = ebr.readLine()) != null) {
+        String[] parts = line.split(",");
+        String date = parts[0].split(" ")[0];
+        String key = parts[2] + "," + parts[3] + "," + parts[4];
+        allKeys.add(key);
+        Map<String, Long> stats = aggregatedStats.get(date);
+        if (stats == null) {
+          stats = new HashMap<String, Long>();
+          aggregatedStats.put(date, stats);
+        }
+        Long count = stats.get(key);
+        stats.put(key, count == null ? 1L : count + 1L);
+      }
+    } finally {
+      ebr.close();
+    }
+
+    /* Write aggregated statistics to aggregated.csv. */
+    File aggregatedFile = new File("aggregated.csv");
+    BufferedWriter abw = new BufferedWriter(new FileWriter(
+        aggregatedFile));
+    try {
+      abw.write("date,reported,discarded,reason,bridges,statuses\n");
+      long previousDateMillis = -1L;
+      final long DAY = 24L * 60L * 60L * 1000L;
+      SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
+      dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+      for (Map.Entry<String, Map<String, Long>> e :
+          aggregatedStats.entrySet()) {
+        String date = e.getKey();
+        long currentDateMillis = dateFormat.parse(date).getTime();
+        /* Fill gaps between two observed days with NA rows. */
+        while (previousDateMillis > -1L &&
+            currentDateMillis - previousDateMillis > DAY) {
+          previousDateMillis += DAY;
+          String tempDate = dateFormat.format(previousDateMillis);
+          for (String key : allKeys) {
+            abw.write(tempDate + "," + key + ",NA,0\n");
+          }
+        }
+        previousDateMillis = currentDateMillis;
+        /* Report NA for days with fewer than 40 statuses or when either of
+         * the two following days has no observations. */
+        String nextDate = dateFormat.format(currentDateMillis + DAY);
+        String nextPlusOneDate = dateFormat.format(currentDateMillis
+            + 2 * DAY);
+        long statuses = publishedStatuses.containsKey(date) ?
+            publishedStatuses.get(date) : 0L;
+        boolean incomplete = statuses < 40 ||
+            !aggregatedStats.containsKey(nextDate) ||
+            !aggregatedStats.containsKey(nextPlusOneDate);
+        Map<String, Long> stats = e.getValue();
+        for (String key : allKeys) {
+          String bridges = "NA";
+          if (!incomplete) {
+            /* Long division truncates; statuses is at least 40 here. */
+            bridges = stats.containsKey(key) ?
+                String.valueOf(stats.get(key) / statuses) : "0";
+          }
+          abw.write(date + "," + key + "," + bridges + "," + statuses + "\n");
+        }
+      }
+    } finally {
+      abw.close();
+    }
+  }
+}
+
diff --git a/task-3261/AnalyzeDescriptorParts.java b/task-3261/AnalyzeDescriptorParts.java
new file mode 100755
index 0000000..7f4bbc4
--- /dev/null
+++ b/task-3261/AnalyzeDescriptorParts.java
@@ -0,0 +1,315 @@
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.text.SimpleDateFormat;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TimeZone;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+/* Analyze descriptor parts bridge by bridge and determine whether a
+ * bridge reported usage statistics at a given time, and if not, find out
+ * why not. */
+public class AnalyzeDescriptorParts {
+  public static void main(String[] args) throws Exception {
+
+    /* Define paths: we read descriptor part files from temp/ and write
+     * statistics on half hour detail to eval-out.csv (overwriting it). */
+    File tempDirectory = new File("temp");
+    File evalOutFile = new File("eval-out.csv");
+    File[] tempFiles = tempDirectory.listFiles();
+    if (tempFiles == null) {
+      System.err.println(tempDirectory.getAbsolutePath() + " is not a "
+          + "readable directory. Exiting.");
+      System.exit(1);
+    }
+
+    /* Parse descriptor part files bridge by bridge. */
+    SimpleDateFormat dateTimeFormat =
+        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    final long HALF_HOUR = 30L * 60L * 1000L;
+    BufferedWriter ebw = new BufferedWriter(new FileWriter(evalOutFile));
+    for (File tempFile : tempFiles) {
+      if (!tempFile.isFile()) {
+        continue; /* FileReader cannot open subdirectories. */
+      }
+      String fingerprint = tempFile.getName();
+      BufferedReader br = new BufferedReader(new FileReader(tempFile));
+      String line;
+
+      /* For each bridge, determine when it was first seen as relay. All
+       * timestamps are in half hours since 1970-01-01 00:00:00 UTC. */
+      long firstRunningRelay = Long.MAX_VALUE;
+
+      /* For each time the bridge was listed in a bridge network status as
+       * Running, remember the status publication time and referenced
+       * descriptor digest. */
+      SortedMap<Long, String> runningBridgeHalfHours =
+          new TreeMap<Long, String>();
+
+      /* For each descriptor published by the bridge, remember seven
+       * timestamps in an array:
+       * 0: when the bridge was started due to the descriptor publication
+       *    time and reported uptime,
+       * 1: when the descriptor was published,
+       * 2: when the descriptor was first referenced in a status,
+       * 3: when the descriptor was last referenced in status,
+       * 4: when the first descriptor in the same uptime session was first
+       *    referenced in a status,
+       * 5: when the last descriptor in the same uptime session was last
+       *    referenced in a status, and
+       * 6: when the last descriptor in the same uptime session was
+       *    published. */
+      Map<String, long[]> descriptorSessions =
+          new HashMap<String, long[]>();
+
+      /* For each descriptor, remember the platform string. */
+      Map<String, String> descriptorPlatforms =
+          new HashMap<String, String>();
+
+      /* For each bridge-stats or geoip-stats line, remember a long[] with
+       * two timestamps and a boolean:
+       * 0: when the statistics interval started,
+       * 1: when the statistics interval ended,
+       * 2: whether the bridge reported its geoip file digest (only
+       *    0.2.3.x or higher). */
+      SortedMap<Long, long[]> bridgeStats = new TreeMap<Long, long[]>(),
+          geoipStats = new TreeMap<Long, long[]>();
+
+      /* Parse the file in temp/ line by line. */
+      while ((line = br.readLine()) != null) {
+
+        /* Remember when a descriptor was published and which platform
+         * string it contained. */
+        if (line.startsWith("server-descriptor ")) {
+          String[] parts = line.split(" ");
+          long publishedMillis = dateTimeFormat.parse(parts[1] + " "
+              + parts[2]).getTime();
+          long publishedHalfHour = publishedMillis / HALF_HOUR + 1L;
+          String descriptor = parts[3];
+          long startedHalfHour = (publishedMillis
+              - Long.parseLong(parts[4]) * 1000L) / HALF_HOUR + 1L;
+          long[] descriptorSession = descriptorSessions.get(descriptor);
+          if (descriptorSession == null) {
+            descriptorSession = new long[7];
+            descriptorSessions.put(descriptor, descriptorSession);
+          }
+          if (descriptorSession[0] == 0) {
+            descriptorSession[0] = startedHalfHour;
+            descriptorSession[1] = publishedHalfHour;
+          }
+          int torIndex = line.indexOf("Tor ");
+          if (torIndex >= 0) {
+            descriptorPlatforms.put(descriptor, line.substring(torIndex));
+          }
+
+        /* Remember when a descriptor was first and last referenced from a
+         * bridge network status. */
+        } else if (line.startsWith("running-bridge ")) {
+          String[] parts = line.split(" ");
+          long publishedMillis = dateTimeFormat.parse(parts[1] + " "
+              + parts[2]).getTime();
+          long publishedHalfHour = publishedMillis / HALF_HOUR;
+          String descriptor = parts[3];
+          long[] descriptorSession = descriptorSessions.get(descriptor);
+          if (descriptorSession == null) {
+            descriptorSession = new long[7];
+            descriptorSessions.put(descriptor, descriptorSession);
+          }
+          /* Zero marks a timestamp that has not been set yet. */
+          if (descriptorSession[2] == 0 ||
+              publishedHalfHour < descriptorSession[2]) {
+            descriptorSession[2] = publishedHalfHour;
+          }
+          if (publishedHalfHour > descriptorSession[3]) {
+            descriptorSession[3] = publishedHalfHour;
+          }
+          runningBridgeHalfHours.put(publishedHalfHour, descriptor);
+
+        /* Remember the start and end of a bridge-stats or geoip-stats
+         * interval, and remember whether the extra-info descriptor
+         * contained a geoip-db-digest line. */
+        } else if (line.startsWith("bridge-stats ") ||
+            line.startsWith("geoip-stats ")) {
+          String[] parts = line.split(" ");
+          long statsEndMillis = dateTimeFormat.parse(parts[1] + " "
+              + parts[2]).getTime();
+          long statsEnd = statsEndMillis / HALF_HOUR;
+          long statsStart = (statsEndMillis - Long.parseLong(parts[3])
+              * 1000L) / HALF_HOUR;
+          long[] stats = new long[] { statsStart, statsEnd,
+              parts[4].equals("NA") ? 0L : 1L };
+          SortedMap<Long, long[]> target =
+              line.startsWith("bridge-stats ") ? bridgeStats : geoipStats;
+          target.put(statsStart, stats);
+
+        /* Remember when this bridge was first seen as a relay in the
+         * consensus. */
+        } else if (line.startsWith("running-relay ")) {
+          long runningRelayHalfHour = dateTimeFormat.parse(line.substring(
+              "running-relay ".length())).getTime() / HALF_HOUR;
+          firstRunningRelay = Math.min(firstRunningRelay,
+              runningRelayHalfHour);
+        }
+      }
+      br.close();
+
+      /* Sort descriptors by their first reference in a bridge network
+       * status. */
+      SortedMap<Long, String> descriptorsByFirstReferenced =
+          new TreeMap<Long, String>();
+      for (Map.Entry<String, long[]> e : descriptorSessions.entrySet()) {
+        if (e.getValue()[2] == 0) {
+          continue;
+        }
+        descriptorsByFirstReferenced.put(e.getValue()[2], e.getKey());
+      }
+      if (descriptorsByFirstReferenced.isEmpty()) {
+        continue;
+      }
+
+      /* Go through list of descriptors and see if two or more of them
+       * belong to the same bridge uptime session. Two descriptors are
+       * considered as part of the same uptime session if a) they are
+       * referenced from two subsequent statuses and b) the start time in
+       * the second descriptor lies before the publication time of the
+       * first descriptor. First make a list of all descriptors of a
+       * session and then update their long[] values to contain session
+       * information. */
+      long[] previousDescriptorTimestamps = null;
+      long firstStatusInSession = Long.MAX_VALUE,
+          lastStatusInSession = -1L, lastDescriptorPublished = -1L;
+      Set<String> descriptorsInSession = new HashSet<String>();
+      for (String descriptor : descriptorsByFirstReferenced.values()) {
+        long[] currentDescriptorTimestamps =
+            descriptorSessions.get(descriptor);
+        if (previousDescriptorTimestamps != null) {
+          boolean sameSession =
+              previousDescriptorTimestamps[3] + 1L ==
+              currentDescriptorTimestamps[2] &&
+              currentDescriptorTimestamps[0] <=
+              previousDescriptorTimestamps[1];
+          if (!sameSession) {
+            for (String descriptorInSession : descriptorsInSession) {
+              long[] descriptorTimestamps = descriptorSessions.get(
+                  descriptorInSession);
+              descriptorTimestamps[4] = firstStatusInSession;
+              descriptorTimestamps[5] = lastStatusInSession;
+              descriptorTimestamps[6] = lastDescriptorPublished;
+            }
+            firstStatusInSession = Long.MAX_VALUE;
+            lastStatusInSession = lastDescriptorPublished = -1L;
+            descriptorsInSession.clear();
+          }
+        }
+        firstStatusInSession = Math.min(firstStatusInSession,
+            currentDescriptorTimestamps[2]);
+        lastStatusInSession = Math.max(lastStatusInSession,
+            currentDescriptorTimestamps[3]);
+        lastDescriptorPublished = Math.max(lastDescriptorPublished,
+            currentDescriptorTimestamps[1]);
+        descriptorsInSession.add(descriptor);
+        previousDescriptorTimestamps = currentDescriptorTimestamps;
+      }
+      for (String descriptorInSession : descriptorsInSession) {
+        long[] descriptorTimestamps = descriptorSessions.get(
+            descriptorInSession);
+        descriptorTimestamps[4] = firstStatusInSession;
+        descriptorTimestamps[5] = lastStatusInSession;
+        descriptorTimestamps[6] = lastDescriptorPublished;
+      }
+
+      /* Go through all statuses listing this bridge as Running, determine
+       * if it reported usage statistics and if they were considered for
+       * aggregation, and find out possible reasons for the bridge not
+       * reporting usage statistics. */
+      for (Map.Entry<Long, String> e :
+          runningBridgeHalfHours.entrySet()) {
+        long statusPublished = e.getKey();
+        String descriptor = e.getValue();
+        String platform = descriptorPlatforms.get(descriptor);
+        boolean reported = false, discarded = false;
+        String reason = "none";
+        if (firstRunningRelay <= statusPublished) {
+          /* The bridge was running as a relay before. */
+          discarded = true;
+          reason = "runasrelay";
+        }
+        if (!geoipStats.headMap(statusPublished + 1).isEmpty()) {
+          long[] stats = geoipStats.get(geoipStats.headMap(statusPublished
+              + 1).lastKey());
+          if (stats[0] <= statusPublished && stats[1] > statusPublished) {
+            /* Status publication time falls into stats interval. */
+            reported = true;
+            if (platform != null && platform.compareTo("Tor 0.2.2") > 0) {
+              /* geoip stats published by versions 0.2.2.x or higher are
+               * buggy and therefore discarded. */
+              discarded = true;
+              reason = "geoip022";
+            }
+          }
+        }
+        if (!bridgeStats.headMap(statusPublished + 1).isEmpty()) {
+          long[] stats = bridgeStats.get(bridgeStats.headMap(
+              statusPublished + 1).lastKey());
+          if (stats[0] <= statusPublished && stats[1] > statusPublished) {
+            /* Status publication time falls into stats interval. */
+            reported = true;
+            if (platform != null && platform.compareTo("Tor 0.2.3") > 0 &&
+                stats[2] == 0) {
+              /* The bridge running version 0.2.3.x did not have a geoip
+               * file and therefore published bad bridge statistics. */
+              discarded = true;
+              reason = "nogeoipfile";
+            }
+          }
+        }
+        if (!reported) {
+          /* The bridge didn't report statistics, so it doesn't matter
+           * whether we'd have discarded them. */
+          discarded = false;
+          long[] descriptorTimestamps = descriptorSessions.get(descriptor);
+          if (descriptorTimestamps == null || descriptorTimestamps[1] == 0) {
+            /* No server-descriptor line was seen for the descriptor
+             * referenced in the bridge network status, so we cannot make
+             * any statement why the bridge did not report statistics. */
+            reason = "noserverdesc";
+          } else {
+            long sessionStart = descriptorTimestamps[4],
+                sessionEnd = descriptorTimestamps[5],
+                lastDescPubl = descriptorTimestamps[6];
+            long currentStatsEnd = sessionStart
+                + 48 * ((statusPublished - sessionStart) / 48 + 1);
+            if (sessionEnd <= currentStatsEnd) {
+              /* The current uptime session ends before the 24-hour
+               * statistics interval. */
+              reason = "lessthan24h";
+            } else if (currentStatsEnd > lastDescPubl) {
+              /* The current uptime session ended after the 24-hour
+               * statistics interval, but the bridge didn't publish a
+               * descriptor containing the statistics. */
+              reason = "publdelay";
+            } else {
+              /* There is some other reason why the bridge did not report
+               * statistics. */
+              reason = "other";
+            }
+          }
+        }
+        ebw.write(dateTimeFormat.format(statusPublished * HALF_HOUR) + ","
+            + fingerprint + "," + reported + "," + discarded + ","
+            + reason + "\n");
+      }
+    }
+    ebw.close();
+  }
+}
+
diff --git a/task-3261/AnalyzeStatsCoverage.java b/task-3261/AnalyzeStatsCoverage.java
deleted file mode 100644
index 4688bde..0000000
--- a/task-3261/AnalyzeStatsCoverage.java
+++ /dev/null
@@ -1,478 +0,0 @@
-import java.io.*;
-import java.text.*;
-import java.util.*;
-
-import org.apache.commons.codec.binary.Base64;
-import org.apache.commons.codec.binary.Hex;
-public class AnalyzeStatsCoverage {
- public static void main(String[] args) throws Exception {
- File inDirectory = new File("in");
- File tempDirectory = new File("temp");
- File outFile = new File("stats-coverage.csv");
-
- /* Extract relevant lines from extra-info descriptors in inDirectory
- * and write them to files tempDirectory/$date/$fingerprint-$date for
- * later processing by fingerprint and date. */
- SimpleDateFormat dateTimeFormat =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
- dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- if (inDirectory.exists() && inDirectory.isDirectory()) {
- System.out.println("Parsing descriptors in '"
- + inDirectory.getAbsolutePath() + "'.");
- long started = System.currentTimeMillis();
- tempDirectory.mkdirs();
- Stack<File> dirs = new Stack<File>();
- SortedSet<File> files = new TreeSet<File>();
- dirs.add(inDirectory);
- while (!dirs.isEmpty()) {
- File file = dirs.pop();
- if (file.isDirectory()) {
- if (file.getName().equals("statuses")) {
- continue;
- }
- for (File f : file.listFiles()) {
- dirs.add(f);
- }
- } else {
- files.add(file);
- }
- }
- int totalFiles = files.size(), fileNumber = 0;
- for (File file : files) {
- if (++fileNumber % (totalFiles / 1000) == 0) {
- int numberLength = String.valueOf(totalFiles).length();
- long minutesLeft = (((System.currentTimeMillis() - started)
- * (totalFiles - fileNumber)) / fileNumber) / (60L * 1000L);
- System.out.printf("Parsed %" + numberLength + "d of %"
- + numberLength + "d descriptors (%3d %%) %d minutes left%n",
- fileNumber, totalFiles, (fileNumber * 100) / totalFiles,
- minutesLeft);
- }
- BufferedReader br = new BufferedReader(new FileReader(file));
- String line, fingerprint = null, publishedLine = null;
- SortedMap<String, SortedSet<String>> linesByDate =
- new TreeMap<String, SortedSet<String>>();
- while ((line = br.readLine()) != null) {
- if (line.startsWith("extra-info ")) {
- fingerprint = line.split(" ")[2];
- } else if (line.startsWith("write-history ") ||
- line.startsWith("read-history ")) {
- String[] parts = line.split(" ");
- if (parts.length < 6) {
- continue;
- }
- String historyEndDate = parts[1];
- long historyEndMillis = dateTimeFormat.parse(parts[1] + " "
- + parts[2]).getTime();
- long intervalLength = Long.parseLong(parts[3].substring(1));
- if (intervalLength != 900L) {
- System.out.println("Non-standard interval length in "
- + "line '" + line + "' in file "
- + file.getAbsolutePath() + ". Skipping this line.");
- continue;
- }
- int intervals = parts[5].split(",").length;
- long historyStartMillis = historyEndMillis
- - (intervals * intervalLength * 1000L);
- long currentMillis = historyStartMillis;
- String currentDate;
- while ((currentDate = dateFormat.format(currentMillis)).
- compareTo(historyEndDate) <= 0) {
- if (!linesByDate.containsKey(currentDate)) {
- linesByDate.put(currentDate, new TreeSet<String>());
- }
- linesByDate.get(currentDate).add(line);
- currentMillis += 24L * 60L * 60L * 1000L;
- }
- } else if (line.startsWith("dirreq-stats-end ") ||
- line.startsWith("entry-stats-end ") ||
- line.startsWith("exit-stats-end ") ||
- line.startsWith("cell-stats-end ") ||
- line.startsWith("conn-bi-direct ") ||
- line.startsWith("bridge-stats-end ")) {
- String[] parts = line.split(" ");
- if (parts.length < 5) {
- System.out.println("Malformed line '" + line + "' in "
- + "file " + file.getAbsolutePath() + ". Skipping "
- + "this line.");
- continue;
- }
- String statsEndDate = parts[1];
- long statsEndMillis = dateTimeFormat.parse(parts[1] + " "
- + parts[2]).getTime();
- long intervalLength = Long.parseLong(parts[3].substring(1));
- long statsStartMillis = statsEndMillis
- - intervalLength * 1000L;
- long currentMillis = statsStartMillis;
- String currentDate;
- while ((currentDate = dateFormat.format(currentMillis)).
- compareTo(statsEndDate) <= 0) {
- if (!linesByDate.containsKey(currentDate)) {
- linesByDate.put(currentDate, new TreeSet<String>());
- }
- linesByDate.get(currentDate).add(line);
- currentMillis += 24L * 60L * 60L * 1000L;
- }
- } else if (line.startsWith("published ")) {
- publishedLine = line;
- } else if (line.startsWith("geoip-start-time ")) {
- if (publishedLine == null) {
- System.out.println("Missing published line in file "
- + file.getAbsolutePath() + ". Skipping "
- + "geoip-start-time line.");
- continue;
- }
- String[] publishedParts = publishedLine.split(" ");
- if (publishedParts.length < 3) {
- System.out.println("Malformed line '" + publishedLine
- + "' in file " + file.getAbsolutePath() + ". "
- + "Skipping geoip-start-time line.");
- continue;
- }
- String[] parts = line.split(" ");
- if (parts.length < 3) {
- System.out.println("Malformed line '" + line + "' in "
- + "file " + file.getAbsolutePath() + ". Skipping "
- + "this line.");
- continue;
- }
- String statsEndDate = parts[1];
- long statsEndMillis = dateTimeFormat.parse(
- publishedParts[1] + " " + publishedParts[2]).getTime();
- long statsStartMillis = dateTimeFormat.parse(parts[1] + " "
- + parts[2]).getTime();
- long intervalLength = (statsEndMillis - statsStartMillis)
- / 1000L;
- String rewrittenLine = "geoip-stats-end "
- + publishedParts[1] + " " + publishedParts[2] + " ("
- + intervalLength + " s)";
- long currentMillis = statsStartMillis;
- String currentDate;
- while ((currentDate = dateFormat.format(currentMillis)).
- compareTo(statsEndDate) <= 0) {
- if (!linesByDate.containsKey(currentDate)) {
- linesByDate.put(currentDate, new TreeSet<String>());
- }
- linesByDate.get(currentDate).add(rewrittenLine);
- currentMillis += 24L * 60L * 60L * 1000L;
- }
- }
- }
- br.close();
- for (Map.Entry<String, SortedSet<String>> e :
- linesByDate.entrySet()) {
- String date = e.getKey();
- SortedSet<String> lines = e.getValue();
- File outputFile = new File(tempDirectory, date + "/"
- + fingerprint + "-" + date);
- if (outputFile.exists()) {
- br = new BufferedReader(new FileReader(outputFile));
- while ((line = br.readLine()) != null) {
- lines.add(line);
- }
- br.close();
- }
- outputFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- outputFile));
- for (String l : lines) {
- bw.write(l + "\n");
- }
- bw.close();
- }
- }
- }
-
- /* Parse bridge network statuses and append "running " lines to
- * files tempDirectory/$date/$fingerprint-$date for later processing
- * by fingerprint and date. */
- SimpleDateFormat statusFormat =
- new SimpleDateFormat("yyyyMMdd-HHmmss");
- statusFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- if (inDirectory.exists() && inDirectory.isDirectory()) {
- System.out.println("Parsing statuses in '"
- + inDirectory.getAbsolutePath() + "'.");
- long started = System.currentTimeMillis();
- tempDirectory.mkdirs();
- Stack<File> dirs = new Stack<File>();
- SortedSet<File> files = new TreeSet<File>();
- dirs.add(inDirectory);
- while (!dirs.isEmpty()) {
- File file = dirs.pop();
- if (file.isDirectory()) {
- if (file.getName().equals("extra-infos")) {
- continue;
- }
- for (File f : file.listFiles()) {
- dirs.add(f);
- }
- } else {
- files.add(file);
- }
- }
- int totalFiles = files.size(), fileNumber = 0;
- for (File file : files) {
- if (++fileNumber % (totalFiles / 1000) == 0) {
- int numberLength = String.valueOf(totalFiles).length();
- long minutesLeft = (((System.currentTimeMillis() - started)
- * (totalFiles - fileNumber)) / fileNumber) / (60L * 1000L);
- System.out.printf("Parsed %" + numberLength + "d of %"
- + numberLength + "d statuses (%3d %%) %d minutes left%n",
- fileNumber, totalFiles, (fileNumber * 100) / totalFiles,
- minutesLeft);
- }
- long statusPublishedMillis = statusFormat.parse(
- file.getName().substring(0, "YYYYMMdd-HHmmss".length())).
- getTime();
- SortedSet<String> statusPublishedDates = new TreeSet<String>();
- String statusPublishedString = dateTimeFormat.format(
- statusPublishedMillis);
- statusPublishedDates.add(dateFormat.format(
- statusPublishedMillis));
- statusPublishedDates.add(dateFormat.format(
- statusPublishedMillis + 15L * 60L * 1000L));
- BufferedReader br = new BufferedReader(new FileReader(file));
- String line, rLine = null;
- while ((line = br.readLine()) != null) {
- if (line.startsWith("r ")) {
- rLine = line;
- } else if (line.startsWith("s ") && line.contains(" Running") &&
- rLine != null) {
- String[] parts = rLine.split(" ");
- if (parts.length != 9) {
- System.out.println("Illegal line '" + rLine + "' in "
- + file.getAbsolutePath() + ". Skipping this line.");
- continue;
- }
- String fingerprint = Hex.encodeHexString(Base64.decodeBase64(
- parts[2] + "=="));
- for (String date : statusPublishedDates) {
- File outputFile = new File(tempDirectory, date + "/"
- + fingerprint.toUpperCase() + "-" + date);
- outputFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- outputFile, true));
- bw.write("running " + statusPublishedString + "\n");
- bw.close();
- }
- }
- }
- }
- }
-
- /* Parse relevant lines by fingerprint and date. The result will be
- * how many bytes that relay or bridge read/wrote in total, and how
- * many bytes were included in the different reported statistics.
- * Other results are the number of seconds for which this relay or
- * bridge reported byte histories and other statistics, either based
- * on self-reported bandwidth histories or based on the Running flag
- * in bridge network statuses. */
- if (tempDirectory.exists() && tempDirectory.isDirectory()) {
- System.out.println("Evaluating previously parsed descriptors in '"
- + tempDirectory.getAbsolutePath() + "'.");
- BufferedWriter bw = new BufferedWriter(new FileWriter(outFile));
- bw.write("fingerprint,date,totalwritten,totalread,totalseconds,"
- + "totalrunning,dirreqwritten,dirreqread,dirreqseconds,"
- + "dirreqrunning,entrywritten,entryread,entryseconds,"
- + "entryrunning,exitwritten,exitread,exitseconds,exitrunning,"
- + "cellwritten,cellread,cellseconds,cellrunning,"
- + "connbidirectwritten,connbidirectread,connbidirectseconds,"
- + "connbidirectrunning,bridgewritten,bridgeread,bridgeseconds,"
- + "bridgerunning,geoipwritten,geoipread,geoipseconds,"
- + "geoiprunning\n");
- Stack<File> dirs = new Stack<File>();
- SortedSet<File> files = new TreeSet<File>();
- dirs.add(tempDirectory);
- while (!dirs.isEmpty()) {
- File file = dirs.pop();
- if (file.isDirectory()) {
- for (File f : file.listFiles()) {
- dirs.add(f);
- }
- } else {
- files.add(file);
- }
- }
- int totalFiles = files.size(), fileNumber = 0;
- for (File file : files) {
- if (++fileNumber % (totalFiles / 1000) == 0) {
- int numberLength = String.valueOf(totalFiles).length();
- System.out.printf("Evaluated %" + numberLength + "d of %"
- + numberLength + "d descriptors/days (%3d %%)%n",
- fileNumber, totalFiles, (fileNumber * 100) / totalFiles);
- }
- String fingerprint = file.getName().substring(0, 40);
- String date = file.getName().substring(41);
- long dateStartMillis = dateFormat.parse(date).getTime();
- long dateEndMillis = dateStartMillis + 24L * 60L * 60L * 1000L;
- long[] writeHistory = new long[96], readHistory = new long[96];
- boolean[] upBridge = new boolean[96],
- upStatus = new boolean[96],
- dirreqStats = new boolean[96],
- entryStats = new boolean[96],
- exitStats = new boolean[96],
- cellStats = new boolean[96],
- connBiDirectStats = new boolean[96],
- bridgeStats = new boolean[96],
- geoipStats = new boolean[96];
- BufferedReader br = new BufferedReader(new FileReader(file));
- String line;
- while ((line = br.readLine()) != null) {
- if (line.startsWith("running ")) {
- long statusPublishedMillis = dateTimeFormat.parse(
- line.substring("running ".length())).getTime();
- int j = (int) ((statusPublishedMillis - dateStartMillis)
- / (900L * 1000L));
- for (int i = 0; i < 2; i++) {
- if (j + i >= 0 && j + i < 96) {
- upStatus[j + i] = true;
- }
- }
- } else if (line.startsWith("write-history ") ||
- line.startsWith("read-history ")) {
- long[] history = line.startsWith("write-history ")
- ? writeHistory : readHistory;
- String[] parts = line.split(" ");
- long historyEndMillis = dateTimeFormat.parse(parts[1] + " "
- + parts[2]).getTime();
- String[] historyValues = parts[5].split(",");
- long historyStartMillis = historyEndMillis
- - (historyValues.length * 900L * 1000L);
- long currentMillis = historyStartMillis;
- for (int i = 0; i < historyValues.length; i++) {
- if (currentMillis >= dateStartMillis &&
- currentMillis < dateEndMillis) {
- int j = (int) ((currentMillis - dateStartMillis)
- / (900L * 1000L));
- if (j < 0 || j >= 96) {
- System.out.println("Internal error when processing "
- + "line '" + line + "'. Index = " + j
- + ". Exiting.");
- System.exit(1);
- }
- history[j] = Long.parseLong(historyValues[i]);
- upBridge[j] = true;
- }
- currentMillis += 15L * 60L * 1000L;
- }
- } else if (line.startsWith("dirreq-stats-end ") ||
- line.startsWith("entry-stats-end ") ||
- line.startsWith("exit-stats-end ") ||
- line.startsWith("cell-stats-end ") ||
- line.startsWith("conn-bi-direct ") ||
- line.startsWith("bridge-stats-end ") ||
- line.startsWith("geoip-stats-end ")) {
- boolean[] stats = null;
- if (line.startsWith("dirreq-stats-end ")) {
- stats = dirreqStats;
- } else if (line.startsWith("entry-stats-end ")) {
- stats = entryStats;
- } else if (line.startsWith("exit-stats-end ")) {
- stats = exitStats;
- } else if (line.startsWith("cell-stats-end ")) {
- stats = cellStats;
- } else if (line.startsWith("conn-bi-direct ")) {
- stats = connBiDirectStats;
- } else if (line.startsWith("bridge-stats-end ")) {
- stats = bridgeStats;
- } else if (line.startsWith("geoip-stats-end ")) {
- stats = geoipStats;
- } else {
- System.out.println("Internal error when processing line '"
- + line + "'. Exiting.");
- System.exit(1);
- }
- String[] parts = line.split(" ");
- long statsEndMillis = dateTimeFormat.parse(parts[1] + " "
- + parts[2]).getTime();
- long intervalLength = Long.parseLong(parts[3].substring(1));
- long statsStartMillis = statsEndMillis
- - intervalLength * 1000L;
- long currentMillis = statsStartMillis;
- while (currentMillis < dateEndMillis) {
- if (currentMillis >= dateStartMillis) {
- int j = (int) ((currentMillis - dateStartMillis)
- / (900L * 1000L));
- if (j < 0 || j >= 96) {
- System.out.println("Internal error when processing "
- + "line '" + line + "'. Index = " + j
- + ". Exiting.");
- System.exit(1);
- }
- stats[j] = true;
- }
- currentMillis += 15L * 60L * 1000L;
- }
- }
- }
- br.close();
- bw.write(fingerprint + "," + date + ",");
- long totalWritten = 0L, totalRead = 0L, totalSeconds = 0L,
- totalRunning = 0L, dirreqWritten = 0L, dirreqRead = 0L,
- dirreqSeconds = 0L, dirreqRunning = 0L, entryWritten = 0L,
- entryRead = 0L, entrySeconds = 0L, entryRunning = 0L,
- exitWritten = 0L, exitRead = 0L, exitSeconds = 0L,
- exitRunning = 0L, cellWritten = 0L, cellRead = 0L,
- cellSeconds = 0L, cellRunning = 0L, connBiDirectWritten = 0L,
- connBiDirectRead = 0L, connBiDirectSeconds = 0L,
- connBiDirectRunning = 0L, bridgeWritten = 0L, bridgeRead = 0L,
- bridgeSeconds = 0L, bridgeRunning = 0L, geoipWritten = 0L,
- geoipRead = 0L, geoipSeconds = 0L, geoipRunning = 0L;
- for (int i = 0; i < 96; i++) {
- totalWritten += writeHistory[i];
- totalRead += readHistory[i];
- totalSeconds += upBridge[i] ? 900L : 0L;
- totalRunning += upStatus[i] ? 900L : 0L;
- dirreqWritten += dirreqStats[i] ? writeHistory[i] : 0L;
- dirreqRead += dirreqStats[i] ? readHistory[i] : 0L;
- dirreqSeconds += dirreqStats[i] && upBridge[i] ? 900L : 0L;
- dirreqRunning += dirreqStats[i] && upStatus[i] ? 900L : 0L;
- entryWritten += entryStats[i] ? writeHistory[i] : 0L;
- entryRead += entryStats[i] ? readHistory[i] : 0L;
- entrySeconds += entryStats[i] && upBridge[i] ? 900L : 0L;
- entryRunning += entryStats[i] && upStatus[i] ? 900L : 0L;
- exitWritten += exitStats[i] ? writeHistory[i] : 0L;
- exitRead += exitStats[i] ? readHistory[i] : 0L;
- exitSeconds += exitStats[i] && upBridge[i] ? 900L : 0L;
- exitRunning += exitStats[i] && upStatus[i] ? 900L : 0L;
- cellWritten += cellStats[i] ? writeHistory[i] : 0L;
- cellRead += cellStats[i] ? readHistory[i] : 0L;
- cellSeconds += cellStats[i] && upBridge[i] ? 900L : 0L;
- cellRunning += cellStats[i] && upStatus[i] ? 900L : 0L;
- connBiDirectWritten += connBiDirectStats[i] ? writeHistory[i]
- : 0L;
- connBiDirectRead += connBiDirectStats[i] ? readHistory[i]
- : 0L;
- connBiDirectSeconds += connBiDirectStats[i] && upBridge[i]
- ? 900L : 0L;
- connBiDirectRunning += connBiDirectStats[i] && upStatus[i]
- ? 900L : 0L;
- bridgeWritten += bridgeStats[i] ? writeHistory[i] : 0L;
- bridgeRead += bridgeStats[i] ? readHistory[i] : 0L;
- bridgeSeconds += bridgeStats[i] && upBridge[i] ? 900L : 0L;
- bridgeRunning += bridgeStats[i] && upStatus[i] ? 900L : 0L;
- geoipWritten += geoipStats[i] ? writeHistory[i] : 0L;
- geoipRead += geoipStats[i] ? readHistory[i] : 0L;
- geoipSeconds += geoipStats[i] && upBridge[i] ? 900L : 0L;
- geoipRunning += geoipStats[i] && upStatus[i] ? 900L : 0L;
- }
- bw.write(totalWritten + "," + totalRead + "," + totalSeconds + ","
- + totalRunning + "," + dirreqWritten + "," + dirreqRead + ","
- + dirreqSeconds + "," + dirreqRunning + "," + entryWritten
- + "," + entryRead + "," + entrySeconds + "," + entryRunning
- + "," + exitWritten + "," + exitRead + "," + exitSeconds + ","
- + exitRunning + "," + cellWritten + "," + cellRead + ","
- + cellSeconds + "," + cellRunning + "," + connBiDirectWritten
- + "," + connBiDirectRead + "," + connBiDirectSeconds + ","
- + connBiDirectRunning + "," + bridgeWritten + "," + bridgeRead
- + "," + bridgeSeconds + "," + bridgeRunning + ","
- + geoipWritten + "," + geoipRead + "," + geoipSeconds + ","
- + geoipRunning + "\n");
- }
- bw.close();
- }
- }
-}
-
diff --git a/task-3261/ExtractDescriptorParts.java b/task-3261/ExtractDescriptorParts.java
new file mode 100755
index 0000000..544022d
--- /dev/null
+++ b/task-3261/ExtractDescriptorParts.java
@@ -0,0 +1,172 @@
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.text.SimpleDateFormat;
+import java.util.Iterator;
+import java.util.SortedSet;
+import java.util.TimeZone;
+import java.util.TreeSet;
+
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.torproject.descriptor.BridgeNetworkStatus;
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorFile;
+import org.torproject.descriptor.DescriptorReader;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.ExtraInfoDescriptor;
+import org.torproject.descriptor.NetworkStatusEntry;
+import org.torproject.descriptor.RelayNetworkStatusConsensus;
+import org.torproject.descriptor.ServerDescriptor;
+
+/* Extract the relevant parts from bridge descriptors and consensuses that
+ * are required to answer what fraction of bridges are not reporting
+ * bridge usage statistics. */
+public class ExtractDescriptorParts {
+ public static void main(String[] args) throws Exception {
+
+ /* Define paths, all relative to the working directory: descriptors
+ * (or tarballs) are read from in/, parse-history is handed to the
+ * reader as its exclude file, per-bridge output files go to temp/, and
+ * bridge status publication times go to bridge-network-statuses. */
+ File inDirectory = new File("in");
+ File parseHistoryFile = new File("parse-history");
+ File tempDirectory = new File("temp");
+ File statusFile = new File("bridge-network-statuses");
+
+ /* Read descriptors; all timestamps are formatted in UTC. */
+ SimpleDateFormat dateTimeFormat =
+ new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ DescriptorReader reader =
+ DescriptorSourceFactory.createDescriptorReader();
+ reader.addDirectory(inDirectory);
+ reader.setExcludeFiles(parseHistoryFile);
+ Iterator<DescriptorFile> descriptorFiles = reader.readDescriptors();
+ while (descriptorFiles.hasNext()) {
+ DescriptorFile descriptorFile = descriptorFiles.next();
+ if (descriptorFile.getDescriptors() != null) {
+ for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+
+ /* Extract bridge-stats and geoip-stats lines (end time, interval
+ * length, geoip db digest or "NA") from extra-info descriptors. */
+ if (descriptor instanceof ExtraInfoDescriptor) {
+ System.out.print("e"); /* one-letter progress marker */
+ SortedSet<String> lines = new TreeSet<String>(); /* sorted, no duplicates */
+ ExtraInfoDescriptor extraInfoDescriptor =
+ (ExtraInfoDescriptor) descriptor;
+ if (extraInfoDescriptor.getBridgeStatsEndMillis() > 0) {
+ lines.add("bridge-stats " + dateTimeFormat.format(
+ extraInfoDescriptor.getBridgeStatsEndMillis()) + " "
+ + extraInfoDescriptor.getBridgeStatsIntervalLength()
+ + " " + (extraInfoDescriptor.getGeoipDbDigest() == null
+ ? "NA" : extraInfoDescriptor.getGeoipDbDigest()));
+ }
+ if (extraInfoDescriptor.getGeoipStartTimeMillis() > 0) {
+ long intervalLength =
+ (extraInfoDescriptor.getPublishedMillis()
+ - extraInfoDescriptor.getGeoipStartTimeMillis())
+ / 1000L; /* milliseconds to seconds */
+ String geoipStatsEnd = dateTimeFormat.format(
+ extraInfoDescriptor.getPublishedMillis());
+ lines.add("geoip-stats " + geoipStatsEnd + " "
+ + intervalLength + " "
+ + (extraInfoDescriptor.getGeoipDbDigest() == null
+ ? "NA" : extraInfoDescriptor.getGeoipDbDigest()));
+ }
+ if (!lines.isEmpty()) {
+ File outputFile = new File(tempDirectory,
+ extraInfoDescriptor.getFingerprint().toUpperCase());
+ outputFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ outputFile, true)); /* true = append to existing file */
+ for (String l : lines) {
+ bw.write(l + "\n");
+ }
+ bw.close();
+ }
+
+ /* Append a running-bridge line (publication time, descriptor digest)
+ * for every Running bridge, and record the status publication time. */
+ } else if (descriptor instanceof BridgeNetworkStatus) {
+ System.out.print("n"); /* one-letter progress marker */
+ BridgeNetworkStatus status = (BridgeNetworkStatus) descriptor;
+ String published = dateTimeFormat.format(
+ status.getPublishedMillis());
+ if (status.getStatusEntries() != null) { /* also guards the statusFile write below */
+ for (NetworkStatusEntry entry :
+ status.getStatusEntries().values()) {
+ if (entry.getFlags().contains("Running")) {
+ File outputFile = new File(tempDirectory,
+ entry.getFingerprint().toUpperCase());
+ outputFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ outputFile, true));
+ String digest = entry.getDescriptor().toUpperCase();
+ bw.write("running-bridge " + published + " " + digest
+ + "\n");
+ bw.close();
+ }
+ }
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ statusFile, true));
+ bw.write(published + "\n");
+ bw.close();
+ }
+
+ /* Extract publication time, digest, uptime ("-1" if null), and
+ * platform string ("NA" if null) from bridge server descriptors. */
+ } else if (descriptor instanceof ServerDescriptor) {
+ System.out.print("s"); /* one-letter progress marker */
+ ServerDescriptor serverDescriptor =
+ (ServerDescriptor) descriptor;
+ String published = dateTimeFormat.format(
+ serverDescriptor.getPublishedMillis());
+ String digest = descriptorFile.getFileName().substring(
+ descriptorFile.getFileName().lastIndexOf("/") + 1).
+ toUpperCase(); /* digest = file name without its directories */
+ String uptime = serverDescriptor.getUptime() == null ? "-1"
+ : String.valueOf(serverDescriptor.getUptime());
+ String platform = serverDescriptor.getPlatform() == null
+ ? "NA" : serverDescriptor.getPlatform();
+ File outputFile = new File(tempDirectory,
+ serverDescriptor.getFingerprint().toUpperCase());
+ outputFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ outputFile, true));
+ bw.write("server-descriptor " + published + " "
+ + digest + " " + uptime + " " + platform + "\n");
+ bw.close();
+
+ /* For every Running relay in a consensus, append a running-relay line
+ * (valid-after time) to the file named after its hashed fingerprint. */
+ } else if (descriptor instanceof RelayNetworkStatusConsensus) {
+ System.out.print("r"); /* one-letter progress marker */
+ RelayNetworkStatusConsensus status =
+ (RelayNetworkStatusConsensus) descriptor;
+ if (status.getStatusEntries() != null) {
+ for (NetworkStatusEntry entry :
+ status.getStatusEntries().values()) {
+ if (entry.getFlags().contains("Running")) {
+ String hashedFingerprint = Hex.encodeHexString(
+ DigestUtils.sha(Hex.decodeHex(
+ entry.getFingerprint().toCharArray()))).
+ toUpperCase(); /* hex of SHA-1 over the decoded fingerprint bytes */
+ File outputFile = new File(tempDirectory,
+ hashedFingerprint);
+ outputFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ outputFile, true));
+ bw.write("running-relay " + dateTimeFormat.format(
+ status.getValidAfterMillis()) + "\n");
+ bw.close();
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
diff --git a/task-3261/README b/task-3261/README
old mode 100644
new mode 100755
index cb430ac..43ef208
--- a/task-3261/README
+++ b/task-3261/README
@@ -1,4 +1,36 @@
-$ javac -cp commons-codec-1.4.jar AnalyzeStatsCoverage.java
-$ java -cp commons-codec-1.4.jar.: -Xmx4g AnalyzeStatsCoverage
-$ R --slave -f stats-coverage.R
+What fraction of our bridges are not reporting usage statistics?
+================================================================
+
+Usage:
+
+1. Put metrics tarballs into a directory called in/. The best parsing
+ performance can be achieved by decompressing tarballs without
+ extracting them. The bridge-descriptors-* and consensuses-* tarballs
+ are required for this analysis.
+
+2. Clone metrics-lib.git, build descriptor.jar, and put it in this
+ directory.
+
+3. Download Apache Commons Codec and Compress and put the .jar files in
+ this directory.
+
+4. Parse descriptors and write all relevant parts to one file per bridge:
+ $ javac \
+ -cp commons-codec-1.4.jar:commons-compress-1.3.jar:descriptor.jar \
+ ExtractDescriptorParts.java
+ $ java \
+ -cp commons-codec-1.4.jar:commons-compress-1.3.jar:descriptor.jar:. \
+ ExtractDescriptorParts
+
+5. Analyze descriptor parts bridge by bridge and determine whether each
+ bridge reported bridge stats at a given time, and if not, why not:
+ $ javac AnalyzeDescriptorParts.java
+ $ java AnalyzeDescriptorParts
+
+6. Aggregate daily statistics that can be plotted:
+ $ javac AggregateStats.java
+ $ java AggregateStats
+
+7. Plot results:
+ $ R --slave -f plot.R
diff --git a/task-3261/plot.R b/task-3261/plot.R
new file mode 100644
index 0000000..8a3808c
--- /dev/null
+++ b/task-3261/plot.R
@@ -0,0 +1,65 @@
+library(ggplot2)
+library(scales)
+library(reshape)
+a <- read.csv("aggregated.csv", stringsAsFactors = FALSE)  # columns used below: date, reported, discarded, reason, bridges
+# Plot 1: per-date fractions of the bridges column, split into three cases.
+e <- a
+e <- data.frame(date = as.Date(e$date), case = ifelse(
+ e$reported == "true", ifelse(e$discarded == "false", "case1", "case2"),
+ "case3"), bridges = e$bridges)  # case1: reported, not discarded; case2: reported, discarded; case3: not reported
+e <- aggregate(list(bridges = e$bridges),
+ by = list(date = e$date, case = e$case), FUN = sum)  # sum bridges per date and case
+e <- cast(e, date ~ case)  # one row per date, one column per case
+sums <- e$case1 + e$case2 + e$case3  # per-date total over all three cases
+e <- data.frame(date = e$date, case1 = e$case1 / sums,
+ case2 = e$case2 / sums, case3 = e$case3 / sums, stringsAsFactors = FALSE)  # totals to fractions
+e <- melt(e, "date")
+e <- data.frame(date = e$date, variable = ifelse(e$variable == "case1",
+ "reported and used", ifelse(e$variable == "case2",
+ "reported and discarded", "not reported")), value = e$value)  # facet labels
+ggplot(e, aes(x = as.Date(date), y = value)) +
+geom_line() +
+facet_grid(variable ~ .) +
+scale_x_date(name = "") +
+scale_y_continuous(name = "", labels = percent) +
+opts(title = "Fraction of bridge usage statistics that were...\n")
+ggsave("reported-bridge-statistics.png", width = 8, height = 6, dpi = 120)
+# Plot 2: rows with reported == "false", split by reason, divided by sums from plot 1.
+d <- a
+d <- d[d$reported == "false", ]
+d <- data.frame(date = d$date, reason = d$reason, value = d$bridges)
+d <- cast(d, date ~ reason)  # one column per reason value
+d <- data.frame(date = d$date, case1 = d$lessthan24h / sums,
+ case2 = d$publdelay / sums, case3 = d$other / sums)  # NOTE(review): sums is matched by row position; confirm d has the same dates as plot 1
+d <- melt(d, "date")
+d <- data.frame(date = d$date, variable = ifelse(d$variable == "case1",
+ "Less than 24h uptime", ifelse(d$variable == "case2",
+ "Publication delay", "Other reason")), value = d$value)  # facet labels
+ggplot(d, aes(x = as.Date(date), y = value)) +
+geom_line() +
+facet_grid(variable ~ .) +
+scale_x_date(name = "") +
+scale_y_continuous(name = "", labels = percent) +
+opts(title = "Reasons for bridges not reporting usage statistics\n")
+ggsave("bridge-statistics-nonreported.png", width = 8, height = 6,
+ dpi = 120)
+# Plot 3: rows with discarded == "true", split by reason, divided by sums from plot 1.
+b <- a
+b <- b[b$discarded == "true", ]
+b <- data.frame(date = b$date, reason = b$reason, value = b$bridges)
+b <- cast(b, date ~ reason)  # one column per reason value
+b <- data.frame(date = b$date, case1 = b$geoip022 / sums,
+ case2 = b$nogeoipfile / sums, case3 = b$runasrelay / sums)  # NOTE(review): sums is matched by row position; confirm b has the same dates as plot 1
+b <- melt(b, "date")
+b <- data.frame(date = b$date, variable = ifelse(b$variable == "case1",
+ "0.2.2.x geoip-stats bug", ifelse(b$variable == "case2",
+ "missing geoip file", "Run as non-bridge relay")), value = b$value)  # facet labels
+ggplot(b, aes(x = as.Date(date), y = value)) +
+geom_line() +
+facet_grid(variable ~ .) +
+scale_x_date(name = "") +
+scale_y_continuous(name = "", labels = percent) +
+opts(title = "Reasons for discarding reported usage statistics\n")
+ggsave("bridge-statistics-discarded.png", width = 8, height = 6,
+ dpi = 120)
+
diff --git a/task-3261/stats-coverage.R b/task-3261/stats-coverage.R
deleted file mode 100644
index aef63f2..0000000
--- a/task-3261/stats-coverage.R
+++ /dev/null
@@ -1,25 +0,0 @@
-library(ggplot2)
-library(scales)
-b <- read.csv("stats-coverage.csv")
-b <- aggregate(list(
- totalwritten = b$totalwritten, totalseconds = b$totalseconds,
- totalrunning = b$totalrunning, bridgewritten = b$bridgewritten,
- bridgeseconds = b$bridgeseconds, bridgerunning = b$bridgerunning,
- geoipwritten = b$geoipwritten, geoipseconds = b$geoipseconds,
- geoiprunning = b$geoiprunning), by = list(date = as.Date(b$date)), sum)
-b <- rbind(data.frame(date = b$date, variable = "by written bytes",
- value = (b$bridgewritten + b$geoipwritten) / b$totalwritten),
- data.frame(date = b$date, variable = "by uptime (bandwidth history)",
- value = (b$bridgeseconds + b$geoipseconds) / b$totalseconds),
- data.frame(date = b$date, variable = "by uptime (Running flag)",
- value = (b$bridgerunning + b$geoiprunning) / b$totalrunning))
-b <- b[b$date >= as.Date("2010-10-01") & b$date < as.Date("2012-04-01"), ]
-ggplot(b, aes(x = date, y = value)) +
-geom_line() +
-facet_grid(variable ~ .) +
-scale_x_date(name = "") +
-scale_y_continuous(name = "", limits = c(0, 1), labels = percent) +
-scale_colour_hue(name = "") +
-opts(title = "Fraction of bridges reporting statistics\n")
-ggsave("stats-coverage-bridges.png", width = 8, height = 7, dpi = 72)
-
More information about the tor-commits
mailing list