[tor-commits] [metrics-tasks/master] Update #3261 code.
karsten at torproject.org
karsten at torproject.org
Mon Apr 16 13:27:12 UTC 2012
commit a3fd963839d80da97d2e67384bd1c16021a82ac9
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Mon Apr 16 15:20:46 2012 +0200
Update #3261 code.
We now parse bridge network statuses and append "running " lines to files
tempDirectory/$date/$fingerprint-$date for later processing by fingerprint
and date. We then add *running columns to the .csv file, which contain
uptime seconds derived from the Running flag and are used to compute
fractions by bridges with the Running flag.
---
task-3261/.gitignore | 1 +
task-3261/AnalyzeStatsCoverage.java | 182 ++++++++++++++++++++++++++++-------
task-3261/README | 4 +-
task-3261/stats-coverage.R | 22 +++--
4 files changed, 163 insertions(+), 46 deletions(-)
diff --git a/task-3261/.gitignore b/task-3261/.gitignore
index 1e3dcc5..2bfd23b 100644
--- a/task-3261/.gitignore
+++ b/task-3261/.gitignore
@@ -4,4 +4,5 @@
*.csv
in/
temp/
+*.jar
diff --git a/task-3261/AnalyzeStatsCoverage.java b/task-3261/AnalyzeStatsCoverage.java
index 21d791e..4688bde 100644
--- a/task-3261/AnalyzeStatsCoverage.java
+++ b/task-3261/AnalyzeStatsCoverage.java
@@ -1,6 +1,9 @@
import java.io.*;
import java.text.*;
import java.util.*;
+
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.codec.binary.Hex;
public class AnalyzeStatsCoverage {
public static void main(String[] args) throws Exception {
File inDirectory = new File("in");
@@ -26,6 +29,9 @@ public class AnalyzeStatsCoverage {
while (!dirs.isEmpty()) {
File file = dirs.pop();
if (file.isDirectory()) {
+ if (file.getName().equals("statuses")) {
+ continue;
+ }
for (File f : file.listFiles()) {
dirs.add(f);
}
@@ -179,22 +185,103 @@ public class AnalyzeStatsCoverage {
}
}
+ /* Parse bridge network statuses and append "running " lines to
+ * files tempDirectory/$date/$fingerprint-$date for later processing
+ * by fingerprint and date. */
+ SimpleDateFormat statusFormat =
+ new SimpleDateFormat("yyyyMMdd-HHmmss");
+ statusFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ if (inDirectory.exists() && inDirectory.isDirectory()) {
+ System.out.println("Parsing statuses in '"
+ + inDirectory.getAbsolutePath() + "'.");
+ long started = System.currentTimeMillis();
+ tempDirectory.mkdirs();
+ Stack<File> dirs = new Stack<File>();
+ SortedSet<File> files = new TreeSet<File>();
+ dirs.add(inDirectory);
+ while (!dirs.isEmpty()) {
+ File file = dirs.pop();
+ if (file.isDirectory()) {
+ if (file.getName().equals("extra-infos")) {
+ continue;
+ }
+ for (File f : file.listFiles()) {
+ dirs.add(f);
+ }
+ } else {
+ files.add(file);
+ }
+ }
+ int totalFiles = files.size(), fileNumber = 0;
+ for (File file : files) {
+ if (++fileNumber % (totalFiles / 1000) == 0) {
+ int numberLength = String.valueOf(totalFiles).length();
+ long minutesLeft = (((System.currentTimeMillis() - started)
+ * (totalFiles - fileNumber)) / fileNumber) / (60L * 1000L);
+ System.out.printf("Parsed %" + numberLength + "d of %"
+ + numberLength + "d statuses (%3d %%) %d minutes left%n",
+ fileNumber, totalFiles, (fileNumber * 100) / totalFiles,
+ minutesLeft);
+ }
+ long statusPublishedMillis = statusFormat.parse(
+ file.getName().substring(0, "YYYYMMdd-HHmmss".length())).
+ getTime();
+ SortedSet<String> statusPublishedDates = new TreeSet<String>();
+ String statusPublishedString = dateTimeFormat.format(
+ statusPublishedMillis);
+ statusPublishedDates.add(dateFormat.format(
+ statusPublishedMillis));
+ statusPublishedDates.add(dateFormat.format(
+ statusPublishedMillis + 15L * 60L * 1000L));
+ BufferedReader br = new BufferedReader(new FileReader(file));
+ String line, rLine = null;
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("r ")) {
+ rLine = line;
+ } else if (line.startsWith("s ") && line.contains(" Running") &&
+ rLine != null) {
+ String[] parts = rLine.split(" ");
+ if (parts.length != 9) {
+ System.out.println("Illegal line '" + rLine + "' in "
+ + file.getAbsolutePath() + ". Skipping this line.");
+ continue;
+ }
+ String fingerprint = Hex.encodeHexString(Base64.decodeBase64(
+ parts[2] + "=="));
+ for (String date : statusPublishedDates) {
+ File outputFile = new File(tempDirectory, date + "/"
+ + fingerprint.toUpperCase() + "-" + date);
+ outputFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ outputFile, true));
+ bw.write("running " + statusPublishedString + "\n");
+ bw.close();
+ }
+ }
+ }
+ }
+ }
+
/* Parse relevant lines by fingerprint and date. The result will be
* how many bytes that relay or bridge read/wrote in total, and how
* many bytes were included in the different reported statistics.
- * Another result is the number of seconds for which this relay or
- * bridge reported byte histories and other statistics. */
+ * Other results are the number of seconds for which this relay or
+ * bridge reported byte histories and other statistics, either based
+ * on self-reported bandwidth histories or based on the Running flag
+ * in bridge network statuses. */
if (tempDirectory.exists() && tempDirectory.isDirectory()) {
System.out.println("Evaluating previously parsed descriptors in '"
+ tempDirectory.getAbsolutePath() + "'.");
BufferedWriter bw = new BufferedWriter(new FileWriter(outFile));
bw.write("fingerprint,date,totalwritten,totalread,totalseconds,"
- + "dirreqwritten,dirreqread,dirreqseconds,entrywritten,"
- + "entryread,entryseconds,exitwritten,exitread,exitseconds,"
- + "cellwritten,cellread,cellseconds,connbidirectwritten,"
- + "connbidirectread,connbidirectseconds,bridgewritten,"
- + "bridgeread,bridgeseconds,geoipwritten,geoipread,"
- + "geoipseconds\n");
+ + "totalrunning,dirreqwritten,dirreqread,dirreqseconds,"
+ + "dirreqrunning,entrywritten,entryread,entryseconds,"
+ + "entryrunning,exitwritten,exitread,exitseconds,exitrunning,"
+ + "cellwritten,cellread,cellseconds,cellrunning,"
+ + "connbidirectwritten,connbidirectread,connbidirectseconds,"
+ + "connbidirectrunning,bridgewritten,bridgeread,bridgeseconds,"
+ + "bridgerunning,geoipwritten,geoipread,geoipseconds,"
+ + "geoiprunning\n");
Stack<File> dirs = new Stack<File>();
SortedSet<File> files = new TreeSet<File>();
dirs.add(tempDirectory);
@@ -221,7 +308,8 @@ public class AnalyzeStatsCoverage {
long dateStartMillis = dateFormat.parse(date).getTime();
long dateEndMillis = dateStartMillis + 24L * 60L * 60L * 1000L;
long[] writeHistory = new long[96], readHistory = new long[96];
- boolean[] running = new boolean[96],
+ boolean[] upBridge = new boolean[96],
+ upStatus = new boolean[96],
dirreqStats = new boolean[96],
entryStats = new boolean[96],
exitStats = new boolean[96],
@@ -232,7 +320,17 @@ public class AnalyzeStatsCoverage {
BufferedReader br = new BufferedReader(new FileReader(file));
String line;
while ((line = br.readLine()) != null) {
- if (line.startsWith("write-history ") ||
+ if (line.startsWith("running ")) {
+ long statusPublishedMillis = dateTimeFormat.parse(
+ line.substring("running ".length())).getTime();
+ int j = (int) ((statusPublishedMillis - dateStartMillis)
+ / (900L * 1000L));
+ for (int i = 0; i < 2; i++) {
+ if (j + i >= 0 && j + i < 96) {
+ upStatus[j + i] = true;
+ }
+ }
+ } else if (line.startsWith("write-history ") ||
line.startsWith("read-history ")) {
long[] history = line.startsWith("write-history ")
? writeHistory : readHistory;
@@ -255,7 +353,7 @@ public class AnalyzeStatsCoverage {
System.exit(1);
}
history[j] = Long.parseLong(historyValues[i]);
- running[j] = true;
+ upBridge[j] = true;
}
currentMillis += 15L * 60L * 1000L;
}
@@ -312,52 +410,66 @@ public class AnalyzeStatsCoverage {
br.close();
bw.write(fingerprint + "," + date + ",");
long totalWritten = 0L, totalRead = 0L, totalSeconds = 0L,
- dirreqWritten = 0L, dirreqRead = 0L, dirreqSeconds = 0L,
- entryWritten = 0L, entryRead = 0L, entrySeconds = 0L,
+ totalRunning = 0L, dirreqWritten = 0L, dirreqRead = 0L,
+ dirreqSeconds = 0L, dirreqRunning = 0L, entryWritten = 0L,
+ entryRead = 0L, entrySeconds = 0L, entryRunning = 0L,
exitWritten = 0L, exitRead = 0L, exitSeconds = 0L,
- cellWritten = 0L, cellRead = 0L, cellSeconds = 0L,
- connBiDirectWritten = 0L, connBiDirectRead = 0L,
- connBiDirectSeconds = 0L, bridgeWritten = 0L, bridgeRead = 0L,
- bridgeSeconds = 0L, geoipWritten = 0L, geoipRead = 0L,
- geoipSeconds = 0L;
+ exitRunning = 0L, cellWritten = 0L, cellRead = 0L,
+ cellSeconds = 0L, cellRunning = 0L, connBiDirectWritten = 0L,
+ connBiDirectRead = 0L, connBiDirectSeconds = 0L,
+ connBiDirectRunning = 0L, bridgeWritten = 0L, bridgeRead = 0L,
+ bridgeSeconds = 0L, bridgeRunning = 0L, geoipWritten = 0L,
+ geoipRead = 0L, geoipSeconds = 0L, geoipRunning = 0L;
for (int i = 0; i < 96; i++) {
totalWritten += writeHistory[i];
totalRead += readHistory[i];
- totalSeconds += running[i] ? 900L : 0L;
+ totalSeconds += upBridge[i] ? 900L : 0L;
+ totalRunning += upStatus[i] ? 900L : 0L;
dirreqWritten += dirreqStats[i] ? writeHistory[i] : 0L;
dirreqRead += dirreqStats[i] ? readHistory[i] : 0L;
- dirreqSeconds += dirreqStats[i] && running[i] ? 900L : 0L;
+ dirreqSeconds += dirreqStats[i] && upBridge[i] ? 900L : 0L;
+ dirreqRunning += dirreqStats[i] && upStatus[i] ? 900L : 0L;
entryWritten += entryStats[i] ? writeHistory[i] : 0L;
entryRead += entryStats[i] ? readHistory[i] : 0L;
- entrySeconds += entryStats[i] && running[i] ? 900L : 0L;
+ entrySeconds += entryStats[i] && upBridge[i] ? 900L : 0L;
+ entryRunning += entryStats[i] && upStatus[i] ? 900L : 0L;
exitWritten += exitStats[i] ? writeHistory[i] : 0L;
exitRead += exitStats[i] ? readHistory[i] : 0L;
- exitSeconds += exitStats[i] && running[i] ? 900L : 0L;
+ exitSeconds += exitStats[i] && upBridge[i] ? 900L : 0L;
+ exitRunning += exitStats[i] && upStatus[i] ? 900L : 0L;
cellWritten += cellStats[i] ? writeHistory[i] : 0L;
cellRead += cellStats[i] ? readHistory[i] : 0L;
- cellSeconds += cellStats[i] && running[i] ? 900L : 0L;
+ cellSeconds += cellStats[i] && upBridge[i] ? 900L : 0L;
+ cellRunning += cellStats[i] && upStatus[i] ? 900L : 0L;
connBiDirectWritten += connBiDirectStats[i] ? writeHistory[i]
: 0L;
connBiDirectRead += connBiDirectStats[i] ? readHistory[i]
: 0L;
- connBiDirectSeconds += connBiDirectStats[i] && running[i] ? 900L
- : 0L;
+ connBiDirectSeconds += connBiDirectStats[i] && upBridge[i]
+ ? 900L : 0L;
+ connBiDirectRunning += connBiDirectStats[i] && upStatus[i]
+ ? 900L : 0L;
bridgeWritten += bridgeStats[i] ? writeHistory[i] : 0L;
bridgeRead += bridgeStats[i] ? readHistory[i] : 0L;
- bridgeSeconds += bridgeStats[i] && running[i] ? 900L : 0L;
+ bridgeSeconds += bridgeStats[i] && upBridge[i] ? 900L : 0L;
+ bridgeRunning += bridgeStats[i] && upStatus[i] ? 900L : 0L;
geoipWritten += geoipStats[i] ? writeHistory[i] : 0L;
geoipRead += geoipStats[i] ? readHistory[i] : 0L;
- geoipSeconds += geoipStats[i] && running[i] ? 900L : 0L;
+ geoipSeconds += geoipStats[i] && upBridge[i] ? 900L : 0L;
+ geoipRunning += geoipStats[i] && upStatus[i] ? 900L : 0L;
}
bw.write(totalWritten + "," + totalRead + "," + totalSeconds + ","
- + dirreqWritten + "," + dirreqRead + "," + dirreqSeconds + ","
- + entryWritten + "," + entryRead + "," + entrySeconds + ","
- + exitWritten + "," + exitRead + "," + exitSeconds + ","
- + cellWritten + "," + cellRead + "," + cellSeconds + ","
- + connBiDirectWritten + "," + connBiDirectRead + ","
- + connBiDirectSeconds + "," + bridgeWritten + ","
- + bridgeRead + "," + bridgeSeconds + "," + geoipWritten + ","
- + geoipRead + "," + geoipSeconds + "\n");
+ + totalRunning + "," + dirreqWritten + "," + dirreqRead + ","
+ + dirreqSeconds + "," + dirreqRunning + "," + entryWritten
+ + "," + entryRead + "," + entrySeconds + "," + entryRunning
+ + "," + exitWritten + "," + exitRead + "," + exitSeconds + ","
+ + exitRunning + "," + cellWritten + "," + cellRead + ","
+ + cellSeconds + "," + cellRunning + "," + connBiDirectWritten
+ + "," + connBiDirectRead + "," + connBiDirectSeconds + ","
+ + connBiDirectRunning + "," + bridgeWritten + "," + bridgeRead
+ + "," + bridgeSeconds + "," + bridgeRunning + ","
+ + geoipWritten + "," + geoipRead + "," + geoipSeconds + ","
+ + geoipRunning + "\n");
}
bw.close();
}
diff --git a/task-3261/README b/task-3261/README
index 1b95743..cb430ac 100644
--- a/task-3261/README
+++ b/task-3261/README
@@ -1,4 +1,4 @@
-$ javac AnalyzeStatsCoverage.java
-$ java -Xmx4g AnalyzeStatsCoverage
+$ javac -cp commons-codec-1.4.jar AnalyzeStatsCoverage.java
+$ java -cp commons-codec-1.4.jar:. -Xmx4g AnalyzeStatsCoverage
$ R --slave -f stats-coverage.R
diff --git a/task-3261/stats-coverage.R b/task-3261/stats-coverage.R
index d0794b9..aef63f2 100644
--- a/task-3261/stats-coverage.R
+++ b/task-3261/stats-coverage.R
@@ -1,21 +1,25 @@
library(ggplot2)
+library(scales)
b <- read.csv("stats-coverage.csv")
b <- aggregate(list(
totalwritten = b$totalwritten, totalseconds = b$totalseconds,
- bridgewritten = b$bridgewritten, bridgeseconds = b$bridgeseconds,
- geoipwritten = b$geoipwritten, geoipseconds = b$geoipseconds),
- by = list(date = as.Date(b$date)), sum)
+ totalrunning = b$totalrunning, bridgewritten = b$bridgewritten,
+ bridgeseconds = b$bridgeseconds, bridgerunning = b$bridgerunning,
+ geoipwritten = b$geoipwritten, geoipseconds = b$geoipseconds,
+ geoiprunning = b$geoiprunning), by = list(date = as.Date(b$date)), sum)
b <- rbind(data.frame(date = b$date, variable = "by written bytes",
value = (b$bridgewritten + b$geoipwritten) / b$totalwritten),
- data.frame(date = b$date, variable = "by uptime seconds",
- value = (b$bridgeseconds + b$geoipseconds) / b$totalseconds))
+ data.frame(date = b$date, variable = "by uptime (bandwidth history)",
+ value = (b$bridgeseconds + b$geoipseconds) / b$totalseconds),
+ data.frame(date = b$date, variable = "by uptime (Running flag)",
+ value = (b$bridgerunning + b$geoiprunning) / b$totalrunning))
+b <- b[b$date >= as.Date("2010-10-01") & b$date < as.Date("2012-04-01"), ]
ggplot(b, aes(x = date, y = value)) +
geom_line() +
facet_grid(variable ~ .) +
-scale_x_date(name = "", major = "3 months", minor = "1 month",
- format = "%b %Y", limits = as.Date(c("2010-10-01", "2011-09-30"))) +
-scale_y_continuous(name = "", limits = c(0, 1), formatter = "percent") +
+scale_x_date(name = "") +
+scale_y_continuous(name = "", limits = c(0, 1), labels = percent) +
scale_colour_hue(name = "") +
opts(title = "Fraction of bridges reporting statistics\n")
-ggsave("stats-coverage-bridges.png", width = 8, height = 6, dpi = 72)
+ggsave("stats-coverage-bridges.png", width = 8, height = 7, dpi = 72)
More information about the tor-commits mailing list