[tor-commits] [metrics-tasks/master] Add #3277 code.
karsten at torproject.org
karsten at torproject.org
Tue May 24 13:30:29 UTC 2011
commit 0a10a39ca7f7f2b3e306ad6de7a1776a6bd50589
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Tue May 24 15:30:11 2011 +0200
Add #3277 code.
---
task-3277/.gitignore | 5 ++++
task-3277/EvaluateHsDirs.java | 52 +++++++++++++++++++++++++++++++++++++++++
task-3277/README | 22 +++++++++++++++++
task-3277/hsdir-sessions.R | 21 ++++++++++++++++
4 files changed, 100 insertions(+), 0 deletions(-)
diff --git a/task-3277/.gitignore b/task-3277/.gitignore
new file mode 100644
index 0000000..5103210
--- /dev/null
+++ b/task-3277/.gitignore
@@ -0,0 +1,5 @@
+*.csv
+*.class
+Rplots.pdf
+*.png
+
diff --git a/task-3277/EvaluateHsDirs.java b/task-3277/EvaluateHsDirs.java
new file mode 100644
index 0000000..4e6f1a4
--- /dev/null
+++ b/task-3277/EvaluateHsDirs.java
@@ -0,0 +1,52 @@
+import java.io.*;
+import java.text.*;
+import java.util.*;
+public class EvaluateHsDirs { // Reads hsdir.csv ("validAfter,fingerprint" rows) and writes one row per ended HSDir session to hsdir-sessions.csv.
+ public static void main(String[] args) throws Exception {
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ "hsdir-sessions.csv"));
+ bw.write("fingerprint,firstseen,lastseen,duration\n"); // CSV header of the output file
+ BufferedReader br = new BufferedReader(new FileReader("hsdir.csv"));
+ String line = br.readLine(), firstValidAfter = null, // the first input line is consumed here and never processed (presumably a column header; confirm)
+ lastValidAfter = null, lastButOneValidAfter = null;
+ Map<String, String> last = new HashMap<String, String>(); // fingerprint -> session start, for the previously completed timestamp group
+ Map<String, String> current = new HashMap<String, String>(); // fingerprint -> session start, for the timestamp group being read
+ SimpleDateFormat formatter = new SimpleDateFormat(
+ "yyyy-MM-dd HH:mm:ss");
+ formatter.setTimeZone(TimeZone.getTimeZone("UTC")); // timestamps are parsed as UTC
+ while ((line = br.readLine()) != null) {
+ String[] parts = line.split(","); // parts[0] = validAfter timestamp, parts[1] = fingerprint
+ String validAfter = parts[0];
+ if (firstValidAfter == null) { // first processed row: remember the earliest timestamp in the input
+ firstValidAfter = validAfter;
+ lastValidAfter = validAfter;
+ }
+ if (!line.startsWith("20") || !lastValidAfter.equals(validAfter)) { // group boundary: non-data line or new timestamp. NOTE(review): the row that opens a new group takes this branch, so its fingerprint is never put into current.
+ for (Map.Entry<String, String> e : last.entrySet()) { // entries still in last were not carried over into the group that just ended
+ if (!current.containsKey(e.getKey()) &&
+ !e.getValue().equals(firstValidAfter)) { // sessions that started at the very first timestamp are skipped (presumably because their true start is unknown; confirm)
+ long duration =
+ (formatter.parse(lastButOneValidAfter).getTime()
+ - formatter.parse(e.getValue()).getTime()) / 1000L; // milliseconds -> seconds
+ bw.write(e.getKey() + "," + e.getValue() + ","
+ + lastButOneValidAfter + "," + duration + "\n"); // lastButOneValidAfter is the last timestamp at which the relay was still listed
+ }
+ }
+ if (!line.startsWith("20")) { // a line not starting with "20" ends processing; sessions still open at that point are never written
+ break;
+ }
+ last = current; // the finished group becomes the reference for the next one
+ current = new HashMap<String, String>();
+ lastButOneValidAfter = lastValidAfter;
+ } else if (last.containsKey(parts[1])) {
+ current.put(parts[1], last.remove(parts[1])); // relay seen in the previous group too: carry its original session start forward
+ } else {
+ current.put(parts[1], validAfter); // relay not in the previous group: a new session starts at this timestamp
+ }
+ lastValidAfter = validAfter;
+ }
+ br.close();
+ bw.close();
+ }
+}
+
diff --git a/task-3277/README b/task-3277/README
new file mode 100644
index 0000000..6e71abb
--- /dev/null
+++ b/task-3277/README
@@ -0,0 +1,22 @@
+Analyze how long after earning the HSDir flag relays go away
+
+First, extract status entries with the HSDir flag set from the metrics
+database, with psql in unaligned, comma-separated mode (\a and \f ','):
+
+ tordir=> \o hsdir.csv
+ tordir=> SELECT validafter, fingerprint FROM statusentry
+ WHERE validafter >= '2010-05-01' AND validafter < '2011-05-01'
+ AND ishsdir IS TRUE ORDER BY validafter, fingerprint;
+ tordir=> \o
+
+Next, calculate continuous sessions of a relay having the HSDir flag. For
+each such session, extract the fingerprint and the first and last time it
+was listed in the network status with the HSDir flag. Also calculate the
+session duration in seconds:
+
+ $ javac EvaluateHsDirs.java && java EvaluateHsDirs
+
+Plot an ECDF of the session length:
+
+ $ R --slave -f hsdir-sessions.R
+
diff --git a/task-3277/hsdir-sessions.R b/task-3277/hsdir-sessions.R
new file mode 100644
index 0000000..4b28131
--- /dev/null
+++ b/task-3277/hsdir-sessions.R
@@ -0,0 +1,21 @@
+library(ggplot2)
+data <- read.csv("hsdir-sessions.csv", stringsAsFactors = FALSE)  # session rows; only the duration column is used below
+
+## Histogram; not that useful
+#ggplot(data, aes(x = duration / 3600)) +
+#geom_histogram(aes(y = ..density..), binwidth = 1) +
+#scale_x_continuous(limits = c(0, 72)) +
+#scale_y_continuous(formatter = "percent")
+
+data <- sort(data$duration)  # durations in ascending order; data is now a plain numeric vector
+data <- data.frame(x = data / (60 * 60),  # x: session duration converted from seconds to hours
+ y = (length(data):1) / length(data))  # y: fraction of sessions lasting at least x hours (descends from 1 to 1/n)
+ggplot(data, aes(x = x, y = y)) +
+geom_line() +
+scale_y_continuous("Cumulative fraction of continuous HSDir sessions\n",
+ formatter = "percent", limits = c(0, 1)) +  # y axis shown as percentages over the full 0..1 range
+scale_x_continuous(paste("\nHSDir session time between the relay earning",
+ "the HSDir flag and going away in hours"),
+ limits = c(0, 3 * 24), breaks = seq(0, 3 * 24, 24))  # x axis limited to 0..72 hours, one tick per 24 hours
+ggsave(filename = "hsdir-sessions.png", width = 8, height = 5, dpi = 72)  # saves the plot as hsdir-sessions.png
+
More information about the tor-commits
mailing list