[tor-commits] [metrics-tasks/master] Add #3277 code.

karsten at torproject.org karsten at torproject.org
Tue May 24 13:30:29 UTC 2011


commit 0a10a39ca7f7f2b3e306ad6de7a1776a6bd50589
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Tue May 24 15:30:11 2011 +0200

    Add #3277 code.
---
 task-3277/.gitignore          |    5 ++++
 task-3277/EvaluateHsDirs.java |   52 +++++++++++++++++++++++++++++++++++++++++
 task-3277/README              |   22 +++++++++++++++++
 task-3277/hsdir-sessions.R    |   21 ++++++++++++++++
 4 files changed, 100 insertions(+), 0 deletions(-)

diff --git a/task-3277/.gitignore b/task-3277/.gitignore
new file mode 100644
index 0000000..5103210
--- /dev/null
+++ b/task-3277/.gitignore
@@ -0,0 +1,5 @@
+*.csv
+*.class
+Rplots.pdf
+*.png
+
diff --git a/task-3277/EvaluateHsDirs.java b/task-3277/EvaluateHsDirs.java
new file mode 100644
index 0000000..4e6f1a4
--- /dev/null
+++ b/task-3277/EvaluateHsDirs.java
@@ -0,0 +1,52 @@
+import java.io.*;
+import java.text.*;
+import java.util.*;
+public class EvaluateHsDirs {
+  public static void main(String[] args) throws Exception {
+    BufferedWriter bw = new BufferedWriter(new FileWriter(
+        "hsdir-sessions.csv"));
+    bw.write("fingerprint,firstseen,lastseen,duration\n");
+    BufferedReader br = new BufferedReader(new FileReader("hsdir.csv"));
+    String line = br.readLine(), firstValidAfter = null,
+        lastValidAfter = null, lastButOneValidAfter = null;
+    Map<String, String> last = new HashMap<String, String>();
+    Map<String, String> current = new HashMap<String, String>();
+    SimpleDateFormat formatter = new SimpleDateFormat(
+        "yyyy-MM-dd HH:mm:ss");
+    formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+    while ((line = br.readLine()) != null) {
+      String[] parts = line.split(",");
+      String validAfter = parts[0];
+      if (firstValidAfter == null) {
+        firstValidAfter = validAfter;
+        lastValidAfter = validAfter;
+      }
+      if (!line.startsWith("20") || !lastValidAfter.equals(validAfter)) {
+        for (Map.Entry<String, String> e : last.entrySet()) {
+          if (!current.containsKey(e.getKey()) &&
+              !e.getValue().equals(firstValidAfter)) {
+            long duration =
+                (formatter.parse(lastButOneValidAfter).getTime()
+                - formatter.parse(e.getValue()).getTime()) / 1000L;
+            bw.write(e.getKey() + "," + e.getValue() + ","
+                + lastButOneValidAfter + "," + duration + "\n");
+          }
+        }
+        if (!line.startsWith("20")) {
+          break;
+        }
+        last = current;
+        current = new HashMap<String, String>();
+        lastButOneValidAfter = lastValidAfter;
+      } else if (last.containsKey(parts[1])) {
+        current.put(parts[1], last.remove(parts[1]));
+      } else {
+        current.put(parts[1], validAfter);
+      }
+      lastValidAfter = validAfter;
+    }
+    br.close();
+    bw.close();
+  }
+}
+
diff --git a/task-3277/README b/task-3277/README
new file mode 100644
index 0000000..6e71abb
--- /dev/null
+++ b/task-3277/README
@@ -0,0 +1,22 @@
+Analyze how long after earning the HSDir flag relays go away
+
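+First, extract status entries with the HSDir flag set from the metrics
+database.  EvaluateHsDirs.java skips the first line of hsdir.csv and
+expects every following line to be a comma-separated
+"validafter,fingerprint" pair, so write the result in psql's unaligned
+output format with "," as the field separator (e.g., \a and \f ','):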
+
+  tordir=> \o hsdir.csv
+  tordir=> SELECT validafter, fingerprint FROM statusentry
+           WHERE validafter >= '2010-05-01' AND validafter < '2011-05-01'
+           AND ishsdir IS TRUE ORDER BY validafter, fingerprint;
+  tordir=> \o
+
+Next, calculate continuous sessions of a relay having the HSDir flag.  For
+each such session, extract the fingerprint and the first and last time it
+was listed in the network status with the HSDir flag.  Also calculate the
+session duration in seconds:
+
+  $ javac EvaluateHsDirs.java && java EvaluateHsDirs
+
+Plot the fraction of sessions that lasted at least a given number of
+hours (a complementary ECDF of the session length):
+
+  $ R --slave -f hsdir-sessions.R
+
diff --git a/task-3277/hsdir-sessions.R b/task-3277/hsdir-sessions.R
new file mode 100644
index 0000000..4b28131
--- /dev/null
+++ b/task-3277/hsdir-sessions.R
@@ -0,0 +1,21 @@
+## Plot, for every session length, the fraction of HSDir sessions that
+## lasted at least that long, and save the result as hsdir-sessions.png.
+library(ggplot2)
+sessions <- read.csv("hsdir-sessions.csv", stringsAsFactors = FALSE)
+
+## Alternative view as a histogram; not that useful
+#ggplot(sessions, aes(x = duration / 3600)) +
+#geom_histogram(aes(y = ..density..), binwidth = 1) +
+#scale_x_continuous(limits = c(0, 72)) +
+#scale_y_continuous(formatter = "percent")
+
+## Session durations in ascending order, still in seconds.
+seconds <- sort(sessions$duration)
+n <- length(seconds)
+
+## x is the duration in hours; y runs from 1 for the shortest session
+## down to 1/n for the longest one.
+curve <- data.frame(x = seconds / (60 * 60), y = (n:1) / n)
+
+## Only the first three days are shown, with one tick per day.
+max_hours <- 3 * 24
+
+## Keep the plot as a top-level expression so that it is printed and
+## becomes the plot that ggsave() writes out.
+ggplot(curve, aes(x = x, y = y)) +
+geom_line() +
+scale_y_continuous("Cumulative fraction of continuous HSDir sessions\n",
+  formatter = "percent", limits = c(0, 1)) +
+scale_x_continuous(paste("\nHSDir session time between the relay earning",
+  "the HSDir flag and going away in hours"),
+  limits = c(0, max_hours), breaks = seq(0, max_hours, 24))
+ggsave(filename = "hsdir-sessions.png", width = 8, height = 5, dpi = 72)
+



More information about the tor-commits mailing list