[tor-commits] [metrics-tasks/master] Commit graphing code for #3574.
karsten at torproject.org
karsten at torproject.org
Wed Apr 4 07:17:13 UTC 2012
commit b7a73f0ce0605e4d0f017cb0696cc924a75c986e
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Wed Apr 4 09:15:30 2012 +0200
Commit graphing code for #3574.
The ticket has been closed for 7 months now; I just found the code on my disk.
---
task-3574/.gitignore | 6 +++
task-3574/AggregatePerDay.java | 66 +++++++++++++++++++++++++++++++++++
task-3574/OneLinePerDescriptor.java | 20 ++++++++++
task-3574/README | 23 ++++++++++++
task-3574/eval.R | 12 ++++++
5 files changed, 127 insertions(+), 0 deletions(-)
diff --git a/task-3574/.gitignore b/task-3574/.gitignore
new file mode 100644
index 0000000..fae3c24
--- /dev/null
+++ b/task-3574/.gitignore
@@ -0,0 +1,6 @@
+*.txt
+extra-infos/
+*.csv
+*.class
+*.pdf
+
diff --git a/task-3574/AggregatePerDay.java b/task-3574/AggregatePerDay.java
new file mode 100644
index 0000000..cf625a6
--- /dev/null
+++ b/task-3574/AggregatePerDay.java
@@ -0,0 +1,66 @@
+import java.io.*;
+import java.text.*;
+import java.util.*;
+public class AggregatePerDay { // Aggregates history values per fingerprint, UTC day and direction: reads bridge-bandwidth-histories-sorted.txt, writes bridge-bandwidth-per-day.csv.
+ public static void main(String[] args) throws Exception { // args is unused; any I/O or parse exception propagates to the caller
+ SortedMap<String, long[]> byteHistory = new TreeMap<String, long[]>(); // key "fingerprint,yyyy-MM-dd,read|write" -> 96 slots, one per 15-minute step of that day
+ BufferedReader br = new BufferedReader(new FileReader(
+ "bridge-bandwidth-histories-sorted.txt"));
+ String line;
+ SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
+ "yyyy-MM-dd HH:mm:ss"); // parses the timestamp of a history entry
+ dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ SimpleDateFormat dateFormat = new SimpleDateFormat(
+ "yyyy-MM-dd"); // formats the day component used in the map key
+ dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ SimpleDateFormat timeFormat = new SimpleDateFormat(
+ "HH:mm:ss"); // re-parses the time of day alone, which yields milliseconds since midnight UTC
+ timeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ while ((line = br.readLine()) != null) {
+ String[] parts = line.split(" ");
+ if (parts.length < 3) { // skip lines with fewer than three space-separated fields
+ continue;
+ }
+ String fingerprint = parts[2]; // third field; presumably the fingerprint of an "extra-info <nickname> <fingerprint>" line - verify against the input
+ while (line.contains("-history")) { // handle one "read-history"/"write-history" entry per pass
+ line = line.substring(line.indexOf("-history") - 4); // back up 4 chars: line now starts with "read-history" or "rite-history" ("write" minus its "w")
+ boolean isReadHistory = line.startsWith("read"); // anything else is treated as a write history
+ line = line.substring(5); // drop "read-"/"rite-" so this entry no longer matches "-history" on the next pass
+ parts = line.split(" "); // fields used below: [1] date, [2] time, [5] comma-separated values; [3] and [4] are ignored (presumably the interval length - TODO confirm)
+ if (parts.length >= 6 && parts[5].length() > 0 && !parts[5].contains("history")) { // need a non-empty value list; reject field 5 when it is already the next history keyword
+ String[] bytes = parts[5].split(",");
+ long intervalEnd = dateTimeFormat.parse(parts[1] + " " + parts[2]).getTime(); // entry timestamp in epoch milliseconds; it is applied to the last value in the list
+ for (int i = bytes.length - 1; i >= 0; i--) { // walk the values last to first, stepping the timestamp back 15 minutes each time
+ String key = fingerprint + ","
+ + dateFormat.format(intervalEnd)
+ + (isReadHistory ? ",read" : ",write");
+ long timeIndex = timeFormat.parse(
+ dateTimeFormat.format(intervalEnd).split(" ")[1]).getTime()
+ / (15L * 60L * 1000L); // 15-minute slot of the UTC day, 0..95
+ long value = Long.parseLong(bytes[i]);
+ if (!byteHistory.containsKey(key)) {
+ byteHistory.put(key, new long[96]);
+ }
+ byteHistory.get(key)[(int) timeIndex] = value + 1L; // stored with a +1 offset so that 0 means "slot not filled"; a later entry for the same slot overwrites
+ intervalEnd -= 15L * 60L * 1000L; // NOTE(review): assumes 15-minute intervals; the interval length in the input is never checked
+ }
+ }
+ }
+ }
+ br.close(); // NOTE(review): not reached if an exception is thrown above
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ "bridge-bandwidth-per-day.csv"));
+ for (Map.Entry<String, long[]> e : byteHistory.entrySet()) { // TreeMap iteration, so rows are written in sorted key order
+ long total = 0L, count = 0L;
+ for (long val : e.getValue()) {
+ if (val > 0L) { // slot was filled
+ total += val - 1L; // undo the +1 offset
+ count += 1L;
+ }
+ }
+ bw.write(e.getKey() + "," + total + "," + count + "\n"); // row: fingerprint,date,read|write,sum of values,number of filled slots; no header row is written
+ }
+ bw.close();
+ }
+}
+
diff --git a/task-3574/OneLinePerDescriptor.java b/task-3574/OneLinePerDescriptor.java
new file mode 100644
index 0000000..78c83b1
--- /dev/null
+++ b/task-3574/OneLinePerDescriptor.java
@@ -0,0 +1,20 @@
+import java.io.*;
+public class OneLinePerDescriptor { // Joins the lines of bridge-bandwidth-histories-raw.txt so that each "extra-info " line and the lines following it form a single output line.
+ public static void main(String[] args) throws Exception { // args is unused; I/O exceptions propagate to the caller
+ BufferedReader br = new BufferedReader(new FileReader(
+ "bridge-bandwidth-histories-raw.txt"));
+ String line;
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ "bridge-bandwidth-histories-by-fingerprint.txt"));
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("extra-info ")) {
+ bw.write("\n" + line); // start a new output line; the output therefore begins with an empty line when the input starts with "extra-info "
+ } else {
+ bw.write(" " + line); // append to the current output line, separated by one space
+ }
+ }
+ bw.close(); // note: no newline is written after the last record
+ br.close();
+ }
+}
+
diff --git a/task-3574/README b/task-3574/README
new file mode 100644
index 0000000..0151eb4
--- /dev/null
+++ b/task-3574/README
@@ -0,0 +1,23 @@
+Extract bridge fingerprints and byte histories from extra-info
+descriptors:
+
+ $ grep -hRE "^extra-info|^write-history|^read-history" extra-infos > \
+ bridge-bandwidth-histories-raw.txt
+
+Convert the output into something we can sort by fingerprint:
+
+ $ javac OneLinePerDescriptor.java && java OneLinePerDescriptor
+
+Sort by fingerprint and filter out duplicates:
+
+ $ sort bridge-bandwidth-histories-by-fingerprint.txt | uniq > \
+ bridge-bandwidth-histories-sorted.txt
+
+Aggregate bytes per day:
+
+ $ javac AggregatePerDay.java && java AggregatePerDay
+
+Plot graphs:
+
+ $ R --slave -f eval.R
+
diff --git a/task-3574/eval.R b/task-3574/eval.R
new file mode 100644
index 0000000..ac36880
--- /dev/null
+++ b/task-3574/eval.R
@@ -0,0 +1,12 @@
+library(ggplot2) # Plots two cumulative distributions over fingerprints from bridge-bandwidth-per-day.csv.
+data <- read.csv("bridge-bandwidth-per-day.csv", stringsAsFactors = FALSE, col.names = c("fingerprint", "date", "operation", "bytes", "intervals")) # NOTE(review): read.csv defaults to header = TRUE, so if the CSV has no header row its first data row appears to be consumed as the header - confirm
+d <- aggregate(list(bytes = data$bytes, intervals = data$intervals), by = list(fingerprint = data$fingerprint), sum) # sum bytes and intervals per fingerprint, across all dates and both operations
+ggplot(d, aes(x = sort(bytes) / 2^30, y = (1:length(bytes)) / length(bytes))) + # x: sorted per-fingerprint totals in GiB; y: rank / number of fingerprints
+geom_line() +
+scale_x_continuous(name = "\nTotal read and written GiB per month", limits = c(0, 100)) + # NOTE(review): the label says "per month", but nothing here restricts the data to one month - confirm the input range
+scale_y_continuous(name = "Fraction of bridges\n", formatter = "percent", limits = c(0, 1)) # NOTE(review): `formatter` belongs to older ggplot2 releases; newer ones take `labels` instead - confirm against the installed version
+ggplot(d, aes(x = sort(bytes / intervals) / (15 * 60 * 2^10), y = (1:length(bytes)) / length(bytes))) + # x: bytes per interval divided by 900 s and 1024, i.e. KiB/s if each interval is 15 minutes
+geom_line() +
+scale_x_continuous(name = "\nMean read and written KiB per second", limits = c(0, 10)) +
+scale_y_continuous(name = "Fraction of bridges\n", formatter = "percent", limits = c(0, 1)) # same legacy `formatter` argument as in the first plot
+
More information about the tor-commits
mailing list