[tor-commits] [flashproxy/master] Extract proxy requests for graphing with R.
dcf at torproject.org
dcf at torproject.org
Wed Feb 20 17:30:39 UTC 2013
commit 5713db780884c581f53a8045b370fa32b4a7b5ec
Author: David Fifield <david at bamsoftware.com>
Date: Wed Feb 20 03:57:48 2013 -0800
Extract proxy requests for graphing with R.
---
experiments/proxy-extract.py | 82 ++++++++++++++++++++++++++++++++++++++++++
experiments/proxy-graph.r | 5 +++
2 files changed, 87 insertions(+), 0 deletions(-)
diff --git a/experiments/proxy-extract.py b/experiments/proxy-extract.py
new file mode 100755
index 0000000..e8b02ce
--- /dev/null
+++ b/experiments/proxy-extract.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python
+
+import datetime
+import getopt
+import re
+import sys
+
+def usage(f = sys.stdout):
+    """Write the command-line help text to the file object f.
+
+    The program name shown in the text is taken from sys.argv[0].
+    """
+    help_template = """\
+Usage: %s [INPUTFILE]
+Extract proxy connections from a facilitator log. Each output line is
+ date\tcount\n
+where count is the approximate poll interval in effect at date.
+
+ -h, --help show this help.
+"""
+    print >> f, help_template % sys.argv[0]
+
+# Parse the command line.  A malformed option is reported on stderr with a
+# non-zero exit status instead of an uncaught GetoptError traceback.
+try:
+    opts, args = getopt.gnu_getopt(sys.argv[1:], "h", ["help"])
+except getopt.GetoptError as err:
+    print >> sys.stderr, str(err)
+    usage(sys.stderr)
+    sys.exit(1)
+
+for o, a in opts:
+    if o in ("-h", "--help"):
+        # An explicit request for help is not an error: stdout, status 0.
+        usage()
+        sys.exit()
+
+# Read from the single named file, or from standard input when none is given.
+if len(args) == 0:
+    input_file = sys.stdin
+elif len(args) == 1:
+    input_file = open(args[0])
+else:
+    # More than one input file is a usage error, so say so on stderr and
+    # exit non-zero so that calling scripts can detect the failure.
+    usage(sys.stderr)
+    sys.exit(1)
+
+def timedelta_to_seconds(delta):
+    """Return the length of the datetime.timedelta delta in seconds, as a float."""
+    whole_seconds = delta.days * (24 * 60 * 60) + delta.seconds
+    fraction = delta.microseconds / 1000000.0
+    return whole_seconds + fraction
+
+# commit 49de7bf689ee989997a1edbf2414a7bdbc2164f9
+# Author: David Fifield <david at bamsoftware.com>
+# Date: Thu Jan 3 21:01:39 2013 -0800
+#
+# Bump poll interval from 10 s to 60 s.
+#
+# commit 69d429db12cedc90dac9ccefcace80c86af7eb51
+# Author: David Fifield <david at bamsoftware.com>
+# Date: Tue Jan 15 14:02:02 2013 -0800
+#
+# Increase facilitator_poll_interval from 1 m to 10 m.
+
+# Start of the 60 s and 600 s poll-interval periods.  The values match the
+# dates of the two commits quoted above, truncated to the hour.
+# NOTE(review): the commit dates carry a -0800 offset, while these naive
+# datetimes are compared directly against timestamps parsed from the log --
+# confirm that the log is written in the same time zone.
+BEGIN_60S = datetime.datetime(2013, 1, 3, 21, 0, 0)
+BEGIN_600S = datetime.datetime(2013, 1, 15, 14, 0, 0)
+
+# Proxies refresh themselves once a day, so interpolate across a day when the
+# polling interval historically changed.
+def get_poll_interval(date):
+    """Return the poll interval, in seconds, assumed to be in effect at date.
+
+    The result is 10 before BEGIN_60S, 60 between the two transitions and 600
+    after the second one.  During the day that follows each transition the
+    value rises linearly from the old interval to the new one.
+    """
+    one_day = datetime.timedelta(1)
+    day_seconds = timedelta_to_seconds(one_day)
+
+    if date < BEGIN_60S:
+        return 10
+    if date < BEGIN_60S + one_day:
+        # Fraction of the transition day elapsed, scaled onto 10..60.
+        elapsed = timedelta_to_seconds(date - BEGIN_60S)
+        return elapsed / day_seconds * (60 - 10) + 10
+    if date < BEGIN_600S:
+        return 60
+    if date < BEGIN_600S + one_day:
+        # Fraction of the transition day elapsed, scaled onto 60..600.
+        elapsed = timedelta_to_seconds(date - BEGIN_600S)
+        return elapsed / day_seconds * (600 - 60) + 60
+    return 600
+
+# Aggregate "proxy gets" log lines into one output row per clock hour.
+#
+# prev_output is the start of the hour currently being accumulated (None until
+# the first matching line is seen); count is the sum of the poll intervals of
+# the requests seen in that hour.  Each row is
+#     <start of hour>\t<count / 10>
+# Dividing by 10 presumably normalises to the original 10 s poll interval --
+# confirm against the consumer of this output.
+prev_output = None
+count = 0.0
+
+for line in input_file:
+    m = re.match(r'^(\d+-\d+-\d+ \d+:\d+:\d+) proxy gets', line)
+    if not m:
+        continue
+    date_str, = m.groups()
+    date = datetime.datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S")
+
+    rounded_date = date.replace(minute=0, second=0, microsecond=0)
+    if prev_output is not None and rounded_date != prev_output:
+        # The hour has changed: emit the finished hour *before* counting this
+        # request, so that the request is credited to its own hour, and stamp
+        # the row with the hour it summarises rather than with the time of the
+        # next request.
+        avg = float(count) / 10.0
+        print prev_output.strftime("%Y-%m-%d %H:%M:%S") + "\t" + "%.2f" % avg
+        count = 0.0
+    prev_output = rounded_date
+
+    count += get_poll_interval(date)
+
+# Emit the last hour as well; without this the tail of the log is dropped.
+if prev_output is not None:
+    avg = float(count) / 10.0
+    print prev_output.strftime("%Y-%m-%d %H:%M:%S") + "\t" + "%.2f" % avg
diff --git a/experiments/proxy-graph.r b/experiments/proxy-graph.r
new file mode 100644
index 0000000..6b3e1cc
--- /dev/null
+++ b/experiments/proxy-graph.r
@@ -0,0 +1,5 @@
+library(ggplot2)
+# Read the headerless, tab-delimited file proxy.dat into two columns: date
+# (parsed as POSIXct) and interval (numeric).
+# NOTE(review): presumably this file is the output of proxy-extract.py --
+# confirm how it is produced.
+x <- read.delim("proxy.dat", header=FALSE, col.names=c("date", "interval"), colClasses=c("POSIXct", "numeric"))
+
+# Send the plot to a 720x480 PNG file.
+# NOTE(review): no dev.off() follows, so the file is only finalised when the
+# graphics device is closed at the end of the R session -- confirm.
+png("proxy-count.png", width=720, height=480)
+# Bar chart over date in which each row contributes interval/10 to its bin;
+# the bin width of 86400 is the number of seconds in one day.
+qplot(date, data=x, geom="bar", weight=interval/10, binwidth=86400, ylab="proxy requests per day")
More information about the tor-commits
mailing list