[tor-commits] [metrics-tasks/master] Add graph to figure out spikes in user numbers.
karsten at torproject.org
karsten at torproject.org
Thu Jun 23 15:26:18 UTC 2011
commit 04a508dbd179ccb7895720ca0ea0f8964a44a26b
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Thu Jun 23 17:25:48 2011 +0200
Add graph to figure out spikes in user numbers.
---
task-3338/README | 20 ++++++++++++++++
task-3338/daily-users.R | 57 +++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 77 insertions(+), 0 deletions(-)
diff --git a/task-3338/README b/task-3338/README
new file mode 100644
index 0000000..4db2c65
--- /dev/null
+++ b/task-3338/README
@@ -0,0 +1,20 @@
+Ticket: Find out why our user graphs have huge spikes in Q2/2011
+================================================================
+
+1. Plot single variables of daily user estimate
+
+- reported directory requests
+- ratio of written directory bytes of the directory mirrors reporting
+ directory requests compared to all written directory bytes
+- daily users
+
+On a machine with the metrics database from metrics-web installed, edit
+daily-users.R to put in the real database password, then run it.
+
+ $ R --slave -f daily-users.R
+
+
+2. Plot directory bytes estimate
+
+(TODO)
+
diff --git a/task-3338/daily-users.R b/task-3338/daily-users.R
new file mode 100644
index 0000000..58921cc
--- /dev/null
+++ b/task-3338/daily-users.R
@@ -0,0 +1,57 @@
+library("RPostgreSQL")
+library("DBI")
+library("ggplot2")
+library("proto")
+library("grid")
+library("reshape")
+library("plyr")
+library("digest")
+
+# Connection settings used by dbConnect() in plot_dirbytes().
+db <- "tordir"
+dbuser <- "metrics"
+dbpassword <- "password" ###### <- put in real password here!
+
+# Plot the single variables of the daily user estimate as stacked facets
+# (requests, fraction, users) and save the graph to a file.
+#
+# start, end: first and last date to plot, as "YYYY-MM-DD" strings.
+# path:       output file name passed to ggsave().
+# dpi:        output resolution; coerced with as.numeric().
+#
+# Reads the globals db, dbuser and dbpassword for the database connection.
+# Returns whatever ggsave() returns.
+plot_dirbytes <- function(start, end, path, dpi) {
+  # Parse the date range once so that a malformed date fails here rather
+  # than being pasted into the SQL statement below.
+  start_date <- as.Date(start, "%Y-%m-%d")
+  end_date <- as.Date(end, "%Y-%m-%d")
+  stopifnot(length(start_date) == 1, length(end_date) == 1,
+      !is.na(start_date), !is.na(end_date))
+
+  # The sub-select drops the most recent dates in user_stats.
+  # NOTE(review): country 'zy' presumably stands for "all countries" (see
+  # the plot title) -- confirm against the metrics-web schema.
+  q <- paste("SELECT date, r, bwp, brp, bwn, brn, bwr, brr ",
+      "FROM user_stats WHERE date >= '", format(start_date, "%Y-%m-%d"),
+      "' AND date <= '", format(end_date, "%Y-%m-%d"),
+      "' AND date < (SELECT MAX(date) FROM user_stats) - 1 ",
+      "AND country = 'zy' ORDER BY date", sep = "")
+
+  drv <- dbDriver("PostgreSQL")
+  con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)
+  # Release the connection and the driver even if the query or the plotting
+  # code below fails.
+  on.exit({
+    dbDisconnect(con)
+    dbUnloadDriver(drv)
+  }, add = TRUE)
+  # dbGetQuery() sends the query, fetches all rows and clears the result set.
+  u <- dbGetQuery(con, q)
+
+  # Both derived columns are built from the same two terms:
+  # fraction = term_r / term_p, and users = requests / fraction / 10.
+  term_p <- u$bwp * u$brn / u$bwn - u$brp
+  term_r <- u$bwr * u$brn / u$bwn - u$brr
+  u <- data.frame(date = u$date,
+      requests = u$r,
+      fraction = term_r / term_p,
+      users = u$r * term_p / term_r / 10)
+
+  # Dates that get a red marker in every facet.
+  highest <- u[u$date %in% as.Date(c("2011-04-10", "2011-04-17",
+      "2011-04-24", "2011-05-29")), ]
+  highest <- melt(highest, id.vars = 1)
+
+  # Add NA rows for dates without data so that the lines show gaps instead
+  # of connecting across missing days.
+  dates <- seq(from = start_date, to = end_date, by = "1 day")
+  missing <- setdiff(dates, u$date)
+  if (length(missing) > 0) {
+    u <- rbind(u,
+        data.frame(date = as.Date(missing, origin = "1970-01-01"),
+        requests = NA, fraction = NA, users = NA))
+  }
+  u <- melt(u, id.vars = 1)
+
+  p <- ggplot(u, aes(x = as.Date(date, "%Y-%m-%d"), y = value)) +
+    geom_line() +
+    facet_grid(variable ~ ., scales = "free_y") +
+    scale_x_date(name = paste("\nThe Tor Project - ",
+        "https://metrics.torproject.org/", sep = "")) +
+    scale_y_continuous(name = "") +
+    geom_point(data = highest, size = 3, colour = alpha("red", 0.5)) +
+    opts(title = paste("Estimating directly connecting users from all",
+        "countries\n(users = requests / fraction / 10)\n"))
+  # Pass the plot explicitly instead of relying on the implicit last plot.
+  ggsave(filename = path, plot = p, width = 8, height = 5,
+      dpi = as.numeric(dpi))
+}
+# Plot 2011-01-01 through 2011-06-23 into daily-users.pdf at 300 dpi.
+plot_dirbytes(start = "2011-01-01", end = "2011-06-23",
+    path = "daily-users.pdf", dpi = 300)
+
More information about the tor-commits
mailing list