[tor-commits] [metrics-web/master] Extend censorship detector to new user estimates.
karsten at torproject.org
karsten at torproject.org
Mon Sep 16 18:00:12 UTC 2013
commit 41565da93ef5cc3e562f21b8695dbf251b55459a
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Mon Sep 16 19:17:27 2013 +0200
Extend censorship detector to new user estimates.
---
detector/detector.py | 30 ++++++-----
detector/detector.sh | 1 +
rserve/csv.R | 16 ++++++
rserve/graphs.R | 57 +++++++++++++++-----
rserve/tables.R | 38 +++++++++++--
.../ernie/web/graphs/GraphsSubpagesServlet.java | 2 +-
.../ernie/web/graphs/RObjectGenerator.java | 5 +-
web/WEB-INF/users.jsp | 45 ++++++++++++++++
8 files changed, 160 insertions(+), 34 deletions(-)
diff --git a/detector/detector.py b/detector/detector.py
index 7f924db..2c51b5b 100644
--- a/detector/detector.py
+++ b/detector/detector.py
@@ -348,8 +348,8 @@ def plot_all(tss, minx, maxx, INTERV, DAYS=None, rdir="img"):
summary_file.close()
"""Write a CSV report on the minimum/maximum users of each country per date."""
-def write_all(tss, minc, maxc, INTERVAL=7):
- ranges_file = file("direct-users-ranges.csv", "w")
+def write_all(tss, minc, maxc, RANGES_FILE, INTERVAL=7):
+ ranges_file = file(RANGES_FILE, "w")
ranges_file.write("date,country,minusers,maxusers\n")
exclude = set(["all", "??", "date"])
for c in tss.country_codes:
@@ -415,23 +415,25 @@ def write_ml_report(tss, minx, maxx, INTERV, DAYS, notification_period=None):
report_file.close()
-def main():
- # Change these to customize script
- CSV_FILE = "direct-users.csv"
- GRAPH_DIR = "img"
- # Time interval to model connection rates.
- INTERV = 7
- # Consider maximum DAYS days back.
- DAYS= 6 * 31
-
+# INTERV is the time interval used to model connection rates;
+# DAYS is the maximum number of days to look back.
+def detect(CSV_FILE = "userstats-detector.csv",
+ RANGES_FILE = "userstats-ranges.csv", GRAPH_DIR = "img",
+ INTERV = 7, DAYS = 6 * 31, REPORT = True):
tss = torstatstore(CSV_FILE)
l = tss.get_largest_locations(50)
minx, maxx = make_tendencies_minmax(l, INTERV)
#plot_all(tss, minx, maxx, INTERV, DAYS, rdir=GRAPH_DIR)
- write_all(tss, minx, maxx, INTERV)
+ write_all(tss, minx, maxx, RANGES_FILE, INTERV)
- # Make our short report; only consider events of the last day
- write_ml_report(tss, minx, maxx, INTERV, DAYS, 1)
+ if REPORT:
+ # Make our short report; only consider events of the last day
+ write_ml_report(tss, minx, maxx, INTERV, DAYS, 1)
+
+def main():
+ detect(CSV_FILE = "direct-users.csv",
+ RANGES_FILE = "direct-users-ranges.csv")
+ detect(REPORT = False)
if __name__ == "__main__":
main()
diff --git a/detector/detector.sh b/detector/detector.sh
index 8e2ea47..56f6886 100755
--- a/detector/detector.sh
+++ b/detector/detector.sh
@@ -1,5 +1,6 @@
#!/bin/bash
wget -qO direct-users.csv --no-check-certificate https://metrics.torproject.org/csv/direct-users.csv
+wget -qO userstats-detector.csv --no-check-certificate https://metrics.torproject.org/csv/userstats-detector.csv
python detector.py
cat short_censorship_report.txt | mail -E -s 'Possible censorship events' tor-censorship-events at lists.torproject.org
diff --git a/rserve/csv.R b/rserve/csv.R
index 531e73f..34a3f91 100644
--- a/rserve/csv.R
+++ b/rserve/csv.R
@@ -328,3 +328,19 @@ export_monthly_userstats_average <- function(path) {
help_export_monthly_userstats(path, mean)
}
+export_userstats_detector <- function(path) {
+ u <- read.csv(paste("/srv/metrics.torproject.org/task-8462-graphs/",
+ "task-8462/userstats.csv", sep = ""),
+ stringsAsFactors = FALSE)
+ u <- u[u$country != '' & u$transport == '' & u$version == '' &
+ u$node == 'relay', c("country", "date", "users")]
+ u <- rbind(u, data.frame(country = "zy",
+ aggregate(list(users = u$users),
+ by = list(date = u$date), sum)))
+ u <- data.frame(date = u$date, country = u$country,
+ users = floor(u$users))
+ u <- cast(u, date ~ country, value = "users")
+ names(u)[names(u) == "zy"] <- "all"
+ write.csv(u, path, quote = FALSE, row.names = FALSE)
+}
+
diff --git a/rserve/graphs.R b/rserve/graphs.R
index 69f9aa9..da39327 100644
--- a/rserve/graphs.R
+++ b/rserve/graphs.R
@@ -690,8 +690,8 @@ plot_direct_users <- function(start, end, country, events, path) {
if (length(r$maxusers) > 0)
max_y <- max(max_y, max(r$maxusers, na.rm = TRUE))
plot <- plot +
- geom_ribbon(data = r, aes(ymin = minusers, ymax = maxusers),
- fill = "gray")
+ geom_ribbon(data = r, aes(ymin = max(0, minusers),
+ ymax = maxusers), fill = "gray")
}
if (length(upturns$date) > 0)
plot <- plot +
@@ -1022,7 +1022,8 @@ plot_bandwidth_flags <- function(start, end, path) {
ggsave(filename = path, width = 8, height = 5, dpi = 72)
}
-plot_userstats <- function(start, end, node, variable, value, path) {
+plot_userstats <- function(start, end, node, variable, value, events,
+ path) {
end <- min(end, as.character(Sys.Date()))
u <- read.csv(paste("/srv/metrics.torproject.org/task-8462-graphs/",
"task-8462/userstats.csv", sep = ""),
@@ -1052,7 +1053,7 @@ plot_userstats <- function(start, end, node, variable, value, path) {
" (BETA)\n", sep = "")
} else {
u <- u[u$country == '' & u$transport == '' & u$version == '' &
- u$node == 'bridge', ]
+ u$node == 'bridge', ]
title <- "Bridge users (BETA)\n"
}
}
@@ -1068,32 +1069,62 @@ plot_userstats <- function(start, end, node, variable, value, path) {
formatter <- function(x, ...) { format(x, scientific = FALSE, ...) }
date_breaks <- date_breaks(
as.numeric(max(u$date) - min(u$date)))
- ggplot(u, aes(x = date, y = users)) +
+ max_y <- ifelse(length(na.omit(u$users)) == 0, 0,
+ max(u$users, na.rm = TRUE))
+ plot <- ggplot(u, aes(x = date, y = users))
+ if (length(na.omit(u$users)) > 0 & events != "off" &
+ variable == 'country' & value != "all") {
+ r <- read.csv(
+ "/srv/metrics.torproject.org/web/detector/userstats-ranges.csv",
+ stringsAsFactors = FALSE)
+ r <- r[r$date >= start & r$date <= end & r$country == value,
+ c("date", "minusers", "maxusers")]
+ r <- cast(rbind(melt(u, id.vars = "date"), melt(r, id.vars = "date")))
+ upturns <- r[r$users > r$maxusers, 1:2]
+ downturns <- r[r$users < r$minusers, 1:2]
+ if (events == "on") {
+ if (length(r$maxusers) > 0)
+ max_y <- max(max_y, max(r$maxusers, na.rm = TRUE))
+ plot <- plot +
+ geom_ribbon(data = r, aes(ymin = max(0, minusers),
+ ymax = maxusers), fill = "gray")
+ }
+ if (length(upturns$date) > 0)
+ plot <- plot +
+ geom_point(data = upturns, aes(x = date, y = users), size = 5,
+ colour = "dodgerblue2")
+ if (length(downturns$date) > 0)
+ plot <- plot +
+ geom_point(data = downturns, aes(x = date, y = users), size = 5,
+ colour = "firebrick2")
+ }
+ plot <- plot +
geom_line(size = 1) +
scale_x_date(name = paste("\nThe Tor Project - ",
"https://metrics.torproject.org/", sep = ""),
format = date_breaks$format, major = date_breaks$major,
minor = date_breaks$minor) +
- scale_y_continuous(name = "", limits = c(0,
- ifelse(length(na.omit(u$users)) == 0, 0,
- max(u$users, na.rm = TRUE))), formatter = formatter) +
+ scale_y_continuous(name = "", limits = c(0, max_y),
+ formatter = formatter)
opts(title = title)
ggsave(filename = path, width = 8, height = 5, dpi = 72)
}
-plot_userstats_relay_country <- function(start, end, country, path) {
- plot_userstats(start, end, 'relay', 'country', country, path)
+plot_userstats_relay_country <- function(start, end, country, events,
+ path) {
+ plot_userstats(start, end, 'relay', 'country', country, events, path)
}
plot_userstats_bridge_country <- function(start, end, country, path) {
- plot_userstats(start, end, 'bridge', 'country', country, path)
+ plot_userstats(start, end, 'bridge', 'country', country, 'off', path)
}
plot_userstats_bridge_transport <- function(start, end, transport, path) {
- plot_userstats(start, end, 'bridge', 'transport', transport, path)
+ plot_userstats(start, end, 'bridge', 'transport', transport, 'off',
+ path)
}
plot_userstats_bridge_version <- function(start, end, version, path) {
- plot_userstats(start, end, 'bridge', 'version', version, path)
+ plot_userstats(start, end, 'bridge', 'version', version, 'off', path)
}
diff --git a/rserve/tables.R b/rserve/tables.R
index e0dc1e4..59593d6 100644
--- a/rserve/tables.R
+++ b/rserve/tables.R
@@ -42,9 +42,6 @@ write_censorship_events <- function(start, end, path) {
u <- data.frame(date = u$date, country = u$country,
users = u$r * (u$bwp * u$brn / u$bwn - u$brp) /
(u$bwr * u$brn / u$bwn - u$brr) / 10)
- dates <- seq(from = as.Date(start, "%Y-%m-%d"),
- to = as.Date(end, "%Y-%m-%d"), by="1 day")
- missing <- setdiff(dates, u$date)
r <- read.csv(
"/srv/metrics.torproject.org/web/detector/direct-users-ranges.csv",
stringsAsFactors = FALSE)
@@ -60,10 +57,10 @@ write_censorship_events <- function(start, end, path) {
by = list(country = r$country), sum)
r <- r[!(r$country %in% c("zy", "??", "a1", "a2", "o1", "ap", "eu")), ]
r <- r[order(r$downturn, r$upturn, decreasing = TRUE), ]
- r <- r[1:10, ]
+ r <- r[1:10, ]
r <- data.frame(cc = r$country,
country = sub('the ', '', countrynames(as.character(r$country))),
- downturns = r$downturn,
+ downturns = r$downturn,
upturns = r$upturn)
write.csv(r, path, quote = FALSE, row.names = FALSE)
}
@@ -122,3 +119,34 @@ write_userstats_bridge <- function(start, end, path) {
write_userstats(start, end, 'bridge', path)
}
+write_userstats_censorship_events <- function(start, end, path) {
+ end <- min(end, as.character(Sys.Date()))
+ u <- read.csv(paste("/srv/metrics.torproject.org/task-8462-graphs/",
+ "task-8462/userstats.csv", sep = ""),
+ stringsAsFactors = FALSE)
+ u <- u[u$date >= start & u$date <= end & u$country != '' &
+ u$transport == '' & u$version == '' & u$node == 'relay',
+ c("date", "country", "users")]
+ r <- read.csv(
+ "/srv/metrics.torproject.org/web/detector/userstats-ranges.csv",
+ stringsAsFactors = FALSE)
+ r <- r[r$date >= start & r$date <= end,
+ c("date", "country", "minusers", "maxusers")]
+ r <- cast(rbind(melt(u, id.vars = c("date", "country")),
+ melt(r, id.vars = c("date", "country"))))
+ r <- na.omit(r[r$users < r$minusers | r$users > r$maxusers, ])
+ r <- data.frame(date = r$date, country = r$country,
+ upturn = ifelse(r$users > r$maxusers, 1, 0),
+ downturn = ifelse(r$users < r$minusers, 1, 0))
+ r <- aggregate(r[, c("upturn", "downturn")],
+ by = list(country = r$country), sum)
+ r <- r[!(r$country %in% c("zy", "??", "a1", "a2", "o1", "ap", "eu")), ]
+ r <- r[order(r$downturn, r$upturn, decreasing = TRUE), ]
+ r <- r[1:10, ]
+ r <- data.frame(cc = r$country,
+ country = sub('the ', '', countrynames(as.character(r$country))),
+ downturns = r$downturn,
+ upturns = r$upturn)
+ write.csv(r, path, quote = FALSE, row.names = FALSE)
+}
+
diff --git a/src/org/torproject/ernie/web/graphs/GraphsSubpagesServlet.java b/src/org/torproject/ernie/web/graphs/GraphsSubpagesServlet.java
index 3ac99bb..c79f1e3 100644
--- a/src/org/torproject/ernie/web/graphs/GraphsSubpagesServlet.java
+++ b/src/org/torproject/ernie/web/graphs/GraphsSubpagesServlet.java
@@ -52,7 +52,7 @@ public class GraphsSubpagesServlet extends HttpServlet {
this.availableGraphsSubpageTables.put("users.html",
new HashSet<String>(Arrays.asList((
"direct-users,censorship-events,bridge-users,userstats-relay,"
- + "userstats-bridge").split(","))));
+ + "userstats-censorship-events,userstats-bridge").split(","))));
this.knownCountries = Countries.getInstance().getCountryList();
}
diff --git a/src/org/torproject/ernie/web/graphs/RObjectGenerator.java b/src/org/torproject/ernie/web/graphs/RObjectGenerator.java
index 84d61c6..2fa0cc6 100644
--- a/src/org/torproject/ernie/web/graphs/RObjectGenerator.java
+++ b/src/org/torproject/ernie/web/graphs/RObjectGenerator.java
@@ -76,6 +76,7 @@ public class RObjectGenerator implements ServletContextListener {
this.availableCsvFiles.add("torperf");
this.availableCsvFiles.add("torperf-failures");
this.availableCsvFiles.add("userstats");
+ this.availableCsvFiles.add("userstats-detector");
this.availableCsvFiles.add("versions");
this.availableTables = new HashMap<String, String>();
@@ -84,6 +85,8 @@ public class RObjectGenerator implements ServletContextListener {
this.availableTables.put("bridge-users", "start,end,filename");
this.availableTables.put("userstats-relay", "start,end,filename");
this.availableTables.put("userstats-bridge", "start,end,filename");
+ this.availableTables.put("userstats-censorship-events",
+ "start,end,filename");
TableParameterChecker.getInstance().setAvailableTables(
availableTables);
@@ -111,7 +114,7 @@ public class RObjectGenerator implements ServletContextListener {
this.availableGraphs.put("fast-exits", "start,end,filename");
this.availableGraphs.put("almost-fast-exits", "start,end,filename");
this.availableGraphs.put("userstats-relay-country",
- "start,end,country,filename");
+ "start,end,country,events,filename");
this.availableGraphs.put("userstats-bridge-country",
"start,end,country,filename");
this.availableGraphs.put("userstats-bridge-transport",
diff --git a/web/WEB-INF/users.jsp b/web/WEB-INF/users.jsp
index 2a7bede..456aaab 100644
--- a/web/WEB-INF/users.jsp
+++ b/web/WEB-INF/users.jsp
@@ -208,6 +208,9 @@ Tor users (direct and bridge) per month by country.</p>
daily Tor users (direct and bridge) per month by country.</p>
<br>
+<hr>
+<hr>
+
<a name="userstats"></a>
<h3><a href="#userstats" class="anchor">New approach to estimating daily
Tor users (BETA)</a></h3>
@@ -257,6 +260,14 @@ It's yet to be decided which approach is more correct.</font>
</c:forEach>
</select>
</p><p>
+ Show possible censorship events if available (<a
+ href="http://research.torproject.org/techreports/detector-2011-09-09.pdf">BETA</a>)
+ <select name="events">
+ <option value="off">Off</option>
+ <option value="on"<c:if test="${userstats_relay_country_events[0] eq 'on'}"> selected</c:if>>On: both points and expected range</option>
+ <option value="points"<c:if test="${userstats_relay_country_events[0] eq 'points'}"> selected</c:if>>On: points only, no expected range</option>
+ </select>
+ </p><p>
<input class="submit" type="submit" value="Update graph">
</p>
</div>
@@ -296,6 +307,40 @@ It's yet to be decided which approach is more correct.</font>
</c:forEach>
</table>
<hr>
+<a name="userstats-censorship-events"></a>
+<p><b>Top-10 countries by possible censorship events (<a
+ href="http://research.torproject.org/techreports/detector-2011-09-09.pdf">BETA</a>):</b></p>
+<form action="users.html#userstats-censorship-events">
+ <div class="formrow">
+ <input type="hidden" name="table" value="userstats-censorship-events">
+ <p>
+ <label>Start date (yyyy-mm-dd):</label>
+ <input type="text" name="start" size="10"
+ value="<c:choose><c:when test="${fn:length(userstats_censorship_events_start) == 0}">${default_start_date}</c:when><c:otherwise>${userstats_censorship_events_start[0]}</c:otherwise></c:choose>">
+ <label>End date (yyyy-mm-dd):</label>
+ <input type="text" name="end" size="10"
+ value="<c:choose><c:when test="${fn:length(userstats_censorship_events_end) == 0}">${default_end_date}</c:when><c:otherwise>${userstats_censorship_events_end[0]}</c:otherwise></c:choose>">
+ </p><p>
+ <input class="submit" type="submit" value="Update table">
+ </p>
+ </div>
+</form>
+<br>
+<table>
+ <tr>
+ <th>Country</th>
+ <th>Downturns</th>
+ <th>Upturns</th>
+ </tr>
+ <c:forEach var="row" items="${userstats_censorship_events_tabledata}">
+ <tr>
+ <td><a href="users.html?graph=direct-users&country=${row['cc']}&events=on#direct-users">${row['country']}</a> </td>
+ <td>${row['downturns']}</td>
+ <td>${row['upturns']}</td>
+ </tr>
+ </c:forEach>
+</table>
+<hr>
<a name="userstats-bridge-country"></a>
<p><b>Bridge users by country (BETA):</b></p>
More information about the tor-commits mailing list