[tor-commits] [metrics-web/release] Simplify plot_webstats_tb_locale function.
karsten at torproject.org
karsten at torproject.org
Sat Nov 9 21:45:06 UTC 2019
commit 2b34cd2023a3e59057f4274afb0d7b8163282a18
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Thu Jan 10 10:41:48 2019 +0100
Simplify plot_webstats_tb_locale function.
---
src/main/R/rserver/graphs.R | 61 ++++++++++++++++++++-------------------------
1 file changed, 27 insertions(+), 34 deletions(-)
diff --git a/src/main/R/rserver/graphs.R b/src/main/R/rserver/graphs.R
index ba8862c..27f399d 100644
--- a/src/main/R/rserver/graphs.R
+++ b/src/main/R/rserver/graphs.R
@@ -1265,8 +1265,8 @@ write_webstats_tb_platform <- function(start_p = NULL, end_p = NULL, path_p) {
write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
}
-plot_webstats_tb_locale <- function(start_p, end_p, path_p) {
- d <- read_csv(file = paste(stats_dir, "webstats.csv", sep = ""),
+prepare_webstats_tb_locale <- function(start_p, end_p) {
+ read_csv(file = paste(stats_dir, "webstats.csv", sep = ""),
col_types = cols(
log_date = col_date(format = ""),
request_type = col_factor(),
@@ -1274,20 +1274,35 @@ plot_webstats_tb_locale <- function(start_p, end_p, path_p) {
channel = col_skip(),
locale = col_factor(),
incremental = col_skip(),
- count = col_double()))
- d <- d[d$log_date >= start_p & d$log_date <= end_p &
- d$request_type %in% c("tbid", "tbup"), ]
- levels(d$request_type) <- list(
- "Initial downloads" = "tbid",
- "Update pings" = "tbup")
+ count = col_double())) %>%
+ filter(if (!is.null(start_p)) log_date >= as.Date(start_p) else TRUE) %>%
+ filter(if (!is.null(end_p)) log_date <= as.Date(end_p) else TRUE) %>%
+ filter(request_type %in% c("tbid", "tbup")) %>%
+ rename(date = log_date) %>%
+ group_by(date, locale, request_type) %>%
+ summarize(count = sum(count)) %>%
+ mutate(request_type = factor(request_type, levels = c("tbid", "tbup"))) %>%
+ spread(request_type, count, fill = 0) %>%
+ rename(initial_downloads = tbid, update_pings = tbup)
+}
+
+plot_webstats_tb_locale <- function(start_p, end_p, path_p) {
+ d <- prepare_webstats_tb_locale(start_p, end_p) %>%
+ gather(request_type, count, -c(date, locale)) %>%
+ mutate(request_type = factor(request_type,
+ levels = c("initial_downloads", "update_pings"),
+ labels = c("Initial downloads", "Update pings")))
e <- d
e <- aggregate(list(count = e$count), by = list(locale = e$locale), FUN = sum)
e <- e[order(e$count, decreasing = TRUE), ]
e <- e[1:5, ]
- d <- aggregate(list(count = d$count), by = list(log_date = d$log_date,
+ d <- aggregate(list(count = d$count), by = list(date = d$date,
request_type = d$request_type,
locale = ifelse(d$locale %in% e$locale, d$locale, "(other)")), FUN = sum)
- ggplot(d, aes(x = log_date, y = count, colour = locale)) +
+ d %>%
+ complete(date = full_seq(date, period = 1),
+ nesting(locale, request_type)) %>%
+ ggplot(aes(x = date, y = count, colour = locale)) +
geom_point() +
geom_line() +
scale_x_date(name = "", breaks = custom_breaks,
@@ -1295,7 +1310,7 @@ plot_webstats_tb_locale <- function(start_p, end_p, path_p) {
scale_y_continuous(name = "", labels = formatter, limits = c(0, NA)) +
scale_colour_hue(name = "Locale",
breaks = c(e$locale, "(other)"),
- labels = c(e$locale, "Other")) +
+ labels = c(as.character(e$locale), "Other")) +
facet_grid(request_type ~ ., scales = "free_y") +
theme(strip.text.y = element_text(angle = 0, hjust = 0, size = rel(1.5)),
strip.background = element_rect(fill = NA),
@@ -1305,30 +1320,8 @@ plot_webstats_tb_locale <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-# Ideally, this function would share code with plot_webstats_tb_locale
-# by using a common prepare_webstats_tb_locale function. This just
-# turned out to be a bit harder than for other functions, because
-# plot_webstats_tb_locale needs the preliminary data frame e for its
-# breaks and labels. Left as future work.
write_webstats_tb_locale <- function(start_p = NULL, end_p = NULL, path_p) {
- read_csv(file = paste(stats_dir, "webstats.csv", sep = ""),
- col_types = cols(
- log_date = col_date(format = ""),
- request_type = col_factor(),
- platform = col_skip(),
- channel = col_skip(),
- locale = col_factor(),
- incremental = col_skip(),
- count = col_double())) %>%
- filter(if (!is.null(start_p)) log_date >= as.Date(start_p) else TRUE) %>%
- filter(if (!is.null(end_p)) log_date <= as.Date(end_p) else TRUE) %>%
- filter(request_type %in% c("tbid", "tbup")) %>%
- rename(date = log_date) %>%
- group_by(date, locale, request_type) %>%
- summarize(count = sum(count)) %>%
- mutate(request_type = factor(request_type, levels = c("tbid", "tbup"))) %>%
- spread(request_type, count, fill = 0) %>%
- rename(initial_downloads = tbid, update_pings = tbup) %>%
+ prepare_webstats_tb_locale(start_p, end_p) %>%
write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
}
More information about the tor-commits
mailing list