[tor-commits] [metrics-web/release] Split up huge plot_userstats function.
karsten at torproject.org
karsten at torproject.org
Sat Nov 9 21:45:06 UTC 2019
commit f55e63d986ed9c1054ce19ff0d4a19b1c0bce26d
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Thu Jan 10 09:54:39 2019 +0100
Split up huge plot_userstats function.
The mere size of this function made it hard to impossible to refactor
things to using more recent R packages dplyr and tidyr. Now there are
four plot_userstats_* functions with accompanying prepare_userstats_*
that make the corresponding write_userstats_* functions really small.
---
src/main/R/rserver/graphs.R | 269 +++++++++++++++++++-------------------------
1 file changed, 115 insertions(+), 154 deletions(-)
diff --git a/src/main/R/rserver/graphs.R b/src/main/R/rserver/graphs.R
index d3ea90a..ba8862c 100644
--- a/src/main/R/rserver/graphs.R
+++ b/src/main/R/rserver/graphs.R
@@ -751,9 +751,9 @@ write_bandwidth_flags <- function(start_p = NULL, end_p = NULL, path_p) {
write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
}
-plot_userstats <- function(start_p, end_p, node_p, variable_p, value_p,
- events_p, path_p) {
- c <- read_csv(file = paste(stats_dir, "clients.csv", sep = ""),
+prepare_userstats_relay_country <- function(start_p, end_p, country_p,
+ events_p) {
+ read_csv(file = paste(stats_dir, "clients.csv", sep = ""),
col_types = cols(
date = col_date(format = ""),
node = col_character(),
@@ -763,97 +763,26 @@ plot_userstats <- function(start_p, end_p, node_p, variable_p, value_p,
lower = col_double(),
upper = col_double(),
clients = col_double(),
- frac = col_skip()),
+ frac = col_double()),
na = character()) %>%
- filter(node == node_p)
- u <- c[c$date >= start_p & c$date <= end_p, c("date", "country", "transport",
- "version", "lower", "upper", "clients")]
- u <- rbind(u, data.frame(date = start_p,
- country = ifelse(variable_p == "country" & value_p != "all", value_p, ""),
- transport = ifelse(variable_p == "transport", value_p, ""),
- version = ifelse(variable_p == "version", value_p, ""),
- lower = 0, upper = 0, clients = 0))
- if (node_p == "relay") {
- if (value_p != "all") {
- u <- u[u$country == value_p, ]
- title <- paste("Directly connecting users from", countryname(value_p))
- } else {
- u <- u[u$country == "", ]
- title <- "Directly connecting users"
- }
- u <- aggregate(list(lower = u$lower, upper = u$upper,
- users = u$clients),
- by = list(date = as.Date(u$date, "%Y-%m-%d"),
- value = u$country),
- FUN = sum)
- } else if (variable_p == "transport") {
- if ("!<OR>" %in% value_p) {
- n <- u[u$transport != "" & u$transport != "<OR>", ]
- n <- aggregate(list(lower = n$lower, upper = n$upper,
- clients = n$clients),
- by = list(date = n$date),
- FUN = sum)
- u <- rbind(u, data.frame(date = n$date,
- country = "", transport = "!<OR>",
- version = "", lower = n$lower,
- upper = n$upper, clients = n$clients))
- }
- if (length(value_p) > 1) {
- u <- u[u$transport %in% value_p, ]
- u <- aggregate(list(lower = u$lower, upper = u$upper,
- users = u$clients),
- by = list(date = as.Date(u$date, "%Y-%m-%d"),
- value = u$transport),
- FUN = sum)
- title <- paste("Bridge users by transport")
- } else {
- u <- u[u$transport == value_p, ]
- u <- aggregate(list(lower = u$lower, upper = u$upper,
- users = u$clients),
- by = list(date = as.Date(u$date, "%Y-%m-%d"),
- value = u$transport),
- FUN = sum)
- title <- paste("Bridge users using",
- ifelse(value_p == "<??>", "unknown pluggable transport(s)",
- ifelse(value_p == "<OR>", "default OR protocol",
- ifelse(value_p == "!<OR>", "any pluggable transport",
- ifelse(value_p == "fte", "FTE",
- ifelse(value_p == "websocket", "Flash proxy/websocket",
- paste("transport", value_p)))))))
- }
- } else if (variable_p == "version") {
- u <- u[u$version == value_p, ]
- title <- paste("Bridge users using IP", value_p, sep = "")
- u <- aggregate(list(lower = u$lower, upper = u$upper,
- users = u$clients),
- by = list(date = as.Date(u$date, "%Y-%m-%d"),
- value = u$version),
- FUN = sum)
- } else {
- if (value_p != "all") {
- u <- u[u$country == value_p, ]
- title <- paste("Bridge users from", countryname(value_p))
- } else {
- u <- u[u$country == "" & u$transport == "" & u$version == "", ]
- title <- "Bridge users"
- }
- u <- aggregate(list(lower = u$lower, upper = u$upper,
- users = u$clients),
- by = list(date = as.Date(u$date, "%Y-%m-%d"),
- value = u$country),
- FUN = sum)
- }
- u <- merge(x = u, all.y = TRUE, y = data.frame(expand.grid(
- date = seq(from = as.Date(start_p, "%Y-%m-%d"),
- to = as.Date(end_p, "%Y-%m-%d"), by = "1 day"),
- value = ifelse(value_p == "all", "", value_p))))
- if (length(value_p) > 1) {
- plot <- ggplot(u, aes(x = date, y = users, colour = value))
- } else {
- plot <- ggplot(u, aes(x = date, y = users))
- }
+ filter(node == "relay") %>%
+ filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
+ filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>%
+ filter(if (!is.null(country_p))
+ country == ifelse(country_p == "all", "", country_p) else TRUE) %>%
+ filter(transport == "") %>%
+ filter(version == "") %>%
+ select(date, country, clients, lower, upper, frac) %>%
+ rename(users = clients)
+}
+
+plot_userstats_relay_country <- function(start_p, end_p, country_p, events_p,
+ path_p) {
+ u <- prepare_userstats_relay_country(start_p, end_p, country_p, events_p) %>%
+ complete(date = full_seq(date, period = 1))
+ plot <- ggplot(u, aes(x = date, y = users))
if (length(na.omit(u$users)) > 0 & events_p != "off" &
- variable_p == "country" & length(value_p) == 1 && value_p != "all") {
+ country_p != "all") {
upturns <- u[u$users > u$upper, c("date", "users")]
downturns <- u[u$users < u$lower, c("date", "users")]
if (events_p == "on") {
@@ -875,69 +804,20 @@ plot_userstats <- function(start_p, end_p, node_p, variable_p, value_p,
scale_x_date(name = "", breaks = custom_breaks,
labels = custom_labels, minor_breaks = custom_minor_breaks) +
scale_y_continuous(name = "", labels = formatter, limits = c(0, NA)) +
- ggtitle(title) +
+ ggtitle(paste("Directly connecting users",
+ ifelse(country_p == "all", "",
+ paste(" from", countryname(country_p))), sep = "")) +
labs(caption = copyright_notice)
- if (length(value_p) > 1) {
- plot <- plot +
- scale_colour_hue(name = "", breaks = value_p,
- labels = ifelse(value_p == "<??>", "Unknown PT",
- ifelse(value_p == "<OR>", "Default OR protocol",
- ifelse(value_p == "!<OR>", "Any PT",
- ifelse(value_p == "fte", "FTE",
- ifelse(value_p == "websocket", "Flash proxy/websocket",
- value_p))))))
- }
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-plot_userstats_relay_country <- function(start_p, end_p, country_p, events_p,
- path_p) {
- plot_userstats(start_p, end_p, "relay", "country", country_p, events_p,
- path_p)
-}
-
-plot_userstats_bridge_country <- function(start_p, end_p, country_p, path_p) {
- plot_userstats(start_p, end_p, "bridge", "country", country_p, "off", path_p)
-}
-
-plot_userstats_bridge_transport <- function(start_p, end_p, transport_p,
- path_p) {
- plot_userstats(start_p, end_p, "bridge", "transport", transport_p, "off",
- path_p)
-}
-
-plot_userstats_bridge_version <- function(start_p, end_p, version_p, path_p) {
- plot_userstats(start_p, end_p, "bridge", "version", version_p, "off", path_p)
-}
-
write_userstats_relay_country <- function(start_p = NULL, end_p = NULL,
country_p = NULL, events_p = NULL, path_p) {
- read_csv(file = paste(stats_dir, "clients.csv", sep = ""),
- col_types = cols(
- date = col_date(format = ""),
- node = col_character(),
- country = col_character(),
- transport = col_character(),
- version = col_character(),
- lower = col_double(),
- upper = col_double(),
- clients = col_double(),
- frac = col_double()),
- na = character()) %>%
- filter(node == "relay") %>%
- filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
- filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>%
- filter(if (!is.null(country_p))
- country == ifelse(country_p == "all", "", country_p) else TRUE) %>%
- filter(transport == "") %>%
- filter(version == "") %>%
- select(date, country, clients, lower, upper, frac) %>%
- rename(users = clients) %>%
+ prepare_userstats_relay_country(start_p, end_p, country_p, events_p) %>%
write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
}
-write_userstats_bridge_country <- function(start_p = NULL, end_p = NULL,
- country_p = NULL, path_p) {
+prepare_userstats_bridge_country <- function(start_p, end_p, country_p) {
read_csv(file = paste(stats_dir, "clients.csv", sep = ""),
col_types = cols(
date = col_date(format = ""),
@@ -958,12 +838,32 @@ write_userstats_bridge_country <- function(start_p = NULL, end_p = NULL,
filter(transport == "") %>%
filter(version == "") %>%
select(date, country, clients, frac) %>%
- rename(users = clients) %>%
+ rename(users = clients)
+}
+
+plot_userstats_bridge_country <- function(start_p, end_p, country_p, path_p) {
+ prepare_userstats_bridge_country(start_p, end_p, country_p) %>%
+ complete(date = full_seq(date, period = 1)) %>%
+ ggplot(aes(x = date, y = users)) +
+ geom_line() +
+ scale_x_date(name = "", breaks = custom_breaks,
+ labels = custom_labels, minor_breaks = custom_minor_breaks) +
+ scale_y_continuous(name = "", labels = formatter, limits = c(0, NA)) +
+ ggtitle(paste("Bridge users",
+ ifelse(country_p == "all", "",
+ paste(" from", countryname(country_p))), sep = "")) +
+ labs(caption = copyright_notice)
+ ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
+}
+
+write_userstats_bridge_country <- function(start_p = NULL, end_p = NULL,
+ country_p = NULL, path_p) {
+ prepare_userstats_bridge_country(start_p, end_p, country_p) %>%
write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
}
-write_userstats_bridge_transport <- function(start_p = NULL, end_p = NULL,
- transport_p = NULL, path_p) {
+prepare_userstats_bridge_transport <- function(start_p = NULL, end_p = NULL,
+ transport_p = NULL) {
u <- read_csv(file = paste(stats_dir, "clients.csv", sep = ""),
col_types = cols(
date = col_date(format = ""),
@@ -992,15 +892,58 @@ write_userstats_bridge_transport <- function(start_p = NULL, end_p = NULL,
}
u %>%
filter(if (!is.null(transport_p)) transport %in% transport_p else TRUE) %>%
- group_by(date, transport) %>%
select(date, transport, clients, frac) %>%
rename(users = clients) %>%
- arrange(date, transport) %>%
+ arrange(date, transport)
+}
+
+plot_userstats_bridge_transport <- function(start_p, end_p, transport_p,
+ path_p) {
+ if (length(transport_p) > 1) {
+ title <- paste("Bridge users by transport")
+ } else {
+ title <- paste("Bridge users using",
+ ifelse(transport_p == "<??>", "unknown pluggable transport(s)",
+ ifelse(transport_p == "<OR>", "default OR protocol",
+ ifelse(transport_p == "!<OR>", "any pluggable transport",
+ ifelse(transport_p == "fte", "FTE",
+ ifelse(transport_p == "websocket", "Flash proxy/websocket",
+ paste("transport", transport_p)))))))
+ }
+ u <- prepare_userstats_bridge_transport(start_p, end_p, transport_p) %>%
+ complete(date = full_seq(date, period = 1), nesting(transport))
+ if (length(transport_p) > 1) {
+ plot <- ggplot(u, aes(x = date, y = users, colour = transport))
+ } else {
+ plot <- ggplot(u, aes(x = date, y = users))
+ }
+ plot <- plot +
+ geom_line() +
+ scale_x_date(name = "", breaks = custom_breaks,
+ labels = custom_labels, minor_breaks = custom_minor_breaks) +
+ scale_y_continuous(name = "", labels = formatter, limits = c(0, NA)) +
+ ggtitle(title) +
+ labs(caption = copyright_notice)
+ if (length(transport_p) > 1) {
+ plot <- plot +
+ scale_colour_hue(name = "", breaks = transport_p,
+ labels = ifelse(transport_p == "<??>", "Unknown PT",
+ ifelse(transport_p == "<OR>", "Default OR protocol",
+ ifelse(transport_p == "!<OR>", "Any PT",
+ ifelse(transport_p == "fte", "FTE",
+ ifelse(transport_p == "websocket", "Flash proxy/websocket",
+ transport_p))))))
+ }
+ ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
+}
+
+write_userstats_bridge_transport <- function(start_p = NULL, end_p = NULL,
+ transport_p = NULL, path_p) {
+ prepare_userstats_bridge_transport(start_p, end_p, transport_p) %>%
write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
}
-write_userstats_bridge_version <- function(start_p = NULL, end_p = NULL,
- version_p = NULL, path_p) {
+prepare_userstats_bridge_version <- function(start_p, end_p, version_p) {
read_csv(file = paste(stats_dir, "clients.csv", sep = ""),
col_types = cols(
date = col_date(format = ""),
@@ -1019,7 +962,25 @@ write_userstats_bridge_version <- function(start_p = NULL, end_p = NULL,
filter(is.na(transport)) %>%
filter(if (!is.null(version_p)) version == version_p else TRUE) %>%
select(date, version, clients, frac) %>%
- rename(users = clients) %>%
+ rename(users = clients)
+}
+
+plot_userstats_bridge_version <- function(start_p, end_p, version_p, path_p) {
+ prepare_userstats_bridge_version(start_p, end_p, version_p) %>%
+ complete(date = full_seq(date, period = 1)) %>%
+ ggplot(aes(x = date, y = users)) +
+ geom_line() +
+ scale_x_date(name = "", breaks = custom_breaks,
+ labels = custom_labels, minor_breaks = custom_minor_breaks) +
+ scale_y_continuous(name = "", labels = formatter, limits = c(0, NA)) +
+ ggtitle(paste("Bridge users using IP", version_p, sep = "")) +
+ labs(caption = copyright_notice)
+ ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
+}
+
+write_userstats_bridge_version <- function(start_p = NULL, end_p = NULL,
+ version_p = NULL, path_p) {
+ prepare_userstats_bridge_version(start_p, end_p, version_p) %>%
write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
}
More information about the tor-commits
mailing list