[tor-commits] [metrics-web/release] Make all parameters in write_* functions optional.

karsten at torproject.org karsten at torproject.org
Wed May 30 13:45:12 UTC 2018


commit 167e72b5a06cec3753f7f952fb3e3247bae943a5
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Fri May 11 11:54:45 2018 +0200

    Make all parameters in write_* functions optional.
    
    We now permit parameters in write_* functions to be omitted. The
    effect is that we're not filtering if a parameter is missing, thus
    producing a CSV file with more rows.
    
    At the same time we're adding columns for data that was previously
    pre-determined by parameter values. For example, if a user specified a
    given country in a parameter, we didn't have to include a country
    column containing only that country. Now we need to put that column
    back.
    
    Implements #25383.
---
 src/main/R/rserver/graphs.R                        | 369 ++++++++++++---------
 .../torproject/metrics/web/RObjectGenerator.java   |   4 +
 2 files changed, 222 insertions(+), 151 deletions(-)

diff --git a/src/main/R/rserver/graphs.R b/src/main/R/rserver/graphs.R
index ebb8c80..a9b7fc7 100644
--- a/src/main/R/rserver/graphs.R
+++ b/src/main/R/rserver/graphs.R
@@ -351,8 +351,13 @@ robust_call <- function(wrappee, filename) {
 prepare_networksize <- function(start, end) {
   read.csv(paste(stats_dir, "servers.csv", sep = ""),
     colClasses = c("date" = "Date")) %>%
-    filter(date >= as.Date(start), date <= as.Date(end), flag == "",
-      country == "", version == "", platform == "", ec2bridge == "") %>%
+    filter(if (!is.null(start)) date >= as.Date(start) else TRUE) %>%
+    filter(if (!is.null(end)) date <= as.Date(end) else TRUE) %>%
+    filter(flag == "") %>%
+    filter(country == "") %>%
+    filter(version == "") %>%
+    filter(platform == "") %>%
+    filter(ec2bridge == "") %>%
     select(date, relays, bridges)
 }
 
@@ -373,16 +378,21 @@ plot_networksize <- function(start, end, path) {
   ggsave(filename = path, width = 8, height = 5, dpi = 150)
 }
 
-write_networksize <- function(start, end, path) {
+write_networksize <- function(start = NULL, end = NULL, path) {
   prepare_networksize(start, end) %>%
-    write.csv(path, quote = FALSE, row.names = FALSE)
+    write.csv(path, quote = FALSE, row.names = FALSE, na = "")
 }
 
 prepare_versions <- function(start, end) {
   read.csv(paste(stats_dir, "servers.csv", sep = ""),
     colClasses = c("date" = "Date")) %>%
-    filter(date >= as.Date(start), date <= as.Date(end), flag == "",
-      country == "", version != "", platform == "", ec2bridge == "") %>%
+    filter(if (!is.null(start)) date >= as.Date(start) else TRUE) %>%
+    filter(if (!is.null(end)) date <= as.Date(end) else TRUE) %>%
+    filter(flag == "") %>%
+    filter(country == "") %>%
+    filter(version != "") %>%
+    filter(platform == "") %>%
+    filter(ec2bridge == "") %>%
     select(date, version, relays)
 }
 
@@ -411,17 +421,22 @@ plot_versions <- function(start, end, path) {
   ggsave(filename = path, width = 8, height = 5, dpi = 150)
 }
 
-write_versions <- function(start, end, path) {
+write_versions <- function(start = NULL, end = NULL, path) {
   prepare_versions(start, end) %>%
     spread(key = "version", value = "relays", fill = 0) %>%
-    write.csv(path, quote = FALSE, row.names = FALSE)
+    write.csv(path, quote = FALSE, row.names = FALSE, na = "")
 }
 
 prepare_platforms <- function(start, end) {
   read.csv(paste(stats_dir, "servers.csv", sep = ""),
     colClasses = c("date" = "Date")) %>%
-    filter(date >= as.Date(start), date <= as.Date(end), flag == "",
-      country == "", version == "", platform != "", ec2bridge == "") %>%
+    filter(if (!is.null(start)) date >= as.Date(start) else TRUE) %>%
+    filter(if (!is.null(end)) date <= as.Date(end) else TRUE) %>%
+    filter(flag == "") %>%
+    filter(country == "") %>%
+    filter(version == "") %>%
+    filter(platform != "") %>%
+    filter(ec2bridge == "") %>%
     select(date, platform, relays) %>%
     mutate(platform = ifelse(platform == "Darwin", "macOS",
       as.character(platform)))
@@ -442,17 +457,19 @@ plot_platforms <- function(start, end, path) {
   ggsave(filename = path, width = 8, height = 5, dpi = 150)
 }
 
-write_platforms <- function(start, end, path) {
+write_platforms <- function(start = NULL, end = NULL, path) {
   prepare_platforms(start, end) %>%
     spread(platform, relays) %>%
-    write.csv(path, quote = FALSE, row.names = FALSE)
+    write.csv(path, quote = FALSE, row.names = FALSE, na = "")
 }
 
 prepare_bandwidth <- function(start, end) {
   read.csv(paste(stats_dir, "bandwidth.csv", sep = ""),
     colClasses = c("date" = "Date")) %>%
-    filter(date >= as.Date(start), date <= as.Date(end), isexit != "",
-      isguard != "") %>%
+    filter(if (!is.null(start)) date >= as.Date(start) else TRUE) %>%
+    filter(if (!is.null(end)) date <= as.Date(end) else TRUE) %>%
+    filter(isexit != "") %>%
+    filter(isguard != "") %>%
     group_by(date) %>%
     summarize(advbw = sum(advbw) * 8 / 1e9,
       bwhist = sum(bwread + bwwrite) * 8 / 2e9) %>%
@@ -477,16 +494,18 @@ plot_bandwidth <- function(start, end, path) {
   ggsave(filename = path, width = 8, height = 5, dpi = 150)
 }
 
-write_bandwidth <- function(start, end, path) {
+write_bandwidth <- function(start = NULL, end = NULL, path) {
   prepare_bandwidth(start, end) %>%
-    write.csv(path, quote = FALSE, row.names = FALSE)
+    write.csv(path, quote = FALSE, row.names = FALSE, na = "")
 }
 
 prepare_bwhist_flags <- function(start, end) {
   read.csv(paste(stats_dir, "bandwidth.csv", sep = ""),
     colClasses = c("date" = "Date")) %>%
-    filter(date >= as.Date(start), date <= as.Date(end), isexit != "",
-      isguard != "") %>%
+    filter(if (!is.null(start)) date >= as.Date(start) else TRUE) %>%
+    filter(if (!is.null(end)) date <= as.Date(end) else TRUE) %>%
+    filter(isexit != "") %>%
+    filter(isguard != "") %>%
     mutate(variable = ifelse(isexit == "t",
         ifelse(isguard == "t", "guard_and_exit", "exit_only"),
         ifelse(isguard == "t", "guard_only", "middle_only")),
@@ -514,17 +533,19 @@ plot_bwhist_flags <- function(start, end, path) {
   ggsave(filename = path, width = 8, height = 5, dpi = 150)
 }
 
-write_bwhist_flags <- function(start, end, path) {
+write_bwhist_flags <- function(start = NULL, end = NULL, path) {
   prepare_bwhist_flags(start, end) %>%
     spread(variable, value) %>%
-    write.csv(path, quote = FALSE, row.names = FALSE)
+    write.csv(path, quote = FALSE, row.names = FALSE, na = "")
 }
 
 prepare_dirbytes <- function(start, end, path) {
   read.csv(paste(stats_dir, "bandwidth.csv", sep = ""),
     colClasses = c("date" = "Date")) %>%
-    filter(date >= as.Date(start), date <= as.Date(end), isexit == "",
-      isguard == "") %>%
+    filter(if (!is.null(start)) date >= as.Date(start) else TRUE) %>%
+    filter(if (!is.null(end)) date <= as.Date(end) else TRUE) %>%
+    filter(isexit == "") %>%
+    filter(isguard == "") %>%
     mutate(dirread = dirread * 8 / 1e9,
       dirwrite = dirwrite * 8 / 1e9) %>%
     select(date, dirread, dirwrite)
@@ -548,18 +569,22 @@ plot_dirbytes <- function(start, end, path) {
   ggsave(filename = path, width = 8, height = 5, dpi = 150)
 }
 
-write_dirbytes <- function(start, end, path) {
+write_dirbytes <- function(start = NULL, end = NULL, path) {
   prepare_dirbytes(start, end) %>%
-    write.csv(path, quote = FALSE, row.names = FALSE)
+    write.csv(path, quote = FALSE, row.names = FALSE, na = "")
 }
 
 prepare_relayflags <- function(start, end, flags) {
   read.csv(paste(stats_dir, "servers.csv", sep = ""),
     colClasses = c("date" = "Date")) %>%
-    filter(date >= as.Date(start), date <= as.Date(end), country == "",
-      version == "", platform == "", ec2bridge == "") %>%
+    filter(if (!is.null(start)) date >= as.Date(start) else TRUE) %>%
+    filter(if (!is.null(end)) date <= as.Date(end) else TRUE) %>%
+    filter(country == "") %>%
+    filter(version == "") %>%
+    filter(platform == "") %>%
+    filter(ec2bridge == "") %>%
     mutate(flag = ifelse(flag == "", "Running", as.character(flag))) %>%
-    filter(flag %in% flags) %>%
+    filter(if (!is.null(flags)) flag %in% flags else TRUE) %>%
     select(date, flag, relays)
 }
 
@@ -579,11 +604,11 @@ plot_relayflags <- function(start, end, flags, path) {
   ggsave(filename = path, width = 8, height = 5, dpi = 150)
 }
 
-write_relayflags <- function(start, end, flags, path) {
+write_relayflags <- function(start = NULL, end = NULL, flags = NULL, path) {
   prepare_relayflags(start, end, flags) %>%
     mutate(flag = tolower(flag)) %>%
     spread(flag, relays) %>%
-    write.csv(path, quote = FALSE, row.names = FALSE)
+    write.csv(path, quote = FALSE, row.names = FALSE, na = "")
 }
 
 plot_torperf <- function(start, end, source, server, filesize, path) {
@@ -629,28 +654,39 @@ plot_torperf <- function(start, end, source, server, filesize, path) {
 # harder than for other functions, because plot_torperf uses different
 # colours based on which sources exist, unrelated to which source is
 # plotted. Left as future work.
-write_torperf <- function(start, end, source, server, filesize, path) {
+write_torperf <- function(start = NULL, end = NULL, source = NULL,
+    server = NULL, filesize = NULL, path) {
   read.csv(paste(stats_dir, "torperf-1.1.csv", sep = ""),
     colClasses = c("date" = "Date")) %>%
-    filter(date >= as.Date(start), date <= as.Date(end),
-      filesize == ifelse(!!filesize == "50kb", 50 * 1024,
-        ifelse(!!filesize == "1mb", 1024 * 1024, 5 * 1024 * 1024)),
-      source == ifelse(!!source == "all", "", !!source),
-      server == !!server) %>%
-    transmute(date, q1 = q1 / 1e3, md = md / 1e3, q3 = q3 / 1e3) %>%
-    write.csv(path, quote = FALSE, row.names = FALSE)
+    filter(if (!is.null(start)) date >= as.Date(start) else TRUE) %>%
+    filter(if (!is.null(end)) date <= as.Date(end) else TRUE) %>%
+    filter(if (!is.null(!!source))
+        source == ifelse(!!source == "all", "", !!source) else TRUE) %>%
+    filter(if (!is.null(!!server)) server == !!server else TRUE) %>%
+    filter(if (!is.null(!!filesize))
+        filesize == ifelse(!!filesize == "50kb", 50 * 1024,
+        ifelse(!!filesize == "1mb", 1024 * 1024, 5 * 1024 * 1024)) else
+        TRUE) %>%
+    transmute(date, filesize, source, server, q1 = q1 / 1e3, md = md / 1e3,
+      q3 = q3 / 1e3) %>%
+    write.csv(path, quote = FALSE, row.names = FALSE, na = "")
 }
 
 prepare_torperf_failures <- function(start, end, source, server, filesize) {
-  filesize_val <- ifelse(filesize == "50kb", 50 * 1024,
-          ifelse(filesize == "1mb", 1024 * 1024, 5 * 1024 * 1024))
-  t <- read.csv(paste(stats_dir, "torperf-1.1.csv", sep = ""),
-    colClasses = c("date" = "Date"))
-  t[t$date >= start & t$date <= end & t$filesize == filesize_val &
-         t$source == ifelse(source == "all", "", source) &
-         t$server == server & t$requests > 0, ] %>%
-  transmute(date, timeouts = timeouts / requests,
-    failures = failures / requests)
+  read.csv(paste(stats_dir, "torperf-1.1.csv", sep = ""),
+    colClasses = c("date" = "Date")) %>%
+    filter(if (!is.null(start)) date >= as.Date(start) else TRUE) %>%
+    filter(if (!is.null(end)) date <= as.Date(end) else TRUE) %>%
+    filter(if (!is.null(!!filesize))
+        filesize == ifelse(!!filesize == "50kb", 50 * 1024,
+        ifelse(!!filesize == "1mb", 1024 * 1024, 5 * 1024 * 1024)) else
+        TRUE) %>%
+    filter(if (!is.null(!!source))
+        source == ifelse(!!source == "all", "", !!source) else TRUE) %>%
+    filter(if (!is.null(!!server)) server == !!server else TRUE) %>%
+    filter(requests > 0) %>%
+    transmute(date, filesize, source, server, timeouts = timeouts / requests,
+        failures = failures / requests)
 }
 
 plot_torperf_failures <- function(start, end, source, server, filesize, path) {
@@ -675,15 +711,17 @@ plot_torperf_failures <- function(start, end, source, server, filesize, path) {
   ggsave(filename = path, width = 8, height = 5, dpi = 150)
 }
 
-write_torperf_failures <- function(start, end, source, server, filesize, path) {
+write_torperf_failures <- function(start = NULL, end = NULL, source = NULL,
+    server = NULL, filesize = NULL, path) {
   prepare_torperf_failures(start, end, source, server, filesize) %>%
-    write.csv(path, quote = FALSE, row.names = FALSE)
+    write.csv(path, quote = FALSE, row.names = FALSE, na = "")
 }
 
 prepare_connbidirect <- function(start, end) {
   read.csv(paste(stats_dir, "connbidirect2.csv", sep = ""),
     colClasses = c("date" = "Date", "direction" = "factor")) %>%
-    filter(date >= as.Date(start), date <= as.Date(end)) %>%
+    filter(if (!is.null(start)) date >= as.Date(start) else TRUE) %>%
+    filter(if (!is.null(end)) date <= as.Date(end) else TRUE) %>%
     mutate(quantile = paste("X", quantile, sep = ""),
       fraction = fraction / 100) %>%
     spread(quantile, fraction)
@@ -712,20 +750,23 @@ plot_connbidirect <- function(start, end, path) {
   ggsave(filename = path, width = 8, height = 5, dpi = 150)
 }
 
-write_connbidirect <- function(start, end, path) {
+write_connbidirect <- function(start = NULL, end = NULL, path) {
   prepare_connbidirect(start, end) %>%
     rename(q1 = X0.25, md = X0.5, q3 = X0.75) %>%
     gather(variable, value, -(date:direction)) %>%
     unite(temp, direction, variable) %>%
     spread(temp, value) %>%
-    write.csv(path, quote = FALSE, row.names = FALSE)
+    write.csv(path, quote = FALSE, row.names = FALSE, na = "")
 }
 
 prepare_bandwidth_flags <- function(start, end) {
   b <- read.csv(paste(stats_dir, "bandwidth.csv", sep = ""),
     colClasses = c("date" = "Date"))
-  b <- b[b$date >= start & b$date <= end & b$isexit != "" &
-         b$isguard != "", ]
+  b <- b %>%
+    filter(if (!is.null(start)) date >= as.Date(start) else TRUE) %>%
+    filter(if (!is.null(end)) date <= as.Date(end) else TRUE) %>%
+    filter(isexit != "") %>%
+    filter(isguard != "")
   b <- data.frame(date = b$date,
                   isexit = b$isexit == "t", isguard = b$isguard == "t",
                   advbw = b$advbw * 8 / 1e9,
@@ -770,10 +811,10 @@ plot_bandwidth_flags <- function(start, end, path) {
   ggsave(filename = path, width = 8, height = 5, dpi = 150)
 }
 
-write_bandwidth_flags <- function(start, end, path) {
+write_bandwidth_flags <- function(start = NULL, end = NULL, path) {
   prepare_bandwidth_flags(start, end) %>%
     spread(variable, value) %>%
-    write.csv(path, quote = FALSE, row.names = FALSE)
+    write.csv(path, quote = FALSE, row.names = FALSE, na = "")
 }
 
 plot_userstats <- function(start, end, node, variable, value, events,
@@ -921,48 +962,48 @@ plot_userstats_bridge_version <- function(start, end, version, path) {
   plot_userstats(start, end, "bridge", "version", version, "off", path)
 }
 
-write_userstats_relay_country <- function(start, end, country, events,
-    path) {
+write_userstats_relay_country <- function(start = NULL, end = NULL,
+    country = NULL, events = NULL, path) {
   load(paste(rdata_dir, "clients-relay.RData", sep = ""))
   u <- data %>%
-    filter(date >= as.Date(start), date <= as.Date(end),
-      country == ifelse(!!country == "all", "", !!country), transport == "",
-      version == "")
-  if (country != "all" && events == "on") {
-    u <- u %>%
-      mutate(downturns = clients < u$lower, upturns = clients > upper) %>%
-      select(date, clients, downturns, upturns, lower, upper)
-  } else if (country != "all" && events != "off") {
-    u <- u %>%
-      mutate(downturns = clients < u$lower, upturns = clients > upper) %>%
-      select(date, clients, downturns, upturns)
-  } else {
-    u <- u %>%
-      select(date, clients)
-  }
-  u %>%
+    filter(if (!is.null(start)) date >= as.Date(start) else TRUE) %>%
+    filter(if (!is.null(end)) date <= as.Date(end) else TRUE) %>%
+    filter(if (!is.null(!!country))
+      country == ifelse(!!country == "all", "", !!country) else TRUE) %>%
+    filter(transport == "") %>%
+    filter(version == "") %>%
+    mutate(downturns = clients < lower, upturns = clients > upper) %>%
+    select(date, country, clients, downturns, upturns, lower, upper) %>%
     rename(users = clients) %>%
-    write.csv(path, quote = FALSE, row.names = FALSE)
+    write.csv(path, quote = FALSE, row.names = FALSE, na = "")
 }
 
-write_userstats_bridge_country <- function(start, end, country, path) {
+write_userstats_bridge_country <- function(start = NULL, end = NULL,
+    country = NULL, path) {
   load(paste(rdata_dir, "clients-bridge.RData", sep = ""))
   data %>%
-    filter(date >= as.Date(start), date <= as.Date(end),
-      country == ifelse(!!country == "all", "", !!country), transport == "",
-      version == "") %>%
-    select(date, clients) %>%
+    filter(if (!is.null(start)) date >= as.Date(start) else TRUE) %>%
+    filter(if (!is.null(end)) date <= as.Date(end) else TRUE) %>%
+    filter(if (!is.null(!!country))
+      country == ifelse(!!country == "all", "", !!country) else TRUE) %>%
+    filter(transport == "") %>%
+    filter(version == "") %>%
+    select(date, country, clients) %>%
     rename(users = clients) %>%
-    write.csv(path, quote = FALSE, row.names = FALSE)
+    write.csv(path, quote = FALSE, row.names = FALSE, na = "")
 }
 
-write_userstats_bridge_transport <- function(start, end, transports, path) {
+write_userstats_bridge_transport <- function(start = NULL, end = NULL,
+    transports = NULL, path) {
   load(paste(rdata_dir, "clients-bridge.RData", sep = ""))
   u <- data %>%
-    filter(date >= as.Date(start), date <= as.Date(end),
-      country == "", version == "", transport != "") %>%
+    filter(if (!is.null(start)) date >= as.Date(start) else TRUE) %>%
+    filter(if (!is.null(end)) date <= as.Date(end) else TRUE) %>%
+    filter(country == "") %>%
+    filter(version == "") %>%
+    filter(transport != "") %>%
     select(date, transport, clients)
-  if ("!<OR>" %in% transports) {
+  if (is.null(transports) || "!<OR>" %in% transports) {
     n <- u %>%
       filter(transport != "<OR>") %>%
       group_by(date) %>%
@@ -971,7 +1012,7 @@ write_userstats_bridge_transport <- function(start, end, transports, path) {
                              clients = n$clients))
   }
   u %>%
-    filter(transport %in% transports) %>%
+    filter(if (!is.null(transports)) transport %in% transports else TRUE) %>%
     mutate(transport = ifelse(transport == "<OR>", "default_or_protocol",
       ifelse(transport == "!<OR>", "any_pt",
       ifelse(transport == "<??>", "unknown_pluggable_transports",
@@ -979,38 +1020,41 @@ write_userstats_bridge_transport <- function(start, end, transports, path) {
     group_by(date, transport) %>%
     select(date, transport, clients) %>%
     spread(transport, clients) %>%
-    write.csv(path, quote = FALSE, row.names = FALSE)
+    write.csv(path, quote = FALSE, row.names = FALSE, na = "")
 }
 
-write_userstats_bridge_version <- function(start, end, version, path) {
+write_userstats_bridge_version <- function(start = NULL, end = NULL,
+    version = NULL, path) {
   load(paste(rdata_dir, "clients-bridge.RData", sep = ""))
   data %>%
-    filter(date >= as.Date(start), date <= as.Date(end),
-      country == "", transport == "", version == !!version) %>%
-    select(date, clients) %>%
+    filter(if (!is.null(start)) date >= as.Date(start) else TRUE) %>%
+    filter(if (!is.null(end)) date <= as.Date(end) else TRUE) %>%
+    filter(country == "") %>%
+    filter(transport == "") %>%
+    filter(if (!is.null(!!version)) version == !!version else TRUE) %>%
+    select(date, version, clients) %>%
     rename(users = clients) %>%
-    write.csv(path, quote = FALSE, row.names = FALSE)
+    write.csv(path, quote = FALSE, row.names = FALSE, na = "")
 }
 
 prepare_userstats_bridge_combined <- function(start, end, country) {
-  top <- 3
-  country <- ifelse(country == "all", NA, country)
   load(paste(rdata_dir, "userstats-bridge-combined.RData", sep = ""))
-  u <- data
-  u <- u[u$date >= start & u$date <= end
-         & (is.na(country) | u$country == country), ]
-  a <- aggregate(list(mid = (u$high + u$low) / 2),
-                 by = list(transport = u$transport), FUN = sum)
-  a <- a[order(a$mid, decreasing = TRUE)[1:top], ]
-  u <- u[u$transport %in% a$transport, ]
-  u
+  data %>%
+    filter(if (!is.null(start)) date >= as.Date(start) else TRUE) %>%
+    filter(if (!is.null(end)) date <= as.Date(end) else TRUE) %>%
+    filter(if (!is.null(!!country)) country == !!country else TRUE)
 }
 
 plot_userstats_bridge_combined <- function(start, end, country, path) {
   if (country == "all") {
     plot_userstats_bridge_country(start, end, country, path)
   } else {
+    top <- 3
     u <- prepare_userstats_bridge_combined(start, end, country)
+    a <- aggregate(list(mid = (u$high + u$low) / 2),
+                   by = list(transport = u$transport), FUN = sum)
+    a <- a[order(a$mid, decreasing = TRUE)[1:top], ]
+    u <- u[u$transport %in% a$transport, ]
     title <- paste("Bridge users by transport from ",
                    countryname(country), sep = "")
     ggplot(u, aes(x = as.Date(date), ymin = low, ymax = high,
@@ -1028,26 +1072,29 @@ plot_userstats_bridge_combined <- function(start, end, country, path) {
   }
 }
 
-write_userstats_bridge_combined <- function(start, end, country, path) {
-  if (country == "all") {
+write_userstats_bridge_combined <- function(start = NULL, end = NULL,
+    country = NULL, path) {
+  if (!is.null(country) && country == "all") {
     write_userstats_bridge_country(start, end, country, path)
   } else {
     prepare_userstats_bridge_combined(start, end, country) %>%
-      select(date, transport, low, high) %>%
-      mutate(transport = ifelse(transport == "<OR>",
-                                "default_or_protocol", transport)) %>%
+      select(date, country, transport, low, high) %>%
+      mutate(transport = ifelse(transport == "<OR>", "default_or_protocol",
+        ifelse(transport == "<??>", "unknown_transport", transport))) %>%
       gather(variable, value, -(date:transport)) %>%
       unite(temp, transport, variable) %>%
       spread(temp, value) %>%
-      write.csv(path, quote = FALSE, row.names = FALSE)
+      write.csv(path, quote = FALSE, row.names = FALSE, na = "")
   }
 }
 
 prepare_advbwdist_perc <- function(start, end, p) {
   read.csv(paste(stats_dir, "advbwdist.csv", sep = ""),
     colClasses = c("date" = "Date")) %>%
-    filter(date >= as.Date(start), date <= as.Date(end),
-      percentile %in% as.numeric(p)) %>%
+    filter(if (!is.null(start)) date >= as.Date(start) else TRUE) %>%
+    filter(if (!is.null(end)) date <= as.Date(end) else TRUE) %>%
+    filter(if (!is.null(p)) percentile %in% as.numeric(p) else
+      percentile != "") %>%
     transmute(date, percentile = as.factor(percentile),
       variable = ifelse(isexit != "t", "all", "exits"),
       advbw = advbw * 8 / 1e9)
@@ -1070,18 +1117,20 @@ plot_advbwdist_perc <- function(start, end, p, path) {
   ggsave(filename = path, width = 8, height = 5, dpi = 150)
 }
 
-write_advbwdist_perc <- function(start, end, p, path) {
+write_advbwdist_perc <- function(start = NULL, end = NULL, p = NULL, path) {
   prepare_advbwdist_perc(start, end, p) %>%
     unite(temp, variable, percentile) %>%
     spread(temp, advbw) %>%
-    write.csv(path, quote = FALSE, row.names = FALSE)
+    write.csv(path, quote = FALSE, row.names = FALSE, na = "")
 }
 
 prepare_advbwdist_relay <- function(start, end, n) {
   read.csv(paste(stats_dir, "advbwdist.csv", sep = ""),
     colClasses = c("date" = "Date")) %>%
-    filter(date >= as.Date(start), date <= as.Date(end),
-      relay %in% as.numeric(n)) %>%
+    filter(if (!is.null(start)) date >= as.Date(start) else TRUE) %>%
+    filter(if (!is.null(end)) date <= as.Date(end) else TRUE) %>%
+    filter(if (!is.null(n)) relay %in% as.numeric(n) else
+      relay != "") %>%
     transmute(date, relay = as.factor(relay),
       variable = ifelse(isexit != "t", "all", "exits"),
       advbw = advbw * 8 / 1e9)
@@ -1104,18 +1153,19 @@ plot_advbwdist_relay <- function(start, end, n, path) {
   ggsave(filename = path, width = 8, height = 5, dpi = 150)
 }
 
-write_advbwdist_relay <- function(start, end, n, path) {
+write_advbwdist_relay <- function(start = NULL, end = NULL, n = NULL, path) {
   prepare_advbwdist_relay(start, end, n) %>%
     unite(temp, variable, relay) %>%
     spread(temp, advbw) %>%
-    write.csv(path, quote = FALSE, row.names = FALSE)
+    write.csv(path, quote = FALSE, row.names = FALSE, na = "")
 }
 
 prepare_hidserv_dir_onions_seen <- function(start, end) {
   read.csv(paste(stats_dir, "hidserv.csv", sep = ""),
     colClasses = c("date" = "Date")) %>%
-    filter(date >= as.Date(start), date <= as.Date(end),
-      type == "dir-onions-seen") %>%
+    filter(if (!is.null(start)) date >= as.Date(start) else TRUE) %>%
+    filter(if (!is.null(end)) date <= as.Date(end) else TRUE) %>%
+    filter(type == "dir-onions-seen") %>%
     transmute(date = date, onions = ifelse(frac >= 0.01, wiqm, NA))
 }
 
@@ -1131,16 +1181,17 @@ plot_hidserv_dir_onions_seen <- function(start, end, path) {
   ggsave(filename = path, width = 8, height = 5, dpi = 150)
 }
 
-write_hidserv_dir_onions_seen <- function(start, end, path) {
+write_hidserv_dir_onions_seen <- function(start = NULL, end = NULL, path) {
   prepare_hidserv_dir_onions_seen(start, end) %>%
-    write.csv(path, quote = FALSE, row.names = FALSE)
+    write.csv(path, quote = FALSE, row.names = FALSE, na = "")
 }
 
 prepare_hidserv_rend_relayed_cells <- function(start, end) {
   read.csv(paste(stats_dir, "hidserv.csv", sep = ""),
     colClasses = c("date" = "Date")) %>%
-    filter(date >= as.Date(start), date <= as.Date(end),
-      type == "rend-relayed-cells") %>%
+    filter(if (!is.null(start)) date >= as.Date(start) else TRUE) %>%
+    filter(if (!is.null(end)) date <= as.Date(end) else TRUE) %>%
+    filter(type == "rend-relayed-cells") %>%
     transmute(date,
       relayed = ifelse(frac >= 0.01, wiqm * 8 * 512 / (86400 * 1e9), NA))
 }
@@ -1158,15 +1209,16 @@ plot_hidserv_rend_relayed_cells <- function(start, end, path) {
   ggsave(filename = path, width = 8, height = 5, dpi = 150)
 }
 
-write_hidserv_rend_relayed_cells <- function(start, end, path) {
+write_hidserv_rend_relayed_cells <- function(start = NULL, end = NULL, path) {
   prepare_hidserv_rend_relayed_cells(start, end) %>%
-    write.csv(path, quote = FALSE, row.names = FALSE)
+    write.csv(path, quote = FALSE, row.names = FALSE, na = "")
 }
 
 prepare_hidserv_frac_reporting <- function(start, end) {
   read.csv(paste(stats_dir, "hidserv.csv", sep = ""),
     colClasses = c("date" = "Date")) %>%
-    filter(date >= as.Date(start), date <= as.Date(end)) %>%
+    filter(if (!is.null(start)) date >= as.Date(start) else TRUE) %>%
+    filter(if (!is.null(end)) date <= as.Date(end) else TRUE) %>%
     select(date, frac, type)
 }
 
@@ -1189,17 +1241,18 @@ plot_hidserv_frac_reporting <- function(start, end, path) {
   ggsave(filename = path, width = 8, height = 5, dpi = 150)
 }
 
-write_hidserv_frac_reporting <- function(start, end, path) {
+write_hidserv_frac_reporting <- function(start = NULL, end = NULL, path) {
   prepare_hidserv_frac_reporting(start, end) %>%
     mutate(type = ifelse(type == "dir-onions-seen", "onions", "relayed")) %>%
     spread(type, frac) %>%
-    write.csv(path, quote = FALSE, row.names = FALSE)
+    write.csv(path, quote = FALSE, row.names = FALSE, na = "")
 }
 
 prepare_webstats_tb <- function(start, end) {
   load(paste(rdata_dir, "webstats-tb.RData", sep = ""))
   data %>%
-    filter(log_date >= as.Date(start), log_date <= as.Date(end)) %>%
+    filter(if (!is.null(start)) log_date >= as.Date(start) else TRUE) %>%
+    filter(if (!is.null(end)) log_date <= as.Date(end) else TRUE) %>%
     mutate(request_type = factor(request_type))
 }
 
@@ -1224,20 +1277,21 @@ plot_webstats_tb <- function(start, end, path) {
   ggsave(filename = path, width = 8, height = 5, dpi = 150)
 }
 
-write_webstats_tb <- function(start, end, path) {
+write_webstats_tb <- function(start = NULL, end = NULL, path) {
   prepare_webstats_tb(start, end) %>%
     rename(date = log_date) %>%
     spread(request_type, count) %>%
     rename(initial_downloads = tbid, signature_downloads = tbsd,
       update_pings = tbup, update_requests = tbur) %>%
-    write.csv(path, quote = FALSE, row.names = FALSE)
+    write.csv(path, quote = FALSE, row.names = FALSE, na = "")
 }
 
 prepare_webstats_tb_platform <- function(start, end) {
   read.csv(paste(stats_dir, "webstats.csv", sep = ""),
     colClasses = c("log_date" = "Date")) %>%
-    filter(log_date >= as.Date(start), log_date <= as.Date(end),
-      request_type == "tbid") %>%
+    filter(if (!is.null(start)) log_date >= as.Date(start) else TRUE) %>%
+    filter(if (!is.null(end)) log_date <= as.Date(end) else TRUE) %>%
+    filter(request_type == "tbid") %>%
     group_by(log_date, platform) %>%
     summarize(count = sum(count))
 }
@@ -1260,12 +1314,12 @@ plot_webstats_tb_platform <- function(start, end, path) {
   ggsave(filename = path, width = 8, height = 5, dpi = 150)
 }
 
-write_webstats_tb_platform <- function(start, end, path) {
+write_webstats_tb_platform <- function(start = NULL, end = NULL, path) {
   prepare_webstats_tb_platform(start, end) %>%
     rename(date = log_date) %>%
     spread(platform, count) %>%
     rename(linux = l, macos = m, windows = w) %>%
-    write.csv(path, quote = FALSE, row.names = FALSE)
+    write.csv(path, quote = FALSE, row.names = FALSE, na = "")
 }
 
 plot_webstats_tb_locale <- function(start, end, path) {
@@ -1299,10 +1353,13 @@ plot_webstats_tb_locale <- function(start, end, path) {
 # turned out to be a bit harder than for other functions, because
 # plot_webstats_tb_locale needs the preliminary data frame e for its
 # breaks and labels. Left as future work.
-write_webstats_tb_locale <- function(start, end, path) {
+write_webstats_tb_locale <- function(start = NULL, end = NULL, path) {
   d <- read.csv(paste(stats_dir, "webstats.csv", sep = ""),
     colClasses = c("log_date" = "Date", "locale" = "character"))
-  d <- d[d$log_date >= start & d$log_date <= end & d$request_type == "tbid", ]
+  d <- d %>%
+    filter(if (!is.null(start)) log_date >= as.Date(start) else TRUE) %>%
+    filter(if (!is.null(end)) log_date <= as.Date(end) else TRUE) %>%
+    filter(request_type == "tbid")
   e <- d
   e <- aggregate(list(count = e$count), by = list(locale = e$locale), FUN = sum)
   e <- e[order(e$count, decreasing = TRUE), ]
@@ -1313,13 +1370,14 @@ write_webstats_tb_locale <- function(start, end, path) {
     mutate(locale = tolower(locale)) %>%
     rename(date = log_date) %>%
     spread(locale, count) %>%
-    write.csv(path, quote = FALSE, row.names = FALSE)
+    write.csv(path, quote = FALSE, row.names = FALSE, na = "")
 }
 
 prepare_webstats_tm <- function(start, end) {
   load(paste(rdata_dir, "webstats-tm.RData", sep = ""))
   data %>%
-    filter(log_date >= as.Date(start), log_date <= as.Date(end)) %>%
+    filter(if (!is.null(start)) log_date >= as.Date(start) else TRUE) %>%
+    filter(if (!is.null(end)) log_date <= as.Date(end) else TRUE) %>%
     mutate(request_type = factor(request_type))
 }
 
@@ -1342,19 +1400,22 @@ plot_webstats_tm <- function(start, end, path) {
   ggsave(filename = path, width = 8, height = 5, dpi = 150)
 }
 
-write_webstats_tm <- function(start, end, path) {
+write_webstats_tm <- function(start = NULL, end = NULL, path) {
   prepare_webstats_tm(start, end) %>%
     rename(date = log_date) %>%
     spread(request_type, count) %>%
     rename(initial_downloads = tmid, update_pings = tmup) %>%
-    write.csv(path, quote = FALSE, row.names = FALSE)
+    write.csv(path, quote = FALSE, row.names = FALSE, na = "")
 }
 
 prepare_relays_ipv6 <- function(start, end) {
   read.csv(paste(stats_dir, "ipv6servers.csv", sep = ""),
     colClasses = c("valid_after_date" = "Date")) %>%
-    filter(valid_after_date >= as.Date(start),
-      valid_after_date <= as.Date(end), server == "relay") %>%
+    filter(if (!is.null(start))
+        valid_after_date >= as.Date(start) else TRUE) %>%
+    filter(if (!is.null(end))
+        valid_after_date <= as.Date(end) else TRUE) %>%
+    filter(server == "relay") %>%
     group_by(valid_after_date) %>%
     summarize(total = sum(server_count_sum_avg),
       announced = sum(server_count_sum_avg[announced_ipv6 == "t"]),
@@ -1382,18 +1443,21 @@ plot_relays_ipv6 <- function(start, end, path) {
   ggsave(filename = path, width = 8, height = 5, dpi = 150)
 }
 
-write_relays_ipv6 <- function(start, end, path) {
+write_relays_ipv6 <- function(start = NULL, end = NULL, path) {
   prepare_relays_ipv6(start, end) %>%
     rename(date = valid_after_date) %>%
     spread(category, count) %>%
-    write.csv(path, quote = FALSE, row.names = FALSE)
+    write.csv(path, quote = FALSE, row.names = FALSE, na = "")
 }
 
 prepare_bridges_ipv6 <- function(start, end) {
   read.csv(paste(stats_dir, "ipv6servers.csv", sep = ""),
     colClasses = c("valid_after_date" = "Date")) %>%
-    filter(valid_after_date >= as.Date(start),
-      valid_after_date <= as.Date(end), server == "bridge") %>%
+    filter(if (!is.null(start))
+        valid_after_date >= as.Date(start) else TRUE) %>%
+    filter(if (!is.null(end))
+        valid_after_date <= as.Date(end) else TRUE) %>%
+    filter(server == "bridge") %>%
     group_by(valid_after_date) %>%
     summarize(total = sum(server_count_sum_avg),
       announced = sum(server_count_sum_avg[announced_ipv6 == "t"])) %>%
@@ -1417,18 +1481,21 @@ plot_bridges_ipv6 <- function(start, end, path) {
   ggsave(filename = path, width = 8, height = 5, dpi = 150)
 }
 
-write_bridges_ipv6 <- function(start, end, path) {
+write_bridges_ipv6 <- function(start = NULL, end = NULL, path) {
   prepare_bridges_ipv6(start, end) %>%
     rename(date = valid_after_date) %>%
     spread(category, count) %>%
-    write.csv(path, quote = FALSE, row.names = FALSE)
+    write.csv(path, quote = FALSE, row.names = FALSE, na = "")
 }
 
 prepare_advbw_ipv6 <- function(start, end) {
   read.csv(paste(stats_dir, "ipv6servers.csv", sep = ""),
     colClasses = c("valid_after_date" = "Date")) %>%
-    filter(valid_after_date >= as.Date(start),
-      valid_after_date <= as.Date(end), server == "relay") %>%
+    filter(if (!is.null(start))
+        valid_after_date >= as.Date(start) else TRUE) %>%
+    filter(if (!is.null(end))
+        valid_after_date <= as.Date(end) else TRUE) %>%
+    filter(server == "relay") %>%
     group_by(valid_after_date) %>%
     summarize(total = sum(advertised_bandwidth_bytes_sum_avg),
       total_guard = sum(advertised_bandwidth_bytes_sum_avg[guard_relay != "f"]),
@@ -1465,10 +1532,10 @@ plot_advbw_ipv6 <- function(start, end, path) {
   ggsave(filename = path, width = 8, height = 5, dpi = 150)
 }
 
-write_advbw_ipv6 <- function(start, end, path) {
+write_advbw_ipv6 <- function(start = NULL, end = NULL, path) {
   prepare_advbw_ipv6(start, end) %>%
     rename(date = valid_after_date) %>%
     spread(category, advbw) %>%
-    write.csv(path, quote = FALSE, row.names = FALSE)
+    write.csv(path, quote = FALSE, row.names = FALSE, na = "")
 }
 
diff --git a/src/main/java/org/torproject/metrics/web/RObjectGenerator.java b/src/main/java/org/torproject/metrics/web/RObjectGenerator.java
index aea6db7..00fcc81 100644
--- a/src/main/java/org/torproject/metrics/web/RObjectGenerator.java
+++ b/src/main/java/org/torproject/metrics/web/RObjectGenerator.java
@@ -126,6 +126,10 @@ public class RObjectGenerator implements ServletContextListener {
     queryBuilder.append("robust_call(as.call(list(");
     if ("csv".equalsIgnoreCase(fileType)) {
       queryBuilder.append("write_");
+      /* When we checked parameters above we also put in defaults for missing
+       * parameters. This is okay for graphs, but we want to support CSV files
+       * with empty parameters. Using the parameters we got here. */
+      checkedParameters = parameterMap;
     } else {
       queryBuilder.append("plot_");
     }





More information about the tor-commits mailing list