[tor-commits] [metrics-tasks/master] Add graph code for #2394.
karsten at torproject.org
karsten at torproject.org
Wed Apr 20 10:01:24 UTC 2011
commit c6887f322a243ef5526e2b793c5e461936722080
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Wed Apr 20 12:01:00 2011 +0200
Add graph code for #2394.
---
task-2394/.gitignore | 1 +
task-2394/bandwidth-comparison.R | 153 ++++++++++++++++++++++++++++++++++++++
2 files changed, 154 insertions(+), 0 deletions(-)
diff --git a/task-2394/.gitignore b/task-2394/.gitignore
index bef5dec..1e3c939 100644
--- a/task-2394/.gitignore
+++ b/task-2394/.gitignore
@@ -3,4 +3,5 @@ descriptors/
*.pdf
*.csv
*.jar
+*.png
diff --git a/task-2394/bandwidth-comparison.R b/task-2394/bandwidth-comparison.R
new file mode 100644
index 0000000..77a5842
--- /dev/null
+++ b/task-2394/bandwidth-comparison.R
@@ -0,0 +1,153 @@
+options(warn = -1)
+suppressPackageStartupMessages(library("ggplot2"))
+
+b <- read.csv("bandwidth-comparison.csv", stringsAsFactors = FALSE)
+
+# Plot ECDF to compare categories
+cdf_relays_category <- function(data, category) {
+ d <- data[data$category == category & data$descriptorbandwidth > 0, ]
+ d <- sort(d$consensusbandwidth * 1000 / d$descriptorbandwidth)
+ d <- data.frame(x = d, y = (1:length(d)) / length(d),
+ category = category)
+ d
+}
+relays_category <- rbind(
+ cdf_relays_category(b, "Guard & Exit (default policy)"),
+ cdf_relays_category(b, "Exit (default policy)"),
+ cdf_relays_category(b, "Guard & Exit (non-default policy)"),
+ cdf_relays_category(b, "Exit (non-default policy)"),
+ cdf_relays_category(b, "Guard"),
+ cdf_relays_category(b, "Middle"))
+ggplot(relays_category, aes(x = x, y = y, colour = category)) +
+geom_line() +
+scale_x_log10("\nRatio of measured by self-reported bandwidth",
+ limits = c(0.1, 10), breaks = c(0.1, 0.2, 0.5, 1, 2, 5, 10),
+ labels = c("0.1", "0.2", "0.5", "1", "2", "5", "10")) +
+scale_y_continuous("Fraction of relays\n", limits = c(0, 1),
+ formatter = "percent") +
+scale_colour_hue("") +
+geom_vline(xintercept = 1, legend = FALSE, linetype = "dashed") +
+opts(title = "Ratio between measured and self-reported relay bandwidth",
+ legend.position = "top")
+ggsave(filename = "bandwidth-comparison-relays.png",
+ width = 8, height = 5, dpi = 150)
+
+stopit
+
+# Plot ECDFs to compare consensus to votes
+cdf_relays_category_votes <- function(data, category) {
+ d <- data[data$category == category & data$descriptorbandwidth > 0, ]
+ consensus <- sort(d$consensusbandwidth * 1000 / d$descriptorbandwidth)
+ ides <- sort(d$idesbandwidth * 1000 / d$descriptorbandwidth)
+ urras <- sort(d$urrasbandwidth * 1000 / d$descriptorbandwidth)
+ moria1 <- sort(d$moria1bandwidth * 1000 / d$descriptorbandwidth)
+ gabelmoo <- sort(d$gabelmoobandwidth * 1000 / d$descriptorbandwidth)
+ d <- rbind(
+ data.frame(x = consensus,
+ y = (1:length(consensus)) / length(consensus),
+ source = "consensus",
+ category = category),
+ data.frame(x = ides,
+ y = (1:length(ides)) / length(ides),
+ source = "ides",
+ category = category),
+ data.frame(x = urras,
+ y = (1:length(urras)) / length(urras),
+ source = "urras",
+ category = category),
+ data.frame(x = moria1,
+ y = (1:length(moria1)) / length(moria1),
+ source = "moria1",
+ category = category),
+ data.frame(x = gabelmoo,
+ y = (1:length(gabelmoo)) / length(gabelmoo),
+ source = "gabelmoo",
+ category = category))
+ d
+}
+relays_category_votes <- rbind(
+ cdf_relays_category_votes(b, "Guard & Exit (default policy)"),
+ cdf_relays_category_votes(b, "Exit (default policy)"),
+ cdf_relays_category_votes(b, "Guard & Exit (non-default policy)"),
+ cdf_relays_category_votes(b, "Exit (non-default policy)"),
+ cdf_relays_category_votes(b, "Guard"),
+ cdf_relays_category_votes(b, "Middle"))
+ggplot(relays_category_votes, aes(x = x, y = y, colour = source)) +
+geom_line() +
+facet_wrap(~ category, ncol = 3) +
+scale_x_log10("\nRatio of measured by self-reported bandwidth",
+ limits = c(0.1, 10), breaks = c(0.1, 1, 10),
+ labels = c("0.1", "1", "10")) +
+scale_y_continuous("Fraction of relays\n", limits = c(0, 1),
+ formatter = "percent") +
+scale_colour_manual("",
+ breaks = c("consensus", "ides", "urras", "moria1", "gabelmoo"),
+ values = c("black", rep(alpha("black", 0.25), 4))) +
+geom_vline(xintercept = 1, legend = FALSE, linetype = "dotted") +
+opts(title = paste("Measured vs. self-reported bandwidth ratios in",
+ "consensus and votes\n"), legend.position = "none")
+ggsave(filename = "bandwidth-comparison-relays-votes.png",
+ width = 8, height = 5, dpi = 150)
+
+# Plot _weighted_ ECDFs
+wecdf <- function(data, source, category) {
+ data <- data[with(data, order(ratio)), ]
+ sum_measured <- sum(data$measured, na.rm = TRUE)
+ cur_measured <- data$measured[1]
+ res <- data.frame(x = data$ratio[1],
+ y = cur_measured / sum_measured,
+ source = source, category = category)
+ for (i in 2:length(data$ratio)) {
+ cur_measured <- cur_measured + data$measured[i]
+ res <- rbind(res, data.frame(x = data$ratio[i],
+ y = cur_measured / sum_measured,
+ source = source, category = category))
+ }
+ res
+}
+cdf_measured_category_votes <- function(data, category) {
+ d <- data[data$category == category & data$descriptorbandwidth > 0, ]
+ d <- rbind(
+ wecdf(data.frame(
+ ratio = d$consensusbandwidth * 1000 / d$descriptorbandwidth,
+ measured = d$consensusbandwidth), "consensus", category),
+ wecdf(data.frame(
+ ratio = d$idesbandwidth * 1000 / d$descriptorbandwidth,
+ measured = d$idesbandwidth), "ides", category),
+ wecdf(data.frame(
+ ratio = d$urrasbandwidth * 1000 / d$descriptorbandwidth,
+ measured = d$urrasbandwidth), "urras", category),
+ wecdf(data.frame(
+ ratio = d$moria1bandwidth * 1000 / d$descriptorbandwidth,
+ measured = d$moria1bandwidth), "moria1", category),
+ wecdf(data.frame(
+ ratio = d$gabelmoobandwidth * 1000 / d$descriptorbandwidth,
+ measured = d$gabelmoobandwidth), "gabelmoo", category))
+ d
+}
+measured_category_votes <- rbind(
+ cdf_measured_category_votes(b, "Guard & Exit (default policy)"),
+ cdf_measured_category_votes(b, "Exit (default policy)"),
+ cdf_measured_category_votes(b, "Guard & Exit (non-default policy)"),
+ cdf_measured_category_votes(b, "Exit (non-default policy)"),
+ cdf_measured_category_votes(b, "Guard"),
+ cdf_measured_category_votes(b, "Middle"))
+ggplot(measured_category_votes, aes(x = x, y = y, colour = source)) +
+geom_line() +
+facet_wrap(~ category, ncol = 3) +
+scale_x_log10("\nRatio of measured by self-reported bandwidth",
+ limits = c(0.1, 10), breaks = c(0.1, 1, 10),
+ labels = c("0.1", "1", "10")) +
+scale_y_continuous("Fraction of measured bandwidth\n", limits = c(0, 1),
+ formatter = "percent") +
+scale_colour_manual("",
+ breaks = c("consensus", "ides", "urras", "moria1", "gabelmoo"),
+ values = c("black", rep(alpha("black", 0.25), 4))) +
+geom_vline(xintercept = 1, legend = FALSE, linetype = "dotted") +
+opts(title = paste("Measured vs. self-reported bandwidth ratios in",
+ "consensus and votes\n"), legend.position = "none")
+ggsave(filename = "bandwidth-comparison-measured-votes.png",
+ width = 8, height = 5, dpi = 150)
+write.csv(measured_category_votes, "measured_category_votes-temp.csv",
+ quote = FALSE, row.names = FALSE)
+
More information about the tor-commits
mailing list