[tor-commits] [metrics-tasks/master] Add graphing code for old user numbers (#10041).
karsten at torproject.org
karsten at torproject.org
Mon Oct 28 14:13:15 UTC 2013
commit 1fa116045b42db527729c2e254b3d2508cff4822
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Mon Oct 28 15:10:39 2013 +0100
Add graphing code for old user numbers (#10041).
---
task-10041/.gitignore | 3 +
task-10041/LICENSE | 37 +++++
task-10041/README | 20 +++
task-10041/plot-users.R | 368 +++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 428 insertions(+)
diff --git a/task-10041/.gitignore b/task-10041/.gitignore
new file mode 100644
index 0000000..1f86c56
--- /dev/null
+++ b/task-10041/.gitignore
@@ -0,0 +1,3 @@
+Rplots.pdf
+*.csv.gz
+
diff --git a/task-10041/LICENSE b/task-10041/LICENSE
new file mode 100644
index 0000000..d5362a0
--- /dev/null
+++ b/task-10041/LICENSE
@@ -0,0 +1,37 @@
+Copyright 2013 The Tor Project
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+* Neither the names of the copyright owners nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Included data is freely available under a CC0 no copyright declaration: To
+the extent possible under law, the Tor Project has waived all copyright
+and related or neighboring rights in the data.
+
+Graphs produced with this code are licensed under a Creative Commons
+Attribution 3.0 United States License.
+
diff --git a/task-10041/README b/task-10041/README
new file mode 100644
index 0000000..007aa3d
--- /dev/null
+++ b/task-10041/README
@@ -0,0 +1,20 @@
+Usage:
+
+ Rscript plot-users.R direct [start] [end] [country] [events] [file]
+
+or
+
+ Rscript plot-users.R bridge [start] [end] [country] [file]
+
+ start: start date YYYY-MM-DD
+ end: end date YYYY-MM-DD
+ country: two-letter country code, or "all"
+ events: censorship events ("on" or "points"), or "off"
+ file: output filename, typically .png
+
+Examples:
+
+ Rscript plot-users.R direct 2013-07-01 2013-09-30 us on ex1.png
+
+ Rscript plot-users.R bridge 2013-01-01 2013-03-31 all ex2.png
+
diff --git a/task-10041/plot-users.R b/task-10041/plot-users.R
new file mode 100644
index 0000000..f07985f
--- /dev/null
+++ b/task-10041/plot-users.R
@@ -0,0 +1,368 @@
+# Copyright 2013 The Tor Project
+# See LICENSE for licensing information
+
+# Command-line arguments following the script name: the graph type comes
+# first, then the type-specific parameters described in README.
+args <- commandArgs(TRUE)
+# "direct" graphs take six arguments (type, start, end, country, events,
+# file); "bridge" graphs take five because they have no events argument.
+min_args <- if (length(args) > 0 && args[1] == "direct") 6 else 5
+if (length(args) < min_args) {
+  print("Not enough arguments. See README for usage instructions and examples.")
+  print(" Rscript plot-users.R direct 2013-07-01 2013-09-30 us on 1.png")
+  print(" Rscript plot-users.R bridge 2013-01-01 2013-03-31 all 2.png")
+  # Exit with a non-zero status so that callers can detect the usage error.
+  quit(status = 1)
+}
+
+# library() stops with an error if a package is not installed, whereas
+# require() would only return FALSE and let the script fail later on.
+library(ggplot2, quietly = TRUE)
+library(reshape, quietly = TRUE, warn.conflicts = FALSE)
+# Penalize scientific notation so that large numbers print in fixed notation.
+options(scipen = 15)
+
+# Display names for lower-case two-letter country codes, looked up by
+# countryname() when building graph titles.  Names that need an article in
+# a sentence ("users from the United States") already include "the".
+countrylist <- list(
+  "ad" = "Andorra",
+  "ae" = "the United Arab Emirates",
+  "af" = "Afghanistan",
+  "ag" = "Antigua and Barbuda",
+  "ai" = "Anguilla",
+  "al" = "Albania",
+  "am" = "Armenia",
+  "an" = "the Netherlands Antilles",
+  "ao" = "Angola",
+  "aq" = "Antarctica",
+  "ar" = "Argentina",
+  "as" = "American Samoa",
+  "at" = "Austria",
+  "au" = "Australia",
+  "aw" = "Aruba",
+  "ax" = "the Aland Islands",
+  "az" = "Azerbaijan",
+  "ba" = "Bosnia and Herzegovina",
+  "bb" = "Barbados",
+  "bd" = "Bangladesh",
+  "be" = "Belgium",
+  "bf" = "Burkina Faso",
+  "bg" = "Bulgaria",
+  "bh" = "Bahrain",
+  "bi" = "Burundi",
+  "bj" = "Benin",
+  "bl" = "Saint Barthélemy",
+  "bm" = "Bermuda",
+  "bn" = "Brunei",
+  "bo" = "Bolivia",
+  "br" = "Brazil",
+  "bs" = "the Bahamas",
+  "bt" = "Bhutan",
+  "bv" = "the Bouvet Island",
+  "bw" = "Botswana",
+  "by" = "Belarus",
+  "bz" = "Belize",
+  "ca" = "Canada",
+  "cc" = "the Cocos (Keeling) Islands",
+  "cd" = "the Democratic Republic of the Congo",
+  "cf" = "Central African Republic",
+  "cg" = "Congo",
+  "ch" = "Switzerland",
+  "ci" = "Côte d'Ivoire",
+  "ck" = "the Cook Islands",
+  "cl" = "Chile",
+  "cm" = "Cameroon",
+  "cn" = "China",
+  "co" = "Colombia",
+  "cr" = "Costa Rica",
+  "cu" = "Cuba",
+  "cv" = "Cape Verde",
+  "cx" = "the Christmas Island",
+  "cy" = "Cyprus",
+  "cz" = "the Czech Republic",
+  "de" = "Germany",
+  "dj" = "Djibouti",
+  "dk" = "Denmark",
+  "dm" = "Dominica",
+  "do" = "the Dominican Republic",
+  "dz" = "Algeria",
+  "ec" = "Ecuador",
+  "ee" = "Estonia",
+  "eg" = "Egypt",
+  "eh" = "the Western Sahara",
+  "er" = "Eritrea",
+  "es" = "Spain",
+  "et" = "Ethiopia",
+  "fi" = "Finland",
+  "fj" = "Fiji",
+  "fk" = "the Falkland Islands (Malvinas)",
+  "fm" = "the Federated States of Micronesia",
+  "fo" = "the Faroe Islands",
+  "fr" = "France",
+  "fx" = "Metropolitan France",
+  "ga" = "Gabon",
+  "gb" = "the United Kingdom",
+  "gd" = "Grenada",
+  "ge" = "Georgia",
+  "gf" = "French Guiana",
+  "gg" = "Guernsey",
+  "gh" = "Ghana",
+  "gi" = "Gibraltar",
+  "gl" = "Greenland",
+  "gm" = "Gambia",
+  "gn" = "Guinea",
+  "gp" = "Guadeloupe",
+  "gq" = "Equatorial Guinea",
+  "gr" = "Greece",
+  "gs" = "South Georgia and the South Sandwich Islands",
+  "gt" = "Guatemala",
+  "gu" = "Guam",
+  "gw" = "Guinea-Bissau",
+  "gy" = "Guyana",
+  "hk" = "Hong Kong",
+  "hm" = "Heard Island and McDonald Islands",
+  "hn" = "Honduras",
+  "hr" = "Croatia",
+  "ht" = "Haiti",
+  "hu" = "Hungary",
+  "id" = "Indonesia",
+  "ie" = "Ireland",
+  "il" = "Israel",
+  "im" = "the Isle of Man",
+  "in" = "India",
+  "io" = "the British Indian Ocean Territory",
+  "iq" = "Iraq",
+  "ir" = "Iran",
+  "is" = "Iceland",
+  "it" = "Italy",
+  "je" = "Jersey",
+  "jm" = "Jamaica",
+  "jo" = "Jordan",
+  "jp" = "Japan",
+  "ke" = "Kenya",
+  "kg" = "Kyrgyzstan",
+  "kh" = "Cambodia",
+  "ki" = "Kiribati",
+  "km" = "Comoros",
+  "kn" = "Saint Kitts and Nevis",
+  "kp" = "North Korea",
+  "kr" = "the Republic of Korea",
+  "kw" = "Kuwait",
+  "ky" = "the Cayman Islands",
+  "kz" = "Kazakhstan",
+  "la" = "Laos",
+  "lb" = "Lebanon",
+  "lc" = "Saint Lucia",
+  "li" = "Liechtenstein",
+  "lk" = "Sri Lanka",
+  "lr" = "Liberia",
+  "ls" = "Lesotho",
+  "lt" = "Lithuania",
+  "lu" = "Luxembourg",
+  "lv" = "Latvia",
+  "ly" = "Libya",
+  "ma" = "Morocco",
+  "mc" = "Monaco",
+  "md" = "the Republic of Moldova",
+  "me" = "Montenegro",
+  "mf" = "Saint Martin",
+  "mg" = "Madagascar",
+  "mh" = "the Marshall Islands",
+  "mk" = "Macedonia",
+  "ml" = "Mali",
+  "mm" = "Burma",
+  "mn" = "Mongolia",
+  "mo" = "Macau",
+  "mp" = "the Northern Mariana Islands",
+  "mq" = "Martinique",
+  "mr" = "Mauritania",
+  "ms" = "Montserrat",
+  "mt" = "Malta",
+  "mu" = "Mauritius",
+  "mv" = "the Maldives",
+  "mw" = "Malawi",
+  "mx" = "Mexico",
+  "my" = "Malaysia",
+  "mz" = "Mozambique",
+  "na" = "Namibia",
+  "nc" = "New Caledonia",
+  "ne" = "Niger",
+  "nf" = "Norfolk Island",
+  "ng" = "Nigeria",
+  "ni" = "Nicaragua",
+  "nl" = "the Netherlands",
+  "no" = "Norway",
+  "np" = "Nepal",
+  "nr" = "Nauru",
+  "nu" = "Niue",
+  "nz" = "New Zealand",
+  "om" = "Oman",
+  "pa" = "Panama",
+  "pe" = "Peru",
+  "pf" = "French Polynesia",
+  "pg" = "Papua New Guinea",
+  "ph" = "the Philippines",
+  "pk" = "Pakistan",
+  "pl" = "Poland",
+  "pm" = "Saint Pierre and Miquelon",
+  "pn" = "the Pitcairn Islands",
+  "pr" = "Puerto Rico",
+  "ps" = "the Palestinian Territory",
+  "pt" = "Portugal",
+  "pw" = "Palau",
+  "py" = "Paraguay",
+  "qa" = "Qatar",
+  "re" = "Reunion",
+  "ro" = "Romania",
+  "rs" = "Serbia",
+  "ru" = "Russia",
+  "rw" = "Rwanda",
+  "sa" = "Saudi Arabia",
+  "sb" = "the Solomon Islands",
+  "sc" = "the Seychelles",
+  "sd" = "Sudan",
+  "se" = "Sweden",
+  "sg" = "Singapore",
+  "sh" = "Saint Helena",
+  "si" = "Slovenia",
+  "sj" = "Svalbard and Jan Mayen",
+  "sk" = "Slovakia",
+  "sl" = "Sierra Leone",
+  "sm" = "San Marino",
+  "sn" = "Senegal",
+  "so" = "Somalia",
+  "sr" = "Suriname",
+  "st" = "São Tomé and Príncipe",
+  "sv" = "El Salvador",
+  "sy" = "the Syrian Arab Republic",
+  "sz" = "Swaziland",
+  "tc" = "Turks and Caicos Islands",
+  "td" = "Chad",
+  "tf" = "the French Southern Territories",
+  "tg" = "Togo",
+  "th" = "Thailand",
+  "tj" = "Tajikistan",
+  "tk" = "Tokelau",
+  "tl" = "East Timor",
+  "tm" = "Turkmenistan",
+  "tn" = "Tunisia",
+  "to" = "Tonga",
+  "tr" = "Turkey",
+  "tt" = "Trinidad and Tobago",
+  "tv" = "Tuvalu",
+  "tw" = "Taiwan",
+  "tz" = "the United Republic of Tanzania",
+  "ua" = "Ukraine",
+  "ug" = "Uganda",
+  "um" = "the United States Minor Outlying Islands",
+  "us" = "the United States",
+  "uy" = "Uruguay",
+  "uz" = "Uzbekistan",
+  "va" = "Vatican City",
+  "vc" = "Saint Vincent and the Grenadines",
+  "ve" = "Venezuela",
+  "vg" = "the British Virgin Islands",
+  "vi" = "the United States Virgin Islands",
+  "vn" = "Vietnam",
+  "vu" = "Vanuatu",
+  "wf" = "Wallis and Futuna",
+  "ws" = "Samoa",
+  "ye" = "Yemen",
+  "yt" = "Mayotte",
+  "za" = "South Africa",
+  "zm" = "Zambia",
+  "zw" = "Zimbabwe")
+
+# Return the display name stored in countrylist for a country code, or
+# "no-man's-land" when the code has no entry.
+countryname <- function(country) {
+  name <- countrylist[[country]]
+  if (is.null(name)) "no-man's-land" else name
+}
+
+# Plot daily numbers of directly connecting users and save the graph.
+#
+# start, end: first and last day to plot, as "YYYY-MM-DD" strings
+# country: name of the data column to plot, or "all"
+# events: "off" plots only the user numbers; any other value marks days
+#   outside the expected range with points; "on" additionally draws the
+#   expected range as a gray ribbon (ignored when country is "all")
+# path: output file name, handed to ggsave()
+#
+# Reads direct-users.csv.gz and, for events, direct-users-ranges.csv.gz
+# from the working directory.
+# NOTE(review): assumes direct-users.csv.gz has a "date" column in
+# YYYY-MM-DD format plus one column per country code and "all" - confirm.
+plot_direct_users <- function(start, end, country, events, path) {
+  u <- read.csv("direct-users.csv.gz", stringsAsFactors = FALSE)
+  u <- u[u$date >= start & u$date <= end, ]
+  u <- melt(u, id.vars = "date")
+  u <- u[u$variable %in% c(country, "all"), ]
+  # The "all" series tells on which days there is any data at all.
+  a <- u[u$variable == "all", ]
+  if (country != "all") {
+    u <- u[u$variable == country, ]
+  }
+  u <- data.frame(date = u$date, users = u$value, stringsAsFactors = FALSE)
+  dates <- seq(from = as.Date(start, "%Y-%m-%d"),
+      to = as.Date(end, "%Y-%m-%d"), by = "1 day")
+  # Added rows carry their dates as "YYYY-MM-DD" strings like the rows read
+  # from the CSV file.  Binding Date objects onto the string column would
+  # turn them into day numbers that no longer parse as dates.
+  # Days without data in the "all" series become gaps (NA) ...
+  missing_dates <- setdiff(dates, as.Date(a$date))
+  if (length(missing_dates) > 0) {
+    u <- rbind(u, data.frame(
+        date = format(as.Date(missing_dates, origin = "1970-01-01"),
+            "%Y-%m-%d"),
+        users = NA, stringsAsFactors = FALSE))
+  }
+  # ... while days that are only missing for this country count as zero.
+  missing_dates <- setdiff(dates, as.Date(u$date))
+  if (length(missing_dates) > 0) {
+    u <- rbind(u, data.frame(
+        date = format(as.Date(missing_dates, origin = "1970-01-01"),
+            "%Y-%m-%d"),
+        users = 0, stringsAsFactors = FALSE))
+  }
+  title <- if (country == "all") {
+    "Directly connecting users from all countries\n"
+  } else {
+    paste0("Directly connecting users from ", countryname(country), "\n")
+  }
+  has_users <- !all(is.na(u$users))
+  max_y <- if (has_users) max(u$users, na.rm = TRUE) else 0
+  p <- ggplot(u, aes(x = as.Date(date, "%Y-%m-%d"), y = users))
+  if (has_users && events != "off" && country != "all") {
+    r <- read.csv("direct-users-ranges.csv.gz", stringsAsFactors = FALSE)
+    r <- r[r$date >= start & r$date <= end & r$country == country,
+        c("date", "minusers", "maxusers")]
+    r[r$minusers < 0, "minusers"] <- 0
+    # Combine users and ranges into one row per date with the columns
+    # date, users, minusers, and maxusers.
+    r <- cast(rbind(melt(u, id.vars = "date"), melt(r, id.vars = "date")))
+    upturns <- r[r$users > r$maxusers, 1:2]
+    downturns <- r[r$users < r$minusers, 1:2]
+    # Only draw the ribbon if there are ranges for this country and
+    # period; without them the ribbon's columns would not exist.
+    if (events == "on" && length(r$maxusers) > 0) {
+      max_y <- max(max_y, max(r$maxusers, na.rm = TRUE))
+      p <- p +
+        geom_ribbon(data = r, aes(ymin = minusers,
+            ymax = maxusers), fill = "gray")
+    }
+    if (length(upturns$date) > 0) {
+      p <- p +
+        geom_point(data = upturns, aes(x = as.Date(date), y = users),
+            size = 5, colour = "dodgerblue2")
+    }
+    if (length(downturns$date) > 0) {
+      p <- p +
+        geom_point(data = downturns, aes(x = as.Date(date), y = users),
+            size = 5, colour = "firebrick2")
+    }
+  }
+  p <- p +
+    geom_line(size = 1) +
+    scale_x_date(name = paste0("\nThe Tor Project - ",
+        "https://metrics.torproject.org/")) +
+    scale_y_continuous(name = "", limits = c(0, max_y)) +
+    ggtitle(title)
+  print(p)
+  # Pass the plot explicitly rather than relying on last_plot().
+  ggsave(filename = path, plot = p, width = 8, height = 5, dpi = 72)
+}
+
+# Plot daily numbers of bridge users and save the graph.
+#
+# start, end: first and last day to plot, as "YYYY-MM-DD" strings
+# country: name of the data column to plot, or "all"
+# path: output file name, handed to ggsave()
+#
+# Reads bridge-users.csv.gz from the working directory.
+# NOTE(review): assumes bridge-users.csv.gz has a "date" column in
+# YYYY-MM-DD format plus one column per country code and "all" - confirm.
+plot_bridge_users <- function(start, end, country, path) {
+  b <- read.csv("bridge-users.csv.gz", stringsAsFactors = FALSE)
+  b <- b[b$date >= start & b$date <= end, ]
+  b <- melt(b, id.vars = "date")
+  # Keep only the requested series.  Also keeping the "all" rows for a
+  # single country would draw both series as one zigzag line and scale
+  # the y axis to the total.
+  b <- b[b$variable %in% country, ]
+  bridgeusers <- data.frame(date = as.Date(b$date), users = b$value)
+  dates <- seq(from = as.Date(start, "%Y-%m-%d"),
+      to = as.Date(end, "%Y-%m-%d"), by = "1 day")
+  # Days without data become gaps (NA) in the graph.
+  missing_dates <- setdiff(dates, bridgeusers$date)
+  if (length(missing_dates) > 0) {
+    bridgeusers <- rbind(bridgeusers,
+        data.frame(date = as.Date(missing_dates, origin = "1970-01-01"),
+            users = NA))
+  }
+  title <- if (country == "all") {
+    "Bridge users from all countries\n"
+  } else {
+    paste0("Bridge users from ", countryname(country), "\n")
+  }
+  max_y <- if (all(is.na(bridgeusers$users))) {
+    0
+  } else {
+    max(bridgeusers$users, na.rm = TRUE)
+  }
+  p <- ggplot(bridgeusers, aes(x = as.Date(date, "%Y-%m-%d"), y = users)) +
+    geom_line(size = 1) +
+    scale_x_date(name = paste0("\nThe Tor Project - ",
+        "https://metrics.torproject.org/")) +
+    scale_y_continuous(name = "", limits = c(0, max_y)) +
+    ggtitle(title)
+  # Pass the plot explicitly rather than relying on last_plot().
+  ggsave(filename = path, plot = p, width = 8, height = 5, dpi = 72)
+}
+
+# Dispatch on the graph type given as first command-line argument.
+if (args[1] == "direct") {
+  plot_direct_users(start = args[2], end = args[3], country = args[4],
+      events = args[5], path = args[6])
+} else if (args[1] == "bridge") {
+  plot_bridge_users(start = args[2], end = args[3], country = args[4],
+      path = args[5])
+} else {
+  # Reject unknown types instead of exiting silently without a graph.
+  stop("Unknown graph type \"", args[1], "\"; expected \"direct\" or ",
+      "\"bridge\". See README for usage instructions and examples.",
+      call. = FALSE)
+}
+
More information about the tor-commits
mailing list