[or-cvs] [metrics-db/master 2/2] Extend statistics to all countries and directories.
karsten at torproject.org
karsten at torproject.org
Fri Nov 19 13:36:18 UTC 2010
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Fri, 19 Nov 2010 12:42:31 +0100
Subject: Extend statistics to all countries and directories.
Commit: e8a093178d35f8f5ad8c80796bc8f32e0707d5ae
---
config | 11 --
db/tordir.sql | 1 -
.../ernie/db/BridgeDescriptorParser.java | 25 +--
.../ernie/db/BridgeStatsFileHandler.java | 135 ++++++++---------
src/org/torproject/ernie/db/Configuration.java | 25 ---
.../ernie/db/DirreqStatsFileHandler.java | 162 ++++++++------------
src/org/torproject/ernie/db/Main.java | 25 +--
.../ernie/db/RelayDescriptorDownloader.java | 60 ++------
.../torproject/ernie/db/RelayDescriptorParser.java | 49 ++-----
9 files changed, 178 insertions(+), 315 deletions(-)
diff --git a/config b/config
index f5486e7..bb39a4f 100644
--- a/config
+++ b/config
@@ -115,17 +115,6 @@
## Write dirreq stats to disk
#WriteDirreqStats 0
#
-## Comma-separated set of countries to be included in dirreq and bridge
-## graphs; note that after adding new countries, an import of the relevant
-## descriptor archives (relay and/or bridge) is necessary!
-#DirreqBridgeCountries au,bh,br,ca,cn,cu,de,et,fr,gb,ir,it,jp,kr,mm,pl,ru,sa,se,sy,tn,tm,us,uz,vn,ye
-#
-## Comma-separated set of fingerprints of directory mirrors to be included
-## in dirreq and bridge graphs; note that after adding new directories, an
-## import of the relevant descriptor archives (relay and/or bridge) is
-## necessary!
-#DirreqDirectories 8522EB98C91496E80EC238E732594D1509158E77,9695DFC35FFEB861329B9F1AB04C46397020CE31
-#
## Write bridge stats to disk
#WriteBridgeStats 0
diff --git a/db/tordir.sql b/db/tordir.sql
index 2035593..43064d6 100644
--- a/db/tordir.sql
+++ b/db/tordir.sql
@@ -564,7 +564,6 @@ CREATE TABLE dirreq_stats (
"date" DATE NOT NULL,
country CHARACTER(2) NOT NULL,
requests INTEGER NOT NULL,
- "share" DOUBLE PRECISION NOT NULL,
CONSTRAINT dirreq_stats_pkey PRIMARY KEY (source, "date", country)
);
diff --git a/src/org/torproject/ernie/db/BridgeDescriptorParser.java b/src/org/torproject/ernie/db/BridgeDescriptorParser.java
index b4b0888..e2d8bd0 100644
--- a/src/org/torproject/ernie/db/BridgeDescriptorParser.java
+++ b/src/org/torproject/ernie/db/BridgeDescriptorParser.java
@@ -12,15 +12,12 @@ public class BridgeDescriptorParser {
private ConsensusStatsFileHandler csfh;
private BridgeStatsFileHandler bsfh;
private SanitizedBridgesWriter sbw;
- private SortedSet<String> countries;
private Logger logger;
public BridgeDescriptorParser(ConsensusStatsFileHandler csfh,
- BridgeStatsFileHandler bsfh, SanitizedBridgesWriter sbw,
- SortedSet<String> countries) {
+ BridgeStatsFileHandler bsfh, SanitizedBridgesWriter sbw) {
this.csfh = csfh;
this.bsfh = bsfh;
this.sbw = sbw;
- this.countries = countries;
this.logger =
Logger.getLogger(BridgeDescriptorParser.class.getName());
}
@@ -107,17 +104,13 @@ public class BridgeDescriptorParser {
Map<String, String> obs = new HashMap<String, String>();
String[] parts = line.split(" ")[1].split(",");
for (String p : parts) {
+ String country = p.substring(0, 2);
double users = ((double) Long.parseLong(p.substring(3)) - 4L)
* 86400.0D / ((double) seconds);
allUsers += users;
- for (String c : this.countries) {
- if (p.startsWith(c)) {
- obs.put(c, String.format("%.2f", users));
- break;
- }
- }
- obs.put("zy", String.format("%.2f", allUsers));
+ obs.put(country, String.format("%.2f", users));
}
+ obs.put("zy", String.format("%.2f", allUsers));
String date = publishedLine.split(" ")[1];
String time = publishedLine.split(" ")[2];
if (this.bsfh != null) {
@@ -138,14 +131,12 @@ public class BridgeDescriptorParser {
Map<String, String> obs = new HashMap<String, String>();
String[] parts = line.split(" ")[1].split(",");
for (String p : parts) {
+ String country = p.substring(0, 2);
double users = (double) Long.parseLong(p.substring(3)) - 4L;
- for (String c : countries) {
- if (p.startsWith(c)) {
- obs.put(c, String.format("%.2f", users));
- break;
- }
- }
+ allUsers += users;
+ obs.put(country, String.format("%.2f", users));
}
+ obs.put("zy", String.format("%.2f", allUsers));
String date = bridgeStatsEndLine.split(" ")[1];
String time = bridgeStatsEndLine.split(" ")[2];
if (this.bsfh != null) {
diff --git a/src/org/torproject/ernie/db/BridgeStatsFileHandler.java b/src/org/torproject/ernie/db/BridgeStatsFileHandler.java
index ca4dff8..8a417b5 100644
--- a/src/org/torproject/ernie/db/BridgeStatsFileHandler.java
+++ b/src/org/torproject/ernie/db/BridgeStatsFileHandler.java
@@ -19,7 +19,7 @@ import java.util.logging.*;
public class BridgeStatsFileHandler {
/**
- * Two-letter country codes of countries that we care about.
+ * Two-letter country codes of known countries.
*/
private SortedSet<String> countries;
@@ -34,7 +34,7 @@ public class BridgeStatsFileHandler {
* day. Map keys are bridge and date written as "bridge,date", map
* values are lines as read from <code>stats/bridge-stats-raw</code>.
*/
- private SortedMap<String, String> bridgeUsersRaw;
+ private SortedMap<String, Map<String, String>> bridgeUsersRaw;
/**
* Helper file containing the hashed relay identities of all known
@@ -83,14 +83,14 @@ public class BridgeStatsFileHandler {
* files <code>stats/bridge-stats-raw</code> and
* <code>stats/hashed-relay-identities</code>.
*/
- public BridgeStatsFileHandler(SortedSet<String> countries,
- String connectionURL) {
+ public BridgeStatsFileHandler(String connectionURL) {
- /* Memorize the set of countries we care about. */
- this.countries = countries;
+ /* Initialize set of known countries. */
+ this.countries = new TreeSet<String>();
+ this.countries.add("zy");
/* Initialize local data structures to hold results. */
- this.bridgeUsersRaw = new TreeMap<String, String>();
+ this.bridgeUsersRaw = new TreeMap<String, Map<String, String>>();
this.hashedRelays = new TreeSet<String>();
this.zeroTwoTwoDescriptors = new TreeSet<String>();
@@ -130,9 +130,7 @@ public class BridgeStatsFileHandler {
} else {
String[] headers = line.split(",");
for (int i = 3; i < headers.length; i++) {
- if (headers[i].equals("all")) {
- this.countries.add("zy");
- } else {
+ if (!headers[i].equals("all")) {
this.countries.add(headers[i]);
}
}
@@ -151,6 +149,9 @@ public class BridgeStatsFileHandler {
SortedMap<String, String> obs =
new TreeMap<String, String>();
for (int i = 3; i < parts.length; i++) {
+ if (parts[i].equals("NA")) {
+ continue;
+ }
if (headers[i].equals("all")) {
obs.put("zy", parts[i]);
} else {
@@ -265,25 +266,25 @@ public class BridgeStatsFileHandler {
*/
public void addObs(String hashedIdentity, String date, String time,
Map<String, String> obs) {
- String key = hashedIdentity + "," + date;
- StringBuilder sb = new StringBuilder(key + "," + time);
- for (String c : this.countries) {
- sb.append("," + (obs.containsKey(c) && !obs.get(c).startsWith("-")
- ? obs.get(c) : "0.0"));
+ for (String country : obs.keySet()) {
+ this.countries.add(country);
}
- String value = sb.toString();
- if (!this.bridgeUsersRaw.containsKey(key)) {
- this.logger.finer("Adding new bridge user numbers: " + value);
- this.bridgeUsersRaw.put(key, value);
- } else if (value.compareTo(this.bridgeUsersRaw.get(key)) > 0) {
+ String shortKey = hashedIdentity + "," + date;
+ String longKey = shortKey + "," + time;
+ SortedMap<String, Map<String, String>> tailMap =
+ this.bridgeUsersRaw.tailMap(shortKey);
+ String nextKey = tailMap.isEmpty() ? null : tailMap.firstKey();
+ if (nextKey == null || !nextKey.startsWith(shortKey)) {
+ this.logger.finer("Adding new bridge user numbers for key "
+ + longKey);
+ this.bridgeUsersRaw.put(longKey, obs);
+ } else if (longKey.compareTo(nextKey) > 0) {
this.logger.finer("Replacing existing bridge user numbers (" +
- this.bridgeUsersRaw.get(key) + " with new numbers: "
- + value);
- this.bridgeUsersRaw.put(key, value);
+ nextKey + " with new numbers: " + longKey);
+ this.bridgeUsersRaw.put(longKey, obs);
} else {
this.logger.finer("Not replacing existing bridge user numbers (" +
- this.bridgeUsersRaw.get(key) + " with new numbers (" + value
- + ").");
+ nextKey + " with new numbers (" + longKey + ").");
}
}
@@ -346,14 +347,22 @@ public class BridgeStatsFileHandler {
}
}
bw.append("\n");
- for (String line : this.bridgeUsersRaw.values()) {
- String[] parts = line.split(",");
+ for (Map.Entry<String, Map<String, String>> e :
+ this.bridgeUsersRaw.entrySet()) {
+ String longKey = e.getKey();
+ String[] parts = longKey.split(",");
String hashedBridgeIdentity = parts[0];
String date = parts[1];
String time = parts[2];
if (!this.hashedRelays.contains(hashedBridgeIdentity) &&
- !this.zeroTwoTwoDescriptors.contains(hashedBridgeIdentity
- + "," + date + "," + time)) {
+ !this.zeroTwoTwoDescriptors.contains(longKey)) {
+ Map<String, String> obs = e.getValue();
+ StringBuilder sb = new StringBuilder(longKey);
+ for (String c : this.countries) {
+ sb.append("," + (obs.containsKey(c) &&
+ !obs.get(c).startsWith("-") ? obs.get(c) : "NA"));
+ }
+ String line = sb.toString();
bw.append(line + "\n");
}
}
@@ -368,21 +377,27 @@ public class BridgeStatsFileHandler {
/* Aggregate per-day statistics. */
SortedMap<String, double[]> bridgeUsersPerDay =
new TreeMap<String, double[]>();
- for (String line : this.bridgeUsersRaw.values()) {
- String[] parts = line.split(",");
+ for (Map.Entry<String, Map<String, String>> e :
+ this.bridgeUsersRaw.entrySet()) {
+ String longKey = e.getKey();
+ String[] parts = longKey.split(",");
String hashedBridgeIdentity = parts[0];
String date = parts[1];
String time = parts[2];
if (!this.hashedRelays.contains(hashedBridgeIdentity) &&
- !this.zeroTwoTwoDescriptors.contains(hashedBridgeIdentity + ","
- + date + "," + time)) {
+ !this.zeroTwoTwoDescriptors.contains(longKey)) {
double[] users = bridgeUsersPerDay.get(date);
+ Map<String, String> obs = e.getValue();
if (users == null) {
users = new double[this.countries.size()];
bridgeUsersPerDay.put(date, users);
}
- for (int i = 3; i < parts.length; i++) {
- users[i - 3] += Double.parseDouble(parts[i]);
+ int i = 0;
+ for (String c : this.countries) {
+ if (obs.containsKey(c) && !obs.get(c).startsWith("-")) {
+ users[i] += Double.parseDouble(obs.get(c));
+ }
+ i++;
}
}
}
@@ -405,28 +420,9 @@ public class BridgeStatsFileHandler {
}
bw.append("\n");
- /* Memorize last written date fill missing dates with NA's. */
- long lastDateMillis = 0L;
- SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
- dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ /* Write current observation. */
for (Map.Entry<String, double[]> e : bridgeUsersPerDay.entrySet()) {
String date = e.getKey();
- long currentDateMillis = dateFormat.parse(date).getTime();
- if (lastDateMillis == 0L) {
- lastDateMillis = currentDateMillis;
- }
- while (currentDateMillis - 24L * 60L * 60L * 1000L
- > lastDateMillis) {
- lastDateMillis += 24L * 60L * 60L * 1000L;
- bw.append(dateFormat.format(lastDateMillis));
- for (int i = 0; i < this.countries.size(); i++) {
- bw.append(",NA");
- }
- bw.append("\n");
- }
- lastDateMillis = currentDateMillis;
-
- /* Write current observation. */
bw.append(date);
double[] users = e.getValue();
for (int i = 0; i < users.length; i++) {
@@ -440,9 +436,6 @@ public class BridgeStatsFileHandler {
} catch (IOException e) {
this.logger.log(Level.WARNING, "Failed to write "
+ this.bridgeStatsFile.getAbsolutePath() + "!", e);
- } catch (ParseException e) {
- this.logger.log(Level.WARNING, "Failed to write "
- + this.bridgeStatsFile.getAbsolutePath() + "!", e);
}
/* Add daily bridge users to database. */
@@ -452,16 +445,20 @@ public class BridgeStatsFileHandler {
for (String c : this.countries) {
countryList.add(c);
}
- Map<String, Double> insertRows = new HashMap<String, Double>(),
- updateRows = new HashMap<String, Double>();
+ Map<String, Integer> insertRows = new HashMap<String, Integer>(),
+ updateRows = new HashMap<String, Integer>();
for (Map.Entry<String, double[]> e :
bridgeUsersPerDay.entrySet()) {
String date = e.getKey();
double[] users = e.getValue();
for (int i = 0; i < users.length; i++) {
+ int usersInt = (int) users[i];
+ if (usersInt < 1) {
+ continue;
+ }
String country = countryList.get(i);
String key = date + "," + country;
- insertRows.put(key, users[i]);
+ insertRows.put(key, usersInt);
}
}
Connection conn = DriverManager.getConnection(connectionURL);
@@ -474,8 +471,8 @@ public class BridgeStatsFileHandler {
String country = rs.getString(2);
String key = date + "," + country;
if (insertRows.containsKey(key)) {
- double insertRow = insertRows.remove(key);
- double oldUsers = rs.getDouble(3);
+ int insertRow = insertRows.remove(key);
+ int oldUsers = rs.getInt(3);
if (oldUsers != insertRow) {
updateRows.put(key, insertRow);
}
@@ -485,13 +482,13 @@ public class BridgeStatsFileHandler {
PreparedStatement psU = conn.prepareStatement(
"UPDATE bridge_stats SET users = ? "
+ "WHERE date = ? AND country = ?");
- for (Map.Entry<String, Double> e : updateRows.entrySet()) {
+ for (Map.Entry<String, Integer> e : updateRows.entrySet()) {
String[] keyParts = e.getKey().split(",");
java.sql.Date date = java.sql.Date.valueOf(keyParts[0]);
String country = keyParts[1];
- double users = e.getValue();
+ int users = e.getValue();
psU.clearParameters();
- psU.setDouble(1, users);
+ psU.setInt(1, users);
psU.setDate(2, date);
psU.setString(3, country);
psU.executeUpdate();
@@ -499,13 +496,13 @@ public class BridgeStatsFileHandler {
PreparedStatement psI = conn.prepareStatement(
"INSERT INTO bridge_stats (users, date, country) "
+ "VALUES (?, ?, ?)");
- for (Map.Entry<String, Double> e : insertRows.entrySet()) {
+ for (Map.Entry<String, Integer> e : insertRows.entrySet()) {
String[] keyParts = e.getKey().split(",");
java.sql.Date date = java.sql.Date.valueOf(keyParts[0]);
String country = keyParts[1];
- double users = e.getValue();
+ int users = e.getValue();
psI.clearParameters();
- psI.setDouble(1, users);
+ psI.setInt(1, users);
psI.setDate(2, date);
psI.setString(3, country);
psI.executeUpdate();
diff --git a/src/org/torproject/ernie/db/Configuration.java b/src/org/torproject/ernie/db/Configuration.java
index ea694a6..03b4358 100644
--- a/src/org/torproject/ernie/db/Configuration.java
+++ b/src/org/torproject/ernie/db/Configuration.java
@@ -15,12 +15,6 @@ import java.util.logging.*;
public class Configuration {
private boolean writeConsensusStats = false;
private boolean writeDirreqStats = false;
- private SortedSet<String> dirreqBridgeCountries = new TreeSet<String>(
- Arrays.asList(("au,bh,br,ca,cn,cu,de,et,fr,gb,ir,it,jp,kr,mm,pl,ru,"
- + "sa,se,sy,tn,tm,us,uz,vn,ye").split(",")));
- private SortedSet<String> dirreqDirectories = new TreeSet<String>(
- Arrays.asList(("8522EB98C91496E80EC238E732594D1509158E77,"
- + "9695DFC35FFEB861329B9F1AB04C46397020CE31").split(",")));
private boolean writeBridgeStats = false;
private boolean writeDirectoryArchives = false;
private String directoryArchivesOutputDirectory = "directory-archive/";
@@ -87,19 +81,6 @@ public class Configuration {
} else if (line.startsWith("WriteDirreqStats")) {
this.writeDirreqStats = Integer.parseInt(
line.split(" ")[1]) != 0;
- } else if (line.startsWith("DirreqBridgeCountries")) {
- this.dirreqBridgeCountries = new TreeSet<String>();
- for (String country : line.split(" ")[1].split(",")) {
- if (country.length() != 2) {
- logger.severe("Configuration file contains illegal country "
- + "code in line '" + line + "'! Exiting!");
- System.exit(1);
- }
- this.dirreqBridgeCountries.add(country);
- }
- } else if (line.startsWith("DirreqDirectories")) {
- this.dirreqDirectories = new TreeSet<String>(
- Arrays.asList(line.split(" ")[1].split(",")));
} else if (line.startsWith("WriteBridgeStats")) {
this.writeBridgeStats = Integer.parseInt(
line.split(" ")[1]) != 0;
@@ -291,12 +272,6 @@ public class Configuration {
public boolean getWriteDirreqStats() {
return this.writeDirreqStats;
}
- public SortedSet<String> getDirreqBridgeCountries() {
- return this.dirreqBridgeCountries;
- }
- public SortedSet<String> getDirreqDirectories() {
- return this.dirreqDirectories;
- }
public boolean getWriteBridgeStats() {
return this.writeBridgeStats;
}
diff --git a/src/org/torproject/ernie/db/DirreqStatsFileHandler.java b/src/org/torproject/ernie/db/DirreqStatsFileHandler.java
index 2ab65c9..83372dd 100644
--- a/src/org/torproject/ernie/db/DirreqStatsFileHandler.java
+++ b/src/org/torproject/ernie/db/DirreqStatsFileHandler.java
@@ -17,7 +17,7 @@ import java.util.logging.*;
public class DirreqStatsFileHandler {
/**
- * Two-letter country codes of countries that we care about.
+ * Two-letter country codes of known countries.
*/
private SortedSet<String> countries;
@@ -28,10 +28,9 @@ public class DirreqStatsFileHandler {
/**
* Directory requests by directory and date. Map keys are directory and
- * date written as "directory,date", map values are lines as read from
- * <code>stats/dirreq-stats</code>.
+ * date written as "directory,date", map values map countries to requests.
*/
- private SortedMap<String, String> dirreqs;
+ private SortedMap<String, Map<String, String>> dirreqs;
/**
* Modification flag for directory requests stored in memory. This flag
@@ -54,15 +53,15 @@ public class DirreqStatsFileHandler {
* Initializes this class, including reading in previous results from
* <code>stats/dirreq-stats</code>.
*/
- public DirreqStatsFileHandler(SortedSet<String> countries,
- String connectionURL) {
+ public DirreqStatsFileHandler(String connectionURL) {
- /* Memorize the set of countries we care about. */
- this.countries = countries;
+ /* Initialize set of known countries. */
+ this.countries = new TreeSet<String>();
+ this.countries.add("zy");
/* Initialize local data structure to hold observations received from
* RelayDescriptorParser. */
- this.dirreqs = new TreeMap<String, String>();
+ this.dirreqs = new TreeMap<String, Map<String, String>>();
/* Initialize file name for observations file. */
this.dirreqStatsFile = new File("stats/dirreq-stats");
@@ -94,9 +93,7 @@ public class DirreqStatsFileHandler {
} else {
String[] headers = line.split(",");
for (int i = 2; i < headers.length - 1; i++) {
- if (headers[i].equals("all")) {
- this.countries.add("zy");
- } else {
+ if (!headers[i].equals("all")) {
this.countries.add(headers[i]);
}
}
@@ -113,20 +110,18 @@ public class DirreqStatsFileHandler {
}
String directory = parts[0];
String date = parts[1];
- /* If the share column contains NA, all the other columns do.
- * We only want to read in non-NA lines here. */
- if (!parts[parts.length - 1].equals("NA")) {
- Map<String, String> obs = new HashMap<String, String>();
- for (int i = 2; i < parts.length - 1; i++) {
- if (headers[i].equals("all")) {
- obs.put("zy", parts[i]);
- } else {
- obs.put(headers[i], parts[i]);
- }
+ Map<String, String> obs = new HashMap<String, String>();
+ for (int i = 2; i < parts.length - 1; i++) {
+ if (parts[i].equals("NA")) {
+ continue;
+ }
+ if (headers[i].equals("all")) {
+ obs.put("zy", parts[i]);
+ } else {
+ obs.put(headers[i], parts[i]);
}
- String share = parts[parts.length - 1];
- this.addObs(directory, date, obs, share);
}
+ this.addObs(directory, date, obs);
}
}
}
@@ -146,28 +141,24 @@ public class DirreqStatsFileHandler {
/**
* Adds observations on the number of directory requests by country as
- * seen on a directory at a given data that expected to see the given
- * share of all directory requests in the network.
+ * seen on a directory at a given date.
*/
public void addObs(String directory, String date,
- Map<String, String> obs, String share) {
- String key = directory + "," + date;
- StringBuilder sb = new StringBuilder(key);
- for (String c : this.countries) {
- sb.append("," + (obs.containsKey(c) ? obs.get(c) : "0"));
+ Map<String, String> obs) {
+ for (String country : obs.keySet()) {
+ this.countries.add(country);
}
- sb.append("," + share);
- String value = sb.toString();
+ String key = directory + "," + date;
if (!this.dirreqs.containsKey(key)) {
- this.logger.finer("Adding new directory request numbers: " + value);
- this.dirreqs.put(key, value);
+ this.logger.finer("Adding new directory request numbers: " + key);
+ this.dirreqs.put(key, obs);
this.dirreqsModified = true;
this.addedResults++;
- } else if (value.compareTo(this.dirreqs.get(key)) > 0) {
- this.logger.warning("The directory request numbers we were just "
- + "given (" + value + ") are different from what we learned "
- + "before (" + this.dirreqs.get(key) + "! Overwriting!");
- this.dirreqs.put(key, value);
+ } else {
+ this.logger.fine("The directory request numbers we were just "
+ + "given for " + key + " may be different from what we learned "
+ + "before. Overwriting!");
+ this.dirreqs.put(key, obs);
this.dirreqsModified = true;
}
}
@@ -195,32 +186,17 @@ public class DirreqStatsFileHandler {
bw.append("," + country);
}
}
- bw.append(",share\n");
- /* Memorize last written date and directory to fill missing dates
- * with NA's. */
- long lastDateMillis = 0L;
- String lastDirectory = null;
- SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
- dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- for (String line : this.dirreqs.values()) {
- /* Fill missing dates with NA's. */
- String[] parts = line.split(",");
- String currentDirectory = parts[0];
- long currentDateMillis = dateFormat.parse(parts[1]).getTime();
- while (currentDirectory.equals(lastDirectory) &&
- currentDateMillis - 24L * 60L * 60L * 1000L
- > lastDateMillis) {
- lastDateMillis += 24L * 60L * 60L * 1000L;
- bw.append(currentDirectory + ","
- + dateFormat.format(lastDateMillis));
- for (int i = 0; i < this.countries.size(); i++) {
- bw.append(",NA");
- }
- bw.append(",NA\n");
+ bw.append("\n");
+ /* Write observations. */
+ for (Map.Entry<String, Map<String, String>> e :
+ this.dirreqs.entrySet()) {
+ String key = e.getKey();
+ Map<String, String> obs = e.getValue();
+ StringBuilder sb = new StringBuilder(key);
+ for (String c : this.countries) {
+ sb.append("," + (obs.containsKey(c) ? obs.get(c) : "NA"));
}
- lastDateMillis = currentDateMillis;
- lastDirectory = currentDirectory;
- /* Write current observation. */
+ String line = sb.toString();
bw.append(line + "\n");
}
bw.close();
@@ -229,9 +205,6 @@ public class DirreqStatsFileHandler {
} catch (IOException e) {
this.logger.log(Level.WARNING, "Failed to write file "
+ this.dirreqStatsFile.getAbsolutePath() + "!", e);
- } catch (ParseException e) {
- this.logger.log(Level.WARNING, "Failed to write file "
- + this.dirreqStatsFile.getAbsolutePath() + "!", e);
}
} else {
this.logger.fine("Not writing file "
@@ -248,25 +221,26 @@ public class DirreqStatsFileHandler {
}
Map<String, String> insertRows = new HashMap<String, String>(),
updateRows = new HashMap<String, String>();
- for (String dirreq : this.dirreqs.values()) {
- String[] parts = dirreq.split(",");
+ for (Map.Entry<String, Map<String, String>> e :
+ this.dirreqs.entrySet()) {
+ String[] parts = e.getKey().split(",");
String directory = parts[0];
String date = parts[1];
- String share = parts[parts.length - 1];
- for (int i = 2; i < parts.length - 1; i++) {
- String country = countryList.get(i - 2);
- String key = directory + "," + date + "," + country;
- String requests = parts[i];
- String value = requests + "," + share;
- insertRows.put(key, value);
+ Map<String, String> obs = e.getValue();
+ int i = 0;
+ for (String country : this.countries) {
+ if (obs.containsKey(country)) {
+ String key = directory + "," + date + "," + country;
+ String requests = "" + obs.get(country);
+ insertRows.put(key, requests);
+ }
}
}
Connection conn = DriverManager.getConnection(connectionURL);
conn.setAutoCommit(false);
Statement statement = conn.createStatement();
ResultSet rs = statement.executeQuery(
- "SELECT source, date, country, requests, share "
- + "FROM dirreq_stats");
+ "SELECT source, date, country, requests FROM dirreq_stats");
while (rs.next()) {
String source = rs.getString(1);
String date = rs.getDate(2).toString();
@@ -283,41 +257,37 @@ public class DirreqStatsFileHandler {
}
rs.close();
PreparedStatement psU = conn.prepareStatement(
- "UPDATE dirreq_stats SET requests = ?, share = ? "
+ "UPDATE dirreq_stats SET requests = ? "
+ "WHERE source = ? AND date = ? AND country = ?");
for (Map.Entry<String, String> e : updateRows.entrySet()) {
String[] keyParts = e.getKey().split(",");
- String[] valueParts = e.getValue().split(",");
+ String valueParts = e.getValue();
String source = keyParts[0];
java.sql.Date date = java.sql.Date.valueOf(keyParts[1]);
String country = keyParts[2];
- long requests = Long.parseLong(valueParts[0]);
- double share = Double.parseDouble(valueParts[1]);
+ long requests = Long.parseLong(valueParts);
psU.clearParameters();
psU.setLong(1, requests);
- psU.setDouble(2, share);
- psU.setString(3, source);
- psU.setDate(4, date);
- psU.setString(5, country);
+ psU.setString(2, source);
+ psU.setDate(3, date);
+ psU.setString(4, country);
psU.executeUpdate();
}
PreparedStatement psI = conn.prepareStatement(
- "INSERT INTO dirreq_stats (requests, share, source, date, "
- + "country) VALUES (?, ?, ?, ?, ?)");
+ "INSERT INTO dirreq_stats (requests, source, date, "
+ + "country) VALUES (?, ?, ?, ?)");
for (Map.Entry<String, String> e : insertRows.entrySet()) {
String[] keyParts = e.getKey().split(",");
- String[] valueParts = e.getValue().split(",");
+ String valueParts = e.getValue();
String source = keyParts[0];
java.sql.Date date = java.sql.Date.valueOf(keyParts[1]);
String country = keyParts[2];
- long requests = Long.parseLong(valueParts[0]);
- double share = Double.parseDouble(valueParts[1]);
+ long requests = Long.parseLong(valueParts);
psI.clearParameters();
psI.setLong(1, requests);
- psI.setDouble(2, share);
- psI.setString(3, source);
- psI.setDate(4, date);
- psI.setString(5, country);
+ psI.setString(2, source);
+ psI.setDate(3, date);
+ psI.setString(4, country);
psI.executeUpdate();
}
conn.commit();
diff --git a/src/org/torproject/ernie/db/Main.java b/src/org/torproject/ernie/db/Main.java
index 9f5079f..64dcc55 100644
--- a/src/org/torproject/ernie/db/Main.java
+++ b/src/org/torproject/ernie/db/Main.java
@@ -29,22 +29,17 @@ public class Main {
System.exit(1);
}
- // Define which stats we are interested in
- SortedSet<String> countries = config.getDirreqBridgeCountries();
- countries.add("zy"); // Add country zy for 'all users'
- SortedSet<String> directories = config.getDirreqDirectories();
-
// Prepare stats file handlers (only if we are writing stats)
ConsensusStatsFileHandler csfh = config.getWriteConsensusStats() ?
new ConsensusStatsFileHandler(
config.getWriteAggregateStatsDatabase() ?
config.getRelayDescriptorDatabaseJDBC() : null) : null;
BridgeStatsFileHandler bsfh = config.getWriteBridgeStats() ?
- new BridgeStatsFileHandler(countries,
+ new BridgeStatsFileHandler(
config.getWriteAggregateStatsDatabase() ?
config.getRelayDescriptorDatabaseJDBC() : null) : null;
DirreqStatsFileHandler dsfh = config.getWriteDirreqStats() ?
- new DirreqStatsFileHandler(countries,
+ new DirreqStatsFileHandler(
config.getWriteAggregateStatsDatabase() ?
config.getRelayDescriptorDatabaseJDBC() : null) : null;
@@ -75,8 +70,8 @@ public class Main {
config.getWriteRelayDescriptorDatabase() ||
config.getWriteRelayDescriptorsRawFiles() ||
config.getWriteConsensusHealth() ?
- new RelayDescriptorParser(csfh, bsfh, dsfh, aw, rddi, chc,
- countries, directories) : null;
+ new RelayDescriptorParser(csfh, bsfh, dsfh, aw, rddi, chc)
+ : null;
// Import/download relay descriptors from the various sources
if (rdp != null) {
@@ -87,14 +82,12 @@ public class Main {
boolean downloadCurrentConsensus = aw != null || csfh != null ||
bsfh != null || rddi != null || chc != null;
boolean downloadCurrentVotes = aw != null || chc != null;
- boolean downloadAllServerDescriptors = aw != null || rddi != null;
- boolean downloadAllExtraInfos = aw != null;
- Set<String> downloadDescriptorsForRelays = bsfh != null ||
- dsfh != null ? directories : new HashSet<String>();
+ boolean downloadAllServerDescriptors = aw != null ||
+ dsfh != null || rddi != null;
+ boolean downloadAllExtraInfos = aw != null || dsfh != null;
rdd = new RelayDescriptorDownloader(rdp, dirSources,
downloadCurrentConsensus, downloadCurrentVotes,
- downloadAllServerDescriptors, downloadAllExtraInfos,
- downloadDescriptorsForRelays);
+ downloadAllServerDescriptors, downloadAllExtraInfos);
rdp.setRelayDescriptorDownloader(rdd);
}
if (config.getImportCachedRelayDescriptors()) {
@@ -151,7 +144,7 @@ public class Main {
// Prepare bridge descriptor parser
BridgeDescriptorParser bdp = config.getWriteConsensusStats() ||
config.getWriteBridgeStats() || config.getWriteSanitizedBridges()
- ? new BridgeDescriptorParser(csfh, bsfh, sbw, countries) : null;
+ ? new BridgeDescriptorParser(csfh, bsfh, sbw) : null;
// Import bridge descriptors
if (bdp != null && config.getImportSanitizedBridges()) {
diff --git a/src/org/torproject/ernie/db/RelayDescriptorDownloader.java b/src/org/torproject/ernie/db/RelayDescriptorDownloader.java
index c85473d..a88c36a 100644
--- a/src/org/torproject/ernie/db/RelayDescriptorDownloader.java
+++ b/src/org/torproject/ernie/db/RelayDescriptorDownloader.java
@@ -75,12 +75,6 @@ public class RelayDescriptorDownloader {
private boolean downloadAllExtraInfos;
/**
- * Should we try to download missing server and extra-info descriptors
- * of certain relays that have been published within the past 24 hours?
- */
- private Set<String> downloadDescriptorsForRelays;
-
- /**
* valid-after time that we expect the current consensus and votes to
* have, formatted "yyyy-MM-dd HH:mm:ss". We only expect to find
* consensuses and votes with this valid-after time on the directories.
@@ -122,8 +116,7 @@ public class RelayDescriptorDownloader {
public RelayDescriptorDownloader(RelayDescriptorParser rdp,
List<String> dirSources, boolean downloadCurrentConsensus,
boolean downloadCurrentVotes, boolean downloadAllServerDescriptors,
- boolean downloadAllExtraInfos,
- Set<String> downloadDescriptorsForRelays) {
+ boolean downloadAllExtraInfos) {
/* Memorize argument values. */
this.rdp = rdp;
@@ -132,7 +125,6 @@ public class RelayDescriptorDownloader {
this.downloadCurrentVotes = downloadCurrentVotes;
this.downloadAllServerDescriptors = downloadAllServerDescriptors;
this.downloadAllExtraInfos = downloadAllExtraInfos;
- this.downloadDescriptorsForRelays = downloadDescriptorsForRelays;
/* Initialize logger. */
this.logger = Logger.getLogger(RelayDescriptorParser.class.getName());
@@ -252,21 +244,15 @@ public class RelayDescriptorDownloader {
}
/* Add server descriptors to missing list. */
- if (this.downloadAllServerDescriptors ||
- this.downloadDescriptorsForRelays != null) {
+ if (this.downloadAllServerDescriptors) {
for (String serverDescriptor : serverDescriptors) {
String published = serverDescriptor.split(",")[0];
if (this.descriptorCutOff.compareTo(published) <= 0) {
- if (this.downloadAllServerDescriptors ||
- (this.downloadDescriptorsForRelays != null &&
- this.downloadDescriptorsForRelays.contains(
- serverDescriptor.split(",")[1].toUpperCase()))) {
- String serverDescriptorKey = "server," + serverDescriptor;
- if (!this.missingDescriptors.containsKey(
- serverDescriptorKey)) {
- this.missingDescriptors.put(serverDescriptorKey, "NA");
- this.newMissingServerDescriptors++;
- }
+ String serverDescriptorKey = "server," + serverDescriptor;
+ if (!this.missingDescriptors.containsKey(
+ serverDescriptorKey)) {
+ this.missingDescriptors.put(serverDescriptorKey, "NA");
+ this.newMissingServerDescriptors++;
}
}
}
@@ -286,20 +272,15 @@ public class RelayDescriptorDownloader {
}
/* Add server descriptors to missing list. */
- if (this.downloadAllServerDescriptors ||
- this.downloadDescriptorsForRelays != null) {
+ if (this.downloadAllServerDescriptors) {
for (String serverDescriptor : serverDescriptors) {
String published = serverDescriptor.split(",")[0];
if (this.descriptorCutOff.compareTo(published) < 0) {
- if (this.downloadDescriptorsForRelays == null ||
- this.downloadDescriptorsForRelays.contains(
- serverDescriptor.split(",")[1].toUpperCase())) {
- String serverDescriptorKey = "server," + serverDescriptor;
- if (!this.missingDescriptors.containsKey(
- serverDescriptorKey)) {
- this.missingDescriptors.put(serverDescriptorKey, "NA");
- this.newMissingServerDescriptors++;
- }
+ String serverDescriptorKey = "server," + serverDescriptor;
+ if (!this.missingDescriptors.containsKey(
+ serverDescriptorKey)) {
+ this.missingDescriptors.put(serverDescriptorKey, "NA");
+ this.newMissingServerDescriptors++;
}
}
}
@@ -323,10 +304,7 @@ public class RelayDescriptorDownloader {
this.parsedTimestampString);
/* Add extra-info descriptor to missing list. */
- if (extraInfoDigest != null && (this.downloadAllExtraInfos ||
- (this.downloadDescriptorsForRelays != null &&
- this.downloadDescriptorsForRelays.contains(relayIdentity.
- toUpperCase())))) {
+ if (extraInfoDigest != null && this.downloadAllExtraInfos) {
String extraInfoKey = "extra," + published + ","
+ relayIdentity + "," + extraInfoDigest;
if (!this.missingDescriptors.containsKey(extraInfoKey)) {
@@ -404,17 +382,11 @@ public class RelayDescriptorDownloader {
this.currentValidAfter.equals(parts[1])) {
urls.add("/tor/status-vote/current/" + parts[2]);
} else if (parts[0].equals("server") &&
- (this.downloadAllServerDescriptors ||
- (this.downloadDescriptorsForRelays != null &&
- this.downloadDescriptorsForRelays.contains(parts[2].
- toUpperCase()))) &&
+ this.downloadAllServerDescriptors &&
this.descriptorCutOff.compareTo(parts[1]) <= 0) {
urls.add("/tor/server/d/" + parts[3]);
} else if (parts[0].equals("extra") &&
- (this.downloadAllExtraInfos ||
- (this.downloadDescriptorsForRelays != null &&
- this.downloadDescriptorsForRelays.contains(parts[2].
- toUpperCase()))) &&
+ this.downloadAllExtraInfos &&
this.descriptorCutOff.compareTo(parts[1]) <= 0) {
urls.add("/tor/extra/d/" + parts[3]);
}
diff --git a/src/org/torproject/ernie/db/RelayDescriptorParser.java b/src/org/torproject/ernie/db/RelayDescriptorParser.java
index bbc1f16..c110bb5 100644
--- a/src/org/torproject/ernie/db/RelayDescriptorParser.java
+++ b/src/org/torproject/ernie/db/RelayDescriptorParser.java
@@ -55,17 +55,6 @@ public class RelayDescriptorParser {
private ConsensusHealthChecker chc;
/**
- * Countries that we care about for directory request and bridge
- * statistics.
- */
- private SortedSet<String> countries;
-
- /**
- * Directories that we care about for directory request statistics.
- */
- private SortedSet<String> directories;
-
- /**
* Logger for this class.
*/
private Logger logger;
@@ -78,16 +67,13 @@ public class RelayDescriptorParser {
public RelayDescriptorParser(ConsensusStatsFileHandler csfh,
BridgeStatsFileHandler bsfh, DirreqStatsFileHandler dsfh,
ArchiveWriter aw, RelayDescriptorDatabaseImporter rddi,
- ConsensusHealthChecker chc, SortedSet<String> countries,
- SortedSet<String> directories) {
+ ConsensusHealthChecker chc) {
this.csfh = csfh;
this.bsfh = bsfh;
this.dsfh = dsfh;
this.aw = aw;
this.rddi = rddi;
this.chc = chc;
- this.countries = countries;
- this.directories = directories;
/* Initialize logger. */
this.logger = Logger.getLogger(RelayDescriptorParser.class.getName());
@@ -269,9 +255,9 @@ public class RelayDescriptorParser {
}
}
} else if (line.startsWith("router ")) {
- String platformLine = null, publishedTime = null,
- bandwidthLine = null, extraInfoDigest = null,
- relayIdentifier = null;
+ String platformLine = null, publishedLine = null,
+ publishedTime = null, bandwidthLine = null,
+ extraInfoDigest = null, relayIdentifier = null;
String[] parts = line.split(" ");
String nickname = parts[1];
String address = parts[2];
@@ -334,7 +320,7 @@ public class RelayDescriptorParser {
String publishedTime = null, relayIdentifier = line.split(" ")[2];
long published = -1L;
String dir = line.split(" ")[2];
- String date = null, v3Reqs = null;
+ String date = null;
SortedMap<String, String> bandwidthHistory =
new TreeMap<String, String>();
boolean skip = false;
@@ -378,28 +364,19 @@ public class RelayDescriptorParser {
date = line.split(" ")[1];
} else if (line.startsWith("dirreq-v3-reqs ")
&& line.length() > "dirreq-v3-reqs ".length()) {
- v3Reqs = line.split(" ")[1];
- } else if (line.startsWith("dirreq-v3-share ")
- && v3Reqs != null && !skip) {
int allUsers = 0;
Map<String, String> obs = new HashMap<String, String>();
- String[] parts = v3Reqs.split(",");
+ String[] parts = line.substring("dirreq-v3-reqs ".length()).
+ split(",");
for (String p : parts) {
- allUsers += Integer.parseInt(p.substring(3)) - 4;
- for (String c : this.countries) {
- if (p.startsWith(c)) {
- // TODO in theory, we should substract 4 here, too
- obs.put(c, p.substring(3));
- break;
- }
- }
+ String country = p.substring(0, 2);
+ int users = Integer.parseInt(p.substring(3)) - 4;
+ allUsers += users;
+ obs.put(country, "" + users);
}
obs.put("zy", "" + allUsers);
- String share = line.substring("dirreq-v3-share ".length(),
- line.length() - 1);
- if (this.dsfh != null &&
- directories.contains(relayIdentifier)) {
- this.dsfh.addObs(dir, date, obs, share);
+ if (this.dsfh != null) {
+ this.dsfh.addObs(dir, date, obs);
}
}
}
--
1.7.1
More information about the tor-commits
mailing list