[or-cvs] [metrics-db/master 1/2] Remove descriptor stats that are now part of the DB schema.
karsten at torproject.org
karsten at torproject.org
Fri Nov 19 13:36:18 UTC 2010
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Fri, 19 Nov 2010 09:45:13 +0100
Subject: Remove descriptor stats that are now part of the DB schema.
Commit: 777cb6f3e0ad3bf1f89162799ca1711c39ea5535
---
config | 10 -
src/org/torproject/ernie/db/Configuration.java | 30 +-
src/org/torproject/ernie/db/Main.java | 16 +-
.../torproject/ernie/db/RelayDescriptorParser.java | 41 +-
.../ernie/db/ServerDescriptorStatsFileHandler.java | 624 --------------------
5 files changed, 16 insertions(+), 705 deletions(-)
delete mode 100644 src/org/torproject/ernie/db/ServerDescriptorStatsFileHandler.java
diff --git a/config b/config
index 8fd75b9..f5486e7 100644
--- a/config
+++ b/config
@@ -128,14 +128,4 @@
#
## Write bridge stats to disk
#WriteBridgeStats 0
-#
-## Write server descriptors stats to disk
-#WriteServerDescriptorStats 0
-#
-## Comma-separated list of relay versions to be included in version-stats
-#RelayVersions 0.1.2,0.2.0,0.2.1,0.2.2
-#
-## Comma-separated list of relay platforms to be included in
-## platform-stats
-#RelayPlatforms Linux,Windows,Darwin,FreeBSD
diff --git a/src/org/torproject/ernie/db/Configuration.java b/src/org/torproject/ernie/db/Configuration.java
index 428dbb5..ea694a6 100644
--- a/src/org/torproject/ernie/db/Configuration.java
+++ b/src/org/torproject/ernie/db/Configuration.java
@@ -22,11 +22,6 @@ public class Configuration {
Arrays.asList(("8522EB98C91496E80EC238E732594D1509158E77,"
+ "9695DFC35FFEB861329B9F1AB04C46397020CE31").split(",")));
private boolean writeBridgeStats = false;
- private boolean writeServerDescriptorStats = false;
- private List<String> relayVersions = new ArrayList<String>(
- Arrays.asList("0.1.2,0.2.0,0.2.1,0.2.2".split(",")));
- private List<String> relayPlatforms = new ArrayList<String>(
- Arrays.asList("Linux,Windows,Darwin,FreeBSD".split(",")));
private boolean writeDirectoryArchives = false;
private String directoryArchivesOutputDirectory = "directory-archive/";
private boolean importCachedRelayDescriptors = false;
@@ -108,15 +103,6 @@ public class Configuration {
} else if (line.startsWith("WriteBridgeStats")) {
this.writeBridgeStats = Integer.parseInt(
line.split(" ")[1]) != 0;
- } else if (line.startsWith("WriteServerDescriptorStats")) {
- this.writeServerDescriptorStats = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("RelayVersions")) {
- this.relayVersions = new ArrayList<String>(
- Arrays.asList(line.split(" ")[1].split(",")));
- } else if (line.startsWith("RelayPlatforms")) {
- this.relayPlatforms = new ArrayList<String>(
- Arrays.asList(line.split(" ")[1].split(",")));
} else if (line.startsWith("WriteDirectoryArchives")) {
this.writeDirectoryArchives = Integer.parseInt(
line.split(" ")[1]) != 0;
@@ -251,7 +237,7 @@ public class Configuration {
!this.writeAggregateStatsDatabase &&
!this.writeSanitizedBridges && !this.writeConsensusStats &&
!this.writeDirreqStats && !this.writeBridgeStats &&
- !this.writeServerDescriptorStats && !this.writeConsensusHealth) {
+ !this.writeConsensusHealth) {
logger.warning("We have not been configured to read data from any "
+ "data source or write data to any data sink. You need to "
+ "edit your config file (" + configFile.getAbsolutePath()
@@ -264,7 +250,7 @@ public class Configuration {
this.writeRelayDescriptorDatabase ||
this.writeRelayDescriptorsRawFiles || this.writeConsensusStats ||
this.writeDirreqStats || this.writeBridgeStats ||
- this.writeServerDescriptorStats || this.writeConsensusHealth)) {
+ this.writeConsensusHealth)) {
logger.warning("We are configured to import/download relay "
+ "descriptors, but we don't have a single data sink to write "
+ "relay descriptors to.");
@@ -272,8 +258,7 @@ public class Configuration {
if (!(this.importCachedRelayDescriptors ||
this.importDirectoryArchives || this.downloadRelayDescriptors) &&
(this.writeDirectoryArchives ||
- this.writeRelayDescriptorDatabase || this.writeDirreqStats ||
- this.writeServerDescriptorStats)) {
+ this.writeRelayDescriptorDatabase || this.writeDirreqStats)) {
logger.warning("We are configured to write relay descriptor to at "
+ "least one data sink, but we don't have a single data source "
+ "containing relay descriptors.");
@@ -315,15 +300,6 @@ public class Configuration {
public boolean getWriteBridgeStats() {
return this.writeBridgeStats;
}
- public boolean getWriteServerDescriptorStats() {
- return this.writeServerDescriptorStats;
- }
- public List<String> getRelayVersions() {
- return this.relayVersions;
- }
- public List<String> getRelayPlatforms() {
- return this.relayPlatforms;
- }
public boolean getWriteDirectoryArchives() {
return this.writeDirectoryArchives;
}
diff --git a/src/org/torproject/ernie/db/Main.java b/src/org/torproject/ernie/db/Main.java
index 207c594..9f5079f 100644
--- a/src/org/torproject/ernie/db/Main.java
+++ b/src/org/torproject/ernie/db/Main.java
@@ -47,10 +47,6 @@ public class Main {
new DirreqStatsFileHandler(countries,
config.getWriteAggregateStatsDatabase() ?
config.getRelayDescriptorDatabaseJDBC() : null) : null;
- ServerDescriptorStatsFileHandler sdsfh =
- config.getWriteServerDescriptorStats() ?
- new ServerDescriptorStatsFileHandler(config.getRelayVersions(),
- config.getRelayPlatforms()) : null;
// Prepare consensus health checker
ConsensusHealthChecker chc = config.getWriteConsensusHealth() ?
@@ -75,12 +71,11 @@ public class Main {
// directory archives to disk)
RelayDescriptorParser rdp = config.getWriteConsensusStats() ||
config.getWriteBridgeStats() || config.getWriteDirreqStats() ||
- config.getWriteServerDescriptorStats() ||
config.getWriteDirectoryArchives() ||
config.getWriteRelayDescriptorDatabase() ||
config.getWriteRelayDescriptorsRawFiles() ||
config.getWriteConsensusHealth() ?
- new RelayDescriptorParser(csfh, bsfh, dsfh, sdsfh, aw, rddi, chc,
+ new RelayDescriptorParser(csfh, bsfh, dsfh, aw, rddi, chc,
countries, directories) : null;
// Import/download relay descriptors from the various sources
@@ -90,10 +85,9 @@ public class Main {
List<String> dirSources =
config.getDownloadFromDirectoryAuthorities();
boolean downloadCurrentConsensus = aw != null || csfh != null ||
- bsfh != null || sdsfh != null || rddi != null || chc != null;
+ bsfh != null || rddi != null || chc != null;
boolean downloadCurrentVotes = aw != null || chc != null;
- boolean downloadAllServerDescriptors = aw != null ||
- sdsfh != null || rddi != null;
+ boolean downloadAllServerDescriptors = aw != null || rddi != null;
boolean downloadAllExtraInfos = aw != null;
Set<String> downloadDescriptorsForRelays = bsfh != null ||
dsfh != null ? directories : new HashSet<String>();
@@ -148,10 +142,6 @@ public class Main {
dsfh.writeFile();
dsfh = null;
}
- if (sdsfh != null) {
- sdsfh.writeFiles();
- sdsfh = null;
- }
// Prepare sanitized bridge descriptor writer
SanitizedBridgesWriter sbw = config.getWriteSanitizedBridges() ?
diff --git a/src/org/torproject/ernie/db/RelayDescriptorParser.java b/src/org/torproject/ernie/db/RelayDescriptorParser.java
index 1dc10c4..bbc1f16 100644
--- a/src/org/torproject/ernie/db/RelayDescriptorParser.java
+++ b/src/org/torproject/ernie/db/RelayDescriptorParser.java
@@ -35,12 +35,6 @@ public class RelayDescriptorParser {
private BridgeStatsFileHandler bsfh;
/**
- * Stats file handler that accepts parse results for server descriptor
- * statistics.
- */
- private ServerDescriptorStatsFileHandler sdsfh;
-
- /**
* File writer that writes descriptor contents to files in a
* directory-archive directory structure.
*/
@@ -83,13 +77,12 @@ public class RelayDescriptorParser {
*/
public RelayDescriptorParser(ConsensusStatsFileHandler csfh,
BridgeStatsFileHandler bsfh, DirreqStatsFileHandler dsfh,
- ServerDescriptorStatsFileHandler sdsfh, ArchiveWriter aw,
- RelayDescriptorDatabaseImporter rddi, ConsensusHealthChecker chc,
- SortedSet<String> countries, SortedSet<String> directories) {
+ ArchiveWriter aw, RelayDescriptorDatabaseImporter rddi,
+ ConsensusHealthChecker chc, SortedSet<String> countries,
+ SortedSet<String> directories) {
this.csfh = csfh;
this.bsfh = bsfh;
this.dsfh = dsfh;
- this.sdsfh = sdsfh;
this.aw = aw;
this.rddi = rddi;
this.chc = chc;
@@ -130,10 +123,9 @@ public class RelayDescriptorParser {
// consensuses
boolean isConsensus = true;
int exit = 0, fast = 0, guard = 0, running = 0, stable = 0;
- String validAfterTime = null, descriptorIdentity = null,
- nickname = null, relayIdentity = null, serverDesc = null,
- version = null, ports = null;
- StringBuilder descriptorIdentities = new StringBuilder();
+ String validAfterTime = null, nickname = null,
+ relayIdentity = null, serverDesc = null, version = null,
+ ports = null;
String fingerprint = null, dirSource = null, address = null;
long validAfter = -1L, published = -1L, bandwidth = -1L,
orPort = 0L, dirPort = 0L;
@@ -181,7 +173,6 @@ public class RelayDescriptorParser {
hashedRelayIdentities.add(DigestUtils.shaHex(
Base64.decodeBase64(parts[2] + "=")).
toUpperCase());
- descriptorIdentity = parts[3];
published = parseFormat.parse(parts[4] + " " + parts[5]).
getTime();
address = parts[6];
@@ -195,7 +186,6 @@ public class RelayDescriptorParser {
guard += line.contains(" Guard") ? 1 : 0;
stable += line.contains(" Stable") ? 1 : 0;
running++;
- descriptorIdentities.append("," + descriptorIdentity);
}
relayFlags = new TreeSet<String>();
if (line.length() > 2) {
@@ -240,10 +230,6 @@ public class RelayDescriptorParser {
this.csfh.addConsensusResults(validAfterTime, exit, fast,
guard, running, stable);
}
- if (this.sdsfh != null) {
- this.sdsfh.addConsensus(validAfterTime,
- descriptorIdentities.toString().substring(1));
- }
if (this.rdd != null) {
this.rdd.haveParsedConsensus(validAfterTime, dirSources,
serverDescriptors);
@@ -283,9 +269,9 @@ public class RelayDescriptorParser {
}
}
} else if (line.startsWith("router ")) {
- String platformLine = null, publishedLine = null,
- publishedTime = null, bandwidthLine = null,
- extraInfoDigest = null, relayIdentifier = null;
+ String platformLine = null, publishedTime = null,
+ bandwidthLine = null, extraInfoDigest = null,
+ relayIdentifier = null;
String[] parts = line.split(" ");
String nickname = parts[1];
String address = parts[2];
@@ -296,7 +282,6 @@ public class RelayDescriptorParser {
if (line.startsWith("platform ")) {
platformLine = line;
} else if (line.startsWith("published ")) {
- publishedLine = line;
publishedTime = line.substring("published ".length());
published = parseFormat.parse(publishedTime).getTime();
} else if (line.startsWith("opt fingerprint") ||
@@ -320,12 +305,10 @@ public class RelayDescriptorParser {
String sigToken = "\nrouter-signature\n";
int start = ascii.indexOf(startToken);
int sig = ascii.indexOf(sigToken) + sigToken.length();
- String digest = null, descriptorIdentity = null;
+ String digest = null;
if (start >= 0 || sig >= 0 || sig > start) {
byte[] forDigest = new byte[sig - start];
System.arraycopy(data, start, forDigest, 0, sig - start);
- descriptorIdentity = Base64.encodeBase64String(
- DigestUtils.sha(forDigest)).substring(0, 27);
digest = DigestUtils.shaHex(forDigest);
}
if (this.aw != null && digest != null) {
@@ -335,10 +318,6 @@ public class RelayDescriptorParser {
this.rdd.haveParsedServerDescriptor(publishedTime,
relayIdentifier, digest, extraInfoDigest);
}
- if (this.sdsfh != null && descriptorIdentity != null) {
- this.sdsfh.addServerDescriptor(descriptorIdentity, platformLine,
- publishedLine, bandwidthLine);
- }
if (this.rddi != null && digest != null) {
String[] bwParts = bandwidthLine.split(" ");
long bandwidthAvg = Long.parseLong(bwParts[1]);
diff --git a/src/org/torproject/ernie/db/ServerDescriptorStatsFileHandler.java b/src/org/torproject/ernie/db/ServerDescriptorStatsFileHandler.java
deleted file mode 100644
index 9368b28..0000000
--- a/src/org/torproject/ernie/db/ServerDescriptorStatsFileHandler.java
+++ /dev/null
@@ -1,624 +0,0 @@
-/* Copyright 2010 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.*;
-import java.text.*;
-import java.util.*;
-import java.util.logging.*;
-
-/**
- * Generates statistics about relays in the Tor network from data that
- * relays write to their server descriptors. Accepts lists of referenced
- * descriptors in network status consensuses and selected lines from
- * server descriptors from <code>RelayDescriptorParser</code>. Keeps two
- * intermediate results files <code>stats/consensuses-raw</code> and
- * <code>stats/descriptors-raw</code> and writes three final results files
- * <code>stats/version-stats</code>, <code>stats/platform-stats</code>,
- * and <code>stats/bandwidth-stats</code>.
- */
-public class ServerDescriptorStatsFileHandler {
-
- /**
- * Intermediate results file <code>stats/consensuses-raw</code>
- * containing consensuses and the referenced descriptor identities of
- * relays with the Running flag set. The file format is
- * "valid-after,descid,descid,descid...\n" for each consensus. Lines are
- * ordered by valid-after time in ascending order.
- */
- private File consensusesFile;
-
- /**
- * Temporary file for writing <code>stats/consensuses-raw</code> while
- * reading that file at the same time. After read and write operations
- * are complete, the original file is deleted and the temporary file
- * renamed to be the new intermediate results file.
- */
- private File consensusesTempFile;
-
- /**
- * Intermediate results file <code>stats/descriptors-raw</code>
- * containing server descriptors with relevant fields for statistics.
- * The file format is "published,descid,version,platform,advbw\n" for
- * each server descriptors. Lines are first ordered by published time,
- * then by descid.
- */
- private File descriptorsFile;
-
- /**
- * Temporary file for writing <code>stats/descriptors-raw</code> while
- * reading that file at the same time. After read and write operations
- * are complete, the original file is deleted and the temporary file
- * renamed to be the new intermediate results file.
- */
- private File descriptorsTempFile;
-
- /**
- * Final results file <code>stats/version-stats</code> containing
- * statistics about Tor versions of relays in the network. The file
- * format is "date,version1,version2,...,other" with versions as
- * specified in config option RelayVersions.
- */
- private File versionStatsFile;
-
- /**
- * Final results file <code>stats/platform-stats</code> containing
- * statistics about operating systems of relays in the network. The
- * file format is "date,os1,os2,...,other" with operating systems as
- * specified in config option RelayPlatforms.
- */
- private File platformStatsFile;
-
- /**
- * Final results file <code>stats/bandwidth-stats</code> containing
- * statistics about the advertised bandwidth of relays in the network.
- * The file format is "date,advbw".
- */
- private File bandwidthStatsFile;
-
- /**
- * Consensuses and referenced descriptor identities of relays with the
- * Running flag set. This data structure only holds those consensuses
- * that were parsed in this execution, not the previously parsed
- * consensuses as read from disk. Map keys are valid-after times
- * formatted as "yyyy-MM-dd HH:mm:ss", map values are valid-after times
- * followed by a comma-separated list of base-64-formatted descriptor
- * identifiers.
- */
- private SortedMap<String, String> consensuses;
-
- /**
- * Server descriptors with relevant fields for statistics, ordered by
- * published time and descriptor identifier. Map keys are publication
- * times of descriptors formatted as "yyyy-MM-dd HH:mm:ss", a comma, and
- * base-64-formatted descriptor identifiers. An example key is
- * "2009-09-30 20:42:19,ZQZ5zq4q1U8Uynyk6lkUy5uAsdM" (length 47). Map
- * values are map keys plus version, platform, and advertised bandwidth
- * written as "published,descid,version,platform,advbw". Note that the
- * platform string may contain commas.
- */
- private SortedMap<String, String> descriptors;
-
- /**
- * Server descriptors as in <code>descriptors</code>, accessible by
- * descriptor identifiers only, without knowing the publication time.
- * Map keys are base-64-formatted descriptor identifiers, map values
- * are formatted as map values in <code>descriptors</code>.
- */
- private SortedMap<String, String> descById;
-
- /**
- * Tor relay versions that we care about.
- */
- private List<String> relayVersions;
-
- /**
- * Platforms (operating systems) that we care about.
- */
- private List<String> relayPlatforms;
-
- /**
- * Logger for this class.
- */
- private Logger logger;
-
- // TODO should there be a modified flag, too?
-
- /**
- * Initializes this class, without reading in any files. We're only
- * reading in files when writing results to disk in
- * <code>writeFiles</code>.
- */
- public ServerDescriptorStatsFileHandler(List<String> relayVersions,
- List<String> relayPlatforms) {
-
- /* Memorize versions and platforms that we care about. */
- this.relayVersions = relayVersions;
- this.relayPlatforms = relayPlatforms;
-
- /* Initialize local data structures. */
- this.consensuses = new TreeMap<String, String>();
- this.descriptors = new TreeMap<String, String>();
- this.descById = new TreeMap<String, String>();
-
- /* Initialize file names for intermediate and final results files. */
- this.versionStatsFile = new File("stats/version-stats");
- this.platformStatsFile = new File("stats/platform-stats");
- this.bandwidthStatsFile = new File("stats/bandwidth-stats");
- this.consensusesFile = new File("stats/consensuses-raw");
- this.consensusesTempFile = new File("stats/consensuses-raw.temp");
- this.descriptorsFile = new File("stats/descriptors-raw");
- this.descriptorsTempFile = new File("stats/descriptors-raw.temp");
-
- /* Initialize logger. */
- this.logger =
- Logger.getLogger(ServerDescriptorStatsFileHandler.class.getName());
- }
-
- /**
- * Adds a consensus to the list with its valid-after time and a list of
- * descriptor identifiers of relays that have the Running flag set. If
- * the number of consensuses in memory exceeds a certain number, an
- * auto-save mechanism is triggered by calling <code>writeFiles</code>.
- */
- public void addConsensus(String validAfter,
- String descriptorIdentities) {
-
- /* Add consensus to the list. */
- if (!this.consensuses.containsKey(validAfter)) {
- this.logger.finer("Adding consensus published at " + validAfter
- + ".");
- } else {
- this.logger.fine("We already learned about a consensus published "
- + "at " + validAfter + " in this execution. Overwriting.");
- }
- this.consensuses.put(validAfter, validAfter + ","
- + descriptorIdentities);
-
- /* Check if we have more 240 consensuses in memory (covering 10 days).
- * If so, trigger the auto-save mechanism. */
- if (this.consensuses.size() > 240) {
- this.logger.fine("Autosave triggered by adding consensus: We have "
- + this.consensuses.size() + " consensuses and "
- + this.descriptors.size() + " descriptors in memory. Writing "
- + "to disk now.");
- this.writeFiles();
- }
- }
-
- /**
- * Adds a server descriptor to the list with its identity and the
- * platform, published, and bandwidth lines. Version and operating
- * system are parsed from the platform line. The parsed version consists
- * only of the dotted numbers part (e.g. "0.2.1.2") without any
- * additions like "-alpha". The operating system is the substring after
- * " on " up to the first encountered opening curly bracket ("{").
- * The publication time is extracted from the published line. The
- * advertised bandwidth is calculated from the bandwidth line by taking
- * the minimum of average and observed bandwidth, divided by 1024 to
- * obtain KiB/s.
- */
- public void addServerDescriptor(String descriptorIdentity,
- String platformLine, String publishedLine, String bandwidthLine) {
-
- /* Parse version, platform, and advertised bandwidth from the given
- * lines. */
- String version = "", platform = "", published = "", advBw = "";
- if (platformLine.contains(" Tor ")) {
- version = platformLine.substring(platformLine.indexOf(" Tor ") + 5).
- split(" ")[0];
- }
- if (platformLine.contains(" on ")) {
- platform = platformLine.substring(platformLine.indexOf(" on ") + 4);
- if (platform.contains("{")) {
- platform = platform.substring(0, platform.indexOf("{")).trim();
- }
- }
- published = publishedLine.substring("published ".length());
- String[] bwParts = bandwidthLine.split(" ");
- if (bwParts.length == 4) {
- try {
- advBw = "" + (Math.min(Long.parseLong(bwParts[1]),
- Long.parseLong(bwParts[3])) / 1024L);
- } catch (NumberFormatException e) {
- this.logger.log(Level.WARNING, "Exception while parsing average "
- + "and observed bandwidth from line '" + bandwidthLine
- + "'. Not adding server descriptor!", e);
- return;
- }
- }
- String key = published + "," + descriptorIdentity;
- String line = key + "," + version + "," + platform + "," + advBw;
- if (!this.descriptors.containsKey(key)) {
- this.logger.finer("Adding server descriptor with identifier "
- + descriptorIdentity + ".");
- } else {
- this.logger.fine("We already learned about a server descriptor "
- + "with identifier " + descriptorIdentity + ", published at "
- + published + " in this execution. Overwriting.");
- }
- this.descriptors.put(key, line);
- this.descById.put(descriptorIdentity, line);
-
- /* Check if we have more 50K server descriptors in memory (covering 10
- * days as of early 2010). If so, trigger the auto-save mechanism. */
- if (this.descriptors.size() > 50000) {
- this.logger.fine("Autosave triggered by adding server descriptor: "
- + "We have " + this.consensuses.size() + " consensuses and "
- + this.descriptors.size() + " descriptors in memory. Writing "
- + "to disk now.");
- this.writeFiles();
- }
- }
-
- /**
- * Merges the newly learned consensuses and server descriptors with the
- * ones we wrote to disk earlier and extracts new statistics about relay
- * version, platforms, and advertised bandwidth.
- *
- * This method is rather complex, because we can only store a limited
- * number of consensuses and serer descriptors in memory. Also, we want
- * to avoid going through the files twice, once for merging old and new
- * lines and another time for extracting statistics.
- */
- public void writeFiles() {
-
- String lastWrittenDay = null;
-
- try {
-
- /* Initialize readers for reading intermediate results files from
- * disk. */
- BufferedReader consensusesReader = null;
- if (this.consensusesFile.exists()) {
- consensusesReader = new BufferedReader(new FileReader(
- this.consensusesFile));
- }
- BufferedReader descriptorsReader = null;
- if (this.descriptorsFile.exists()) {
- descriptorsReader = new BufferedReader(new FileReader(
- this.descriptorsFile));
- }
-
- /* Prepare writing intermediate results. The idea is to write to
- * temporary files while reading from the originals, delete the
- * originals, and rename the temporary files to be the new
- * originals. */
- this.consensusesTempFile.getParentFile().mkdirs();
- BufferedWriter consensusesWriter = new BufferedWriter(
- new FileWriter(this.consensusesTempFile));
- BufferedWriter descriptorsWriter = new BufferedWriter(
- new FileWriter(this.descriptorsTempFile));
-
- /* Prepare date format parsers. */
- SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
- dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- SimpleDateFormat dateTimeFormat =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-
- /* Prepare extracting statistics and writing them to disk. */
- String statsDate = null;
- int[] versionStats = new int[this.relayVersions.size() + 1];
- int[] platformStats = new int[this.relayPlatforms.size() + 1];
- long bandwidthStats = 0L;
- int consensusesAtThisDay = 0;
- BufferedWriter versionWriter = new BufferedWriter(new FileWriter(
- this.versionStatsFile));
- BufferedWriter platformWriter = new BufferedWriter(new FileWriter(
- this.platformStatsFile));
- BufferedWriter bandwidthWriter = new BufferedWriter(new FileWriter(
- this.bandwidthStatsFile));
- versionWriter.write("date");
- for (String v : this.relayVersions) {
- versionWriter.write("," + v);
- }
- versionWriter.write(",other\n");
- platformWriter.write("date");
- for (String p : this.relayPlatforms) {
- platformWriter.write("," + p);
- }
- platformWriter.write(",other\n");
- bandwidthWriter.write("date,advbw\n");
-
- /* Always keep one line of the consensuses and descriptors file in
- * memory. */
- String consensusLine = consensusesReader != null ?
- consensusesReader.readLine() : null;
- String descriptorLine = descriptorsReader != null ?
- descriptorsReader.readLine() : null;
-
- /* Iterate over both the consensus file and the consensus strings
- * that we have in memory at the same time. Whichever has an earlier
- * valid-after time gets processed. */
- while (consensusLine != null || !this.consensuses.isEmpty()) {
-
- /* Find out which line we want to process now, memorize it for
- * parsing below, advance the source from where we got the line,
- * and write the line to disk. Afterwards, variable line contains
- * the consensus line we want to parse in this iteration. */
- String line = null;
- if (consensusLine != null) {
- if (!this.consensuses.isEmpty()) {
- String fileKey = consensusLine.split(",")[0];
- String memKey = this.consensuses.firstKey();
- if (fileKey.equals(memKey)) {
- this.logger.finer("The consensus we read from disk has the "
- + "same valid-after time (" + fileKey + ") time as a "
- + "consensus we have in memory. Using the consensus "
- + "from memory.");
- consensusLine = consensusesReader.readLine();
- continue;
- } else if (fileKey.compareTo(memKey) < 0) {
- line = consensusLine;
- consensusLine = consensusesReader.readLine();
- } else {
- line = this.consensuses.remove(memKey);
- }
- } else {
- line = consensusLine;
- consensusLine = consensusesReader.readLine();
- }
- } else {
- line = this.consensuses.remove(this.consensuses.firstKey());
- }
- consensusesWriter.write(line + "\n");
-
- /* Write all server descriptors to disk that were published more
- * than 24 hours before the consensus we're about to process. Also
- * remove those server descriptors from memory. The idea is that
- * those server descriptors cannot be referenced from the
- * consensus anyway and would only bloat our memory. */
- String minus24h = dateTimeFormat.format(new Date(
- dateTimeFormat.parse(line.split(",")[0]).getTime() -
- (24L * 60L * 60L * 1000L)));
- while ((descriptorLine != null &&
- descriptorLine.split(",")[0].compareTo(minus24h) < 0) ||
- (!this.descriptors.isEmpty() &&
- this.descriptors.firstKey().split(",")[0].
- compareTo(minus24h) < 0)) {
- if (descriptorLine != null) {
- if (!this.descriptors.isEmpty()) {
- /* The first 47 chars contain the publication time (19
- * chars), a comma (1 char), and the descriptor identifier
- * (27 chars). */
- String fileKey = descriptorLine.substring(0, 47);
- String memKey = this.descriptors.firstKey();
- if (fileKey.equals(memKey)) {
- this.logger.finer("The server descriptor we read from "
- + "disk has the same publication time and identifier "
- + "(" + fileKey + ") as a server descriptor we have "
- + "in memory. Using the server descriptor from "
- + "memory.");
- descriptorLine = descriptorsReader.readLine();
- continue;
- } else if (fileKey.compareTo(memKey) < 0) {
- descriptorsWriter.write(descriptorLine + "\n");
- descriptorLine = descriptorsReader.readLine();
- } else {
- String removed = this.descriptors.remove(memKey);
- this.descById.remove(removed.split(",")[1]);
- descriptorsWriter.write(removed + "\n");
- }
- } else {
- descriptorsWriter.write(descriptorLine + "\n");
- descriptorLine = descriptorsReader.readLine();
- }
- } else {
- String removed = this.descriptors.remove(
- this.descriptors.firstKey());
- this.descById.remove(removed.split(",")[1]);
- descriptorsWriter.write(removed + "\n");
- }
- }
-
- /* Read in all server descriptors that were published in the last
- * 24 hours before the consensus that we're just processing. These
- * server descriptors might be referenced from the consensus.
- * Store references to these server descriptors by identifier to
- * facilitate matching a consensus entry with the corresponding
- * server descriptor. */
- String validAfter = line.split(",")[0];
- while (descriptorsReader != null && descriptorLine != null &&
- descriptorLine.split(",")[0].compareTo(validAfter) < 0) {
- this.descriptors.put(descriptorLine.substring(0, 47),
- descriptorLine);
- this.descById.put(descriptorLine.split(",")[1], descriptorLine);
- descriptorLine = descriptorsReader.readLine();
- }
-
- /* Now we have a consensus line we want to parse and all possibly
- * referenced descriptors in descById. Let's write some stats. */
- String consensusDate = line.substring(0, 10);
- if (statsDate == null) {
- statsDate = consensusDate;
- }
- if (!statsDate.equals(consensusDate)) {
- /* We have finished one day of consensuses. If we have parsed at
- * least half of the possible 24 consensuses of that day, write
- * stats to disk. */
- if (consensusesAtThisDay >= 12) {
- lastWrittenDay = statsDate;
- versionWriter.write(statsDate);
- for (int i = 0; i < versionStats.length; i++) {
- versionWriter.write("," + (versionStats[i] /
- consensusesAtThisDay));
- }
- versionWriter.write("\n");
- platformWriter.write(statsDate);
- for (int i = 0; i < platformStats.length; i++) {
- platformWriter.write("," + (platformStats[i] /
- consensusesAtThisDay));
- }
- platformWriter.write("\n");
- bandwidthWriter.write(statsDate + ","
- + (bandwidthStats / consensusesAtThisDay) + "\n");
- } else {
- this.logger.fine("Not enough consensuses to write to stats.");
- }
- /* Fill in NA's for missing dates. */
- long writtenMillis = dateFormat.parse(statsDate).getTime();
- if (consensusesAtThisDay < 12) {
- writtenMillis -= 24L * 60L * 60L * 1000L;
- }
- long nextMillis = dateFormat.parse(consensusDate).getTime();
- while (writtenMillis + (24L * 60L * 60L * 1000L) < nextMillis) {
- writtenMillis += 24L * 60L * 60L * 1000L;
- String date = dateFormat.format(new Date(writtenMillis));
- versionWriter.write(date);
- for (int i = 0; i < versionStats.length; i++) {
- versionWriter.write(",NA");
- }
- versionWriter.write("\n");
- platformWriter.write(date);
- for (int i = 0; i < platformStats.length; i++) {
- platformWriter.write(",NA");
- }
- platformWriter.write("\n");
- bandwidthWriter.write(date + ",NA\n");
- }
- /* Clear counters to collect next day's statistics. */
- versionStats = new int[this.relayVersions.size() + 1];
- platformStats = new int[this.relayPlatforms.size() + 1];
- bandwidthStats = 0L;
- consensusesAtThisDay = 0;
- statsDate = consensusDate;
- }
-
- /* For the given consensus, parse all referenced server
- * descriptors to obtain statistics on versions, platforms, and
- * advertised bandwidth. Only include these values if we have at
- * least 90 % of all referenced server descriptors. */
- int[] versionStatsCons = new int[this.relayVersions.size() + 1];
- int[] platformStatsCons = new int[this.relayPlatforms.size() + 1];
- long bandwidthStatsCons = 0L;
- String[] ids = line.split(",");
- int seenDescs = 0;
- for (int i = 1; i < ids.length; i++) {
- if (this.descById.containsKey(ids[i])) {
- seenDescs++;
- String desc = this.descById.get(ids[i]);
- String[] parts = desc.split(",");
- String version = parts[2].substring(0,
- parts[2].lastIndexOf("."));
- if (this.relayVersions.contains(version)) {
- versionStatsCons[this.relayVersions.indexOf(version)]++;
- } else {
- versionStatsCons[versionStatsCons.length - 1]++;
- }
- String platform = parts[3].toLowerCase();
- boolean isOther = true;
- for (String p : this.relayPlatforms) {
- if (platform.contains(p.toLowerCase())) {
- platformStatsCons[this.relayPlatforms.indexOf(p)]++;
- isOther = false;
- break;
- }
- }
- if (isOther) {
- platformStatsCons[platformStatsCons.length - 1]++;
- }
- bandwidthStatsCons += Long.parseLong(desc.substring(
- desc.lastIndexOf(",") + 1));
- }
- }
- if (10 * seenDescs / (ids.length - 1) >= 9) {
- for (int i = 0; i < versionStatsCons.length; i++) {
- versionStats[i] += versionStatsCons[i];
- }
- for (int i = 0; i < platformStatsCons.length; i++) {
- platformStats[i] += platformStatsCons[i];
- }
- bandwidthStats += bandwidthStatsCons;
- consensusesAtThisDay++;
- } else {
- this.logger.fine("Not enough referenced server descriptors for "
- + "consensus with valid-after time " + line.substring(0, 19)
- + ". Not including this consensus in the statistics.");
- }
-
- /* We're done reading one consensus. */
- }
-
- /* We're done reading all consensuses, both from disk and from
- * memory. Write remaining server descriptors to disk. These are the
- * server descriptors that were published 24 hours before the last
- * parsed consensus and those server descriptors published
- * afterwards. */
- while (descriptorLine != null || !this.descriptors.isEmpty()) {
- if (descriptorLine != null) {
- if (!this.descriptors.isEmpty()) {
- String fileKey = descriptorLine.substring(0, 47);
- String memKey = this.descriptors.firstKey();
- if (fileKey.equals(memKey)) {
- this.logger.finer("The server descriptor we read from "
- + "disk has the same publication time and identifier "
- + "(" + fileKey + ") as a server descriptor we have "
- + "in memory. Using the server descriptor from "
- + "memory.");
- descriptorLine = descriptorsReader.readLine();
- continue;
- } else if (fileKey.compareTo(memKey) < 0) {
- descriptorsWriter.write(descriptorLine + "\n");
- descriptorLine = descriptorsReader.readLine();
- } else {
- descriptorsWriter.write(this.descriptors.remove(memKey)
- + "\n");
- }
- } else {
- descriptorsWriter.write(descriptorLine + "\n");
- descriptorLine = descriptorsReader.readLine();
- }
- } else {
- descriptorsWriter.write(this.descriptors.remove(
- this.descriptors.firstKey()) + "\n");
- }
- }
- this.descById.clear();
-
- /* Close the files that we read from and wrote to. */
- if (consensusesReader != null) {
- consensusesReader.close();
- }
- if (descriptorsReader != null) {
- descriptorsReader.close();
- }
- consensusesWriter.close();
- descriptorsWriter.close();
- bandwidthWriter.close();
- versionWriter.close();
- platformWriter.close();
-
- /* Delete original files and rename temporary files to be the new
- * originals. */
- if (this.consensusesFile.exists()) {
- this.consensusesFile.delete();
- }
- this.consensusesTempFile.renameTo(this.consensusesFile);
- if (this.descriptorsFile.exists()) {
- this.descriptorsFile.delete();
- }
- this.descriptorsTempFile.renameTo(this.descriptorsFile);
-
- /* Done. Whee! */
- this.logger.fine("Finished writing.");
-
- } catch (Exception e) {
- this.logger.log(Level.WARNING, "Exception while writing files.", e);
- }
-
- /* Write stats. (Including the number of added consensuses and server
- * descriptors isn't trivial here, because we don't have the full set
- * of descriptors in memory when adding new ones. */
- StringBuilder dumpStats = new StringBuilder("Finished writing "
- + "statistics information contained in consensuses and server "
- + "descriptors.\n");
- if (lastWrittenDay == null) {
- dumpStats.append("No statistics written so far.");
- } else {
- dumpStats.append("Last written day of statistics was "
- + lastWrittenDay);
- }
- this.logger.info(dumpStats.toString());
- }
-}
--
1.7.1
More information about the tor-commits mailing list