[tor-commits] [metrics-db/master] Recognize when data sources become stale.
karsten at torproject.org
karsten at torproject.org
Sat Oct 27 20:07:46 UTC 2012
commit f7b58361aa6df5fd2afe40c38f9dd111e2820f89
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Sat Oct 27 13:52:42 2012 -0400
Recognize when data sources become stale.
Implements #3850.
---
.../db/bridgedescs/SanitizedBridgesWriter.java | 58 +++++++++++
.../BridgePoolAssignmentsProcessor.java | 16 +++
.../ernie/db/exitlists/ExitListDownloader.java | 100 +++++++++++++++-----
.../ernie/db/relaydescs/ArchiveWriter.java | 46 +++++++++
4 files changed, 194 insertions(+), 26 deletions(-)
diff --git a/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java b/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
index 87593bd..7de9961 100644
--- a/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
+++ b/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
@@ -193,6 +193,8 @@ public class SanitizedBridgesWriter extends Thread {
// Finish writing sanitized bridge descriptors to disk
this.finishWriting();
+ this.checkStaleDescriptors();
+
this.cleanUpRsyncDirectory();
}
@@ -368,6 +370,8 @@ public class SanitizedBridgesWriter extends Thread {
return this.secretsForHashingIPAddresses.get(month);
}
+ private String maxNetworkStatusPublishedTime = "1970-01-01 00:00:00";
+
/**
* Sanitizes a network status and writes it to disk.
*/
@@ -380,6 +384,10 @@ public class SanitizedBridgesWriter extends Thread {
return;
}
+ if (publicationTime.compareTo(maxNetworkStatusPublishedTime) > 0) {
+ maxNetworkStatusPublishedTime = publicationTime;
+ }
+
if (this.bridgeSanitizingCutOffTimestamp.
compareTo(publicationTime) > 0) {
this.logger.log(!this.haveWarnedAboutInterval ? Level.WARNING
@@ -543,6 +551,8 @@ public class SanitizedBridgesWriter extends Thread {
}
}
+ private String maxServerDescriptorPublishedTime = "1970-01-01 00:00:00";
+
/**
* Sanitizes a bridge server descriptor and writes it to disk.
*/
@@ -590,6 +600,9 @@ public class SanitizedBridgesWriter extends Thread {
* sanitizing interval. */
} else if (line.startsWith("published ")) {
published = line.substring("published ".length());
+ if (published.compareTo(maxServerDescriptorPublishedTime) > 0) {
+ maxServerDescriptorPublishedTime = published;
+ }
if (this.bridgeSanitizingCutOffTimestamp.
compareTo(published) > 0) {
this.logger.log(!this.haveWarnedAboutInterval
@@ -799,6 +812,9 @@ public class SanitizedBridgesWriter extends Thread {
}
}
+ private String maxExtraInfoDescriptorPublishedTime =
+ "1970-01-01 00:00:00";
+
/**
* Sanitizes an extra-info descriptor and writes it to disk.
*/
@@ -827,6 +843,10 @@ public class SanitizedBridgesWriter extends Thread {
} else if (line.startsWith("published ")) {
scrubbed.append(line + "\n");
published = line.substring("published ".length());
+ if (published.compareTo(maxExtraInfoDescriptorPublishedTime)
+ > 0) {
+ maxExtraInfoDescriptorPublishedTime = published;
+ }
/* Remove everything from transport lines except the transport
* name. */
@@ -968,6 +988,44 @@ public class SanitizedBridgesWriter extends Thread {
}
}
+ private void checkStaleDescriptors() { // Logs a warning for each bridge descriptor type whose newest published time is too old.
+ SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
+ "yyyy-MM-dd HH:mm:ss");
+ dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ long tooOldMillis = System.currentTimeMillis() - 330L * 60L * 1000L; // cut-off: 330 minutes (5:30 hours) before now
+ try { // one try block for all three checks: a ParseException skips the checks that follow it
+ long maxNetworkStatusPublishedMillis =
+ dateTimeFormat.parse(maxNetworkStatusPublishedTime).getTime();
+ if (maxNetworkStatusPublishedMillis > 0L && // 0 means the field still holds its initial "1970-01-01 00:00:00"
+ maxNetworkStatusPublishedMillis < tooOldMillis) {
+ this.logger.warning("The last known bridge network status was "
+ + "published " + maxNetworkStatusPublishedTime + ", which is "
+ + "more than 5:30 hours in the past.");
+ }
+ long maxServerDescriptorPublishedMillis =
+ dateTimeFormat.parse(maxServerDescriptorPublishedTime).
+ getTime();
+ if (maxServerDescriptorPublishedMillis > 0L && // same initial-value guard as above
+ maxServerDescriptorPublishedMillis < tooOldMillis) {
+ this.logger.warning("The last known bridge server descriptor was "
+ + "published " + maxServerDescriptorPublishedTime + ", which "
+ + "is more than 5:30 hours in the past.");
+ }
+ long maxExtraInfoDescriptorPublishedMillis =
+ dateTimeFormat.parse(maxExtraInfoDescriptorPublishedTime).
+ getTime();
+ if (maxExtraInfoDescriptorPublishedMillis > 0L && // same initial-value guard as above
+ maxExtraInfoDescriptorPublishedMillis < tooOldMillis) {
+ this.logger.warning("The last known bridge extra-info descriptor "
+ + "was published " + maxExtraInfoDescriptorPublishedTime
+ + ", which is more than 5:30 hours in the past.");
+ }
+ } catch (ParseException e) { // logged as a warning, not rethrown
+ this.logger.log(Level.WARNING, "Unable to parse timestamp for "
+ + "stale check.", e);
+ }
+ }
+
/* Delete all files from the rsync directory that have not been modified
* in the last three days. */
public void cleanUpRsyncDirectory() {
diff --git a/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java b/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
index 0ac6f90..43d3427 100644
--- a/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
+++ b/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
@@ -77,6 +77,7 @@ public class BridgePoolAssignmentsProcessor extends Thread {
SimpleDateFormat filenameFormat =
new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
filenameFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ long maxBridgePoolAssignmentTime = 0L;
for (File assignmentFile : assignmentFiles) {
logger.info("Processing bridge pool assignment file '"
+ assignmentFile.getAbsolutePath() + "'...");
@@ -120,6 +121,9 @@ public class BridgePoolAssignmentsProcessor extends Thread {
long bridgePoolAssignmentTime = assignmentFormat.parse(
bridgePoolAssignmentLine.substring(
"bridge-pool-assignment ".length())).getTime();
+ maxBridgePoolAssignmentTime = Math.max(
+ maxBridgePoolAssignmentTime,
+ bridgePoolAssignmentTime);
File tarballFile = new File(
sanitizedAssignmentsDirectory, filenameFormat.format(
bridgePoolAssignmentTime));
@@ -192,6 +196,18 @@ public class BridgePoolAssignmentsProcessor extends Thread {
}
}
+ if (maxBridgePoolAssignmentTime > 0L &&
+ maxBridgePoolAssignmentTime + 330L * 60L * 1000L
+ < System.currentTimeMillis()) {
+ SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
+ "yyyy-MM-dd HH:mm:ss");
+ dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ logger.warning("The last known bridge pool assignment list was "
+ + "published at "
+ + dateTimeFormat.format(maxBridgePoolAssignmentTime)
+ + ", which is more than 5:30 hours in the past.");
+ }
+
this.cleanUpRsyncDirectory();
logger.info("Finished processing bridge pool assignment file(s).");
diff --git a/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java b/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
index 26e944c..9b1f40b 100644
--- a/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
+++ b/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
@@ -12,6 +12,7 @@ import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
+import java.util.List;
import java.util.SortedSet;
import java.util.Stack;
import java.util.TimeZone;
@@ -19,6 +20,12 @@ import java.util.TreeSet;
import java.util.logging.Level;
import java.util.logging.Logger;
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorParser;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.ExitList;
+import org.torproject.descriptor.ExitListEntry;
+import org.torproject.descriptor.impl.DescriptorParseException;
import org.torproject.ernie.db.main.Configuration;
public class ExitListDownloader extends Thread {
@@ -35,8 +42,19 @@ public class ExitListDownloader extends Thread {
}
Logger logger = Logger.getLogger(ExitListDownloader.class.getName());
+
+ SimpleDateFormat dateTimeFormat =
+ new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+
+ Date downloadedDate = new Date();
+ String downloadedExitList = null;
try {
logger.fine("Downloading exit list...");
+ StringBuilder sb = new StringBuilder();
+ sb.append("@type tordnsel 1.0\n");
+ sb.append("Downloaded " + dateTimeFormat.format(downloadedDate)
+ + "\n");
String exitAddressesUrl =
"http://exitlist.torproject.org/exit-addresses";
URL u = new URL(exitAddressesUrl);
@@ -51,42 +69,72 @@ public class ExitListDownloader extends Thread {
}
BufferedInputStream in = new BufferedInputStream(
huc.getInputStream());
- SimpleDateFormat printFormat =
- new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
- printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- Date downloadedDate = new Date();
- File tarballFile = new File("exitlist/" + printFormat.format(
- downloadedDate));
- tarballFile.getParentFile().mkdirs();
- File rsyncFile = new File("rsync/exit-lists/"
- + tarballFile.getName());
- rsyncFile.getParentFile().mkdirs();
- SimpleDateFormat dateTimeFormat =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- BufferedWriter bwT = new BufferedWriter(new FileWriter(
- tarballFile));
- BufferedWriter bwR = new BufferedWriter(new FileWriter(
- rsyncFile));
- bwT.write("@type tordnsel 1.0\n");
- bwT.write("Downloaded " + dateTimeFormat.format(downloadedDate)
- + "\n");
- bwR.write("@type tordnsel 1.0\n");
- bwR.write("Downloaded " + dateTimeFormat.format(downloadedDate)
- + "\n");
int len;
byte[] data = new byte[1024];
while ((len = in.read(data, 0, 1024)) >= 0) {
- bwT.write(new String(data, 0, len));
- bwR.write(new String(data, 0, len));
+ sb.append(new String(data, 0, len));
}
in.close();
- bwT.close();
- bwR.close();
+ downloadedExitList = sb.toString();
logger.fine("Finished downloading exit list.");
} catch (IOException e) {
logger.log(Level.WARNING, "Failed downloading exit list", e);
return;
}
+ if (downloadedExitList == null) {
+ logger.warning("Failed downloading exit list");
+ return;
+ }
+
+ SimpleDateFormat tarballFormat =
+ new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
+ tarballFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ File tarballFile = new File("exitlist/" + tarballFormat.format(
+ downloadedDate));
+
+ long maxScanMillis = 0L;
+ try {
+ DescriptorParser descriptorParser =
+ DescriptorSourceFactory.createDescriptorParser();
+ List<Descriptor> parsedDescriptors =
+ descriptorParser.parseDescriptors(downloadedExitList.getBytes(),
+ tarballFile.getName());
+ if (parsedDescriptors.size() != 1 ||
+ !(parsedDescriptors.get(0) instanceof ExitList)) {
+ logger.warning("Could not parse downloaded exit list");
+ return;
+ }
+ ExitList parsedExitList = (ExitList) parsedDescriptors.get(0);
+ for (ExitListEntry entry : parsedExitList.getExitListEntries()) {
+ maxScanMillis = Math.max(maxScanMillis, entry.getScanMillis());
+ }
+ } catch (DescriptorParseException e) {
+ logger.log(Level.WARNING, "Could not parse downloaded exit list",
+ e);
+ }
+ if (maxScanMillis > 0L &&
+ maxScanMillis + 330L * 60L * 1000L < System.currentTimeMillis()) {
+ logger.warning("The last reported scan in the downloaded exit list "
+ + "took place at " + dateTimeFormat.format(maxScanMillis)
+ + ", which is more than 5:30 hours in the past.");
+ }
+
+ /* Write to disk. */
+ File rsyncFile = new File("rsync/exit-lists/"
+ + tarballFile.getName());
+ File[] outputFiles = new File[] { tarballFile, rsyncFile };
+ for (File outputFile : outputFiles) {
+ try {
+ outputFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ outputFile));
+ bw.write(downloadedExitList);
+ bw.close();
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Could not write downloaded exit list "
+ + "to " + outputFile.getAbsolutePath(), e);
+ }
+ }
/* Write stats. */
StringBuilder dumpStats = new StringBuilder("Finished downloading "
diff --git a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
index f95bbf7..9a07ada 100644
--- a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
+++ b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
@@ -94,6 +94,8 @@ public class ArchiveWriter extends Thread {
// Write output to disk that only depends on relay descriptors
this.dumpStats();
+ this.checkStaledescriptors();
+
this.cleanUpRsyncDirectory();
}
@@ -128,9 +130,12 @@ public class ArchiveWriter extends Thread {
return false;
}
+ private long maxConsensusValidAfter = 0L;
private static final byte[] CONSENSUS_ANNOTATION =
"@type network-status-consensus-3 1.0\n".getBytes();
public void storeConsensus(byte[] data, long validAfter) {
+ this.maxConsensusValidAfter = Math.max(this.maxConsensusValidAfter,
+ validAfter);
SimpleDateFormat printFormat = new SimpleDateFormat(
"yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
@@ -144,10 +149,12 @@ public class ArchiveWriter extends Thread {
}
}
+ private long maxVoteValidAfter = 0L;
private static final byte[] VOTE_ANNOTATION =
"@type network-status-vote-3 1.0\n".getBytes();
public void storeVote(byte[] data, long validAfter,
String fingerprint, String digest) {
+ this.maxVoteValidAfter = Math.max(this.maxVoteValidAfter, validAfter);
SimpleDateFormat printFormat = new SimpleDateFormat(
"yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
@@ -177,10 +184,13 @@ public class ArchiveWriter extends Thread {
}
}
+ private long maxServerDescriptorPublished = 0L;
private static final byte[] SERVER_DESCRIPTOR_ANNOTATION =
"@type server-descriptor 1.0\n".getBytes();
public void storeServerDescriptor(byte[] data, String digest,
long published) {
+ this.maxServerDescriptorPublished = Math.max(
+ this.maxServerDescriptorPublished, published);
SimpleDateFormat printFormat = new SimpleDateFormat("yyyy/MM/");
printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
File tarballFile = new File(this.outputDirectory
@@ -195,10 +205,13 @@ public class ArchiveWriter extends Thread {
}
}
+ private long maxExtraInfoDescriptorPublished = 0L;
private static final byte[] EXTRA_INFO_ANNOTATION =
"@type extra-info 1.0\n".getBytes();
public void storeExtraInfoDescriptor(byte[] data,
String extraInfoDigest, long published) {
+ this.maxExtraInfoDescriptorPublished = Math.max(
+ this.maxExtraInfoDescriptorPublished, published);
SimpleDateFormat descriptorFormat = new SimpleDateFormat("yyyy/MM/");
descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
File tarballFile = new File(this.outputDirectory + "/extra-info/"
@@ -403,6 +416,39 @@ public class ArchiveWriter extends Thread {
}
}
+ private void checkStaledescriptors() { // Logs a warning for each relay descriptor type whose newest timestamp is too old. NOTE(review): name casing differs from SanitizedBridgesWriter.checkStaleDescriptors.
+ SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
+ "yyyy-MM-dd HH:mm:ss");
+ dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); // formatter is only used to print timestamps in the warnings
+ long tooOldMillis = System.currentTimeMillis() - 330L * 60L * 1000L; // cut-off: 330 minutes (5:30 hours) before now
+ if (maxConsensusValidAfter > 0L && // 0L is the field's initial value, so an untouched field never warns
+ maxConsensusValidAfter < tooOldMillis) {
+ this.logger.warning("The last known relay network status "
+ + "consensus was valid after "
+ + dateTimeFormat.format(maxConsensusValidAfter)
+ + ", which is more than 5:30 hours in the past.");
+ }
+ if (maxVoteValidAfter > 0L && maxVoteValidAfter < tooOldMillis) { // same check for votes
+ this.logger.warning("The last known relay network status vote "
+ + "was valid after " + dateTimeFormat.format(maxVoteValidAfter)
+ + ", which is more than 5:30 hours in the past.");
+ }
+ if (maxServerDescriptorPublished > 0L && // same check for server descriptors
+ maxServerDescriptorPublished < tooOldMillis) {
+ this.logger.warning("The last known relay server descriptor was "
+ + "published at "
+ + dateTimeFormat.format(maxServerDescriptorPublished)
+ + ", which is more than 5:30 hours in the past.");
+ }
+ if (maxExtraInfoDescriptorPublished > 0L && // same check for extra-info descriptors
+ maxExtraInfoDescriptorPublished < tooOldMillis) {
+ this.logger.warning("The last known relay extra-info descriptor "
+ + "was published at "
+ + dateTimeFormat.format(maxExtraInfoDescriptorPublished)
+ + ", which is more than 5:30 hours in the past.");
+ }
+ }
+
/* Delete all files from the rsync directory that have not been modified
* in the last three days. */
public void cleanUpRsyncDirectory() {
More information about the tor-commits
mailing list