[tor-commits] [metrics-db/master] Add more robust descriptor reference checker.
karsten at torproject.org
karsten at torproject.org
Mon Feb 15 18:55:00 UTC 2016
commit 56288d49746ace3fdc03411c79f7d9b52d66c35d
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Thu Feb 11 22:41:52 2016 +0100
Add more robust descriptor reference checker.
The new reference checker reads files in recent/relay-descriptors/,
rather than keeping data structures of newly written descriptors in
memory and storing them to disk later. This should be more robust
against unplanned reboots.
---
.../ernie/db/relaydescs/ArchiveWriter.java | 4 +
.../ernie/db/relaydescs/ReferenceChecker.java | 310 +++++++++++++++++++++
2 files changed, 314 insertions(+)
diff --git a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
index 90b41c3..01b4d06 100644
--- a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
+++ b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
@@ -57,6 +57,10 @@ public class ArchiveWriter extends Thread {
// Import/download relay descriptors from the various sources
new ArchiveWriter(config).run();
+ new ReferenceChecker(new File("recent/relay-descriptors"),
+ new File("stats/references"),
+ new File("stats/references-history")).check();
+
// Remove lock file
lf.releaseLock();
diff --git a/src/org/torproject/ernie/db/relaydescs/ReferenceChecker.java b/src/org/torproject/ernie/db/relaydescs/ReferenceChecker.java
new file mode 100644
index 0000000..4bafa76
--- /dev/null
+++ b/src/org/torproject/ernie/db/relaydescs/ReferenceChecker.java
@@ -0,0 +1,310 @@
+package org.torproject.ernie.db.relaydescs;
+
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Locale;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TimeZone;
+import java.util.TreeSet;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorFile;
+import org.torproject.descriptor.DescriptorReader;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.DirSourceEntry;
+import org.torproject.descriptor.ExtraInfoDescriptor;
+import org.torproject.descriptor.Microdescriptor;
+import org.torproject.descriptor.NetworkStatusEntry;
+import org.torproject.descriptor.RelayNetworkStatusConsensus;
+import org.torproject.descriptor.RelayNetworkStatusVote;
+import org.torproject.descriptor.ServerDescriptor;
+
+import com.google.gson.Gson;
+import com.google.gson.JsonParseException;
+
+public class ReferenceChecker {
+
+ private Logger log = Logger.getLogger(ReferenceChecker.class.getName());
+
+ private File descriptorsDir;
+
+ private File referencesFile;
+
+ private File historyFile;
+
+ public ReferenceChecker(File descriptorsDir, File referencesFile,
+ File historyFile) {
+ this.descriptorsDir = descriptorsDir;
+ this.referencesFile = referencesFile;
+ this.historyFile = historyFile;
+ }
+
+ public void check() {
+ this.getCurrentTimeMillis();
+ this.readReferencesFile();
+ this.readNewDescriptors();
+ this.dropStaleReferences();
+ this.checkReferences();
+ this.writeReferencesFile();
+ }
+
+ private long currentTimeMillis;
+
+ private void getCurrentTimeMillis() {
+ this.currentTimeMillis = System.currentTimeMillis();
+ }
+
+ private static class Reference implements Comparable<Reference> {
+
+ private String referencing;
+
+ private String referenced;
+
+ private double weight;
+
+ private long expiresAfterMillis;
+
+ public Reference(String referencing, String referenced, double weight,
+ long expiresAfterMillis) {
+ this.referencing = referencing;
+ this.referenced = referenced;
+ this.weight = weight;
+ this.expiresAfterMillis = expiresAfterMillis;
+ }
+
+ @Override
+ public boolean equals(Object otherObject) {
+ if (!(otherObject instanceof Reference)) {
+ return false;
+ }
+ Reference other = (Reference) otherObject;
+ return this.referencing.equals(other.referencing) &&
+ this.referenced.equals(other.referenced);
+ }
+
+ @Override
+ public int hashCode() {
+ return this.referencing.hashCode() + this.referenced.hashCode();
+ }
+
+ @Override
+ public int compareTo(Reference other) {
+ int result = this.referencing.compareTo(other.referencing);
+ if (result == 0) {
+ result = this.referenced.compareTo(other.referenced);
+ }
+ return result;
+ }
+ }
+
+ private SortedSet<Reference> references = new TreeSet<Reference>();
+
+ private void addReference(String referencing, String referenced,
+ double weight, long expiresAfterMillis) {
+ this.references.add(new Reference(referencing.toUpperCase(),
+ referenced.toUpperCase(), weight, expiresAfterMillis));
+ }
+
+ private void readReferencesFile() {
+ if (!this.referencesFile.exists()) {
+ return;
+ }
+ Gson gson = new Gson();
+ try {
+ FileReader fr = new FileReader(this.referencesFile);
+ this.references.addAll(Arrays.asList(gson.fromJson(fr,
+ Reference[].class)));
+ fr.close();
+ } catch (IOException e) {
+ this.log.log(Level.WARNING, "Cannot read existing references file "
+ + "from previous run.", e);
+ }
+ }
+
+ private void readNewDescriptors() {
+ DescriptorReader descriptorReader =
+ DescriptorSourceFactory.createDescriptorReader();
+ descriptorReader.addDirectory(this.descriptorsDir);
+ descriptorReader.setExcludeFiles(this.historyFile);
+ Iterator<DescriptorFile> descriptorFiles =
+ descriptorReader.readDescriptors();
+ while (descriptorFiles.hasNext()) {
+ DescriptorFile descriptorFile = descriptorFiles.next();
+ for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+ if (descriptor instanceof RelayNetworkStatusConsensus) {
+ RelayNetworkStatusConsensus consensus =
+ (RelayNetworkStatusConsensus) descriptor;
+ String consensusFlavor = consensus.getConsensusFlavor();
+ if (consensusFlavor == null) {
+ this.readRelayNetworkStatusConsensusUnflavored(consensus);
+ } else if (consensusFlavor.equals("microdesc")) {
+ this.readRelayNetworkStatusConsensusMicrodesc(consensus);
+ } else {
+ /* Ignore unknown consensus flavors. */
+ }
+ } else if (descriptor instanceof RelayNetworkStatusVote) {
+ this.readRelayNetworkStatusVote(
+ (RelayNetworkStatusVote) descriptor);
+ } else if (descriptor instanceof ServerDescriptor) {
+ this.readServerDescriptor((ServerDescriptor) descriptor);
+ } else if (descriptor instanceof ExtraInfoDescriptor) {
+ this.readExtraInfoDescriptor((ExtraInfoDescriptor) descriptor);
+ } else if (descriptor instanceof Microdescriptor) {
+ readMicrodescriptor((Microdescriptor) descriptor);
+ } else {
+ /* Ignore unknown descriptors. */
+ }
+ }
+ }
+ }
+
+ private static DateFormat dateTimeFormat;
+ static {
+ dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'",
+ Locale.US);
+ dateTimeFormat.setLenient(false);
+ dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ }
+
+ private static final long ONE_HOUR = 60L * 60L * 1000L,
+ THREE_HOURS = 3L * ONE_HOUR, SIX_HOURS = 6L * ONE_HOUR,
+ ONE_DAY = 24L * ONE_HOUR, THIRTY_DAYS = 30L * ONE_DAY;
+
+ private void readRelayNetworkStatusConsensusUnflavored(
+ RelayNetworkStatusConsensus consensus) {
+ String validAfter = dateTimeFormat.format(
+ consensus.getValidAfterMillis());
+ String referencing = String.format("C-%s", validAfter);
+ this.addReference(referencing, String.format("M-%s", validAfter), 1.0,
+ consensus.getValidAfterMillis() + THREE_HOURS);
+ for (DirSourceEntry dirSourceEntry :
+ consensus.getDirSourceEntries().values()) {
+ if (!dirSourceEntry.isLegacy()) {
+ this.addReference(referencing, String.format("V-%s-%s",
+ validAfter, dirSourceEntry.getIdentity()), 1.0,
+ consensus.getValidAfterMillis() + THREE_HOURS);
+ }
+ }
+ double entryWeight = 200.0 /
+ ((double) consensus.getStatusEntries().size());
+ for (NetworkStatusEntry entry :
+ consensus.getStatusEntries().values()) {
+ this.addReference(referencing,
+ String.format("S-%s", entry.getDescriptor()), entryWeight,
+ entry.getPublishedMillis() + THREE_HOURS);
+ }
+ }
+
+
+ private void readRelayNetworkStatusConsensusMicrodesc(
+ RelayNetworkStatusConsensus consensus) {
+ String validAfter = dateTimeFormat.format(
+ consensus.getValidAfterMillis());
+ String referencing = String.format("M-%s", validAfter);
+ this.addReference(referencing, String.format("C-%s", validAfter), 1.0,
+ consensus.getValidAfterMillis() + THREE_HOURS);
+ double entryWeight = 200.0 /
+ ((double) consensus.getStatusEntries().size());
+ for (NetworkStatusEntry entry :
+ consensus.getStatusEntries().values()) {
+ for (String digest : entry.getMicrodescriptorDigests()) {
+ this.addReference(referencing, String.format("D-%s", digest),
+ entryWeight, entry.getPublishedMillis() + THREE_HOURS);
+ }
+ }
+ }
+
+ private void readRelayNetworkStatusVote(RelayNetworkStatusVote vote) {
+ String validAfter = dateTimeFormat.format(vote.getValidAfterMillis());
+ String referencing = String.format("V-%s-%s", validAfter,
+ vote.getIdentity());
+ double entryWeight = 200.0 /
+ ((double) vote.getStatusEntries().size());
+ for (NetworkStatusEntry entry : vote.getStatusEntries().values()) {
+ this.addReference(referencing,
+ String.format("S-%s", entry.getDescriptor()), entryWeight,
+ entry.getPublishedMillis() + SIX_HOURS);
+ }
+ }
+
+ private void readServerDescriptor(ServerDescriptor serverDescriptor) {
+ String referenced = serverDescriptor.getExtraInfoDigest() == null ? ""
+ : String.format("E-%s", serverDescriptor.getExtraInfoDigest());
+ this.addReference(String.format("S-%s",
+ serverDescriptor.getServerDescriptorDigest()), referenced, 0.01,
+ serverDescriptor.getPublishedMillis() + SIX_HOURS);
+ }
+
+ private void readExtraInfoDescriptor(
+ ExtraInfoDescriptor extraInfoDescriptor) {
+ this.addReference(String.format("E-%s",
+ extraInfoDescriptor.getExtraInfoDigest()), "", 0.005,
+ extraInfoDescriptor.getPublishedMillis() + SIX_HOURS);
+ }
+
+ private void readMicrodescriptor(Microdescriptor microdesc) {
+ this.addReference(
+ String.format("D-%s", microdesc.getMicrodescriptorDigest()), "",
+ 0.0, this.currentTimeMillis + THIRTY_DAYS);
+ }
+
+ private void dropStaleReferences() {
+ SortedSet<Reference> recentReferences = new TreeSet<Reference>();
+ for (Reference reference : this.references) {
+ if (this.currentTimeMillis <= reference.expiresAfterMillis) {
+ recentReferences.add(reference);
+ }
+ }
+ this.references = recentReferences;
+ }
+
+ private void checkReferences() {
+ Set<String> knownDescriptors = new HashSet<String>();
+ for (Reference reference : this.references) {
+ knownDescriptors.add(reference.referencing);
+ }
+ double totalMissingDescriptorsWeight = 0.0;
+ Set<String> missingDescriptors = new TreeSet<String>();
+ StringBuilder sb = new StringBuilder("Missing referenced "
+ + "descriptors:");
+ for (Reference reference : this.references) {
+ if (reference.referenced.length() > 0 &&
+ !knownDescriptors.contains(reference.referenced)) {
+ if (!missingDescriptors.contains(reference.referenced)) {
+ totalMissingDescriptorsWeight += reference.weight;
+ }
+ missingDescriptors.add(reference.referenced);
+ sb.append(String.format("%n%s -> %s (%.4f -> %.4f)",
+ reference.referencing, reference.referenced, reference.weight,
+ totalMissingDescriptorsWeight));
+ }
+ }
+ this.log.log(Level.INFO, sb.toString());
+ if (totalMissingDescriptorsWeight > 0.999) {
+ this.log.log(Level.WARNING, "Missing too many referenced "
+ + "descriptors (" + totalMissingDescriptorsWeight + ").");
+ }
+ }
+
+ private void writeReferencesFile() {
+ Gson gson = new Gson();
+ try {
+ FileWriter fw = new FileWriter(this.referencesFile);
+ gson.toJson(this.references, fw);
+ fw.close();
+ } catch (IOException e) {
+ this.log.log(Level.WARNING, "Cannot write references file for next "
+ + "run.", e);
+ }
+ }
+}
+
More information about the tor-commits
mailing list