[tor-commits] [metrics-db/master] Add more robust descriptor reference checker.

karsten at torproject.org karsten at torproject.org
Mon Feb 15 18:55:00 UTC 2016


commit 56288d49746ace3fdc03411c79f7d9b52d66c35d
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Thu Feb 11 22:41:52 2016 +0100

    Add more robust descriptor reference checker.
    
    The new reference checker reads files in recent/relay-descriptors/,
    rather than keeping data structures of newly written descriptors in
    memory and storing them to disk later.  This should be more robust
    against unplanned reboots.
---
 .../ernie/db/relaydescs/ArchiveWriter.java         |   4 +
 .../ernie/db/relaydescs/ReferenceChecker.java      | 310 +++++++++++++++++++++
 2 files changed, 314 insertions(+)

diff --git a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
index 90b41c3..01b4d06 100644
--- a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
+++ b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
@@ -57,6 +57,10 @@ public class ArchiveWriter extends Thread {
     // Import/download relay descriptors from the various sources
     new ArchiveWriter(config).run();
 
+    new ReferenceChecker(new File("recent/relay-descriptors"),
+        new File("stats/references"),
+        new File("stats/references-history")).check();
+
     // Remove lock file
     lf.releaseLock();
 
diff --git a/src/org/torproject/ernie/db/relaydescs/ReferenceChecker.java b/src/org/torproject/ernie/db/relaydescs/ReferenceChecker.java
new file mode 100644
index 0000000..4bafa76
--- /dev/null
+++ b/src/org/torproject/ernie/db/relaydescs/ReferenceChecker.java
@@ -0,0 +1,310 @@
+package org.torproject.ernie.db.relaydescs;
+
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Locale;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TimeZone;
+import java.util.TreeSet;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorFile;
+import org.torproject.descriptor.DescriptorReader;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.DirSourceEntry;
+import org.torproject.descriptor.ExtraInfoDescriptor;
+import org.torproject.descriptor.Microdescriptor;
+import org.torproject.descriptor.NetworkStatusEntry;
+import org.torproject.descriptor.RelayNetworkStatusConsensus;
+import org.torproject.descriptor.RelayNetworkStatusVote;
+import org.torproject.descriptor.ServerDescriptor;
+
+import com.google.gson.Gson;
+
+public class ReferenceChecker {
+
+  private Logger log = Logger.getLogger(ReferenceChecker.class.getName());
+
+  private File descriptorsDir;
+
+  private File referencesFile;
+
+  private File historyFile;
+
+  public ReferenceChecker(File descriptorsDir, File referencesFile,
+      File historyFile) {
+    this.descriptorsDir = descriptorsDir;
+    this.referencesFile = referencesFile;
+    this.historyFile = historyFile;
+  }
+
+  public void check() {
+    this.getCurrentTimeMillis();
+    this.readReferencesFile();
+    this.readNewDescriptors();
+    this.dropStaleReferences();
+    this.checkReferences();
+    this.writeReferencesFile();
+  }
+
+  private long currentTimeMillis;
+
+  private void getCurrentTimeMillis() {
+    this.currentTimeMillis = System.currentTimeMillis();
+  }
+
+  private static class Reference implements Comparable<Reference> {
+
+    private String referencing;
+
+    private String referenced;
+
+    private double weight;
+
+    private long expiresAfterMillis;
+
+    public Reference(String referencing, String referenced, double weight,
+        long expiresAfterMillis) {
+      this.referencing = referencing;
+      this.referenced = referenced;
+      this.weight = weight;
+      this.expiresAfterMillis = expiresAfterMillis;
+    }
+
+    @Override
+    public boolean equals(Object otherObject) {
+      if (!(otherObject instanceof Reference)) {
+        return false;
+      }
+      Reference other = (Reference) otherObject;
+      return this.referencing.equals(other.referencing) &&
+          this.referenced.equals(other.referenced);
+    }
+
+    @Override
+    public int hashCode() {
+      return this.referencing.hashCode() + this.referenced.hashCode();
+    }
+
+    @Override
+    public int compareTo(Reference other) {
+      int result = this.referencing.compareTo(other.referencing);
+      if (result == 0) {
+        result = this.referenced.compareTo(other.referenced);
+      }
+      return result;
+    }
+  }
+
+  private SortedSet<Reference> references = new TreeSet<Reference>();
+
+  private void addReference(String referencing, String referenced,
+      double weight, long expiresAfterMillis) {
+    this.references.add(new Reference(referencing.toUpperCase(),
+        referenced.toUpperCase(), weight, expiresAfterMillis));
+  }
+
+  private void readReferencesFile() {
+    if (!this.referencesFile.exists()) {
+      return;
+    }
+    Gson gson = new Gson();
+    try {
+      FileReader fr = new FileReader(this.referencesFile);
+      this.references.addAll(Arrays.asList(gson.fromJson(fr,
+          Reference[].class)));
+      fr.close();
+    } catch (IOException e) {
+      this.log.log(Level.WARNING, "Cannot read existing references file "
+          + "from previous run.", e);
+    }
+  }
+
+  private void readNewDescriptors() {
+    DescriptorReader descriptorReader =
+        DescriptorSourceFactory.createDescriptorReader();
+    descriptorReader.addDirectory(this.descriptorsDir);
+    descriptorReader.setExcludeFiles(this.historyFile);
+    Iterator<DescriptorFile> descriptorFiles =
+        descriptorReader.readDescriptors();
+    while (descriptorFiles.hasNext()) {
+      DescriptorFile descriptorFile = descriptorFiles.next();
+      for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+        if (descriptor instanceof RelayNetworkStatusConsensus) {
+          RelayNetworkStatusConsensus consensus =
+              (RelayNetworkStatusConsensus) descriptor;
+          String consensusFlavor = consensus.getConsensusFlavor();
+          if (consensusFlavor == null) {
+            this.readRelayNetworkStatusConsensusUnflavored(consensus);
+          } else if (consensusFlavor.equals("microdesc")) {
+            this.readRelayNetworkStatusConsensusMicrodesc(consensus);
+          } else {
+            /* Ignore unknown consensus flavors. */
+          }
+        } else if (descriptor instanceof RelayNetworkStatusVote) {
+          this.readRelayNetworkStatusVote(
+              (RelayNetworkStatusVote) descriptor);
+        } else if (descriptor instanceof ServerDescriptor) {
+          this.readServerDescriptor((ServerDescriptor) descriptor);
+        } else if (descriptor instanceof ExtraInfoDescriptor) {
+          this.readExtraInfoDescriptor((ExtraInfoDescriptor) descriptor);
+        } else if (descriptor instanceof Microdescriptor) {
+          readMicrodescriptor((Microdescriptor) descriptor);
+        } else {
+          /* Ignore unknown descriptors. */
+        }
+      }
+    }
+  }
+
+  private static DateFormat dateTimeFormat;
+  static {
+    dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'",
+        Locale.US);
+    dateTimeFormat.setLenient(false);
+    dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+  }
+
+  private static final long ONE_HOUR = 60L * 60L * 1000L,
+      THREE_HOURS = 3L * ONE_HOUR, SIX_HOURS = 6L * ONE_HOUR,
+      ONE_DAY = 24L * ONE_HOUR, THIRTY_DAYS = 30L * ONE_DAY;
+
+  private void readRelayNetworkStatusConsensusUnflavored(
+      RelayNetworkStatusConsensus consensus) {
+    String validAfter = dateTimeFormat.format(
+        consensus.getValidAfterMillis());
+    String referencing = String.format("C-%s", validAfter);
+    this.addReference(referencing, String.format("M-%s", validAfter), 1.0,
+        consensus.getValidAfterMillis() + THREE_HOURS);
+    for (DirSourceEntry dirSourceEntry :
+        consensus.getDirSourceEntries().values()) {
+      if (!dirSourceEntry.isLegacy()) {
+        this.addReference(referencing, String.format("V-%s-%s",
+            validAfter, dirSourceEntry.getIdentity()), 1.0,
+            consensus.getValidAfterMillis() + THREE_HOURS);
+      }
+    }
+    double entryWeight = 200.0 /
+        ((double) consensus.getStatusEntries().size());
+    for (NetworkStatusEntry entry :
+        consensus.getStatusEntries().values()) {
+      this.addReference(referencing,
+          String.format("S-%s", entry.getDescriptor()), entryWeight,
+          entry.getPublishedMillis() + THREE_HOURS);
+    }
+  }
+
+
+  private void readRelayNetworkStatusConsensusMicrodesc(
+      RelayNetworkStatusConsensus consensus) {
+    String validAfter = dateTimeFormat.format(
+        consensus.getValidAfterMillis());
+    String referencing = String.format("M-%s", validAfter);
+    this.addReference(referencing, String.format("C-%s", validAfter), 1.0,
+        consensus.getValidAfterMillis() + THREE_HOURS);
+    double entryWeight = 200.0 /
+        ((double) consensus.getStatusEntries().size());
+    for (NetworkStatusEntry entry :
+        consensus.getStatusEntries().values()) {
+      for (String digest : entry.getMicrodescriptorDigests()) {
+        this.addReference(referencing, String.format("D-%s", digest),
+            entryWeight, entry.getPublishedMillis() + THREE_HOURS);
+      }
+    }
+  }
+
+  private void readRelayNetworkStatusVote(RelayNetworkStatusVote vote) {
+    String validAfter = dateTimeFormat.format(vote.getValidAfterMillis());
+    String referencing = String.format("V-%s-%s", validAfter,
+        vote.getIdentity());
+    double entryWeight = 200.0 /
+        ((double) vote.getStatusEntries().size());
+    for (NetworkStatusEntry entry : vote.getStatusEntries().values()) {
+      this.addReference(referencing,
+          String.format("S-%s", entry.getDescriptor()), entryWeight,
+          entry.getPublishedMillis() + SIX_HOURS);
+    }
+  }
+
+  private void readServerDescriptor(ServerDescriptor serverDescriptor) {
+    String referenced = serverDescriptor.getExtraInfoDigest() == null ? ""
+        : String.format("E-%s", serverDescriptor.getExtraInfoDigest());
+    this.addReference(String.format("S-%s",
+        serverDescriptor.getServerDescriptorDigest()), referenced, 0.01,
+        serverDescriptor.getPublishedMillis() + SIX_HOURS);
+  }
+
+  private void readExtraInfoDescriptor(
+      ExtraInfoDescriptor extraInfoDescriptor) {
+    this.addReference(String.format("E-%s",
+        extraInfoDescriptor.getExtraInfoDigest()), "", 0.005,
+        extraInfoDescriptor.getPublishedMillis() + SIX_HOURS);
+  }
+
+  private void readMicrodescriptor(Microdescriptor microdesc) {
+    this.addReference(
+        String.format("D-%s", microdesc.getMicrodescriptorDigest()), "",
+        0.0, this.currentTimeMillis + THIRTY_DAYS);
+  }
+
+  private void dropStaleReferences() {
+    SortedSet<Reference> recentReferences = new TreeSet<Reference>();
+    for (Reference reference : this.references) {
+      if (this.currentTimeMillis <= reference.expiresAfterMillis) {
+        recentReferences.add(reference);
+      }
+    }
+    this.references = recentReferences;
+  }
+
+  private void checkReferences() {
+    Set<String> knownDescriptors = new HashSet<String>();
+    for (Reference reference : this.references) {
+      knownDescriptors.add(reference.referencing);
+    }
+    double totalMissingDescriptorsWeight = 0.0;
+    Set<String> missingDescriptors = new TreeSet<String>();
+    StringBuilder sb = new StringBuilder("Missing referenced "
+        + "descriptors:");
+    for (Reference reference : this.references) {
+      if (reference.referenced.length() > 0 &&
+          !knownDescriptors.contains(reference.referenced)) {
+        if (!missingDescriptors.contains(reference.referenced)) {
+          totalMissingDescriptorsWeight += reference.weight;
+        }
+        missingDescriptors.add(reference.referenced);
+        sb.append(String.format("%n%s -> %s (%.4f -> %.4f)",
+            reference.referencing, reference.referenced, reference.weight,
+            totalMissingDescriptorsWeight));
+      }
+    }
+    this.log.log(Level.INFO, sb.toString());
+    if (totalMissingDescriptorsWeight > 0.999) {
+      this.log.log(Level.WARNING, "Missing too many referenced "
+          + "descriptors (" + totalMissingDescriptorsWeight + ").");
+    }
+  }
+
+  private void writeReferencesFile() {
+    Gson gson = new Gson();
+    try {
+      FileWriter fw = new FileWriter(this.referencesFile);
+      gson.toJson(this.references, fw);
+      fw.close();
+    } catch (IOException e) {
+      this.log.log(Level.WARNING, "Cannot write references file for next "
+          + "run.", e);
+    }
+  }
+}
+





More information about the tor-commits mailing list