[tor-commits] [onionoo/master] Refactor rDNS code and add more execution stats.

karsten at torproject.org karsten at torproject.org
Sun Jun 30 20:12:49 UTC 2013


commit 0dabab48c6792579498692503afd8076b219662e
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Fri Jun 28 19:39:12 2013 +0200

    Refactor rDNS code and add more execution stats.
---
 src/org/torproject/onionoo/DetailsDataWriter.java  |  117 +++----------
 src/org/torproject/onionoo/DocumentStore.java      |    4 +
 src/org/torproject/onionoo/LookupService.java      |   20 +++
 src/org/torproject/onionoo/Main.java               |   48 +++---
 src/org/torproject/onionoo/NodeDataWriter.java     |   31 ++++
 .../onionoo/ReverseDomainNameResolver.java         |  178 ++++++++++++++++++++
 6 files changed, 273 insertions(+), 125 deletions(-)

diff --git a/src/org/torproject/onionoo/DetailsDataWriter.java b/src/org/torproject/onionoo/DetailsDataWriter.java
index 3d49169..e4ccb78 100644
--- a/src/org/torproject/onionoo/DetailsDataWriter.java
+++ b/src/org/torproject/onionoo/DetailsDataWriter.java
@@ -2,8 +2,6 @@
  * See LICENSE for licensing information */
 package org.torproject.onionoo;
 
-import java.net.InetAddress;
-import java.net.UnknownHostException;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
 import java.util.ArrayList;
@@ -38,6 +36,8 @@ public class DetailsDataWriter {
 
   private DescriptorSource descriptorSource;
 
+  private ReverseDomainNameResolver reverseDomainNameResolver;
+
   private DocumentStore documentStore;
 
   private SortedMap<String, NodeStatus> relays;
@@ -45,8 +45,10 @@ public class DetailsDataWriter {
   private SortedMap<String, NodeStatus> bridges;
 
   public DetailsDataWriter(DescriptorSource descriptorSource,
+      ReverseDomainNameResolver reverseDomainNameResolver,
       DocumentStore documentStore) {
     this.descriptorSource = descriptorSource;
+    this.reverseDomainNameResolver = reverseDomainNameResolver;
     this.documentStore = documentStore;
   }
 
@@ -63,109 +65,28 @@ public class DetailsDataWriter {
     }
   }
 
-  private static final long RDNS_LOOKUP_MAX_REQUEST_MILLIS = 10L * 1000L;
-  private static final long RDNS_LOOKUP_MAX_DURATION_MILLIS = 5L * 60L
-      * 1000L;
-  private static final long RDNS_LOOKUP_MAX_AGE_MILLIS = 12L * 60L * 60L
-      * 1000L;
-  private static final int RDNS_LOOKUP_WORKERS_NUM = 5;
-  private Set<String> rdnsLookupJobs;
-  private Map<String, String> rdnsLookupResults;
-  private long startedRdnsLookups;
-  private List<RdnsLookupWorker> rdnsLookupWorkers;
   public void startReverseDomainNameLookups() {
-    this.startedRdnsLookups = System.currentTimeMillis();
-    this.rdnsLookupJobs = new HashSet<String>();
+    Map<String, Long> addressLastLookupTimes =
+        new HashMap<String, Long>();
     for (NodeStatus relay : relays.values()) {
-      if (relay.getLastRdnsLookup() < this.startedRdnsLookups
-          - RDNS_LOOKUP_MAX_AGE_MILLIS) {
-        this.rdnsLookupJobs.add(relay.getAddress());
-      }
-    }
-    this.rdnsLookupResults = new HashMap<String, String>();
-    this.rdnsLookupWorkers = new ArrayList<RdnsLookupWorker>();
-    for (int i = 0; i < RDNS_LOOKUP_WORKERS_NUM; i++) {
-      RdnsLookupWorker rdnsLookupWorker = new RdnsLookupWorker();
-      this.rdnsLookupWorkers.add(rdnsLookupWorker);
-      rdnsLookupWorker.setDaemon(true);
-      rdnsLookupWorker.start();
+      addressLastLookupTimes.put(relay.getAddress(),
+          relay.getLastRdnsLookup());
     }
+    this.reverseDomainNameResolver.setAddresses(addressLastLookupTimes);
+    this.reverseDomainNameResolver.startReverseDomainNameLookups();
   }
 
   public void finishReverseDomainNameLookups() {
-    for (RdnsLookupWorker rdnsLookupWorker : this.rdnsLookupWorkers) {
-      try {
-        rdnsLookupWorker.join();
-      } catch (InterruptedException e) {
-        /* This is not something that we can take care of.  Just leave the
-         * worker thread alone. */
-      }
-    }
-    synchronized (this.rdnsLookupResults) {
-      for (NodeStatus relay : relays.values()) {
-        if (this.rdnsLookupResults.containsKey(relay.getAddress())) {
-          relay.setHostName(this.rdnsLookupResults.get(
-              relay.getAddress()));
-          relay.setLastRdnsLookup(this.startedRdnsLookups);
-        }
-      }
-    }
-  }
-
-  private class RdnsLookupWorker extends Thread {
-    public void run() {
-      while (System.currentTimeMillis() - RDNS_LOOKUP_MAX_DURATION_MILLIS
-          <= startedRdnsLookups) {
-        String rdnsLookupJob = null;
-        synchronized (rdnsLookupJobs) {
-          for (String job : rdnsLookupJobs) {
-            rdnsLookupJob = job;
-            rdnsLookupJobs.remove(job);
-            break;
-          }
-        }
-        if (rdnsLookupJob == null) {
-          break;
-        }
-        RdnsLookupRequest request = new RdnsLookupRequest(this,
-            rdnsLookupJob);
-        request.setDaemon(true);
-        request.start();
-        try {
-          Thread.sleep(RDNS_LOOKUP_MAX_REQUEST_MILLIS);
-        } catch (InterruptedException e) {
-          /* Getting interrupted should be the default case. */
-        }
-        String hostName = request.getHostName();
-        if (hostName != null) {
-          synchronized (rdnsLookupResults) {
-            rdnsLookupResults.put(rdnsLookupJob, hostName);
-          }
-        }
-      }
-    }
-  }
-
-  private class RdnsLookupRequest extends Thread {
-    RdnsLookupWorker parent;
-    String address, hostName;
-    public RdnsLookupRequest(RdnsLookupWorker parent, String address) {
-      this.parent = parent;
-      this.address = address;
-    }
-    public void run() {
-      try {
-        String result = InetAddress.getByName(this.address).getHostName();
-        synchronized (this) {
-          this.hostName = result;
-        }
-      } catch (UnknownHostException e) {
-        /* We'll try again the next time. */
+    this.reverseDomainNameResolver.finishReverseDomainNameLookups();
+    Map<String, String> lookupResults =
+        this.reverseDomainNameResolver.getLookupResults();
+    long startedRdnsLookups =
+        this.reverseDomainNameResolver.getLookupStartMillis();
+    for (NodeStatus relay : relays.values()) {
+      if (lookupResults.containsKey(relay.getAddress())) {
+        relay.setHostName(lookupResults.get(relay.getAddress()));
+        relay.setLastRdnsLookup(startedRdnsLookups);
       }
-      this.parent.interrupt();
-    }
-    public synchronized String getHostName() {
-      return hostName;
     }
   }
 
diff --git a/src/org/torproject/onionoo/DocumentStore.java b/src/org/torproject/onionoo/DocumentStore.java
index 829f35f..11d7d1b 100644
--- a/src/org/torproject/onionoo/DocumentStore.java
+++ b/src/org/torproject/onionoo/DocumentStore.java
@@ -374,6 +374,10 @@ public class DocumentStore {
   }
 
   public void flushDocumentCache() {
+    /* Write cached node statuses to disk, and write update file
+     * containing current time.  It's important to write the update file
+     * now, not earlier, because the front-end should not read new node
+     * statuses until all details, bandwidths, and weights are ready. */
     if (this.listedArchivedNodeStatuses) {
       this.writeNodeStatuses(false);
       this.writeNodeStatuses(true);
diff --git a/src/org/torproject/onionoo/LookupService.java b/src/org/torproject/onionoo/LookupService.java
index 4071e26..3791443 100644
--- a/src/org/torproject/onionoo/LookupService.java
+++ b/src/org/torproject/onionoo/LookupService.java
@@ -379,6 +379,10 @@ public class LookupService {
       lookupResults.put(addressString, lookupResult);
     }
 
+    /* Keep statistics. */
+    this.addressesLookedUp += addressStrings.size();
+    this.addressesResolved += lookupResults.size();
+
     return lookupResults;
   }
 
@@ -392,4 +396,20 @@ public class LookupService {
     String aSNumber;
     String aSName;
   }
+
+  int addressesLookedUp = 0, addressesResolved = 0;
+
+  public String getStatsString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append("    " + formatDecimalNumber(addressesLookedUp)
+        + " addresses looked up\n");
+    sb.append("    " + formatDecimalNumber(addressesResolved)
+        + " addresses resolved\n");
+    return sb.toString();
+  }
+
+  //TODO This method should go into a utility class.
+  private static String formatDecimalNumber(long decimalNumber) {
+    return String.format("%,d", decimalNumber);
+  }
 }
diff --git a/src/org/torproject/onionoo/Main.java b/src/org/torproject/onionoo/Main.java
index 2232d81..3fca6f0 100644
--- a/src/org/torproject/onionoo/Main.java
+++ b/src/org/torproject/onionoo/Main.java
@@ -10,19 +10,17 @@ import java.util.SortedMap;
 public class Main {
   public static void main(String[] args) {
 
-    printStatus("Initializing descriptor source.");
+    printStatus("Initializing.");
     DescriptorSource dso = new DescriptorSource(new File("in"),
         new File("status"));
     printStatusTime("Initialized descriptor source");
-
-    printStatus("Initializing document store.");
     DocumentStore ds = new DocumentStore(new File("status"),
         new File("out"));
     printStatusTime("Initialized document store");
-
-    printStatus("Initializing lookup service.");
     LookupService ls = new LookupService(new File("geoip"));
     printStatusTime("Initialized Geoip lookup service");
+    ReverseDomainNameResolver rdnr = new ReverseDomainNameResolver();
+    printStatusTime("Initialized reverse domain name resolver");
 
     printStatus("Updating internal node list.");
     NodeDataWriter ndw = new NodeDataWriter(dso, ls, ds);
@@ -38,13 +36,14 @@ public class Main {
     printStatusTime("Set running bits");
     ndw.writeStatusSummary();
     printStatusTime("Wrote status summary");
+    ndw.writeOutSummary();
+    printStatusTime("Wrote out summary");
     SortedMap<String, NodeStatus> currentNodes = ndw.getCurrentNodes();
     SortedMap<String, Integer> lastBandwidthWeights =
         ndw.getLastBandwidthWeights();
-    // TODO Could write statistics here, too.
 
     printStatus("Updating detail data.");
-    DetailsDataWriter ddw = new DetailsDataWriter(dso, ds);
+    DetailsDataWriter ddw = new DetailsDataWriter(dso, rdnr, ds);
     // TODO Instead of using ndw's currentNodes and lastBandwidthWeights,
     // parse statuses once again, keeping separate parse history.  Allows
     // us to run ndw and ddw in parallel in the future.  Alternatively,
@@ -67,7 +66,6 @@ public class Main {
     printStatusTime("Finished reverse domain name lookups");
     ddw.writeOutDetails();
     printStatusTime("Wrote detail data files");
-    // TODO Could write statistics here, too.
 
     printStatus("Updating bandwidth data.");
     BandwidthDataWriter bdw = new BandwidthDataWriter(dso, ds);
@@ -81,7 +79,6 @@ public class Main {
     // future.
     bdw.deleteObsoleteBandwidthFiles();
     printStatusTime("Deleted obsolete bandwidth files");
-    // TODO Could write statistics here, too.
 
     printStatus("Updating weights data.");
     WeightsDataWriter wdw = new WeightsDataWriter(dso, ds);
@@ -98,27 +95,24 @@ public class Main {
     // which allows us to run ndw and wdw in parallel in the future.
     wdw.deleteObsoleteWeightsDataFiles();
     printStatusTime("Deleted obsolete weights files");
-    // TODO Could write statistics here, too.
-
-    printStatus("Updating summary data.");
-    ndw.writeOutSummary();
-    printStatusTime("Wrote out summary");
-    // TODO Could write statistics here, too.
-
-    // TODO "Shut down" lookup service and write statistics about number
-    // of (successfully) looked up addresses.
 
-    printStatus("Shutting down descriptor source.");
+    printStatus("Shutting down.");
     dso.writeHistoryFiles();
     printStatusTime("Wrote parse histories");
-    printStatistics(dso.getStatsString());
-    printStatusTime("Shut down descriptor source");
-
-    printStatus("Shutting down document store.");
     ds.flushDocumentCache();
     printStatusTime("Flushed document cache");
-    printStatistics(ds.getStatsString());
-    printStatusTime("Shut down document store");
+
+    printStatus("Gathering statistics.");
+    printStatistics("Node data writer", ndw.getStatsString());
+    /* TODO Add statistics to remaining *Writers. */
+    //printStatistics("Details data writer", ddw.getStatsString());
+    //printStatistics("Bandwidth data writer", bdw.getStatsString());
+    //printStatistics("Weights data writer", wdw.getStatsString());
+    printStatistics("Descriptor source", dso.getStatsString());
+    printStatistics("Document store", ds.getStatsString());
+    printStatistics("GeoIP lookup service", ls.getStatsString());
+    printStatistics("Reverse domain name resolver",
+        rdnr.getStatsString());
 
     printStatus("Terminating.");
   }
@@ -131,8 +125,8 @@ public class Main {
     printedLastStatusMessage = System.currentTimeMillis();
   }
 
-  private static void printStatistics(String message) {
-    System.out.print("  Statistics:\n" + message);
+  private static void printStatistics(String component, String message) {
+    System.out.print("  " + component + " statistics:\n" + message);
   }
 
   private static void printStatusTime(String message) {
diff --git a/src/org/torproject/onionoo/NodeDataWriter.java b/src/org/torproject/onionoo/NodeDataWriter.java
index 9cfbb21..4afc595 100644
--- a/src/org/torproject/onionoo/NodeDataWriter.java
+++ b/src/org/torproject/onionoo/NodeDataWriter.java
@@ -33,6 +33,9 @@ public class NodeDataWriter {
 
   private SortedMap<String, Integer> lastBandwidthWeights = null;
 
+  private int relaysUpdated = 0, relaysAdded = 0,
+      relayConsensusesProcessed = 0, bridgesUpdated = 0,
+      bridgesAdded = 0, bridgeStatusesProcessed = 0;
   public NodeDataWriter(DescriptorSource descriptorSource,
       LookupService lookupService, DocumentStore documentStore) {
     this.descriptorSource = descriptorSource;
@@ -115,10 +118,13 @@ public class NodeDataWriter {
           validAfterMillis, null);
       if (this.knownNodes.containsKey(fingerprint)) {
         this.knownNodes.get(fingerprint).update(newNodeStatus);
+        this.relaysUpdated++;
       } else {
         this.knownNodes.put(fingerprint, newNodeStatus);
+        this.relaysAdded++;
       }
     }
+    this.relayConsensusesProcessed++;
     if (this.relaysLastValidAfterMillis == validAfterMillis) {
       this.lastBandwidthWeights = consensus.getBandwidthWeights();
     }
@@ -195,10 +201,13 @@ public class NodeDataWriter {
           -1L, null, null, publishedMillis, -1L, null);
       if (this.knownNodes.containsKey(fingerprint)) {
         this.knownNodes.get(fingerprint).update(newNodeStatus);
+        this.bridgesUpdated++;
       } else {
         this.knownNodes.put(fingerprint, newNodeStatus);
+        this.bridgesAdded++;
       }
     }
+    this.bridgeStatusesProcessed++;
   }
 
   public void writeStatusSummary() {
@@ -233,5 +242,27 @@ public class NodeDataWriter {
   public SortedMap<String, Integer> getLastBandwidthWeights() {
     return this.lastBandwidthWeights;
   }
+
+  public String getStatsString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append("    " + formatDecimalNumber(relayConsensusesProcessed)
+        + " relay consensuses processed\n");
+    sb.append("    " + formatDecimalNumber(relaysUpdated)
+        + " relays updated\n");
+    sb.append("    " + formatDecimalNumber(relaysAdded)
+        + " relays added\n");
+    sb.append("    " + formatDecimalNumber(bridgeStatusesProcessed)
+        + " bridge statuses processed\n");
+    sb.append("    " + formatDecimalNumber(bridgesUpdated)
+        + " bridges updated\n");
+    sb.append("    " + formatDecimalNumber(bridgesAdded)
+        + " bridges added\n");
+    return sb.toString();
+  }
+
+  //TODO This method should go into a utility class.
+  private static String formatDecimalNumber(long decimalNumber) {
+    return String.format("%,d", decimalNumber);
+  }
 }
 
diff --git a/src/org/torproject/onionoo/ReverseDomainNameResolver.java b/src/org/torproject/onionoo/ReverseDomainNameResolver.java
new file mode 100644
index 0000000..b04ad0a
--- /dev/null
+++ b/src/org/torproject/onionoo/ReverseDomainNameResolver.java
@@ -0,0 +1,178 @@
+/* Copyright 2013 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.onionoo;
+
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+public class ReverseDomainNameResolver {
+
+  private class RdnsLookupWorker extends Thread {
+    public void run() {
+      while (System.currentTimeMillis() - RDNS_LOOKUP_MAX_DURATION_MILLIS
+          <= startedRdnsLookups) {
+        String rdnsLookupJob = null;
+        synchronized (rdnsLookupJobs) {
+          for (String job : rdnsLookupJobs) {
+            rdnsLookupJob = job;
+            rdnsLookupJobs.remove(job);
+            break;
+          }
+        }
+        if (rdnsLookupJob == null) {
+          break;
+        }
+        RdnsLookupRequest request = new RdnsLookupRequest(this,
+            rdnsLookupJob);
+        request.setDaemon(true);
+        request.start();
+        try {
+          Thread.sleep(RDNS_LOOKUP_MAX_REQUEST_MILLIS);
+        } catch (InterruptedException e) {
+          /* Getting interrupted should be the default case. */
+        }
+        String hostName = request.getHostName();
+        if (hostName != null) {
+          synchronized (rdnsLookupResults) {
+            rdnsLookupResults.put(rdnsLookupJob, hostName);
+          }
+        }
+        long lookupMillis = request.getLookupMillis();
+        if (lookupMillis >= 0L) {
+          synchronized (rdnsLookupMillis) {
+            rdnsLookupMillis.add(lookupMillis);
+          }
+        }
+      }
+    }
+  }
+
+  private class RdnsLookupRequest extends Thread {
+    RdnsLookupWorker parent;
+    String address, hostName;
+    long lookupStartedMillis = -1L, lookupCompletedMillis = -1L;
+    public RdnsLookupRequest(RdnsLookupWorker parent, String address) {
+      this.parent = parent;
+      this.address = address;
+    }
+    public void run() {
+      this.lookupStartedMillis = System.currentTimeMillis();
+      try {
+        String result = InetAddress.getByName(this.address).getHostName();
+        synchronized (this) {
+          this.hostName = result;
+        }
+      } catch (UnknownHostException e) {
+        /* We'll try again the next time. */
+      }
+      this.lookupCompletedMillis = System.currentTimeMillis();
+      this.parent.interrupt();
+    }
+    public synchronized String getHostName() {
+      return hostName;
+    }
+    public synchronized long getLookupMillis() {
+      return this.lookupCompletedMillis - this.lookupStartedMillis;
+    }
+  }
+
+  private static final long RDNS_LOOKUP_MAX_REQUEST_MILLIS = 10L * 1000L;
+  private static final long RDNS_LOOKUP_MAX_DURATION_MILLIS = 5L * 60L
+      * 1000L;
+  private static final long RDNS_LOOKUP_MAX_AGE_MILLIS = 12L * 60L * 60L
+      * 1000L;
+  private static final int RDNS_LOOKUP_WORKERS_NUM = 5;
+
+  private Map<String, Long> addressLastLookupTimes;
+
+  private Set<String> rdnsLookupJobs;
+
+  private Map<String, String> rdnsLookupResults;
+
+  private List<Long> rdnsLookupMillis;
+
+  private long startedRdnsLookups;
+
+  private List<RdnsLookupWorker> rdnsLookupWorkers;
+
+  public void setAddresses(Map<String, Long> addressLastLookupTimes) {
+    this.addressLastLookupTimes = addressLastLookupTimes;
+  }
+
+  public void startReverseDomainNameLookups() {
+    this.startedRdnsLookups = System.currentTimeMillis();
+    this.rdnsLookupJobs = new HashSet<String>();
+    for (Map.Entry<String, Long> e :
+        this.addressLastLookupTimes.entrySet()) {
+      if (e.getValue() < this.startedRdnsLookups
+          - RDNS_LOOKUP_MAX_AGE_MILLIS) {
+        this.rdnsLookupJobs.add(e.getKey());
+      }
+    }
+    this.rdnsLookupResults = new HashMap<String, String>();
+    this.rdnsLookupMillis = new ArrayList<Long>();
+    this.rdnsLookupWorkers = new ArrayList<RdnsLookupWorker>();
+    for (int i = 0; i < RDNS_LOOKUP_WORKERS_NUM; i++) {
+      RdnsLookupWorker rdnsLookupWorker = new RdnsLookupWorker();
+      this.rdnsLookupWorkers.add(rdnsLookupWorker);
+      rdnsLookupWorker.setDaemon(true);
+      rdnsLookupWorker.start();
+    }
+  }
+
+  public void finishReverseDomainNameLookups() {
+    for (RdnsLookupWorker rdnsLookupWorker : this.rdnsLookupWorkers) {
+      try {
+        rdnsLookupWorker.join();
+      } catch (InterruptedException e) {
+        /* This is not something that we can take care of.  Just leave the
+         * worker thread alone. */
+      }
+    }
+  }
+
+  public Map<String, String> getLookupResults() {
+    synchronized (this.rdnsLookupResults) {
+      return new HashMap<String, String>(this.rdnsLookupResults);
+    }
+  }
+
+  public long getLookupStartMillis() {
+    return this.startedRdnsLookups;
+  }
+
+  public String getStatsString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append("    " + formatDecimalNumber(rdnsLookupMillis.size())
+        + " lookups performed\n");
+    if (rdnsLookupMillis.size() > 0) {
+      Collections.sort(rdnsLookupMillis);
+      sb.append("    " + formatMillis(rdnsLookupMillis.get(0))
+          + " minimum lookup time\n");
+      sb.append("    " + formatMillis(rdnsLookupMillis.get(
+          rdnsLookupMillis.size() / 2)) + " median lookup time\n");
+      sb.append("    " + formatMillis(rdnsLookupMillis.get(
+          rdnsLookupMillis.size() - 1)) + " maximum lookup time\n");
+    }
+    return sb.toString();
+  }
+
+  //TODO This method should go into a utility class.
+  private static String formatDecimalNumber(long decimalNumber) {
+    return String.format("%,d", decimalNumber);
+  }
+
+  // TODO This method should go into a utility class.
+  private static String formatMillis(long millis) {
+    return String.format("%02d:%02d.%03d minutes",
+        millis / (1000L * 60L), (millis / 1000L) % 60L, millis % 1000L);
+  }
+}
+



More information about the tor-commits mailing list