[tor-commits] [exonerator/master] Print out a warning if we're missing data.

karsten at torproject.org karsten at torproject.org
Tue Oct 15 13:58:19 UTC 2019


commit b0165cb03138e933a337ddc8400c3a8bbe889b3d
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Mon Sep 30 17:44:38 2019 +0200

    Print out a warning if we're missing data.
    
    We're now counting the hours for which we have statuses and exit
    lists. More precisely, we're truncating consensus valid-after times
    and exit scan times to the hour and counting how many distinct hours
    we saw during the requested time. If we're missing 18 or more hours of
    statuses or exit lists we're printing out a note in the summary.
    
    Fixes #31071.
---
 .../metrics/exonerator/ExoneraTorServlet.java      | 43 ++++++++++++++++------
 .../metrics/exonerator/QueryResponse.java          | 21 ++++++++++-
 .../metrics/exonerator/QueryServlet.java           | 24 +++++++++++-
 src/main/resources/ExoneraTor.properties           |  1 +
 src/main/sql/exonerator2.sql                       | 42 +++++++++++++++------
 .../metrics/exonerator/ExoneraTorServletTest.java  |  2 +-
 .../metrics/exonerator/QueryResponseTest.java      | 12 ++++--
 7 files changed, 112 insertions(+), 33 deletions(-)

diff --git a/src/main/java/org/torproject/metrics/exonerator/ExoneraTorServlet.java b/src/main/java/org/torproject/metrics/exonerator/ExoneraTorServlet.java
index a74aa60..4b0cf16 100644
--- a/src/main/java/org/torproject/metrics/exonerator/ExoneraTorServlet.java
+++ b/src/main/java/org/torproject/metrics/exonerator/ExoneraTorServlet.java
@@ -89,6 +89,7 @@ public class ExoneraTorServlet extends HttpServlet {
       ExoneraTorDate firstDate = ExoneraTorDate.INVALID;
       ExoneraTorDate lastDate = ExoneraTorDate.INVALID;
       boolean noRelevantConsensuses = true;
+      boolean missingData = false;
       List<String[]> statusEntries = new ArrayList<>();
       List<String> addressesInSameNetwork = null;
 
@@ -105,6 +106,14 @@ public class ExoneraTorServlet extends HttpServlet {
               && queryResponse.relevantStatuses) {
             noRelevantConsensuses = false;
           }
+          if (null != queryResponse.missingStatuses
+              && queryResponse.missingStatuses) {
+            missingData = true;
+          }
+          if (null != queryResponse.missingExitLists
+              && queryResponse.missingExitLists) {
+            missingData = true;
+          }
           if (null != queryResponse.matches) {
             for (QueryResponse.Match match : queryResponse.matches) {
               StringBuilder sb = new StringBuilder();
@@ -210,15 +219,18 @@ public class ExoneraTorServlet extends HttpServlet {
         /* Print out result. */
       } else {
         if (!statusEntries.isEmpty()) {
-          this.writeSummaryPositive(out, rb, relayIp, requestedDate.asString);
+          this.writeSummaryPositive(out, rb, relayIp, requestedDate.asString,
+              missingData);
           this.writeTechnicalDetails(out, rb, relayIp, requestedDate.asString,
               statusEntries);
         } else if (addressesInSameNetwork != null
             && !addressesInSameNetwork.isEmpty()) {
           this.writeSummaryAddressesInSameNetwork(out, rb, requestUrl, relayIp,
-              requestedDate.asString, langStr, addressesInSameNetwork);
+              requestedDate.asString, langStr, addressesInSameNetwork,
+              missingData);
         } else {
-          this.writeSummaryNegative(out, rb, relayIp, requestedDate.asString);
+          this.writeSummaryNegative(out, rb, relayIp, requestedDate.asString,
+              missingData);
         }
         this.writePermanentLink(out, rb, requestUrl, relayIp,
             requestedDate.asString, langStr);
@@ -398,13 +410,13 @@ public class ExoneraTorServlet extends HttpServlet {
   private void writeSummaryNoTimestamp(PrintWriter out, ResourceBundle rb) {
     this.writeSummary(out, rb.getString("summary.heading"),
         "panel-danger",
-        rb.getString("summary.invalidparams.notimestamp.title"), null,
+        rb.getString("summary.invalidparams.notimestamp.title"), null, null,
         rb.getString("summary.invalidparams.notimestamp.body"));
   }
 
   private void writeSummaryNoIp(PrintWriter out, ResourceBundle rb) {
     this.writeSummary(out, rb.getString("summary.heading"),
-        "panel-danger", rb.getString("summary.invalidparams.noip.title"),
+        "panel-danger", rb.getString("summary.invalidparams.noip.title"), null,
         null, rb.getString("summary.invalidparams.noip.body"));
   }
 
@@ -413,7 +425,7 @@ public class ExoneraTorServlet extends HttpServlet {
       String lastDate) {
     this.writeSummary(out, rb.getString("summary.heading"),
         "panel-danger",
-        rb.getString("summary.invalidparams.timestamprange.title"), null,
+        rb.getString("summary.invalidparams.timestamprange.title"), null, null,
         rb.getString("summary.invalidparams.timestamprange.body"),
         timestampStr, firstDate, lastDate);
   }
@@ -438,7 +450,7 @@ public class ExoneraTorServlet extends HttpServlet {
         : StringEscapeUtils.escapeHtml4(timestampParameter);
     this.writeSummary(out, rb.getString("summary.heading"),
         "panel-danger",
-        rb.getString("summary.invalidparams.invalidtimestamp.title"),
+        rb.getString("summary.invalidparams.invalidtimestamp.title"), null,
         null, rb.getString("summary.invalidparams.invalidtimestamp.body"),
         escapedTimestampParameter, "\"YYYY-MM-DD\"");
   }
@@ -446,7 +458,7 @@ public class ExoneraTorServlet extends HttpServlet {
   private void writeSummaryTimestampTooRecent(PrintWriter out,
       ResourceBundle rb) {
     this.writeSummary(out, rb.getString("summary.heading"), "panel-danger",
-        rb.getString("summary.invalidparams.timestamptoorecent.title"),
+        rb.getString("summary.invalidparams.timestamptoorecent.title"), null,
         null, rb.getString("summary.invalidparams.timestamptoorecent.body"));
   }
 
@@ -465,7 +477,8 @@ public class ExoneraTorServlet extends HttpServlet {
 
   void writeSummaryAddressesInSameNetwork(PrintWriter out,
       ResourceBundle rb, String requestUrl, String relayIp, String timestampStr,
-      String langStr, List<String> addressesInSameNetwork) {
+      String langStr, List<String> addressesInSameNetwork,
+      boolean missingData) {
     Object[][] panelItems = new Object[addressesInSameNetwork.size()][];
     for (int i = 0; i < addressesInSameNetwork.size(); i++) {
       String addressInSameNetwork = addressesInSameNetwork.get(i);
@@ -486,33 +499,36 @@ public class ExoneraTorServlet extends HttpServlet {
     this.writeSummary(out, rb.getString("summary.heading"),
         "panel-warning",
         rb.getString("summary.negativesamenetwork.title"), panelItems,
+        missingData ? rb.getString("summary.missingdata") : null,
         rb.getString("summary.negativesamenetwork.body"),
         relayIp, timestampStr, relayIp.contains(":") ? 48 : 24);
   }
 
   private void writeSummaryPositive(PrintWriter out, ResourceBundle rb,
-      String relayIp, String timestampStr) {
+      String relayIp, String timestampStr, boolean missingData) {
     String formattedRelayIp = relayIp.contains(":")
         ? "[" + relayIp + "]" : relayIp;
     this.writeSummary(out, rb.getString("summary.heading"),
         "panel-success", rb.getString("summary.positive.title"), null,
+        missingData ? rb.getString("summary.missingdata") : null,
         rb.getString("summary.positive.body"), formattedRelayIp,
         timestampStr);
   }
 
   private void writeSummaryNegative(PrintWriter out, ResourceBundle rb,
-      String relayIp, String timestampStr) {
+      String relayIp, String timestampStr, boolean missingData) {
     String formattedRelayIp = relayIp.contains(":")
         ? "[" + relayIp + "]" : relayIp;
     this.writeSummary(out, rb.getString("summary.heading"),
         "panel-warning", rb.getString("summary.negative.title"), null,
+        missingData ? rb.getString("summary.missingdata") : null,
         rb.getString("summary.negative.body"), formattedRelayIp,
         timestampStr);
   }
 
   private void writeSummary(PrintWriter out, String heading,
       String panelContext, String panelTitle, Object[][] panelItems,
-      String panelBodyTemplate, Object... panelBodyArgs) {
+      String panelWarning, String panelBodyTemplate, Object... panelBodyArgs) {
     out.printf("      <div class=\"row\">\n"
         + "        <div class=\"col-xs-12\">\n"
         + "          <h2>%s</h2>\n"
@@ -531,6 +547,9 @@ public class ExoneraTorServlet extends HttpServlet {
       }
       out.print("              </ul>\n");
     }
+    if (null != panelWarning) {
+      out.printf("              <p>%s</p>\n", panelWarning);
+    }
     out.print("            </div><!-- panel-body -->\n"
         + "          </div><!-- panel -->\n"
         + "        </div><!-- col -->\n"
diff --git a/src/main/java/org/torproject/metrics/exonerator/QueryResponse.java b/src/main/java/org/torproject/metrics/exonerator/QueryResponse.java
index 6a8976a..d148d0c 100644
--- a/src/main/java/org/torproject/metrics/exonerator/QueryResponse.java
+++ b/src/main/java/org/torproject/metrics/exonerator/QueryResponse.java
@@ -21,7 +21,7 @@ public class QueryResponse {
   private static Logger logger = LoggerFactory.getLogger(QueryResponse.class);
 
   /* Actual version implemented by this class. */
-  private static final String VERSION = "1.0";
+  private static final String VERSION = "1.1";
 
   /* Don't accept query responses with versions lower than this. */
   private static final String FIRSTRECOGNIZEDVERSION = "1.0";
@@ -60,6 +60,20 @@ public class QueryResponse {
    * a day of the requested date; {@code null} if the database is empty. */
   Boolean relevantStatuses;
 
+  /** Whether there are at least 18 hours of statuses missing in the database
+   * within a day of the requested date; {@code null} if the database is
+   * empty.
+   *
+   * @since 1.1 */
+  Boolean missingStatuses;
+
+  /** Whether there are at least 18 hours of exit lists missing in the database
+   * from two days before the requested date until one day after; {@code null}
+   * if the database is empty.
+   *
+   * @since 1.1 */
+  Boolean missingExitLists;
+
   /** All matches for the given IP address and date; {@code null} if there
    * were no matches at all. */
   Match[] matches;
@@ -70,13 +84,16 @@ public class QueryResponse {
   /** Constructor for tests. */
   QueryResponse(String version, String queryAddress, String queryDate,
       String firstDateInDatabase, String lastDateInDatabase,
-      Boolean relevantStatuses, Match[] matches, String[] nearbyAddresses) {
+      Boolean relevantStatuses, Boolean missingStatuses,
+      Boolean missingExitLists, Match[] matches, String[] nearbyAddresses) {
     this.version = version;
     this.queryAddress = queryAddress;
     this.queryDate = queryDate;
     this.firstDateInDatabase = firstDateInDatabase;
     this.lastDateInDatabase = lastDateInDatabase;
     this.relevantStatuses = relevantStatuses;
+    this.missingStatuses = missingStatuses;
+    this.missingExitLists = missingExitLists;
     this.matches = matches;
     this.nearbyAddresses = nearbyAddresses;
   }
diff --git a/src/main/java/org/torproject/metrics/exonerator/QueryServlet.java b/src/main/java/org/torproject/metrics/exonerator/QueryServlet.java
index 760e385..17980a9 100644
--- a/src/main/java/org/torproject/metrics/exonerator/QueryServlet.java
+++ b/src/main/java/org/torproject/metrics/exonerator/QueryServlet.java
@@ -54,6 +54,10 @@ public class QueryServlet extends HttpServlet {
       = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")
       .withZone(ZoneOffset.UTC);
 
+  private static final int MISSING_STATUSES_IF_LESS_THAN = 3 * 24 - 17;
+
+  private static final int MISSING_EXIT_LISTS_IF_LESS_THAN = 4 * 24 - 17;
+
   @Override
   public void init() {
     this.logger = LoggerFactory.getLogger(QueryServlet.class);
@@ -264,6 +268,14 @@ public class QueryServlet extends HttpServlet {
      * {first|last}_date_in_database and relevant_statuses fields. */
     SortedSet<LocalDate> allDates = new TreeSet<>();
 
+    /* Store all hours for which the database contains relevant status
+     * entries. */
+    Set<LocalDateTime> allStatusHours = new HashSet<>();
+
+    /* Store all hours for which the database contains relevant exit list
+     * entries. */
+    Set<LocalDateTime> allExitListHours = new HashSet<>();
+
     /* Store all possible matches for the results table by base64-encoded
      * fingerprint and valid-after time. This map is first populated by going
      * through the result set and adding or updating map entries, so that
@@ -309,12 +321,12 @@ public class QueryServlet extends HttpServlet {
             String orAddress = rs.getString(8);
             if (null != date) {
               allDates.add(date);
-            } else if (null != scanned) {
+            } else if (null != scanned && null != fingerprintBase64) {
               exitAddressesByFingeprintBase64AndScanned.putIfAbsent(
                   fingerprintBase64, new TreeMap<>());
               exitAddressesByFingeprintBase64AndScanned.get(fingerprintBase64)
                   .put(scanned, exitAddress);
-            } else if (null != validAfter) {
+            } else if (null != validAfter && null != fingerprintBase64) {
               matchesByFingerprintBase64AndValidAfter.putIfAbsent(
                   fingerprintBase64, new TreeMap<>());
               if (!matchesByFingerprintBase64AndValidAfter
@@ -338,6 +350,10 @@ public class QueryServlet extends HttpServlet {
               }
               matchesByAddress.putIfAbsent(orAddress, new HashSet<>());
               matchesByAddress.get(orAddress).add(match);
+            } else if (null != validAfter) {
+              allStatusHours.add(validAfter);
+            } else if (null != scanned) {
+              allExitListHours.add(scanned);
             }
           }
         } catch (SQLException e) {
@@ -394,6 +410,10 @@ public class QueryServlet extends HttpServlet {
           || allDates.contains(timestamp.minusDays(1L))
           || allDates.contains(timestamp.plusDays(1L));
     }
+    response.missingStatuses = allStatusHours.size()
+        < MISSING_STATUSES_IF_LESS_THAN;
+    response.missingExitLists = allExitListHours.size()
+        < MISSING_EXIT_LISTS_IF_LESS_THAN;
     if (matchesByAddress.containsKey(relayIp)) {
       List<QueryResponse.Match> matchesList
           = new ArrayList<>(matchesByAddress.get(relayIp));
diff --git a/src/main/resources/ExoneraTor.properties b/src/main/resources/ExoneraTor.properties
index 05bb0cb..a67d880 100644
--- a/src/main/resources/ExoneraTor.properties
+++ b/src/main/resources/ExoneraTor.properties
@@ -30,6 +30,7 @@ summary.positive.title=Result is positive
 summary.positive.body=We found one or more Tor relays on IP address %s on or within a day of %s that Tor clients were likely to know.
 summary.negative.title=Result is negative
 summary.negative.body=We did not find IP address %s on or within a day of %s.
+summary.missingdata=However, the database is missing several hours of data for this specific request, so that this result must be interpreted carefully.
 technicaldetails.heading=Technical details
 technicaldetails.pre=Looking up IP address %s on or within one day of %s. Tor clients could have selected this or these Tor relays to build circuits.
 technicaldetails.colheader.timestamp=Timestamp (UTC)
diff --git a/src/main/sql/exonerator2.sql b/src/main/sql/exonerator2.sql
index 397d2bf..ef017d3 100755
--- a/src/main/sql/exonerator2.sql
+++ b/src/main/sql/exonerator2.sql
@@ -279,20 +279,24 @@ CREATE OR REPLACE FUNCTION insert_exitlistentry_exitaddress (
   END;
 $$ LANGUAGE 'plpgsql';
 
--- Search for (1) status entries with an IPv4 or IPv6 onion routing address in
--- the same /24 network as the given hex-encoded IP address prefix and with a
--- valid-after date within a day of the given date, (2) exit list entries with
--- an IPv4 exit address in the same /24 network and with a scan time not earlier
--- than two days before and not later than one day after the given date, and (3)
--- the last and first dates in the database as well as the dates for which the
--- database contains relevant data within a day of the given date.
+-- Search by date and /24 IPv4 or IPv6 prefix for:
+--  - exit list entries with an IPv4 exit address in the same /24 network and
+--    with a scan time not earlier than two days before and not later than one
+--    day after the given date,
+--  - status entries with an IPv4 or IPv6 onion routing address in the same /24
+--    network as the given hex-encoded IP address prefix and with a valid-after
+--    date within a day of the given date,
+--  - the first and last dates in the database as well as the dates for which
+--    the database contains relevant data within a day of the given date,
+--  - the hours for which the database contains relevant exit list entries, and
+--  - the hours for which the database contains relevant status entries.
 --
 -- This function makes heavy use of the date_address24 table in order to reduce
 -- query response time by first obtaining all relevant fingerprint identifiers.
--- In the next step it runs three selects to obtain status entries, exit list
--- entries, and relevant dates. Any postprocessing, including filtering by exact
--- IP address or matching status entries and exit list entries, needs to happen
--- at the caller.
+-- In the next step it runs five selects to obtain status entries, exit list
+-- entries, relevant dates, and relevant hours. Any postprocessing, including
+-- filtering by exact IP address or matching status entries and exit list
+-- entries, needs to happen at the caller.
 CREATE OR REPLACE FUNCTION search_by_date_address24 (
   search_date DATE, search_address24 CHARACTER(6))
     RETURNS TABLE(
@@ -339,7 +343,21 @@ CREATE OR REPLACE FUNCTION search_by_date_address24 (
      WHERE date IN (SELECT MIN(date) FROM date_address24 UNION
                     SELECT MAX(date) FROM date_address24 UNION
                     SELECT date FROM date_address24
-                    WHERE date >= $1 - 1 AND date <= $1 + 1)'
+                    WHERE date >= $1 - 1 AND date <= $1 + 1)
+     UNION
+     SELECT NULL AS date, NULL AS fingerprint_base64,
+           DATE_TRUNC(''hour'', scanned) AS scanned, NULL AS exitaddress,
+           NULL AS validafter, NULL AS nickname, NULL AS exit, NULL AS oraddress
+       FROM exitlistentry_exitaddress
+       WHERE DATE(exitlistentry_exitaddress.scanned) >= $1 - 2
+       AND DATE(exitlistentry_exitaddress.scanned) <= $1 + 1
+     UNION
+     SELECT NULL AS date, NULL AS fingerprint_base64, NULL AS scanned,
+           NULL AS exitaddress, DATE_TRUNC(''hour'', validafter) AS validafter,
+           NULL AS nickname, NULL AS exit, NULL AS oraddress
+       FROM statusentry_oraddress
+       WHERE DATE(statusentry_oraddress.validafter) >= $1 - 1
+       AND DATE(statusentry_oraddress.validafter) <= $1 + 1'
     USING search_date, search_address24;
 END;
 $$ LANGUAGE plpgsql;
diff --git a/src/test/java/org/torproject/metrics/exonerator/ExoneraTorServletTest.java b/src/test/java/org/torproject/metrics/exonerator/ExoneraTorServletTest.java
index 1ede590..64656da 100644
--- a/src/test/java/org/torproject/metrics/exonerator/ExoneraTorServletTest.java
+++ b/src/test/java/org/torproject/metrics/exonerator/ExoneraTorServletTest.java
@@ -50,7 +50,7 @@ public class ExoneraTorServletTest {
       StringWriter sw = new StringWriter();
       es.writeSummaryAddressesInSameNetwork(new PrintWriter(sw), rb,
           "http://localhost:8080/", qr.queryAddress, qr.queryDate, "en",
-          Arrays.asList(qr.nearbyAddresses));
+          Arrays.asList(qr.nearbyAddresses), false);
       String errorMsg = "Test data:" + QueryResponse.toJson(qr)
           + "\nresult:\n" + sw.toString();
       assertTrue(errorMsg,
diff --git a/src/test/java/org/torproject/metrics/exonerator/QueryResponseTest.java b/src/test/java/org/torproject/metrics/exonerator/QueryResponseTest.java
index 97ae88d..826da1c 100644
--- a/src/test/java/org/torproject/metrics/exonerator/QueryResponseTest.java
+++ b/src/test/java/org/torproject/metrics/exonerator/QueryResponseTest.java
@@ -42,10 +42,12 @@ public class QueryResponseTest {
           + "\"exit\":false}],\"nearby_addresses\":[\"12.13.14.15\","
           + "\"12.13.14.16\"]}"},
         {new QueryResponse("1.1", null, null, null,
-          null, false, null, null),
-          "{\"version\":\"1.1\",\"relevant_statuses\":false}"},
+          null, false, true, true, null, null),
+          "{\"version\":\"1.1\",\"relevant_statuses\":false,"
+            + "\"missing_statuses\":true,"
+            + "\"missing_exit_lists\":true}"},
         {new QueryResponse("1.0", "12.13.14.15", "2016-12-12", "2016-01-01",
-          "2016-12-31", true,
+          "2016-12-31", true, false, false,
         new QueryResponse.Match[]{new QueryResponse.Match("2016-12-03",
         new TreeSet<>(Arrays.asList("12.13.14.15", "12.13.14.16")),
         "fingerprint-not-checked", "some name", true),
@@ -59,6 +61,8 @@ public class QueryResponseTest {
           + "\"first_date_in_database\":\"2016-01-01\","
           + "\"last_date_in_database\":\"2016-12-31\","
           + "\"relevant_statuses\":true,"
+          + "\"missing_statuses\":false,"
+          + "\"missing_exit_lists\":false,"
           + "\"matches\":[{\"timestamp\":\"2016-12-03\","
           + "\"addresses\":[\"12.13.14.15\","
           + "\"12.13.14.16\"],\"fingerprint\":\"fingerprint-not-checked\","
@@ -70,7 +74,7 @@ public class QueryResponseTest {
           + "\"exit\":false}],\"nearby_addresses\":[\"12.13.14.15\","
           + "\"12.13.14.16\"]}"},
         {new QueryResponse("1.0", "12.13.14.15", "2016-12-12", "2016-01-01",
-            "2016-12-31", false,
+            "2016-12-31", false, null, null,
             new QueryResponse.Match[]{new QueryResponse.Match("2016-12-03",
             new TreeSet<>(Arrays.asList("12.13.14.15", "12.13.14.16")),
             "fingerprint-not-checked", "some name", null),



More information about the tor-commits mailing list