[tor-commits] [exonerator/master] Reduce database query to single request.
karsten at torproject.org
karsten at torproject.org
Fri Sep 15 12:18:16 UTC 2017
commit 0d79dbd211e7e7d06e1435cf7d78f1abc3bf4365
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Wed Aug 16 09:56:21 2017 +0200
Reduce database query to single request.
Implements #16596.
---
.../org/torproject/exonerator/QueryServlet.java | 263 +++++----------------
src/main/resources/db/exonerator.sql | 77 +++---
2 files changed, 96 insertions(+), 244 deletions(-)
diff --git a/src/main/java/org/torproject/exonerator/QueryServlet.java b/src/main/java/org/torproject/exonerator/QueryServlet.java
index 873a53a..903c9d2 100644
--- a/src/main/java/org/torproject/exonerator/QueryServlet.java
+++ b/src/main/java/org/torproject/exonerator/QueryServlet.java
@@ -11,7 +11,6 @@ import java.sql.CallableStatement;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
-import java.sql.Statement;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
@@ -231,141 +230,7 @@ public class QueryServlet extends HttpServlet {
private QueryResponse queryDatabase(String relayIp, long timestamp) {
- QueryResponse response = null;
- SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
- dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- SimpleDateFormat validAfterTimeFormat = new SimpleDateFormat(
- "yyyy-MM-dd HH:mm:ss");
- validAfterTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-
- /* Open a database connection that we'll use to handle the whole
- * request. */
- long requestedConnection = System.currentTimeMillis();
- Connection conn = this.connectToDatabase();
- if (null != conn) {
-
- response = new QueryResponse();
- response.queryAddress = relayIp;
- response.queryDate = dateFormat.format(timestamp);
-
- /* Look up first and last date in the database. */
- long[] firstAndLastDates = this.queryFirstAndLastDatesFromDatabase(
- conn);
- if (null != firstAndLastDates) {
- response.firstDateInDatabase = dateFormat.format(
- firstAndLastDates[0]);
- response.lastDateInDatabase = dateFormat.format(firstAndLastDates[1]);
-
- /* Consider all consensuses published on or within a day of the given
- * date. */
- long timestampFrom = timestamp - 24L * 60L * 60L * 1000L;
- long timestampTo = timestamp + 2 * 24L * 60L * 60L * 1000L - 1L;
- String fromValidAfter = validAfterTimeFormat.format(timestampFrom);
- String toValidAfter = validAfterTimeFormat.format(timestampTo);
- SortedSet<Long> relevantConsensuses =
- this.queryKnownConsensusValidAfterTimes(conn, fromValidAfter,
- toValidAfter);
- if (null != relevantConsensuses && !relevantConsensuses.isEmpty()) {
- response.relevantStatuses = true;
-
- /* Search for status entries with the given IP address as onion
- * routing address, plus status entries of relays having an exit
- * list entry with the given IP address as exit address. */
- List<QueryResponse.Match> matches = this.queryStatusEntries(conn,
- relayIp, timestamp, validAfterTimeFormat);
- if (!matches.isEmpty()) {
- response.matches = matches.toArray(new QueryResponse.Match[0]);
-
- /* If we didn't find anything, run another query to find out if
- * there are relays running on other IP addresses in the same /24 or
- * /48 network and tell the user about it. */
- } else {
- if (!relayIp.contains(":")) {
- String address24 = this.convertIpV4ToHex(relayIp)
- .substring(0, 6);
- if (address24 != null) {
- response.nearbyAddresses = this.queryAddressesInSame24(conn,
- address24, timestamp).toArray(new String[0]);
- }
- } else {
- String address48 = this.convertIpV6ToHex(relayIp)
- .substring(0, 12);
- if (address48 != null) {
- response.nearbyAddresses = this.queryAddressesInSame48(conn,
- address48, timestamp).toArray(new String[0]);
- }
- }
- }
- }
- }
-
- /* Close the database connection. */
- this.closeDatabaseConnection(conn, requestedConnection);
- }
- return response;
- }
-
- private Connection connectToDatabase() {
- Connection conn = null;
- try {
- conn = this.ds.getConnection();
- } catch (SQLException e) {
- this.logger.log(Level.WARNING, "Couldn't connect: " + e.getMessage(), e);
- }
- return conn;
- }
-
- private long[] queryFirstAndLastDatesFromDatabase(Connection conn) {
- long[] firstAndLastDates = null;
- try {
- Statement statement = conn.createStatement();
- String query = "SELECT DATE(MIN(validafter)) AS first, "
- + "DATE(MAX(validafter)) AS last FROM statusentry";
- ResultSet rs = statement.executeQuery(query);
- if (rs.next()) {
- Calendar utcCalendar = Calendar.getInstance(
- TimeZone.getTimeZone("UTC"));
- firstAndLastDates = new long[] {
- rs.getTimestamp(1, utcCalendar).getTime(),
- rs.getTimestamp(2, utcCalendar).getTime()
- };
- }
- rs.close();
- statement.close();
- } catch (SQLException e) {
- /* Looks like we don't have any consensuses. */
- firstAndLastDates = null;
- }
- return firstAndLastDates;
- }
-
- private SortedSet<Long> queryKnownConsensusValidAfterTimes(
- Connection conn, String fromValidAfter, String toValidAfter) {
- SortedSet<Long> relevantConsensuses = new TreeSet<>();
- try {
- Statement statement = conn.createStatement();
- String query = "SELECT DISTINCT validafter FROM statusentry "
- + "WHERE validafter >= '" + fromValidAfter
- + "' AND validafter <= '" + toValidAfter + "'";
- ResultSet rs = statement.executeQuery(query);
- while (rs.next()) {
- long consensusTime = rs.getTimestamp(1).getTime();
- relevantConsensuses.add(consensusTime);
- }
- rs.close();
- statement.close();
- } catch (SQLException e) {
- /* Looks like we don't have any consensuses in the requested
- * interval. */
- relevantConsensuses = null;
- }
- return relevantConsensuses;
- }
-
- private List<QueryResponse.Match> queryStatusEntries(Connection conn,
- String relayIp, long timestamp,
- SimpleDateFormat validAfterTimeFormat) {
- List<QueryResponse.Match> matches = new ArrayList<>();
+ /* Convert address to hex. */
String addressHex = !relayIp.contains(":")
? this.convertIpV4ToHex(relayIp) : this.convertIpV6ToHex(relayIp);
if (addressHex == null) {
@@ -373,20 +238,36 @@ public class QueryServlet extends HttpServlet {
}
String address24Or48Hex = !relayIp.contains(":")
? addressHex.substring(0, 6) : addressHex.substring(0, 12);
+
+ /* Prepare formatting response items. */
+ SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
+ dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ SimpleDateFormat validAfterTimeFormat = new SimpleDateFormat(
+ "yyyy-MM-dd HH:mm:ss");
+ validAfterTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+
+ /* Make the database query. */
+ SortedSet<Long> allValidAfters = new TreeSet<>();
+ List<QueryResponse.Match> matches = new ArrayList<>();
+ SortedSet<String> allAddresses = new TreeSet<>();
try {
- CallableStatement cs;
- if (!relayIp.contains(":")) {
- cs = conn.prepareCall("{call search_by_address24_date(?, ?)}");
- } else {
- cs = conn.prepareCall("{call search_by_address48_date(?, ?)}");
- }
+ final long requestedConnection = System.currentTimeMillis();
+ Connection conn = this.ds.getConnection();
+ CallableStatement cs = conn.prepareCall(String.format(
+ "{call search_by_address%s_date(?, ?)}",
+ relayIp.contains(":") ? 48 : 24));
cs.setString(1, address24Or48Hex);
Calendar utcCalendar = Calendar.getInstance(
TimeZone.getTimeZone("UTC"));
cs.setDate(2, new java.sql.Date(timestamp), utcCalendar);
ResultSet rs = cs.executeQuery();
while (rs.next()) {
+ long validafter = rs.getTimestamp(2, utcCalendar).getTime();
+ allValidAfters.add(validafter);
byte[] rawstatusentry = rs.getBytes(1);
+ if (null == rawstatusentry) {
+ continue;
+ }
SortedSet<String> addresses = new TreeSet<>();
SortedSet<String> addressesHex = new TreeSet<>();
String nickname = null;
@@ -414,10 +295,10 @@ public class QueryServlet extends HttpServlet {
addresses.add(exitaddress);
addressesHex.add(this.convertIpV4ToHex(exitaddress));
}
+ allAddresses.addAll(addresses);
if (!addressesHex.contains(addressHex)) {
continue;
}
- long validafter = rs.getTimestamp(2, utcCalendar).getTime();
String validAfterString = validAfterTimeFormat.format(validafter);
String fingerprint = rs.getString(3).toUpperCase();
QueryResponse.Match match = new QueryResponse.Match();
@@ -430,73 +311,55 @@ public class QueryServlet extends HttpServlet {
}
rs.close();
cs.close();
+ conn.close();
+ this.logger.info("Returned a database connection to the pool "
+ + "after " + (System.currentTimeMillis()
+ - requestedConnection) + " millis.");
} catch (SQLException e) {
/* Nothing found. */
- matches.clear();
+ this.logger.log(Level.WARNING, "Database error: " + e.getMessage(), e);
+ return null;
}
- return matches;
- }
- private List<String> queryAddressesInSame24(Connection conn,
- String address24, long timestamp) {
- List<String> addressesInSameNetwork = new ArrayList<>();
- try {
- CallableStatement cs = conn.prepareCall(
- "{call search_addresses_in_same_24 (?, ?)}");
- cs.setString(1, address24);
- cs.setDate(2, new java.sql.Date(timestamp));
- ResultSet rs = cs.executeQuery();
- while (rs.next()) {
- String address = rs.getString(1);
- if (!addressesInSameNetwork.contains(address)) {
- addressesInSameNetwork.add(address);
+ /* Create a query response object. */
+ QueryResponse response = new QueryResponse();
+ response.queryAddress = relayIp;
+ response.queryDate = dateFormat.format(timestamp);
+ if (!allValidAfters.isEmpty()) {
+ response.firstDateInDatabase = dateFormat.format(allValidAfters.first());
+ response.lastDateInDatabase = dateFormat.format(allValidAfters.last());
+ response.relevantStatuses = false;
+ long timestampFrom = timestamp - 24L * 60L * 60L * 1000L;
+ long timestampTo = timestamp + 2 * 24L * 60L * 60L * 1000L - 1L;
+ for (long validAfter : allValidAfters) {
+ if (validAfter >= timestampFrom && validAfter <= timestampTo) {
+ response.relevantStatuses = true;
+ break;
}
}
- rs.close();
- cs.close();
- } catch (SQLException e) {
- /* No other addresses in the same /24 found. */
- addressesInSameNetwork = null;
- }
- return addressesInSameNetwork;
- }
-
- private List<String> queryAddressesInSame48(Connection conn,
- String address48, long timestamp) {
- List<String> addressesInSameNetwork = new ArrayList<>();
- try {
- CallableStatement cs = conn.prepareCall(
- "{call search_addresses_in_same_48 (?, ?)}");
- cs.setString(1, address48);
- cs.setDate(2, new java.sql.Date(timestamp));
- ResultSet rs = cs.executeQuery();
- while (rs.next()) {
- String address = rs.getString(1);
- if (!addressesInSameNetwork.contains(address)) {
- addressesInSameNetwork.add(address);
+ if (!matches.isEmpty()) {
+ response.matches = matches.toArray(new QueryResponse.Match[0]);
+ } else {
+ List<String> nearbyAddresses = new ArrayList<>();
+ for (String nearbyAddress : allAddresses) {
+ String nearbyAddressHex = !nearbyAddress.contains(":")
+ ? this.convertIpV4ToHex(nearbyAddress)
+ : this.convertIpV6ToHex(nearbyAddress);
+ String nearbyAddress24Or48Hex = !nearbyAddress.contains(":")
+ ? nearbyAddressHex.substring(0, 6)
+ : nearbyAddressHex.substring(0, 12);
+ if (address24Or48Hex.equals(nearbyAddress24Or48Hex)) {
+ nearbyAddresses.add(nearbyAddress);
+ }
+ }
+ if (!nearbyAddresses.isEmpty()) {
+ response.nearbyAddresses = nearbyAddresses.toArray(new String[0]);
}
}
- rs.close();
- cs.close();
- } catch (SQLException e) {
- /* No other addresses in the same /48 found. */
- addressesInSameNetwork = null;
}
- return addressesInSameNetwork;
- }
- private void closeDatabaseConnection(Connection conn,
- long requestedConnection) {
- try {
- conn.close();
- this.logger.info("Returned a database connection to the pool "
- + "after " + (System.currentTimeMillis()
- - requestedConnection) + " millis.");
- } catch (SQLException e) {
- this.logger.log(Level.WARNING, "Couldn't close: " + e.getMessage(), e);
- }
- return;
+ /* Return the query response. */
+ return response;
}
-
}
diff --git a/src/main/resources/db/exonerator.sql b/src/main/resources/db/exonerator.sql
index 84c5af8..85d5d01 100755
--- a/src/main/resources/db/exonerator.sql
+++ b/src/main/resources/db/exonerator.sql
@@ -200,19 +200,36 @@ CREATE OR REPLACE FUNCTION search_by_address24_date (
exitaddress TEXT) AS $$
BEGIN
RETURN QUERY EXECUTE
- -- The first select finds all status entries of relays with the given
+ -- The first and second selects retrieve the first and last valid-after
+ -- time in the database.
+ --
+ -- The third select retrieves known valid-after times from 1 day before
+ -- to 1 day after the given date.
+ --
+ -- The fourth select finds all status entries of relays with the given
-- IP address as onion routing address.
--
- -- The second select finds status entries of relays having an exit list
+ -- The fifth select finds status entries of relays having an exit list
-- entry with the provided IP address as the exit address.
- -- In the second select,
+ -- In the fifth select,
-- - Focus on a time period from 1 day before and 1 day after the
-- given date. Also include a second day before the given date
-- for exit lists, because it can take up to 24 hours to scan a
-- relay again. We should not miss exit list entries here.
-- - Consider only exit list scans that took place in the 24 hours
-- before the relay was listed in a consensus.
- 'SELECT rawstatusentry,
+ 'SELECT NULL::BYTEA, MIN(validafter), NULL::CHARACTER, NULL::TEXT
+ FROM statusentry
+ UNION
+ SELECT NULL::BYTEA, MAX(validafter), NULL::CHARACTER, NULL::TEXT
+ FROM statusentry
+ UNION
+ SELECT DISTINCT NULL::BYTEA, validafter, NULL::CHARACTER, NULL::TEXT
+ FROM statusentry
+ WHERE DATE(validafter) >= ''' || select_date || '''::DATE - 1
+ AND DATE(validafter) <= ''' || select_date || '''::DATE + 1
+ UNION
+ SELECT rawstatusentry,
validafter,
fingerprint,
NULL
@@ -256,7 +273,18 @@ CREATE OR REPLACE FUNCTION search_by_address48_date (
exitaddress TEXT) AS $$
BEGIN
RETURN QUERY EXECUTE
- 'SELECT rawstatusentry,
+ 'SELECT NULL::BYTEA, MIN(validafter), NULL::CHARACTER, NULL::TEXT
+ FROM statusentry
+ UNION
+ SELECT NULL::BYTEA, MAX(validafter), NULL::CHARACTER, NULL::TEXT
+ FROM statusentry
+ UNION
+ SELECT DISTINCT NULL::BYTEA, validafter, NULL::CHARACTER, NULL::TEXT
+ FROM statusentry
+ WHERE DATE(validafter) >= ''' || select_date || '''::DATE - 1
+ AND DATE(validafter) <= ''' || select_date || '''::DATE + 1
+ UNION
+ SELECT rawstatusentry,
validafter,
fingerprint,
NULL::TEXT
@@ -268,42 +296,3 @@ CREATE OR REPLACE FUNCTION search_by_address48_date (
END;
$$ LANGUAGE plpgsql;
--- Look up all IPv4 OR and exit addresses in the /24 network of a given
--- address to suggest other addresses the user may be looking for.
-CREATE OR REPLACE FUNCTION search_addresses_in_same_24 (
- select_address24 CHARACTER(6),
- select_date DATE)
- RETURNS TABLE(addresstext TEXT,
- addressinet INET) AS $$
- SELECT HOST(oraddress),
- oraddress
- FROM statusentry
- WHERE oraddress24 = $1
- AND DATE(validafter) >= $2 - 1
- AND DATE(validafter) <= $2 + 1
- UNION
- SELECT HOST(exitaddress),
- exitaddress
- FROM exitlistentry
- WHERE exitaddress24 = $1
- AND DATE(scanned) >= $2 - 2
- AND DATE(scanned) <= $2 + 1
- ORDER BY 2;
-$$ LANGUAGE SQL;
-
--- Look up all IPv6 OR addresses in the /48 network of a given address to
--- suggest other addresses the user may be looking for.
-CREATE OR REPLACE FUNCTION search_addresses_in_same_48 (
- select_address48 CHARACTER(12),
- select_date DATE)
- RETURNS TABLE(addresstext TEXT,
- addressinet INET) AS $$
- SELECT HOST(oraddress),
- oraddress
- FROM statusentry
- WHERE oraddress48 = $1
- AND DATE(validafter) >= $2 - 1
- AND DATE(validafter) <= $2 + 1
- ORDER BY 2;
-$$ LANGUAGE SQL;
-
More information about the tor-commits
mailing list