[tor-commits] [metrics-web/master] Remove advbw column from bandwidth.csv.
karsten at torproject.org
karsten at torproject.org
Thu Nov 22 08:03:10 UTC 2018
commit 09cfdfdff4efc1aa1cc60f53f7f1353a6193e6ad
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Mon Nov 12 19:50:46 2018 +0100
Remove advbw column from bandwidth.csv.
Instead, use advbw data from the ipv6servers module.
As a result, we can stop aggregating advertised bandwidths in the
legacy module.
Required schema changes to live tordir databases:
DROP VIEW stats_bandwidth;
CREATE VIEW stats_bandwidth [...]
CREATE OR REPLACE FUNCTION refresh_all() [...]
DROP FUNCTION refresh_bandwidth_flags();
DROP FUNCTION refresh_relay_statuses_per_day();
DROP TABLE relay_statuses_per_day;
DROP TABLE bandwidth_flags;
DROP TABLE consensus;
DROP FUNCTION delete_old_descriptor();
DROP TABLE descriptor;
Part of #28116.
---
src/main/R/rserver/graphs.R | 58 +++---
.../metrics/stats/ipv6servers/Database.java | 22 ++
.../torproject/metrics/stats/ipv6servers/Main.java | 2 +
.../metrics/stats/servers/Configuration.java | 1 -
.../servers/RelayDescriptorDatabaseImporter.java | 232 +--------------------
src/main/sql/ipv6servers/init-ipv6servers.sql | 11 +
src/main/sql/legacy/tordir.sql | 135 +-----------
7 files changed, 73 insertions(+), 388 deletions(-)
diff --git a/src/main/R/rserver/graphs.R b/src/main/R/rserver/graphs.R
index 9dc8c2d..df108e2 100644
--- a/src/main/R/rserver/graphs.R
+++ b/src/main/R/rserver/graphs.R
@@ -446,16 +446,19 @@ write_platforms <- function(start_p = NULL, end_p = NULL, path_p) {
}
prepare_bandwidth <- function(start_p, end_p) {
- read.csv(paste(stats_dir, "bandwidth.csv", sep = ""),
+ advbw <- read.csv(paste(stats_dir, "advbw.csv", sep = ""),
+ colClasses = c("date" = "Date")) %>%
+ transmute(date, variable = "advbw", value = advbw * 8 / 1e9)
+ bwhist <- read.csv(paste(stats_dir, "bandwidth.csv", sep = ""),
colClasses = c("date" = "Date")) %>%
+ transmute(date, variable = "bwhist", value = (bwread + bwwrite) * 8 / 2e9)
+ rbind(advbw, bwhist) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>%
- filter(isexit != "") %>%
- filter(isguard != "") %>%
- group_by(date) %>%
- summarize(advbw = sum(advbw) * 8 / 1e9,
- bwhist = sum(bwread + bwwrite) * 8 / 2e9) %>%
- select(date, advbw, bwhist)
+ filter(!is.na(value)) %>%
+ group_by(date, variable) %>%
+ summarize(value = sum(value)) %>%
+ spread(variable, value)
}
plot_bandwidth <- function(start_p, end_p, path_p) {
@@ -810,33 +813,24 @@ write_connbidirect <- function(start_p = NULL, end_p = NULL, path_p) {
}
prepare_bandwidth_flags <- function(start_p, end_p) {
- b <- read.csv(paste(stats_dir, "bandwidth.csv", sep = ""),
- colClasses = c("date" = "Date"))
- b <- b %>%
+ advbw <- read.csv(paste(stats_dir, "advbw.csv", sep = ""),
+ colClasses = c("date" = "Date")) %>%
+ transmute(date, isguard, isexit, variable = "advbw",
+ value = advbw * 8 / 1e9)
+ bwhist <- read.csv(paste(stats_dir, "bandwidth.csv", sep = ""),
+ colClasses = c("date" = "Date")) %>%
+ transmute(date, isguard, isexit, variable = "bwhist",
+ value = (bwread + bwwrite) * 8 / 2e9)
+ rbind(advbw, bwhist) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>%
- filter(isexit != "") %>%
- filter(isguard != "")
- b <- data.frame(date = b$date,
- isexit = b$isexit == "t", isguard = b$isguard == "t",
- advbw = b$advbw * 8 / 1e9,
- bwhist = (b$bwread + b$bwwrite) * 8 / 2e9)
- b <- rbind(
- data.frame(b[b$isguard == TRUE, ], flag = "guard"),
- data.frame(b[b$isexit == TRUE, ], flag = "exit"))
- b <- data.frame(date = b$date, advbw = b$advbw, bwhist = b$bwhist,
- flag = b$flag)
- b <- aggregate(list(advbw = b$advbw, bwhist = b$bwhist),
- by = list(date = b$date, flag = b$flag), FUN = sum,
- na.rm = TRUE, na.action = NULL)
- b <- gather(b, type, value, -c(date, flag))
- bandwidth <- b[b$value > 0, ]
- bandwidth <- data.frame(date = bandwidth$date,
- variable = as.factor(paste(bandwidth$flag, "_", bandwidth$type,
- sep = "")), value = bandwidth$value)
- bandwidth$variable <- factor(bandwidth$variable,
- levels = levels(bandwidth$variable)[c(3, 4, 1, 2)])
- bandwidth
+ group_by(date, variable) %>%
+ summarize(exit = sum(value[isexit == "t"]),
+ guard = sum(value[isguard == "t"])) %>%
+ gather(flag, value, -date, -variable) %>%
+ unite(variable, flag, variable) %>%
+ mutate(variable = factor(variable,
+ levels = c("guard_advbw", "guard_bwhist", "exit_advbw", "exit_bwhist")))
}
plot_bandwidth_flags <- function(start_p, end_p, path_p) {
diff --git a/src/main/java/org/torproject/metrics/stats/ipv6servers/Database.java b/src/main/java/org/torproject/metrics/stats/ipv6servers/Database.java
index c3a1fec..b5efe3e 100644
--- a/src/main/java/org/torproject/metrics/stats/ipv6servers/Database.java
+++ b/src/main/java/org/torproject/metrics/stats/ipv6servers/Database.java
@@ -435,6 +435,28 @@ class Database implements AutoCloseable {
return statistics;
}
+ /** Query the bandwidth_advbw view. */
+ List<String[]> queryAdvbw() throws SQLException {
+ List<String[]> statistics = new ArrayList<>();
+ String columns = "date, isexit, isguard, advbw";
+ statistics.add(columns.split(", "));
+ Statement st = this.connection.createStatement();
+ Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"),
+ Locale.US);
+ String queryString = "SELECT " + columns + " FROM bandwidth_advbw";
+ try (ResultSet rs = st.executeQuery(queryString)) {
+ while (rs.next()) {
+ String[] outputLine = new String[4];
+ outputLine[0] = rs.getDate("date", calendar).toLocalDate().toString();
+ outputLine[1] = rs.getString("isexit");
+ outputLine[2] = rs.getString("isguard");
+ outputLine[3] = getLongFromResultSet(rs, "advbw");
+ statistics.add(outputLine);
+ }
+ }
+ return statistics;
+ }
+
/** Query the servers_networksize view. */
List<String[]> queryNetworksize() throws SQLException {
List<String[]> statistics = new ArrayList<>();
diff --git a/src/main/java/org/torproject/metrics/stats/ipv6servers/Main.java b/src/main/java/org/torproject/metrics/stats/ipv6servers/Main.java
index a91a74f..d322a2e 100644
--- a/src/main/java/org/torproject/metrics/stats/ipv6servers/Main.java
+++ b/src/main/java/org/torproject/metrics/stats/ipv6servers/Main.java
@@ -88,6 +88,8 @@ public class Main {
log.info("Querying aggregated statistics from the database.");
new Writer().write(Paths.get(Configuration.output, "ipv6servers.csv"),
database.queryServersIpv6());
+ new Writer().write(Paths.get(Configuration.output, "advbw.csv"),
+ database.queryAdvbw());
new Writer().write(Paths.get(Configuration.output, "networksize.csv"),
database.queryNetworksize());
new Writer().write(Paths.get(Configuration.output, "relayflags.csv"),
diff --git a/src/main/java/org/torproject/metrics/stats/servers/Configuration.java b/src/main/java/org/torproject/metrics/stats/servers/Configuration.java
index c4597bc..76788df 100644
--- a/src/main/java/org/torproject/metrics/stats/servers/Configuration.java
+++ b/src/main/java/org/torproject/metrics/stats/servers/Configuration.java
@@ -102,7 +102,6 @@ public class Configuration {
if (this.directoryArchivesDirectories.isEmpty()) {
String prefix = "../../shared/in/recent/relay-descriptors/";
return Arrays.asList(new File(prefix + "consensuses/"),
- new File(prefix + "server-descriptors/"),
new File(prefix + "extra-infos/"));
} else {
return this.directoryArchivesDirectories;
diff --git a/src/main/java/org/torproject/metrics/stats/servers/RelayDescriptorDatabaseImporter.java b/src/main/java/org/torproject/metrics/stats/servers/RelayDescriptorDatabaseImporter.java
index c9a6fa7..2d1ae47 100644
--- a/src/main/java/org/torproject/metrics/stats/servers/RelayDescriptorDatabaseImporter.java
+++ b/src/main/java/org/torproject/metrics/stats/servers/RelayDescriptorDatabaseImporter.java
@@ -9,7 +9,6 @@ import org.torproject.descriptor.DescriptorSourceFactory;
import org.torproject.descriptor.ExtraInfoDescriptor;
import org.torproject.descriptor.NetworkStatusEntry;
import org.torproject.descriptor.RelayNetworkStatusConsensus;
-import org.torproject.descriptor.ServerDescriptor;
import org.postgresql.util.PGbytea;
@@ -20,7 +19,6 @@ import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
-import java.nio.charset.StandardCharsets;
import java.sql.CallableStatement;
import java.sql.Connection;
import java.sql.DriverManager;
@@ -28,7 +26,6 @@ import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Timestamp;
-import java.sql.Types;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
@@ -44,10 +41,6 @@ import java.util.TreeSet;
/**
* Parse directory data.
*/
-
-/* TODO Split up this class and move its parts to cron.network,
- * cron.users, and status.relaysearch packages. Requires extensive
- * changes to the database schema though. */
public final class RelayDescriptorDatabaseImporter {
/**
@@ -58,20 +51,10 @@ public final class RelayDescriptorDatabaseImporter {
/* Counters to keep track of the number of records committed before
* each transaction. */
- private int rdsCount = 0;
-
- private int resCount = 0;
-
private int rhsCount = 0;
private int rrsCount = 0;
- private int rcsCount = 0;
-
- private int rvsCount = 0;
-
- private int rqsCount = 0;
-
/**
* Relay descriptor database connection.
*/
@@ -85,18 +68,6 @@ public final class RelayDescriptorDatabaseImporter {
private PreparedStatement psSs;
/**
- * Prepared statement to check whether a given server descriptor has
- * been imported into the database before.
- */
- private PreparedStatement psDs;
-
- /**
- * Prepared statement to check whether a given network status consensus
- * has been imported into the database before.
- */
- private PreparedStatement psCs;
-
- /**
* Set of dates that have been inserted into the database for being
* included in the next refresh run.
*/
@@ -115,22 +86,11 @@ public final class RelayDescriptorDatabaseImporter {
private PreparedStatement psR;
/**
- * Prepared statement to insert a server descriptor into the database.
- */
- private PreparedStatement psD;
-
- /**
* Callable statement to insert the bandwidth history of an extra-info
* descriptor into the database.
*/
private CallableStatement csH;
- /**
- * Prepared statement to insert a network status consensus into the
- * database.
- */
- private PreparedStatement psC;
-
private static Logger log
= LoggerFactory.getLogger(RelayDescriptorDatabaseImporter.class);
@@ -145,21 +105,11 @@ public final class RelayDescriptorDatabaseImporter {
private BufferedWriter statusentryOut;
/**
- * Raw import file containing server descriptors.
- */
- private BufferedWriter descriptorOut;
-
- /**
* Raw import file containing bandwidth histories.
*/
private BufferedWriter bwhistOut;
/**
- * Raw import file containing consensuses.
- */
- private BufferedWriter consensusOut;
-
- /**
* Date format to parse timestamps.
*/
private SimpleDateFormat dateTimeFormat;
@@ -212,10 +162,6 @@ public final class RelayDescriptorDatabaseImporter {
/* Prepare statements. */
this.psSs = conn.prepareStatement("SELECT fingerprint "
+ "FROM statusentry WHERE validafter = ?");
- this.psDs = conn.prepareStatement("SELECT COUNT(*) "
- + "FROM descriptor WHERE descriptor = ?");
- this.psCs = conn.prepareStatement("SELECT COUNT(*) "
- + "FROM consensus WHERE validafter = ?");
this.psR = conn.prepareStatement("INSERT INTO statusentry "
+ "(validafter, nickname, fingerprint, descriptor, "
+ "published, address, orport, dirport, isauthority, "
@@ -224,16 +170,8 @@ public final class RelayDescriptorDatabaseImporter {
+ "isvalid, isv2dir, isv3dir, version, bandwidth, ports, "
+ "rawdesc) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, "
+ "?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)");
- this.psD = conn.prepareStatement("INSERT INTO descriptor "
- + "(descriptor, nickname, address, orport, dirport, "
- + "fingerprint, bandwidthavg, bandwidthburst, "
- + "bandwidthobserved, platform, published, uptime, "
- + "extrainfo) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, "
- + "?)");
this.csH = conn.prepareCall("{call insert_bwhist(?, ?, ?, ?, ?, "
+ "?)}");
- this.psC = conn.prepareStatement("INSERT INTO consensus "
- + "(validafter) VALUES (?)");
this.psU = conn.prepareStatement("INSERT INTO scheduled_updates "
+ "(date) VALUES (?)");
this.scheduledUpdates = new HashSet<>();
@@ -390,95 +328,9 @@ public final class RelayDescriptorDatabaseImporter {
}
/**
- * Insert server descriptor into database.
- */
- public void addServerDescriptorContents(String descriptor,
- String nickname, String address, int orPort, int dirPort,
- String relayIdentifier, long bandwidthAvg, long bandwidthBurst,
- long bandwidthObserved, String platform, long published,
- Long uptime, String extraInfoDigest) {
- if (this.importIntoDatabase) {
- try {
- this.addDateToScheduledUpdates(published);
- this.addDateToScheduledUpdates(
- published + 24L * 60L * 60L * 1000L);
- Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
- this.psDs.setString(1, descriptor);
- ResultSet rs = psDs.executeQuery();
- rs.next();
- if (rs.getInt(1) == 0) {
- this.psD.clearParameters();
- this.psD.setString(1, descriptor);
- this.psD.setString(2, nickname);
- this.psD.setString(3, address);
- this.psD.setInt(4, orPort);
- this.psD.setInt(5, dirPort);
- this.psD.setString(6, relayIdentifier);
- this.psD.setLong(7, bandwidthAvg);
- this.psD.setLong(8, bandwidthBurst);
- this.psD.setLong(9, bandwidthObserved);
- /* Remove all non-ASCII characters from the platform string, or
- * we'll make Postgres unhappy. Sun's JDK and OpenJDK behave
- * differently when creating a new String with a given encoding.
- * That's what the regexp below is for. */
- this.psD.setString(10, new String(platform.getBytes(),
- StandardCharsets.US_ASCII).replaceAll("[^\\p{ASCII}]",""));
- this.psD.setTimestamp(11, new Timestamp(published), cal);
- if (null != uptime) {
- this.psD.setLong(12, uptime);
- } else {
- this.psD.setNull(12, Types.BIGINT);
- }
- this.psD.setString(13, extraInfoDigest);
- this.psD.executeUpdate();
- rdsCount++;
- if (rdsCount % autoCommitCount == 0) {
- this.conn.commit();
- }
- }
- } catch (SQLException e) {
- log.warn("Could not add server "
- + "descriptor. We won't make any further SQL requests in "
- + "this execution.", e);
- this.importIntoDatabase = false;
- }
- }
- if (this.writeRawImportFiles) {
- try {
- if (this.descriptorOut == null) {
- new File(rawFilesDirectory).mkdirs();
- this.descriptorOut = new BufferedWriter(new FileWriter(
- rawFilesDirectory + "/descriptor.sql"));
- this.descriptorOut.write(" COPY descriptor (descriptor, "
- + "nickname, address, orport, dirport, fingerprint, "
- + "bandwidthavg, bandwidthburst, bandwidthobserved, "
- + "platform, published, uptime, extrainfo) FROM stdin;\n");
- }
- this.descriptorOut.write(descriptor.toLowerCase() + "\t"
- + nickname + "\t" + address + "\t" + orPort + "\t" + dirPort
- + "\t" + relayIdentifier + "\t" + bandwidthAvg + "\t"
- + bandwidthBurst + "\t" + bandwidthObserved + "\t"
- + (platform != null && platform.length() > 0
- ? new String(platform.getBytes(), StandardCharsets.US_ASCII)
- : "\\N") + "\t" + this.dateTimeFormat.format(published) + "\t"
- + (uptime >= 0 ? uptime : "\\N") + "\t"
- + (extraInfoDigest != null ? extraInfoDigest : "\\N")
- + "\n");
- } catch (IOException e) {
- log.warn("Could not write server "
- + "descriptor to raw database import file. We won't make "
- + "any further attempts to write raw import files in this "
- + "execution.", e);
- this.writeRawImportFiles = false;
- }
- }
- }
-
- /**
* Insert extra-info descriptor into database.
*/
- public void addExtraInfoDescriptorContents(String extraInfoDigest,
- String nickname, String fingerprint, long published,
+ public void addExtraInfoDescriptorContents(String fingerprint, long published,
List<String> bandwidthHistoryLines) {
if (!bandwidthHistoryLines.isEmpty()) {
this.addBandwidthHistory(fingerprint.toLowerCase(), published,
@@ -766,55 +618,6 @@ public final class RelayDescriptorDatabaseImporter {
}
}
- /**
- * Insert network status consensus into database.
- */
- public void addConsensus(long validAfter) {
- if (this.importIntoDatabase) {
- try {
- this.addDateToScheduledUpdates(validAfter);
- Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
- Timestamp validAfterTimestamp = new Timestamp(validAfter);
- this.psCs.setTimestamp(1, validAfterTimestamp, cal);
- ResultSet rs = psCs.executeQuery();
- rs.next();
- if (rs.getInt(1) == 0) {
- this.psC.clearParameters();
- this.psC.setTimestamp(1, validAfterTimestamp, cal);
- this.psC.executeUpdate();
- rcsCount++;
- if (rcsCount % autoCommitCount == 0) {
- this.conn.commit();
- }
- }
- } catch (SQLException e) {
- log.warn("Could not add network status "
- + "consensus. We won't make any further SQL requests in "
- + "this execution.", e);
- this.importIntoDatabase = false;
- }
- }
- if (this.writeRawImportFiles) {
- try {
- if (this.consensusOut == null) {
- new File(rawFilesDirectory).mkdirs();
- this.consensusOut = new BufferedWriter(new FileWriter(
- rawFilesDirectory + "/consensus.sql"));
- this.consensusOut.write(" COPY consensus (validafter) "
- + "FROM stdin;\n");
- }
- String validAfterString = this.dateTimeFormat.format(validAfter);
- this.consensusOut.write(validAfterString + "\n");
- } catch (IOException e) {
- log.warn("Could not write network status "
- + "consensus to raw database import file. We won't make "
- + "any further attempts to write raw import files in this "
- + "execution.", e);
- this.writeRawImportFiles = false;
- }
- }
- }
-
/** Imports relay descriptors into the database. */
public void importRelayDescriptors() {
log.info("Importing files in directories " + archivesDirectories
@@ -834,8 +637,6 @@ public final class RelayDescriptorDatabaseImporter {
if (descriptor instanceof RelayNetworkStatusConsensus) {
this.addRelayNetworkStatusConsensus(
(RelayNetworkStatusConsensus) descriptor);
- } else if (descriptor instanceof ServerDescriptor) {
- this.addServerDescriptor((ServerDescriptor) descriptor);
} else if (descriptor instanceof ExtraInfoDescriptor) {
this.addExtraInfoDescriptor((ExtraInfoDescriptor) descriptor);
}
@@ -862,18 +663,6 @@ public final class RelayDescriptorDatabaseImporter {
statusEntry.getBandwidth(), statusEntry.getPortList(),
statusEntry.getStatusEntryBytes());
}
- this.addConsensus(consensus.getValidAfterMillis());
- }
-
- private void addServerDescriptor(ServerDescriptor descriptor) {
- this.addServerDescriptorContents(
- descriptor.getDigestSha1Hex(), descriptor.getNickname(),
- descriptor.getAddress(), descriptor.getOrPort(),
- descriptor.getDirPort(), descriptor.getFingerprint(),
- descriptor.getBandwidthRate(), descriptor.getBandwidthBurst(),
- descriptor.getBandwidthObserved(), descriptor.getPlatform(),
- descriptor.getPublishedMillis(), descriptor.getUptime(),
- descriptor.getExtraInfoDigestSha1Hex());
}
private void addExtraInfoDescriptor(ExtraInfoDescriptor descriptor) {
@@ -892,8 +681,7 @@ public final class RelayDescriptorDatabaseImporter {
bandwidthHistoryLines.add(
descriptor.getDirreqReadHistory().getLine());
}
- this.addExtraInfoDescriptorContents(descriptor.getDigestSha1Hex(),
- descriptor.getNickname(),
+ this.addExtraInfoDescriptorContents(
descriptor.getFingerprint().toLowerCase(),
descriptor.getPublishedMillis(), bandwidthHistoryLines);
}
@@ -904,12 +692,8 @@ public final class RelayDescriptorDatabaseImporter {
public void closeConnection() {
/* Log stats about imported descriptors. */
- log.info("Finished importing relay "
- + "descriptors: {} consensuses, {} network status entries, {} "
- + "votes, {} server descriptors, {} extra-info descriptors, {} "
- + "bandwidth history elements, and {} dirreq stats elements",
- rcsCount, rrsCount, rvsCount, rdsCount, resCount, rhsCount,
- rqsCount);
+ log.info("Finished importing relay descriptors: {} network status entries "
+ + "and {} bandwidth history elements", rrsCount, rhsCount);
/* Insert scheduled updates a second time, just in case the refresh
* run has started since inserting them the first time in which case
@@ -951,18 +735,10 @@ public final class RelayDescriptorDatabaseImporter {
this.statusentryOut.write("\\.\n");
this.statusentryOut.close();
}
- if (this.descriptorOut != null) {
- this.descriptorOut.write("\\.\n");
- this.descriptorOut.close();
- }
if (this.bwhistOut != null) {
this.bwhistOut.write("\\.\n");
this.bwhistOut.close();
}
- if (this.consensusOut != null) {
- this.consensusOut.write("\\.\n");
- this.consensusOut.close();
- }
} catch (IOException e) {
log.warn("Could not close one or more raw database import files.", e);
}
diff --git a/src/main/sql/ipv6servers/init-ipv6servers.sql b/src/main/sql/ipv6servers/init-ipv6servers.sql
index b478a49..c94a19d 100644
--- a/src/main/sql/ipv6servers/init-ipv6servers.sql
+++ b/src/main/sql/ipv6servers/init-ipv6servers.sql
@@ -312,6 +312,17 @@ GROUP BY DATE(valid_after), server, guard_relay, exit_relay, announced_ipv6,
ORDER BY valid_after_date, server, guard_relay, exit_relay, announced_ipv6,
exiting_ipv6_relay, reachable_ipv6_relay;
+-- View on advertised bandwidth by Exit/Guard flag combination.
+CREATE OR REPLACE VIEW bandwidth_advbw AS
+SELECT valid_after_date AS date,
+ exit_relay AS isexit,
+ guard_relay AS isguard,
+ FLOOR(SUM(advertised_bandwidth_bytes_sum_avg)) AS advbw
+FROM ipv6servers
+WHERE server = 'relay'
+GROUP BY date, isexit, isguard
+ORDER BY date, isexit, isguard;
+
-- View on the number of running servers by relay flag.
CREATE OR REPLACE VIEW servers_flags_complete AS
WITH included_statuses AS (
diff --git a/src/main/sql/legacy/tordir.sql b/src/main/sql/legacy/tordir.sql
index f1d6767..dfe7b5d 100644
--- a/src/main/sql/legacy/tordir.sql
+++ b/src/main/sql/legacy/tordir.sql
@@ -3,33 +3,6 @@
CREATE LANGUAGE plpgsql;
--- TABLE descriptor
--- Contains all of the descriptors published by routers.
-CREATE TABLE descriptor (
- descriptor CHARACTER(40) NOT NULL,
- nickname CHARACTER VARYING(19) NOT NULL,
- address CHARACTER VARYING(15) NOT NULL,
- orport INTEGER NOT NULL,
- dirport INTEGER NOT NULL,
- fingerprint CHARACTER(40) NOT NULL,
- bandwidthavg BIGINT NOT NULL,
- bandwidthburst BIGINT NOT NULL,
- bandwidthobserved BIGINT NOT NULL,
- platform CHARACTER VARYING(256),
- published TIMESTAMP WITHOUT TIME ZONE NOT NULL,
- uptime BIGINT,
- extrainfo CHARACTER(40),
- CONSTRAINT descriptor_pkey PRIMARY KEY (descriptor)
-);
-
-CREATE OR REPLACE FUNCTION delete_old_descriptor()
-RETURNS INTEGER AS $$
- BEGIN
- DELETE FROM descriptor WHERE DATE(published) < current_date - 14;
- RETURN 1;
- END;
-$$ LANGUAGE plpgsql;
-
-- Contains bandwidth histories reported by relays in extra-info
-- descriptors. Each row contains the reported bandwidth in 15-minute
-- intervals for each relay and date.
@@ -97,22 +70,6 @@ RETURNS INTEGER AS $$
END;
$$ LANGUAGE plpgsql;
--- TABLE consensus
--- Contains all of the consensuses published by the directories.
-CREATE TABLE consensus (
- validafter TIMESTAMP WITHOUT TIME ZONE NOT NULL,
- CONSTRAINT consensus_pkey PRIMARY KEY (validafter)
-);
-
--- TABLE bandwidth_flags
-CREATE TABLE bandwidth_flags (
- date DATE NOT NULL,
- isexit BOOLEAN NOT NULL,
- isguard BOOLEAN NOT NULL,
- bwadvertised BIGINT NOT NULL,
- CONSTRAINT bandwidth_flags_pkey PRIMARY KEY(date, isexit, isguard)
-);
-
-- TABLE bwhist_flags
CREATE TABLE bwhist_flags (
date DATE NOT NULL,
@@ -149,15 +106,6 @@ CREATE TABLE user_stats (
CONSTRAINT user_stats_pkey PRIMARY KEY(date, country)
);
--- TABLE relay_statuses_per_day
--- A helper table which is commonly used to update the tables above in the
--- refresh_* functions.
-CREATE TABLE relay_statuses_per_day (
- date DATE NOT NULL,
- count INTEGER NOT NULL,
- CONSTRAINT relay_statuses_per_day_pkey PRIMARY KEY(date)
-);
-
-- Dates to be included in the next refresh run.
CREATE TABLE scheduled_updates (
id SERIAL,
@@ -174,24 +122,6 @@ CREATE TABLE updates (
date DATE
);
--- FUNCTION refresh_relay_statuses_per_day()
--- Updates helper table which is used to refresh the aggregate tables.
-CREATE OR REPLACE FUNCTION refresh_relay_statuses_per_day()
-RETURNS INTEGER AS $$
- BEGIN
- DELETE FROM relay_statuses_per_day
- WHERE date IN (SELECT date FROM updates);
- INSERT INTO relay_statuses_per_day (date, count)
- SELECT DATE(validafter) AS date, COUNT(*) AS count
- FROM consensus
- WHERE DATE(validafter) >= (SELECT MIN(date) FROM updates)
- AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
- AND DATE(validafter) IN (SELECT date FROM updates)
- GROUP BY DATE(validafter);
- RETURN 1;
- END;
-$$ LANGUAGE plpgsql;
-
CREATE OR REPLACE FUNCTION array_sum (BIGINT[]) RETURNS BIGINT AS $$
SELECT SUM($1[i])::bigint
FROM generate_series(array_lower($1, 1), array_upper($1, 1)) index(i);
@@ -247,45 +177,11 @@ $$ LANGUAGE plpgsql;
-- refresh_* functions
-- The following functions keep their corresponding aggregate tables
--- up-to-date. They should be called every time ERNIE is run, or when new
--- data is finished being added to the descriptor or statusentry tables.
+-- up-to-date. They should be called every time this module is run, or when new
+-- data is finished being added to the statusentry tables.
-- They find what new data has been entered or updated based on the
-- updates table.
-CREATE OR REPLACE FUNCTION refresh_bandwidth_flags() RETURNS INTEGER AS $$
- DECLARE
- min_date TIMESTAMP WITHOUT TIME ZONE;
- max_date TIMESTAMP WITHOUT TIME ZONE;
- BEGIN
-
- min_date := (SELECT MIN(date) FROM updates);
- max_date := (SELECT MAX(date) + 1 FROM updates);
-
- DELETE FROM bandwidth_flags WHERE date IN (SELECT date FROM updates);
- EXECUTE '
- INSERT INTO bandwidth_flags (date, isexit, isguard, bwadvertised)
- SELECT DATE(validafter) AS date,
- BOOL_OR(isexit) AS isexit,
- BOOL_OR(isguard) AS isguard,
- (SUM(LEAST(bandwidthavg, bandwidthobserved))
- / relay_statuses_per_day.count)::BIGINT AS bwadvertised
- FROM descriptor RIGHT JOIN statusentry
- ON descriptor.descriptor = statusentry.descriptor
- JOIN relay_statuses_per_day
- ON DATE(validafter) = relay_statuses_per_day.date
- WHERE isrunning = TRUE
- AND validafter >= ''' || min_date || '''
- AND validafter < ''' || max_date || '''
- AND DATE(validafter) IN (SELECT date FROM updates)
- AND relay_statuses_per_day.date >= ''' || min_date || '''
- AND relay_statuses_per_day.date < ''' || max_date || '''
- AND DATE(relay_statuses_per_day.date) IN
- (SELECT date FROM updates)
- GROUP BY DATE(validafter), isexit, isguard, relay_statuses_per_day.count';
- RETURN 1;
- END;
-$$ LANGUAGE plpgsql;
-
CREATE OR REPLACE FUNCTION refresh_bwhist_flags() RETURNS INTEGER AS $$
DECLARE
min_date TIMESTAMP WITHOUT TIME ZONE;
@@ -391,18 +287,12 @@ CREATE OR REPLACE FUNCTION refresh_all() RETURNS INTEGER AS $$
DELETE FROM updates;
RAISE NOTICE '% Copying scheduled dates.', timeofday();
INSERT INTO updates SELECT * FROM scheduled_updates;
- RAISE NOTICE '% Refreshing relay statuses per day.', timeofday();
- PERFORM refresh_relay_statuses_per_day();
- RAISE NOTICE '% Refreshing total relay bandwidth.', timeofday();
- PERFORM refresh_bandwidth_flags();
RAISE NOTICE '% Refreshing bandwidth history.', timeofday();
PERFORM refresh_bwhist_flags();
RAISE NOTICE '% Refreshing user statistics.', timeofday();
PERFORM refresh_user_stats();
RAISE NOTICE '% Deleting processed dates.', timeofday();
DELETE FROM scheduled_updates WHERE id IN (SELECT id FROM updates);
- RAISE NOTICE '% Deleting old descriptors.', timeofday();
- PERFORM delete_old_descriptor();
RAISE NOTICE '% Deleting old bandwidth histories.', timeofday();
PERFORM delete_old_bwhist();
RAISE NOTICE '% Deleting old status entries.', timeofday();
@@ -414,23 +304,14 @@ $$ LANGUAGE plpgsql;
-- View for exporting bandwidth statistics.
CREATE VIEW stats_bandwidth AS
- (SELECT COALESCE(bandwidth_flags.date, bwhist_flags.date) AS date,
- COALESCE(bandwidth_flags.isexit, bwhist_flags.isexit) AS isexit,
- COALESCE(bandwidth_flags.isguard, bwhist_flags.isguard) AS isguard,
- bandwidth_flags.bwadvertised AS advbw,
- CASE WHEN bwhist_flags.read IS NOT NULL
- THEN bwhist_flags.read / 86400 END AS bwread,
- CASE WHEN bwhist_flags.written IS NOT NULL
- THEN bwhist_flags.written / 86400 END AS bwwrite,
+ (SELECT date, isexit, isguard,
+ read / 86400 AS bwread,
+ written / 86400 AS bwwrite,
NULL AS dirread, NULL AS dirwrite
- FROM bandwidth_flags FULL OUTER JOIN bwhist_flags
- ON bandwidth_flags.date = bwhist_flags.date
- AND bandwidth_flags.isexit = bwhist_flags.isexit
- AND bandwidth_flags.isguard = bwhist_flags.isguard
- WHERE COALESCE(bandwidth_flags.date, bwhist_flags.date) <
- current_date - 2)
+ FROM bwhist_flags
+ WHERE date < current_date - 2)
UNION ALL
- (SELECT date, NULL AS isexit, NULL AS isguard, NULL AS advbw,
+ (SELECT date, NULL AS isexit, NULL AS isguard,
NULL AS bwread, NULL AS bwwrite,
FLOOR(CAST(dr AS NUMERIC) / CAST(86400 AS NUMERIC)) AS dirread,
FLOOR(CAST(dw AS NUMERIC) / CAST(86400 AS NUMERIC)) AS dirwrite
More information about the tor-commits mailing list