[tor-commits] [metrics-web/master] Tweak refresh functions to use partitioned table.
karsten at torproject.org
karsten at torproject.org
Thu Jan 12 15:36:05 UTC 2012
commit 13857257ac1df8c4b4d9bc00e0164b5731289762
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Thu Jan 12 14:54:16 2012 +0100
Tweak refresh functions to use partitioned table.
---
db/tordir.sql | 162 ++++++++++++++++++++++++++++++++++++++-------------------
1 files changed, 109 insertions(+), 53 deletions(-)
diff --git a/db/tordir.sql b/db/tordir.sql
index fbb1341..bc93b45 100644
--- a/db/tordir.sql
+++ b/db/tordir.sql
@@ -438,11 +438,24 @@ $$ LANGUAGE plpgsql;
-- FUNCTION refresh_network_size()
CREATE OR REPLACE FUNCTION refresh_network_size() RETURNS INTEGER AS $$
+ DECLARE
+ min_date TIMESTAMP WITHOUT TIME ZONE;
+ max_date TIMESTAMP WITHOUT TIME ZONE;
BEGIN
+ min_date := (SELECT MIN(date) FROM updates);
+ max_date := (SELECT MAX(date) + 1 FROM updates);
+
+ -- Nothing to refresh when updates is empty. Concatenating a NULL date
+ -- below would yield a NULL query string, which EXECUTE rejects.
+ IF min_date IS NULL THEN
+ RETURN 1;
+ END IF;
+
DELETE FROM network_size
WHERE date IN (SELECT date FROM updates);
+ EXECUTE '
INSERT INTO network_size
(date, avg_running, avg_exit, avg_guard, avg_fast, avg_stable)
SELECT date,
@@ -460,12 +467,12 @@ CREATE OR REPLACE FUNCTION refresh_network_size() RETURNS INTEGER AS $$
COUNT(NULLIF(isstable, FALSE)) AS isstable
FROM statusentry
WHERE isrunning = TRUE
- AND DATE(validafter) >= (SELECT MIN(date) FROM updates)
- AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
+ AND validafter >= ''' || min_date || '''
+ AND validafter < ''' || max_date || '''
AND DATE(validafter) IN (SELECT date FROM updates)
GROUP BY DATE(validafter)
) b
- NATURAL JOIN relay_statuses_per_day;
+ NATURAL JOIN relay_statuses_per_day';
RETURN 1;
END;
@@ -473,11 +480,24 @@ $$ LANGUAGE plpgsql;
-- FUNCTION refresh_network_size_hour()
CREATE OR REPLACE FUNCTION refresh_network_size_hour() RETURNS INTEGER AS $$
+ DECLARE
+ min_date TIMESTAMP WITHOUT TIME ZONE;
+ max_date TIMESTAMP WITHOUT TIME ZONE;
BEGIN
+ min_date := (SELECT MIN(date) FROM updates);
+ max_date := (SELECT MAX(date) + 1 FROM updates);
+
+ -- Nothing to refresh when updates is empty. Concatenating a NULL date
+ -- below would yield a NULL query string, which EXECUTE rejects.
+ IF min_date IS NULL THEN
+ RETURN 1;
+ END IF;
+
DELETE FROM network_size_hour
WHERE DATE(validafter) IN (SELECT date FROM updates);
+ EXECUTE '
INSERT INTO network_size_hour
(validafter, avg_running, avg_exit, avg_guard, avg_fast, avg_stable)
SELECT validafter, COUNT(*) AS avg_running,
@@ -487,10 +501,10 @@ CREATE OR REPLACE FUNCTION refresh_network_size_hour() RETURNS INTEGER AS $$
COUNT(NULLIF(isstable, FALSE)) AS avg_stable
FROM statusentry
WHERE isrunning = TRUE
- AND DATE(validafter) >= (SELECT MIN(date) FROM updates)
- AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
+ AND validafter >= ''' || min_date || '''
+ AND validafter < ''' || max_date || '''
AND DATE(validafter) IN (SELECT date FROM updates)
- GROUP BY validafter;
+ GROUP BY validafter';
RETURN 1;
END;
@@ -498,27 +512,40 @@ $$ LANGUAGE plpgsql;
-- FUNCTION refresh_relay_countries()
CREATE OR REPLACE FUNCTION refresh_relay_countries() RETURNS INTEGER AS $$
+ DECLARE
+ min_date TIMESTAMP WITHOUT TIME ZONE;
+ max_date TIMESTAMP WITHOUT TIME ZONE;
BEGIN
+ min_date := (SELECT MIN(date) FROM updates);
+ max_date := (SELECT MAX(date) + 1 FROM updates);
+
+ -- Nothing to refresh when updates is empty. Concatenating a NULL date
+ -- below would yield a NULL query string, which EXECUTE rejects.
+ IF min_date IS NULL THEN
+ RETURN 1;
+ END IF;
+
DELETE FROM relay_countries
WHERE date IN (SELECT date FROM updates);
+ EXECUTE '
INSERT INTO relay_countries
(date, country, relays)
SELECT date, country, relays / count AS relays
FROM (
SELECT DATE(validafter),
- COALESCE(lower((geoip_lookup(address)).country), 'zz')
+ COALESCE(lower((geoip_lookup(address)).country), ''zz'')
AS country,
COUNT(*) AS relays
FROM statusentry
WHERE isrunning = TRUE
- AND DATE(validafter) >= (SELECT MIN(date) FROM updates)
- AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
+ AND validafter >= ''' || min_date || '''
+ AND validafter < ''' || max_date || '''
AND DATE(validafter) IN (SELECT date FROM updates)
GROUP BY 1, 2
) b
- NATURAL JOIN relay_statuses_per_day;
+ NATURAL JOIN relay_statuses_per_day';
RETURN 1;
END;
@@ -526,11 +547,24 @@ $$ LANGUAGE plpgsql;
-- FUNCTION refresh_relay_platforms()
CREATE OR REPLACE FUNCTION refresh_relay_platforms() RETURNS INTEGER AS $$
+ DECLARE
+ min_date TIMESTAMP WITHOUT TIME ZONE;
+ max_date TIMESTAMP WITHOUT TIME ZONE;
BEGIN
+ min_date := (SELECT MIN(date) FROM updates);
+ max_date := (SELECT MAX(date) + 1 FROM updates);
+
+ -- Nothing to refresh when updates is empty. Concatenating a NULL date
+ -- below would yield a NULL query string, which EXECUTE rejects.
+ IF min_date IS NULL THEN
+ RETURN 1;
+ END IF;
+
DELETE FROM relay_platforms
WHERE date IN (SELECT date FROM updates);
+ EXECUTE '
INSERT INTO relay_platforms
(date, avg_linux, avg_darwin, avg_bsd, avg_windows, avg_other)
SELECT date,
@@ -541,29 +569,29 @@ CREATE OR REPLACE FUNCTION refresh_relay_platforms() RETURNS INTEGER AS $$
other / count AS avg_other
FROM (
SELECT DATE(validafter) AS date,
- SUM(CASE WHEN platform LIKE '%Linux%' THEN 1 ELSE 0 END)
+ SUM(CASE WHEN platform LIKE ''%Linux%'' THEN 1 ELSE 0 END)
AS linux,
- SUM(CASE WHEN platform LIKE '%Darwin%' THEN 1 ELSE 0 END)
+ SUM(CASE WHEN platform LIKE ''%Darwin%'' THEN 1 ELSE 0 END)
AS darwin,
- SUM(CASE WHEN platform LIKE '%BSD%' THEN 1 ELSE 0 END)
+ SUM(CASE WHEN platform LIKE ''%BSD%'' THEN 1 ELSE 0 END)
AS bsd,
- SUM(CASE WHEN platform LIKE '%Windows%' THEN 1 ELSE 0 END)
+ SUM(CASE WHEN platform LIKE ''%Windows%'' THEN 1 ELSE 0 END)
AS windows,
- SUM(CASE WHEN platform NOT LIKE '%Windows%'
- AND platform NOT LIKE '%Darwin%'
- AND platform NOT LIKE '%BSD%'
- AND platform NOT LIKE '%Linux%' THEN 1 ELSE 0 END)
+ SUM(CASE WHEN platform NOT LIKE ''%Windows%''
+ AND platform NOT LIKE ''%Darwin%''
+ AND platform NOT LIKE ''%BSD%''
+ AND platform NOT LIKE ''%Linux%'' THEN 1 ELSE 0 END)
AS other
FROM descriptor
RIGHT JOIN statusentry
ON statusentry.descriptor = descriptor.descriptor
WHERE isrunning = TRUE
- AND DATE(validafter) >= (SELECT MIN(date) FROM updates)
- AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
+ AND validafter >= ''' || min_date || '''
+ AND validafter < ''' || max_date || '''
AND DATE(validafter) IN (SELECT date FROM updates)
GROUP BY DATE(validafter)
) b
- NATURAL JOIN relay_statuses_per_day;
+ NATURAL JOIN relay_statuses_per_day';
RETURN 1;
END;
@@ -571,11 +599,24 @@ $$ LANGUAGE plpgsql;
-- FUNCTION refresh_relay_versions()
CREATE OR REPLACE FUNCTION refresh_relay_versions() RETURNS INTEGER AS $$
+ DECLARE
+ min_date TIMESTAMP WITHOUT TIME ZONE;
+ max_date TIMESTAMP WITHOUT TIME ZONE;
BEGIN
+ min_date := (SELECT MIN(date) FROM updates);
+ max_date := (SELECT MAX(date) + 1 FROM updates);
+
+ -- Nothing to refresh when updates is empty. Concatenating a NULL date
+ -- below would yield a NULL query string, which EXECUTE rejects.
+ IF min_date IS NULL THEN
+ RETURN 1;
+ END IF;
+
DELETE FROM relay_versions
WHERE date IN (SELECT date FROM updates);
+ EXECUTE '
INSERT INTO relay_versions
(date, version, relays)
SELECT date, version, relays / count AS relays
@@ -586,12 +621,12 @@ CREATE OR REPLACE FUNCTION refresh_relay_versions() RETURNS INTEGER AS $$
ON descriptor.descriptor = statusentry.descriptor
WHERE isrunning = TRUE
AND platform IS NOT NULL
- AND DATE(validafter) >= (SELECT MIN(date) FROM updates)
- AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
+ AND validafter >= ''' || min_date || '''
+ AND validafter < ''' || max_date || '''
AND DATE(validafter) IN (SELECT date FROM updates)
GROUP BY 1, 2
) b
- NATURAL JOIN relay_statuses_per_day;
+ NATURAL JOIN relay_statuses_per_day';
RETURN 1;
END;
@@ -600,11 +635,24 @@ $$ LANGUAGE plpgsql;
-- FUNCTION refresh_total_bandwidth()
-- This keeps the table total_bandwidth up-to-date when necessary.
CREATE OR REPLACE FUNCTION refresh_total_bandwidth() RETURNS INTEGER AS $$
+ DECLARE
+ min_date TIMESTAMP WITHOUT TIME ZONE;
+ max_date TIMESTAMP WITHOUT TIME ZONE;
BEGIN
+ min_date := (SELECT MIN(date) FROM updates);
+ max_date := (SELECT MAX(date) + 1 FROM updates);
+
+ -- Nothing to refresh when updates is empty. Concatenating a NULL date
+ -- below would yield a NULL query string, which EXECUTE rejects.
+ IF min_date IS NULL THEN
+ RETURN 1;
+ END IF;
+
DELETE FROM total_bandwidth
WHERE date IN (SELECT date FROM updates);
+ EXECUTE '
INSERT INTO total_bandwidth
(bwavg, bwburst, bwobserved, bwadvertised, date)
SELECT (SUM(bandwidthavg)
@@ -621,16 +663,14 @@ CREATE OR REPLACE FUNCTION refresh_total_bandwidth() RETURNS INTEGER AS $$
JOIN relay_statuses_per_day
ON DATE(validafter) = relay_statuses_per_day.date
WHERE isrunning = TRUE
- AND DATE(validafter) >= (SELECT MIN(date) FROM updates)
- AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
+ AND validafter >= ''' || min_date || '''
+ AND validafter < ''' || max_date || '''
AND DATE(validafter) IN (SELECT date FROM updates)
- AND DATE(relay_statuses_per_day.date) >=
- (SELECT MIN(date) FROM updates)
- AND DATE(relay_statuses_per_day.date) <=
- (SELECT MAX(date) FROM updates)
+ AND relay_statuses_per_day.date >= ''' || min_date || '''
+ AND relay_statuses_per_day.date < ''' || max_date || '''
AND DATE(relay_statuses_per_day.date) IN
(SELECT date FROM updates)
- GROUP BY DATE(validafter), relay_statuses_per_day.count;
+ GROUP BY DATE(validafter), relay_statuses_per_day.count';
RETURN 1;
END;
@@ -651,8 +691,22 @@ CREATE OR REPLACE FUNCTION refresh_total_bwhist() RETURNS INTEGER AS $$
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION refresh_bwhist_flags() RETURNS INTEGER AS $$
- BEGIN
+ DECLARE
+ min_date TIMESTAMP WITHOUT TIME ZONE;
+ max_date TIMESTAMP WITHOUT TIME ZONE;
+ BEGIN
+
+ min_date := (SELECT MIN(date) FROM updates);
+ max_date := (SELECT MAX(date) + 1 FROM updates);
+
+ -- Nothing to refresh when updates is empty. Concatenating a NULL date
+ -- below would yield a NULL query string, which EXECUTE rejects.
+ IF min_date IS NULL THEN
+ RETURN 1;
+ END IF;
+
DELETE FROM bwhist_flags WHERE date IN (SELECT date FROM updates);
+ EXECUTE '
INSERT INTO bwhist_flags (date, isexit, isguard, read, written)
SELECT a.date, isexit, isguard, SUM(read_sum) as read,
SUM(written_sum) AS written
@@ -663,14 +711,14 @@ CREATE OR REPLACE FUNCTION refresh_bwhist_flags() RETURNS INTEGER AS $$
BOOL_OR(isguard) AS isguard
FROM statusentry
WHERE isrunning = TRUE
- AND DATE(validafter) >= (SELECT MIN(date) FROM updates)
- AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
+ AND validafter >= ''' || min_date || '''
+ AND validafter < ''' || max_date || '''
AND DATE(validafter) IN (SELECT date FROM updates)
GROUP BY 1, 2) a
JOIN bwhist
ON a.date = bwhist.date
AND a.fingerprint = bwhist.fingerprint
- GROUP BY 1, 2, 3;
+ GROUP BY 1, 2, 3';
RETURN 1;
END;
$$ LANGUAGE plpgsql;
@@ -680,11 +728,25 @@ $$ LANGUAGE plpgsql;
-- directory request statistics of directory mirrors with bandwidth
-- histories.
CREATE OR REPLACE FUNCTION refresh_user_stats() RETURNS INTEGER AS $$
- BEGIN
+ DECLARE
+ min_date TIMESTAMP WITHOUT TIME ZONE;
+ max_date TIMESTAMP WITHOUT TIME ZONE;
+ BEGIN
+
+ min_date := (SELECT MIN(date) FROM updates);
+ max_date := (SELECT MAX(date) + 1 FROM updates);
+
+ -- Nothing to refresh when updates is empty. Concatenating a NULL date
+ -- below would yield a NULL query string, which EXECUTE rejects.
+ IF min_date IS NULL THEN
+ RETURN 1;
+ END IF;
+
-- Start by deleting user statistics of the dates we're about to
-- regenerate.
DELETE FROM user_stats WHERE date IN (SELECT date FROM updates);
-- Now insert new user statistics.
+ EXECUTE '
INSERT INTO user_stats (date, country, r, dw, dr, drw, drr, bw, br, bwd,
brd, bwr, brr, bwdr, brdr, bwp, brp, bwn, brn)
SELECT
@@ -692,7 +748,7 @@ CREATE OR REPLACE FUNCTION refresh_user_stats() RETURNS INTEGER AS $$
dirreq_stats_by_country.date AS date,
dirreq_stats_by_country.country AS country,
dirreq_stats_by_country.r AS r,
- -- In order to weight the reported directory requests, we're
+ -- In order to weight the reported directory requests, we are
-- counting bytes of relays (except directory authorities)
-- matching certain criteria: whether or not they are reporting
-- directory requests, whether or not they are reporting
@@ -759,14 +815,14 @@ CREATE OR REPLACE FUNCTION refresh_user_stats() RETURNS INTEGER AS $$
) dirreq_stats_split
GROUP BY 1, 2, 3
) dirreq_stats_by_date
- -- We're only interested in requests by directory mirrors, not
+ -- We are only interested in requests by directory mirrors, not
-- directory authorities, so we exclude all relays with the Authority
-- flag.
RIGHT JOIN (
SELECT fingerprint, DATE(validafter) AS date
FROM statusentry
- WHERE DATE(validafter) >= (SELECT MIN(date) FROM updates)
- AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
+ WHERE validafter >= ''' || min_date || '''
+ AND validafter < ''' || max_date || '''
AND DATE(validafter) IN (SELECT date FROM updates)
AND isauthority IS FALSE
GROUP BY 1, 2
@@ -782,8 +838,8 @@ CREATE OR REPLACE FUNCTION refresh_user_stats() RETURNS INTEGER AS $$
SELECT fingerprint, date, read_sum AS read, written_sum AS written,
dirread_sum AS dirread, dirwritten_sum AS dirwritten
FROM bwhist
- WHERE date >= (SELECT MIN(date) FROM updates)
- AND date <= (SELECT MAX(date) FROM updates)
+ WHERE date >= ''' || min_date || '''
+ AND date < ''' || max_date || '''
AND date IN (SELECT date FROM updates)
) bwhist_by_relay
ON dirreq_stats_by_country.date = bwhist_by_relay.date
@@ -794,8 +850,8 @@ CREATE OR REPLACE FUNCTION refresh_user_stats() RETURNS INTEGER AS $$
SUM(CASE WHEN dirport > 0 THEN 1 ELSE NULL END) AS opendirport,
SUM(CASE WHEN isauthority IS TRUE THEN 1 ELSE NULL END) AS authority
FROM statusentry
- WHERE DATE(validafter) >= (SELECT MIN(date) FROM updates)
- AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
+ WHERE validafter >= ''' || min_date || '''
+ AND validafter < ''' || max_date || '''
AND DATE(validafter) IN (SELECT date FROM updates)
GROUP BY 1, 2
) statusentry_by_relay
@@ -805,23 +861,23 @@ CREATE OR REPLACE FUNCTION refresh_user_stats() RETURNS INTEGER AS $$
-- For each relay, tell if it has reported directory request
-- statistics on a given date. Again, we have to take into account
-- that statistics intervals cover more than one calendar date in most
- -- cases. The exact number of requests isn't relevant here, but only
+ -- cases. The exact number of requests is not relevant here, but only
-- whether the relay reported directory requests or not.
SELECT fingerprint, date, 1 AS requests
FROM (
SELECT LOWER(source) AS fingerprint, DATE(statsend) AS date
FROM dirreq_stats
- WHERE DATE(statsend) >= (SELECT MIN(date) FROM updates)
- AND DATE(statsend) <= (SELECT MAX(date) FROM updates)
+ WHERE DATE(statsend) >= ''' || min_date || '''
+ AND DATE(statsend) < ''' || max_date || '''
AND DATE(statsend) IN (SELECT date FROM updates)
- AND country = 'zy'
+ AND country = ''zy''
UNION
SELECT LOWER(source) AS fingerprint, DATE(statsend) - 1 AS date
FROM dirreq_stats
- WHERE DATE(statsend) - 1 >= (SELECT MIN(date) FROM updates)
- AND DATE(statsend) - 1 <= (SELECT MAX(date) FROM updates)
- AND DATE(statsend) - 1 IN (SELECT date FROM updates)
- AND country = 'zy'
+ WHERE DATE(statsend) - 1 >= ''' || min_date || '''
+ AND DATE(statsend) - 1 < ''' || max_date || '''
+ AND DATE(statsend) - 1 IN (SELECT date FROM updates)
+ AND country = ''zy''
AND EXTRACT(EPOCH FROM DATE(statsend)) -
EXTRACT(EPOCH FROM statsend) + seconds > 0
) dirreq_stats_split
@@ -832,7 +888,7 @@ CREATE OR REPLACE FUNCTION refresh_user_stats() RETURNS INTEGER AS $$
WHERE dirreq_stats_by_country.country IS NOT NULL
-- Group by date, country, and total reported directory requests,
-- summing up the bandwidth histories.
- GROUP BY 1, 2, 3;
+ GROUP BY 1, 2, 3';
RETURN 1;
END;
$$ LANGUAGE plpgsql;
More information about the tor-commits
mailing list