[tor-commits] [metrics-web/master] Rewrite insert_bwhist in SQL.
karsten at torproject.org
karsten at torproject.org
Wed Mar 4 19:55:45 UTC 2020
commit 714b2ee0cc9c3d96afbd87b3d12595d549ae58a1
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Sun Feb 23 10:53:46 2020 +0100
Rewrite insert_bwhist in SQL.
The old PL/pgSQL version of this function made three lookups in the
bwhist table to 1) check whether a row already exists, 2) insert or
update the row, and 3) update the row once again with array sums. The
new SQL version uses the INSERT ON CONFLICT statement introduced in
PostgreSQL 9.5 (Debian stretch has 9.6, buster has 11). The
performance gain measured using metrics-test is impressive, and the
computed aggregates are equivalent.
---
CHANGELOG.md | 2 ++
src/main/sql/bwhist/tordir.sql | 46 +++++++++++++++++-------------------------
2 files changed, 20 insertions(+), 28 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 43b7e75..b1571c6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,8 @@
extrapolated statistics even if computed network fractions are
zero, to avoid re-processing these statistics over and over.
- Extract directory authority bytes per day in the bwhist module.
+ - Rewrite insert_bwhist in SQL to improve performance of the bwhist
+ module.
* Minor changes
- Make Jetty host configurable.
diff --git a/src/main/sql/bwhist/tordir.sql b/src/main/sql/bwhist/tordir.sql
index fad5d2f..9e7bea9 100644
--- a/src/main/sql/bwhist/tordir.sql
+++ b/src/main/sql/bwhist/tordir.sql
@@ -114,34 +114,24 @@ CREATE OR REPLACE FUNCTION insert_bwhist(
insert_fingerprint CHARACTER(40), insert_date DATE,
insert_read BIGINT[], insert_written BIGINT[],
insert_dirread BIGINT[], insert_dirwritten BIGINT[])
- RETURNS INTEGER AS $$
- BEGIN
- IF (SELECT COUNT(*) FROM bwhist
- WHERE fingerprint = insert_fingerprint AND date = insert_date) = 0
- THEN
- INSERT INTO bwhist (fingerprint, date, read, written, dirread,
- dirwritten)
- VALUES (insert_fingerprint, insert_date, insert_read, insert_written,
- insert_dirread, insert_dirwritten);
- ELSE
- BEGIN
- UPDATE bwhist
- SET read = array_merge(read, insert_read),
- written = array_merge(written, insert_written),
- dirread = array_merge(dirread, insert_dirread),
- dirwritten = array_merge(dirwritten, insert_dirwritten)
- WHERE fingerprint = insert_fingerprint AND date = insert_date;
- END;
- END IF;
- UPDATE bwhist
- SET read_sum = array_sum(read),
- written_sum = array_sum(written),
- dirread_sum = array_sum(dirread),
- dirwritten_sum = array_sum(dirwritten)
- WHERE fingerprint = insert_fingerprint AND date = insert_date;
- RETURN 1;
- END;
-$$ LANGUAGE plpgsql;
+ RETURNS VOID AS $$
+ INSERT INTO bwhist (fingerprint, date, read, read_sum, written, written_sum,
+ dirread, dirread_sum, dirwritten, dirwritten_sum)
+ VALUES (insert_fingerprint, insert_date, insert_read,
+ array_sum(insert_read), insert_written, array_sum(insert_written),
+ insert_dirread, array_sum(insert_dirread), insert_dirwritten,
+ array_sum(insert_dirwritten))
+ ON CONFLICT ON CONSTRAINT bwhist_pkey DO UPDATE
+ SET read = array_merge(bwhist.read, insert_read),
+ read_sum = array_sum(array_merge(bwhist.read, insert_read)),
+ written = array_merge(bwhist.written, insert_written),
+ written_sum = array_sum(array_merge(bwhist.written, insert_written)),
+ dirread = array_merge(bwhist.dirread, insert_dirread),
+ dirread_sum = array_sum(array_merge(bwhist.dirread, insert_dirread)),
+ dirwritten = array_merge(bwhist.dirwritten, insert_dirwritten),
+ dirwritten_sum = array_sum(
+ array_merge(bwhist.dirwritten, insert_dirwritten));
+$$ LANGUAGE SQL;
-- refresh_* functions
-- The following functions keep their corresponding aggregate tables
More information about the tor-commits mailing list