[tor-commits] [torperf/master] Truncate .data and .extradata files to contain only the last 4 days.
karsten at torproject.org
karsten at torproject.org
Tue Mar 6 17:20:15 UTC 2012
commit 178f753a451959cb78c6d0bf13a929ed1940a2e1
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Mon Mar 5 16:33:41 2012 +0100
Truncate .data and .extradata files to contain only the last 4 days.
---
extra_stats.py | 57 +++++++++++++++++++++++++++++++++++----
measurements-HOWTO | 18 ++++++++----
truncate-data.py | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 138 insertions(+), 12 deletions(-)
diff --git a/extra_stats.py b/extra_stats.py
index 69bb8a5..662cf43 100755
--- a/extra_stats.py
+++ b/extra_stats.py
@@ -1,6 +1,6 @@
#!/usr/bin/python
-import sys, time
+import os, re, sys, time
import TorCtl.TorUtil as TorUtil
import TorCtl.TorCtl as TorCtl
@@ -22,15 +22,18 @@ class Circuit:
self.stream_fail_reason = None
class WriteStats(TorCtl.PostEventListener):
- def __init__(self, port, filename):
+ def __init__(self, port, filename, truncate):
TorCtl.PostEventListener.__init__(self)
self._port = int(port)
self._filename = filename
+ self.truncate = truncate
+ self.first_launched = None
self._conn = None
self.all_circs = {}
self.ignore_streams = {}
self.current_timeout = None
self.current_quantile = None
+ self.truncate_statsfile()
def connect(self):
self._conn = TorCtl.connect(HOST, self._port)
@@ -52,6 +55,7 @@ class WriteStats(TorCtl.PostEventListener):
self.current_quantile = b.cutoff_quantile
result = b.event_name + " " +b.body
self.write_result(result)
+ self.truncate_statsfile()
def circ_status_event(self, c):
if c.status == "LAUNCHED":
@@ -94,6 +98,7 @@ class WriteStats(TorCtl.PostEventListener):
(self.current_timeout, self.current_quantile)
self.write_result(result)
+ self.truncate_statsfile()
def stream_status_event(self, event):
if event.status == "NEW":
@@ -140,19 +145,59 @@ class WriteStats(TorCtl.PostEventListener):
def write_result(self, result):
# XXX: hrmm. seems wasteful to keep opening+closing..
+ # XXX: When changing this, also change truncate_statsfile().
statsfile = open(self._filename, 'a')
statsfile.write(result+"\n")
statsfile.close()
+ def truncate_statsfile(self):
+ if not self.truncate:
+ return
+ launched_str = "^.*LAUNCH=([\\d]*).*$"
+ if not self.first_launched:
+ if os.path.isfile(self._filename):
+ launched_re = re.compile(launched_str)
+ with open(self._filename) as statsfile:
+ for line in statsfile:
+ m = launched_re.match(line)
+ if m:
+ self.first_launched = int(m.group(1))
+ break
+ if not self.first_launched:
+ self.first_launched = time.time()
+ now = time.time()
+ if self.first_launched < now - 7 * 24 * 60 * 60:
+ copylines = False
+ statsfilebak_path = self._filename + ".bak"
+ statsfilebak_file = open(statsfilebak_path, "w")
+ launched_re = re.compile(launched_str)
+ with open(self._filename) as statsfile:
+ for line in statsfile:
+ if copylines:
+ statsfilebak_file.write(line)
+ else:
+ m = launched_re.match(line)
+ if m and int(m.group(1)) >= now - 4 * 24 * 60 * 60:
+ statsfilebak_file.write(line)
+ copylines = True
+ statsfilebak_file.close()
+ os.rename(statsfilebak_path, self._filename)
+
def main():
- if len(sys.argv) < 3:
+ if len(sys.argv) < 3 or len(sys.argv) > 4:
print "Bad arguments"
sys.exit(1)
- port = sys.argv[1]
- filename = sys.argv[2]
+ truncate = False
+ if (sys.argv[1] == "--truncate"):
+ truncate = True
+ port = sys.argv[2]
+ filename = sys.argv[3]
+ else:
+ port = sys.argv[1]
+ filename = sys.argv[2]
- stats = WriteStats(port, filename)
+ stats = WriteStats(port, filename, truncate)
stats.connect()
stats.setup_listener()
try:
diff --git a/measurements-HOWTO b/measurements-HOWTO
index 67de0c5..2cefb68 100644
--- a/measurements-HOWTO
+++ b/measurements-HOWTO
@@ -103,14 +103,17 @@ cd ~/torperf/torclient50kb && tor -f ~/torperf/torclient50kb/torrc
cd ~/torperf/torclient1mb && tor -f ~/torperf/torclient1mb/torrc
cd ~/torperf/torclient5mb && tor -f ~/torperf/torclient5mb/torrc
sleep 5
-cd ~/torperf/torclient50kb && python ../extra_stats.py 10020
+cd ~/torperf/torclient50kb && python ../extra_stats.py --truncate 10020
../50kb.extradata &
-cd ~/torperf/torclient1mb && python ../extra_stats.py 10021
+cd ~/torperf/torclient1mb && python ../extra_stats.py --truncate 10021
../1mb.extradata &
-cd ~/torperf/torclient5mb && python ../extra_stats.py 10022
+cd ~/torperf/torclient5mb && python ../extra_stats.py --truncate 10022
../5mb.extradata &
EOF
+(Omit the --truncate switch if you don't want .extradata files to be
+truncated once per week to contain only the last 4 days of data.)
+
$ chmod a+x start-tors
$ ./start-tors
@@ -125,13 +128,16 @@ $ crontab -e
*/5 * * * * timeout -s2 295 ~/torperf/trivsocks-client
torperf.torproject.org 127.0.0.1:9020 /.50kbfile >> ~/torperf/50kb.data
- 2>/dev/null
+ 2>/dev/null; ~/torperf/truncate-data.py ~/torperf/50kb.data
2,32 * * * * timeout -s2 1795 ~/torperf/trivsocks-client
torperf.torproject.org 127.0.0.1:9021 /.1mbfile >> ~/torperf/1mb.data
- 2>/dev/null
+ 2>/dev/null; ~/torperf/truncate-data.py ~/torperf/1mb.data
8 * * * * timeout -s2 3595 ~/torperf/trivsocks-client
torperf.torproject.org 127.0.0.1:9022 /.5mbfile >> ~/torperf/5mb.data
- 2>/dev/null
+ 2>/dev/null; ~/torperf/truncate-data.py ~/torperf/5mb.data
+
+(Omit the truncate-data.py command if you don't want .data files to be
+truncated once per week to contain only the last 4 days of data.)
From now on, the three files 50kb.data, 1mb.data, and 5mb.data should
accumulate lines like this (50kb.data shown here; line breaks are only for
diff --git a/truncate-data.py b/truncate-data.py
new file mode 100755
index 0000000..a70e3fc
--- /dev/null
+++ b/truncate-data.py
@@ -0,0 +1,75 @@
+#!/usr/bin/python
+import os
+import re
+import sys
+import time
+
+# Truncate Torperf .data file by deleting lines older than 4 days, but
+# only once the first timestamp in the file is more than a week old.
+def main():
+
+ # Check usage.
+ if len(sys.argv) != 2:
+ print "Usage: ./truncate.py <.data file>"
+ return
+ data_path = sys.argv[1]
+ if not os.path.isfile(data_path):
+ print "%s is not a .data file." % data_path
+ return
+
+ # Prepare for parsing.
+ parselines = False
+ copylines = False
+ databak_path = data_path + ".bak"
+ databak_file = None
+ started_re = re.compile('(^[\\d]*) .*')
+ now = time.time()
+
+ # Parse the .data file line by line, possibly stopping early if the
+ # first timestamp we find isn't older than a week.
+ with open(data_path) as data_file:
+ for line in data_file:
+
+ # Copy lines written in the past 4 days. We have already decided to
+ # copy this part of the .data file before, so just copy the line and
+ # continue.
+ if copylines:
+ databak_file.write(line)
+ continue
+
+ # Skip empty lines.
+ if line.strip() == "":
+ continue
+
+ # Extract the first timestamp from the current line.
+ m = started_re.match(line)
+ if not m:
+ print "%s is not a valid .data file." % data_path
+ return
+ started_ts = int(m.group(1))
+
+ # Decide whether to start copying lines. We have already decided to
+ # truncate this file before.
+ if parselines:
+ if started_ts >= now - 4 * 24 * 60 * 60:
+ databak_file.write(line)
+ copylines = True
+ continue
+
+ # Decide whether to truncate this file at all.
+ if started_ts >= now - 7 * 24 * 60 * 60:
+ return
+
+ # Open a .bak file to write into and start parsing lines to copy in
+ # the next iteration.
+ databak_file = open(databak_path, "w")
+ parselines = True
+
+ # Close the .bak file and replace the original .data file with it.
+ if databak_file:
+ databak_file.close()
+ os.rename(databak_path, data_path)
+
+if __name__ == "__main__":
+ main()
+
More information about the tor-commits
mailing list