[tor-commits] [stem/master] tail() function for reading the last lines from a file
atagar at torproject.org
atagar at torproject.org
Tue Mar 24 16:35:38 UTC 2015
commit 8736a7ee133a8eecc7f193b52f23c3ed951271ee
Author: Damian Johnson <atagar at torproject.org>
Date: Tue Mar 24 09:26:41 2015 -0700
tail() function for reading the last lines from a file
Tor log files can be in the order of gigabytes so we often want a tail helper
for reading them. Happily StackOverflow had some great answers...
https://stackoverflow.com/questions/136168/get-last-n-lines-of-a-file-with-python-similar-to-tail
Adopting a slight tweak of S.Lott's answer with papercrane's fix. Actually,
this performs even better than shelling out to tail. Tad surprised. :P
% cat scrap.py
import time

import stem.util.system


def read_whole_file():
  # loads every line into memory, then keeps just the final three
  with open('/tmp/long_file') as long_file:
    return long_file.readlines()[-3:]


def shell_out_to_tail():
  # has the system's tail command do the work for us
  return stem.util.system.call('tail -n 3 /tmp/long_file')


def call_our_tail():
  # uses stem's own tail() helper
  return stem.util.system.tail('/tmp/long_file', 3)


def benchmark(heading, read_tail):
  # Prints the heading, then three times over prints what read_tail provides
  # followed by how long fetching and printing it took.

  print(heading)

  for _ in range(3):
    began = time.time()
    print(read_tail())
    print('took %s' % (time.time() - began))


benchmark("Reading the whole file:", read_whole_file)
benchmark("\nShelling out to tail:", shell_out_to_tail)
benchmark("\nCalling our tail() function:", call_our_tail)
--------------------------------------------------------------------------------
% python scrap.py
Reading the whole file:
[' or the dense London fog\n', "but I'm with people you meet\n", ' both the wicked and sweet.\n']
took 0.546777009964
[' or the dense London fog\n', "but I'm with people you meet\n", ' both the wicked and sweet.\n']
took 0.518195152283
[' or the dense London fog\n', "but I'm with people you meet\n", ' both the wicked and sweet.\n']
took 0.519068956375
Shelling out to tail:
[u'or the dense London fog', u"but I'm with people you meet", u' both the wicked and sweet.']
took 0.00712585449219
[u'or the dense London fog', u"but I'm with people you meet", u' both the wicked and sweet.']
took 0.00904107093811
[u'or the dense London fog', u"but I'm with people you meet", u' both the wicked and sweet.']
took 0.00874090194702
Calling our tail() function:
[' or the dense London fog', "but I'm with people you meet", ' both the wicked and sweet.']
took 0.000432968139648
[' or the dense London fog', "but I'm with people you meet", ' both the wicked and sweet.']
took 0.000234842300415
[' or the dense London fog', "but I'm with people you meet", ' both the wicked and sweet.']
took 0.000171184539795
---
stem/util/system.py | 50 +++++++++++++++++++++++++++++++++++++++++++++-
test/unit/util/system.py | 20 +++++++++++++++++++
test/unit/util/text_file | 14 +++++++++++++
3 files changed, 83 insertions(+), 1 deletion(-)
diff --git a/stem/util/system.py b/stem/util/system.py
index 2c15d23..f2d1bc8 100644
--- a/stem/util/system.py
+++ b/stem/util/system.py
@@ -30,6 +30,7 @@ best-effort, providing **None** if the lookup fails.
cwd - provides the current working directory for a given process
user - provides the user a process is running under
start_time - provides the unix timestamp when the process started
+ tail - provides lines from the end of a file
bsd_jail_id - provides the BSD jail id a given process is running within
bsd_jail_path - provides the path of the given BSD jail
@@ -37,7 +38,6 @@ best-effort, providing **None** if the lookup fails.
expand_path - expands relative paths and ~ entries
files_with_suffix - provides files with the given suffix
-
get_process_name - provides our process' name
set_process_name - changes our process' name
"""
@@ -49,6 +49,7 @@ import os
import platform
import re
import subprocess
+import sys
import tarfile
import time
@@ -88,6 +89,8 @@ GET_CWD_LSOF = 'lsof -a -p %s -d cwd -Fn'
GET_BSD_JAIL_ID_PS = 'ps -p %s -o jid'
GET_BSD_JAIL_PATH = 'jls -j %s'
+BLOCK_SIZE = 1024
+
# flag for setting the process name, found in '/usr/include/linux/prctl.h'
PR_SET_NAME = 15
@@ -754,6 +757,51 @@ def start_time(pid):
return None
def tail(target, lines = None, block_size = None):
  """
  Provides the last lines from a file, similar to 'tail -n 50 /tmp/my_log'.

  The file is read backward from its end one block at a time, so only as much
  of it as is needed to provide the requested lines gets loaded.

  :param str,file target: path or file object to read from
  :param int lines: number of lines to read, all of them if **None**
  :param int block_size: number of bytes to read at a time, BLOCK_SIZE if
    **None**

  :returns: **list** of lines the file ends with, without their newlines

  :raises: **ValueError** if the block_size isn't a positive number
  """

  if isinstance(target, (bytes, str, type(u''))):
    # binary mode lets us seek to arbitrary offsets under python 3 too

    with open(target, 'rb') as target_file:
      return tail(target_file, lines, block_size)

  if lines is not None and lines <= 0:
    return []

  if block_size is None:
    block_size = BLOCK_SIZE

  if block_size < 1:
    raise ValueError('block_size must be positive, got %s' % block_size)

  # Text handles in python 3 only support seeking to offsets they provided
  # themselves, so when there's a binary buffer beneath we read from that.

  encoding = getattr(target, 'encoding', None) or 'utf-8'
  source = getattr(target, 'buffer', target)

  # based on snippet from...
  # https://stackoverflow.com/questions/136168/get-last-n-lines-of-a-file-with-python-similar-to-tail

  source.seek(0, 2)  # go to the end of the file
  block_start = source.tell()
  newline_count = 0
  blocks = []  # blocks of at most block_size, from the end of the file backward

  # A block can begin in the middle of a line, so we need one more newline
  # than the lines we want to be sure the earliest line we provide is whole.

  while block_start > 0 and (lines is None or newline_count <= lines):
    block_end = block_start
    block_start = max(0, block_end - block_size)

    source.seek(block_start, 0)
    block = source.read(block_end - block_start)

    newline_count += block.count(b'\n' if isinstance(block, bytes) else u'\n')
    blocks.append(block)

  if not blocks:
    return []  # empty file

  blocks.reverse()  # put the blocks back in file order
  content = blocks[0][:0].join(blocks)
  results = content.splitlines()

  if lines is not None:
    results = results[-lines:]

  if bytes is not str:
    # python 3, where what we read from binary sources needs to be decoded
    results = [line.decode(encoding, 'replace') if isinstance(line, bytes) else line for line in results]

  return results
+
+
def bsd_jail_id(pid):
"""
Gets the jail id for a process. These seem to only exist for FreeBSD (this
diff --git a/test/unit/util/system.py b/test/unit/util/system.py
index f6e9f35..98cfb8d 100644
--- a/test/unit/util/system.py
+++ b/test/unit/util/system.py
@@ -7,6 +7,7 @@ system running the tests.
import functools
import ntpath
+import os
import posixpath
import unittest
@@ -371,6 +372,25 @@ class TestSystem(unittest.TestCase):
expected_response = '/Users/atagar/tor/src/or' if test_input == '75717' else None
self.assertEqual(expected_response, system.cwd(test_input))
def test_tail(self):
  """
  Checks that tail() provides the end of our riddle file, both when given
  a file handle and a path, for a range of line counts.
  """

  riddle_path = os.path.join(os.path.dirname(__file__), 'text_file')
  final_line = ' both the wicked and sweet.'
  prior_line = "but I'm with people you meet"

  # reading from an open file handle

  with open(riddle_path) as riddle_file:
    self.assertEqual([final_line], system.tail(riddle_file, 1))

  # reading from a path, asking for a growing number of lines

  for line_count, expected in ((0, []), (1, [final_line]), (2, [prior_line, final_line])):
    self.assertEqual(expected, system.tail(riddle_path, line_count))

  # without a limit, or with one beyond the file's length, we get all of it

  for line_count in (None, 200):
    self.assertEqual(14, len(system.tail(riddle_path, line_count)))
+
@patch('stem.util.system.call')
@patch('stem.util.system.is_available', Mock(return_value = True))
def test_bsd_jail_id(self, call_mock):
diff --git a/test/unit/util/text_file b/test/unit/util/text_file
new file mode 100644
index 0000000..6c6d2ae
--- /dev/null
+++ b/test/unit/util/text_file
@@ -0,0 +1,14 @@
+I'm in magic and books,
+ with spooks and the crooks,
+and forbidden lover's bliss
+ after a vine trellis kiss.
+
+I might be open or closed,
+ often plain as your nose.
+I'm a gift you can't take back,
+ and in the sharing I crack.
+
+Authors place me in Prague
+ or the dense London fog
+but I'm with people you meet
+ both the wicked and sweet.
More information about the tor-commits
mailing list