[tor-commits] [stem/master] tail() function for reading the last lines from a file

atagar at torproject.org atagar at torproject.org
Tue Mar 24 16:35:38 UTC 2015


commit 8736a7ee133a8eecc7f193b52f23c3ed951271ee
Author: Damian Johnson <atagar at torproject.org>
Date:   Tue Mar 24 09:26:41 2015 -0700

    tail() function for reading the last lines from a file
    
    Tor log files can be in the order of gigabytes so we often want a tail helper
    for reading them. Happily StackOverflow had some great answers...
    
      https://stackoverflow.com/questions/136168/get-last-n-lines-of-a-file-with-python-similar-to-tail
    
    Adopting a slight tweak of S.Lott's answer with papercrane's fix. Actually,
    this performs even better than shelling out to tail. Tad surprised. :P
    
      % cat scrap.py
      import time
    
      import stem.util.system
    
      print "Reading the whole file:"
    
      for i in range(3):
        start_time = time.time()
    
        with open('/tmp/long_file') as long_file:
          print long_file.readlines()[-3:]
    
        print 'took %s' % (time.time() - start_time)
    
      print "\nShelling out to tail:"
    
      for i in range(3):
        start_time = time.time()
        print stem.util.system.call('tail -n 3 /tmp/long_file')
        print 'took %s' % (time.time() - start_time)
    
      print "\nCalling our tail() function:"
    
      for i in range(3):
        start_time = time.time()
        print stem.util.system.tail('/tmp/long_file', 3)
        print 'took %s' % (time.time() - start_time)
    
      --------------------------------------------------------------------------------
    
      % python scrap.py
      Reading the whole file:
      ['  or the dense London fog\n', "but I'm with people you meet\n", '  both the wicked and sweet.\n']
      took 0.546777009964
      ['  or the dense London fog\n', "but I'm with people you meet\n", '  both the wicked and sweet.\n']
      took 0.518195152283
      ['  or the dense London fog\n', "but I'm with people you meet\n", '  both the wicked and sweet.\n']
      took 0.519068956375
    
      Shelling out to tail:
      [u'or the dense London fog', u"but I'm with people you meet", u'  both the wicked and sweet.']
      took 0.00712585449219
      [u'or the dense London fog', u"but I'm with people you meet", u'  both the wicked and sweet.']
      took 0.00904107093811
      [u'or the dense London fog', u"but I'm with people you meet", u'  both the wicked and sweet.']
      took 0.00874090194702
    
      Calling our tail() function:
      ['  or the dense London fog', "but I'm with people you meet", '  both the wicked and sweet.']
      took 0.000432968139648
      ['  or the dense London fog', "but I'm with people you meet", '  both the wicked and sweet.']
      took 0.000234842300415
      ['  or the dense London fog', "but I'm with people you meet", '  both the wicked and sweet.']
      took 0.000171184539795
---
 stem/util/system.py      |   50 +++++++++++++++++++++++++++++++++++++++++++++-
 test/unit/util/system.py |   20 +++++++++++++++++++
 test/unit/util/text_file |   14 +++++++++++++
 3 files changed, 83 insertions(+), 1 deletion(-)

diff --git a/stem/util/system.py b/stem/util/system.py
index 2c15d23..f2d1bc8 100644
--- a/stem/util/system.py
+++ b/stem/util/system.py
@@ -30,6 +30,7 @@ best-effort, providing **None** if the lookup fails.
   cwd - provides the current working directory for a given process
   user - provides the user a process is running under
   start_time - provides the unix timestamp when the process started
+  tail - provides lines from the end of a file
   bsd_jail_id - provides the BSD jail id a given process is running within
   bsd_jail_path - provides the path of the given BSD jail
 
@@ -37,7 +38,6 @@ best-effort, providing **None** if the lookup fails.
   expand_path - expands relative paths and ~ entries
   files_with_suffix - provides files with the given suffix
 
-
   get_process_name - provides our process' name
   set_process_name - changes our process' name
 """
@@ -49,6 +49,7 @@ import os
 import platform
 import re
 import subprocess
+import sys
 import tarfile
 import time
 
@@ -88,6 +89,8 @@ GET_CWD_LSOF = 'lsof -a -p %s -d cwd -Fn'
 GET_BSD_JAIL_ID_PS = 'ps -p %s -o jid'
 GET_BSD_JAIL_PATH = 'jls -j %s'
 
+BLOCK_SIZE = 1024
+
 # flag for setting the process name, found in '/usr/include/linux/prctl.h'
 
 PR_SET_NAME = 15
@@ -754,6 +757,51 @@ def start_time(pid):
   return None
 
 
+def tail(target, lines = None):
+  """
+  Provides the last lines from a file, similar to 'tail -n 50 /tmp/my_log'.
+  The file is read backward in BLOCK_SIZE chunks, stopping once we have enough.
+
+  :param str,file target: path or file object to read from
+  :param int lines: number of lines to read, **None** for the whole file
+
+  :returns: **list** of lines the file ends with, without their newlines
+  """
+
+  if isinstance(target, str):
+    with open(target) as target_file:
+      return tail(target_file, lines)
+
+  if lines is None:
+    lines = sys.maxsize  # maxint is gone in python 3, maxsize is in both
+  elif lines <= 0:
+    return []  # nothing requested, and a [-0:] slice would return everything
+
+  # based on snippet from...
+  # https://stackoverflow.com/questions/136168/get-last-n-lines-of-a-file-with-python-similar-to-tail
+
+  target.seek(0, 2)  # go to the end of the file
+  block_end_byte = target.tell()
+  blocks = []  # blocks of up to BLOCK_SIZE, in reverse order
+
+  # We need one newline more than the lines requested. Otherwise the first
+  # line we return may be cut off at the start of the earliest block we read.
+  newlines_left = lines + 1
+
+  while newlines_left > 0 and block_end_byte > 0:
+    # read the last block we haven't yet read, which is shorter than
+    # BLOCK_SIZE if we've reached the start of the file
+    block_start_byte = max(0, block_end_byte - BLOCK_SIZE)
+    target.seek(block_start_byte, 0)
+    blocks.append(target.read(block_end_byte - block_start_byte))
+
+    # blocks[-1] is the block we just read since we append as we go
+    newlines_left -= blocks[-1].count('\n')
+    block_end_byte = block_start_byte
+
+  return ''.join(reversed(blocks)).splitlines()[-lines:]
+
+
 def bsd_jail_id(pid):
   """
   Gets the jail id for a process. These seem to only exist for FreeBSD (this
diff --git a/test/unit/util/system.py b/test/unit/util/system.py
index f6e9f35..98cfb8d 100644
--- a/test/unit/util/system.py
+++ b/test/unit/util/system.py
@@ -7,6 +7,7 @@ system running the tests.
 
 import functools
 import ntpath
+import os
 import posixpath
 import unittest
 
@@ -371,6 +372,25 @@ class TestSystem(unittest.TestCase):
       expected_response = '/Users/atagar/tor/src/or' if test_input == '75717' else None
       self.assertEqual(expected_response, system.cwd(test_input))
 
+  def test_tail(self):
+    """
+    Checks tail() with a file handle and a path, over a range of line counts.
+    """
+    text_path = os.path.join(os.path.dirname(__file__), 'text_file')
+    last_line = '  both the wicked and sweet.'
+
+    # reading from an open file object rather than a path
+    with open(text_path) as text_file:
+      self.assertEqual([last_line], system.tail(text_file, 1))
+
+    self.assertEqual([], system.tail(text_path, 0))
+    self.assertEqual([last_line], system.tail(text_path, 1))
+    self.assertEqual(["but I'm with people you meet", last_line], system.tail(text_path, 2))
+
+    # no count, or one past the file's length, both give all fourteen lines
+    self.assertEqual(14, len(system.tail(text_path)))
+    self.assertEqual(14, len(system.tail(text_path, 200)))
+
   @patch('stem.util.system.call')
   @patch('stem.util.system.is_available', Mock(return_value = True))
   def test_bsd_jail_id(self, call_mock):
diff --git a/test/unit/util/text_file b/test/unit/util/text_file
new file mode 100644
index 0000000..6c6d2ae
--- /dev/null
+++ b/test/unit/util/text_file
@@ -0,0 +1,14 @@
+I'm in magic and books,
+  with spooks and the crooks,
+and forbidden lover's bliss
+  after a vine trellis kiss.
+
+I might be open or closed,
+  often plain as your nose.
+I'm a gift you can't take back,
+  and in the sharing I crack.
+
+Authors place me in Prague
+  or the dense London fog
+but I'm with people you meet
+  both the wicked and sweet.



More information about the tor-commits mailing list