[tor-commits] [nyx/master] Overhaul daybreak handling
atagar at torproject.org
atagar at torproject.org
Tue May 5 05:42:06 UTC 2015
commit 85f98793c90f13b5bdc9a8922ae40380614373f4
Author: Damian Johnson <atagar at torproject.org>
Date: Sun Apr 26 11:38:36 2015 -0700
Overhaul daybreak handling
I never liked our old approach for grouping entries by day. Oh, and it was
inefficient as hell.
Replacing it with simpler and more performant grouping.
---
nyx/log_panel.py | 159 +++++++++--------------------
nyx/util/log.py | 33 +++++-
test/util/log/data/daybreak_deduplication | 37 +++++++
test/util/log/log_group.py | 61 ++++++++++-
4 files changed, 176 insertions(+), 114 deletions(-)
diff --git a/nyx/log_panel.py b/nyx/log_panel.py
index f61d9c6..2038a69 100644
--- a/nyx/log_panel.py
+++ b/nyx/log_panel.py
@@ -22,10 +22,7 @@ import nyx.popups
from nyx import __version__
from nyx.util import panel, tor_controller, ui_tools
-from nyx.util.log import LogGroup, LogEntry, read_tor_log
-
-DAYBREAK_EVENT = 'DAYBREAK' # special event for marking when the date changes
-TIMEZONE_OFFSET = time.altzone if time.localtime()[8] else time.timezone
+from nyx.util.log import LogGroup, LogEntry, read_tor_log, days_since
ENTRY_INDENT = 2 # spaces an entry's message is indented after the first line
@@ -65,32 +62,11 @@ DUPLICATE_MSG = ' [%i duplicate%s hidden]'
CONTENT_HEIGHT_REDRAW_THRESHOLD = 3
-# cached values and the arguments that generated it for the get_daybreaks and
-# get_duplicates functions
-
-CACHED_DAYBREAKS_ARGUMENTS = (None, None) # events, current day
-CACHED_DAYBREAKS_RESULT = None
-
# maximum number of regex filters we'll remember
MAX_REGEX_FILTERS = 5
-def days_since(timestamp = None):
- """
- Provides the number of days since the epoch converted to local time (rounded
- down).
-
- Arguments:
- timestamp - unix timestamp to convert, current time if undefined
- """
-
- if timestamp is None:
- timestamp = time.time()
-
- return int((timestamp - TIMEZONE_OFFSET) / 86400)
-
-
def log_file_path():
for log_entry in tor_controller().get_conf('Log', [], True):
entry_comp = log_entry.split() # looking for an entry like: notice file /var/log/tor/notices.log
@@ -99,48 +75,6 @@ def log_file_path():
return CONFIG['tor.chroot'] + entry_comp[2]
-def get_daybreaks(events, ignore_time_for_cache = False):
- """
- Provides the input events back with special 'DAYBREAK_EVENT' markers inserted
- whenever the date changed between log entries (or since the most recent
- event). The timestamp matches the beginning of the day for the following
- entry.
-
- Arguments:
- events - chronologically ordered listing of events
- ignore_time_for_cache - skips taking the day into consideration for providing
- cached results if true
- """
-
- global CACHED_DAYBREAKS_ARGUMENTS, CACHED_DAYBREAKS_RESULT
-
- if not events:
- return []
-
- new_listing = []
- current_day = days_since()
- last_day = current_day
-
- if CACHED_DAYBREAKS_ARGUMENTS[0] == events and \
- (ignore_time_for_cache or CACHED_DAYBREAKS_ARGUMENTS[1] == current_day):
- return list(CACHED_DAYBREAKS_RESULT)
-
- for entry in events:
- event_day = days_since(entry.timestamp)
-
- if event_day != last_day:
- marker_timestamp = (event_day * 86400) + TIMEZONE_OFFSET
- new_listing.append(LogEntry(marker_timestamp, DAYBREAK_EVENT, ''))
-
- new_listing.append(entry)
- last_day = event_day
-
- CACHED_DAYBREAKS_ARGUMENTS = (list(events), current_day)
- CACHED_DAYBREAKS_RESULT = list(new_listing)
-
- return new_listing
-
-
class LogPanel(panel.Panel, threading.Thread, logging.Handler):
"""
Listens for and displays tor, nyx, and stem events. This can prepopulate
@@ -188,7 +122,7 @@ class LogPanel(panel.Panel, threading.Thread, logging.Handler):
self.scroll = 0
self.set_pause_attr('_msg_log')
- self._msg_log = LogGroup(CONFIG['cache.log_panel.size'])
+ self._msg_log = LogGroup(CONFIG['cache.log_panel.size'], group_by_day = CONFIG['features.log.showDateDividers'])
self._last_update = -1 # time the content was last revised
self._halt = False # terminates thread if true
@@ -260,7 +194,7 @@ class LogPanel(panel.Panel, threading.Thread, logging.Handler):
with self.vals_lock:
# clears the event log
- self._msg_log = LogGroup(CONFIG['cache.log_panel.size'])
+ self._msg_log = LogGroup(CONFIG['cache.log_panel.size'], group_by_day = CONFIG['features.log.showDateDividers'])
# fetches past tor events from log file, if available
@@ -477,7 +411,7 @@ class LogPanel(panel.Panel, threading.Thread, logging.Handler):
"""
with self.vals_lock:
- self._msg_log = LogGroup(CONFIG['cache.log_panel.size'])
+ self._msg_log = LogGroup(CONFIG['cache.log_panel.size'], group_by_day = CONFIG['features.log.showDateDividers'])
self.redraw(True)
def save_snapshot(self, path):
@@ -635,6 +569,7 @@ class LogPanel(panel.Panel, threading.Thread, logging.Handler):
# determines if we have the minimum width to show date dividers
show_daybreaks = width - divider_indent >= 3
+ last_day = deduplicated_log[0][0].days_since()
while deduplicated_log:
entry, duplicate_count = deduplicated_log.pop(0)
@@ -644,7 +579,7 @@ class LogPanel(panel.Panel, threading.Thread, logging.Handler):
# checks if we should be showing a divider with the date
- if entry.type == DAYBREAK_EVENT:
+ if last_day != entry.days_since():
# bottom of the divider
if seen_first_date_divider:
@@ -669,57 +604,59 @@ class LogPanel(panel.Panel, threading.Thread, logging.Handler):
seen_first_date_divider = True
line_count += 1
- else:
- # entry contents to be displayed, tuples of the form:
- # (msg, formatting, includeLinebreak)
- display_queue = []
+ # entry contents to be displayed, tuples of the form:
+ # (msg, formatting, includeLinebreak)
- msg_comp = entry.display_message.split('\n')
+ display_queue = []
- for i in range(len(msg_comp)):
- font = curses.A_BOLD if 'ERR' in entry.type else curses.A_NORMAL # emphasizes ERR messages
- display_queue.append((msg_comp[i].strip(), (font, CONFIG['attr.log_color'].get(entry.type, 'white')), i != len(msg_comp) - 1))
+ msg_comp = entry.display_message.split('\n')
- if duplicate_count:
- plural_label = 's' if duplicate_count > 1 else ''
- duplicate_msg = DUPLICATE_MSG % (duplicate_count, plural_label)
- display_queue.append((duplicate_msg, duplicate_attr, False))
+ for i in range(len(msg_comp)):
+ font = curses.A_BOLD if 'ERR' in entry.type else curses.A_NORMAL # emphasizes ERR messages
+ display_queue.append((msg_comp[i].strip(), (font, CONFIG['attr.log_color'].get(entry.type, 'white')), i != len(msg_comp) - 1))
- cursor_location, line_offset = msg_indent, 0
- max_entries_per_line = CONFIG['features.log.max_lines_per_entry']
+ if duplicate_count:
+ plural_label = 's' if duplicate_count > 1 else ''
+ duplicate_msg = DUPLICATE_MSG % (duplicate_count, plural_label)
+ display_queue.append((duplicate_msg, duplicate_attr, False))
- while display_queue:
- msg, format, include_break = display_queue.pop(0)
- draw_line = line_count + line_offset
+ # TODO: a fix made line_offset unused, and probably broke max_entries_per_line... not sure if we care
- if line_offset == max_entries_per_line:
- break
+ cursor_location, line_offset = msg_indent, 0
+ max_entries_per_line = CONFIG['features.log.max_lines_per_entry']
- max_msg_size = width - cursor_location - 1
+ while display_queue:
+ msg, format, include_break = display_queue.pop(0)
+ draw_line = line_count + line_offset
- if len(msg) > max_msg_size:
- # message is too long - break it up
- if line_offset == max_entries_per_line - 1:
- msg = str_tools.crop(msg, max_msg_size)
- else:
- msg, remainder = str_tools.crop(msg, max_msg_size, 4, 4, str_tools.Ending.HYPHEN, True)
- display_queue.insert(0, (remainder.strip(), format, include_break))
+ if line_offset == max_entries_per_line:
+ break
- include_break = True
+ max_msg_size = width - cursor_location - 1
+
+ if len(msg) > max_msg_size:
+ # message is too long - break it up
+ if line_offset == max_entries_per_line - 1:
+ msg = str_tools.crop(msg, max_msg_size)
+ else:
+ msg, remainder = str_tools.crop(msg, max_msg_size, 4, 4, str_tools.Ending.HYPHEN, True)
+ display_queue.insert(0, (remainder.strip(), format, include_break))
- if draw_line < height and draw_line >= 1:
- if seen_first_date_divider and width - divider_indent >= 3 and show_daybreaks:
- self.addch(draw_line, divider_indent, curses.ACS_VLINE, *divider_attr)
- self.addch(draw_line, width - 1, curses.ACS_VLINE, *divider_attr)
+ include_break = True
- self.addstr(draw_line, cursor_location, msg, *format)
+ if draw_line < height and draw_line >= 1:
+ if seen_first_date_divider and width - divider_indent >= 3 and show_daybreaks:
+ self.addch(draw_line, divider_indent, curses.ACS_VLINE, *divider_attr)
+ self.addch(draw_line, width - 1, curses.ACS_VLINE, *divider_attr)
- cursor_location += len(msg)
+ self.addstr(draw_line, cursor_location, msg, *format)
- if include_break or not display_queue:
- line_offset += 1
- cursor_location = msg_indent + ENTRY_INDENT
+ cursor_location += len(msg)
+
+ if include_break or not display_queue:
+ line_count += 1
+ cursor_location = msg_indent + ENTRY_INDENT
line_count += line_offset
@@ -733,6 +670,8 @@ class LogPanel(panel.Panel, threading.Thread, logging.Handler):
line_count += 1
+ last_day = entry.days_since()
+
# redraw the display if...
# - last_content_height was off by too much
# - we're off the bottom of the page
@@ -769,10 +708,10 @@ class LogPanel(panel.Panel, threading.Thread, logging.Handler):
responsive if additions are less frequent.
"""
- last_day = days_since() # used to determine if the date has changed
+ last_day = days_since(time.time()) # used to determine if the date has changed
while not self._halt:
- current_day = days_since()
+ current_day = days_since(time.time())
time_since_reset = time.time() - self._last_update
max_log_update_rate = CONFIG['features.log.maxRefreshRate'] / 1000.0
diff --git a/nyx/util/log.py b/nyx/util/log.py
index 545eb75..58126a7 100644
--- a/nyx/util/log.py
+++ b/nyx/util/log.py
@@ -19,6 +19,19 @@ except ImportError:
from stem.util.lru_cache import lru_cache
TOR_RUNLEVELS = ['DEBUG', 'INFO', 'NOTICE', 'WARN', 'ERR']
+TIMEZONE_OFFSET = time.altzone if time.localtime()[8] else time.timezone
+
+
+def days_since(timestamp):
+ """
+ Provides the number of days since a given unix timestamp, by local time.
+
+ :param int timestamp: unix timestamp to provide time since
+
+ :returns: **int** with the number of days since this event
+ """
+
+ return int((timestamp - TIMEZONE_OFFSET) / 86400)
@lru_cache()
@@ -47,19 +60,24 @@ class LogGroup(object):
and supports deduplication.
"""
- def __init__(self, max_size):
+ def __init__(self, max_size, group_by_day = False):
self._max_size = max_size
+ self._group_by_day = group_by_day
self._entries = []
self._lock = threading.RLock()
def add(self, timestamp, type, message):
- entry = LogEntry(timestamp, type, message)
+ self.add_entry(LogEntry(timestamp, type, message))
+ def add_entry(self, entry):
with self._lock:
duplicate = None
+ our_day = entry.days_since()
for existing_entry in self._entries:
- if entry.is_duplicate_of(existing_entry):
+ if self._group_by_day and our_day != existing_entry.days_since():
+ break
+ elif entry.is_duplicate_of(existing_entry):
duplicate = existing_entry
break
@@ -150,6 +168,15 @@ class LogEntry(object):
return False
+ def days_since(self):
+ """
+ Provides the number of days since this event, by local time.
+
+ :returns: **int** with the number of days since this event
+ """
+
+ return days_since(self.timestamp)
+
def __eq__(self, other):
if isinstance(other, LogEntry):
return hash(self) == hash(other)
diff --git a/test/util/log/data/daybreak_deduplication b/test/util/log/data/daybreak_deduplication
new file mode 100644
index 0000000..1849ad3
--- /dev/null
+++ b/test/util/log/data/daybreak_deduplication
@@ -0,0 +1,37 @@
+Apr 24 19:50:42.000 [notice] Heartbeat: Tor's uptime is 12 days 0:00 hours, with 0 circuits open. I've sent 4.94 MB and received 130.17 MB.
+Apr 24 19:50:42.000 [notice] Average packaged cell fullness: 65.101%. TLS write overhead: 11%
+Apr 25 01:50:42.000 [notice] Heartbeat: Tor's uptime is 12 days 6:00 hours, with 0 circuits open. I've sent 5.00 MB and received 131.87 MB.
+Apr 25 01:50:42.000 [notice] Average packaged cell fullness: 64.927%. TLS write overhead: 11%
+Apr 25 07:50:42.000 [notice] Heartbeat: Tor's uptime is 12 days 12:00 hours, with 0 circuits open. I've sent 5.08 MB and received 134.19 MB.
+Apr 25 07:50:42.000 [notice] Average packaged cell fullness: 64.587%. TLS write overhead: 11%
+Apr 25 11:44:21.000 [notice] New control connection opened from 127.0.0.1.
+Apr 25 11:44:33.000 [notice] Interrupt: exiting cleanly.
+Apr 25 11:44:36.000 [notice] Tor 0.2.7.0-alpha-dev (git-63a90f2df4dcd7ff) opening log file.
+Apr 25 11:44:36.492 [notice] Tor v0.2.7.0-alpha-dev (git-63a90f2df4dcd7ff) running on Linux with Libevent 2.0.16-stable, OpenSSL 1.0.1 and Zlib 1.2.3.4.
+Apr 25 11:44:36.492 [notice] Tor can't help you if you use it wrong! Learn how to be safe at https://www.torproject.org/download/download#warning
+Apr 25 11:44:36.492 [notice] This version is not a stable Tor release. Expect more bugs than usual.
+Apr 25 11:44:36.525 [notice] Read configuration file "/home/atagar/.tor/torrc".
+Apr 25 11:44:36.530 [notice] Opening Socks listener on 127.0.0.1:9050
+Apr 25 11:44:36.530 [notice] Opening Control listener on 127.0.0.1:9051
+Apr 25 11:44:36.000 [notice] Bootstrapped 0%: Starting
+Apr 25 11:44:39.000 [notice] Bootstrapped 45%: Asking for relay descriptors
+Apr 25 11:44:40.000 [notice] Bootstrapped 50%: Loading relay descriptors
+Apr 25 11:44:47.000 [notice] Bootstrapped 55%: Loading relay descriptors
+Apr 25 11:44:47.000 [notice] Bootstrapped 62%: Loading relay descriptors
+Apr 25 11:44:48.000 [notice] Bootstrapped 72%: Loading relay descriptors
+Apr 25 11:44:48.000 [notice] Bootstrapped 80%: Connecting to the Tor network
+Apr 25 11:44:48.000 [notice] Bootstrapped 90%: Establishing a Tor circuit
+Apr 25 11:44:49.000 [notice] Tor has successfully opened a circuit. Looks like client functionality is working.
+Apr 25 11:44:49.000 [notice] Bootstrapped 100%: Done
+Apr 25 11:45:03.000 [notice] New control connection opened from 127.0.0.1.
+Apr 25 17:44:40.000 [notice] Heartbeat: Tor's uptime is 6:00 hours, with 0 circuits open. I've sent 539 kB and received 4.25 MB.
+Apr 25 18:11:56.000 [notice] New control connection opened from 127.0.0.1.
+Apr 25 18:52:47.000 [notice] New control connection opened from 127.0.0.1.
+Apr 25 19:02:44.000 [notice] New control connection opened from 127.0.0.1.
+Apr 25 23:44:40.000 [notice] Heartbeat: Tor's uptime is 12:00 hours, with 1 circuits open. I've sent 794 kB and received 7.32 MB.
+Apr 26 05:44:40.000 [notice] Heartbeat: Tor's uptime is 18:00 hours, with 0 circuits open. I've sent 862 kB and received 9.05 MB.
+Apr 26 10:16:38.000 [notice] New control connection opened from 127.0.0.1.
+Apr 26 10:19:24.000 [notice] New control connection opened from 127.0.0.1.
+Apr 26 10:21:31.000 [notice] New control connection opened from 127.0.0.1.
+Apr 26 10:24:27.000 [notice] New control connection opened from 127.0.0.1.
+Apr 26 10:24:46.000 [notice] New control connection opened from 127.0.0.1.
diff --git a/test/util/log/log_group.py b/test/util/log/log_group.py
index 732ee14..d1f1f46 100644
--- a/test/util/log/log_group.py
+++ b/test/util/log/log_group.py
@@ -1,6 +1,7 @@
+import os
import unittest
-from nyx.util.log import LogGroup, LogEntry
+from nyx.util.log import LogGroup, LogEntry, read_tor_log
class TestLogGroup(unittest.TestCase):
@@ -85,3 +86,61 @@ class TestLogGroup(unittest.TestCase):
self.assertEqual(None, group_items[0].duplicates)
self.assertEqual(bootstrap_messages, [e.message for e in group_items[1].duplicates])
self.assertEqual([False, False, True, True, False], [e.is_duplicate for e in group_items])
+
+ def test_deduplication_with_daybreaks(self):
+ group = LogGroup(100, group_by_day = True)
+ test_log_path = os.path.join(os.path.dirname(__file__), 'data', 'daybreak_deduplication')
+
+ for entry in reversed(list(read_tor_log(test_log_path))):
+ group.add_entry(entry)
+
+ # Entries should consist of two days of results...
+ #
+ # Day 1:
+ # 10:24:27 [NOTICE] New control connection opened from 127.0.0.1.
+ # 10:21:31 [NOTICE] New control connection opened from 127.0.0.1.
+ # 10:19:24 [NOTICE] New control connection opened from 127.0.0.1.
+ # 10:16:38 [NOTICE] New control connection opened from 127.0.0.1.
+ # 10:16:38 [NOTICE] New control connection opened from 127.0.0.1.
+ # 05:44:40 [NOTICE] Heartbeat: Tor's uptime is 18:00 hours, with 0 circuits open. I've sent 862 kB and received 9.05 MB.
+ #
+ # Day 2:
+ # 23:44:40 [NOTICE] Heartbeat: Tor's uptime is 12:00 hours, with 1 circuits open. I've sent 794 kB and received 7.32 MB.
+ # 19:02:44 [NOTICE] New control connection opened from 127.0.0.1.
+ # 18:52:47 [NOTICE] New control connection opened from 127.0.0.1.
+ # 18:11:56 [NOTICE] New control connection opened from 127.0.0.1.
+ # 17:44:40 [NOTICE] Heartbeat: Tor's uptime is 6:00 hours, with 0 circuits open. I've sent 539 kB and received 4.25 MB.
+ # 11:45:03 [NOTICE] New control connection opened from 127.0.0.1.
+ # 11:44:49 [NOTICE] Bootstrapped 100%: Done
+ # ... etc...
+
+ group_items = list(group)
+
+ # First day
+
+ self.assertEqual('New control connection opened from 127.0.0.1.', group_items[0].message)
+ self.assertEqual(5, len(group_items[0].duplicates))
+ self.assertFalse(group_items[0].is_duplicate)
+
+ for entry in group_items[1:5]:
+ self.assertEqual('New control connection opened from 127.0.0.1.', entry.message)
+ self.assertEqual(5, len(entry.duplicates))
+ self.assertTrue(entry.is_duplicate)
+
+ self.assertEqual("Heartbeat: Tor's uptime is 18:00 hours, with 0 circuits open. I've sent 862 kB and received 9.05 MB.", group_items[5].message)
+ self.assertEqual(None, group_items[5].duplicates)
+ self.assertFalse(group_items[5].is_duplicate)
+
+ # Second day
+
+ self.assertEqual("Heartbeat: Tor's uptime is 12:00 hours, with 1 circuits open. I've sent 794 kB and received 7.32 MB.", group_items[6].message)
+ self.assertEqual(2, len(group_items[6].duplicates))
+ self.assertFalse(group_items[6].is_duplicate)
+
+ self.assertEqual('New control connection opened from 127.0.0.1.', group_items[8].message)
+ self.assertEqual(4, len(group_items[8].duplicates))
+ self.assertTrue(group_items[8].is_duplicate)
+
+ self.assertEqual("Heartbeat: Tor's uptime is 6:00 hours, with 0 circuits open. I've sent 539 kB and received 4.25 MB.", group_items[10].message)
+ self.assertEqual(2, len(group_items[10].duplicates))
+ self.assertTrue(group_items[10].is_duplicate)
More information about the tor-commits
mailing list