[tor-commits] [policies/master] Script to provide irc activity

atagar at torproject.org atagar at torproject.org
Thu Oct 5 18:49:51 UTC 2017


commit f752aa8a0e3245e53c84b38949515778cfbcd4cd
Author: Damian Johnson <atagar at torproject.org>
Date:   Thu Oct 5 11:49:45 2017 -0700

    Script to provide irc activity
---
 scripts/irc_activity.py | 104 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 104 insertions(+)

diff --git a/scripts/irc_activity.py b/scripts/irc_activity.py
new file mode 100644
index 0000000..4b405ba
--- /dev/null
+++ b/scripts/irc_activity.py
@@ -0,0 +1,104 @@
+import calendar
+import collections
+import datetime
+import os
+import re
+import sys
+
+try:
+  from dateutil import relativedelta
+except ImportError:
+  print("dateutil unavailable, please run 'sudo pip install python-dateutil'")
+  sys.exit(1)
+
+# Channels to tally, each read from '<channel>.log' within LOG_DIR.
+IRC_CHANNELS = ('#tor', '#tor-project', '#tor-dev', '#tor-internal')
+
+# Message lines of the form 'HH:MM <?nick>', capturing the nick. The single
+# character skipped after '<' is presumably a mode prefix such as '@'.
+AUTHOR = re.compile(r'^[0-9]{2}:[0-9]{2} <.(\S+)>')
+
+# Day rollover headers. These are raw strings so '\S' isn't an invalid escape.
+DATE_LINE = re.compile(r'^--- Day changed [\S]{3} [\S]{3} [0-9]{2} [0-9]{4}$')
+START_DATE = datetime.date.today() - relativedelta.relativedelta(months = 6)  # six months ago
+LOG_DIR = sys.argv[1] if len(sys.argv) >= 2 else None  # first argument, if any
+
+# Validate the directory argument, exiting with a message on the first problem.
+if not LOG_DIR:
+  print("Please provide the path of the irc log directory to read.")
+  sys.exit(1)
+elif not os.path.isdir(LOG_DIR):
+  # isdir() is also False for missing paths, so report which of the two it was
+  print(("%s isn't a directory" if os.path.exists(LOG_DIR) else "%s doesn't exist") % LOG_DIR)
+  sys.exit(1)
+
+# Every channel must have a '<channel>.log' inside that directory.
+for channel in IRC_CHANNELS:
+  log_path = os.path.join(LOG_DIR, channel) + '.log'
+  if os.path.exists(log_path):
+    continue
+  print("%s doesn't exist" % log_path)
+  sys.exit(1)
+
+
+def latest_date_line(log_path):
+  # Provides the first 'Day changed' header present in the log that's on or
+  # after START_DATE, or None once the search has moved past today's date.
+
+  with open(log_path) as log_file:
+    known_headers = set(l.strip() for l in log_file if DATE_LINE.match(l))
+
+  candidate = START_DATE
+
+  while True:
+    # day and month names are cut to three letters, the width DATE_LINE matches
+    weekday = calendar.day_name[candidate.weekday()][:3]
+    month = candidate.strftime("%B")[:3]
+    header = candidate.strftime("--- Day changed %%s %%s %d %Y") % (weekday, month)
+
+    if header in known_headers:
+      return header
+    if candidate > datetime.date.today():
+      return None
+
+    candidate += relativedelta.relativedelta(days = 1)
+
+
+all_authorship = []  # author of each counted message, across all channels
+messages_for_channel = {}  # {author => {channel => count}}
+
+for channel in IRC_CHANNELS:
+  log_path = os.path.join(LOG_DIR, channel) + '.log'
+  date_line = latest_date_line(log_path)
+
+  if not date_line:
+    print('unable to find a date line to start reading from for %s' % log_path)
+    sys.exit(1)
+
+  channel_tally = collections.Counter()  # {author => count} for this channel
+
+  with open(log_path) as log_file:
+    # skip everything up to and including the header we start counting from
+    for line in log_file:
+      if line.strip() == date_line:
+        break
+
+    # iteration resumes on the line after that header, tallying who spoke
+    for line in log_file:
+      author_match = AUTHOR.match(line)
+
+      if author_match:
+        speaker = author_match.group(1)
+        all_authorship.append(speaker)
+        channel_tally[speaker] += 1
+
+  # fold this channel's counts into the per-author mapping
+  for author, count in channel_tally.items():
+    messages_for_channel.setdefault(author, {})[channel] = count
+
+# Report authors from most to least active, each followed by their per-channel counts.
+for author, total in collections.Counter(all_authorship).most_common():
+  per_channel = sorted(messages_for_channel.get(author, {}).items(), key = lambda entry: entry[1], reverse = True)
+  print('%s %s' % (total, author))
+  print('  %s' % ', '.join('%s %s' % (tally, name) for (name, tally) in per_channel))
+



More information about the tor-commits mailing list