[tor-commits] [stem/master] Checking whitespace as part of the tests

Mon Apr 16 01:37:20 UTC 2012

commit 25f65d01a10ab5e4993f48317927774c956e73ae
Author: Damian Johnson <atagar at torproject.org>
Date:   Sun Apr 15 18:27:37 2012 -0700

    Checking whitespace as part of the tests
    
    A common mistake made by new contributors to stem (... or any project,
    actually) is incorrect whitespace. This change checks for tabs, trailing
    whitespace, and incorrect indentation levels after running the tests, to
    make it easier for patch contributors to get this right.
    
    It's already showing a few mistakes in code that I wrote... oops.
---
 run_tests.py             |   15 +++++++
 test/__init__.py         |    2 +-
 test/whitespace_check.py |  106 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 122 insertions(+), 1 deletions(-)

diff --git a/run_tests.py b/run_tests.py
index 2c36a04..d6a0a52 100755
--- a/run_tests.py
+++ b/run_tests.py
@@ -13,6 +13,7 @@ import StringIO
 
 import test.output
 import test.runner
+import test.whitespace_check
 import test.unit.connection.authentication
 import test.unit.connection.protocolinfo
 import test.unit.socket.control_line
@@ -335,6 +336,20 @@ if __name__ == '__main__':
     
     # TODO: note unused config options afterward?
   
+  whitespace_issues = test.whitespace_check.get_issues()
+  
+  if whitespace_issues:
+    test.output.print_line("WHITESPACE ISSUES", term.Color.BLUE, term.Attr.BOLD)
+    
+    for file_path in whitespace_issues:
+      test.output.print_line("* %s" % file_path, term.Color.BLUE, term.Attr.BOLD)
+      
+      for line_number, msg in whitespace_issues[file_path]:
+        line_count = "%-4s" % line_number
+        test.output.print_line("  line %s - %s" % (line_count, msg))
+      
+      print
+  
   runtime = time.time() - start_time
   if runtime < 1: runtime_label = "(%0.1f seconds)" % runtime
   else: runtime_label = "(%i seconds)" % runtime
diff --git a/test/__init__.py b/test/__init__.py
index 93250e2..9f227af 100644
--- a/test/__init__.py
+++ b/test/__init__.py
@@ -2,5 +2,5 @@
 Unit and integration tests for the stem library.
 """
 
-__all__ = ["mocking", "output", "runner"]
+__all__ = ["mocking", "output", "runner", "whitespace_check"]
 
diff --git a/test/whitespace_check.py b/test/whitespace_check.py
new file mode 100644
index 0000000..e0419a5
--- /dev/null
+++ b/test/whitespace_check.py
@@ -0,0 +1,106 @@
+"""
+Performs a check that our python source code follows its whitespace conventions
+which are...
+
+* two-space indentation
+* tabs are the root of all evil and should be shot on sight
+* no trailing whitespace unless the line is empty, in which case it should have
+  the same indentation as the surrounding code
+"""
+
+import re
+import os
+
+# if run directly then run over everything in this module's directory
+DEFAULT_TARGET = os.path.sep.join(__file__.split(os.path.sep)[:-1])
+
+def get_issues(base_path = DEFAULT_TARGET):
+  """
+  Checks python source code in the given directory for whitespace issues.
+  
+  Arguments:
+    base_path (str) - directory to be iterated over
+  
+  Returns:
+    dict of the form...
+    path => [(line_number, message)...]
+  """
+  
+  # TODO: This does not check that block indentations are two spaces because
+  # differentiating source from string blocks ("""foo""") is more of a pita
+  # than I want to deal with right now.
+  
+  issues = {}
+  
+  for file_path in _get_python_files(base_path):
+    with open(file_path) as f: file_contents = f.read()
+    lines, file_issues, prev_indent = file_contents.splitlines(), [], 0
+    
+    for i in xrange(len(lines)):
+      whitespace, content = re.match("^(\s*)(.*)$", lines[i]).groups()
+      
+      if "\t" in whitespace:
+        file_issues.append((i + 1, "indentation has a tab"))
+      elif content != content.rstrip():
+        file_issues.append((i + 1, "line has trailing whitespace"))
+      elif content == '':
+        # empty line, check its indentation against the previous and next line
+        # with content
+        
+        next_indent = 0
+        
+        for k in xrange(i + 1, len(lines)):
+          future_whitespace, future_content = re.match("^(\s*)(.*)$", lines[k]).groups()
+          
+          if future_content:
+            next_indent = len(future_whitespace)
+            break
+        
+        if not len(whitespace) in (prev_indent, next_indent):
+          msg = "indentation should match surrounding content (%s spaces)"
+          
+          if prev_indent == next_indent:
+            msg = msg % prev_indent
+          elif prev_indent < next_indent:
+            msg = msg % ("%i or %i" % (prev_indent, next_indent))
+          else:
+            msg = msg % ("%i or %i" % (next_indent, prev_indent))
+          
+          file_issues.append((i + 1, msg))
+      else:
+        # we had content and it's fine, making a note of its indentation
+        prev_indent = len(whitespace)
+    
+    if file_issues:
+      issues[file_path] = file_issues
+  
+  return issues
+
+def _get_python_files(base_path):
+  """
+  Iterates over all of the python files within a directory.
+  
+  Arguments:
+    base_path (str) - directory to be iterated over
+  
+  Returns:
+    iterator that yields the path of each python source file under base_path
+  """
+  
+  for root, _, files in os.walk(base_path, followlinks = True):
+    for filename in files:
+      if filename.endswith(".py"):
+        yield os.path.join(root, filename)
+
+if __name__ == '__main__':
+  issues = get_issues()
+  
+  for file_path in issues:
+    print file_path
+    
+    for line_number, msg in issues[file_path]:
+      line_count = "%-4s" % line_number
+      print "  line %s %s" % (line_count, msg)
+    
+    print
+