[tor-commits] [tor/master] codetool: post-processor for clang-format
nickm at torproject.org
nickm at torproject.org
Thu Mar 5 13:25:48 UTC 2020
commit 5bd86b50b58ffcad48416df6fa6b411e92c4636e
Author: Nick Mathewson <nickm at torproject.org>
Date: Fri Jan 10 08:32:56 2020 -0500
codetool: post-processor for clang-format
This code transformer makes a couple of changes that we want for our
source code, and can be expanded to handle more.
---
scripts/maint/codetool.py | 174 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 174 insertions(+)
diff --git a/scripts/maint/codetool.py b/scripts/maint/codetool.py
new file mode 100755
index 000000000..6336e6843
--- /dev/null
+++ b/scripts/maint/codetool.py
@@ -0,0 +1,174 @@
+#!/usr/bin/env python3
+# Copyright (c) 2020, The Tor Project, Inc.
+# See LICENSE for licensing information.
+
+"""
+ This program uses a set of pluggable filters to inspect and transform
+ our C code.
+"""
+
+import os
+import re
+import sys
+
+class Filter:
+    """Base class for all filters: maps one C source string to another.
+
+    The base implementation is the identity transformation; subclasses
+    override transform() to do real work.
+    """
+    def __init__(self):
+        """Nothing to set up in the base class."""
+
+    def transform(self, s):
+        """Return the transformed version of the C source text 's'."""
+        return s
+
+class CompoundFilt(Filter):
+    """A filter made of other filters, applied one after another."""
+    def __init__(self, items=()):
+        """Start with the filters in the iterable 'items', kept in order."""
+        super().__init__()
+        self._filters = [*items]
+
+    def add(self, filt):
+        """Append 'filt' to the chain; return self so calls can chain."""
+        self._filters += [filt]
+        return self
+
+    def transform(self, s):
+        """Feed 's' through each filter in turn and return the final text."""
+        text = s
+        for stage in self._filters:
+            text = stage.transform(text)
+        return text
+
+class SplitError(Exception):
+    """Raised by split_comments() when it cannot make sense of a C file."""
+
+def split_comments(s):
+    r"""Split the C source text 's' into alternating code and comments.
+
+    Yield a sequence of (code, comment) pairs that together cover 's'
+    from start to end.  Each pair holds a nonempty piece of code, a
+    nonempty comment, or both; a missing element is the empty string.
+    String and character literals count as code, so comment markers
+    inside them are ignored.
+
+    Raise SplitError on reaching text that can be read neither as code
+    nor as a comment (for example an unterminated literal or an
+    unterminated "/*" comment).
+
+    >>> list(split_comments("hello // world\n"))
+    [('hello ', '// world'), ('\n', '')]
+
+    >>> list(split_comments("a /* b cd */ efg // hi"))
+    [('a ', '/* b cd */'), (' efg ', '// hi')]
+
+    >>> list(split_comments("a/"))
+    [('a/', '')]
+    """
+
+    # None of these patterns starts with '^': they are only used through
+    # .match(s, pos), which already anchors at 'pos', whereas '^' would
+    # refuse to match at a 'pos' in the middle of a line.
+
+    # Matches a block of code without any comments.
+    #
+    # Each literal is written as "plain run, then (escape, plain run)*",
+    # so there is exactly one way to match it and an unterminated quote
+    # fails quickly instead of backtracking exponentially.
+    #
+    # A '/' is code unless it opens a comment.  The lookahead does not
+    # consume the following character, so a quote right after a division
+    # sign still opens a literal, and a '/' at the very end of the input
+    # is accepted.
+    PAT_CODE = re.compile(r'''(?: [^/"']+ |
+                              "[^\\"]*(?:\\.[^\\"]*)*" |
+                              '[^\\']*(?:\\.[^\\']*)*' |
+                              /(?![/*])
+                          )*''', re.VERBOSE|re.DOTALL)
+
+    # Matches a C99 "//" comment, up to (not including) the end of line.
+    PAT_C99_COMMENT = re.compile(r'//.*$', re.MULTILINE)
+
+    # Matches a C "/* */" comment.
+    PAT_C_COMMENT = re.compile(r'/\*(?:[^*]|\*+[^*/])*\*+/', re.DOTALL)
+
+    # Scan with an index instead of re-slicing 's' after every token:
+    # slicing copies the remainder of the text each time.
+    pos = 0
+    end = len(s)
+
+    while True:
+        # Take any non-comment code found at the current position.
+        # PAT_CODE can match the empty string, so this always succeeds.
+        m = PAT_CODE.match(s, pos)
+        code = m.group(0)
+        pos = m.end()
+
+        # Now we are at a comment, or at the end of the string (or at
+        # something we cannot parse).  Find out which.
+        if s.startswith("//", pos):
+            m = PAT_C99_COMMENT.match(s, pos)
+        else:
+            m = PAT_C_COMMENT.match(s, pos)
+
+        if m:
+            comment = m.group(0)
+            pos = m.end()
+        else:
+            comment = ""
+
+        # If we found no code and no comment, we should be at the end of
+        # the string...
+        if not code and not comment:
+            if pos < end:
+                # ... but we are not, so the input is something we
+                # do not understand.
+                raise SplitError()
+            # ... all is well, we're done scanning the code.
+            return
+
+        yield (code, comment)
+
+class IgnoreCommentsFilt(Filter):
+    """Adapter that shields comments from another filter.
+
+    The wrapped filter only ever sees the code between comments; the
+    comments themselves are passed through unchanged.
+    """
+    def __init__(self, filt):
+        """Wrap 'filt', the filter to run on the non-comment text."""
+        super().__init__()
+        self._filt = filt
+
+    def transform(self, s):
+        """Return 's' with the wrapped filter applied to each code piece."""
+        inner = self._filt
+        return "".join(
+            inner.transform(code) + comment
+            for code, comment in split_comments(s))
+
+
+class RegexFilt(Filter):
+    """Filter that performs a regular-expression substitution on C code."""
+    def __init__(self, pat, replacement, flags=0):
+        """Compile 'pat' with 'flags'; matches become 'replacement'."""
+        super().__init__()
+        self._replacement = replacement
+        self._pat = re.compile(pat, flags)
+
+    def transform(self, s):
+        """Return 's' with every match of the pattern substituted."""
+        return self._pat.sub(self._replacement, s)
+
+def revise(fname, filt):
+    """Run 'filt' on the contents of the file in 'fname'.  If any
+    changes are made, then replace the file with its new contents.
+    Otherwise, leave the file alone.
+
+    'filt' is any object with a transform(str) -> str method.
+
+    The new contents are first written to "<fname>_codetool_tmp" and
+    then moved over 'fname', so a failure part-way through never leaves
+    a half-written 'fname' behind.  Errors from reading, writing or
+    moving the file propagate to the caller.
+    """
+    # Close the input promptly rather than leaving it to the garbage
+    # collector.
+    with open(fname, 'r') as f:
+        contents = f.read()
+
+    result = filt.transform(contents)
+    if result == contents:
+        return
+
+    tmpname = "{}_codetool_tmp".format(fname)
+    try:
+        with open(tmpname, 'w') as f:
+            f.write(result)
+        # Unlike os.rename(), os.replace() also overwrites an existing
+        # destination on Windows.
+        os.replace(tmpname, fname)
+    except BaseException:
+        # Clean up on any failure, including KeyboardInterrupt.  The
+        # temporary file may never have been created, so a failing
+        # unlink must not hide the error that brought us here.
+        try:
+            os.unlink(tmpname)
+        except OSError:
+            pass
+        raise
+
+##############################
+# Filtering rules.
+##############################
+
+# In a MOCK_IMPL() declaration, force a line break right after the first
+# comma.
+BREAK_MOCK_IMPL = RegexFilt(
+    pat=r'^MOCK_IMPL\(([^,]+),\s*(\S+)',
+    replacement=r'MOCK_IMPL(\1,\n\2',
+    flags=re.MULTILINE)
+
+# Keep a closing brace and the loop-terminator macro that follows it on
+# one line, separated by a single space.
+RESTORE_SMARTLIST_END = RegexFilt(
+    pat=r'}\s*(SMARTLIST|DIGESTMAP|DIGEST256MAP|STRMAP|MAP)_FOREACH_END\s*\(',
+    replacement=r'} \1_FOREACH_END (',
+    flags=re.MULTILINE)
+
+# The filter applied to every file: both rules above, run on code only
+# (comments are passed through untouched).
+F = CompoundFilt([
+    IgnoreCommentsFilt(CompoundFilt([
+        RESTORE_SMARTLIST_END,
+        BREAK_MOCK_IMPL,
+    ])),
+])
+
+if __name__ == '__main__':
+    # Every command-line argument names a file to rewrite in place.
+    for path in sys.argv[1:]:
+        revise(path, F)
More information about the tor-commits
mailing list