[tor-commits] [tor/master] Add a script to rename C identifiers, and optionally add a commit message
teor at torproject.org
teor at torproject.org
Fri Oct 25 00:28:04 UTC 2019
commit fcdb66acd3afd4068a3b200ffc71daeab72e0fcc
Author: Nick Mathewson <nickm at torproject.org>
Date: Wed Oct 23 13:26:08 2019 -0400
Add a script to rename C identifiers, and optionally add a commit message
This script should "pay for itself" if it prevents one bug caused by
careless search-replace-commit loops.
---
changes/ticket32237 | 5 +
scripts/maint/rename_c_identifier.py | 261 +++++++++++++++++++++++++++++++++++
2 files changed, 266 insertions(+)
diff --git a/changes/ticket32237 b/changes/ticket32237
new file mode 100644
index 000000000..dfa0b7840
--- /dev/null
+++ b/changes/ticket32237
@@ -0,0 +1,5 @@
+ o Minor features (developer tools):
+ - Add a rename_c_identifiers.py tool to rename a bunch of C
+ identifiers at once, and generate a well-formed commit message
+ describing the change. This should help with refactoring. Closes
+ ticket 32237.
diff --git a/scripts/maint/rename_c_identifier.py b/scripts/maint/rename_c_identifier.py
new file mode 100755
index 000000000..0e56dc48d
--- /dev/null
+++ b/scripts/maint/rename_c_identifier.py
@@ -0,0 +1,261 @@
+#!/usr/bin/python3
+#
+# Copyright (c) 2001 Matej Pfajfar.
+# Copyright (c) 2001-2004, Roger Dingledine.
+# Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+# Copyright (c) 2007-2019, The Tor Project, Inc.
+# See LICENSE for licensing information
+
+"""
+Helpful script to replace one or more C identifiers, and optionally
+generate a commit message explaining what happened.
+"""
+
+import argparse
+import fileinput
+import os
+import re
+import shlex
+import subprocess
+import sys
+import tempfile
+
+TOPDIR = "src"
+
+
+def is_c_file(fn):
+ """
+ Return true iff fn is the name of a C file.
+
+ >>> is_c_file("a/b/module.c")
+ True
+ >>> is_c_file("a/b/module.h")
+ True
+ >>> is_c_file("a/b/module.c~")
+ False
+ >>> is_c_file("a/b/.module.c")
+ False
+ >>> is_c_file("a/b/module.cpp")
+ False
+ """
+ fn = os.path.split(fn)[1]
+ if fn.startswith("."):
+ return False
+ ext = os.path.splitext(fn)[1]
+ return ext in {".c", ".h", ".i", ".inc"}
+
+
+def list_c_files(topdir=TOPDIR):
+ """
+ Use git to list all the C files under version control.
+
+ >>> lst = list(list_c_files())
+ >>> "src/core/mainloop/mainloop.c" in lst
+ True
+ >>> "src/core/mainloop/twiddledeedoo.c" in lst
+ False
+ >>> "micro-revision.i" in lst
+ False
+ """
+ proc = subprocess.Popen(
+ ["git", "ls-tree", "--name-only", "-r", "HEAD", topdir],
+ stdout=subprocess.PIPE,
+ encoding="utf-8")
+ for line in proc.stdout.readlines():
+ line = line.strip()
+ if is_c_file(line):
+ yield line
+
+
+class Rewriter:
+ """
+ A rewriter applies a series of word-by-word replacements, in
+ sequence. Replacements only happen at "word boundaries",
+ as determined by the \\b regular expression marker.
+
+ ("A word is defined as a sequence of alphanumeric or underscore
+ characters", according to the documentation.)
+
+ >>> R = Rewriter([("magic", "secret"), ("words", "codes")])
+ >>> R.apply("The magic words are rambunctious bluejay")
+ 'The secret codes are rambunctious bluejay'
+ >>> R.apply("The magical words are rambunctious bluejay")
+ 'The magical codes are rambunctious bluejay'
+ >>> R.get_count()
+ 3
+
+ """
+
+ def __init__(self, replacements):
+ """Make a new Rewriter. Takes a sequence of pairs of
+ (from_id, to_id), where from_id is an identifier to replace,
+ and to_id is its replacement.
+ """
+ self._patterns = []
+ for id1, id2 in replacements:
+ pat = re.compile(r"\b{}\b".format(re.escape(id1)))
+ self._patterns.append((pat, id2))
+
+ self._count = 0
+
+ def apply(self, line):
+ """Return `line` as transformed by this rewriter."""
+ for pat, ident in self._patterns:
+ line, count = pat.subn(ident, line)
+ self._count += count
+ return line
+
+ def get_count(self):
+ """Return the number of identifiers that this rewriter has
+ rewritten."""
+ return self._count
+
+
+def rewrite_files(files, rewriter):
+ """
+ Apply `rewriter` to every file in `files`, replacing those files
+ with their rewritten contents.
+ """
+ for line in fileinput.input(files, inplace=True):
+ sys.stdout.write(rewriter.apply(line))
+
+
+def make_commit_msg(pairs, no_verify):
+ """Return a commit message to explain what was replaced by the provided
+ arguments.
+ """
+ script = ["./scripts/maint/rename_c_identifier.py"]
+ for id1, id2 in pairs:
+ qid1 = shlex.quote(id1)
+ qid2 = shlex.quote(id2)
+ script.append(" {} {}".format(qid1, qid2))
+ script = " \\\n".join(script)
+
+ if len(pairs) == 1:
+ line1 = "Rename {} to {}".format(pairs[0])
+ else:
+ line1 = "Replace several C identifiers."
+
+ msg = """\
+{}
+
+This is an automated commit, generated by this command:
+
+{}
+""".format(line1, script)
+
+ if no_verify:
+ msg += """
+It was generated with --no-verify, so it probably breaks some commit hooks.
+The commiter should be sure to fix them up in a subsequent commit.
+"""
+
+ return msg
+
+
+def commit(pairs, no_verify=False):
+ """Try to commit the current git state, generating the commit message as
+ appropriate. If `no_verify` is True, pass the --no-verify argument to
+ git commit.
+ """
+ args = []
+ if no_verify:
+ args.append("--no-verify")
+
+ # We have to use a try block to delete the temporary file here, since we
+ # are using tempfile with delete=False. We have to use delete=False,
+ # since otherwise we are not guaranteed to be able to give the file to
+ # git for it to open.
+ fname = None
+ try:
+ with tempfile.NamedTemporaryFile(mode="w", delete=False) as f:
+ fname = f.name
+ f.write(make_commit_msg(pairs, no_verify))
+ s = subprocess.run(["git", "commit", "-a", "-F", fname, "--edit"]+args)
+ if s.returncode != 0 and not no_verify:
+ print('"git commit" failed. Maybe retry with --no-verify?',
+ file=sys.stderr)
+ revert_changes()
+ return False
+ finally:
+ os.unlink(fname)
+
+ return True
+
+
+def any_uncommitted_changes():
+ """Return True if git says there are any uncommitted changes in the current
+ working tree; false otherwise.
+ """
+ s = subprocess.run(["git", "diff-index", "--quiet", "HEAD"])
+ return s.returncode != 0
+
+
+DESC = "Replace one identifier with another throughout our source."
+EXAMPLES = """\
+Examples:
+
+ rename_c_identifier.py set_ctrl_id set_controller_id
+ (Replaces every occurrence of "set_ctrl_id" with "set_controller_id".)
+
+ rename_c_identifier.py --commit set_ctrl_id set_controller_id
+ (As above, but also generate a git commit with an appropriate message.)
+
+ rename_c_identifier.py a b c d
+ (Replace "a" with "b", and "c" with "d".)"""
+
+
+def revert_changes():
+ """Tell git to revert all the changes in the current working tree.
+ """
+ print('Reverting changes.', file=sys.stderr)
+ subprocess.run(["git", "checkout", "--quiet", TOPDIR])
+
+
+def main(argv):
+ import argparse
+ parser = argparse.ArgumentParser(description=DESC, epilog=EXAMPLES,
+ # prevent re-wrapping the examples
+ formatter_class=argparse.RawDescriptionHelpFormatter)
+
+ parser.add_argument("--commit", action='store_true',
+ help="Generate a Git commit.")
+ parser.add_argument("--no-verify", action='store_true',
+ help="Tell Git not to run its pre-commit hooks.")
+ parser.add_argument("from_id", type=str, help="Original identifier")
+ parser.add_argument("to_id", type=str, help="New identifier")
+ parser.add_argument("more", type=str, nargs=argparse.REMAINDER,
+ help="Additional identifier pairs")
+
+ args = parser.parse_args(argv[1:])
+
+ if len(args.more) % 2 != 0:
+ print("I require an even number of identifiers.", file=sys.stderr)
+ return 1
+
+ if any_uncommitted_changes():
+ print("Uncommitted changes found. Not running.", file=sys.stderr)
+ return 1
+
+ pairs = []
+ print("renaming {} to {}".format(args.from_id, args.to_id), file=sys.stderr)
+ pairs.append((args.from_id, args.to_id))
+ for idx in range(0, len(args.more), 2):
+ id1 = args.more[idx]
+ id2 = args.more[idx+1]
+ print("renaming {} to {}".format(id1, id2))
+ pairs.append((id1, id2))
+
+ rewriter = Rewriter(pairs)
+
+ rewrite_files(list_c_files(), rewriter)
+
+ print("Replaced {} identifiers".format(rewriter.get_count()),
+ file=sys.stderr)
+
+ if args.commit:
+ commit(pairs, args.no_verify)
+
+
+if __name__ == '__main__':
+ main(sys.argv)
More information about the tor-commits
mailing list