blob: 4e3e73c7a8baaf4dfbc9987f66edfc6fec435f7a [file] [log] [blame]
Brad Bishopd7bf8c12018-02-25 22:55:05 -05001#! /usr/bin/env python3
2
3# TODO
4# - option to just list all broken files
5# - test suite
6# - validate signed-off-by
7
8
class Result:
    """
    Findings for a single patch file.

    Every field is a class-level default describing a patch with no problems;
    patchreview() overrides fields on the instance as it finds issues.
    """

    # -- Signed-off-by --
    # True when no Signed-off-by tag was found in the patch
    missing_sob = False
    # Falsy when the tag is spelt correctly, otherwise the tag text as written
    malformed_sob = False
    # The text that follows the Signed-off-by tag
    sob = None

    # -- Upstream-Status --
    # True when no Upstream-Status tag was found in the patch
    missing_upstream_status = False
    # None when the tag is spelt correctly, otherwise the tag text as written
    malformed_upstream_status = None
    # True when the status value is not one of the recognised values
    unknown_upstream_status = False
    # The status value itself (Pending, etc)
    upstream_status = None

    # -- CVE --
    # True when the patch looks like a CVE fix but carries no CVE tag
    missing_cve = False
26
def blame_patch(patch):
    """
    Ask git which commit(s) added the given patch file.

    patch: filename of the patch, relative to the current directory

    Returns a list of "commit summary (author name <author email>)" strings,
    one per line that git log prints. Raises subprocess.CalledProcessError if
    git exits with an error.
    """
    import subprocess

    command = [
        "git", "log",
        # Follow the file across renames and keep only commits that added it
        "--follow", "--find-renames", "--diff-filter=A",
        "--format=%s (%aN <%aE>)",
        "--", patch,
    ]
    raw_log = subprocess.check_output(command)
    return raw_log.decode("utf-8").splitlines()
37
def patchreview(patches):
    """
    Read each patch file and record which of the expected tags are missing,
    malformed or unknown.

    patches: iterable of patch filenames to open and inspect

    Returns a dict mapping each filename to a Result describing that patch.
    Raises OSError if a patch file cannot be opened.
    """
    import re

    # General pattern: start of line, optional whitespace, tag with optional
    # hyphen, underscore or space separators, maybe a colon, some whitespace,
    # then the value, all case insensitive.
    sob_re = re.compile(r"^[\t ]*(Signed[-_ ]off[-_ ]by:?)[\t ]*(.+)", re.IGNORECASE | re.MULTILINE)
    status_re = re.compile(r"^[\t ]*(Upstream[-_ ]Status:?)[\t ]*(\w*)", re.IGNORECASE | re.MULTILINE)
    status_values = ("accepted", "pending", "inappropriate", "backport", "submitted", "denied")
    cve_tag_re = re.compile(r"^[\t ]*(CVE:)[\t ]*(.*)", re.IGNORECASE | re.MULTILINE)
    cve_re = re.compile(r"cve-[0-9]{4}-[0-9]{4,6}", re.IGNORECASE)

    results = {}

    for patch in patches:
        result = Result()
        results[patch] = result

        # Non-ASCII bytes are dropped instead of raising a decode error. The
        # context manager closes the file as soon as it has been read, so
        # scanning many patches does not accumulate open handles.
        with open(patch, encoding='ascii', errors='ignore') as patch_file:
            content = patch_file.read()

        # Find the Signed-off-by tag (only the first occurrence is examined)
        match = sob_re.search(content)
        if match:
            value = match.group(1)
            # Anything other than the exact canonical spelling is recorded
            if value != "Signed-off-by:":
                result.malformed_sob = value
            result.sob = match.group(2)
        else:
            result.missing_sob = True

        # Find the Upstream-Status tag (only the first occurrence is examined)
        match = status_re.search(content)
        if match:
            value = match.group(1)
            if value != "Upstream-Status:":
                result.malformed_upstream_status = value

            # The status value is compared and stored in lower case
            value = match.group(2).lower()
            # TODO: check case
            if value not in status_values:
                result.unknown_upstream_status = True
            result.upstream_status = value
        else:
            result.missing_upstream_status = True

        # Check that patches which look like CVEs (by filename or content)
        # have CVE tags
        if cve_re.search(patch) or cve_re.search(content):
            if not cve_tag_re.search(content):
                result.missing_cve = True
        # TODO: extract CVE list

    return results
91
92
def analyse(results, want_blame=False, verbose=True):
    """
    Print warnings for problematic patches, then summary statistics.

    results: mapping of patch filename to the review result for that patch
    want_blame: display blame data for each malformed patch
    verbose: display per-file results instead of just summary
    """

    # Blame is shown alongside the per-file warnings, so it is only honoured
    # when verbose output is enabled
    show_blame = want_blame and verbose

    # TODO: the malformed Signed-off-by warning is disabled for now as too
    # much fails (the statistic is still collected)
    warn_malformed_sob = False

    tally = {
        "missing_sob": 0,
        "malformed_sob": 0,
        "missing_status": 0,
        "malformed_status": 0,
        "missing_cve": 0,
        "pending": 0,
    }
    total_patches = 0

    for name in sorted(results):
        entry = results[name]
        total_patches += 1

        # Build statistics: each flag contributes one to its counter when set
        tally["missing_sob"] += bool(entry.missing_sob)
        tally["malformed_sob"] += bool(entry.malformed_sob)
        tally["missing_status"] += bool(entry.missing_upstream_status)
        tally["malformed_status"] += bool(entry.malformed_upstream_status or entry.unknown_upstream_status)
        tally["missing_cve"] += bool(entry.missing_cve)
        tally["pending"] += bool(entry.upstream_status == "pending")

        # Output warnings: (condition, message template, template values),
        # listed in the order they are reported
        warnings = (
            (entry.missing_sob,
             "Missing Signed-off-by tag (%s)", (name,)),
            (warn_malformed_sob and entry.malformed_sob,
             "Malformed Signed-off-by '%s' (%s)", (entry.malformed_sob, name)),
            (entry.missing_cve,
             "Missing CVE tag (%s)", (name,)),
            (entry.missing_upstream_status,
             "Missing Upstream-Status tag (%s)", (name,)),
            (entry.malformed_upstream_status,
             "Malformed Upstream-Status '%s' (%s)", (entry.malformed_upstream_status, name)),
            (entry.unknown_upstream_status,
             "Unknown Upstream-Status value '%s' (%s)", (entry.upstream_status, name)),
        )

        flagged = False
        for triggered, template, values in warnings:
            if not triggered:
                continue
            flagged = True
            if verbose:
                print(template % values)

        if show_blame and flagged:
            print("\n".join(blame_patch(name)) + "\n")

    def percent(num):
        # With no patches at all there is nothing to take a percentage of
        if total_patches == 0:
            return "N/A"
        return "%d (%d%%)" % (num, round(num * 100.0 / total_patches))

    if verbose:
        # Blank line between the per-file warnings and the summary
        print()

    summary = (
        total_patches,
        percent(tally["missing_sob"]),
        percent(tally["malformed_sob"]),
        percent(tally["missing_cve"]),
        percent(tally["missing_status"]),
        percent(tally["malformed_status"]),
        percent(tally["pending"]),
    )
    print("""Total patches found: %d
Patches missing Signed-off-by: %s
Patches with malformed Signed-off-by: %s
Patches missing CVE: %s
Patches missing Upstream-Status: %s
Patches with malformed Upstream-Status: %s
Patches in Pending state: %s""" % summary)
182
183
184
def histogram(results):
    """
    Print a text histogram of the Upstream-Status values found.

    results: mapping of patch filename to an object with an upstream_status
    attribute (a status string, or a falsy value when there is none)

    One line is printed per distinct status, in the order each status is
    first seen, with a bar of '#' characters whose length is that status'
    share of all patches as a percentage (rounded up) followed by the count.
    Nothing is printed for an empty mapping.
    """
    # Standard library only: collections.Counter does the count-by that
    # previously required the third-party toolz package, and keeps the same
    # first-seen ordering.
    from collections import Counter
    import math

    counts = Counter(r.upstream_status for r in results.values())
    total = len(results)
    for status, count in counts.items():
        bar = "#" * int(math.ceil(float(count) / total * 100))
        print("%-20s %s (%d)" % (status.capitalize() if status else "No status", bar, count))
192
193
if __name__ == "__main__":
    # Command-line entry point: review every tracked *.patch / *.diff file in
    # the given directory (default: the current directory) and print a report.
    import argparse
    import os
    import subprocess

    parser = argparse.ArgumentParser(description="Patch Review Tool")
    parser.add_argument("-b", "--blame", action="store_true", help="show blame for malformed patches")
    parser.add_argument("-v", "--verbose", action="store_true", help="show per-patch results")
    parser.add_argument("-g", "--histogram", action="store_true", help="show patch histogram")
    parser.add_argument("directory", nargs="?", help="directory to scan")
    args = parser.parse_args()

    if args.directory:
        os.chdir(args.directory)

    # -z makes git emit NUL-terminated, unquoted paths, so filenames that
    # contain spaces or other unusual characters come through intact rather
    # than being split apart on whitespace.
    listing = subprocess.check_output(("git", "ls-files", "-z", "*.patch", "*.diff")).decode("utf-8")
    # The trailing NUL leaves an empty final field, which is dropped here
    patches = [path for path in listing.split("\0") if path]

    results = patchreview(patches)
    analyse(results, want_blame=args.blame, verbose=args.verbose)
    if args.histogram:
        print()
        histogram(results)