blob: dc417b4c55b4f45c04abb006d485d8347f8c674d [file] [log] [blame]
#! /usr/bin/env python3
#
# SPDX-License-Identifier: GPL-2.0-only
#

# TODO
# - option to just list all broken files
# - test suite
# - validate signed-off-by
10
# Recognised Upstream-Status values, in lower case (patchreview() lower-cases
# the value it finds before looking it up here).
status_values = (
    "accepted",
    "pending",
    "inappropriate",
    "backport",
    "submitted",
    "denied",
    "inactive-upstream",
)
Brad Bishopd7bf8c12018-02-25 22:55:05 -050012
class Result:
    """
    Findings for a single patch file.

    Every attribute is a class-level default; code that inspects a patch
    overrides the relevant ones on the instance.
    """

    # --- Upstream-Status ---
    # True when no Upstream-Status tag was found at all
    missing_upstream_status = False
    # The tag text as written when it is malformed in some way, else None
    malformed_upstream_status = None
    # True when the Upstream-Status value is not a recognised one
    unknown_upstream_status = False
    # The upstream status value (Pending, etc), or None
    upstream_status = None

    # --- Signed-off-by ---
    # True when no Signed-off-by tag was found at all
    missing_sob = False
    # Falsy when the tag is well formed; set to the tag text as written when
    # it is malformed
    malformed_sob = False
    # The Signed-off-by tag value, or None
    sob = None

    # --- CVE ---
    # True when a patch looks like a CVE fix but doesn't have a CVE tag
    missing_cve = False
30
def blame_patch(patch):
    """
    Look up the git history of a patch file.

    patch: filename of the patch

    Returns a list of strings, one per line of "git log" output, each in the
    form "commit summary (author name <author email>)". The log follows
    renames and is filtered with --diff-filter=A.
    """
    import subprocess

    command = [
        "git", "log",
        "--follow", "--find-renames", "--diff-filter=A",
        "--format=%s (%aN <%aE>)",
        "--", patch,
    ]
    raw_log = subprocess.check_output(command)
    return raw_log.decode("utf-8").splitlines()
41
def patchreview(path, patches):
    """
    Scan patch files for Signed-off-by, Upstream-Status and CVE tags.

    path: directory that the names in patches are relative to
    patches: iterable of patch filenames

    Returns a dict mapping each patch's full path (path joined with the
    patch name) to a Result describing what was found in that file.
    """
    import os.path
    import re

    # General pattern: start of line, optional whitespace, tag with optional
    # hyphen or spaces, maybe a colon, some whitespace, then the value, all case
    # insensitive.
    sob_re = re.compile(r"^[\t ]*(Signed[-_ ]off[-_ ]by:?)[\t ]*(.+)", re.IGNORECASE | re.MULTILINE)
    status_re = re.compile(r"^[\t ]*(Upstream[-_ ]Status:?)[\t ]*([\w-]*)", re.IGNORECASE | re.MULTILINE)
    cve_tag_re = re.compile(r"^[\t ]*(CVE:)[\t ]*(.*)", re.IGNORECASE | re.MULTILINE)
    cve_re = re.compile(r"cve-[0-9]{4}-[0-9]{4,6}", re.IGNORECASE)

    results = {}

    for patch in patches:
        fullpath = os.path.join(path, patch)
        result = Result()
        results[fullpath] = result

        # Decode as ASCII and silently drop anything that doesn't fit, so a
        # patch in an unexpected encoding never raises. The context manager
        # closes the file straight away instead of leaking one handle per patch.
        with open(fullpath, encoding='ascii', errors='ignore') as patchfile:
            content = patchfile.read()

        # Find the Signed-off-by tag
        match = sob_re.search(content)
        if match:
            value = match.group(1)
            # Anything other than the canonical spelling is recorded verbatim
            if value != "Signed-off-by:":
                result.malformed_sob = value
            result.sob = match.group(2)
        else:
            result.missing_sob = True

        # Find the Upstream-Status tag
        match = status_re.search(content)
        if match:
            value = match.group(1)
            # Anything other than the canonical spelling is recorded verbatim
            if value != "Upstream-Status:":
                result.malformed_upstream_status = value

            value = match.group(2).lower()
            # TODO: check case
            if value not in status_values:
                result.unknown_upstream_status = True
            result.upstream_status = value
        else:
            result.missing_upstream_status = True

        # Check that patches which look like CVEs (a CVE identifier in the
        # filename or the body) have CVE tags
        if cve_re.search(patch) or cve_re.search(content):
            if not cve_tag_re.search(content):
                result.missing_cve = True
            # TODO: extract CVE list

    return results
96
97
def analyse(results, want_blame=False, verbose=True):
    """
    Print warnings for problematic patches followed by a summary.

    results: mapping of patch path to a result object carrying the
             missing_sob, malformed_sob, missing_upstream_status,
             malformed_upstream_status, unknown_upstream_status,
             upstream_status and missing_cve attributes
    want_blame: display blame data for each malformed patch
    verbose: display per-file results instead of just summary
    """

    # want_blame requires verbose, so disable blame if we're not verbose
    if want_blame and not verbose:
        want_blame = False

    total_patches = 0
    missing_sob = 0
    malformed_sob = 0
    missing_status = 0
    malformed_status = 0
    missing_cve = 0
    pending_patches = 0

    for patch in sorted(results):
        r = results[patch]
        total_patches += 1

        bad_status = r.malformed_upstream_status or r.unknown_upstream_status

        # Build statistics
        if r.missing_sob:
            missing_sob += 1
        if r.malformed_sob:
            malformed_sob += 1
        if r.missing_upstream_status:
            missing_status += 1
        if bad_status:
            malformed_status += 1
        if r.missing_cve:
            missing_cve += 1
        # Patches with a malformed or unknown status are counted as pending,
        # alongside those explicitly marked pending. A patch matching both
        # conditions (e.g. "Upstream Status: Pending") is counted only once,
        # so the pending total can never exceed the number of patches.
        if bad_status or r.upstream_status == "pending":
            pending_patches += 1

        # Collect warnings; any warning at all makes the patch blame-worthy
        warnings = []
        if r.missing_sob:
            warnings.append("Missing Signed-off-by tag (%s)" % patch)
        if r.malformed_sob:
            warnings.append("Malformed Signed-off-by '%s' (%s)" % (r.malformed_sob, patch))
        if r.missing_cve:
            warnings.append("Missing CVE tag (%s)" % patch)
        if r.missing_upstream_status:
            warnings.append("Missing Upstream-Status tag (%s)" % patch)
        if r.malformed_upstream_status:
            warnings.append("Malformed Upstream-Status '%s' (%s)" % (r.malformed_upstream_status, patch))
        if r.unknown_upstream_status:
            warnings.append("Unknown Upstream-Status value '%s' (%s)" % (r.upstream_status, patch))

        if verbose:
            for warning in warnings:
                print(warning)

        if want_blame and warnings:
            print("\n".join(blame_patch(patch)) + "\n")

    def percent(num):
        # "N/A" when there were no patches at all to take a percentage of
        try:
            return "%d (%d%%)" % (num, round(num * 100.0 / total_patches))
        except ZeroDivisionError:
            return "N/A"

    if verbose:
        print()

    summary = "\n".join((
        "Total patches found: %d",
        "Patches missing Signed-off-by: %s",
        "Patches with malformed Signed-off-by: %s",
        "Patches missing CVE: %s",
        "Patches missing Upstream-Status: %s",
        "Patches with malformed Upstream-Status: %s",
        "Patches in Pending state: %s",
    ))
    print(summary % (total_patches,
                     percent(missing_sob),
                     percent(malformed_sob),
                     percent(missing_cve),
                     percent(missing_status),
                     percent(malformed_status),
                     percent(pending_patches)))
188
189
190
def histogram(results):
    """
    Print a bar chart of how many patches have each upstream status.

    results: mapping of patch path to a result object with an
             upstream_status attribute

    One line is printed per distinct status, in first-seen order. The bar has
    one "#" per percent of all patches (rounded up), followed by the absolute
    count. A status of None is labelled "No status".
    """
    # Standard library only: a plain frequency count does not need a
    # third-party package.
    import collections
    import math

    counts = collections.Counter(r.upstream_status for r in results.values())
    total = len(results)

    for status, count in counts.items():
        bar = "#" * int(math.ceil(float(count) / total * 100))
        label = status.capitalize() if status else "No status"
        print("%-20s %s (%d)" % (label, bar, count))
198
199
# Command-line entry point: list the patches tracked by git below the given
# directory, review them, print the analysis and optionally append a row of
# statistics to a JSON file and/or print a histogram.
if __name__ == "__main__":
    import argparse
    import subprocess

    args = argparse.ArgumentParser(description="Patch Review Tool")
    args.add_argument("-b", "--blame", action="store_true", help="show blame for malformed patches")
    args.add_argument("-v", "--verbose", action="store_true", help="show per-patch results")
    args.add_argument("-g", "--histogram", action="store_true", help="show patch histogram")
    args.add_argument("-j", "--json", help="update JSON")
    args.add_argument("directory", help="directory to scan")
    args = args.parse_args()

    # -z makes git emit NUL-terminated, unquoted paths, so filenames that
    # contain whitespace survive intact (splitting on whitespace would break
    # them apart).
    listing = subprocess.check_output(("git", "-C", args.directory, "ls-files", "-z", "recipes-*/**/*.patch", "recipes-*/**/*.diff")).decode("utf-8")
    patches = [name for name in listing.split("\0") if name]
    results = patchreview(args.directory, patches)
    analyse(results, want_blame=args.blame, verbose=args.verbose)

    if args.json:
        import collections
        import json
        import os.path

        # Append to the existing history if there is one, else start afresh
        if os.path.isfile(args.json):
            with open(args.json) as infile:
                data = json.load(infile)
        else:
            data = []

        row = collections.Counter()
        row["total"] = len(results)
        # Commit date of HEAD in the scanned repository, formatted with "%s"
        row["date"] = subprocess.check_output(["git", "-C", args.directory, "show", "-s", "--pretty=format:%cd", "--date=format:%s"]).decode("utf-8").strip()
        for r in results.values():
            if r.upstream_status in status_values:
                row[r.upstream_status] += 1
            if r.malformed_upstream_status or r.missing_upstream_status:
                row['malformed-upstream-status'] += 1
            if r.malformed_sob or r.missing_sob:
                row['malformed-sob'] += 1

        data.append(row)
        # Close (and therefore flush) the file deterministically
        with open(args.json, "w") as outfile:
            json.dump(data, outfile)

    if args.histogram:
        print()
        histogram(results)