#! /usr/bin/env python3
#
# SPDX-License-Identifier: GPL-2.0-only
#

# TODO
# - option to just list all broken files
# - test suite
# - validate signed-off-by
10
11import argparse
12import collections
13import json
14import os
15import re
16import subprocess
17
# Recognised Upstream-Status values. They are kept lower-case because
# patchreview() lower-cases the tag value before testing membership.
status_values = (
    "accepted", "pending", "inappropriate",
    "backport", "submitted", "denied",
)
26
27
class PatchResult:
    """
    Findings for a single patch file. The class attributes below are the
    defaults; patchreview() overrides them per instance as it scans a patch.
    """

    # --- Upstream-Status ---
    # True when no Upstream-Status tag was found at all
    missing_upstream_status = False
    # The tag text as written when it is not exactly "Upstream-Status:",
    # otherwise None
    malformed_upstream_status = None
    # True when the value is not one of the known status values
    unknown_upstream_status = False
    # The lower-cased status value (pending, etc), or None if no tag
    upstream_status = None

    # --- Signed-off-by ---
    # True when no Signed-off-by tag was found at all
    missing_sob = False
    # False by default; set to the tag text as written when it is not
    # exactly "Signed-off-by:"
    malformed_sob = False
    # The value following the Signed-off-by tag
    sob = None

    # --- CVE ---
    # True when the patch looks like a CVE fix but carries no CVE tag
    missing_cve = False
45
46
class Summary:
    """
    Aggregate counters over a set of reviewed patches. All counters start at
    zero and are incremented per instance by analyse().
    """

    # Number of patches examined
    total = 0

    # Patches that look like CVE fixes but have no CVE tag
    cve_missing = 0

    # Signed-off-by problems
    sob_missing = 0
    sob_malformed = 0

    # Upstream-Status problems and the number of patches treated as pending
    status_missing = 0
    status_malformed = 0
    status_pending = 0
54 status_pending = 0
55
def blame_patch(patch):
    """
    Return the git history of a patch file as a list of strings, one per
    commit, each formatted as "commit summary (author name <author email>)".

    git log is run with --follow, --find-renames and --diff-filter=A for the
    given path; a non-zero git exit status raises CalledProcessError.
    """
    command = [
        "git", "log",
        "--follow", "--find-renames", "--diff-filter=A",
        "--format=%s (%aN <%aE>)",
        "--", patch,
    ]
    log_output = subprocess.check_output(command)
    return log_output.decode("utf-8").splitlines()
65
def patchreview(patches):
    """
    Scan patch files for Signed-off-by, Upstream-Status and CVE tags.

    patches: iterable of patch file paths.

    Returns a dict mapping each path to a PatchResult recording which tags
    are missing, malformed or unknown. Only the first occurrence of each tag
    in a file is examined.
    """
    # General pattern: start of line, optional whitespace, tag with optional
    # hyphen or spaces, maybe a colon, some whitespace, then the value, all case
    # insensitive.
    sob_re = re.compile(r"^[\t ]*(Signed[-_ ]off[-_ ]by:?)[\t ]*(.+)", re.IGNORECASE | re.MULTILINE)
    status_re = re.compile(r"^[\t ]*(Upstream[-_ ]Status:?)[\t ]*(\w*)", re.IGNORECASE | re.MULTILINE)
    cve_tag_re = re.compile(r"^[\t ]*(CVE:)[\t ]*(.*)", re.IGNORECASE | re.MULTILINE)
    cve_re = re.compile(r"cve-[0-9]{4}-[0-9]{4,6}", re.IGNORECASE)

    results = {}

    for patch in patches:

        result = PatchResult()
        results[patch] = result

        # Decode as ASCII and drop anything that does not decode; the context
        # manager closes the file as soon as it has been read.
        with open(patch, encoding="ascii", errors="ignore") as patch_file:
            content = patch_file.read()

        # Find the Signed-off-by tag
        match = sob_re.search(content)
        if match:
            value = match.group(1)
            # The regex is lenient; anything but the exact spelling is
            # recorded as malformed (the offending text is kept for display)
            if value != "Signed-off-by:":
                result.malformed_sob = value
            result.sob = match.group(2)
        else:
            result.missing_sob = True

        # Find the Upstream-Status tag
        match = status_re.search(content)
        if match:
            value = match.group(1)
            if value != "Upstream-Status:":
                result.malformed_upstream_status = value

            value = match.group(2).lower()
            # TODO: check case
            if value not in status_values:
                result.unknown_upstream_status = True
            result.upstream_status = value
        else:
            result.missing_upstream_status = True

        # Check that patches which looks like CVEs have CVE tags
        if cve_re.search(patch) or cve_re.search(content):
            if not cve_tag_re.search(content):
                result.missing_cve = True
            # TODO: extract CVE list

    return results
116
117
def analyse(results, want_blame=False, verbose=True):
    """
    Build a Summary from the per-patch results, optionally printing warnings.

    results: dict mapping patch path to its PatchResult
    want_blame: display blame data for each malformed patch
    verbose: display per-file results instead of just summary

    Returns the populated Summary.
    """

    # want_blame requires verbose, so disable blame if we're not verbose
    if want_blame and not verbose:
        want_blame = False

    summary = Summary()

    for patch in sorted(results):
        r = results[patch]
        summary.total += 1
        need_blame = False

        bad_status = bool(r.malformed_upstream_status or r.unknown_upstream_status)

        # Build statistics
        if r.missing_sob:
            summary.sob_missing += 1
        if r.malformed_sob:
            summary.sob_malformed += 1
        if r.missing_upstream_status:
            summary.status_missing += 1
        if bad_status:
            summary.status_malformed += 1
        if r.missing_cve:
            summary.cve_missing += 1
        # Patches with a malformed or unknown status are counted as pending,
        # as are patches explicitly marked pending. A single test ensures a
        # patch that is both (e.g. "Upstream Status: Pending") is counted
        # once rather than twice.
        if bad_status or r.upstream_status == "pending":
            summary.status_pending += 1

        # Output warnings
        if r.missing_sob:
            need_blame = True
            if verbose:
                print("Missing Signed-off-by tag (%s)" % patch)
        if r.malformed_sob:
            need_blame = True
            if verbose:
                print("Malformed Signed-off-by '%s' (%s)" % (r.malformed_sob, patch))
        if r.missing_cve:
            need_blame = True
            if verbose:
                print("Missing CVE tag (%s)" % patch)
        if r.missing_upstream_status:
            need_blame = True
            if verbose:
                print("Missing Upstream-Status tag (%s)" % patch)
        if r.malformed_upstream_status:
            need_blame = True
            if verbose:
                print("Malformed Upstream-Status '%s' (%s)" % (r.malformed_upstream_status, patch))
        if r.unknown_upstream_status:
            need_blame = True
            if verbose:
                print("Unknown Upstream-Status value '%s' (%s)" % (r.upstream_status, patch))

        if want_blame and need_blame:
            print("\n".join(blame_patch(patch)) + "\n")

    return summary
181
182
def display_summary(summary, verbose):
    """
    Print the totals held in summary, each count followed by its percentage
    of all patches.

    summary: object providing the Summary counters
    verbose: when true, print a blank line first to separate the summary
             from the per-patch output
    """
    def percent(num):
        # Render "count (share%)"; with no patches at all there is no
        # meaningful share, so report N/A instead.
        try:
            share = round(num * 100.0 / summary.total)
        except ZeroDivisionError:
            return "N/A"
        return "%d (%d%%)" % (num, share)

    if verbose:
        print()

    # Counters in the order the report lists them
    counters = (
        summary.sob_missing,
        summary.sob_malformed,
        summary.cve_missing,
        summary.status_missing,
        summary.status_malformed,
        summary.status_pending,
    )
    report = """Total patches found: %d
Patches missing Signed-off-by: %s
Patches with malformed Signed-off-by: %s
Patches missing CVE: %s
Patches missing Upstream-Status: %s
Patches with malformed Upstream-Status: %s
Patches in Pending state: %s""" % ((summary.total,) + tuple(percent(c) for c in counters))
    print(report)
206
207
def generate_metrics(summary, output):
    """
    Write the summary counters to output as gauges in OpenMetrics text form.

    summary: object providing the Summary counters
    output: writable text stream

    Each counter is exposed as patch_check_<attribute name>, and the output
    is terminated with the "# EOF" marker.
    """
    # https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md
    # Summary attribute name, MetricPoint help
    mapping = (
        ("total", "Total patches"),
        ("cve_missing", "Patches missing CVE tag"),
        ("sob_malformed", "Patches with malformed Signed-off-by"),
        ("sob_missing", "Patches with missing Signed-off-by"),
        ("status_malformed", "Patches with malformed Upstream-Status"),
        ("status_missing", "Patches with missing Upstream-Status"),
        ("status_pending", "Patches with Pending Upstream-Status")
    )
    for attr, help_text in mapping:
        metric = f"patch_check_{attr}"
        value = getattr(summary, attr)
        # The HELP line must name the metric it describes, just like TYPE
        output.write(f"""
# TYPE {metric} gauge
# HELP {metric} {help_text}
{metric} {value}
""")
    output.write("\n# EOF\n")
229
def histogram(results):
    """
    Print a text histogram of Upstream-Status values.

    results: dict mapping patch path to an object with an upstream_status
             attribute (None when the patch has no status)

    One line is printed per distinct status, in order of first appearance:
    the capitalised status (or "No status"), a bar of one "#" per percent of
    all patches (rounded up), and the raw count. Nothing is printed for an
    empty results dict.
    """
    import math

    # Counter keeps first-seen order, so no third-party helper is needed
    counts = collections.Counter(r.upstream_status for r in results.values())
    for status, count in counts.items():
        bar = "#" * int(math.ceil(float(count) / len(results) * 100))
        print("%-20s %s (%d)" % (status.capitalize() if status else "No status", bar, count))
238
def gather_patches(directories):
    """
    Return the paths of all git-tracked *.patch and *.diff files under the
    recipes-* trees of each directory.

    directories: iterable of paths inside git work trees
    Returns a list of paths, each prefixed with the directory it was found in.
    Raises CalledProcessError if git exits with a non-zero status.
    """
    patches = []
    for directory in directories:
        # -z makes git emit NUL-terminated, unquoted paths, so names that
        # contain whitespace or unusual characters come through intact
        # instead of being split apart or quoted.
        listing = subprocess.check_output(
            ("git", "-C", directory, "ls-files", "-z",
             "recipes-*/**/*.patch", "recipes-*/**/*.diff")).decode("utf-8")
        # The trailing NUL leaves an empty final field; drop it
        filenames = [name for name in listing.split("\0") if name]
        patches += [os.path.join(directory, f) for f in filenames]
    return patches
245
# Command-line entry point: gather patches from the given directories, review
# them, print the summary and optionally emit JSON, OpenMetrics or a histogram.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Patch Review Tool")
    parser.add_argument("-b", "--blame", action="store_true", help="show blame for malformed patches")
    parser.add_argument("-v", "--verbose", action="store_true", help="show per-patch results")
    parser.add_argument("-g", "--histogram", action="store_true", help="show patch histogram")
    parser.add_argument("-j", "--json", help="update JSON")
    parser.add_argument("-m", "--metrics", type=argparse.FileType('w'), help="write OpenMetrics")
    parser.add_argument("dirs", metavar="DIRECTORY", nargs="+", help="directory to scan")
    args = parser.parse_args()

    patches = gather_patches(args.dirs)
    results = patchreview(patches)
    summary = analyse(results, want_blame=args.blame, verbose=args.verbose)
    display_summary(summary, verbose=args.verbose)

    if args.json:
        # Append a row to the existing JSON list, or start a new list if the
        # file does not exist yet. Files are closed before moving on so the
        # write below never races an open read handle.
        if os.path.isfile(args.json):
            with open(args.json) as json_file:
                data = json.load(json_file)
        else:
            data = []

        row = collections.Counter()
        row["total"] = len(results)
        # Commit date of HEAD in the first directory, formatted with the
        # strftime "%s" specifier (presumably seconds since the epoch)
        row["date"] = subprocess.check_output(["git", "-C", args.dirs[0], "show", "-s", "--pretty=format:%cd", "--date=format:%s"]).decode("utf-8").strip()
        for r in results.values():
            if r.upstream_status in status_values:
                row[r.upstream_status] += 1
            if r.malformed_upstream_status or r.missing_upstream_status:
                row["malformed-upstream-status"] += 1
            if r.malformed_sob or r.missing_sob:
                row["malformed-sob"] += 1

        data.append(row)
        with open(args.json, "w") as json_file:
            json.dump(data, json_file)

    if args.metrics:
        generate_metrics(summary, args.metrics)

    if args.histogram:
        print()
        histogram(results)
286 histogram(results)