blob: bceae06561c48c87a6911a76a3314d7e8800ee05 [file] [log] [blame]
#! /usr/bin/env python3
#
# Copyright OpenEmbedded Contributors
#
# SPDX-License-Identifier: GPL-2.0-only
#
Brad Bishopd7bf8c12018-02-25 22:55:05 -05007
Patrick Williamsac13d5f2023-11-24 18:59:46 -06008import argparse
9import collections
10import json
11import os
12import os.path
13import pathlib
14import re
15import subprocess
16
# TODO
# - option to just list all broken files
# - test suite
# - validate signed-off-by
21
# Recognised Upstream-Status values, in lower case (values read from patches
# are lower-cased before being compared against this tuple).
status_values = (
    "accepted",
    "pending",
    "inappropriate",
    "backport",
    "submitted",
    "denied",
    "inactive-upstream",
)
Brad Bishopd7bf8c12018-02-25 22:55:05 -050023
class Result:
    """Findings for a single patch file; the defaults mean "nothing found"."""

    # True when the patch has no Upstream-Status tag at all
    missing_upstream_status = False
    # The tag text as written when it is not spelled exactly
    # "Upstream-Status:", otherwise None
    malformed_upstream_status = None
    # True when the Upstream-Status value is not a recognised one
    unknown_upstream_status = False
    # The Upstream-Status value, lower-cased (pending, etc)
    upstream_status = None
    # True when the patch has no Signed-off-by tag at all
    missing_sob = False
    # The tag text as written when it is not spelled exactly
    # "Signed-off-by:", otherwise False
    malformed_sob = False
    # The Signed-off-by tag value
    sob = None
    # True when the patch looks like a CVE fix but doesn't have a CVE tag
    missing_cve = False
41
def blame_patch(patch):
    """
    Return the history of a patch file as a list of "commit summary (author
    name <author email>)" strings, one per commit that git log reports for
    the given filename.
    """
    command = [
        "git", "log",
        "--follow", "--find-renames", "--diff-filter=A",
        "--format=%s (%aN <%aE>)",
        "--", patch,
    ]
    raw = subprocess.check_output(command)
    return raw.decode("utf-8").splitlines()
51
def patchreview(patches):
    """
    Scan patch files for Signed-off-by, Upstream-Status and CVE tags.

    patches: iterable of patch file paths (strings or path-like objects)

    Returns a dictionary mapping each path, exactly as it was passed in, to a
    Result object describing what was found in that patch.
    """

    # General pattern: start of line, optional whitespace, tag with optional
    # hyphen or spaces, maybe a colon, some whitespace, then the value, all case
    # insensitive.
    sob_re = re.compile(r"^[\t ]*(Signed[-_ ]off[-_ ]by:?)[\t ]*(.+)", re.IGNORECASE | re.MULTILINE)
    status_re = re.compile(r"^[\t ]*(Upstream[-_ ]Status:?)[\t ]*([\w-]*)", re.IGNORECASE | re.MULTILINE)
    cve_tag_re = re.compile(r"^[\t ]*(CVE:)[\t ]*(.*)", re.IGNORECASE | re.MULTILINE)
    cve_re = re.compile(r"cve-[0-9]{4}-[0-9]{4,6}", re.IGNORECASE)

    results = {}

    for patch in patches:

        result = Result()
        results[patch] = result

        # Bytes that are not ASCII are silently dropped instead of raising;
        # the tags searched for below are all plain ASCII. The context manager
        # closes the file promptly so a large scan doesn't pile up open handles.
        with open(patch, encoding='ascii', errors='ignore') as f:
            content = f.read()

        # Find the Signed-off-by tag
        match = sob_re.search(content)
        if match:
            value = match.group(1)
            # The regex is deliberately lenient, so anything other than the
            # exact spelling is recorded as malformed
            if value != "Signed-off-by:":
                result.malformed_sob = value
            result.sob = match.group(2)
        else:
            result.missing_sob = True

        # Find the Upstream-Status tag
        match = status_re.search(content)
        if match:
            value = match.group(1)
            if value != "Upstream-Status:":
                result.malformed_upstream_status = value

            value = match.group(2).lower()
            # TODO: check case
            if value not in status_values:
                result.unknown_upstream_status = True
            result.upstream_status = value
        else:
            result.missing_upstream_status = True

        # Check that patches which looks like CVEs have CVE tags. The filename
        # is converted with os.fspath() so path-like objects can be searched too.
        if cve_re.search(os.fspath(patch)) or cve_re.search(content):
            if not cve_tag_re.search(content):
                result.missing_cve = True
        # TODO: extract CVE list

    return results
104
105
def analyse(results, want_blame=False, verbose=True):
    """
    Print warnings for each problematic patch (when verbose) followed by a
    summary of the totals.

    results: mapping of patch filename to an object carrying the review flags
             (missing_sob, malformed_sob, missing_upstream_status,
             malformed_upstream_status, unknown_upstream_status,
             upstream_status, missing_cve)
    want_blame: display blame data for each malformed patch
    verbose: display per-file results instead of just summary
    """

    # want_blame requires verbose, so disable blame if we're not verbose
    if want_blame and not verbose:
        want_blame = False

    total_patches = 0
    missing_sob = 0
    malformed_sob = 0
    missing_status = 0
    malformed_status = 0
    missing_cve = 0
    pending_patches = 0

    for patch in sorted(results):
        r = results[patch]
        total_patches += 1
        need_blame = False

        # Build statistics
        if r.missing_sob:
            missing_sob += 1
        if r.malformed_sob:
            malformed_sob += 1
        if r.missing_upstream_status:
            missing_status += 1
        bad_status = r.malformed_upstream_status or r.unknown_upstream_status
        if bad_status:
            malformed_status += 1
        if r.missing_cve:
            missing_cve += 1
        # Patches with a malformed or unknown status are counted as pending,
        # alongside those that really are pending. This is a single test so
        # that a malformed tag whose value is "pending" is only counted once.
        if bad_status or r.upstream_status == "pending":
            pending_patches += 1

        # Output warnings
        if r.missing_sob:
            need_blame = True
            if verbose:
                print("Missing Signed-off-by tag (%s)" % patch)
        if r.malformed_sob:
            need_blame = True
            if verbose:
                print("Malformed Signed-off-by '%s' (%s)" % (r.malformed_sob, patch))
        if r.missing_cve:
            need_blame = True
            if verbose:
                print("Missing CVE tag (%s)" % patch)
        if r.missing_upstream_status:
            need_blame = True
            if verbose:
                print("Missing Upstream-Status tag (%s)" % patch)
        if r.malformed_upstream_status:
            need_blame = True
            if verbose:
                print("Malformed Upstream-Status '%s' (%s)" % (r.malformed_upstream_status, patch))
        if r.unknown_upstream_status:
            need_blame = True
            if verbose:
                print("Unknown Upstream-Status value '%s' (%s)" % (r.upstream_status, patch))

        if want_blame and need_blame:
            print("\n".join(blame_patch(patch)) + "\n")

    def percent(num):
        # Format a count with its share of all patches; with no patches at all
        # there is no meaningful percentage
        try:
            return "%d (%d%%)" % (num, round(num * 100.0 / total_patches))
        except ZeroDivisionError:
            return "N/A"

    if verbose:
        print()

    print("""Total patches found: %d
Patches missing Signed-off-by: %s
Patches with malformed Signed-off-by: %s
Patches missing CVE: %s
Patches missing Upstream-Status: %s
Patches with malformed Upstream-Status: %s
Patches in Pending state: %s""" % (total_patches,
                                   percent(missing_sob),
                                   percent(malformed_sob),
                                   percent(missing_cve),
                                   percent(missing_status),
                                   percent(malformed_status),
                                   percent(pending_patches)))
196
197
198
def histogram(results):
    """
    Print a bar chart of how many patches have each Upstream-Status value.

    results: mapping of patch filename to an object with an upstream_status
             attribute

    One line is printed per distinct status, in order of first appearance.
    Each bar has one "#" per percent of the total number of patches (rounded
    up), and patches without a status are labelled "No status".
    """
    import math

    # The standard library Counter does the grouping, so this option has no
    # third-party dependency
    counts = collections.Counter(r.upstream_status for r in results.values())
    total = len(results)
    for status, count in counts.items():
        bar = "#" * int(math.ceil(float(count) / total * 100))
        label = status.capitalize() if status else "No status"
        print("%-20s %s (%d)" % (label, bar, count))
207
def find_layers(candidate):
    """
    Return the absolute paths of the layers found at candidate, as a list.

    candidate: a pathlib.Path (or a plain string path) to inspect
    """
    # candidate can either be the path to a layer directly (eg meta-intel), or a
    # repository that contains other layers (meta-arm). We can determine what by
    # looking for a conf/layer.conf file. If that file exists then it's a layer,
    # otherwise it's a repository of layers and we can assume they're called
    # meta-*.

    # Accept plain strings as well as Path objects
    candidate = pathlib.Path(candidate)

    if (candidate / "conf" / "layer.conf").exists():
        return [candidate.absolute()]

    # iterdir() yields entries in arbitrary order, so sort to keep the output
    # stable between runs
    return sorted(
        d.absolute() for d in candidate.iterdir()
        if d.is_dir() and (d.name == "meta" or d.name.startswith("meta-"))
    )
219
# TODO: the helpers below only match recipes-*/ paths, so they don't actually handle dynamic-layers/
221
def gather_patches(layers):
    """
    Return the paths of all .patch and .diff files that git lists under
    recipes-*/ in each of the given layers.

    layers: iterable of layer directories, each inside a git working tree

    Each returned path is the layer directory joined with the path git
    reports relative to it. Raises subprocess.CalledProcessError if git fails.
    """
    patches = []
    for directory in layers:
        # -z makes git emit NUL-terminated, unquoted paths, so filenames that
        # contain spaces or other unusual characters come through intact
        output = subprocess.check_output(("git", "-C", directory, "ls-files", "-z", "recipes-*/**/*.patch", "recipes-*/**/*.diff"))
        # The output ends with a NUL, so drop the empty string left by split
        filenames = [f for f in os.fsdecode(output).split("\0") if f]
        patches += [os.path.join(directory, f) for f in filenames]
    return patches
228
def count_recipes(layers):
    """
    Return the total number of .bb files that git lists under recipes-*/
    across all of the given layer directories.
    """
    def recipes_in(layer):
        # One line of ls-files output per matching recipe
        listing = subprocess.check_output(["git", "-C", layer, "ls-files", "recipes-*/**/*.bb"], universal_newlines=True)
        return len(listing.splitlines())

    return sum(recipes_in(layer) for layer in layers)
Brad Bishopd7bf8c12018-02-25 22:55:05 -0500235
if __name__ == "__main__":
    # Command line entry point: review every patch in the given layer(s),
    # print the analysis, and optionally append a row of statistics to a JSON
    # history file and/or print a histogram.
    parser = argparse.ArgumentParser(description="Patch Review Tool")
    parser.add_argument("-b", "--blame", action="store_true", help="show blame for malformed patches")
    parser.add_argument("-v", "--verbose", action="store_true", help="show per-patch results")
    parser.add_argument("-g", "--histogram", action="store_true", help="show patch histogram")
    parser.add_argument("-j", "--json", help="update JSON")
    parser.add_argument("directory", type=pathlib.Path, metavar="DIRECTORY", help="directory to scan (layer, or repository of layers)")
    args = parser.parse_args()

    layers = find_layers(args.directory)
    print(f"Found layers {' '.join((d.name for d in layers))}")
    patches = gather_patches(layers)
    results = patchreview(patches)
    analyse(results, want_blame=args.blame, verbose=args.verbose)

    if args.json:
        # Extend the existing history if the file is already there, otherwise
        # start a new one
        if os.path.isfile(args.json):
            with open(args.json) as f:
                data = json.load(f)
        else:
            data = []

        # A Counter so that the per-status tallies below can start from zero
        row = collections.Counter()
        row["total"] = len(results)
        # Committer date of HEAD, formatted by git with the strftime pattern %s
        row["date"] = subprocess.check_output(["git", "-C", args.directory, "show", "-s", "--pretty=format:%cd", "--date=format:%s"], universal_newlines=True).strip()
        row["commit"] = subprocess.check_output(["git", "-C", args.directory, "rev-parse", "HEAD"], universal_newlines=True).strip()
        row['commit_count'] = subprocess.check_output(["git", "-C", args.directory, "rev-list", "--count", "HEAD"], universal_newlines=True).strip()
        row['recipe_count'] = count_recipes(layers)

        for r in results.values():
            if r.upstream_status in status_values:
                row[r.upstream_status] += 1
            if r.malformed_upstream_status or r.missing_upstream_status:
                row['malformed-upstream-status'] += 1
            if r.malformed_sob or r.missing_sob:
                row['malformed-sob'] += 1

        data.append(row)
        # Close the file explicitly so the history is fully written out
        with open(args.json, "w") as f:
            json.dump(data, f, sort_keys=True, indent="\t")

    if args.histogram:
        print()
        histogram(results)
277 histogram(results)