#! /usr/bin/env python3
#
# Copyright OpenEmbedded Contributors
#
# SPDX-License-Identifier: GPL-2.0-only
#

# TODO
# - option to just list all broken files
# - test suite
# - validate signed-off-by

# Upstream-Status values that are accepted as known. All lower case: the value
# read from a patch is lower-cased before it is looked up here.
status_values = (
    "accepted",
    "pending",
    "inappropriate",
    "backport",
    "submitted",
    "denied",
    "inactive-upstream",
)
Brad Bishopd7bf8c12018-02-25 22:55:05 -050014
class Result:
    """Review findings for a single patch file.

    Every attribute starts at its "nothing detected" default and is
    overwritten on the instance as the patch is examined.
    """

    # True when the patch has no Upstream-Status tag at all
    missing_upstream_status = False
    # The tag text as it was written when it is not spelled exactly
    # "Upstream-Status:"; None otherwise
    malformed_upstream_status = None
    # True when the Upstream-Status value is not one of the known values
    unknown_upstream_status = False
    # The Upstream-Status value, lower-cased ("pending", etc); None if no tag
    upstream_status = None
    # True when the patch has no Signed-off-by tag at all
    missing_sob = False
    # False by default; set to the tag text as it was written when it is not
    # spelled exactly "Signed-off-by:"
    malformed_sob = False
    # The value following the Signed-off-by tag; None if no tag
    sob = None
    # True when the patch looks like a CVE fix but has no CVE tag
    missing_cve = False
32
def blame_patch(patch):
    """
    From a patch filename, return a list of "commit summary (author name <author
    email>)" strings representing the history.

    git is run from the directory that contains the patch, so the lookup works
    when the current working directory is not inside the repository that holds
    the patch.

    Raises subprocess.CalledProcessError if git exits with an error.
    """
    import os.path
    import subprocess

    directory, filename = os.path.split(patch)
    output = subprocess.check_output(
        ("git", "log",
         "--follow", "--find-renames", "--diff-filter=A",
         "--format=%s (%aN <%aE>)",
         "--", filename),
        # An empty directory part means the patch is in the current directory,
        # in which case git is left to run where we already are.
        cwd=directory or None)
    return output.decode("utf-8").splitlines()
43
def patchreview(path, patches):
    """
    Scan patch files for Signed-off-by, Upstream-Status and CVE tags.

    path: directory that the names in patches are relative to
    patches: iterable of patch filenames

    Returns a dictionary mapping each joined path (path + patch name) to the
    Result describing what was found in that file.
    """
    import os.path
    import re

    # General pattern: start of line, optional whitespace, tag with optional
    # hyphen or spaces, maybe a colon, some whitespace, then the value, all case
    # insensitive.
    sob_re = re.compile(r"^[\t ]*(Signed[-_ ]off[-_ ]by:?)[\t ]*(.+)", re.IGNORECASE | re.MULTILINE)
    status_re = re.compile(r"^[\t ]*(Upstream[-_ ]Status:?)[\t ]*([\w-]*)", re.IGNORECASE | re.MULTILINE)
    cve_tag_re = re.compile(r"^[\t ]*(CVE:)[\t ]*(.*)", re.IGNORECASE | re.MULTILINE)
    cve_re = re.compile(r"cve-[0-9]{4}-[0-9]{4,6}", re.IGNORECASE)

    results = {}

    for patch in patches:
        fullpath = os.path.join(path, patch)
        result = Result()
        results[fullpath] = result

        # Bytes that are not ASCII are dropped rather than decoded. The file is
        # closed as soon as it has been read instead of being left to the
        # garbage collector.
        with open(fullpath, encoding='ascii', errors='ignore') as f:
            content = f.read()

        # Find the Signed-off-by tag
        match = sob_re.search(content)
        if match:
            value = match.group(1)
            # The regex is deliberately loose; anything other than the exact
            # spelling is recorded as malformed.
            if value != "Signed-off-by:":
                result.malformed_sob = value
            result.sob = match.group(2)
        else:
            result.missing_sob = True

        # Find the Upstream-Status tag
        match = status_re.search(content)
        if match:
            value = match.group(1)
            if value != "Upstream-Status:":
                result.malformed_upstream_status = value

            value = match.group(2).lower()
            # TODO: check case
            if value not in status_values:
                result.unknown_upstream_status = True
            result.upstream_status = value
        else:
            result.missing_upstream_status = True

        # Check that patches which looks like CVEs have CVE tags
        if cve_re.search(patch) or cve_re.search(content):
            if not cve_tag_re.search(content):
                result.missing_cve = True
        # TODO: extract CVE list

    return results
98
99
def analyse(results, want_blame=False, verbose=True):
    """
    Print the problems found in each patch, followed by summary statistics.

    results: dictionary mapping patch path to an object carrying the Result
             attributes
    want_blame: display blame data for each malformed patch
    verbose: display per-file results instead of just summary

    A patch is counted as Pending when its Upstream-Status value is "pending",
    or when the tag is malformed or its value is unknown. Each patch is
    counted at most once, so the Pending figure never exceeds the total.
    """

    # want_blame requires verbose, so disable blame if we're not verbose
    if want_blame and not verbose:
        want_blame = False

    total_patches = 0
    missing_sob = 0
    malformed_sob = 0
    missing_status = 0
    malformed_status = 0
    missing_cve = 0
    pending_patches = 0

    for patch in sorted(results):
        r = results[patch]
        total_patches += 1

        # Warnings are collected in the order they are reported; any warning
        # at all makes the patch eligible for blame output.
        warnings = []

        if r.missing_sob:
            missing_sob += 1
            warnings.append("Missing Signed-off-by tag (%s)" % patch)
        if r.malformed_sob:
            malformed_sob += 1
            warnings.append("Malformed Signed-off-by '%s' (%s)" % (r.malformed_sob, patch))
        if r.missing_cve:
            missing_cve += 1
            warnings.append("Missing CVE tag (%s)" % patch)
        if r.missing_upstream_status:
            missing_status += 1
            warnings.append("Missing Upstream-Status tag (%s)" % patch)
        if r.malformed_upstream_status:
            warnings.append("Malformed Upstream-Status '%s' (%s)" % (r.malformed_upstream_status, patch))
        if r.unknown_upstream_status:
            warnings.append("Unknown Upstream-Status value '%s' (%s)" % (r.upstream_status, patch))

        bad_status = r.malformed_upstream_status or r.unknown_upstream_status
        if bad_status:
            malformed_status += 1
        # Patches without a usable status are treated as pending. A single
        # combined test keeps a malformed tag whose value is "pending" from
        # being counted twice.
        if bad_status or r.upstream_status == "pending":
            pending_patches += 1

        if verbose:
            for warning in warnings:
                print(warning)

        if want_blame and warnings:
            print("\n".join(blame_patch(patch)) + "\n")

    def percent(num):
        # Format a count together with its share of all patches
        try:
            return "%d (%d%%)" % (num, round(num * 100.0 / total_patches))
        except ZeroDivisionError:
            return "N/A"

    if verbose:
        print()

    print("""Total patches found: %d
Patches missing Signed-off-by: %s
Patches with malformed Signed-off-by: %s
Patches missing CVE: %s
Patches missing Upstream-Status: %s
Patches with malformed Upstream-Status: %s
Patches in Pending state: %s""" % (total_patches,
                                   percent(missing_sob),
                                   percent(malformed_sob),
                                   percent(missing_cve),
                                   percent(missing_status),
                                   percent(malformed_status),
                                   percent(pending_patches)))
190
191
192
def histogram(results):
    """
    Print one bar per Upstream-Status value found in results.

    results: dictionary whose values carry an upstream_status attribute

    Each bar has one "#" per percentage point of all patches, rounded up, and
    is followed by the absolute count. Patches without a status are grouped
    under "No status". Only the standard library is used, so no third-party
    package needs to be installed for this report.
    """
    import collections
    import math

    # Counter keeps keys in first-seen order, which is the order bars print in
    counts = collections.Counter(r.upstream_status for r in results.values())
    total = len(results)
    for status, count in counts.items():
        bar = "#" * int(math.ceil(float(count) / total * 100))
        label = status.capitalize() if status else "No status"
        print("%-20s %s (%d)" % (label, bar, count))
200
201
# Command line entry point: list the patches tracked by git under the given
# directory, review them, print the analysis and optionally append a row of
# statistics to a JSON file and/or print a histogram.
if __name__ == "__main__":
    import argparse
    import collections
    import json
    import os.path
    import subprocess

    parser = argparse.ArgumentParser(description="Patch Review Tool")
    parser.add_argument("-b", "--blame", action="store_true", help="show blame for malformed patches")
    parser.add_argument("-v", "--verbose", action="store_true", help="show per-patch results")
    parser.add_argument("-g", "--histogram", action="store_true", help="show patch histogram")
    parser.add_argument("-j", "--json", help="update JSON")
    parser.add_argument("directory", help="directory to scan")
    args = parser.parse_args()

    # -z makes git emit NUL-terminated names without quoting, so a path that
    # contains whitespace or unusual characters is kept as a single entry
    # instead of being split apart.
    listing = subprocess.check_output(("git", "-C", args.directory, "ls-files", "-z", "recipes-*/**/*.patch", "recipes-*/**/*.diff"))
    patches = [name for name in os.fsdecode(listing).split("\0") if name]
    results = patchreview(args.directory, patches)
    analyse(results, want_blame=args.blame, verbose=args.verbose)

    if args.json:
        # Append to the existing history if there is one, else start afresh
        if os.path.isfile(args.json):
            with open(args.json) as f:
                data = json.load(f)
        else:
            data = []

        row = collections.Counter()
        row["total"] = len(results)
        row["date"] = subprocess.check_output(["git", "-C", args.directory, "show", "-s", "--pretty=format:%cd", "--date=format:%s"]).decode("utf-8").strip()
        row["commit"] = subprocess.check_output(["git", "-C", args.directory, "show", "-s", "--pretty=format:%H"]).decode("utf-8").strip()
        for r in results.values():
            if r.upstream_status in status_values:
                row[r.upstream_status] += 1
            if r.malformed_upstream_status or r.missing_upstream_status:
                row['malformed-upstream-status'] += 1
            if r.malformed_sob or r.missing_sob:
                row['malformed-sob'] += 1

        data.append(row)
        # Closing the file explicitly guarantees the JSON is flushed to disk
        with open(args.json, "w") as f:
            json.dump(data, f, sort_keys=True, indent="\t")

    if args.histogram:
        print()
        histogram(results)