Blame - meta-arm/ci/patchreview - mdmillerii/openbmc

blob: b23eda1f14319f09d414cd9060e3c3259f26e7d3 [file] [log] [blame]

Brad Bishop	bec4ebc	2022-08-03 09:55:16 -0400	[diff] [blame]	1	#! /usr/bin/env python3
				2	#
				3	# SPDX-License-Identifier: GPL-2.0-only
				4	#
				5
				6	# TODO
				7	# - option to just list all broken files
				8	# - test suite
				9	# - validate signed-off-by
				10
				11	import argparse
				12	import collections
				13	import json
				14	import os
				15	import re
				16	import subprocess
				17
				18	status_values = (
				19	"accepted",
				20	"pending",
				21	"inappropriate",
				22	"backport",
				23	"submitted",
				24	"denied",
				25	)
				26
				27
				28	class PatchResult:
				29	# Whether the patch has an Upstream-Status or not
				30	missing_upstream_status = False
				31	# If the Upstream-Status tag is malformed in some way (string for bad bit)
				32	malformed_upstream_status = None
				33	# If the Upstream-Status value is unknown (boolean)
				34	unknown_upstream_status = False
				35	# The upstream status value (Pending, etc)
				36	upstream_status = None
				37	# Whether the patch has a Signed-off-by or not
				38	missing_sob = False
				39	# Whether the Signed-off-by tag is malformed in some way
				40	malformed_sob = False
				41	# The Signed-off-by tag value
				42	sob = None
				43	# Whether a patch looks like a CVE but doesn't have a CVE tag
				44	missing_cve = False
				45
				46
				47	class Summary:
				48	total = 0
				49	cve_missing = 0
				50	sob_missing = 0
				51	sob_malformed = 0
				52	status_missing = 0
				53	status_malformed = 0
				54	status_pending = 0
				55
				56	def blame_patch(patch):
				57	"""
				58	From a patch filename, return a list of "commit summary (author name <author
				59	email>)" strings representing the history.
				60	"""
				61	return subprocess.check_output(("git", "log",
				62	"--follow", "--find-renames", "--diff-filter=A",
				63	"--format=%s (%aN <%aE>)",
				64	"--", patch)).decode("utf-8").splitlines()
				65
				66	def patchreview(patches):
				67	# General pattern: start of line, optional whitespace, tag with optional
				68	# hyphen or spaces, maybe a colon, some whitespace, then the value, all case
				69	# insensitive.
				70	sob_re = re.compile(r"^[\t ](Signed[-_ ]off[-_ ]by:?)[\t ](.+)", re.IGNORECASE \| re.MULTILINE)
				71	status_re = re.compile(r"^[\t ](Upstream[-_ ]Status:?)[\t ](\w*)", re.IGNORECASE \| re.MULTILINE)
				72	cve_tag_re = re.compile(r"^[\t ](CVE:)[\t ](.*)", re.IGNORECASE \| re.MULTILINE)
				73	cve_re = re.compile(r"cve-[0-9]{4}-[0-9]{4,6}", re.IGNORECASE)
				74
				75	results = {}
				76
				77	for patch in patches:
				78
				79	result = PatchResult()
				80	results[patch] = result
				81
				82	content = open(patch, encoding="ascii", errors="ignore").read()
				83
				84	# Find the Signed-off-by tag
				85	match = sob_re.search(content)
				86	if match:
				87	value = match.group(1)
				88	if value != "Signed-off-by:":
				89	result.malformed_sob = value
				90	result.sob = match.group(2)
				91	else:
				92	result.missing_sob = True
				93
				94	# Find the Upstream-Status tag
				95	match = status_re.search(content)
				96	if match:
				97	value = match.group(1)
				98	if value != "Upstream-Status:":
				99	result.malformed_upstream_status = value
				100
				101	value = match.group(2).lower()
				102	# TODO: check case
				103	if value not in status_values:
				104	result.unknown_upstream_status = True
				105	result.upstream_status = value
				106	else:
				107	result.missing_upstream_status = True
				108
				109	# Check that patches which looks like CVEs have CVE tags
				110	if cve_re.search(patch) or cve_re.search(content):
				111	if not cve_tag_re.search(content):
				112	result.missing_cve = True
				113	# TODO: extract CVE list
				114
				115	return results
				116
				117
				118	def analyse(results, want_blame=False, verbose=True):
				119	"""
				120	want_blame: display blame data for each malformed patch
				121	verbose: display per-file results instead of just summary
				122	"""
				123
				124	# want_blame requires verbose, so disable blame if we're not verbose
				125	if want_blame and not verbose:
				126	want_blame = False
				127
				128	summary = Summary()
				129
				130	for patch in sorted(results):
				131	r = results[patch]
				132	summary.total += 1
				133	need_blame = False
				134
				135	# Build statistics
				136	if r.missing_sob:
				137	summary.sob_missing += 1
				138	if r.malformed_sob:
				139	summary.sob_malformed += 1
				140	if r.missing_upstream_status:
				141	summary.status_missing += 1
				142	if r.malformed_upstream_status or r.unknown_upstream_status:
				143	summary.status_malformed += 1
				144	# Count patches with no status as pending
				145	summary.status_pending += 1
				146	if r.missing_cve:
				147	summary.cve_missing += 1
				148	if r.upstream_status == "pending":
				149	summary.status_pending += 1
				150
				151	# Output warnings
				152	if r.missing_sob:
				153	need_blame = True
				154	if verbose:
				155	print("Missing Signed-off-by tag (%s)" % patch)
				156	if r.malformed_sob:
				157	need_blame = True
				158	if verbose:
				159	print("Malformed Signed-off-by '%s' (%s)" % (r.malformed_sob, patch))
				160	if r.missing_cve:
				161	need_blame = True
				162	if verbose:
				163	print("Missing CVE tag (%s)" % patch)
				164	if r.missing_upstream_status:
				165	need_blame = True
				166	if verbose:
				167	print("Missing Upstream-Status tag (%s)" % patch)
				168	if r.malformed_upstream_status:
				169	need_blame = True
				170	if verbose:
				171	print("Malformed Upstream-Status '%s' (%s)" % (r.malformed_upstream_status, patch))
				172	if r.unknown_upstream_status:
				173	need_blame = True
				174	if verbose:
				175	print("Unknown Upstream-Status value '%s' (%s)" % (r.upstream_status, patch))
				176
				177	if want_blame and need_blame:
				178	print("\n".join(blame_patch(patch)) + "\n")
				179
				180	return summary
				181
				182
				183	def display_summary(summary, verbose):
				184	def percent(num):
				185	try:
				186	return "%d (%d%%)" % (num, round(num * 100.0 / summary.total))
				187	except ZeroDivisionError:
				188	return "N/A"
				189
				190	if verbose:
				191	print()
				192
				193	print("""Total patches found: %d
				194	Patches missing Signed-off-by: %s
				195	Patches with malformed Signed-off-by: %s
				196	Patches missing CVE: %s
				197	Patches missing Upstream-Status: %s
				198	Patches with malformed Upstream-Status: %s
				199	Patches in Pending state: %s""" % (summary.total,
				200	percent(summary.sob_missing),
				201	percent(summary.sob_malformed),
				202	percent(summary.cve_missing),
				203	percent(summary.status_missing),
				204	percent(summary.status_malformed),
				205	percent(summary.status_pending)))
				206
				207
				208	def generate_metrics(summary, output):
				209	# https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md
				210	# Summary attribute name, MetricPoint help
				211	mapping = (
				212	("total", "Total patches"),
				213	("cve_missing", "Patches missing CVE tag"),
				214	("sob_malformed", "Patches with malformed Signed-off-by"),
				215	("sob_missing", "Patches with missing Signed-off-by"),
				216	("status_malformed", "Patches with malformed Upstream-Status"),
				217	("status_missing", "Patches with missing Upstream-Status"),
				218	("status_pending", "Patches with Pending Upstream-Status")
				219	)
				220	for attr, help in mapping:
				221	metric = f"patch_check_{attr}"
				222	value = getattr(summary, attr)
				223	output.write(f"""
				224	# TYPE {metric} gauge
				225	# HELP {help}
				226	{metric} {value}
				227	""")
				228	output.write("\n# EOF\n")
				229
				230	def histogram(results):
				231	import math
				232
				233	from toolz import dicttoolz, recipes
				234	counts = recipes.countby(lambda r: r.upstream_status, results.values())
				235	bars = dicttoolz.valmap(lambda v: "#" * int(math.ceil(float(v) / len(results) * 100)), counts)
				236	for k in bars:
				237	print("%-20s %s (%d)" % (k.capitalize() if k else "No status", bars[k], counts[k]))
				238
				239	def gather_patches(directories):
				240	patches = []
				241	for directory in directories:
				242	filenames = subprocess.check_output(("git", "-C", directory, "ls-files", "recipes-//.patch", "recipes-//.diff")).decode("utf-8").split()
				243	patches += [os.path.join(directory, f) for f in filenames]
				244	return patches
				245
				246	if __name__ == "__main__":
				247	args = argparse.ArgumentParser(description="Patch Review Tool")
				248	args.add_argument("-b", "--blame", action="store_true", help="show blame for malformed patches")
				249	args.add_argument("-v", "--verbose", action="store_true", help="show per-patch results")
				250	args.add_argument("-g", "--histogram", action="store_true", help="show patch histogram")
				251	args.add_argument("-j", "--json", help="update JSON")
				252	args.add_argument("-m", "--metrics", type=argparse.FileType('w'), help="write OpenMetrics")
				253	args.add_argument("dirs", metavar="DIRECTORY", nargs="+", help="directory to scan")
				254	args = args.parse_args()
				255
				256	patches = gather_patches(args.dirs)
				257	results = patchreview(patches)
				258	summary = analyse(results, want_blame=args.blame, verbose=args.verbose)
				259	display_summary(summary, verbose=args.verbose)
				260
				261	if args.json:
				262	if os.path.isfile(args.json):
				263	data = json.load(open(args.json))
				264	else:
				265	data = []
				266
				267	row = collections.Counter()
				268	row["total"] = len(results)
				269	row["date"] = subprocess.check_output(["git", "-C", args.dirs[0], "show", "-s", "--pretty=format:%cd", "--date=format:%s"]).decode("utf-8").strip()
				270	for r in results.values():
				271	if r.upstream_status in status_values:
				272	row[r.upstream_status] += 1
				273	if r.malformed_upstream_status or r.missing_upstream_status:
				274	row["malformed-upstream-status"] += 1
				275	if r.malformed_sob or r.missing_sob:
				276	row["malformed-sob"] += 1
				277
				278	data.append(row)
				279	json.dump(data, open(args.json, "w"))
				280
				281	if args.metrics:
				282	generate_metrics(summary, args.metrics)
				283
				284	if args.histogram:
				285	print()
				286	histogram(results)