blob: b22cc07f0a5f99acb1cdb91865263db1d5d1b33e [file] [log] [blame]
#! /usr/bin/env python3
#
# Copyright OpenEmbedded Contributors
#
# SPDX-License-Identifier: GPL-2.0-only
#
# TODO
# - option to just list all broken files
# - test suite
# - validate signed-off-by
status_values = ("accepted", "pending", "inappropriate", "backport", "submitted", "denied", "inactive-upstream")
class Result:
# Whether the patch has an Upstream-Status or not
missing_upstream_status = False
# If the Upstream-Status tag is malformed in some way (string for bad bit)
malformed_upstream_status = None
# If the Upstream-Status value is unknown (boolean)
unknown_upstream_status = False
# The upstream status value (Pending, etc)
upstream_status = None
# Whether the patch has a Signed-off-by or not
missing_sob = False
# Whether the Signed-off-by tag is malformed in some way
malformed_sob = False
# The Signed-off-by tag value
sob = None
# Whether a patch looks like a CVE but doesn't have a CVE tag
missing_cve = False
def blame_patch(patch):
"""
From a patch filename, return a list of "commit summary (author name <author
email>)" strings representing the history.
"""
import subprocess
return subprocess.check_output(("git", "log",
"--follow", "--find-renames", "--diff-filter=A",
"--format=%s (%aN <%aE>)",
"--", patch)).decode("utf-8").splitlines()
def patchreview(path, patches):
import re, os.path
# General pattern: start of line, optional whitespace, tag with optional
# hyphen or spaces, maybe a colon, some whitespace, then the value, all case
# insensitive.
sob_re = re.compile(r"^[\t ]*(Signed[-_ ]off[-_ ]by:?)[\t ]*(.+)", re.IGNORECASE | re.MULTILINE)
status_re = re.compile(r"^[\t ]*(Upstream[-_ ]Status:?)[\t ]*([\w-]*)", re.IGNORECASE | re.MULTILINE)
cve_tag_re = re.compile(r"^[\t ]*(CVE:)[\t ]*(.*)", re.IGNORECASE | re.MULTILINE)
cve_re = re.compile(r"cve-[0-9]{4}-[0-9]{4,6}", re.IGNORECASE)
results = {}
for patch in patches:
fullpath = os.path.join(path, patch)
result = Result()
results[fullpath] = result
content = open(fullpath, encoding='ascii', errors='ignore').read()
# Find the Signed-off-by tag
match = sob_re.search(content)
if match:
value = match.group(1)
if value != "Signed-off-by:":
result.malformed_sob = value
result.sob = match.group(2)
else:
result.missing_sob = True
# Find the Upstream-Status tag
match = status_re.search(content)
if match:
value = match.group(1)
if value != "Upstream-Status:":
result.malformed_upstream_status = value
value = match.group(2).lower()
# TODO: check case
if value not in status_values:
result.unknown_upstream_status = True
result.upstream_status = value
else:
result.missing_upstream_status = True
# Check that patches which looks like CVEs have CVE tags
if cve_re.search(patch) or cve_re.search(content):
if not cve_tag_re.search(content):
result.missing_cve = True
# TODO: extract CVE list
return results
def analyse(results, want_blame=False, verbose=True):
"""
want_blame: display blame data for each malformed patch
verbose: display per-file results instead of just summary
"""
# want_blame requires verbose, so disable blame if we're not verbose
if want_blame and not verbose:
want_blame = False
total_patches = 0
missing_sob = 0
malformed_sob = 0
missing_status = 0
malformed_status = 0
missing_cve = 0
pending_patches = 0
for patch in sorted(results):
r = results[patch]
total_patches += 1
need_blame = False
# Build statistics
if r.missing_sob:
missing_sob += 1
if r.malformed_sob:
malformed_sob += 1
if r.missing_upstream_status:
missing_status += 1
if r.malformed_upstream_status or r.unknown_upstream_status:
malformed_status += 1
# Count patches with no status as pending
pending_patches +=1
if r.missing_cve:
missing_cve += 1
if r.upstream_status == "pending":
pending_patches += 1
# Output warnings
if r.missing_sob:
need_blame = True
if verbose:
print("Missing Signed-off-by tag (%s)" % patch)
if r.malformed_sob:
need_blame = True
if verbose:
print("Malformed Signed-off-by '%s' (%s)" % (r.malformed_sob, patch))
if r.missing_cve:
need_blame = True
if verbose:
print("Missing CVE tag (%s)" % patch)
if r.missing_upstream_status:
need_blame = True
if verbose:
print("Missing Upstream-Status tag (%s)" % patch)
if r.malformed_upstream_status:
need_blame = True
if verbose:
print("Malformed Upstream-Status '%s' (%s)" % (r.malformed_upstream_status, patch))
if r.unknown_upstream_status:
need_blame = True
if verbose:
print("Unknown Upstream-Status value '%s' (%s)" % (r.upstream_status, patch))
if want_blame and need_blame:
print("\n".join(blame_patch(patch)) + "\n")
def percent(num):
try:
return "%d (%d%%)" % (num, round(num * 100.0 / total_patches))
except ZeroDivisionError:
return "N/A"
if verbose:
print()
print("""Total patches found: %d
Patches missing Signed-off-by: %s
Patches with malformed Signed-off-by: %s
Patches missing CVE: %s
Patches missing Upstream-Status: %s
Patches with malformed Upstream-Status: %s
Patches in Pending state: %s""" % (total_patches,
percent(missing_sob),
percent(malformed_sob),
percent(missing_cve),
percent(missing_status),
percent(malformed_status),
percent(pending_patches)))
def histogram(results):
from toolz import recipes, dicttoolz
import math
counts = recipes.countby(lambda r: r.upstream_status, results.values())
bars = dicttoolz.valmap(lambda v: "#" * int(math.ceil(float(v) / len(results) * 100)), counts)
for k in bars:
print("%-20s %s (%d)" % (k.capitalize() if k else "No status", bars[k], counts[k]))
if __name__ == "__main__":
import argparse, subprocess, os
args = argparse.ArgumentParser(description="Patch Review Tool")
args.add_argument("-b", "--blame", action="store_true", help="show blame for malformed patches")
args.add_argument("-v", "--verbose", action="store_true", help="show per-patch results")
args.add_argument("-g", "--histogram", action="store_true", help="show patch histogram")
args.add_argument("-j", "--json", help="update JSON")
args.add_argument("directory", help="directory to scan")
args = args.parse_args()
patches = subprocess.check_output(("git", "-C", args.directory, "ls-files", "recipes-*/**/*.patch", "recipes-*/**/*.diff")).decode("utf-8").split()
results = patchreview(args.directory, patches)
analyse(results, want_blame=args.blame, verbose=args.verbose)
if args.json:
import json, os.path, collections
if os.path.isfile(args.json):
data = json.load(open(args.json))
else:
data = []
row = collections.Counter()
row["total"] = len(results)
row["date"] = subprocess.check_output(["git", "-C", args.directory, "show", "-s", "--pretty=format:%cd", "--date=format:%s"]).decode("utf-8").strip()
row["commit"] = subprocess.check_output(["git", "-C", args.directory, "show", "-s", "--pretty=format:%H"]).decode("utf-8").strip()
for r in results.values():
if r.upstream_status in status_values:
row[r.upstream_status] += 1
if r.malformed_upstream_status or r.missing_upstream_status:
row['malformed-upstream-status'] += 1
if r.malformed_sob or r.missing_sob:
row['malformed-sob'] += 1
data.append(row)
json.dump(data, open(args.json, "w"), sort_keys=True, indent="\t")
if args.histogram:
print()
histogram(results)