| Brad Bishop | d7bf8c1 | 2018-02-25 22:55:05 -0500 | [diff] [blame] | 1 | #! /usr/bin/env python3 | 
| Brad Bishop | c342db3 | 2019-05-15 21:57:59 -0400 | [diff] [blame] | 2 | # | 
| Patrick Williams | 92b42cb | 2022-09-03 06:53:57 -0500 | [diff] [blame] | 3 | # Copyright OpenEmbedded Contributors | 
|  | 4 | # | 
| Brad Bishop | c342db3 | 2019-05-15 21:57:59 -0400 | [diff] [blame] | 5 | # SPDX-License-Identifier: GPL-2.0-only | 
|  | 6 | # | 
| Brad Bishop | d7bf8c1 | 2018-02-25 22:55:05 -0500 | [diff] [blame] | 7 |  | 
|  | 8 | # TODO | 
|  | 9 | # - option to just list all broken files | 
|  | 10 | # - test suite | 
|  | 11 | # - validate signed-off-by | 
|  | 12 |  | 
# Upstream-Status values accepted as valid; values are lower-cased before
# being checked against this tuple.
status_values = (
    "accepted",
    "pending",
    "inappropriate",
    "backport",
    "submitted",
    "denied",
    "inactive-upstream",
)
| Brad Bishop | d7bf8c1 | 2018-02-25 22:55:05 -0500 | [diff] [blame] | 14 |  | 
class Result:
    """Review outcome for a single patch; the defaults mean "nothing found yet"."""

    # True when the patch carries no Upstream-Status tag at all
    missing_upstream_status = False
    # The offending tag text when the Upstream-Status tag is malformed,
    # otherwise None
    malformed_upstream_status = None
    # True when the Upstream-Status value is not one of the known values
    unknown_upstream_status = False
    # The Upstream-Status value itself (Pending, etc), or None if absent
    upstream_status = None
    # True when the patch carries no Signed-off-by tag at all
    missing_sob = False
    # False by default; holds the offending tag text when the Signed-off-by
    # tag is malformed
    malformed_sob = False
    # The Signed-off-by tag value, or None if absent
    sob = None
    # True when the patch looks like a CVE fix but has no CVE tag
    missing_cve = False
|  | 32 |  | 
def blame_patch(patch):
    """
    Return the history of a patch file as a list of
    "commit summary (author name <author email>)" strings, one entry per
    line of output from git log.
    """
    import subprocess

    command = (
        "git", "log",
        "--follow", "--find-renames", "--diff-filter=A",
        "--format=%s (%aN <%aE>)",
        "--", patch,
    )
    raw_log = subprocess.check_output(command)
    return raw_log.decode("utf-8").splitlines()
|  | 43 |  | 
def patchreview(path, patches):
    """
    Scan patch files for Signed-off-by, Upstream-Status and CVE tags.

    path: directory that the patch names are relative to
    patches: iterable of patch file names; each is joined onto path

    Returns a dict mapping the full path of every patch to a Result object
    describing which tags were found, missing or malformed.
    """
    import re
    import os.path

    # General pattern: start of line, optional whitespace, tag with optional
    # hyphen or spaces, maybe a colon, some whitespace, then the value, all case
    # insensitive.
    sob_re = re.compile(r"^[\t ]*(Signed[-_ ]off[-_ ]by:?)[\t ]*(.+)", re.IGNORECASE | re.MULTILINE)
    status_re = re.compile(r"^[\t ]*(Upstream[-_ ]Status:?)[\t ]*([\w-]*)", re.IGNORECASE | re.MULTILINE)
    cve_tag_re = re.compile(r"^[\t ]*(CVE:)[\t ]*(.*)", re.IGNORECASE | re.MULTILINE)
    cve_re = re.compile(r"cve-[0-9]{4}-[0-9]{4,6}", re.IGNORECASE)

    results = {}

    for patch in patches:

        fullpath = os.path.join(path, patch)
        result = Result()
        results[fullpath] = result

        # Non-ASCII bytes are silently dropped while decoding. The context
        # manager closes each file as soon as it has been read, so scanning
        # many patches does not accumulate open handles.
        with open(fullpath, encoding='ascii', errors='ignore') as patch_file:
            content = patch_file.read()

        # Find the Signed-off-by tag
        match = sob_re.search(content)
        if match:
            value = match.group(1)
            # Anything other than the canonical spelling is recorded as the
            # malformed tag text
            if value != "Signed-off-by:":
                result.malformed_sob = value
            result.sob = match.group(2)
        else:
            result.missing_sob = True

        # Find the Upstream-Status tag
        match = status_re.search(content)
        if match:
            value = match.group(1)
            if value != "Upstream-Status:":
                result.malformed_upstream_status = value

            value = match.group(2).lower()
            # TODO: check case
            if value not in status_values:
                result.unknown_upstream_status = True
            result.upstream_status = value
        else:
            result.missing_upstream_status = True

        # Check that patches which looks like CVEs have CVE tags
        if cve_re.search(patch) or cve_re.search(content):
            if not cve_tag_re.search(content):
                result.missing_cve = True
            # TODO: extract CVE list

    return results
|  | 98 |  | 
|  | 99 |  | 
def analyse(results, want_blame=False, verbose=True):
    """
    Print warnings for problematic patches followed by summary statistics.

    results: dict mapping patch path to its review result object
    want_blame: display blame data for each malformed patch
    verbose: display per-file results instead of just summary
    """

    # want_blame requires verbose, so disable blame if we're not verbose
    if want_blame and not verbose:
        want_blame = False

    total_patches = 0
    missing_sob = 0
    malformed_sob = 0
    missing_status = 0
    malformed_status = 0
    missing_cve = 0
    pending_patches = 0

    for patch in sorted(results):
        r = results[patch]
        total_patches += 1
        need_blame = False

        # Build statistics
        if r.missing_sob:
            missing_sob += 1
        if r.malformed_sob:
            malformed_sob += 1
        if r.missing_upstream_status:
            missing_status += 1
        bad_status = r.malformed_upstream_status or r.unknown_upstream_status
        if bad_status:
            malformed_status += 1
        if r.missing_cve:
            missing_cve += 1
        # Patches with a malformed or unknown status are counted as pending,
        # alongside those explicitly marked pending. Each patch is counted at
        # most once so the pending total can never exceed the patch total.
        if bad_status or r.upstream_status == "pending":
            pending_patches += 1

        # Output warnings
        if r.missing_sob:
            need_blame = True
            if verbose:
                print("Missing Signed-off-by tag (%s)" % patch)
        if r.malformed_sob:
            need_blame = True
            if verbose:
                print("Malformed Signed-off-by '%s' (%s)" % (r.malformed_sob, patch))
        if r.missing_cve:
            need_blame = True
            if verbose:
                print("Missing CVE tag (%s)" % patch)
        if r.missing_upstream_status:
            need_blame = True
            if verbose:
                print("Missing Upstream-Status tag (%s)" % patch)
        if r.malformed_upstream_status:
            need_blame = True
            if verbose:
                print("Malformed Upstream-Status '%s' (%s)" % (r.malformed_upstream_status, patch))
        if r.unknown_upstream_status:
            need_blame = True
            if verbose:
                print("Unknown Upstream-Status value '%s' (%s)" % (r.upstream_status, patch))

        if want_blame and need_blame:
            print("\n".join(blame_patch(patch)) + "\n")

    def percent(num):
        # Format a count with its share of all patches; with no patches at
        # all there is no meaningful percentage.
        try:
            return "%d (%d%%)" % (num, round(num * 100.0 / total_patches))
        except ZeroDivisionError:
            return "N/A"

    if verbose:
        print()

    print("""Total patches found: %d
Patches missing Signed-off-by: %s
Patches with malformed Signed-off-by: %s
Patches missing CVE: %s
Patches missing Upstream-Status: %s
Patches with malformed Upstream-Status: %s
Patches in Pending state: %s""" % (total_patches,
                                   percent(missing_sob),
                                   percent(malformed_sob),
                                   percent(missing_cve),
                                   percent(missing_status),
                                   percent(malformed_status),
                                   percent(pending_patches)))
|  | 190 |  | 
|  | 191 |  | 
|  | 192 |  | 
def histogram(results):
    """
    Print a text histogram of the Upstream-Status values in results.

    results: dict whose values expose an upstream_status attribute

    One line is printed per distinct status, in first-seen order: the
    capitalised status (or "No status" when it is empty/None), a bar with one
    "#" per percent of all patches (rounded up), and the raw count.
    """
    # Standard library only: the counting does not need a third-party
    # package, so the histogram works on a stock Python install.
    import collections
    import math

    counts = collections.Counter(r.upstream_status for r in results.values())
    total = len(results)
    # With no results the loop body never runs, so total is never a zero divisor
    for status, count in counts.items():
        bar = "#" * int(math.ceil(float(count) / total * 100))
        label = status.capitalize() if status else "No status"
        print("%-20s %s (%d)" % (label, bar, count))
|  | 200 |  | 
|  | 201 |  | 
if __name__ == "__main__":
    # Command-line entry point: review every patch tracked by git under the
    # recipes-* trees of the given directory, print the analysis, and
    # optionally append a statistics row to a JSON file and/or print a
    # histogram of Upstream-Status values.
    import argparse
    import os
    import subprocess

    args = argparse.ArgumentParser(description="Patch Review Tool")
    args.add_argument("-b", "--blame", action="store_true", help="show blame for malformed patches")
    args.add_argument("-v", "--verbose", action="store_true", help="show per-patch results")
    args.add_argument("-g", "--histogram", action="store_true", help="show patch histogram")
    args.add_argument("-j", "--json", help="update JSON")
    args.add_argument("directory", help="directory to scan")
    args = args.parse_args()

    # -z makes git emit NUL-terminated, unquoted paths, so file names that
    # contain whitespace are kept whole instead of being split into pieces.
    listing = subprocess.check_output(("git", "-C", args.directory, "ls-files", "-z", "recipes-*/**/*.patch", "recipes-*/**/*.diff")).decode("utf-8")
    patches = [name for name in listing.split("\0") if name]
    results = patchreview(args.directory, patches)
    analyse(results, want_blame=args.blame, verbose=args.verbose)

    if args.json:
        import json
        import os.path
        import collections

        # Start from the existing history when the file is already there
        if os.path.isfile(args.json):
            with open(args.json) as json_file:
                data = json.load(json_file)
        else:
            data = []

        row = collections.Counter()
        row["total"] = len(results)
        # Commit date (as seconds since the epoch) and hash of HEAD
        row["date"] = subprocess.check_output(["git", "-C", args.directory, "show", "-s", "--pretty=format:%cd", "--date=format:%s"]).decode("utf-8").strip()
        row["commit"] = subprocess.check_output(["git", "-C", args.directory, "show", "-s", "--pretty=format:%H"]).decode("utf-8").strip()
        for r in results.values():
            if r.upstream_status in status_values:
                row[r.upstream_status] += 1
            if r.malformed_upstream_status or r.missing_upstream_status:
                row['malformed-upstream-status'] += 1
            if r.malformed_sob or r.missing_sob:
                row['malformed-sob'] += 1

        data.append(row)
        # Close the file explicitly so the JSON is flushed to disk here
        # rather than whenever the handle happens to be collected.
        with open(args.json, "w") as json_file:
            json.dump(data, json_file, sort_keys=True, indent="\t")

    if args.histogram:
        print()
        histogram(results)