blob: 072166504da9b602e72f9efa2a8c3d5c46fb937f [file] [log] [blame]
#! /usr/bin/env python3

# TODO
# - option to just list all broken files
# - test suite
# - validate signed-off-by
7
# Recognised Upstream-Status values.  They are spelled in lower case because
# the value parsed from a patch is lower-cased before being looked up here.
status_values = (
    "accepted",
    "pending",
    "inappropriate",
    "backport",
    "submitted",
    "denied",
)
Brad Bishopd7bf8c12018-02-25 22:55:05 -05009
class Result:
    """
    Findings for a single patch file.

    Every field starts at its "nothing detected" default; patchreview()
    overwrites the relevant ones on a fresh instance for each patch.
    """

    # --- Upstream-Status ---
    # True when no Upstream-Status tag was found in the patch
    missing_upstream_status = False
    # None when the tag is spelled exactly "Upstream-Status:", otherwise the
    # tag text as it was written in the patch
    malformed_upstream_status = None
    # True when the status value is not one of the recognised values
    unknown_upstream_status = False
    # The lower-cased status value (pending, etc), or None if there is no tag
    upstream_status = None

    # --- Signed-off-by ---
    # True when no Signed-off-by tag was found in the patch
    missing_sob = False
    # False when the tag is spelled exactly "Signed-off-by:", otherwise the
    # tag text as it was written in the patch
    malformed_sob = False
    # The text following the Signed-off-by tag, or None if there is no tag
    sob = None

    # --- CVE ---
    # True when the patch mentions a CVE identifier but has no CVE: tag
    missing_cve = False
27
def blame_patch(patch):
    """
    Look up the git history of a patch file.

    patch: filename of the patch, passed to git as a pathspec

    Returns a list of "commit summary (author name <author email>)" strings,
    one per line of output from git log.
    """
    import subprocess

    command = [
        "git", "log",
        "--follow", "--find-renames", "--diff-filter=A",
        "--format=%s (%aN <%aE>)",
        "--", patch,
    ]
    raw_log = subprocess.check_output(command)
    return raw_log.decode("utf-8").splitlines()
38
def patchreview(path, patches):
    """
    Scan patch files for Signed-off-by, Upstream-Status and CVE tags.

    path: directory that the entries of patches are relative to
    patches: iterable of patch filenames, relative to path

    Returns a dict mapping the joined path of each patch to a Result holding
    what was found in that patch.
    """
    import os.path
    import re

    # General pattern: start of line, optional whitespace, tag with optional
    # hyphen or spaces, maybe a colon, some whitespace, then the value, all case
    # insensitive.
    sob_re = re.compile(r"^[\t ]*(Signed[-_ ]off[-_ ]by:?)[\t ]*(.+)", re.IGNORECASE | re.MULTILINE)
    status_re = re.compile(r"^[\t ]*(Upstream[-_ ]Status:?)[\t ]*(\w*)", re.IGNORECASE | re.MULTILINE)
    cve_tag_re = re.compile(r"^[\t ]*(CVE:)[\t ]*(.*)", re.IGNORECASE | re.MULTILINE)
    cve_re = re.compile(r"cve-[0-9]{4}-[0-9]{4,6}", re.IGNORECASE)

    results = {}

    for patch in patches:

        fullpath = os.path.join(path, patch)
        result = Result()
        results[fullpath] = result

        # Bytes that are not ASCII are dropped instead of raising.  The with
        # block closes each file as soon as it has been read, so scanning many
        # patches does not accumulate open file handles.
        with open(fullpath, encoding='ascii', errors='ignore') as patchfile:
            content = patchfile.read()

        # Find the Signed-off-by tag
        match = sob_re.search(content)
        if match:
            value = match.group(1)
            # The regex is deliberately loose; anything but the canonical
            # spelling is recorded as malformed
            if value != "Signed-off-by:":
                result.malformed_sob = value
            result.sob = match.group(2)
        else:
            result.missing_sob = True

        # Find the Upstream-Status tag
        match = status_re.search(content)
        if match:
            value = match.group(1)
            if value != "Upstream-Status:":
                result.malformed_upstream_status = value

            value = match.group(2).lower()
            # TODO: check case
            if value not in status_values:
                result.unknown_upstream_status = True
            result.upstream_status = value
        else:
            result.missing_upstream_status = True

        # Check that patches which looks like CVEs have CVE tags
        if cve_re.search(patch) or cve_re.search(content):
            if not cve_tag_re.search(content):
                result.missing_cve = True
        # TODO: extract CVE list

    return results
93
94
def analyse(results, want_blame=False, verbose=True):
    """
    Print review statistics for a set of patches.

    results: dict mapping patch path to the Result for that patch
    want_blame: display blame data for each malformed patch
    verbose: display per-file results instead of just summary

    Nothing is returned; all output goes to standard output.
    """

    # want_blame requires verbose, so disable blame if we're not verbose
    if want_blame and not verbose:
        want_blame = False

    total_patches = 0
    missing_sob = 0
    malformed_sob = 0
    missing_status = 0
    malformed_status = 0
    missing_cve = 0
    pending_patches = 0

    for patch in sorted(results):
        r = results[patch]
        total_patches += 1

        bad_status = bool(r.malformed_upstream_status or r.unknown_upstream_status)

        # Build statistics
        if r.missing_sob:
            missing_sob += 1
        if r.malformed_sob:
            malformed_sob += 1
        if r.missing_upstream_status:
            missing_status += 1
        if bad_status:
            malformed_status += 1
        if r.missing_cve:
            missing_cve += 1
        # Patches with a malformed or unknown status count as pending, as do
        # patches explicitly marked pending.  A patch can be both (a
        # misspelled tag with the value "pending"), so test both conditions
        # together to count each patch at most once.
        if bad_status or r.upstream_status == "pending":
            pending_patches += 1

        # Collect warnings; any warning makes the patch eligible for blame
        warnings = []
        if r.missing_sob:
            warnings.append("Missing Signed-off-by tag (%s)" % patch)
        if r.malformed_sob:
            warnings.append("Malformed Signed-off-by '%s' (%s)" % (r.malformed_sob, patch))
        if r.missing_cve:
            warnings.append("Missing CVE tag (%s)" % patch)
        if r.missing_upstream_status:
            warnings.append("Missing Upstream-Status tag (%s)" % patch)
        if r.malformed_upstream_status:
            warnings.append("Malformed Upstream-Status '%s' (%s)" % (r.malformed_upstream_status, patch))
        if r.unknown_upstream_status:
            warnings.append("Unknown Upstream-Status value '%s' (%s)" % (r.upstream_status, patch))

        if verbose:
            for warning in warnings:
                print(warning)

        if want_blame and warnings:
            print("\n".join(blame_patch(patch)) + "\n")

    def percent(num):
        # With no patches at all there is no meaningful percentage
        try:
            return "%d (%d%%)" % (num, round(num * 100.0 / total_patches))
        except ZeroDivisionError:
            return "N/A"

    if verbose:
        print()

    print("""Total patches found: %d
Patches missing Signed-off-by: %s
Patches with malformed Signed-off-by: %s
Patches missing CVE: %s
Patches missing Upstream-Status: %s
Patches with malformed Upstream-Status: %s
Patches in Pending state: %s""" % (total_patches,
                                   percent(missing_sob),
                                   percent(malformed_sob),
                                   percent(missing_cve),
                                   percent(missing_status),
                                   percent(malformed_status),
                                   percent(pending_patches)))
185
186
187
def histogram(results):
    """
    Print a text histogram of Upstream-Status values.

    results: dict mapping patch path to an object with an upstream_status
             attribute

    One line is printed per distinct status, in order of first appearance.
    The bar holds one "#" per percent of all patches (rounded up), followed by
    the absolute count.  Patches whose status is empty or None are grouped
    under "No status".
    """
    from collections import Counter

    total = len(results)
    counts = Counter(r.upstream_status for r in results.values())

    for status, count in counts.items():
        # Ceiling division done on integers: the float form
        # (count / total * 100) can land a hair above a whole number, e.g.
        # 7 of 100 gives 7.000000000000001, and ceil() would then draw one
        # "#" too many.
        width = -(-count * 100 // total)
        label = status.capitalize() if status else "No status"
        print("%-20s %s (%d)" % (label, "#" * width, count))
195
196
# Command-line entry point: list the patches tracked by git under the given
# directory, review them, print the analysis, and optionally append a row of
# statistics to a JSON file and/or print a histogram.
if __name__ == "__main__":
    import argparse
    import collections
    import json
    import os.path
    import subprocess

    parser = argparse.ArgumentParser(description="Patch Review Tool")
    parser.add_argument("-b", "--blame", action="store_true", help="show blame for malformed patches")
    parser.add_argument("-v", "--verbose", action="store_true", help="show per-patch results")
    parser.add_argument("-g", "--histogram", action="store_true", help="show patch histogram")
    parser.add_argument("-j", "--json", help="update JSON")
    parser.add_argument("directory", help="directory to scan")
    args = parser.parse_args()

    # -z makes git emit NUL-terminated, unquoted paths, so filenames that
    # contain whitespace or unusual characters survive intact; splitting on
    # arbitrary whitespace would break such names apart.
    listing = subprocess.check_output(("git", "-C", args.directory, "ls-files", "-z",
                                       "recipes-*/**/*.patch", "recipes-*/**/*.diff")).decode("utf-8")
    patches = [name for name in listing.split("\0") if name]
    results = patchreview(args.directory, patches)
    analyse(results, want_blame=args.blame, verbose=args.verbose)

    if args.json:
        # Append to the existing history if there is one, otherwise start anew
        if os.path.isfile(args.json):
            with open(args.json) as infile:
                data = json.load(infile)
        else:
            data = []

        row = collections.Counter()
        row["total"] = len(results)
        # Committer date of the checked-out commit, formatted by git with the
        # strftime-style "%s" specifier
        row["date"] = subprocess.check_output(["git", "-C", args.directory, "show", "-s", "--pretty=format:%cd", "--date=format:%s"]).decode("utf-8").strip()
        for r in results.values():
            if r.upstream_status in status_values:
                row[r.upstream_status] += 1
            if r.malformed_upstream_status or r.missing_upstream_status:
                row['malformed-upstream-status'] += 1
            if r.malformed_sob or r.missing_sob:
                row['malformed-sob'] += 1

        data.append(row)
        # The with block guarantees the JSON is flushed and the file closed
        with open(args.json, "w") as outfile:
            json.dump(data, outfile)

    if args.histogram:
        print()
        histogram(results)
235 histogram(results)