blob: 1facbcd85e1e58da0bb70ba08904a1f919fd1d2c [file] [log] [blame]
Brad Bishop40320b12019-03-26 16:08:25 -04001# resulttool - regression analysis
2#
3# Copyright (c) 2019, Intel Corporation.
4# Copyright (c) 2019, Linux Foundation
5#
Brad Bishopc342db32019-05-15 21:57:59 -04006# SPDX-License-Identifier: GPL-2.0-only
Brad Bishop40320b12019-03-26 16:08:25 -04007#
Brad Bishopc342db32019-05-15 21:57:59 -04008
Brad Bishop40320b12019-03-26 16:08:25 -04009import resulttool.resultutils as resultutils
Brad Bishop40320b12019-03-26 16:08:25 -040010
11from oeqa.utils.git import GitRepo
12import oeqa.utils.gitarchive as gitarchive
13
# Maps a configuration TEST_TYPE value to the name of the metadata key that
# must also match between two runs for them to be comparable (used by
# metadata_matches() below).
METADATA_MATCH_TABLE = {
    "oeselftest": "OESELFTEST_METADATA"
}

# Known oe-selftest run configurations, used by guess_oeselftest_metadata() to
# reconstruct OESELFTEST_METADATA for older results stored without it.
# This table is tightly coupled to the autobuilder's config.json; the keys
# name autobuilder targets and the field names mirror oe-selftest invocation
# parameters (run_all_tests, run_tests, skips, machine, select_tags,
# exclude_tags).
OESELFTEST_METADATA_GUESS_TABLE={
    "trigger-build-posttrigger": {
        "run_all_tests": False,
        "run_tests":["buildoptions.SourceMirroring.test_yocto_source_mirror"],
        "skips": None,
        "machine": None,
        "select_tags":None,
        "exclude_tags": None
    },
    "reproducible": {
        "run_all_tests": False,
        "run_tests":["reproducible"],
        "skips": None,
        "machine": None,
        "select_tags":None,
        "exclude_tags": None
    },
    "arch-qemu-quick": {
        "run_all_tests": True,
        "run_tests":None,
        "skips": None,
        "machine": None,
        "select_tags":["machine"],
        "exclude_tags": None
    },
    "arch-qemu-full-x86-or-x86_64": {
        "run_all_tests": True,
        "run_tests":None,
        "skips": None,
        "machine": None,
        "select_tags":["machine", "toolchain-system"],
        "exclude_tags": None
    },
    "arch-qemu-full-others": {
        "run_all_tests": True,
        "run_tests":None,
        "skips": None,
        "machine": None,
        "select_tags":["machine", "toolchain-user"],
        "exclude_tags": None
    },
    "selftest": {
        "run_all_tests": True,
        "run_tests":None,
        "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"],
        "machine": None,
        "select_tags":None,
        "exclude_tags": ["machine", "toolchain-system", "toolchain-user"]
    },
    "bringup": {
        "run_all_tests": True,
        "run_tests":None,
        "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"],
        "machine": None,
        "select_tags":None,
        "exclude_tags": ["machine", "toolchain-system", "toolchain-user"]
    }
}
76
77def test_has_at_least_one_matching_tag(test, tag_list):
78 return "oetags" in test and any(oetag in tag_list for oetag in test["oetags"])
79
def all_tests_have_at_least_one_matching_tag(results, tag_list):
    """Return True when every non-ptest result matches at least one tag in *tag_list*."""
    for test_name, test_result in results.items():
        # ptest results never disqualify a run from matching
        if test_name.startswith("ptestresult"):
            continue
        if not test_has_at_least_one_matching_tag(test_result, tag_list):
            return False
    return True
82
def any_test_have_any_matching_tag(results, tag_list):
    """Return True if at least one result in *results* matches a tag in *tag_list*."""
    for test_result in results.values():
        if test_has_at_least_one_matching_tag(test_result, tag_list):
            return True
    return False
85
def have_skipped_test(result, test_prefix):
    """Return True when every test whose name starts with *test_prefix* is SKIPPED.

    Vacuously True when no test matches the prefix.
    """
    for name in result:
        if name.startswith(test_prefix) and result[name]['status'] != "SKIPPED":
            return False
    return True
88
def have_all_tests_skipped(result, test_prefixes_list):
    """Return True when, for every prefix given, all matching tests are SKIPPED."""
    for prefix in test_prefixes_list:
        if not have_skipped_test(result, prefix):
            return False
    return True
91
def guess_oeselftest_metadata(results):
    """
    When an oeselftest test result is lacking OESELFTEST_METADATA, we can try to guess it based on results content.
    Check results for specific values (absence/presence of oetags, number and name of executed tests...),
    and if it matches one of known configuration from autobuilder configuration, apply guessed OESELFTEST_METADATA
    to it to allow proper test filtering.
    This guessing process is tightly coupled to config.json in autobuilder. It should trigger less and less,
    as new tests will have OESELFTEST_METADATA properly appended at test reporting time.
    Returns the guessed metadata dict from OESELFTEST_METADATA_GUESS_TABLE, or None if no configuration matched.
    """

    # NOTE: branch order matters — the tag sets of later branches are supersets
    # of the earlier ones, so the most specific signatures are checked first.
    if len(results) == 1 and "buildoptions.SourceMirroring.test_yocto_source_mirror" in results:
        # A single source-mirror test: the posttrigger configuration
        return OESELFTEST_METADATA_GUESS_TABLE['trigger-build-posttrigger']
    elif all(result.startswith("reproducible") for result in results):
        return OESELFTEST_METADATA_GUESS_TABLE['reproducible']
    elif all_tests_have_at_least_one_matching_tag(results, ["machine"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-quick']
    elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-system"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-x86-or-x86_64']
    elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-user"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-others']
    elif not any_test_have_any_matching_tag(results, ["machine", "toolchain-user", "toolchain-system"]):
        # No tagged tests at all: distinguish 'selftest' from 'bringup' by which
        # well-known tests were skipped in the run.
        if have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"]):
            return OESELFTEST_METADATA_GUESS_TABLE['selftest']
        elif have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"]):
            return OESELFTEST_METADATA_GUESS_TABLE['bringup']

    return None
119
120
def metadata_matches(base_configuration, target_configuration):
    """
    For passed base and target, check test type. If test type matches one of
    properties described in METADATA_MATCH_TABLE, compare metadata if it is
    present in base. Return true if metadata matches, or if base lacks some
    data (either TEST_TYPE or the corresponding metadata)
    """
    test_type = base_configuration.get('TEST_TYPE')
    try:
        metadata_key = METADATA_MATCH_TABLE[test_type]
    except KeyError:
        # Unknown or absent test type: no extra metadata to compare
        return True

    return base_configuration.get(metadata_key) == target_configuration.get(metadata_key)
137
138
def machine_matches(base_configuration, target_configuration):
    """Return True when both configurations target the same MACHINE."""
    base_machine = base_configuration.get('MACHINE')
    target_machine = target_configuration.get('MACHINE')
    return base_machine == target_machine
141
142
def _enrich_oeselftest_metadata(logger, test_data):
    # Backfill OESELFTEST_METADATA on an oeselftest run that lacks it.
    # Older test results lack proper OESELFTEST_METADATA: if not present, try
    # to guess it based on tests results and store the guess in the
    # configuration in place. Logs an error when no guess can be made.
    configuration = test_data['configuration']
    if configuration.get('TEST_TYPE') != 'oeselftest' or 'OESELFTEST_METADATA' in configuration:
        return
    guess = guess_oeselftest_metadata(test_data['result'])
    if guess is None:
        logger.error(f"ERROR: did not manage to guess oeselftest metadata for {configuration['STARTTIME']}")
    else:
        logger.debug(f"Enriching {configuration['STARTTIME']} with {guess}")
        configuration['OESELFTEST_METADATA'] = guess


def can_be_compared(logger, base, target):
    """
    Some tests are not relevant to be compared, for example some oeselftest
    run with different tests sets or parameters. Return true if tests can be
    compared
    """
    ret = True
    base_configuration = base['configuration']
    target_configuration = target['configuration']

    # The metadata guessing logic is identical for both sides; it was
    # previously duplicated inline here.
    _enrich_oeselftest_metadata(logger, base)
    _enrich_oeselftest_metadata(logger, target)

    # Test runs with LTP results in should only be compared with other runs with LTP tests in them
    if base_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in base['result']):
        ret = target_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in target['result'])

    return ret and metadata_matches(base_configuration, target_configuration) \
        and machine_matches(base_configuration, target_configuration)
175
176
def compare_result(logger, base_name, target_name, base_result, target_result):
    """Compare the 'result' dicts of two test runs.

    Returns a tuple (result, resultstring):
    - result: {testname: {'base': status, 'target': status}} for every test
      whose status differs between the runs; None when every difference is a
      newly passing test (an improvement); an empty dict when the runs match
      (callers treat any falsy result as "no regression").
    - resultstring: human-readable summary starting with "Regression:",
      "Improvement:" or "Match:".
    """
    base_result = base_result.get('result')
    target_result = target_result.get('result')
    result = {}
    if base_result and target_result:
        for k in base_result:
            base_testcase = base_result[k]
            base_status = base_testcase.get('status')
            if base_status:
                # Tests missing from target show up with target status None
                target_testcase = target_result.get(k, {})
                target_status = target_testcase.get('status')
                if base_status != target_status:
                    result[k] = {'base': base_status, 'target': target_status}
            else:
                # Fixed message grammar ("retrieved" -> "retrieve")
                logger.error('Failed to retrieve base test case status: %s' % k)
    if result:
        new_pass_count = sum(test['target'] is not None and test['target'].startswith("PASS") for test in result.values())
        # Print a regression report only if at least one test has a regression status (FAIL, SKIPPED, absent...)
        if new_pass_count < len(result):
            resultstring = "Regression: %s\n %s\n" % (base_name, target_name)
            for k in sorted(result):
                if not result[k]['target'] or not result[k]['target'].startswith("PASS"):
                    resultstring += ' %s: %s -> %s\n' % (k, result[k]['base'], result[k]['target'])
            if new_pass_count > 0:
                resultstring += f' Additionally, {new_pass_count} previously failing test(s) is/are now passing\n'
        else:
            # Only improvements: report them but do not flag a regression
            resultstring = "Improvement: %s\n %s\n (+%d test(s) passing)" % (base_name, target_name, new_pass_count)
            result = None
    else:
        resultstring = "Match: %s\n %s" % (base_name, target_name)
    return result, resultstring
208
def get_results(logger, source):
    """Load result data from *source* (file/directory/URL).

    Results are keyed using resultutils.regression_map so that comparable
    configurations end up under the same key.
    logger is currently unused but kept for a consistent call signature.
    """
    return resultutils.load_resultsdata(source, configmap=resultutils.regression_map)
211
def regression(args, logger):
    """Entry point for the 'regression' subcommand.

    Loads base and target result sets from args and delegates the comparison
    to regression_common(). Returns its exit status (0 on success) so the
    command dispatcher sees it, matching regression_git(); previously the
    status was discarded and None returned.
    """
    base_results = get_results(logger, args.base_result)
    target_results = get_results(logger, args.target_result)

    return regression_common(args, logger, base_results, target_results)
217
def fixup_ptest_names(results, logger):
    """Normalize, in place, ptest names that embed random components.

    Some test case naming is poor and contains random strings, particularly
    lttng/babeltrace. Truncating the test names works since they contain file
    and line number identifiers which allows us to match them without the
    random components. When two names truncate to the same key, the
    last one seen wins.

    logger is currently unused but kept for a consistent call signature.
    """
    for r in results:
        for i in results[r]:
            # Snapshot the keys since we mutate the dict while scanning
            tests = list(results[r][i]['result'].keys())
            for test in tests:
                new = None
                if test.startswith(("ptestresult.lttng-tools.", "ptestresult.babeltrace.", "ptestresult.babeltrace2")) and "_-_" in test:
                    new = test.split("_-_")[0]
                # curl and dbus share the same "name__random" convention
                elif test.startswith(("ptestresult.curl.", "ptestresult.dbus.")) and "__" in test:
                    new = test.split("__")[0]
                elif test.startswith("ptestresult.binutils") and "build-st-" in test:
                    new = test.split(" ")[0]
                elif test.startswith("ptestresult.gcc") and "/tmp/runtest." in test:
                    new = ".".join(test.split(".")[:2])
                if new:
                    results[r][i]['result'][new] = results[r][i]['result'][test]
                    del results[r][i]['result'][test]
240
def regression_common(args, logger, base_results, target_results):
    """Compare base and target result sets and print a report.

    Optionally filters both sides by --base-result-id/--target-result-id,
    normalizes ptest names, then pairs configurations per result key:
    matching pairs are removed first, remaining pairs are reported as
    regressions, and base keys absent from target are reported as not found.
    Prints the sorted report to stdout and always returns 0.
    """
    if args.base_result_id:
        base_results = resultutils.filter_resultsdata(base_results, args.base_result_id)
    if args.target_result_id:
        target_results = resultutils.filter_resultsdata(target_results, args.target_result_id)

    # Normalize randomized ptest names on both sides so they can be paired
    fixup_ptest_names(base_results, logger)
    fixup_ptest_names(target_results, logger)

    matches = []
    regressions = []
    notfound = []

    for a in base_results:
        if a in target_results:
            base = list(base_results[a].keys())
            target = list(target_results[a].keys())
            # We may have multiple base/targets which are for different configurations. Start by
            # removing any pairs which match
            for c in base.copy():
                for b in target.copy():
                    if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
                        continue
                    res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b])
                    # A falsy res means no regression: pair them off and move on
                    if not res:
                        matches.append(resstr)
                        base.remove(c)
                        target.remove(b)
                        break
            # Should only now see regressions, we may not be able to match multiple pairs directly
            for c in base:
                for b in target:
                    if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
                        continue
                    res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b])
                    if res:
                        regressions.append(resstr)
        else:
            notfound.append("%s not found in target" % a)
    print("\n".join(sorted(matches)))
    print("\n")
    print("\n".join(sorted(regressions)))
    print("\n".join(sorted(notfound)))
    return 0
285
def regression_git(args, logger):
    """Entry point for the 'regression-git' subcommand.

    Selects two tagged revisions from the results git repository (optionally
    across two branches), loads their result sets and runs the common
    regression comparison. Returns 0 on success, 1 when suitable revisions
    cannot be determined.
    """
    base_results = {}
    target_results = {}

    # Tag layout used by gitarchive when results were stored
    tag_name = "{branch}/{commit_number}-g{commit}/{tag_number}"
    repo = GitRepo(args.repo)

    revs = gitarchive.get_test_revs(logger, repo, tag_name, branch=args.branch)

    if args.branch2:
        revs2 = gitarchive.get_test_revs(logger, repo, tag_name, branch=args.branch2)
        if not len(revs2):
            logger.error("No revisions found to compare against")
            return 1
        if not len(revs):
            logger.error("No revision to report on found")
            return 1
    else:
        # Single-branch mode needs at least two revisions to compare
        if len(revs) < 2:
            logger.error("Only %d tester revisions found, unable to generate report" % len(revs))
            return 1

    # Pick revisions
    if args.commit:
        if args.commit_number:
            logger.warning("Ignoring --commit-number as --commit was specified")
        index1 = gitarchive.rev_find(revs, 'commit', args.commit)
    elif args.commit_number:
        index1 = gitarchive.rev_find(revs, 'commit_number', args.commit_number)
    else:
        # Default to the newest revision on the primary branch
        index1 = len(revs) - 1

    if args.branch2:
        # Move the chosen base revision onto the comparison branch's list so
        # the second selection below happens within a single list
        revs2.append(revs[index1])
        index1 = len(revs2) - 1
        revs = revs2

    if args.commit2:
        if args.commit_number2:
            logger.warning("Ignoring --commit-number2 as --commit2 was specified")
        index2 = gitarchive.rev_find(revs, 'commit', args.commit2)
    elif args.commit_number2:
        index2 = gitarchive.rev_find(revs, 'commit_number', args.commit_number2)
    else:
        if index1 > 0:
            index2 = index1 - 1
            # Find the closest matching commit number for comparision
            # In future we could check the commit is a common ancestor and
            # continue back if not but this good enough for now
            while index2 > 0 and revs[index2].commit_number > revs[index1].commit_number:
                index2 = index2 - 1
        else:
            logger.error("Unable to determine the other commit, use "
                         "--commit2 or --commit-number2 to specify it")
            return 1

    logger.info("Comparing:\n%s\nto\n%s\n" % (revs[index1], revs[index2]))

    # revs entries are tuples; index 2 holds the tag(s) used to fetch results
    base_results = resultutils.git_get_result(repo, revs[index1][2])
    target_results = resultutils.git_get_result(repo, revs[index2][2])

    regression_common(args, logger, base_results, target_results)

    return 0
350
def register_commands(subparsers):
    """Register subcommands from this plugin"""

    # Plain file/directory/URL based regression analysis
    file_parser = subparsers.add_parser('regression', help='regression file/directory analysis',
                                        description='regression analysis comparing the base set of results to the target results',
                                        group='analysis')
    file_parser.set_defaults(func=regression)
    file_parser.add_argument('base_result',
                            help='base result file/directory/URL for the comparison')
    file_parser.add_argument('target_result',
                            help='target result file/directory/URL to compare with')
    file_parser.add_argument('-b', '--base-result-id', default='',
                            help='(optional) filter the base results to this result ID')
    file_parser.add_argument('-t', '--target-result-id', default='',
                            help='(optional) filter the target results to this result ID')

    # Git based regression analysis over revisions stored in a results repository
    git_parser = subparsers.add_parser('regression-git', help='regression git analysis',
                                       description='regression analysis comparing base result set to target '
                                                   'result set',
                                       group='analysis')
    git_parser.set_defaults(func=regression_git)
    git_parser.add_argument('repo',
                            help='the git repository containing the data')
    git_parser.add_argument('-b', '--base-result-id', default='',
                            help='(optional) default select regression based on configurations unless base result '
                                 'id was provided')
    git_parser.add_argument('-t', '--target-result-id', default='',
                            help='(optional) default select regression based on configurations unless target result '
                                 'id was provided')

    git_parser.add_argument('--branch', '-B', default='master', help="Branch to find commit in")
    git_parser.add_argument('--branch2', help="Branch to find comparision revisions in")
    git_parser.add_argument('--commit', help="Revision to search for")
    git_parser.add_argument('--commit-number', help="Revision number to search for, redundant if --commit is specified")
    git_parser.add_argument('--commit2', help="Revision to compare with")
    git_parser.add_argument('--commit-number2', help="Revision number to compare with, redundant if --commit2 is specified")
387