blob: 10e7d13841a72dc93365507e27e172be334fc289 [file] [log] [blame]
Brad Bishop40320b12019-03-26 16:08:25 -04001# resulttool - regression analysis
2#
3# Copyright (c) 2019, Intel Corporation.
4# Copyright (c) 2019, Linux Foundation
5#
Brad Bishopc342db32019-05-15 21:57:59 -04006# SPDX-License-Identifier: GPL-2.0-only
Brad Bishop40320b12019-03-26 16:08:25 -04007#
Brad Bishopc342db32019-05-15 21:57:59 -04008
Brad Bishop40320b12019-03-26 16:08:25 -04009import resulttool.resultutils as resultutils
Brad Bishop40320b12019-03-26 16:08:25 -040010
11from oeqa.utils.git import GitRepo
12import oeqa.utils.gitarchive as gitarchive
13
# Maps a result's TEST_TYPE to the configuration key whose value must match
# between base and target for the two results to be comparable
# (see metadata_matches()).
METADATA_MATCH_TABLE = {
    "oeselftest": "OESELFTEST_METADATA"
}

# Known oe-selftest invocation profiles, mirroring the autobuilder's
# config.json. guess_oeselftest_metadata() uses these to reconstruct
# OESELFTEST_METADATA for older results stored without it.
OESELFTEST_METADATA_GUESS_TABLE={
    "trigger-build-posttrigger": {
        "run_all_tests": False,
        "run_tests":["buildoptions.SourceMirroring.test_yocto_source_mirror"],
        "skips": None,
        "machine": None,
        "select_tags":None,
        "exclude_tags": None
    },
    "reproducible": {
        "run_all_tests": False,
        "run_tests":["reproducible"],
        "skips": None,
        "machine": None,
        "select_tags":None,
        "exclude_tags": None
    },
    "arch-qemu-quick": {
        "run_all_tests": True,
        "run_tests":None,
        "skips": None,
        "machine": None,
        "select_tags":["machine"],
        "exclude_tags": None
    },
    "arch-qemu-full-x86-or-x86_64": {
        "run_all_tests": True,
        "run_tests":None,
        "skips": None,
        "machine": None,
        "select_tags":["machine", "toolchain-system"],
        "exclude_tags": None
    },
    "arch-qemu-full-others": {
        "run_all_tests": True,
        "run_tests":None,
        "skips": None,
        "machine": None,
        "select_tags":["machine", "toolchain-user"],
        "exclude_tags": None
    },
    "selftest": {
        "run_all_tests": True,
        "run_tests":None,
        "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"],
        "machine": None,
        "select_tags":None,
        "exclude_tags": ["machine", "toolchain-system", "toolchain-user"]
    },
    "bringup": {
        "run_all_tests": True,
        "run_tests":None,
        "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"],
        "machine": None,
        "select_tags":None,
        "exclude_tags": ["machine", "toolchain-system", "toolchain-user"]
    }
}

# Display replacements for raw status values (see get_status_str()).
STATUS_STRINGS = {
    "None": "No matching test result"
}

# Default cap on the number of regression lines printed per test family
# in compare_result(); overridable via the -l/--limit command line option.
REGRESSIONS_DISPLAY_LIMIT=50

# Section banners for the report printed by regression_common().
MISSING_TESTS_BANNER = "-------------------------- Missing tests --------------------------"
ADDITIONAL_DATA_BANNER = "--------------------- Matches and improvements --------------------"
85
Andrew Geissler6aa7eec2023-03-03 12:41:14 -060086def test_has_at_least_one_matching_tag(test, tag_list):
87 return "oetags" in test and any(oetag in tag_list for oetag in test["oetags"])
88
def all_tests_have_at_least_one_matching_tag(results, tag_list):
    """Return True when every non-ptest result carries at least one tag from tag_list (ptestresult.* entries are exempt)."""
    for test_name, test_result in results.items():
        if test_name.startswith("ptestresult"):
            continue
        if not test_has_at_least_one_matching_tag(test_result, tag_list):
            return False
    return True
91
def any_test_have_any_matching_tag(results, tag_list):
    """Return True when at least one result in *results* carries one of the tags in tag_list."""
    for test_result in results.values():
        if test_has_at_least_one_matching_tag(test_result, tag_list):
            return True
    return False
94
def have_skipped_test(result, test_prefix):
    """Return True when every test whose name starts with test_prefix is SKIPPED (vacuously True when none match)."""
    statuses = (data['status'] for name, data in result.items() if name.startswith(test_prefix))
    return all(status == "SKIPPED" for status in statuses)
97
def have_all_tests_skipped(result, test_prefixes_list):
    """Return True when, for every prefix in test_prefixes_list, all matching tests were skipped."""
    for prefix in test_prefixes_list:
        if not have_skipped_test(result, prefix):
            return False
    return True
100
def guess_oeselftest_metadata(results):
    """
    When an oeselftest test result is lacking OESELFTEST_METADATA, try to infer
    it from the content of the results: the number and names of executed tests,
    the presence/absence of oetags, and which tests were skipped. When the shape
    matches a known autobuilder invocation profile, return the corresponding
    entry of OESELFTEST_METADATA_GUESS_TABLE, otherwise None.
    This guessing process is tightly coupled to config.json in autobuilder. It
    should trigger less and less, as new tests will have OESELFTEST_METADATA
    properly appended at test reporting time.
    """
    # A lone source-mirror test is the post-trigger build check
    if len(results) == 1 and "buildoptions.SourceMirroring.test_yocto_source_mirror" in results:
        return OESELFTEST_METADATA_GUESS_TABLE['trigger-build-posttrigger']

    # Nothing but reproducible.* tests: the reproducible-builds run
    if all(name.startswith("reproducible") for name in results):
        return OESELFTEST_METADATA_GUESS_TABLE['reproducible']

    # Tag-selected runs, checked from the most restrictive tag set upwards;
    # keep this order, as each later tag set is a superset of the previous one
    tag_profiles = (
        ('arch-qemu-quick', ["machine"]),
        ('arch-qemu-full-x86-or-x86_64', ["machine", "toolchain-system"]),
        ('arch-qemu-full-others', ["machine", "toolchain-user"]),
    )
    for profile, tags in tag_profiles:
        if all_tests_have_at_least_one_matching_tag(results, tags):
            return OESELFTEST_METADATA_GUESS_TABLE[profile]

    # Untagged runs: tell "selftest" and "bringup" apart by which tests were skipped
    if not any_test_have_any_matching_tag(results, ["machine", "toolchain-user", "toolchain-system"]):
        if have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"]):
            return OESELFTEST_METADATA_GUESS_TABLE['selftest']
        if have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"]):
            return OESELFTEST_METADATA_GUESS_TABLE['bringup']

    return None
128
129
def metadata_matches(base_configuration, target_configuration):
    """
    For passed base and target, check test type. If test type matches one of
    properties described in METADATA_MATCH_TABLE, compare metadata if it is
    present in base. Return true if metadata matches, or if base lacks some
    data (either TEST_TYPE or the corresponding metadata)
    """
    metadata_key = METADATA_MATCH_TABLE.get(base_configuration.get('TEST_TYPE'))
    if metadata_key is None:
        # Unknown or absent test type: nothing further to compare on
        return True
    return base_configuration.get(metadata_key) == target_configuration.get(metadata_key)
146
147
def machine_matches(base_configuration, target_configuration):
    """Return True when base and target ran on the same MACHINE (both absent counts as a match)."""
    base_machine = base_configuration.get('MACHINE')
    target_machine = target_configuration.get('MACHINE')
    return base_machine == target_machine
150
151
def _enrich_with_guessed_oeselftest_metadata(logger, test_data):
    """Backfill OESELFTEST_METADATA on an oeselftest result that lacks it.

    Older test results lack proper OESELFTEST_METADATA: if not present, try to
    guess it based on the test results content (guess_oeselftest_metadata).
    Mutates test_data['configuration'] in place when a guess is found; logs an
    error when no known profile matches.
    """
    configuration = test_data['configuration']
    if configuration.get('TEST_TYPE') != 'oeselftest' or 'OESELFTEST_METADATA' in configuration:
        return
    guess = guess_oeselftest_metadata(test_data['result'])
    if guess is None:
        logger.error(f"ERROR: did not manage to guess oeselftest metadata for {configuration['STARTTIME']}")
    else:
        logger.debug(f"Enriching {configuration['STARTTIME']} with {guess}")
        configuration['OESELFTEST_METADATA'] = guess

def can_be_compared(logger, base, target):
    """
    Some tests are not relevant to be compared, for example some oeselftest
    run with different tests sets or parameters. Return true if tests can be
    compared
    """
    ret = True
    base_configuration = base['configuration']
    target_configuration = target['configuration']

    # Both sides get the same OESELFTEST_METADATA backfill treatment
    # (previously duplicated inline for base and target).
    _enrich_with_guessed_oeselftest_metadata(logger, base)
    _enrich_with_guessed_oeselftest_metadata(logger, target)

    # Test runs with LTP results in should only be compared with other runs with LTP tests in them
    if base_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in base['result']):
        ret = target_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in target['result'])

    return ret and metadata_matches(base_configuration, target_configuration) \
        and machine_matches(base_configuration, target_configuration)
184
def get_status_str(raw_status):
    """Return the display string for a raw test status.

    A missing/None status is translated through STATUS_STRINGS into the
    "No matching test result" message; any other status is looked up
    lowercased and falls back to being returned unchanged.
    """
    if not raw_status:
        return STATUS_STRINGS["None"]
    return STATUS_STRINGS.get(raw_status.lower(), raw_status)
Andrew Geissler6aa7eec2023-03-03 12:41:14 -0600188
def get_additional_info_line(new_pass_count, new_tests):
    """Build the one-line improvements summary (new tests and/or newly passing
    tests) appended to a match report; empty string when there is nothing to say."""
    notes = []
    if new_tests:
        notes.append(f'+{new_tests} test(s) present')
    if new_pass_count:
        notes.append(f'+{new_pass_count} test(s) now passing')

    if notes:
        return ' -> ' + ', '.join(notes) + '\n'
    return ""
200
def compare_result(logger, base_name, target_name, base_result, target_result, display_limit=None):
    """Compare a base and a target result set.

    Returns a (result, resultstring) tuple: result maps each test whose status
    changed to {'base': status, 'target': status} — it is None when the sets
    could not be compared or when only improvements were found — and
    resultstring is the human-readable report built from those changes.
    display_limit caps how many regression lines are printed per test family
    (0 means unlimited; may be a string, as it comes from the command line).
    """
    base_result = base_result.get('result')
    target_result = target_result.get('result')
    result = {}
    regressions = {}
    resultstring = ""
    new_tests = 0
    new_pass_count = 0

    display_limit = int(display_limit) if display_limit else REGRESSIONS_DISPLAY_LIMIT

    if base_result and target_result:
        # Collect every test whose status differs between base and target
        for k in base_result:
            base_status = base_result[k].get('status')
            if base_status:
                target_status = target_result.get(k, {}).get('status')
                if base_status != target_status:
                    result[k] = {'base': base_status, 'target': target_status}
            else:
                logger.error('Failed to retrieve base test case status: %s' % k)

        # Also count new tests that were not present in base results: it
        # could be newly added tests, but it could also highlights some tests
        # renames or fixed faulty ptests
        new_tests = sum(1 for k in target_result if k not in base_result)

        if result:
            new_pass_count = sum(test['target'] is not None and test['target'].startswith("PASS") for test in result.values())
            # Print a regression report only if at least one test has a regression status (FAIL, SKIPPED, absent...)
            if new_pass_count < len(result):
                resultstring = "Regression: %s\n %s\n" % (base_name, target_name)
                for k in sorted(result):
                    if not result[k]['target'] or not result[k]['target'].startswith("PASS"):
                        # Differentiate each ptest kind when listing regressions
                        key_parts = k.split('.')
                        key = '.'.join(key_parts[:2]) if k.startswith('ptest') else key_parts[0]
                        # Append new regression to corresponding test family
                        regressions.setdefault(key, []).append(' %s: %s -> %s\n' % (k, get_status_str(result[k]['base']), get_status_str(result[k]['target'])))
                resultstring += f" Total: {sum(len(v) for v in regressions.values())} new regression(s):\n"
                for k in regressions:
                    resultstring += f" {len(regressions[k])} regression(s) for {k}\n"
                    count_to_print = min(display_limit, len(regressions[k])) if display_limit > 0 else len(regressions[k])
                    resultstring += ''.join(regressions[k][:count_to_print])
                    if count_to_print < len(regressions[k]):
                        resultstring += ' [...]\n'
                if new_pass_count > 0:
                    resultstring += f' Additionally, {new_pass_count} previously failing test(s) is/are now passing\n'
                if new_tests > 0:
                    resultstring += f' Additionally, {new_tests} new test(s) is/are present\n'
            else:
                # Only improvements: report a plain match, not a regression
                resultstring = "%s\n%s\n" % (base_name, target_name)
                result = None
    else:
        resultstring = "%s\n%s\n" % (base_name, target_name)

    if not result:
        additional_info = get_additional_info_line(new_pass_count, new_tests)
        if additional_info:
            resultstring += additional_info

    return result, resultstring
266
def get_results(logger, source):
    """Load result data from *source* (file/directory/URL), keyed by the regression configuration map."""
    return resultutils.load_resultsdata(source, configmap=resultutils.regression_map)
269
def regression(args, logger):
    """Entry point for the 'regression' subcommand: load the base and target
    result sets named on the command line and run the common comparison."""
    base_results = get_results(logger, args.base_result)
    target_results = get_results(logger, args.target_result)

    # Propagate regression_common's exit code instead of implicitly returning
    # None, for consistency with regression_git's explicit return
    return regression_common(args, logger, base_results, target_results)
275
# Some test case naming is poor and contains random strings, particularly lttng/babeltrace.
# Truncating the test names works since they contain file and line number identifiers
# which allows us to match them without the random components.
def fixup_ptest_names(results, logger):
    """Normalize ptest case names in *results* in place.

    Strips the random/per-build suffixes from lttng-tools/babeltrace, curl,
    dbus, binutils and gcc ptest names so the same test can be matched across
    runs. *logger* is currently unused but kept for interface stability.
    """
    for r in results:
        for i in results[r]:
            tests = list(results[r][i]['result'].keys())
            for test in tests:
                new = None
                if test.startswith(("ptestresult.lttng-tools.", "ptestresult.babeltrace.", "ptestresult.babeltrace2")) and "_-_" in test:
                    new = test.split("_-_")[0]
                elif test.startswith(("ptestresult.curl.")) and "__" in test:
                    new = test.split("__")[0]
                elif test.startswith(("ptestresult.dbus.")) and "__" in test:
                    new = test.split("__")[0]
                elif test.startswith("ptestresult.binutils") and "build-st-" in test:
                    new = test.split(" ")[0]
                elif test.startswith("ptestresult.gcc") and "/tmp/runtest." in test:
                    new = ".".join(test.split(".")[:2])
                # Only rename when truncation produced a different name: when
                # new == test (e.g. a binutils name containing "build-st-" but
                # no space) the rename below would delete the entry outright
                if new and new != test:
                    results[r][i]['result'][new] = results[r][i]['result'][test]
                    del results[r][i]['result'][test]
298
def regression_common(args, logger, base_results, target_results):
    """Pair up base and target result sets and print the regression report.

    Optionally filters both sets by args.base_result_id / args.target_result_id
    and normalizes ptest names. For each top-level key present in both sets,
    comparable pairs with no status changes are recorded as matches and removed
    from further pairing; the remaining comparable pairs are reported as
    regressions; base keys absent from the target are reported as missing.
    Prints the three report sections and always returns 0.
    """
    if args.base_result_id:
        base_results = resultutils.filter_resultsdata(base_results, args.base_result_id)
    if args.target_result_id:
        target_results = resultutils.filter_resultsdata(target_results, args.target_result_id)

    fixup_ptest_names(base_results, logger)
    fixup_ptest_names(target_results, logger)

    matches = []
    regressions = []
    notfound = []

    for a in base_results:
        if a in target_results:
            base = list(base_results[a].keys())
            target = list(target_results[a].keys())
            # We may have multiple base/targets which are for different configurations. Start by
            # removing any pairs which match
            # (iterate over copies since matched entries are removed from base/target)
            for c in base.copy():
                for b in target.copy():
                    if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
                        continue
                    res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b], args.limit)
                    if not res:
                        # No status changes: record the match and stop looking
                        # for a partner for this base entry
                        matches.append(resstr)
                        base.remove(c)
                        target.remove(b)
                        break
            # Should only now see regressions, we may not be able to match multiple pairs directly
            for c in base:
                for b in target:
                    if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
                        continue
                    res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b], args.limit)
                    if res:
                        regressions.append(resstr)
        else:
            notfound.append("%s not found in target" % a)
    print("\n".join(sorted(regressions)))
    print("\n" + MISSING_TESTS_BANNER + "\n")
    print("\n".join(sorted(notfound)))
    print("\n" + ADDITIONAL_DATA_BANNER + "\n")
    print("\n".join(sorted(matches)))
    return 0
344
def regression_git(args, logger):
    """Entry point for the 'regression-git' subcommand.

    Picks two tagged test-result revisions from the git repository in
    args.repo (optionally across two branches / by commit or commit number),
    loads both result sets and runs the common regression comparison.
    Returns 0 on success, 1 when suitable revisions cannot be determined.
    """
    base_results = {}
    target_results = {}

    # Tag layout produced by gitarchive when results were stored
    tag_name = "{branch}/{commit_number}-g{commit}/{tag_number}"
    repo = GitRepo(args.repo)

    revs = gitarchive.get_test_revs(logger, repo, tag_name, branch=args.branch)

    if args.branch2:
        # Cross-branch comparison: the target revisions come from branch2
        revs2 = gitarchive.get_test_revs(logger, repo, tag_name, branch=args.branch2)
        if not len(revs2):
            logger.error("No revisions found to compare against")
            return 1
        if not len(revs):
            logger.error("No revision to report on found")
            return 1
    else:
        # Same-branch comparison needs at least two revisions
        if len(revs) < 2:
            logger.error("Only %d tester revisions found, unable to generate report" % len(revs))
            return 1

    # Pick revisions
    if args.commit:
        if args.commit_number:
            logger.warning("Ignoring --commit-number as --commit was specified")
        index1 = gitarchive.rev_find(revs, 'commit', args.commit)
    elif args.commit_number:
        index1 = gitarchive.rev_find(revs, 'commit_number', args.commit_number)
    else:
        # Default to the most recent revision on the report branch
        index1 = len(revs) - 1

    if args.branch2:
        # Carry the chosen base revision over into the branch2 list so both
        # indices address the same list from here on
        revs2.append(revs[index1])
        index1 = len(revs2) - 1
        revs = revs2

    if args.commit2:
        if args.commit_number2:
            logger.warning("Ignoring --commit-number2 as --commit2 was specified")
        index2 = gitarchive.rev_find(revs, 'commit', args.commit2)
    elif args.commit_number2:
        index2 = gitarchive.rev_find(revs, 'commit_number', args.commit_number2)
    else:
        if index1 > 0:
            index2 = index1 - 1
            # Find the closest matching commit number for comparison
            # In future we could check the commit is a common ancestor and
            # continue back if not but this good enough for now
            while index2 > 0 and revs[index2].commit_number > revs[index1].commit_number:
                index2 = index2 - 1
        else:
            logger.error("Unable to determine the other commit, use "
                    "--commit2 or --commit-number2 to specify it")
            return 1

    logger.info("Comparing:\n%s\nto\n%s\n" % (revs[index1], revs[index2]))

    base_results = resultutils.git_get_result(repo, revs[index1][2])
    target_results = resultutils.git_get_result(repo, revs[index2][2])

    regression_common(args, logger, base_results, target_results)

    return 0
409
def register_commands(subparsers):
    """Register subcommands from this plugin"""

    # Plain file/directory/URL comparison
    regression_parser = subparsers.add_parser('regression', help='regression file/directory analysis',
                                              description='regression analysis comparing the base set of results to the target results',
                                              group='analysis')
    regression_parser.set_defaults(func=regression)
    regression_parser.add_argument('base_result',
                                   help='base result file/directory/URL for the comparison')
    regression_parser.add_argument('target_result',
                                   help='target result file/directory/URL to compare with')
    regression_parser.add_argument('-b', '--base-result-id', default='',
                                   help='(optional) filter the base results to this result ID')
    regression_parser.add_argument('-t', '--target-result-id', default='',
                                   help='(optional) filter the target results to this result ID')

    # Comparison between result revisions stored in a git repository
    git_parser = subparsers.add_parser('regression-git', help='regression git analysis',
                                       description='regression analysis comparing base result set to target '
                                                   'result set',
                                       group='analysis')
    git_parser.set_defaults(func=regression_git)
    git_parser.add_argument('repo',
                            help='the git repository containing the data')
    git_parser.add_argument('-b', '--base-result-id', default='',
                            help='(optional) default select regression based on configurations unless base result '
                                 'id was provided')
    git_parser.add_argument('-t', '--target-result-id', default='',
                            help='(optional) default select regression based on configurations unless target result '
                                 'id was provided')

    git_parser.add_argument('--branch', '-B', default='master', help="Branch to find commit in")
    git_parser.add_argument('--branch2', help="Branch to find comparision revisions in")
    git_parser.add_argument('--commit', help="Revision to search for")
    git_parser.add_argument('--commit-number', help="Revision number to search for, redundant if --commit is specified")
    git_parser.add_argument('--commit2', help="Revision to compare with")
    git_parser.add_argument('--commit-number2', help="Revision number to compare with, redundant if --commit2 is specified")
    git_parser.add_argument('-l', '--limit', default=REGRESSIONS_DISPLAY_LIMIT, help="Maximum number of changes to display per test. Can be set to 0 to print all changes")
Brad Bishop40320b12019-03-26 16:08:25 -0400447