blob: 3d64b8f4af7c7546402f210c5c99548815c05e64 [file] [log] [blame]
Brad Bishop40320b12019-03-26 16:08:25 -04001# resulttool - regression analysis
2#
3# Copyright (c) 2019, Intel Corporation.
4# Copyright (c) 2019, Linux Foundation
5#
Brad Bishopc342db32019-05-15 21:57:59 -04006# SPDX-License-Identifier: GPL-2.0-only
Brad Bishop40320b12019-03-26 16:08:25 -04007#
Brad Bishopc342db32019-05-15 21:57:59 -04008
Brad Bishop40320b12019-03-26 16:08:25 -04009import resulttool.resultutils as resultutils
Brad Bishop40320b12019-03-26 16:08:25 -040010
11from oeqa.utils.git import GitRepo
12import oeqa.utils.gitarchive as gitarchive
13
# Maps a result's TEST_TYPE to the configuration key whose value must match
# between base and target for the two results to be comparable
# (consumed by metadata_matches() below).
METADATA_MATCH_TABLE = {
    "oeselftest": "OESELFTEST_METADATA"
}
17
# Known oe-selftest invocation profiles from the autobuilder's config.json,
# keyed by the autobuilder target name. When an older result lacks
# OESELFTEST_METADATA, guess_oeselftest_metadata() tries to match the result
# contents against one of these entries so the metadata can be backfilled.
# NOTE: tightly coupled to the autobuilder configuration — keep in sync.
OESELFTEST_METADATA_GUESS_TABLE={
    # Single source-mirror test run triggered after a build
    "trigger-build-posttrigger": {
        "run_all_tests": False,
        "run_tests":["buildoptions.SourceMirroring.test_yocto_source_mirror"],
        "skips": None,
        "machine": None,
        "select_tags":None,
        "exclude_tags": None
    },
    # Reproducible-build test runs
    "reproducible": {
        "run_all_tests": False,
        "run_tests":["reproducible"],
        "skips": None,
        "machine": None,
        "select_tags":None,
        "exclude_tags": None
    },
    # Quick machine-tagged runs on qemu targets
    "arch-qemu-quick": {
        "run_all_tests": True,
        "run_tests":None,
        "skips": None,
        "machine": None,
        "select_tags":["machine"],
        "exclude_tags": None
    },
    # Full runs on x86/x86_64 qemu targets (adds toolchain-system tests)
    "arch-qemu-full-x86-or-x86_64": {
        "run_all_tests": True,
        "run_tests":None,
        "skips": None,
        "machine": None,
        "select_tags":["machine", "toolchain-system"],
        "exclude_tags": None
    },
    # Full runs on the remaining qemu targets (adds toolchain-user tests)
    "arch-qemu-full-others": {
        "run_all_tests": True,
        "run_tests":None,
        "skips": None,
        "machine": None,
        "select_tags":["machine", "toolchain-user"],
        "exclude_tags": None
    },
    # General selftest run: everything except tagged and known-special tests
    "selftest": {
        "run_all_tests": True,
        "run_tests":None,
        "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"],
        "machine": None,
        "select_tags":None,
        "exclude_tags": ["machine", "toolchain-system", "toolchain-user"]
    },
    # Bringup run: like selftest but reproducible tests are not skipped
    "bringup": {
        "run_all_tests": True,
        "run_tests":None,
        "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"],
        "machine": None,
        "select_tags":None,
        "exclude_tags": ["machine", "toolchain-system", "toolchain-user"]
    }
}
76
# Human-readable replacements for raw status values in reports, keyed by the
# lowercased raw status (or the literal "None" placeholder produced by
# get_status_str() when a test has no result at all).
STATUS_STRINGS = {
    "None": "No matching test result"
}
80
Andrew Geissler6aa7eec2023-03-03 12:41:14 -060081def test_has_at_least_one_matching_tag(test, tag_list):
82 return "oetags" in test and any(oetag in tag_list for oetag in test["oetags"])
83
def all_tests_have_at_least_one_matching_tag(results, tag_list):
    """Return True if every non-ptest result in *results* matches at least one tag in *tag_list*."""
    for test_name, test_result in results.items():
        # ptest results never carry oetags, so they are exempt from the check
        if test_name.startswith("ptestresult"):
            continue
        if not test_has_at_least_one_matching_tag(test_result, tag_list):
            return False
    return True
86
def any_test_have_any_matching_tag(results, tag_list):
    """Return True if at least one test in *results* matches a tag in *tag_list*."""
    for test in results.values():
        if test_has_at_least_one_matching_tag(test, tag_list):
            return True
    return False
89
def have_skipped_test(result, test_prefix):
    """
    Return True if every test whose name starts with *test_prefix* was skipped.
    Vacuously True when no test matches the prefix.
    """
    matching = (name for name in result if name.startswith(test_prefix))
    return all(result[name]['status'] == "SKIPPED" for name in matching)
92
def have_all_tests_skipped(result, test_prefixes_list):
    """Return True if, for every prefix in *test_prefixes_list*, all matching tests were skipped."""
    for prefix in test_prefixes_list:
        if not have_skipped_test(result, prefix):
            return False
    return True
95
def guess_oeselftest_metadata(results):
    """
    When an oeselftest test result is lacking OESELFTEST_METADATA, we can try to guess it based on results content.
    Check results for specific values (absence/presence of oetags, number and name of executed tests...),
    and if it matches one of known configuration from autobuilder configuration, apply guessed OSELFTEST_METADATA
    to it to allow proper test filtering.
    This guessing process is tightly coupled to config.json in autobuilder. It should trigger less and less,
    as new tests will have OESELFTEST_METADATA properly appended at test reporting time

    Returns the matching OESELFTEST_METADATA_GUESS_TABLE entry, or None when no
    known autobuilder configuration matches.
    """

    # Branch order matters: more specific profiles are tested before broader
    # tag-based ones, which are in turn tested before the untagged fallbacks.
    if len(results) == 1 and "buildoptions.SourceMirroring.test_yocto_source_mirror" in results:
        return OESELFTEST_METADATA_GUESS_TABLE['trigger-build-posttrigger']
    elif all(result.startswith("reproducible") for result in results):
        return OESELFTEST_METADATA_GUESS_TABLE['reproducible']
    elif all_tests_have_at_least_one_matching_tag(results, ["machine"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-quick']
    elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-system"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-x86-or-x86_64']
    elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-user"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-others']
    elif not any_test_have_any_matching_tag(results, ["machine", "toolchain-user", "toolchain-system"]):
        # No tagged test at all: distinguish 'selftest' from 'bringup' by
        # which well-known test sets were skipped in the run.
        if have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"]):
            return OESELFTEST_METADATA_GUESS_TABLE['selftest']
        elif have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"]):
            return OESELFTEST_METADATA_GUESS_TABLE['bringup']

    return None
123
124
def metadata_matches(base_configuration, target_configuration):
    """
    For passed base and target, check test type. If test type matches one of
    properties described in METADATA_MATCH_TABLE, compare metadata if it is
    present in base. Return true if metadata matches, or if base lacks some
    data (either TEST_TYPE or the corresponding metadata)
    """
    test_type = base_configuration.get('TEST_TYPE')
    # Test types without a registered metadata key always compare as matching
    if test_type not in METADATA_MATCH_TABLE:
        return True
    metadata_key = METADATA_MATCH_TABLE[test_type]
    return base_configuration.get(metadata_key) == target_configuration.get(metadata_key)
141
142
def machine_matches(base_configuration, target_configuration):
    """Return True if base and target were produced for the same MACHINE."""
    base_machine = base_configuration.get('MACHINE')
    target_machine = target_configuration.get('MACHINE')
    return base_machine == target_machine
145
146
def _enrich_oeselftest_metadata(logger, entry):
    """
    Backfill OESELFTEST_METADATA on an oeselftest result that lacks it, by
    guessing the metadata from the result contents (see
    guess_oeselftest_metadata()). Mutates entry['configuration'] in place;
    logs an error if no known configuration matches.
    """
    configuration = entry['configuration']
    if configuration.get('TEST_TYPE') != 'oeselftest' or 'OESELFTEST_METADATA' in configuration:
        return
    guess = guess_oeselftest_metadata(entry['result'])
    if guess is None:
        logger.error(f"ERROR: did not manage to guess oeselftest metadata for {configuration['STARTTIME']}")
    else:
        logger.debug(f"Enriching {configuration['STARTTIME']} with {guess}")
        configuration['OESELFTEST_METADATA'] = guess

def can_be_compared(logger, base, target):
    """
    Some tests are not relevant to be compared, for example some oeselftest
    run with different tests sets or parameters. Return true if tests can be
    compared
    """
    base_configuration = base['configuration']
    target_configuration = target['configuration']

    # Older test results lack proper OESELFTEST_METADATA: if not present, try to guess it based on tests results.
    _enrich_oeselftest_metadata(logger, base)
    _enrich_oeselftest_metadata(logger, target)

    ret = True
    # Test runs with LTP results in should only be compared with other runs with LTP tests in them
    if base_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in base['result']):
        ret = target_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in target['result'])

    return ret and metadata_matches(base_configuration, target_configuration) \
        and machine_matches(base_configuration, target_configuration)
179
def get_status_str(raw_status):
    """
    Return a human-readable string for *raw_status*, substituting entries
    from STATUS_STRINGS (keyed by lowercased status, or "None" for a missing
    status). Unknown statuses are returned unchanged.
    """
    if raw_status:
        lookup_key = raw_status.lower()
    else:
        lookup_key = "None"
    return STATUS_STRINGS.get(lookup_key, raw_status)
Andrew Geissler6aa7eec2023-03-03 12:41:14 -0600183
def compare_result(logger, base_name, target_name, base_result, target_result):
    """
    Compare the 'result' sections of a base and a target result set.

    Returns a tuple (result, resultstring) where:
    - result is a dict {testcase: {'base': status, 'target': status}} of
      test cases whose status differs; it is None when all changes are
      improvements (previously failing tests now passing), and empty when
      the two sets match.
    - resultstring is a human-readable report of the comparison.
    """
    base_result = base_result.get('result')
    target_result = target_result.get('result')
    result = {}
    new_tests = 0

    if base_result and target_result:
        for k in base_result:
            base_testcase = base_result[k]
            base_status = base_testcase.get('status')
            if base_status:
                target_testcase = target_result.get(k, {})
                target_status = target_testcase.get('status')
                if base_status != target_status:
                    result[k] = {'base': base_status, 'target': target_status}
            else:
                # Fixed message grammar ("retrieved" -> "retrieve")
                logger.error('Failed to retrieve base test case status: %s' % k)

        # Also count new tests that were not present in base results: it
        # could be newly added tests, but it could also highlights some tests
        # renames or fixed faulty ptests
        for k in target_result:
            if k not in base_result:
                new_tests += 1
    if result:
        new_pass_count = sum(test['target'] is not None and test['target'].startswith("PASS") for test in result.values())
        # Print a regression report only if at least one test has a regression status (FAIL, SKIPPED, absent...)
        if new_pass_count < len(result):
            resultstring = "Regression: %s\n %s\n" % (base_name, target_name)
            for k in sorted(result):
                if not result[k]['target'] or not result[k]['target'].startswith("PASS"):
                    resultstring += ' %s: %s -> %s\n' % (k, get_status_str(result[k]['base']), get_status_str(result[k]['target']))
            if new_pass_count > 0:
                resultstring += f' Additionally, {new_pass_count} previously failing test(s) is/are now passing\n'
        else:
            # Every differing test went from non-PASS to PASS: an improvement
            resultstring = "Improvement: %s\n %s\n (+%d test(s) passing)\n" % (base_name, target_name, new_pass_count)
            result = None
    else:
        resultstring = "Match: %s\n %s\n" % (base_name, target_name)

    if new_tests > 0:
        resultstring += f' Additionally, {new_tests} new test(s) is/are present\n'
    return result, resultstring
227
def get_results(logger, source):
    """Load result data from *source* (file/directory/URL), remapped with the regression configmap."""
    configmap = resultutils.regression_map
    return resultutils.load_resultsdata(source, configmap=configmap)
230
def regression(args, logger):
    """
    Entry point for the 'regression' subcommand: load the base and target
    result sets and produce the regression report.
    """
    base_results = get_results(logger, args.base_result)
    target_results = get_results(logger, args.target_result)

    # Propagate the exit status for consistency with regression_git();
    # previously it was silently dropped and None was returned.
    return regression_common(args, logger, base_results, target_results)
236
# Some test case naming is poor and contains random strings, particularly lttng/babeltrace.
# Truncating the test names works since they contain file and line number identifiers
# which allows us to match them without the random components.
def fixup_ptest_names(results, logger):
    """Normalize ptest names containing random/per-run components, in place."""
    for result_id in results:
        for item in results[result_id]:
            testresults = results[result_id][item]['result']
            for test in list(testresults.keys()):
                renamed = None
                if test.startswith(("ptestresult.lttng-tools.", "ptestresult.babeltrace.", "ptestresult.babeltrace2")) and "_-_" in test:
                    renamed = test.split("_-_")[0]
                elif test.startswith(("ptestresult.curl.", "ptestresult.dbus.")) and "__" in test:
                    # curl and dbus share the same double-underscore suffix scheme
                    renamed = test.split("__")[0]
                elif test.startswith("ptestresult.binutils") and "build-st-" in test:
                    renamed = test.split(" ")[0]
                elif test.startswith("ptestresult.gcc") and "/tmp/runtest." in test:
                    renamed = ".".join(test.split(".")[:2])
                if renamed:
                    testresults[renamed] = testresults[test]
                    del testresults[test]
259
def regression_common(args, logger, base_results, target_results):
    """
    Core comparison between two loaded result sets. Optionally filters both
    sides by result id, normalizes ptest names, then pairs up comparable
    base/target configurations and prints matches, regressions and base
    entries missing from the target. Returns 0.
    """
    if args.base_result_id:
        base_results = resultutils.filter_resultsdata(base_results, args.base_result_id)
    if args.target_result_id:
        target_results = resultutils.filter_resultsdata(target_results, args.target_result_id)

    fixup_ptest_names(base_results, logger)
    fixup_ptest_names(target_results, logger)

    matches = []
    regressions = []
    notfound = []

    for a in base_results:
        if a in target_results:
            base = list(base_results[a].keys())
            target = list(target_results[a].keys())
            # We may have multiple base/targets which are for different configurations. Start by
            # removing any pairs which match
            for c in base.copy():
                for b in target.copy():
                    if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
                        continue
                    # compare_result() returns a falsy result for a match/improvement;
                    # matched pairs are removed so each target is consumed at most once
                    res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b])
                    if not res:
                        matches.append(resstr)
                        base.remove(c)
                        target.remove(b)
                        break
            # Should only now see regressions, we may not be able to match multiple pairs directly
            for c in base:
                for b in target:
                    if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
                        continue
                    res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b])
                    if res:
                        regressions.append(resstr)
        else:
            notfound.append("%s not found in target" % a)
    # Report sections: matches first, then regressions, then missing entries
    print("\n".join(sorted(matches)))
    print("\n")
    print("\n".join(sorted(regressions)))
    print("\n".join(sorted(notfound)))
    return 0
304
def regression_git(args, logger):
    """
    Entry point for the 'regression-git' subcommand: pick two tested
    revisions from a results git repository (by commit, commit number,
    branch position, or an optional second branch) and run the common
    regression report between them. Returns 0 on success, 1 on error.
    """
    base_results = {}
    target_results = {}

    # Tag layout used by the autobuilder when archiving test results
    tag_name = "{branch}/{commit_number}-g{commit}/{tag_number}"
    repo = GitRepo(args.repo)

    revs = gitarchive.get_test_revs(logger, repo, tag_name, branch=args.branch)

    if args.branch2:
        revs2 = gitarchive.get_test_revs(logger, repo, tag_name, branch=args.branch2)
        if not len(revs2):
            logger.error("No revisions found to compare against")
            return 1
        if not len(revs):
            logger.error("No revision to report on found")
            return 1
    else:
        # Comparing within a single branch needs at least two revisions
        if len(revs) < 2:
            logger.error("Only %d tester revisions found, unable to generate report" % len(revs))
            return 1

    # Pick revisions
    if args.commit:
        if args.commit_number:
            logger.warning("Ignoring --commit-number as --commit was specified")
        index1 = gitarchive.rev_find(revs, 'commit', args.commit)
    elif args.commit_number:
        index1 = gitarchive.rev_find(revs, 'commit_number', args.commit_number)
    else:
        # Default to the most recent revision on the branch
        index1 = len(revs) - 1

    if args.branch2:
        # Move the chosen base revision into the second branch's list so the
        # comparison revision is searched there
        revs2.append(revs[index1])
        index1 = len(revs2) - 1
        revs = revs2

    if args.commit2:
        if args.commit_number2:
            logger.warning("Ignoring --commit-number2 as --commit2 was specified")
        index2 = gitarchive.rev_find(revs, 'commit', args.commit2)
    elif args.commit_number2:
        index2 = gitarchive.rev_find(revs, 'commit_number', args.commit_number2)
    else:
        if index1 > 0:
            index2 = index1 - 1
            # Find the closest matching commit number for comparison
            # In future we could check the commit is a common ancestor and
            # continue back if not but this good enough for now
            while index2 > 0 and revs[index2].commit_number > revs[index1].commit_number:
                index2 = index2 - 1
        else:
            logger.error("Unable to determine the other commit, use "
                    "--commit2 or --commit-number2 to specify it")
            return 1

    logger.info("Comparing:\n%s\nto\n%s\n" % (revs[index1], revs[index2]))

    base_results = resultutils.git_get_result(repo, revs[index1][2])
    target_results = resultutils.git_get_result(repo, revs[index2][2])

    regression_common(args, logger, base_results, target_results)

    return 0
369
def register_commands(subparsers):
    """Register subcommands from this plugin"""

    # 'regression': compare two result files/directories/URLs
    parser_build = subparsers.add_parser('regression', help='regression file/directory analysis',
                                         description='regression analysis comparing the base set of results to the target results',
                                         group='analysis')
    parser_build.set_defaults(func=regression)
    parser_build.add_argument('base_result',
                              help='base result file/directory/URL for the comparison')
    parser_build.add_argument('target_result',
                              help='target result file/directory/URL to compare with')
    parser_build.add_argument('-b', '--base-result-id', default='',
                              help='(optional) filter the base results to this result ID')
    parser_build.add_argument('-t', '--target-result-id', default='',
                              help='(optional) filter the target results to this result ID')

    # 'regression-git': compare revisions stored in a results git repository
    parser_build = subparsers.add_parser('regression-git', help='regression git analysis',
                                         description='regression analysis comparing base result set to target '
                                                     'result set',
                                         group='analysis')
    parser_build.set_defaults(func=regression_git)
    parser_build.add_argument('repo',
                              help='the git repository containing the data')
    parser_build.add_argument('-b', '--base-result-id', default='',
                              help='(optional) default select regression based on configurations unless base result '
                                   'id was provided')
    parser_build.add_argument('-t', '--target-result-id', default='',
                              help='(optional) default select regression based on configurations unless target result '
                                   'id was provided')

    parser_build.add_argument('--branch', '-B', default='master', help="Branch to find commit in")
    # Fixed user-facing typo: "comparision" -> "comparison"
    parser_build.add_argument('--branch2', help="Branch to find comparison revisions in")
    parser_build.add_argument('--commit', help="Revision to search for")
    parser_build.add_argument('--commit-number', help="Revision number to search for, redundant if --commit is specified")
    parser_build.add_argument('--commit2', help="Revision to compare with")
    parser_build.add_argument('--commit-number2', help="Revision number to compare with, redundant if --commit2 is specified")
406