blob: 74fd5f3895116136131f40ada6123a4c55c175c2 [file] [log] [blame]
Brad Bishop40320b12019-03-26 16:08:25 -04001# resulttool - regression analysis
2#
3# Copyright (c) 2019, Intel Corporation.
4# Copyright (c) 2019, Linux Foundation
5#
Brad Bishopc342db32019-05-15 21:57:59 -04006# SPDX-License-Identifier: GPL-2.0-only
Brad Bishop40320b12019-03-26 16:08:25 -04007#
Brad Bishopc342db32019-05-15 21:57:59 -04008
Brad Bishop40320b12019-03-26 16:08:25 -04009import resulttool.resultutils as resultutils
Brad Bishop40320b12019-03-26 16:08:25 -040010
11from oeqa.utils.git import GitRepo
12import oeqa.utils.gitarchive as gitarchive
13
# Maps a result's TEST_TYPE to the configuration key whose value must match
# between base and target for the two runs to be comparable (see metadata_matches()).
METADATA_MATCH_TABLE = {
    "oeselftest": "OESELFTEST_METADATA"
}

# Known oe-selftest invocations from the autobuilder's config.json, keyed by a
# short builder name.  guess_oeselftest_metadata() returns one of these entries
# when an old result lacks OESELFTEST_METADATA, so it can be filtered like a
# modern result.  Each entry mirrors the shape of real OESELFTEST_METADATA.
OESELFTEST_METADATA_GUESS_TABLE={
    # Runs exactly one test: the Yocto source mirror check.
    "trigger-build-posttrigger": {
        "run_all_tests": False,
        "run_tests":["buildoptions.SourceMirroring.test_yocto_source_mirror"],
        "skips": None,
        "machine": None,
        "select_tags":None,
        "exclude_tags": None
    },
    # Runs only the reproducible-build tests.
    "reproducible": {
        "run_all_tests": False,
        "run_tests":["reproducible"],
        "skips": None,
        "machine": None,
        "select_tags":None,
        "exclude_tags": None
    },
    # Quick per-architecture qemu run: only "machine"-tagged tests.
    "arch-qemu-quick": {
        "run_all_tests": True,
        "run_tests":None,
        "skips": None,
        "machine": None,
        "select_tags":["machine"],
        "exclude_tags": None
    },
    # Full qemu run on x86/x86_64: machine plus toolchain-system tagged tests.
    "arch-qemu-full-x86-or-x86_64": {
        "run_all_tests": True,
        "run_tests":None,
        "skips": None,
        "machine": None,
        "select_tags":["machine", "toolchain-system"],
        "exclude_tags": None
    },
    # Full qemu run on other architectures: machine plus toolchain-user tagged tests.
    "arch-qemu-full-others": {
        "run_all_tests": True,
        "run_tests":None,
        "skips": None,
        "machine": None,
        "select_tags":["machine", "toolchain-user"],
        "exclude_tags": None
    },
    # General selftest run: everything except machine/toolchain-tagged tests,
    # with a few specific tests skipped.
    "selftest": {
        "run_all_tests": True,
        "run_tests":None,
        "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"],
        "machine": None,
        "select_tags":None,
        "exclude_tags": ["machine", "toolchain-system", "toolchain-user"]
    },
    # Bringup run: like "selftest" but the reproducible tests are not skipped.
    "bringup": {
        "run_all_tests": True,
        "run_tests":None,
        "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"],
        "machine": None,
        "select_tags":None,
        "exclude_tags": ["machine", "toolchain-system", "toolchain-user"]
    }
}
76
77def test_has_at_least_one_matching_tag(test, tag_list):
78 return "oetags" in test and any(oetag in tag_list for oetag in test["oetags"])
79
def all_tests_have_at_least_one_matching_tag(results, tag_list):
    """Return True when every non-ptest result in *results* has at least one tag from *tag_list*."""
    for test_name, test_result in results.items():
        # ptest results carry no oetags; they never disqualify the run.
        if test_name.startswith("ptestresult"):
            continue
        if not test_has_at_least_one_matching_tag(test_result, tag_list):
            return False
    return True
82
def any_test_have_any_matching_tag(results, tag_list):
    """Return True when at least one result in *results* carries a tag from *tag_list*."""
    for test in results.values():
        if test_has_at_least_one_matching_tag(test, tag_list):
            return True
    return False
85
def have_skipped_test(result, test_prefix):
    """
    Return True when every test in *result* whose name starts with *test_prefix*
    has status "SKIPPED" (vacuously True when no test matches the prefix).
    """
    matching = (name for name in result if name.startswith(test_prefix))
    return all(result[name]['status'] == "SKIPPED" for name in matching)
88
def have_all_tests_skipped(result, test_prefixes_list):
    """Return True when, for every prefix in *test_prefixes_list*, all matching tests were skipped."""
    for prefix in test_prefixes_list:
        if not have_skipped_test(result, prefix):
            return False
    return True
91
def guess_oeselftest_metadata(results):
    """
    When an oeselftest test result is lacking OESELFTEST_METADATA, we can try to guess it based on results content.
    Check results for specific values (absence/presence of oetags, number and name of executed tests...),
    and if it matches one of known configuration from autobuilder configuration, apply guessed OSELFTEST_METADATA
    to it to allow proper test filtering.
    This guessing process is tightly coupled to config.json in autobuilder. It should trigger less and less,
    as new tests will have OESELFTEST_METADATA properly appended at test reporting time
    """

    # Single-test run of the Yocto source mirror check -> posttrigger builder.
    if len(results) == 1 and "buildoptions.SourceMirroring.test_yocto_source_mirror" in results:
        return OESELFTEST_METADATA_GUESS_TABLE['trigger-build-posttrigger']
    # Only reproducible-build tests present.
    if all(name.startswith("reproducible") for name in results):
        return OESELFTEST_METADATA_GUESS_TABLE['reproducible']
    # Tag-based matching, ordered from narrowest to broadest tag set.
    if all_tests_have_at_least_one_matching_tag(results, ["machine"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-quick']
    if all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-system"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-x86-or-x86_64']
    if all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-user"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-others']
    # No tagged tests at all: distinguish "selftest" from "bringup" by which
    # tests were skipped during the run.
    if not any_test_have_any_matching_tag(results, ["machine", "toolchain-user", "toolchain-system"]):
        if have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"]):
            return OESELFTEST_METADATA_GUESS_TABLE['selftest']
        if have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"]):
            return OESELFTEST_METADATA_GUESS_TABLE['bringup']

    # No known configuration matched.
    return None
119
120
def metadata_matches(base_configuration, target_configuration):
    """
    For passed base and target, check test type. If test type matches one of
    properties described in METADATA_MATCH_TABLE, compare metadata if it is
    present in base. Return true if metadata matches, or if base lacks some
    data (either TEST_TYPE or the corresponding metadata)
    """
    # Unknown/absent test types have no metadata requirement.
    metadata_key = METADATA_MATCH_TABLE.get(base_configuration.get('TEST_TYPE'))
    if metadata_key is None:
        return True

    return base_configuration.get(metadata_key) == target_configuration.get(metadata_key)
137
138
def machine_matches(base_configuration, target_configuration):
    """Two runs are only comparable when they targeted the same MACHINE."""
    base_machine = base_configuration.get('MACHINE')
    target_machine = target_configuration.get('MACHINE')
    return base_machine == target_machine
141
142
def can_be_compared(logger, base, target):
    """
    Some tests are not relevant to be compared, for example some oeselftest
    run with different tests sets or parameters. Return true if tests can be
    compared

    NOTE: as a side effect this may enrich base/target configurations in place
    with a guessed OESELFTEST_METADATA entry for old oeselftest results.
    """
    ret = True
    base_configuration = base['configuration']
    target_configuration = target['configuration']

    # Older test results lack proper OESELFTEST_METADATA: if not present, try to guess it based on tests results.
    if base_configuration.get('TEST_TYPE') == 'oeselftest' and 'OESELFTEST_METADATA' not in base_configuration:
        guess = guess_oeselftest_metadata(base['result'])
        if guess is None:
            logger.error(f"ERROR: did not manage to guess oeselftest metadata for {base_configuration['STARTTIME']}")
        else:
            logger.debug(f"Enriching {base_configuration['STARTTIME']} with {guess}")
            # Mutates the configuration so metadata_matches() below can use it.
            base_configuration['OESELFTEST_METADATA'] = guess
    # Same guessing step for the target side.
    if target_configuration.get('TEST_TYPE') == 'oeselftest' and 'OESELFTEST_METADATA' not in target_configuration:
        guess = guess_oeselftest_metadata(target['result'])
        if guess is None:
            logger.error(f"ERROR: did not manage to guess oeselftest metadata for {target_configuration['STARTTIME']}")
        else:
            logger.debug(f"Enriching {target_configuration['STARTTIME']} with {guess}")
            target_configuration['OESELFTEST_METADATA'] = guess

    # Test runs with LTP results in should only be compared with other runs with LTP tests in them
    if base_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in base['result']):
        ret = target_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in target['result'])

    # Comparable only if the LTP constraint, metadata and MACHINE all agree.
    return ret and metadata_matches(base_configuration, target_configuration) \
        and machine_matches(base_configuration, target_configuration)
175
176
def compare_result(logger, base_name, target_name, base_result, target_result):
    """
    Compare two result sets testcase by testcase.

    For every testcase in the base set, compare its status against the same
    testcase in the target set (a testcase missing from the target compares as
    status None, so it is reported as a difference).

    logger: logger for reporting malformed base entries
    base_name/target_name: labels used in the human-readable summary
    base_result/target_result: result dicts containing a 'result' mapping of
        testcase name -> {'status': ...}

    Returns (result, resultstring) where result maps each differing testcase
    to {'base': ..., 'target': ...} (empty when the sets match), and
    resultstring is a printable summary.
    """
    base_result = base_result.get('result')
    target_result = target_result.get('result')
    result = {}
    if base_result and target_result:
        for k in base_result:
            base_testcase = base_result[k]
            base_status = base_testcase.get('status')
            if base_status:
                # Missing target testcase yields target_status None -> reported as a change.
                target_testcase = target_result.get(k, {})
                target_status = target_testcase.get('status')
                if base_status != target_status:
                    result[k] = {'base': base_status, 'target': target_status}
            else:
                # Fixed grammar in the log message ("retrieved" -> "retrieve").
                logger.error('Failed to retrieve base test case status: %s' % k)
    if result:
        resultstring = "Regression: %s\n %s\n" % (base_name, target_name)
        for k in sorted(result):
            resultstring += ' %s: %s -> %s\n' % (k, result[k]['base'], result[k]['target'])
    else:
        resultstring = "Match: %s\n %s" % (base_name, target_name)
    return result, resultstring
199
def get_results(logger, source):
    """Load result data from *source* (file/directory/URL) using the shared regression config map."""
    data = resultutils.load_resultsdata(source, configmap=resultutils.regression_map)
    return data
202
def regression(args, logger):
    """Entry point for the 'regression' subcommand: load both result sets and compare them."""
    base = get_results(logger, args.base_result)
    target = get_results(logger, args.target_result)
    regression_common(args, logger, base, target)
208
def regression_common(args, logger, base_results, target_results):
    """
    Core comparison: pair up base and target results sharing the same key,
    report matches, regressions and base entries missing from the target.
    Output is printed; returns 0.
    """
    # Optionally narrow each side to a single result ID.
    if args.base_result_id:
        base_results = resultutils.filter_resultsdata(base_results, args.base_result_id)
    if args.target_result_id:
        target_results = resultutils.filter_resultsdata(target_results, args.target_result_id)

    matches = []
    regressions = []
    notfound = []

    for a in base_results:
        if a in target_results:
            base = list(base_results[a].keys())
            target = list(target_results[a].keys())
            # We may have multiple base/targets which are for different configurations. Start by
            # removing any pairs which match
            # (iterate over copies because matched entries are removed in place).
            for c in base.copy():
                for b in target.copy():
                    if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
                        continue
                    res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b])
                    if not res:
                        # Exact match: record it and pair these two off.
                        matches.append(resstr)
                        base.remove(c)
                        target.remove(b)
                        break
            # Should only now see regressions, we may not be able to match multiple pairs directly
            for c in base:
                for b in target:
                    if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
                        continue
                    res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b])
                    if res:
                        regressions.append(resstr)
        else:
            notfound.append("%s not found in target" % a)
    print("\n".join(sorted(matches)))
    print("\n".join(sorted(regressions)))
    print("\n".join(sorted(notfound)))

    return 0
250
# Some test case naming is poor and contains random strings, particularly lttng/babeltrace.
# Truncating the test names works since they contain file and line number identifiers
# which allows us to match them without the random components.
def fixup_ptest_names(results, logger):
    """Rename known unstable ptest case names in place, dropping their random suffixes."""
    for suite in results:
        for run in results[suite]:
            result_map = results[suite][run]['result']
            # Snapshot the keys since entries are renamed while we walk them.
            for name in list(result_map.keys()):
                truncated = None
                if "_-_" in name and name.startswith(("ptestresult.lttng-tools.", "ptestresult.babeltrace.", "ptestresult.babeltrace2")):
                    truncated = name.split("_-_")[0]
                elif "__" in name and name.startswith(("ptestresult.curl.")):
                    truncated = name.split("__")[0]
                if truncated:
                    result_map[truncated] = result_map.pop(name)
267
268
def regression_git(args, logger):
    """
    Entry point for the 'regression-git' subcommand: pick two tagged test
    revisions from a results git repository, load their results and run the
    regression comparison. Returns 0 on success, 1 on selection errors.
    """
    base_results = {}
    target_results = {}

    # Tag layout used by the results archive for locating test revisions.
    tag_name = "{branch}/{commit_number}-g{commit}/{tag_number}"
    repo = GitRepo(args.repo)

    revs = gitarchive.get_test_revs(logger, repo, tag_name, branch=args.branch)

    if args.branch2:
        # Cross-branch comparison: the comparison revisions come from branch2.
        revs2 = gitarchive.get_test_revs(logger, repo, tag_name, branch=args.branch2)
        if not len(revs2):
            logger.error("No revisions found to compare against")
            return 1
        if not len(revs):
            logger.error("No revision to report on found")
            return 1
    else:
        # Same-branch comparison needs at least two revisions to diff.
        if len(revs) < 2:
            logger.error("Only %d tester revisions found, unable to generate report" % len(revs))
            return 1

    # Pick revisions
    if args.commit:
        if args.commit_number:
            logger.warning("Ignoring --commit-number as --commit was specified")
        index1 = gitarchive.rev_find(revs, 'commit', args.commit)
    elif args.commit_number:
        index1 = gitarchive.rev_find(revs, 'commit_number', args.commit_number)
    else:
        # Default to the most recent revision on the primary branch.
        index1 = len(revs) - 1

    if args.branch2:
        # Append the chosen base revision to the branch2 list and search for
        # the comparison revision within that combined list.
        revs2.append(revs[index1])
        index1 = len(revs2) - 1
        revs = revs2

    if args.commit2:
        if args.commit_number2:
            logger.warning("Ignoring --commit-number2 as --commit2 was specified")
        index2 = gitarchive.rev_find(revs, 'commit', args.commit2)
    elif args.commit_number2:
        index2 = gitarchive.rev_find(revs, 'commit_number', args.commit_number2)
    else:
        if index1 > 0:
            index2 = index1 - 1
            # Find the closest matching commit number for comparision
            # In future we could check the commit is a common ancestor and
            # continue back if not but this good enough for now
            while index2 > 0 and revs[index2].commit_number > revs[index1].commit_number:
                index2 = index2 - 1
        else:
            logger.error("Unable to determine the other commit, use "
                    "--commit2 or --commit-number2 to specify it")
            return 1

    logger.info("Comparing:\n%s\nto\n%s\n" % (revs[index1], revs[index2]))

    # revs entries are tuples; element [2] is the tag/ref the results live under.
    base_results = resultutils.git_get_result(repo, revs[index1][2])
    target_results = resultutils.git_get_result(repo, revs[index2][2])

    fixup_ptest_names(base_results, logger)
    fixup_ptest_names(target_results, logger)

    regression_common(args, logger, base_results, target_results)

    return 0
336
def register_commands(subparsers):
    """Register subcommands from this plugin"""

    # 'regression': compare two result files/directories/URLs directly.
    parser = subparsers.add_parser('regression', help='regression file/directory analysis',
                                   description='regression analysis comparing the base set of results to the target results',
                                   group='analysis')
    parser.set_defaults(func=regression)
    parser.add_argument('base_result',
                        help='base result file/directory/URL for the comparison')
    parser.add_argument('target_result',
                        help='target result file/directory/URL to compare with')
    parser.add_argument('-b', '--base-result-id', default='',
                        help='(optional) filter the base results to this result ID')
    parser.add_argument('-t', '--target-result-id', default='',
                        help='(optional) filter the target results to this result ID')

    # 'regression-git': compare tagged result sets stored in a git repository.
    parser = subparsers.add_parser('regression-git', help='regression git analysis',
                                   description='regression analysis comparing base result set to target result set',
                                   group='analysis')
    parser.set_defaults(func=regression_git)
    parser.add_argument('repo',
                        help='the git repository containing the data')
    parser.add_argument('-b', '--base-result-id', default='',
                        help='(optional) default select regression based on configurations unless base result id was provided')
    parser.add_argument('-t', '--target-result-id', default='',
                        help='(optional) default select regression based on configurations unless target result id was provided')

    parser.add_argument('--branch', '-B', default='master', help="Branch to find commit in")
    parser.add_argument('--branch2', help="Branch to find comparision revisions in")
    parser.add_argument('--commit', help="Revision to search for")
    parser.add_argument('--commit-number', help="Revision number to search for, redundant if --commit is specified")
    parser.add_argument('--commit2', help="Revision to compare with")
    parser.add_argument('--commit-number2', help="Revision number to compare with, redundant if --commit2 is specified")
373