Brad Bishop | 40320b1 | 2019-03-26 16:08:25 -0400 | [diff] [blame] | 1 | # resulttool - regression analysis |
| 2 | # |
| 3 | # Copyright (c) 2019, Intel Corporation. |
| 4 | # Copyright (c) 2019, Linux Foundation |
| 5 | # |
Brad Bishop | c342db3 | 2019-05-15 21:57:59 -0400 | [diff] [blame] | 6 | # SPDX-License-Identifier: GPL-2.0-only |
Brad Bishop | 40320b1 | 2019-03-26 16:08:25 -0400 | [diff] [blame] | 7 | # |
Brad Bishop | c342db3 | 2019-05-15 21:57:59 -0400 | [diff] [blame] | 8 | |
Brad Bishop | 40320b1 | 2019-03-26 16:08:25 -0400 | [diff] [blame] | 9 | import resulttool.resultutils as resultutils |
Brad Bishop | 40320b1 | 2019-03-26 16:08:25 -0400 | [diff] [blame] | 10 | |
| 11 | from oeqa.utils.git import GitRepo |
| 12 | import oeqa.utils.gitarchive as gitarchive |
| 13 | |
# Maps a TEST_TYPE value to the configuration key whose content must be equal
# in two test runs of that type for them to be considered comparable.
METADATA_MATCH_TABLE = {
    "oeselftest": "OESELFTEST_METADATA",
}
| 17 | |
# Known oeselftest run profiles, keyed by a profile name. Each entry is the
# OESELFTEST_METADATA dictionary that gets attached to a test run which lacks
# that metadata once the run has been recognised as matching the profile.
OESELFTEST_METADATA_GUESS_TABLE = {
    # A single source-mirroring test executed on its own.
    "trigger-build-posttrigger": {
        "run_all_tests": False,
        "run_tests": ["buildoptions.SourceMirroring.test_yocto_source_mirror"],
        "skips": None,
        "machine": None,
        "select_tags": None,
        "exclude_tags": None,
    },
    # Only the reproducibility tests.
    "reproducible": {
        "run_all_tests": False,
        "run_tests": ["reproducible"],
        "skips": None,
        "machine": None,
        "select_tags": None,
        "exclude_tags": None,
    },
    # Everything tagged "machine".
    "arch-qemu-quick": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": None,
        "machine": None,
        "select_tags": ["machine"],
        "exclude_tags": None,
    },
    # Everything tagged "machine" or "toolchain-system".
    "arch-qemu-full-x86-or-x86_64": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": None,
        "machine": None,
        "select_tags": ["machine", "toolchain-system"],
        "exclude_tags": None,
    },
    # Everything tagged "machine" or "toolchain-user".
    "arch-qemu-full-others": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": None,
        "machine": None,
        "select_tags": ["machine", "toolchain-user"],
        "exclude_tags": None,
    },
    # All untagged tests, minus three explicitly skipped test groups.
    "selftest": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": [
            "distrodata.Distrodata.test_checkpkg",
            "buildoptions.SourceMirroring.test_yocto_source_mirror",
            "reproducible",
        ],
        "machine": None,
        "select_tags": None,
        "exclude_tags": ["machine", "toolchain-system", "toolchain-user"],
    },
    # Same as "selftest" except that the reproducibility tests are not skipped.
    "bringup": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": [
            "distrodata.Distrodata.test_checkpkg",
            "buildoptions.SourceMirroring.test_yocto_source_mirror",
        ],
        "machine": None,
        "select_tags": None,
        "exclude_tags": ["machine", "toolchain-system", "toolchain-user"],
    },
}
| 76 | |
| 77 | def test_has_at_least_one_matching_tag(test, tag_list): |
| 78 | return "oetags" in test and any(oetag in tag_list for oetag in test["oetags"]) |
| 79 | |
def all_tests_have_at_least_one_matching_tag(results, tag_list):
    """
    Tell whether every test in results carries at least one tag from tag_list.

    results maps test names to test result mappings. Tests whose name starts
    with "ptestresult" are exempt from the tag requirement. An empty results
    mapping yields True.
    """
    for test_name, test_result in results.items():
        if test_has_at_least_one_matching_tag(test_result, tag_list):
            continue
        if not test_name.startswith("ptestresult"):
            return False
    return True
| 82 | |
def any_test_have_any_matching_tag(results, tag_list):
    """
    Tell whether at least one test in results carries a tag from tag_list.

    results maps test names to test result mappings; only the values are
    inspected. An empty results mapping yields False.
    """
    for test in results.values():
        if test_has_at_least_one_matching_tag(test, tag_list):
            return True
    return False
| 85 | |
def have_skipped_test(result, test_prefix):
    """
    Tell whether every test whose name starts with test_prefix was skipped.

    result maps test names to mappings holding a 'status' entry. Returns True
    when no test name starts with test_prefix.
    """
    for test_name, outcome in result.items():
        if test_name.startswith(test_prefix) and outcome['status'] != "SKIPPED":
            return False
    return True
| 88 | |
def have_all_tests_skipped(result, test_prefixes_list):
    """
    Tell whether, for each prefix in test_prefixes_list, all tests of result
    whose name starts with that prefix were skipped.

    An empty prefix list yields True.
    """
    for test_prefix in test_prefixes_list:
        if not have_skipped_test(result, test_prefix):
            return False
    return True
| 91 | |
def guess_oeselftest_metadata(results):
    """
    Guess the OESELFTEST_METADATA of an oeselftest run from its results.

    results maps test names to test result mappings. The content (which tests
    are present, which oetags they carry, which tests were skipped) is checked
    against the known profiles of OESELFTEST_METADATA_GUESS_TABLE, in a fixed
    order, and the first matching profile entry is returned. None is returned
    when no profile matches.

    This guessing is tightly coupled to config.json in the autobuilder. It
    should trigger less and less, as new test results have OESELFTEST_METADATA
    properly appended at test reporting time.

    NOTE(review): an empty results mapping fails the single-test check and
    then satisfies the "all names start with reproducible" check, so it is
    reported as the 'reproducible' profile.
    """
    profiles = OESELFTEST_METADATA_GUESS_TABLE
    source_mirror_test = "buildoptions.SourceMirroring.test_yocto_source_mirror"

    if len(results) == 1 and source_mirror_test in results:
        return profiles['trigger-build-posttrigger']
    if all(test_name.startswith("reproducible") for test_name in results):
        return profiles['reproducible']

    # Order matters: the narrowest tag selection is tried first.
    if all_tests_have_at_least_one_matching_tag(results, ["machine"]):
        return profiles['arch-qemu-quick']
    if all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-system"]):
        return profiles['arch-qemu-full-x86-or-x86_64']
    if all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-user"]):
        return profiles['arch-qemu-full-others']

    # The remaining profiles only apply to runs without any tagged test.
    if any_test_have_any_matching_tag(results, ["machine", "toolchain-user", "toolchain-system"]):
        return None

    if have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"]):
        return profiles['selftest']
    if have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"]):
        return profiles['bringup']

    return None
| 119 | |
| 120 | |
def metadata_matches(base_configuration, target_configuration):
    """
    Check that base and target carry the same type-specific metadata.

    The TEST_TYPE of base selects, through METADATA_MATCH_TABLE, the
    configuration key to compare. Return True when base has no TEST_TYPE or
    when its TEST_TYPE is not listed in METADATA_MATCH_TABLE. Otherwise return
    whether the value stored under the selected key is equal in both
    configurations; a key missing from a configuration is read as None, so
    metadata present on one side only is a mismatch.
    """
    try:
        metadata_key = METADATA_MATCH_TABLE[base_configuration.get('TEST_TYPE')]
    except KeyError:
        # Unknown or missing test type: nothing to compare.
        return True

    base_metadata = base_configuration.get(metadata_key)
    target_metadata = target_configuration.get(metadata_key)
    return not (target_metadata != base_metadata)
| 137 | |
| 138 | |
def machine_matches(base_configuration, target_configuration):
    """
    Tell whether both configurations hold the same 'MACHINE' value.

    A configuration without a 'MACHINE' entry is read as None, so two
    configurations that both lack it are considered matching.
    """
    base_machine = base_configuration.get('MACHINE')
    target_machine = target_configuration.get('MACHINE')
    return base_machine == target_machine
| 141 | |
| 142 | |
def _enrich_oeselftest_metadata(logger, testrun):
    """
    Attach a guessed OESELFTEST_METADATA to an oeselftest run that lacks it.

    Older test results lack proper OESELFTEST_METADATA: when it is absent, it
    is guessed from the test results and stored in the run configuration. The
    run is left untouched (and an error is logged) when no guess can be made.
    """
    configuration = testrun['configuration']
    if configuration.get('TEST_TYPE') != 'oeselftest' or 'OESELFTEST_METADATA' in configuration:
        return

    guess = guess_oeselftest_metadata(testrun['result'])
    # STARTTIME is only used to identify the run in the log: do not fail if it is missing.
    starttime = configuration.get('STARTTIME')
    if guess is None:
        logger.error("ERROR: did not manage to guess oeselftest metadata for %s", starttime)
    else:
        logger.debug("Enriching %s with %s", starttime, guess)
        configuration['OESELFTEST_METADATA'] = guess


def _is_runtime_with_ltp(testrun):
    """Tell whether testrun is a 'runtime' run holding at least one "ltpresult" test."""
    return testrun['configuration'].get('TEST_TYPE') == 'runtime' \
        and any(test_name.startswith("ltpresult") for test_name in testrun['result'])


def can_be_compared(logger, base, target):
    """
    Some tests are not relevant to be compared, for example some oeselftest
    run with different tests sets or parameters. Return true if tests can be
    compared.

    base and target are test runs, each holding a 'configuration' mapping and
    a 'result' mapping keyed by test name. As a side effect, an oeselftest run
    without OESELFTEST_METADATA gets a guessed one stored in its configuration
    when a guess is possible.

    Two runs are comparable when all of the following hold:
    - if base is a runtime run with LTP results, target is one too
    - their type-specific metadata matches (see metadata_matches)
    - they were run for the same MACHINE
    """
    _enrich_oeselftest_metadata(logger, base)
    _enrich_oeselftest_metadata(logger, target)

    base_configuration = base['configuration']
    target_configuration = target['configuration']

    # Test runs with LTP results in should only be compared with other runs with LTP tests in them
    ret = True
    if _is_runtime_with_ltp(base):
        ret = _is_runtime_with_ltp(target)

    return ret and metadata_matches(base_configuration, target_configuration) \
        and machine_matches(base_configuration, target_configuration)
| 175 | |
| 176 | |
def compare_result(logger, base_name, target_name, base_result, target_result):
    """
    Compare the test statuses of a base test run against a target test run.

    base_result and target_result are test runs whose 'result' entry maps test
    names to mappings holding a 'status'. Only tests present in base are
    examined; a test missing from target has a target status of None. A base
    test without status is reported through logger.error and ignored.

    Return a (result, resultstring) tuple:
    - no status difference (or one of the runs has no results): result is an
      empty dict and resultstring is a "Match" report
    - every difference is a test now passing in target: result is None and
      resultstring is an "Improvement" report
    - otherwise: result maps each differing test name to its
      {'base': status, 'target': status} and resultstring is a "Regression"
      report listing the tests which are not passing in target
    """
    base_tests = base_result.get('result')
    target_tests = target_result.get('result')

    changes = {}
    if base_tests and target_tests:
        for test_name, base_testcase in base_tests.items():
            base_status = base_testcase.get('status')
            if not base_status:
                logger.error('Failed to retrieved base test case status: %s' % test_name)
                continue
            target_status = target_tests.get(test_name, {}).get('status')
            if target_status != base_status:
                changes[test_name] = {'base': base_status, 'target': target_status}

    if not changes:
        return changes, "Match: %s\n %s" % (base_name, target_name)

    def passes(status):
        return status is not None and status.startswith("PASS")

    new_pass_count = sum(passes(change['target']) for change in changes.values())

    # Print a regression report only if at least one test has a regression status (FAIL, SKIPPED, absent...)
    if new_pass_count >= len(changes):
        improvement = "Improvement: %s\n %s\n (+%d test(s) passing)" % (base_name, target_name, new_pass_count)
        return None, improvement

    report = ["Regression: %s\n %s\n" % (base_name, target_name)]
    for test_name in sorted(changes):
        change = changes[test_name]
        if not passes(change['target']):
            report.append(' %s: %s -> %s\n' % (test_name, change['base'], change['target']))
    if new_pass_count > 0:
        report.append(f' Additionally, {new_pass_count} previously failing test(s) is/are now passing\n')
    return changes, "".join(report)
| 208 | |
def get_results(logger, source):
    """
    Load the results data found at source.

    The data is loaded through resultutils.load_resultsdata using
    resultutils.regression_map as configuration map. logger is accepted but
    not used here.
    """
    configmap = resultutils.regression_map
    return resultutils.load_resultsdata(source, configmap=configmap)
| 211 | |
def regression(args, logger):
    """
    Entry point of the 'regression' subcommand.

    Load the base results from args.base_result, then the target results from
    args.target_result, and hand both over to regression_common for
    comparison and reporting. Nothing is returned.
    """
    loaded = [get_results(logger, location) for location in (args.base_result, args.target_result)]
    regression_common(args, logger, *loaded)
| 217 | |
Andrew Geissler | fc113ea | 2023-03-31 09:59:46 -0500 | [diff] [blame^] | 218 | # Some test case naming is poor and contains random strings, particularly lttng/babeltrace. |
| 219 | # Truncating the test names works since they contain file and line number identifiers |
| 220 | # which allows us to match them without the random components. |
def fixup_ptest_names(results, logger):
    """
    Rename, in place, ptest results whose name holds a random component.

    results is a two-level mapping whose innermost values hold a 'result'
    mapping keyed by test name. Matching test names are truncated as follows:
    - lttng-tools / babeltrace / babeltrace2: everything from "_-_" is dropped
    - curl / dbus: everything from "__" is dropped
    - binutils names containing "build-st-": everything from the first space
      is dropped
    - gcc names containing "/tmp/runtest.": only the first two dot-separated
      components are kept
    When several tests end up with the same truncated name, the last one
    processed wins. logger is accepted but not used here.
    """
    for r in results:
        for i in results[r]:
            tests = results[r][i]['result']
            # Iterate over a snapshot of the names since the mapping is modified on the way.
            for test in list(tests):
                new = None
                if test.startswith(("ptestresult.lttng-tools.", "ptestresult.babeltrace.", "ptestresult.babeltrace2")) and "_-_" in test:
                    new = test.split("_-_")[0]
                elif test.startswith(("ptestresult.curl.", "ptestresult.dbus.")) and "__" in test:
                    new = test.split("__")[0]
                elif test.startswith("ptestresult.binutils") and "build-st-" in test:
                    new = test.split(" ")[0]
                elif test.startswith("ptestresult.gcc") and "/tmp/runtest." in test:
                    new = ".".join(test.split(".")[:2])
                # Truncation may leave the name unchanged (e.g. a binutils name
                # without any space): renaming would then drop the result.
                if new and new != test:
                    tests[new] = tests.pop(test)
| 240 | |
def regression_common(args, logger, base_results, target_results):
    """
    Compare base results against target results and print a report.

    Both result sets are first optionally filtered on args.base_result_id /
    args.target_result_id, then their ptest names are normalised with
    fixup_ptest_names. For each top-level key of base_results also found in
    target_results, base and target test runs are paired and compared; keys
    missing from target_results are reported as not found.

    Printed, in this order: the sorted reports of pairs without regression,
    a separator, the sorted regression reports, the sorted not-found messages.
    Always returns 0.
    """
    if args.base_result_id:
        base_results = resultutils.filter_resultsdata(base_results, args.base_result_id)
    if args.target_result_id:
        target_results = resultutils.filter_resultsdata(target_results, args.target_result_id)

    fixup_ptest_names(base_results, logger)
    fixup_ptest_names(target_results, logger)

    matches, regressions, notfound = [], [], []

    for config in base_results:
        if config not in target_results:
            notfound.append("%s not found in target" % config)
            continue

        base_runs = base_results[config]
        target_runs = target_results[config]
        pending_base = list(base_runs.keys())
        pending_target = list(target_runs.keys())

        # We may have multiple base/targets which are for different configurations. Start by
        # removing any pairs which match
        for base_id in pending_base.copy():
            for target_id in pending_target.copy():
                if not can_be_compared(logger, base_runs[base_id], target_runs[target_id]):
                    continue
                res, resstr = compare_result(logger, base_id, target_id, base_runs[base_id], target_runs[target_id])
                if res:
                    continue
                # No regression for this pair: both runs are settled.
                matches.append(resstr)
                pending_base.remove(base_id)
                pending_target.remove(target_id)
                break

        # Should only now see regressions, we may not be able to match multiple pairs directly
        for base_id in pending_base:
            for target_id in pending_target:
                if not can_be_compared(logger, base_runs[base_id], target_runs[target_id]):
                    continue
                res, resstr = compare_result(logger, base_id, target_id, base_runs[base_id], target_runs[target_id])
                if res:
                    regressions.append(resstr)

    print("\n".join(sorted(matches)))
    print("\n")
    print("\n".join(sorted(regressions)))
    print("\n".join(sorted(notfound)))
    return 0
| 285 | |
def regression_git(args, logger):
    """
    Entry point of the 'regression-git' subcommand.

    Look up the tested revisions of args.branch (and of args.branch2 when
    given) in the git repository args.repo, pick the two revisions to
    compare, load their results and hand them over to regression_common.

    The first revision is selected with args.commit, else args.commit_number,
    else it is the last revision found. The second one is selected with
    args.commit2, else args.commit_number2, else it is searched backwards
    from the first one.

    Return 1 when the revisions to compare cannot be determined, 0 otherwise.
    """
    tag_name = "{branch}/{commit_number}-g{commit}/{tag_number}"
    repo = GitRepo(args.repo)

    revs = gitarchive.get_test_revs(logger, repo, tag_name, branch=args.branch)

    if args.branch2:
        revs2 = gitarchive.get_test_revs(logger, repo, tag_name, branch=args.branch2)
        if len(revs2) == 0:
            logger.error("No revisions found to compare against")
            return 1
        if len(revs) == 0:
            logger.error("No revision to report on found")
            return 1
    elif len(revs) < 2:
        logger.error("Only %d tester revisions found, unable to generate report" % len(revs))
        return 1

    # Pick the first revision
    if args.commit:
        if args.commit_number:
            logger.warning("Ignoring --commit-number as --commit was specified")
        index1 = gitarchive.rev_find(revs, 'commit', args.commit)
    elif args.commit_number:
        index1 = gitarchive.rev_find(revs, 'commit_number', args.commit_number)
    else:
        index1 = len(revs) - 1

    if args.branch2:
        # Search the second revision among the revisions of the second branch,
        # with the first revision appended at the end of that list.
        revs2.append(revs[index1])
        index1 = len(revs2) - 1
        revs = revs2

    # Pick the second revision
    if args.commit2:
        if args.commit_number2:
            logger.warning("Ignoring --commit-number2 as --commit2 was specified")
        index2 = gitarchive.rev_find(revs, 'commit', args.commit2)
    elif args.commit_number2:
        index2 = gitarchive.rev_find(revs, 'commit_number', args.commit_number2)
    elif index1 > 0:
        index2 = index1 - 1
        # Find the closest matching commit number for comparison
        # In future we could check the commit is a common ancestor and
        # continue back if not but this good enough for now
        while index2 > 0 and revs[index2].commit_number > revs[index1].commit_number:
            index2 = index2 - 1
    else:
        logger.error("Unable to determine the other commit, use "
                     "--commit2 or --commit-number2 to specify it")
        return 1

    logger.info("Comparing:\n%s\nto\n%s\n" % (revs[index1], revs[index2]))

    base_results = resultutils.git_get_result(repo, revs[index1][2])
    target_results = resultutils.git_get_result(repo, revs[index2][2])

    regression_common(args, logger, base_results, target_results)

    return 0
| 350 | |
def register_commands(subparsers):
    """
    Register subcommands from this plugin.

    Two subcommands are added to subparsers: 'regression', handled by
    regression(), and 'regression-git', handled by regression_git().
    """
    # NOTE(review): 'group' is not a standard argparse add_parser() keyword;
    # presumably it is handled by the subparsers object supplied by the tool - confirm.
    file_parser = subparsers.add_parser(
        'regression',
        help='regression file/directory analysis',
        description='regression analysis comparing the base set of results to the target results',
        group='analysis')
    file_parser.set_defaults(func=regression)
    file_parser.add_argument(
        'base_result',
        help='base result file/directory/URL for the comparison')
    file_parser.add_argument(
        'target_result',
        help='target result file/directory/URL to compare with')
    file_parser.add_argument(
        '-b', '--base-result-id', default='',
        help='(optional) filter the base results to this result ID')
    file_parser.add_argument(
        '-t', '--target-result-id', default='',
        help='(optional) filter the target results to this result ID')

    git_parser = subparsers.add_parser(
        'regression-git',
        help='regression git analysis',
        description='regression analysis comparing base result set to target '
                    'result set',
        group='analysis')
    git_parser.set_defaults(func=regression_git)
    git_parser.add_argument(
        'repo',
        help='the git repository containing the data')
    git_parser.add_argument(
        '-b', '--base-result-id', default='',
        help='(optional) default select regression based on configurations unless base result '
             'id was provided')
    git_parser.add_argument(
        '-t', '--target-result-id', default='',
        help='(optional) default select regression based on configurations unless target result '
             'id was provided')

    # Revision selection
    git_parser.add_argument('--branch', '-B', default='master', help="Branch to find commit in")
    git_parser.add_argument('--branch2', help="Branch to find comparision revisions in")
    git_parser.add_argument('--commit', help="Revision to search for")
    git_parser.add_argument('--commit-number', help="Revision number to search for, redundant if --commit is specified")
    git_parser.add_argument('--commit2', help="Revision to compare with")
    git_parser.add_argument('--commit-number2', help="Revision number to compare with, redundant if --commit2 is specified")
| 387 | |