| # resulttool - regression analysis |
| # |
| # Copyright (c) 2019, Intel Corporation. |
| # Copyright (c) 2019, Linux Foundation |
| # |
| # SPDX-License-Identifier: GPL-2.0-only |
| # |
| |
| import resulttool.resultutils as resultutils |
| |
| from oeqa.utils.git import GitRepo |
| import oeqa.utils.gitarchive as gitarchive |
| |
| METADATA_MATCH_TABLE = { |
| "oeselftest": "OESELFTEST_METADATA" |
| } |
| |
| OESELFTEST_METADATA_GUESS_TABLE={ |
| "trigger-build-posttrigger": { |
| "run_all_tests": False, |
| "run_tests":["buildoptions.SourceMirroring.test_yocto_source_mirror"], |
| "skips": None, |
| "machine": None, |
| "select_tags":None, |
| "exclude_tags": None |
| }, |
| "reproducible": { |
| "run_all_tests": False, |
| "run_tests":["reproducible"], |
| "skips": None, |
| "machine": None, |
| "select_tags":None, |
| "exclude_tags": None |
| }, |
| "arch-qemu-quick": { |
| "run_all_tests": True, |
| "run_tests":None, |
| "skips": None, |
| "machine": None, |
| "select_tags":["machine"], |
| "exclude_tags": None |
| }, |
| "arch-qemu-full-x86-or-x86_64": { |
| "run_all_tests": True, |
| "run_tests":None, |
| "skips": None, |
| "machine": None, |
| "select_tags":["machine", "toolchain-system"], |
| "exclude_tags": None |
| }, |
| "arch-qemu-full-others": { |
| "run_all_tests": True, |
| "run_tests":None, |
| "skips": None, |
| "machine": None, |
| "select_tags":["machine", "toolchain-user"], |
| "exclude_tags": None |
| }, |
| "selftest": { |
| "run_all_tests": True, |
| "run_tests":None, |
| "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"], |
| "machine": None, |
| "select_tags":None, |
| "exclude_tags": ["machine", "toolchain-system", "toolchain-user"] |
| }, |
| "bringup": { |
| "run_all_tests": True, |
| "run_tests":None, |
| "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"], |
| "machine": None, |
| "select_tags":None, |
| "exclude_tags": ["machine", "toolchain-system", "toolchain-user"] |
| } |
| } |
| |
| STATUS_STRINGS = { |
| "None": "No matching test result" |
| } |
| |
| REGRESSIONS_DISPLAY_LIMIT=50 |
| |
| MISSING_TESTS_BANNER = "-------------------------- Missing tests --------------------------" |
| ADDITIONAL_DATA_BANNER = "--------------------- Matches and improvements --------------------" |
| |
| def test_has_at_least_one_matching_tag(test, tag_list): |
| return "oetags" in test and any(oetag in tag_list for oetag in test["oetags"]) |
| |
| def all_tests_have_at_least_one_matching_tag(results, tag_list): |
| return all(test_has_at_least_one_matching_tag(test_result, tag_list) or test_name.startswith("ptestresult") for (test_name, test_result) in results.items()) |
| |
| def any_test_have_any_matching_tag(results, tag_list): |
| return any(test_has_at_least_one_matching_tag(test, tag_list) for test in results.values()) |
| |
| def have_skipped_test(result, test_prefix): |
| return all( result[test]['status'] == "SKIPPED" for test in result if test.startswith(test_prefix)) |
| |
| def have_all_tests_skipped(result, test_prefixes_list): |
| return all(have_skipped_test(result, test_prefix) for test_prefix in test_prefixes_list) |
| |
| def guess_oeselftest_metadata(results): |
| """ |
| When an oeselftest test result is lacking OESELFTEST_METADATA, we can try to guess it based on results content. |
| Check results for specific values (absence/presence of oetags, number and name of executed tests...), |
| and if it matches one of known configuration from autobuilder configuration, apply guessed OSELFTEST_METADATA |
| to it to allow proper test filtering. |
| This guessing process is tightly coupled to config.json in autobuilder. It should trigger less and less, |
| as new tests will have OESELFTEST_METADATA properly appended at test reporting time |
| """ |
| |
| if len(results) == 1 and "buildoptions.SourceMirroring.test_yocto_source_mirror" in results: |
| return OESELFTEST_METADATA_GUESS_TABLE['trigger-build-posttrigger'] |
| elif all(result.startswith("reproducible") for result in results): |
| return OESELFTEST_METADATA_GUESS_TABLE['reproducible'] |
| elif all_tests_have_at_least_one_matching_tag(results, ["machine"]): |
| return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-quick'] |
| elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-system"]): |
| return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-x86-or-x86_64'] |
| elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-user"]): |
| return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-others'] |
| elif not any_test_have_any_matching_tag(results, ["machine", "toolchain-user", "toolchain-system"]): |
| if have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"]): |
| return OESELFTEST_METADATA_GUESS_TABLE['selftest'] |
| elif have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"]): |
| return OESELFTEST_METADATA_GUESS_TABLE['bringup'] |
| |
| return None |
| |
| |
| def metadata_matches(base_configuration, target_configuration): |
| """ |
| For passed base and target, check test type. If test type matches one of |
| properties described in METADATA_MATCH_TABLE, compare metadata if it is |
| present in base. Return true if metadata matches, or if base lacks some |
| data (either TEST_TYPE or the corresponding metadata) |
| """ |
| test_type = base_configuration.get('TEST_TYPE') |
| if test_type not in METADATA_MATCH_TABLE: |
| return True |
| |
| metadata_key = METADATA_MATCH_TABLE.get(test_type) |
| if target_configuration.get(metadata_key) != base_configuration.get(metadata_key): |
| return False |
| |
| return True |
| |
| |
| def machine_matches(base_configuration, target_configuration): |
| return base_configuration.get('MACHINE') == target_configuration.get('MACHINE') |
| |
| |
| def can_be_compared(logger, base, target): |
| """ |
| Some tests are not relevant to be compared, for example some oeselftest |
| run with different tests sets or parameters. Return true if tests can be |
| compared |
| """ |
| ret = True |
| base_configuration = base['configuration'] |
| target_configuration = target['configuration'] |
| |
| # Older test results lack proper OESELFTEST_METADATA: if not present, try to guess it based on tests results. |
| if base_configuration.get('TEST_TYPE') == 'oeselftest' and 'OESELFTEST_METADATA' not in base_configuration: |
| guess = guess_oeselftest_metadata(base['result']) |
| if guess is None: |
| logger.error(f"ERROR: did not manage to guess oeselftest metadata for {base_configuration['STARTTIME']}") |
| else: |
| logger.debug(f"Enriching {base_configuration['STARTTIME']} with {guess}") |
| base_configuration['OESELFTEST_METADATA'] = guess |
| if target_configuration.get('TEST_TYPE') == 'oeselftest' and 'OESELFTEST_METADATA' not in target_configuration: |
| guess = guess_oeselftest_metadata(target['result']) |
| if guess is None: |
| logger.error(f"ERROR: did not manage to guess oeselftest metadata for {target_configuration['STARTTIME']}") |
| else: |
| logger.debug(f"Enriching {target_configuration['STARTTIME']} with {guess}") |
| target_configuration['OESELFTEST_METADATA'] = guess |
| |
| # Test runs with LTP results in should only be compared with other runs with LTP tests in them |
| if base_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in base['result']): |
| ret = target_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in target['result']) |
| |
| return ret and metadata_matches(base_configuration, target_configuration) \ |
| and machine_matches(base_configuration, target_configuration) |
| |
| def get_status_str(raw_status): |
| raw_status_lower = raw_status.lower() if raw_status else "None" |
| return STATUS_STRINGS.get(raw_status_lower, raw_status) |
| |
| def get_additional_info_line(new_pass_count, new_tests): |
| result=[] |
| if new_tests: |
| result.append(f'+{new_tests} test(s) present') |
| if new_pass_count: |
| result.append(f'+{new_pass_count} test(s) now passing') |
| |
| if not result: |
| return "" |
| |
| return ' -> ' + ', '.join(result) + '\n' |
| |
| def compare_result(logger, base_name, target_name, base_result, target_result, display_limit=None): |
| base_result = base_result.get('result') |
| target_result = target_result.get('result') |
| result = {} |
| new_tests = 0 |
| regressions = {} |
| resultstring = "" |
| new_tests = 0 |
| new_pass_count = 0 |
| |
| display_limit = int(display_limit) if display_limit else REGRESSIONS_DISPLAY_LIMIT |
| |
| if base_result and target_result: |
| for k in base_result: |
| base_testcase = base_result[k] |
| base_status = base_testcase.get('status') |
| if base_status: |
| target_testcase = target_result.get(k, {}) |
| target_status = target_testcase.get('status') |
| if base_status != target_status: |
| result[k] = {'base': base_status, 'target': target_status} |
| else: |
| logger.error('Failed to retrieved base test case status: %s' % k) |
| |
| # Also count new tests that were not present in base results: it |
| # could be newly added tests, but it could also highlights some tests |
| # renames or fixed faulty ptests |
| for k in target_result: |
| if k not in base_result: |
| new_tests += 1 |
| if result: |
| new_pass_count = sum(test['target'] is not None and test['target'].startswith("PASS") for test in result.values()) |
| # Print a regression report only if at least one test has a regression status (FAIL, SKIPPED, absent...) |
| if new_pass_count < len(result): |
| resultstring = "Regression: %s\n %s\n" % (base_name, target_name) |
| for k in sorted(result): |
| if not result[k]['target'] or not result[k]['target'].startswith("PASS"): |
| # Differentiate each ptest kind when listing regressions |
| key_parts = k.split('.') |
| key = '.'.join(key_parts[:2]) if k.startswith('ptest') else key_parts[0] |
| # Append new regression to corresponding test family |
| regressions[key] = regressions.setdefault(key, []) + [' %s: %s -> %s\n' % (k, get_status_str(result[k]['base']), get_status_str(result[k]['target']))] |
| resultstring += f" Total: {sum([len(regressions[r]) for r in regressions])} new regression(s):\n" |
| for k in regressions: |
| resultstring += f" {len(regressions[k])} regression(s) for {k}\n" |
| count_to_print=min([display_limit, len(regressions[k])]) if display_limit > 0 else len(regressions[k]) |
| resultstring += ''.join(regressions[k][:count_to_print]) |
| if count_to_print < len(regressions[k]): |
| resultstring+=' [...]\n' |
| if new_pass_count > 0: |
| resultstring += f' Additionally, {new_pass_count} previously failing test(s) is/are now passing\n' |
| if new_tests > 0: |
| resultstring += f' Additionally, {new_tests} new test(s) is/are present\n' |
| else: |
| resultstring = "%s\n%s\n" % (base_name, target_name) |
| result = None |
| else: |
| resultstring = "%s\n%s\n" % (base_name, target_name) |
| |
| if not result: |
| additional_info = get_additional_info_line(new_pass_count, new_tests) |
| if additional_info: |
| resultstring += additional_info |
| |
| return result, resultstring |
| |
| def get_results(logger, source): |
| return resultutils.load_resultsdata(source, configmap=resultutils.regression_map) |
| |
| def regression(args, logger): |
| base_results = get_results(logger, args.base_result) |
| target_results = get_results(logger, args.target_result) |
| |
| regression_common(args, logger, base_results, target_results) |
| |
| # Some test case naming is poor and contains random strings, particularly lttng/babeltrace. |
| # Truncating the test names works since they contain file and line number identifiers |
| # which allows us to match them without the random components. |
| def fixup_ptest_names(results, logger): |
| for r in results: |
| for i in results[r]: |
| tests = list(results[r][i]['result'].keys()) |
| for test in tests: |
| new = None |
| if test.startswith(("ptestresult.lttng-tools.", "ptestresult.babeltrace.", "ptestresult.babeltrace2")) and "_-_" in test: |
| new = test.split("_-_")[0] |
| elif test.startswith(("ptestresult.curl.")) and "__" in test: |
| new = test.split("__")[0] |
| elif test.startswith(("ptestresult.dbus.")) and "__" in test: |
| new = test.split("__")[0] |
| elif test.startswith("ptestresult.binutils") and "build-st-" in test: |
| new = test.split(" ")[0] |
| elif test.startswith("ptestresult.gcc") and "/tmp/runtest." in test: |
| new = ".".join(test.split(".")[:2]) |
| if new: |
| results[r][i]['result'][new] = results[r][i]['result'][test] |
| del results[r][i]['result'][test] |
| |
| def regression_common(args, logger, base_results, target_results): |
| if args.base_result_id: |
| base_results = resultutils.filter_resultsdata(base_results, args.base_result_id) |
| if args.target_result_id: |
| target_results = resultutils.filter_resultsdata(target_results, args.target_result_id) |
| |
| fixup_ptest_names(base_results, logger) |
| fixup_ptest_names(target_results, logger) |
| |
| matches = [] |
| regressions = [] |
| notfound = [] |
| |
| for a in base_results: |
| if a in target_results: |
| base = list(base_results[a].keys()) |
| target = list(target_results[a].keys()) |
| # We may have multiple base/targets which are for different configurations. Start by |
| # removing any pairs which match |
| for c in base.copy(): |
| for b in target.copy(): |
| if not can_be_compared(logger, base_results[a][c], target_results[a][b]): |
| continue |
| res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b], args.limit) |
| if not res: |
| matches.append(resstr) |
| base.remove(c) |
| target.remove(b) |
| break |
| # Should only now see regressions, we may not be able to match multiple pairs directly |
| for c in base: |
| for b in target: |
| if not can_be_compared(logger, base_results[a][c], target_results[a][b]): |
| continue |
| res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b], args.limit) |
| if res: |
| regressions.append(resstr) |
| else: |
| notfound.append("%s not found in target" % a) |
| print("\n".join(sorted(regressions))) |
| print("\n" + MISSING_TESTS_BANNER + "\n") |
| print("\n".join(sorted(notfound))) |
| print("\n" + ADDITIONAL_DATA_BANNER + "\n") |
| print("\n".join(sorted(matches))) |
| return 0 |
| |
| def regression_git(args, logger): |
| base_results = {} |
| target_results = {} |
| |
| tag_name = "{branch}/{commit_number}-g{commit}/{tag_number}" |
| repo = GitRepo(args.repo) |
| |
| revs = gitarchive.get_test_revs(logger, repo, tag_name, branch=args.branch) |
| |
| if args.branch2: |
| revs2 = gitarchive.get_test_revs(logger, repo, tag_name, branch=args.branch2) |
| if not len(revs2): |
| logger.error("No revisions found to compare against") |
| return 1 |
| if not len(revs): |
| logger.error("No revision to report on found") |
| return 1 |
| else: |
| if len(revs) < 2: |
| logger.error("Only %d tester revisions found, unable to generate report" % len(revs)) |
| return 1 |
| |
| # Pick revisions |
| if args.commit: |
| if args.commit_number: |
| logger.warning("Ignoring --commit-number as --commit was specified") |
| index1 = gitarchive.rev_find(revs, 'commit', args.commit) |
| elif args.commit_number: |
| index1 = gitarchive.rev_find(revs, 'commit_number', args.commit_number) |
| else: |
| index1 = len(revs) - 1 |
| |
| if args.branch2: |
| revs2.append(revs[index1]) |
| index1 = len(revs2) - 1 |
| revs = revs2 |
| |
| if args.commit2: |
| if args.commit_number2: |
| logger.warning("Ignoring --commit-number2 as --commit2 was specified") |
| index2 = gitarchive.rev_find(revs, 'commit', args.commit2) |
| elif args.commit_number2: |
| index2 = gitarchive.rev_find(revs, 'commit_number', args.commit_number2) |
| else: |
| if index1 > 0: |
| index2 = index1 - 1 |
| # Find the closest matching commit number for comparision |
| # In future we could check the commit is a common ancestor and |
| # continue back if not but this good enough for now |
| while index2 > 0 and revs[index2].commit_number > revs[index1].commit_number: |
| index2 = index2 - 1 |
| else: |
| logger.error("Unable to determine the other commit, use " |
| "--commit2 or --commit-number2 to specify it") |
| return 1 |
| |
| logger.info("Comparing:\n%s\nto\n%s\n" % (revs[index1], revs[index2])) |
| |
| base_results = resultutils.git_get_result(repo, revs[index1][2]) |
| target_results = resultutils.git_get_result(repo, revs[index2][2]) |
| |
| regression_common(args, logger, base_results, target_results) |
| |
| return 0 |
| |
| def register_commands(subparsers): |
| """Register subcommands from this plugin""" |
| |
| parser_build = subparsers.add_parser('regression', help='regression file/directory analysis', |
| description='regression analysis comparing the base set of results to the target results', |
| group='analysis') |
| parser_build.set_defaults(func=regression) |
| parser_build.add_argument('base_result', |
| help='base result file/directory/URL for the comparison') |
| parser_build.add_argument('target_result', |
| help='target result file/directory/URL to compare with') |
| parser_build.add_argument('-b', '--base-result-id', default='', |
| help='(optional) filter the base results to this result ID') |
| parser_build.add_argument('-t', '--target-result-id', default='', |
| help='(optional) filter the target results to this result ID') |
| |
| parser_build = subparsers.add_parser('regression-git', help='regression git analysis', |
| description='regression analysis comparing base result set to target ' |
| 'result set', |
| group='analysis') |
| parser_build.set_defaults(func=regression_git) |
| parser_build.add_argument('repo', |
| help='the git repository containing the data') |
| parser_build.add_argument('-b', '--base-result-id', default='', |
| help='(optional) default select regression based on configurations unless base result ' |
| 'id was provided') |
| parser_build.add_argument('-t', '--target-result-id', default='', |
| help='(optional) default select regression based on configurations unless target result ' |
| 'id was provided') |
| |
| parser_build.add_argument('--branch', '-B', default='master', help="Branch to find commit in") |
| parser_build.add_argument('--branch2', help="Branch to find comparision revisions in") |
| parser_build.add_argument('--commit', help="Revision to search for") |
| parser_build.add_argument('--commit-number', help="Revision number to search for, redundant if --commit is specified") |
| parser_build.add_argument('--commit2', help="Revision to compare with") |
| parser_build.add_argument('--commit-number2', help="Revision number to compare with, redundant if --commit2 is specified") |
| parser_build.add_argument('-l', '--limit', default=REGRESSIONS_DISPLAY_LIMIT, help="Maximum number of changes to display per test. Can be set to 0 to print all changes") |
| |