poky/scripts/lib/resulttool/regression.py - openbmc/openbmc - Gitiles

 # resulttool - regression analysis
 #
 # Copyright (c) 2019, Intel Corporation.
 # Copyright (c) 2019, Linux Foundation
 #
 # SPDX-License-Identifier: GPL-2.0-only
 #

 import resulttool.resultutils as resultutils

 from oeqa.utils.git import GitRepo
 import oeqa.utils.gitarchive as gitarchive

 METADATA_MATCH_TABLE = {
     "oeselftest": "OESELFTEST_METADATA"
 }

 OESELFTEST_METADATA_GUESS_TABLE={
     "trigger-build-posttrigger": {
         "run_all_tests": False,
         "run_tests":["buildoptions.SourceMirroring.test_yocto_source_mirror"],
         "skips": None,
         "machine": None,
         "select_tags":None,
         "exclude_tags": None
     },
     "reproducible": {
         "run_all_tests": False,
         "run_tests":["reproducible"],
         "skips": None,
         "machine": None,
         "select_tags":None,
         "exclude_tags": None
     },
     "arch-qemu-quick": {
         "run_all_tests": True,
         "run_tests":None,
         "skips": None,
         "machine": None,
         "select_tags":["machine"],
         "exclude_tags": None
     },
     "arch-qemu-full-x86-or-x86_64": {
         "run_all_tests": True,
         "run_tests":None,
         "skips": None,
         "machine": None,
         "select_tags":["machine", "toolchain-system"],
         "exclude_tags": None
     },
     "arch-qemu-full-others": {
         "run_all_tests": True,
         "run_tests":None,
         "skips": None,
         "machine": None,
         "select_tags":["machine", "toolchain-user"],
         "exclude_tags": None
     },
     "selftest": {
         "run_all_tests": True,
         "run_tests":None,
         "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"],
         "machine": None,
         "select_tags":None,
         "exclude_tags": ["machine", "toolchain-system", "toolchain-user"]
     },
     "bringup": {
         "run_all_tests": True,
         "run_tests":None,
         "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"],
         "machine": None,
         "select_tags":None,
         "exclude_tags": ["machine", "toolchain-system", "toolchain-user"]
     }
 }

 STATUS_STRINGS = {
     "None": "No matching test result"
 }

 REGRESSIONS_DISPLAY_LIMIT=50

 MISSING_TESTS_BANNER =   "-------------------------- Missing tests --------------------------"
 ADDITIONAL_DATA_BANNER = "--------------------- Matches and improvements --------------------"

 def test_has_at_least_one_matching_tag(test, tag_list):
     return "oetags" in test and any(oetag in tag_list for oetag in test["oetags"])

 def all_tests_have_at_least_one_matching_tag(results, tag_list):
     return all(test_has_at_least_one_matching_tag(test_result, tag_list) or test_name.startswith("ptestresult") for (test_name, test_result) in results.items())

 def any_test_have_any_matching_tag(results, tag_list):
     return any(test_has_at_least_one_matching_tag(test, tag_list) for test in results.values())

 def have_skipped_test(result, test_prefix):
     return all( result[test]['status'] == "SKIPPED" for test in result if test.startswith(test_prefix))

 def have_all_tests_skipped(result, test_prefixes_list):
     return all(have_skipped_test(result, test_prefix) for test_prefix in test_prefixes_list)

 def guess_oeselftest_metadata(results):
     """
     When an oeselftest test result is lacking OESELFTEST_METADATA, we can try to guess it based on results content.
     Check results for specific values (absence/presence of oetags, number and name of executed tests...),
     and if it matches one of known configuration from autobuilder configuration, apply guessed OSELFTEST_METADATA
     to it to allow proper test filtering.
     This guessing process is tightly coupled to config.json in autobuilder. It should trigger less and less,
     as new tests will have OESELFTEST_METADATA properly appended at test reporting time
     """

     if len(results) == 1 and "buildoptions.SourceMirroring.test_yocto_source_mirror" in results:
         return OESELFTEST_METADATA_GUESS_TABLE['trigger-build-posttrigger']
     elif all(result.startswith("reproducible") for result in results):
         return OESELFTEST_METADATA_GUESS_TABLE['reproducible']
     elif all_tests_have_at_least_one_matching_tag(results, ["machine"]):
         return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-quick']
     elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-system"]):
         return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-x86-or-x86_64']
     elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-user"]):
         return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-others']
     elif not any_test_have_any_matching_tag(results, ["machine", "toolchain-user", "toolchain-system"]):
         if have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"]):
             return OESELFTEST_METADATA_GUESS_TABLE['selftest']
         elif have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"]):
             return OESELFTEST_METADATA_GUESS_TABLE['bringup']

     return None


 def metadata_matches(base_configuration, target_configuration):
     """
     For passed base and target, check test type. If test type matches one of
     properties described in METADATA_MATCH_TABLE, compare metadata if it is
     present in base. Return true if metadata matches, or if base lacks some
     data (either TEST_TYPE or the corresponding metadata)
     """
     test_type = base_configuration.get('TEST_TYPE')
     if test_type not in METADATA_MATCH_TABLE:
         return True

     metadata_key = METADATA_MATCH_TABLE.get(test_type)
     if target_configuration.get(metadata_key) != base_configuration.get(metadata_key):
         return False

     return True


 def machine_matches(base_configuration, target_configuration):
     return base_configuration.get('MACHINE') == target_configuration.get('MACHINE')


 def can_be_compared(logger, base, target):
     """
     Some tests are not relevant to be compared, for example some oeselftest
     run with different tests sets or parameters. Return true if tests can be
     compared
     """
     ret = True
     base_configuration = base['configuration']
     target_configuration = target['configuration']

     # Older test results lack proper OESELFTEST_METADATA: if not present, try to guess it based on tests results.
     if base_configuration.get('TEST_TYPE') == 'oeselftest' and 'OESELFTEST_METADATA' not in base_configuration:
         guess = guess_oeselftest_metadata(base['result'])
         if guess is None:
             logger.error(f"ERROR: did not manage to guess oeselftest metadata for {base_configuration['STARTTIME']}")
         else:
             logger.debug(f"Enriching {base_configuration['STARTTIME']} with {guess}")
             base_configuration['OESELFTEST_METADATA'] = guess
     if target_configuration.get('TEST_TYPE') == 'oeselftest' and 'OESELFTEST_METADATA' not in target_configuration:
         guess = guess_oeselftest_metadata(target['result'])
         if guess is None:
             logger.error(f"ERROR: did not manage to guess oeselftest metadata for {target_configuration['STARTTIME']}")
         else:
             logger.debug(f"Enriching {target_configuration['STARTTIME']} with {guess}")
             target_configuration['OESELFTEST_METADATA'] = guess

     # Test runs with LTP results in should only be compared with other runs with LTP tests in them
     if base_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in base['result']):
         ret = target_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in target['result'])

     return ret and metadata_matches(base_configuration, target_configuration) \
         and machine_matches(base_configuration, target_configuration)

 def get_status_str(raw_status):
     raw_status_lower = raw_status.lower() if raw_status else "None"
     return STATUS_STRINGS.get(raw_status_lower, raw_status)

 def get_additional_info_line(new_pass_count, new_tests):
     result=[]
     if new_tests:
         result.append(f'+{new_tests} test(s) present')
     if new_pass_count:
         result.append(f'+{new_pass_count} test(s) now passing')

     if not result:
         return ""

     return '    -> ' + ', '.join(result) + '\n'

 def compare_result(logger, base_name, target_name, base_result, target_result, display_limit=None):
     base_result = base_result.get('result')
     target_result = target_result.get('result')
     result = {}
     new_tests = 0
     regressions = {}
     resultstring = ""
     new_tests = 0
     new_pass_count = 0

     display_limit = int(display_limit) if display_limit else REGRESSIONS_DISPLAY_LIMIT

     if base_result and target_result:
         for k in base_result:
             if k in ['ptestresult.rawlogs', 'ptestresult.sections']:
                 continue
             base_testcase = base_result[k]
             base_status = base_testcase.get('status')
             if base_status:
                 target_testcase = target_result.get(k, {})
                 target_status = target_testcase.get('status')
                 if base_status != target_status:
                     result[k] = {'base': base_status, 'target': target_status}
             else:
                 logger.error('Failed to retrieved base test case status: %s' % k)

         # Also count new tests that were not present in base results: it
         # could be newly added tests, but it could also highlights some tests
         # renames or fixed faulty ptests
         for k in target_result:
             if k not in base_result:
                 new_tests += 1
     if result:
         new_pass_count = sum(test['target'] is not None and test['target'].startswith("PASS") for test in result.values())
         # Print a regression report only if at least one test has a regression status (FAIL, SKIPPED, absent...)
         if new_pass_count < len(result):
             resultstring = "Regression:  %s\n             %s\n" % (base_name, target_name)
             for k in sorted(result):
                 if not result[k]['target'] or not result[k]['target'].startswith("PASS"):
                     # Differentiate each ptest kind when listing regressions
                     key_parts = k.split('.')
                     key = '.'.join(key_parts[:2]) if k.startswith('ptest') else key_parts[0]
                     # Append new regression to corresponding test family
                     regressions[key] = regressions.setdefault(key, []) + ['        %s: %s -> %s\n' % (k, get_status_str(result[k]['base']), get_status_str(result[k]['target']))]
             resultstring += f"    Total: {sum([len(regressions[r]) for r in regressions])} new regression(s):\n"
             for k in regressions:
                 resultstring += f"    {len(regressions[k])} regression(s) for {k}\n"
                 count_to_print=min([display_limit, len(regressions[k])]) if display_limit > 0 else len(regressions[k])
                 resultstring += ''.join(regressions[k][:count_to_print])
                 if count_to_print < len(regressions[k]):
                     resultstring+='        [...]\n'
             if new_pass_count > 0:
                 resultstring += f'    Additionally, {new_pass_count} previously failing test(s) is/are now passing\n'
             if new_tests > 0:
                 resultstring += f'    Additionally, {new_tests} new test(s) is/are present\n'
         else:
             resultstring = "%s\n%s\n" % (base_name, target_name)
             result = None
     else:
         resultstring = "%s\n%s\n" % (base_name, target_name)

     if not result:
         additional_info = get_additional_info_line(new_pass_count, new_tests)
         if additional_info:
             resultstring += additional_info

     return result, resultstring

 def get_results(logger, source):
     return resultutils.load_resultsdata(source, configmap=resultutils.regression_map)

 def regression(args, logger):
     base_results = get_results(logger, args.base_result)
     target_results = get_results(logger, args.target_result)

     regression_common(args, logger, base_results, target_results)

 # Some test case naming is poor and contains random strings, particularly lttng/babeltrace.
 # Truncating the test names works since they contain file and line number identifiers
 # which allows us to match them without the random components.
 def fixup_ptest_names(results, logger):
     for r in results:
         for i in results[r]:
             tests = list(results[r][i]['result'].keys())
             for test in tests:
                 new = None
                 if test.startswith(("ptestresult.lttng-tools.", "ptestresult.babeltrace.", "ptestresult.babeltrace2")) and "_-_" in test:
                     new = test.split("_-_")[0]
                 elif test.startswith(("ptestresult.curl.")) and "__" in test:
                     new = test.split("__")[0]
                 elif test.startswith(("ptestresult.dbus.")) and "__" in test:
                     new = test.split("__")[0]
                 elif test.startswith("ptestresult.binutils") and "build-st-" in test:
                     new = test.split(" ")[0]
                 elif test.startswith("ptestresult.gcc") and "/tmp/runtest." in test:
                     new = ".".join(test.split(".")[:2])
                 if new:
                     results[r][i]['result'][new] = results[r][i]['result'][test]
                     del results[r][i]['result'][test]

 def regression_common(args, logger, base_results, target_results):
     if args.base_result_id:
         base_results = resultutils.filter_resultsdata(base_results, args.base_result_id)
     if args.target_result_id:
         target_results = resultutils.filter_resultsdata(target_results, args.target_result_id)

     fixup_ptest_names(base_results, logger)
     fixup_ptest_names(target_results, logger)

     matches = []
     regressions = []
     notfound = []

     for a in base_results:
         if a in target_results:
             base = list(base_results[a].keys())
             target = list(target_results[a].keys())
             # We may have multiple base/targets which are for different configurations. Start by
             # removing any pairs which match
             for c in base.copy():
                 for b in target.copy():
                     if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
                         continue
                     res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b], args.limit)
                     if not res:
                         matches.append(resstr)
                         base.remove(c)
                         target.remove(b)
                         break
             # Should only now see regressions, we may not be able to match multiple pairs directly
             for c in base:
                 for b in target:
                     if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
                         continue
                     res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b], args.limit)
                     if res:
                         regressions.append(resstr)
         else:
             notfound.append("%s not found in target" % a)
     print("\n".join(sorted(regressions)))
     print("\n" + MISSING_TESTS_BANNER + "\n")
     print("\n".join(sorted(notfound)))
     print("\n" + ADDITIONAL_DATA_BANNER + "\n")
     print("\n".join(sorted(matches)))
     return 0

 def regression_git(args, logger):
     base_results = {}
     target_results = {}

     tag_name = "{branch}/{commit_number}-g{commit}/{tag_number}"
     repo = GitRepo(args.repo)

     revs = gitarchive.get_test_revs(logger, repo, tag_name, branch=args.branch)

     if args.branch2:
         revs2 = gitarchive.get_test_revs(logger, repo, tag_name, branch=args.branch2)
         if not len(revs2):
             logger.error("No revisions found to compare against")
             return 1
         if not len(revs):
             logger.error("No revision to report on found")
             return 1
     else:
         if len(revs) < 2:
             logger.error("Only %d tester revisions found, unable to generate report" % len(revs))
             return 1

     # Pick revisions
     if args.commit:
         if args.commit_number:
             logger.warning("Ignoring --commit-number as --commit was specified")
         index1 = gitarchive.rev_find(revs, 'commit', args.commit)
     elif args.commit_number:
         index1 = gitarchive.rev_find(revs, 'commit_number', args.commit_number)
     else:
         index1 = len(revs) - 1

     if args.branch2:
         revs2.append(revs[index1])
         index1 = len(revs2) - 1
         revs = revs2

     if args.commit2:
         if args.commit_number2:
             logger.warning("Ignoring --commit-number2 as --commit2 was specified")
         index2 = gitarchive.rev_find(revs, 'commit', args.commit2)
     elif args.commit_number2:
         index2 = gitarchive.rev_find(revs, 'commit_number', args.commit_number2)
     else:
         if index1 > 0:
             index2 = index1 - 1
             # Find the closest matching commit number for comparision
             # In future we could check the commit is a common ancestor and
             # continue back if not but this good enough for now
             while index2 > 0 and revs[index2].commit_number > revs[index1].commit_number:
                 index2 = index2 - 1
         else:
             logger.error("Unable to determine the other commit, use "
                       "--commit2 or --commit-number2 to specify it")
             return 1

     logger.info("Comparing:\n%s\nto\n%s\n" % (revs[index1], revs[index2]))

     base_results = resultutils.git_get_result(repo, revs[index1][2])
     target_results = resultutils.git_get_result(repo, revs[index2][2])

     regression_common(args, logger, base_results, target_results)

     return 0

 def register_commands(subparsers):
     """Register subcommands from this plugin"""

     parser_build = subparsers.add_parser('regression', help='regression file/directory analysis',
                                          description='regression analysis comparing the base set of results to the target results',
                                          group='analysis')
     parser_build.set_defaults(func=regression)
     parser_build.add_argument('base_result',
                               help='base result file/directory/URL for the comparison')
     parser_build.add_argument('target_result',
                               help='target result file/directory/URL to compare with')
     parser_build.add_argument('-b', '--base-result-id', default='',
                               help='(optional) filter the base results to this result ID')
     parser_build.add_argument('-t', '--target-result-id', default='',
                               help='(optional) filter the target results to this result ID')
     parser_build.add_argument('-l', '--limit', default=REGRESSIONS_DISPLAY_LIMIT, help="Maximum number of changes to display per test. Can be set to 0 to print all changes")

     parser_build = subparsers.add_parser('regression-git', help='regression git analysis',
                                          description='regression analysis comparing base result set to target '
                                                      'result set',
                                          group='analysis')
     parser_build.set_defaults(func=regression_git)
     parser_build.add_argument('repo',
                               help='the git repository containing the data')
     parser_build.add_argument('-b', '--base-result-id', default='',
                               help='(optional) default select regression based on configurations unless base result '
                                    'id was provided')
     parser_build.add_argument('-t', '--target-result-id', default='',
                               help='(optional) default select regression based on configurations unless target result '
                                    'id was provided')

     parser_build.add_argument('--branch', '-B', default='master', help="Branch to find commit in")
     parser_build.add_argument('--branch2', help="Branch to find comparision revisions in")
     parser_build.add_argument('--commit', help="Revision to search for")
     parser_build.add_argument('--commit-number', help="Revision number to search for, redundant if --commit is specified")
     parser_build.add_argument('--commit2', help="Revision to compare with")
     parser_build.add_argument('--commit-number2', help="Revision number to compare with, redundant if --commit2 is specified")
     parser_build.add_argument('-l', '--limit', default=REGRESSIONS_DISPLAY_LIMIT, help="Maximum number of changes to display per test. Can be set to 0 to print all changes")
	# resulttool - regression analysis
	#
	# Copyright (c) 2019, Intel Corporation.
	# Copyright (c) 2019, Linux Foundation
	#
	# SPDX-License-Identifier: GPL-2.0-only
	#

	import resulttool.resultutils as resultutils

	from oeqa.utils.git import GitRepo
	import oeqa.utils.gitarchive as gitarchive

	METADATA_MATCH_TABLE = {
	"oeselftest": "OESELFTEST_METADATA"
	}

	OESELFTEST_METADATA_GUESS_TABLE={
	"trigger-build-posttrigger": {
	"run_all_tests": False,
	"run_tests":["buildoptions.SourceMirroring.test_yocto_source_mirror"],
	"skips": None,
	"machine": None,
	"select_tags":None,
	"exclude_tags": None
	},
	"reproducible": {
	"run_all_tests": False,
	"run_tests":["reproducible"],
	"skips": None,
	"machine": None,
	"select_tags":None,
	"exclude_tags": None
	},
	"arch-qemu-quick": {
	"run_all_tests": True,
	"run_tests":None,
	"skips": None,
	"machine": None,
	"select_tags":["machine"],
	"exclude_tags": None
	},
	"arch-qemu-full-x86-or-x86_64": {
	"run_all_tests": True,
	"run_tests":None,
	"skips": None,
	"machine": None,
	"select_tags":["machine", "toolchain-system"],
	"exclude_tags": None
	},
	"arch-qemu-full-others": {
	"run_all_tests": True,
	"run_tests":None,
	"skips": None,
	"machine": None,
	"select_tags":["machine", "toolchain-user"],
	"exclude_tags": None
	},
	"selftest": {
	"run_all_tests": True,
	"run_tests":None,
	"skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"],
	"machine": None,
	"select_tags":None,
	"exclude_tags": ["machine", "toolchain-system", "toolchain-user"]
	},
	"bringup": {
	"run_all_tests": True,
	"run_tests":None,
	"skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"],
	"machine": None,
	"select_tags":None,
	"exclude_tags": ["machine", "toolchain-system", "toolchain-user"]
	}
	}

	STATUS_STRINGS = {
	"None": "No matching test result"
	}

	REGRESSIONS_DISPLAY_LIMIT=50

	MISSING_TESTS_BANNER = "-------------------------- Missing tests --------------------------"
	ADDITIONAL_DATA_BANNER = "--------------------- Matches and improvements --------------------"

	def test_has_at_least_one_matching_tag(test, tag_list):
	return "oetags" in test and any(oetag in tag_list for oetag in test["oetags"])

	def all_tests_have_at_least_one_matching_tag(results, tag_list):
	return all(test_has_at_least_one_matching_tag(test_result, tag_list) or test_name.startswith("ptestresult") for (test_name, test_result) in results.items())

	def any_test_have_any_matching_tag(results, tag_list):
	return any(test_has_at_least_one_matching_tag(test, tag_list) for test in results.values())

	def have_skipped_test(result, test_prefix):
	return all( result[test]['status'] == "SKIPPED" for test in result if test.startswith(test_prefix))

	def have_all_tests_skipped(result, test_prefixes_list):
	return all(have_skipped_test(result, test_prefix) for test_prefix in test_prefixes_list)

	def guess_oeselftest_metadata(results):
	"""
	When an oeselftest test result is lacking OESELFTEST_METADATA, we can try to guess it based on results content.
	Check results for specific values (absence/presence of oetags, number and name of executed tests...),
	and if it matches one of known configuration from autobuilder configuration, apply guessed OSELFTEST_METADATA
	to it to allow proper test filtering.
	This guessing process is tightly coupled to config.json in autobuilder. It should trigger less and less,
	as new tests will have OESELFTEST_METADATA properly appended at test reporting time
	"""

	if len(results) == 1 and "buildoptions.SourceMirroring.test_yocto_source_mirror" in results:
	return OESELFTEST_METADATA_GUESS_TABLE['trigger-build-posttrigger']
	elif all(result.startswith("reproducible") for result in results):
	return OESELFTEST_METADATA_GUESS_TABLE['reproducible']
	elif all_tests_have_at_least_one_matching_tag(results, ["machine"]):
	return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-quick']
	elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-system"]):
	return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-x86-or-x86_64']
	elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-user"]):
	return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-others']
	elif not any_test_have_any_matching_tag(results, ["machine", "toolchain-user", "toolchain-system"]):
	if have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"]):
	return OESELFTEST_METADATA_GUESS_TABLE['selftest']
	elif have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"]):
	return OESELFTEST_METADATA_GUESS_TABLE['bringup']

	return None


	def metadata_matches(base_configuration, target_configuration):
	"""
	For passed base and target, check test type. If test type matches one of
	properties described in METADATA_MATCH_TABLE, compare metadata if it is
	present in base. Return true if metadata matches, or if base lacks some
	data (either TEST_TYPE or the corresponding metadata)
	"""
	test_type = base_configuration.get('TEST_TYPE')
	if test_type not in METADATA_MATCH_TABLE:
	return True

	metadata_key = METADATA_MATCH_TABLE.get(test_type)
	if target_configuration.get(metadata_key) != base_configuration.get(metadata_key):
	return False

	return True


	def machine_matches(base_configuration, target_configuration):
	return base_configuration.get('MACHINE') == target_configuration.get('MACHINE')


	def can_be_compared(logger, base, target):
	"""
	Some tests are not relevant to be compared, for example some oeselftest
	run with different tests sets or parameters. Return true if tests can be
	compared
	"""
	ret = True
	base_configuration = base['configuration']
	target_configuration = target['configuration']

	# Older test results lack proper OESELFTEST_METADATA: if not present, try to guess it based on tests results.
	if base_configuration.get('TEST_TYPE') == 'oeselftest' and 'OESELFTEST_METADATA' not in base_configuration:
	guess = guess_oeselftest_metadata(base['result'])
	if guess is None:
	logger.error(f"ERROR: did not manage to guess oeselftest metadata for {base_configuration['STARTTIME']}")
	else:
	logger.debug(f"Enriching {base_configuration['STARTTIME']} with {guess}")
	base_configuration['OESELFTEST_METADATA'] = guess
	if target_configuration.get('TEST_TYPE') == 'oeselftest' and 'OESELFTEST_METADATA' not in target_configuration:
	guess = guess_oeselftest_metadata(target['result'])
	if guess is None:
	logger.error(f"ERROR: did not manage to guess oeselftest metadata for {target_configuration['STARTTIME']}")
	else:
	logger.debug(f"Enriching {target_configuration['STARTTIME']} with {guess}")
	target_configuration['OESELFTEST_METADATA'] = guess

	# Test runs with LTP results in should only be compared with other runs with LTP tests in them
	if base_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in base['result']):
	ret = target_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in target['result'])

	return ret and metadata_matches(base_configuration, target_configuration) \
	and machine_matches(base_configuration, target_configuration)

	def get_status_str(raw_status):
	raw_status_lower = raw_status.lower() if raw_status else "None"
	return STATUS_STRINGS.get(raw_status_lower, raw_status)

	def get_additional_info_line(new_pass_count, new_tests):
	result=[]
	if new_tests:
	result.append(f'+{new_tests} test(s) present')
	if new_pass_count:
	result.append(f'+{new_pass_count} test(s) now passing')

	if not result:
	return ""

	return ' -> ' + ', '.join(result) + '\n'

	def compare_result(logger, base_name, target_name, base_result, target_result, display_limit=None):
	base_result = base_result.get('result')
	target_result = target_result.get('result')
	result = {}
	new_tests = 0
	regressions = {}
	resultstring = ""
	new_tests = 0
	new_pass_count = 0

	display_limit = int(display_limit) if display_limit else REGRESSIONS_DISPLAY_LIMIT

	if base_result and target_result:
	for k in base_result:
	if k in ['ptestresult.rawlogs', 'ptestresult.sections']:
	continue
	base_testcase = base_result[k]
	base_status = base_testcase.get('status')
	if base_status:
	target_testcase = target_result.get(k, {})
	target_status = target_testcase.get('status')
	if base_status != target_status:
	result[k] = {'base': base_status, 'target': target_status}
	else:
	logger.error('Failed to retrieved base test case status: %s' % k)

	# Also count new tests that were not present in base results: it
	# could be newly added tests, but it could also highlights some tests
	# renames or fixed faulty ptests
	for k in target_result:
	if k not in base_result:
	new_tests += 1
	if result:
	new_pass_count = sum(test['target'] is not None and test['target'].startswith("PASS") for test in result.values())
	# Print a regression report only if at least one test has a regression status (FAIL, SKIPPED, absent...)
	if new_pass_count < len(result):
	resultstring = "Regression: %s\n %s\n" % (base_name, target_name)
	for k in sorted(result):
	if not result[k]['target'] or not result[k]['target'].startswith("PASS"):
	# Differentiate each ptest kind when listing regressions
	key_parts = k.split('.')
	key = '.'.join(key_parts[:2]) if k.startswith('ptest') else key_parts[0]
	# Append new regression to corresponding test family
	regressions[key] = regressions.setdefault(key, []) + [' %s: %s -> %s\n' % (k, get_status_str(result[k]['base']), get_status_str(result[k]['target']))]
	resultstring += f" Total: {sum([len(regressions[r]) for r in regressions])} new regression(s):\n"
	for k in regressions:
	resultstring += f" {len(regressions[k])} regression(s) for {k}\n"
	count_to_print=min([display_limit, len(regressions[k])]) if display_limit > 0 else len(regressions[k])
	resultstring += ''.join(regressions[k][:count_to_print])
	if count_to_print < len(regressions[k]):
	resultstring+=' [...]\n'
	if new_pass_count > 0:
	resultstring += f' Additionally, {new_pass_count} previously failing test(s) is/are now passing\n'
	if new_tests > 0:
	resultstring += f' Additionally, {new_tests} new test(s) is/are present\n'
	else:
	resultstring = "%s\n%s\n" % (base_name, target_name)
	result = None
	else:
	resultstring = "%s\n%s\n" % (base_name, target_name)

	if not result:
	additional_info = get_additional_info_line(new_pass_count, new_tests)
	if additional_info:
	resultstring += additional_info

	return result, resultstring

	def get_results(logger, source):
	return resultutils.load_resultsdata(source, configmap=resultutils.regression_map)

	def regression(args, logger):
	base_results = get_results(logger, args.base_result)
	target_results = get_results(logger, args.target_result)

	regression_common(args, logger, base_results, target_results)

	# Some test case naming is poor and contains random strings, particularly lttng/babeltrace.
	# Truncating the test names works since they contain file and line number identifiers
	# which allows us to match them without the random components.
	def fixup_ptest_names(results, logger):
	for r in results:
	for i in results[r]:
	tests = list(results[r][i]['result'].keys())
	for test in tests:
	new = None
	if test.startswith(("ptestresult.lttng-tools.", "ptestresult.babeltrace.", "ptestresult.babeltrace2")) and "_-_" in test:
	new = test.split("_-_")[0]
	elif test.startswith(("ptestresult.curl.")) and "__" in test:
	new = test.split("__")[0]
	elif test.startswith(("ptestresult.dbus.")) and "__" in test:
	new = test.split("__")[0]
	elif test.startswith("ptestresult.binutils") and "build-st-" in test:
	new = test.split(" ")[0]
	elif test.startswith("ptestresult.gcc") and "/tmp/runtest." in test:
	new = ".".join(test.split(".")[:2])
	if new:
	results[r][i]['result'][new] = results[r][i]['result'][test]
	del results[r][i]['result'][test]

	def regression_common(args, logger, base_results, target_results):
	if args.base_result_id:
	base_results = resultutils.filter_resultsdata(base_results, args.base_result_id)
	if args.target_result_id:
	target_results = resultutils.filter_resultsdata(target_results, args.target_result_id)

	fixup_ptest_names(base_results, logger)
	fixup_ptest_names(target_results, logger)

	matches = []
	regressions = []
	notfound = []

	for a in base_results:
	if a in target_results:
	base = list(base_results[a].keys())
	target = list(target_results[a].keys())
	# We may have multiple base/targets which are for different configurations. Start by
	# removing any pairs which match
	for c in base.copy():
	for b in target.copy():
	if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
	continue
	res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b], args.limit)
	if not res:
	matches.append(resstr)
	base.remove(c)
	target.remove(b)
	break
	# Should only now see regressions, we may not be able to match multiple pairs directly
	for c in base:
	for b in target:
	if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
	continue
	res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b], args.limit)
	if res:
	regressions.append(resstr)
	else:
	notfound.append("%s not found in target" % a)
	print("\n".join(sorted(regressions)))
	print("\n" + MISSING_TESTS_BANNER + "\n")
	print("\n".join(sorted(notfound)))
	print("\n" + ADDITIONAL_DATA_BANNER + "\n")
	print("\n".join(sorted(matches)))
	return 0

	def regression_git(args, logger):
	base_results = {}
	target_results = {}

	tag_name = "{branch}/{commit_number}-g{commit}/{tag_number}"
	repo = GitRepo(args.repo)

	revs = gitarchive.get_test_revs(logger, repo, tag_name, branch=args.branch)

	if args.branch2:
	revs2 = gitarchive.get_test_revs(logger, repo, tag_name, branch=args.branch2)
	if not len(revs2):
	logger.error("No revisions found to compare against")
	return 1
	if not len(revs):
	logger.error("No revision to report on found")
	return 1
	else:
	if len(revs) < 2:
	logger.error("Only %d tester revisions found, unable to generate report" % len(revs))
	return 1

	# Pick revisions
	if args.commit:
	if args.commit_number:
	logger.warning("Ignoring --commit-number as --commit was specified")
	index1 = gitarchive.rev_find(revs, 'commit', args.commit)
	elif args.commit_number:
	index1 = gitarchive.rev_find(revs, 'commit_number', args.commit_number)
	else:
	index1 = len(revs) - 1

	if args.branch2:
	revs2.append(revs[index1])
	index1 = len(revs2) - 1
	revs = revs2

	if args.commit2:
	if args.commit_number2:
	logger.warning("Ignoring --commit-number2 as --commit2 was specified")
	index2 = gitarchive.rev_find(revs, 'commit', args.commit2)
	elif args.commit_number2:
	index2 = gitarchive.rev_find(revs, 'commit_number', args.commit_number2)
	else:
	if index1 > 0:
	index2 = index1 - 1
	# Find the closest matching commit number for comparision
	# In future we could check the commit is a common ancestor and
	# continue back if not but this good enough for now
	while index2 > 0 and revs[index2].commit_number > revs[index1].commit_number:
	index2 = index2 - 1
	else:
	logger.error("Unable to determine the other commit, use "
	"--commit2 or --commit-number2 to specify it")
	return 1

	logger.info("Comparing:\n%s\nto\n%s\n" % (revs[index1], revs[index2]))

	base_results = resultutils.git_get_result(repo, revs[index1][2])
	target_results = resultutils.git_get_result(repo, revs[index2][2])

	regression_common(args, logger, base_results, target_results)

	return 0

	def register_commands(subparsers):
	"""Register subcommands from this plugin"""

	parser_build = subparsers.add_parser('regression', help='regression file/directory analysis',
	description='regression analysis comparing the base set of results to the target results',
	group='analysis')
	parser_build.set_defaults(func=regression)
	parser_build.add_argument('base_result',
	help='base result file/directory/URL for the comparison')
	parser_build.add_argument('target_result',
	help='target result file/directory/URL to compare with')
	parser_build.add_argument('-b', '--base-result-id', default='',
	help='(optional) filter the base results to this result ID')
	parser_build.add_argument('-t', '--target-result-id', default='',
	help='(optional) filter the target results to this result ID')
	parser_build.add_argument('-l', '--limit', default=REGRESSIONS_DISPLAY_LIMIT, help="Maximum number of changes to display per test. Can be set to 0 to print all changes")

	parser_build = subparsers.add_parser('regression-git', help='regression git analysis',
	description='regression analysis comparing base result set to target '
	'result set',
	group='analysis')
	parser_build.set_defaults(func=regression_git)
	parser_build.add_argument('repo',
	help='the git repository containing the data')
	parser_build.add_argument('-b', '--base-result-id', default='',
	help='(optional) default select regression based on configurations unless base result '
	'id was provided')
	parser_build.add_argument('-t', '--target-result-id', default='',
	help='(optional) default select regression based on configurations unless target result '
	'id was provided')

	parser_build.add_argument('--branch', '-B', default='master', help="Branch to find commit in")
	parser_build.add_argument('--branch2', help="Branch to find comparision revisions in")
	parser_build.add_argument('--commit', help="Revision to search for")
	parser_build.add_argument('--commit-number', help="Revision number to search for, redundant if --commit is specified")
	parser_build.add_argument('--commit2', help="Revision to compare with")
	parser_build.add_argument('--commit-number2', help="Revision number to compare with, redundant if --commit2 is specified")
	parser_build.add_argument('-l', '--limit', default=REGRESSIONS_DISPLAY_LIMIT, help="Maximum number of changes to display per test. Can be set to 0 to print all changes")