Blame - import-layers/yocto-poky/scripts/oe-build-perf-report - openbmc/openbmc

blob: 6f0b84f9ece39495a745483d206f94a8b7702f0c [file] [log] [blame]

Brad Bishop	6e60e8b	2018-02-01 10:27:11 -0500	[diff] [blame^]	1	#!/usr/bin/python3
				2	#
				3	# Examine build performance test results
				4	#
				5	# Copyright (c) 2017, Intel Corporation.
				6	#
				7	# This program is free software; you can redistribute it and/or modify it
				8	# under the terms and conditions of the GNU General Public License,
				9	# version 2, as published by the Free Software Foundation.
				10	#
				11	# This program is distributed in the hope it will be useful, but WITHOUT
				12	# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
				13	# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
				14	# more details.
				15	#
				16	import argparse
				17	import json
				18	import logging
				19	import os
				20	import re
				21	import sys
				22	from collections import namedtuple, OrderedDict
				23	from operator import attrgetter
				24	from xml.etree import ElementTree as ET
				25
				26	# Import oe libs
				27	scripts_path = os.path.dirname(os.path.realpath(__file__))
				28	sys.path.append(os.path.join(scripts_path, 'lib'))
				29	import scriptpath
				30	from build_perf import print_table
				31	from build_perf.report import (metadata_xml_to_json, results_xml_to_json,
				32	aggregate_data, aggregate_metadata, measurement_stats)
				33	from build_perf import html
				34
				35	scriptpath.add_oe_lib_path()
				36
				37	from oeqa.utils.git import GitRepo
				38
				39
				40	# Setup logging
				41	logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
				42	log = logging.getLogger('oe-build-perf-report')
				43
				44
				45	# Container class for tester revisions
				46	TestedRev = namedtuple('TestedRev', 'commit commit_number tags')
				47
				48
				49	def get_test_runs(repo, tag_name, **kwargs):
				50	"""Get a sorted list of test runs, matching given pattern"""
				51	# First, get field names from the tag name pattern
				52	field_names = [m.group(1) for m in re.finditer(r'{(\w+)}', tag_name)]
				53	undef_fields = [f for f in field_names if f not in kwargs.keys()]
				54
				55	# Fields for formatting tag name pattern
				56	str_fields = dict([(f, '*') for f in field_names])
				57	str_fields.update(kwargs)
				58
				59	# Get a list of all matching tags
				60	tag_pattern = tag_name.format(**str_fields)
				61	tags = repo.run_cmd(['tag', '-l', tag_pattern]).splitlines()
				62	log.debug("Found %d tags matching pattern '%s'", len(tags), tag_pattern)
				63
				64	# Parse undefined fields from tag names
				65	str_fields = dict([(f, r'(?P<{}>[\w\-.()]+)'.format(f)) for f in field_names])
				66	str_fields['branch'] = r'(?P<branch>[\w\-.()/]+)'
				67	str_fields['commit'] = '(?P<commit>[0-9a-f]{7,40})'
				68	str_fields['commit_number'] = '(?P<commit_number>[0-9]{1,7})'
				69	str_fields['tag_number'] = '(?P<tag_number>[0-9]{1,5})'
				70	# escape parenthesis in fields in order to not messa up the regexp
				71	fixed_fields = dict([(k, v.replace('(', r'\(').replace(')', r'\)')) for k, v in kwargs.items()])
				72	str_fields.update(fixed_fields)
				73	tag_re = re.compile(tag_name.format(**str_fields))
				74
				75	# Parse fields from tags
				76	revs = []
				77	for tag in tags:
				78	m = tag_re.match(tag)
				79	groups = m.groupdict()
				80	revs.append([groups[f] for f in undef_fields] + [tag])
				81
				82	# Return field names and a sorted list of revs
				83	return undef_fields, sorted(revs)
				84
				85	def list_test_revs(repo, tag_name, **kwargs):
				86	"""Get list of all tested revisions"""
				87	fields, revs = get_test_runs(repo, tag_name, **kwargs)
				88	ignore_fields = ['tag_number']
				89	print_fields = [i for i, f in enumerate(fields) if f not in ignore_fields]
				90
				91	# Sort revs
				92	rows = [[fields[i].upper() for i in print_fields] + ['TEST RUNS']]
				93	prev = [''] * len(revs)
				94	for rev in revs:
				95	# Only use fields that we want to print
				96	rev = [rev[i] for i in print_fields]
				97
				98	if rev != prev:
				99	new_row = [''] * len(print_fields) + [1]
				100	for i in print_fields:
				101	if rev[i] != prev[i]:
				102	break
				103	new_row[i:-1] = rev[i:]
				104	rows.append(new_row)
				105	else:
				106	rows[-1][-1] += 1
				107	prev = rev
				108
				109	print_table(rows)
				110
				111	def get_test_revs(repo, tag_name, **kwargs):
				112	"""Get list of all tested revisions"""
				113	fields, runs = get_test_runs(repo, tag_name, **kwargs)
				114
				115	revs = {}
				116	commit_i = fields.index('commit')
				117	commit_num_i = fields.index('commit_number')
				118	for run in runs:
				119	commit = run[commit_i]
				120	commit_num = run[commit_num_i]
				121	tag = run[-1]
				122	if not commit in revs:
				123	revs[commit] = TestedRev(commit, commit_num, [tag])
				124	else:
				125	assert commit_num == revs[commit].commit_number, "Commit numbers do not match"
				126	revs[commit].tags.append(tag)
				127
				128	# Return in sorted table
				129	revs = sorted(revs.values(), key=attrgetter('commit_number'))
				130	log.debug("Found %d tested revisions:\n %s", len(revs),
				131	"\n ".join(['{} ({})'.format(rev.commit_number, rev.commit) for rev in revs]))
				132	return revs
				133
				134	def rev_find(revs, attr, val):
				135	"""Search from a list of TestedRev"""
				136	for i, rev in enumerate(revs):
				137	if getattr(rev, attr) == val:
				138	return i
				139	raise ValueError("Unable to find '{}' value '{}'".format(attr, val))
				140
				141	def is_xml_format(repo, commit):
				142	"""Check if the commit contains xml (or json) data"""
				143	if repo.rev_parse(commit + ':results.xml'):
				144	log.debug("Detected report in xml format in %s", commit)
				145	return True
				146	else:
				147	log.debug("No xml report in %s, assuming json formatted results", commit)
				148	return False
				149
				150	def read_results(repo, tags, xml=True):
				151	"""Read result files from repo"""
				152
				153	def parse_xml_stream(data):
				154	"""Parse multiple concatenated XML objects"""
				155	objs = []
				156	xml_d = ""
				157	for line in data.splitlines():
				158	if xml_d and line.startswith('<?xml version='):
				159	objs.append(ET.fromstring(xml_d))
				160	xml_d = line
				161	else:
				162	xml_d += line
				163	objs.append(ET.fromstring(xml_d))
				164	return objs
				165
				166	def parse_json_stream(data):
				167	"""Parse multiple concatenated JSON objects"""
				168	objs = []
				169	json_d = ""
				170	for line in data.splitlines():
				171	if line == '}{':
				172	json_d += '}'
				173	objs.append(json.loads(json_d, object_pairs_hook=OrderedDict))
				174	json_d = '{'
				175	else:
				176	json_d += line
				177	objs.append(json.loads(json_d, object_pairs_hook=OrderedDict))
				178	return objs
				179
				180	num_revs = len(tags)
				181
				182	# Optimize by reading all data with one git command
				183	log.debug("Loading raw result data from %d tags, %s...", num_revs, tags[0])
				184	if xml:
				185	git_objs = [tag + ':metadata.xml' for tag in tags] + [tag + ':results.xml' for tag in tags]
				186	data = parse_xml_stream(repo.run_cmd(['show'] + git_objs + ['--']))
				187	return ([metadata_xml_to_json(e) for e in data[0:num_revs]],
				188	[results_xml_to_json(e) for e in data[num_revs:]])
				189	else:
				190	git_objs = [tag + ':metadata.json' for tag in tags] + [tag + ':results.json' for tag in tags]
				191	data = parse_json_stream(repo.run_cmd(['show'] + git_objs + ['--']))
				192	return data[0:num_revs], data[num_revs:]
				193
				194
				195	def get_data_item(data, key):
				196	"""Nested getitem lookup"""
				197	for k in key.split('.'):
				198	data = data[k]
				199	return data
				200
				201
				202	def metadata_diff(metadata_l, metadata_r):
				203	"""Prepare a metadata diff for printing"""
				204	keys = [('Hostname', 'hostname', 'hostname'),
				205	('Branch', 'branch', 'layers.meta.branch'),
				206	('Commit number', 'commit_num', 'layers.meta.commit_count'),
				207	('Commit', 'commit', 'layers.meta.commit'),
				208	('Number of test runs', 'testrun_count', 'testrun_count')
				209	]
				210
				211	def _metadata_diff(key):
				212	"""Diff metadata from two test reports"""
				213	try:
				214	val1 = get_data_item(metadata_l, key)
				215	except KeyError:
				216	val1 = '(N/A)'
				217	try:
				218	val2 = get_data_item(metadata_r, key)
				219	except KeyError:
				220	val2 = '(N/A)'
				221	return val1, val2
				222
				223	metadata = OrderedDict()
				224	for title, key, key_json in keys:
				225	value_l, value_r = _metadata_diff(key_json)
				226	metadata[key] = {'title': title,
				227	'value_old': value_l,
				228	'value': value_r}
				229	return metadata
				230
				231
				232	def print_diff_report(metadata_l, data_l, metadata_r, data_r):
				233	"""Print differences between two data sets"""
				234
				235	# First, print general metadata
				236	print("\nTEST METADATA:\n==============")
				237	meta_diff = metadata_diff(metadata_l, metadata_r)
				238	rows = []
				239	row_fmt = ['{:{wid}} ', '{:<{wid}} ', '{:<{wid}}']
				240	rows = [['', 'CURRENT COMMIT', 'COMPARING WITH']]
				241	for key, val in meta_diff.items():
				242	# Shorten commit hashes
				243	if key == 'commit':
				244	rows.append([val['title'] + ':', val['value'][:20], val['value_old'][:20]])
				245	else:
				246	rows.append([val['title'] + ':', val['value'], val['value_old']])
				247	print_table(rows, row_fmt)
				248
				249
				250	# Print test results
				251	print("\nTEST RESULTS:\n=============")
				252
				253	tests = list(data_l['tests'].keys())
				254	# Append tests that are only present in 'right' set
				255	tests += [t for t in list(data_r['tests'].keys()) if t not in tests]
				256
				257	# Prepare data to be printed
				258	rows = []
				259	row_fmt = ['{:8}', '{:{wid}}', '{:{wid}}', ' {:>{wid}}', ' {:{wid}} ', '{:{wid}}',
				260	' {:>{wid}}', ' {:>{wid}}']
				261	num_cols = len(row_fmt)
				262	for test in tests:
				263	test_l = data_l['tests'][test] if test in data_l['tests'] else None
				264	test_r = data_r['tests'][test] if test in data_r['tests'] else None
				265	pref = ' '
				266	if test_l is None:
				267	pref = '+'
				268	elif test_r is None:
				269	pref = '-'
				270	descr = test_l['description'] if test_l else test_r['description']
				271	heading = "{} {}: {}".format(pref, test, descr)
				272
				273	rows.append([heading])
				274
				275	# Generate the list of measurements
				276	meas_l = test_l['measurements'] if test_l else {}
				277	meas_r = test_r['measurements'] if test_r else {}
				278	measurements = list(meas_l.keys())
				279	measurements += [m for m in list(meas_r.keys()) if m not in measurements]
				280
				281	for meas in measurements:
				282	m_pref = ' '
				283	if meas in meas_l:
				284	stats_l = measurement_stats(meas_l[meas], 'l.')
				285	else:
				286	stats_l = measurement_stats(None, 'l.')
				287	m_pref = '+'
				288	if meas in meas_r:
				289	stats_r = measurement_stats(meas_r[meas], 'r.')
				290	else:
				291	stats_r = measurement_stats(None, 'r.')
				292	m_pref = '-'
				293	stats = stats_l.copy()
				294	stats.update(stats_r)
				295
				296	absdiff = stats['val_cls'](stats['r.mean'] - stats['l.mean'])
				297	reldiff = "{:+.1f} %".format(absdiff * 100 / stats['l.mean'])
				298	if stats['r.mean'] > stats['l.mean']:
				299	absdiff = '+' + str(absdiff)
				300	else:
				301	absdiff = str(absdiff)
				302	rows.append(['', m_pref, stats['name'] + ' ' + stats['quantity'],
				303	str(stats['l.mean']), '->', str(stats['r.mean']),
				304	absdiff, reldiff])
				305	rows.append([''] * num_cols)
				306
				307	print_table(rows, row_fmt)
				308
				309	print()
				310
				311
				312	def print_html_report(data, id_comp):
				313	"""Print report in html format"""
				314	# Handle metadata
				315	metadata = {'branch': {'title': 'Branch', 'value': 'master'},
				316	'hostname': {'title': 'Hostname', 'value': 'foobar'},
				317	'commit': {'title': 'Commit', 'value': '1234'}
				318	}
				319	metadata = metadata_diff(data[id_comp][0], data[-1][0])
				320
				321
				322	# Generate list of tests
				323	tests = []
				324	for test in data[-1][1]['tests'].keys():
				325	test_r = data[-1][1]['tests'][test]
				326	new_test = {'name': test_r['name'],
				327	'description': test_r['description'],
				328	'status': test_r['status'],
				329	'measurements': [],
				330	'err_type': test_r.get('err_type'),
				331	}
				332	# Limit length of err output shown
				333	if 'message' in test_r:
				334	lines = test_r['message'].splitlines()
				335	if len(lines) > 20:
				336	new_test['message'] = '...\n' + '\n'.join(lines[-20:])
				337	else:
				338	new_test['message'] = test_r['message']
				339
				340
				341	# Generate the list of measurements
				342	for meas in test_r['measurements'].keys():
				343	meas_r = test_r['measurements'][meas]
				344	meas_type = 'time' if meas_r['type'] == 'sysres' else 'size'
				345	new_meas = {'name': meas_r['name'],
				346	'legend': meas_r['legend'],
				347	'description': meas_r['name'] + ' ' + meas_type,
				348	}
				349	samples = []
				350
				351	# Run through all revisions in our data
				352	for meta, test_data in data:
				353	if (not test in test_data['tests'] or
				354	not meas in test_data['tests'][test]['measurements']):
				355	samples.append(measurement_stats(None))
				356	continue
				357	test_i = test_data['tests'][test]
				358	meas_i = test_i['measurements'][meas]
				359	commit_num = get_data_item(meta, 'layers.meta.commit_count')
				360	samples.append(measurement_stats(meas_i))
				361	samples[-1]['commit_num'] = commit_num
				362
				363	absdiff = samples[-1]['val_cls'](samples[-1]['mean'] - samples[id_comp]['mean'])
				364	new_meas['absdiff'] = absdiff
				365	new_meas['absdiff_str'] = str(absdiff) if absdiff < 0 else '+' + str(absdiff)
				366	new_meas['reldiff'] = "{:+.1f} %".format(absdiff * 100 / samples[id_comp]['mean'])
				367	new_meas['samples'] = samples
				368	new_meas['value'] = samples[-1]
				369	new_meas['value_type'] = samples[-1]['val_cls']
				370
				371	new_test['measurements'].append(new_meas)
				372	tests.append(new_test)
				373
				374	# Chart options
				375	chart_opts = {'haxis': {'min': get_data_item(data[0][0], 'layers.meta.commit_count'),
				376	'max': get_data_item(data[-1][0], 'layers.meta.commit_count')}
				377	}
				378
				379	print(html.template.render(metadata=metadata, test_data=tests, chart_opts=chart_opts))
				380
				381
				382	def auto_args(repo, args):
				383	"""Guess arguments, if not defined by the user"""
				384	# Get the latest commit in the repo
				385	log.debug("Guessing arguments from the latest commit")
				386	msg = repo.run_cmd(['log', '-1', '--branches', '--remotes', '--format=%b'])
				387	for line in msg.splitlines():
				388	split = line.split(':', 1)
				389	if len(split) != 2:
				390	continue
				391
				392	key = split[0]
				393	val = split[1].strip()
				394	if key == 'hostname':
				395	log.debug("Using hostname %s", val)
				396	args.hostname = val
				397	elif key == 'branch':
				398	log.debug("Using branch %s", val)
				399	args.branch = val
				400
				401
				402	def parse_args(argv):
				403	"""Parse command line arguments"""
				404	description = """
				405	Examine build performance test results from a Git repository"""
				406	parser = argparse.ArgumentParser(
				407	formatter_class=argparse.ArgumentDefaultsHelpFormatter,
				408	description=description)
				409
				410	parser.add_argument('--debug', '-d', action='store_true',
				411	help="Verbose logging")
				412	parser.add_argument('--repo', '-r', required=True,
				413	help="Results repository (local git clone)")
				414	parser.add_argument('--list', '-l', action='store_true',
				415	help="List available test runs")
				416	parser.add_argument('--html', action='store_true',
				417	help="Generate report in html format")
				418	group = parser.add_argument_group('Tag and revision')
				419	group.add_argument('--tag-name', '-t',
				420	default='{hostname}/{branch}/{machine}/{commit_number}-g{commit}/{tag_number}',
				421	help="Tag name (pattern) for finding results")
				422	group.add_argument('--hostname', '-H')
				423	group.add_argument('--branch', '-B', default='master')
				424	group.add_argument('--machine', default='qemux86')
				425	group.add_argument('--history-length', default=25, type=int,
				426	help="Number of tested revisions to plot in html report")
				427	group.add_argument('--commit',
				428	help="Revision to search for")
				429	group.add_argument('--commit-number',
				430	help="Revision number to search for, redundant if "
				431	"--commit is specified")
				432	group.add_argument('--commit2',
				433	help="Revision to compare with")
				434	group.add_argument('--commit-number2',
				435	help="Revision number to compare with, redundant if "
				436	"--commit2 is specified")
				437
				438	return parser.parse_args(argv)
				439
				440
				441	def main(argv=None):
				442	"""Script entry point"""
				443	args = parse_args(argv)
				444	if args.debug:
				445	log.setLevel(logging.DEBUG)
				446
				447	repo = GitRepo(args.repo)
				448
				449	if args.list:
				450	list_test_revs(repo, args.tag_name)
				451	return 0
				452
				453	# Determine hostname which to use
				454	if not args.hostname:
				455	auto_args(repo, args)
				456
				457	revs = get_test_revs(repo, args.tag_name, hostname=args.hostname,
				458	branch=args.branch, machine=args.machine)
				459	if len(revs) < 2:
				460	log.error("%d tester revisions found, unable to generate report",
				461	len(revs))
				462	return 1
				463
				464	# Pick revisions
				465	if args.commit:
				466	if args.commit_number:
				467	log.warning("Ignoring --commit-number as --commit was specified")
				468	index1 = rev_find(revs, 'commit', args.commit)
				469	elif args.commit_number:
				470	index1 = rev_find(revs, 'commit_number', args.commit_number)
				471	else:
				472	index1 = len(revs) - 1
				473
				474	if args.commit2:
				475	if args.commit_number2:
				476	log.warning("Ignoring --commit-number2 as --commit2 was specified")
				477	index2 = rev_find(revs, 'commit', args.commit2)
				478	elif args.commit_number2:
				479	index2 = rev_find(revs, 'commit_number', args.commit_number2)
				480	else:
				481	if index1 > 0:
				482	index2 = index1 - 1
				483	else:
				484	log.error("Unable to determine the other commit, use "
				485	"--commit2 or --commit-number2 to specify it")
				486	return 1
				487
				488	index_l = min(index1, index2)
				489	index_r = max(index1, index2)
				490
				491	rev_l = revs[index_l]
				492	rev_r = revs[index_r]
				493	log.debug("Using 'left' revision %s (%s), %s test runs:\n %s",
				494	rev_l.commit_number, rev_l.commit, len(rev_l.tags),
				495	'\n '.join(rev_l.tags))
				496	log.debug("Using 'right' revision %s (%s), %s test runs:\n %s",
				497	rev_r.commit_number, rev_r.commit, len(rev_r.tags),
				498	'\n '.join(rev_r.tags))
				499
				500	# Check report format used in the repo (assume all reports in the same fmt)
				501	xml = is_xml_format(repo, revs[index_r].tags[-1])
				502
				503	if args.html:
				504	index_0 = max(0, index_r - args.history_length)
				505	rev_range = range(index_0, index_r + 1)
				506	else:
				507	# We do not need range of commits for text report (no graphs)
				508	index_0 = index_l
				509	rev_range = (index_l, index_r)
				510
				511	# Read raw data
				512	log.debug("Reading %d revisions, starting from %s (%s)",
				513	len(rev_range), revs[index_0].commit_number, revs[index_0].commit)
				514	raw_data = [read_results(repo, revs[i].tags, xml) for i in rev_range]
				515
				516	data = []
				517	for raw_m, raw_d in raw_data:
				518	data.append((aggregate_metadata(raw_m), aggregate_data(raw_d)))
				519
				520	# Re-map list indexes to the new table starting from index 0
				521	index_r = index_r - index_0
				522	index_l = index_l - index_0
				523
				524	# Print report
				525	if not args.html:
				526	print_diff_report(data[index_l][0], data[index_l][1],
				527	data[index_r][0], data[index_r][1])
				528	else:
				529	print_html_report(data, index_l)
				530
				531	return 0
				532
				533	if __name__ == "__main__":
				534	sys.exit(main())