Blame - import-layers/yocto-poky/scripts/buildstats-diff - openbmc/owners-plugin-test

blob: adeba44988fc242ba897f20450f303153979505a [file] [log] [blame]

Patrick Williams	c0f7c04	2017-02-23 20:41:17 -0600	[diff] [blame]	1	#!/usr/bin/python3
				2	#
				3	# Script for comparing buildstats from two different builds
				4	#
				5	# Copyright (c) 2016, Intel Corporation.
				6	#
				7	# This program is free software; you can redistribute it and/or modify it
				8	# under the terms and conditions of the GNU General Public License,
				9	# version 2, as published by the Free Software Foundation.
				10	#
				11	# This program is distributed in the hope it will be useful, but WITHOUT
				12	# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
				13	# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
				14	# more details.
				15	#
				16	import argparse
				17	import glob
				18	import json
				19	import logging
				20	import math
				21	import os
				22	import re
				23	import sys
				24	from collections import namedtuple
Patrick Williams	c0f7c04	2017-02-23 20:41:17 -0600	[diff] [blame]	25	from operator import attrgetter
				26
				27	# Setup logging
				28	logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
				29	log = logging.getLogger()
				30
				31
				32	class ScriptError(Exception):
				33	"""Exception for internal error handling of this script"""
				34	pass
				35
				36
Patrick Williams	c0f7c04	2017-02-23 20:41:17 -0600	[diff] [blame]	37	taskdiff_fields = ('pkg', 'pkg_op', 'task', 'task_op', 'value1', 'value2',
				38	'absdiff', 'reldiff')
				39	TaskDiff = namedtuple('TaskDiff', ' '.join(taskdiff_fields))
				40
				41
Patrick Williams	c0f7c04	2017-02-23 20:41:17 -0600	[diff] [blame]	42	class BSTask(dict):
				43	def __init__(self, args, *kwargs):
				44	self['start_time'] = None
				45	self['elapsed_time'] = None
				46	self['status'] = None
				47	self['iostat'] = {}
				48	self['rusage'] = {}
				49	self['child_rusage'] = {}
				50	super(BSTask, self).__init__(args, *kwargs)
				51
				52	@property
				53	def cputime(self):
				54	"""Sum of user and system time taken by the task"""
				55	return self['rusage']['ru_stime'] + self['rusage']['ru_utime'] + \
				56	self['child_rusage']['ru_stime'] + self['child_rusage']['ru_utime']
				57
				58	@property
				59	def walltime(self):
				60	"""Elapsed wall clock time"""
Brad Bishop	6e60e8b	2018-02-01 10:27:11 -0500	[diff] [blame]	61	return self['elapsed_time']
Patrick Williams	c0f7c04	2017-02-23 20:41:17 -0600	[diff] [blame]	62
				63	@property
				64	def read_bytes(self):
				65	"""Bytes read from the block layer"""
				66	return self['iostat']['read_bytes']
				67
				68	@property
				69	def write_bytes(self):
				70	"""Bytes written to the block layer"""
				71	return self['iostat']['write_bytes']
				72
				73	@property
				74	def read_ops(self):
				75	"""Number of read operations on the block layer"""
				76	return self['rusage']['ru_inblock'] + self['child_rusage']['ru_inblock']
				77
				78	@property
				79	def write_ops(self):
				80	"""Number of write operations on the block layer"""
				81	return self['rusage']['ru_oublock'] + self['child_rusage']['ru_oublock']
				82
				83
				84	def read_buildstats_file(buildstat_file):
				85	"""Convert buildstat text file into dict/json"""
				86	bs_task = BSTask()
				87	log.debug("Reading task buildstats from %s", buildstat_file)
				88	with open(buildstat_file) as fobj:
				89	for line in fobj.readlines():
				90	key, val = line.split(':', 1)
				91	val = val.strip()
				92	if key == 'Started':
Brad Bishop	6e60e8b	2018-02-01 10:27:11 -0500	[diff] [blame]	93	start_time = float(val)
Patrick Williams	c0f7c04	2017-02-23 20:41:17 -0600	[diff] [blame]	94	bs_task['start_time'] = start_time
				95	elif key == 'Ended':
Brad Bishop	6e60e8b	2018-02-01 10:27:11 -0500	[diff] [blame]	96	end_time = float(val)
Patrick Williams	c0f7c04	2017-02-23 20:41:17 -0600	[diff] [blame]	97	elif key.startswith('IO '):
				98	split = key.split()
				99	bs_task['iostat'][split[1]] = int(val)
				100	elif key.find('rusage') >= 0:
				101	split = key.split()
				102	ru_key = split[-1]
				103	if ru_key in ('ru_stime', 'ru_utime'):
				104	val = float(val)
				105	else:
				106	val = int(val)
				107	ru_type = 'rusage' if split[0] == 'rusage' else \
				108	'child_rusage'
				109	bs_task[ru_type][ru_key] = val
				110	elif key == 'Status':
				111	bs_task['status'] = val
				112	bs_task['elapsed_time'] = end_time - start_time
				113	return bs_task
				114
				115
				116	def read_buildstats_dir(bs_dir):
				117	"""Read buildstats directory"""
				118	def split_nevr(nevr):
				119	"""Split name and version information from recipe "nevr" string"""
				120	n_e_v, revision = nevr.rsplit('-', 1)
				121	match = re.match(r'^(?P<name>\S+)-((?P<epoch>[0-9]{1,5})_)?(?P<version>[0-9]\S*)$',
				122	n_e_v)
				123	if not match:
				124	# If we're not able to parse a version starting with a number, just
				125	# take the part after last dash
				126	match = re.match(r'^(?P<name>\S+)-((?P<epoch>[0-9]{1,5})_)?(?P<version>[^-]+)$',
				127	n_e_v)
				128	name = match.group('name')
				129	version = match.group('version')
				130	epoch = match.group('epoch')
				131	return name, epoch, version, revision
				132
				133	if not os.path.isfile(os.path.join(bs_dir, 'build_stats')):
				134	raise ScriptError("{} does not look like a buildstats directory".format(bs_dir))
				135
				136	log.debug("Reading buildstats directory %s", bs_dir)
				137
				138	buildstats = {}
				139	subdirs = os.listdir(bs_dir)
				140	for dirname in subdirs:
				141	recipe_dir = os.path.join(bs_dir, dirname)
				142	if not os.path.isdir(recipe_dir):
				143	continue
				144	name, epoch, version, revision = split_nevr(dirname)
				145	recipe_bs = {'nevr': dirname,
				146	'name': name,
				147	'epoch': epoch,
				148	'version': version,
				149	'revision': revision,
				150	'tasks': {}}
				151	for task in os.listdir(recipe_dir):
				152	recipe_bs['tasks'][task] = [read_buildstats_file(
				153	os.path.join(recipe_dir, task))]
				154	if name in buildstats:
				155	raise ScriptError("Cannot handle multiple versions of the same "
				156	"package ({})".format(name))
				157	buildstats[name] = recipe_bs
				158
				159	return buildstats
				160
				161
				162	def bs_append(dst, src):
				163	"""Append data from another buildstats"""
				164	if set(dst.keys()) != set(src.keys()):
				165	raise ScriptError("Refusing to join buildstats, set of packages is "
				166	"different")
				167	for pkg, data in dst.items():
				168	if data['nevr'] != src[pkg]['nevr']:
				169	raise ScriptError("Refusing to join buildstats, package version "
				170	"differs: {} vs. {}".format(data['nevr'], src[pkg]['nevr']))
				171	if set(data['tasks'].keys()) != set(src[pkg]['tasks'].keys()):
				172	raise ScriptError("Refusing to join buildstats, set of tasks "
				173	"in {} differ".format(pkg))
				174	for taskname, taskdata in data['tasks'].items():
				175	taskdata.extend(src[pkg]['tasks'][taskname])
				176
				177
				178	def read_buildstats_json(path):
				179	"""Read buildstats from JSON file"""
				180	buildstats = {}
				181	with open(path) as fobj:
				182	bs_json = json.load(fobj)
				183	for recipe_bs in bs_json:
				184	if recipe_bs['name'] in buildstats:
				185	raise ScriptError("Cannot handle multiple versions of the same "
				186	"package ({})".format(recipe_bs['name']))
				187
				188	if recipe_bs['epoch'] is None:
				189	recipe_bs['nevr'] = "{}-{}-{}".format(recipe_bs['name'], recipe_bs['version'], recipe_bs['revision'])
				190	else:
				191	recipe_bs['nevr'] = "{}-{}_{}-{}".format(recipe_bs['name'], recipe_bs['epoch'], recipe_bs['version'], recipe_bs['revision'])
				192
				193	for task, data in recipe_bs['tasks'].copy().items():
				194	recipe_bs['tasks'][task] = [BSTask(data)]
				195
				196	buildstats[recipe_bs['name']] = recipe_bs
				197
				198	return buildstats
				199
				200
				201	def read_buildstats(path, multi):
				202	"""Read buildstats"""
				203	if not os.path.exists(path):
				204	raise ScriptError("No such file or directory: {}".format(path))
				205
				206	if os.path.isfile(path):
				207	return read_buildstats_json(path)
				208
				209	if os.path.isfile(os.path.join(path, 'build_stats')):
				210	return read_buildstats_dir(path)
				211
				212	# Handle a non-buildstat directory
				213	subpaths = sorted(glob.glob(path + '/*'))
				214	if len(subpaths) > 1:
				215	if multi:
				216	log.info("Averaging over {} buildstats from {}".format(
				217	len(subpaths), path))
				218	else:
				219	raise ScriptError("Multiple buildstats found in '{}'. Please give "
				220	"a single buildstat directory of use the --multi "
				221	"option".format(path))
				222	bs = None
				223	for subpath in subpaths:
				224	if os.path.isfile(subpath):
				225	tmpbs = read_buildstats_json(subpath)
				226	else:
				227	tmpbs = read_buildstats_dir(subpath)
				228	if not bs:
				229	bs = tmpbs
				230	else:
				231	log.debug("Joining buildstats")
				232	bs_append(bs, tmpbs)
				233
				234	if not bs:
				235	raise ScriptError("No buildstats found under {}".format(path))
				236	return bs
				237
				238
				239	def print_ver_diff(bs1, bs2):
				240	"""Print package version differences"""
				241	pkgs1 = set(bs1.keys())
				242	pkgs2 = set(bs2.keys())
				243	new_pkgs = pkgs2 - pkgs1
				244	deleted_pkgs = pkgs1 - pkgs2
				245
				246	echanged = []
				247	vchanged = []
				248	rchanged = []
				249	unchanged = []
				250	common_pkgs = pkgs2.intersection(pkgs1)
				251	if common_pkgs:
				252	for pkg in common_pkgs:
				253	if bs1[pkg]['epoch'] != bs2[pkg]['epoch']:
				254	echanged.append(pkg)
				255	elif bs1[pkg]['version'] != bs2[pkg]['version']:
				256	vchanged.append(pkg)
				257	elif bs1[pkg]['revision'] != bs2[pkg]['revision']:
				258	rchanged.append(pkg)
				259	else:
				260	unchanged.append(pkg)
				261
				262	maxlen = max([len(pkg) for pkg in pkgs1.union(pkgs2)])
				263	fmt_str = " {:{maxlen}} ({})"
				264	# if unchanged:
				265	# print("\nUNCHANGED PACKAGES:")
				266	# print("-------------------")
				267	# maxlen = max([len(pkg) for pkg in unchanged])
				268	# for pkg in sorted(unchanged):
				269	# print(fmt_str.format(pkg, bs2[pkg]['nevr'], maxlen=maxlen))
				270
				271	if new_pkgs:
				272	print("\nNEW PACKAGES:")
				273	print("-------------")
				274	for pkg in sorted(new_pkgs):
				275	print(fmt_str.format(pkg, bs2[pkg]['nevr'], maxlen=maxlen))
				276
				277	if deleted_pkgs:
				278	print("\nDELETED PACKAGES:")
				279	print("-----------------")
				280	for pkg in sorted(deleted_pkgs):
				281	print(fmt_str.format(pkg, bs1[pkg]['nevr'], maxlen=maxlen))
				282
				283	fmt_str = " {0:{maxlen}} {1:<20} ({2})"
				284	if rchanged:
				285	print("\nREVISION CHANGED:")
				286	print("-----------------")
				287	for pkg in sorted(rchanged):
				288	field1 = "{} -> {}".format(pkg, bs1[pkg]['revision'], bs2[pkg]['revision'])
				289	field2 = "{} -> {}".format(bs1[pkg]['nevr'], bs2[pkg]['nevr'])
				290	print(fmt_str.format(pkg, field1, field2, maxlen=maxlen))
				291
				292	if vchanged:
				293	print("\nVERSION CHANGED:")
				294	print("----------------")
				295	for pkg in sorted(vchanged):
				296	field1 = "{} -> {}".format(bs1[pkg]['version'], bs2[pkg]['version'])
				297	field2 = "{} -> {}".format(bs1[pkg]['nevr'], bs2[pkg]['nevr'])
				298	print(fmt_str.format(pkg, field1, field2, maxlen=maxlen))
				299
				300	if echanged:
				301	print("\nEPOCH CHANGED:")
				302	print("--------------")
				303	for pkg in sorted(echanged):
Brad Bishop	6e60e8b	2018-02-01 10:27:11 -0500	[diff] [blame]	304	field1 = "{} -> {}".format(bs1[pkg]['epoch'], bs2[pkg]['epoch'])
Patrick Williams	c0f7c04	2017-02-23 20:41:17 -0600	[diff] [blame]	305	field2 = "{} -> {}".format(bs1[pkg]['nevr'], bs2[pkg]['nevr'])
				306	print(fmt_str.format(pkg, field1, field2, maxlen=maxlen))
				307
				308
				309	def print_task_diff(bs1, bs2, val_type, min_val=0, min_absdiff=0, sort_by=('absdiff',)):
				310	"""Diff task execution times"""
				311	def val_to_str(val, human_readable=False):
				312	"""Convert raw value to printable string"""
				313	def hms_time(secs):
				314	"""Get time in human-readable HH:MM:SS format"""
				315	h = int(secs / 3600)
				316	m = int((secs % 3600) / 60)
				317	s = secs % 60
				318	if h == 0:
				319	return "{:02d}:{:04.1f}".format(m, s)
				320	else:
				321	return "{:d}:{:02d}:{:04.1f}".format(h, m, s)
				322
				323	if 'time' in val_type:
				324	if human_readable:
				325	return hms_time(val)
				326	else:
				327	return "{:.1f}s".format(val)
				328	elif 'bytes' in val_type and human_readable:
				329	prefix = ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi']
				330	dec = int(math.log(val, 2) / 10)
				331	prec = 1 if dec > 0 else 0
				332	return "{:.{prec}f}{}B".format(val / (2 ** (10 * dec)),
				333	prefix[dec], prec=prec)
				334	elif 'ops' in val_type and human_readable:
				335	prefix = ['', 'k', 'M', 'G', 'T', 'P']
				336	dec = int(math.log(val, 1000))
				337	prec = 1 if dec > 0 else 0
				338	return "{:.{prec}f}{}ops".format(val / (1000 ** dec),
				339	prefix[dec], prec=prec)
				340	return str(int(val))
				341
				342	def sum_vals(buildstats):
				343	"""Get cumulative sum of all tasks"""
				344	total = 0.0
				345	for recipe_data in buildstats.values():
				346	for bs_task in recipe_data['tasks'].values():
				347	total += sum([getattr(b, val_type) for b in bs_task]) / len(bs_task)
				348	return total
				349
				350	tasks_diff = []
				351
				352	if min_val:
				353	print("Ignoring tasks less than {} ({})".format(
				354	val_to_str(min_val, True), val_to_str(min_val)))
				355	if min_absdiff:
				356	print("Ignoring differences less than {} ({})".format(
				357	val_to_str(min_absdiff, True), val_to_str(min_absdiff)))
				358
				359	# Prepare the data
				360	pkgs = set(bs1.keys()).union(set(bs2.keys()))
				361	for pkg in pkgs:
				362	tasks1 = bs1[pkg]['tasks'] if pkg in bs1 else {}
				363	tasks2 = bs2[pkg]['tasks'] if pkg in bs2 else {}
				364	if not tasks1:
				365	pkg_op = '+ '
				366	elif not tasks2:
				367	pkg_op = '- '
				368	else:
				369	pkg_op = ' '
				370
				371	for task in set(tasks1.keys()).union(set(tasks2.keys())):
				372	task_op = ' '
				373	if task in tasks1:
				374	# Average over all values
				375	val1 = [getattr(b, val_type) for b in bs1[pkg]['tasks'][task]]
				376	val1 = sum(val1) / len(val1)
				377	else:
				378	task_op = '+ '
				379	val1 = 0
				380	if task in tasks2:
				381	# Average over all values
				382	val2 = [getattr(b, val_type) for b in bs2[pkg]['tasks'][task]]
				383	val2 = sum(val2) / len(val2)
				384	else:
				385	val2 = 0
				386	task_op = '- '
				387
				388	if val1 == 0:
				389	reldiff = float('inf')
				390	else:
				391	reldiff = 100 * (val2 - val1) / val1
				392
				393	if max(val1, val2) < min_val:
				394	log.debug("Filtering out %s:%s (%s)", pkg, task,
				395	val_to_str(max(val1, val2)))
				396	continue
				397	if abs(val2 - val1) < min_absdiff:
				398	log.debug("Filtering out %s:%s (difference of %s)", pkg, task,
				399	val_to_str(val2-val1))
				400	continue
				401	tasks_diff.append(TaskDiff(pkg, pkg_op, task, task_op, val1, val2,
				402	val2-val1, reldiff))
				403
				404	# Sort our list
				405	for field in reversed(sort_by):
				406	if field.startswith('-'):
				407	field = field[1:]
				408	reverse = True
				409	else:
				410	reverse = False
				411	tasks_diff = sorted(tasks_diff, key=attrgetter(field), reverse=reverse)
				412
				413	linedata = [(' ', 'PKG', ' ', 'TASK', 'ABSDIFF', 'RELDIFF',
				414	val_type.upper() + '1', val_type.upper() + '2')]
				415	field_lens = dict([('len_{}'.format(i), len(f)) for i, f in enumerate(linedata[0])])
				416
				417	# Prepare fields in string format and measure field lengths
				418	for diff in tasks_diff:
				419	task_prefix = diff.task_op if diff.pkg_op == ' ' else ' '
				420	linedata.append((diff.pkg_op, diff.pkg, task_prefix, diff.task,
				421	val_to_str(diff.absdiff),
				422	'{:+.1f}%'.format(diff.reldiff),
				423	val_to_str(diff.value1),
				424	val_to_str(diff.value2)))
				425	for i, field in enumerate(linedata[-1]):
				426	key = 'len_{}'.format(i)
				427	if len(field) > field_lens[key]:
				428	field_lens[key] = len(field)
				429
				430	# Print data
				431	print()
				432	for fields in linedata:
				433	print("{:{len_0}}{:{len_1}} {:{len_2}}{:{len_3}} {:>{len_4}} {:>{len_5}} {:>{len_6}} -> {:{len_7}}".format(
				434	fields, *field_lens))
				435
				436	# Print summary of the diffs
				437	total1 = sum_vals(bs1)
				438	total2 = sum_vals(bs2)
				439	print("\nCumulative {}:".format(val_type))
				440	print (" {} {:+.1f}% {} ({}) -> {} ({})".format(
				441	val_to_str(total2 - total1), 100 * (total2-total1) / total1,
				442	val_to_str(total1, True), val_to_str(total1),
				443	val_to_str(total2, True), val_to_str(total2)))
				444
				445
				446	def parse_args(argv):
				447	"""Parse cmdline arguments"""
				448	description="""
				449	Script for comparing buildstats of two separate builds."""
				450	parser = argparse.ArgumentParser(
				451	formatter_class=argparse.ArgumentDefaultsHelpFormatter,
				452	description=description)
				453
				454	min_val_defaults = {'cputime': 3.0,
				455	'read_bytes': 524288,
				456	'write_bytes': 524288,
				457	'read_ops': 500,
				458	'write_ops': 500,
				459	'walltime': 5}
				460	min_absdiff_defaults = {'cputime': 1.0,
				461	'read_bytes': 131072,
				462	'write_bytes': 131072,
				463	'read_ops': 50,
				464	'write_ops': 50,
				465	'walltime': 2}
				466
				467	parser.add_argument('--debug', '-d', action='store_true',
				468	help="Verbose logging")
				469	parser.add_argument('--ver-diff', action='store_true',
				470	help="Show package version differences and exit")
				471	parser.add_argument('--diff-attr', default='cputime',
				472	choices=min_val_defaults.keys(),
				473	help="Buildstat attribute which to compare")
				474	parser.add_argument('--min-val', default=min_val_defaults, type=float,
				475	help="Filter out tasks less than MIN_VAL. "
				476	"Default depends on --diff-attr.")
				477	parser.add_argument('--min-absdiff', default=min_absdiff_defaults, type=float,
				478	help="Filter out tasks whose difference is less than "
				479	"MIN_ABSDIFF, Default depends on --diff-attr.")
				480	parser.add_argument('--sort-by', default='absdiff',
				481	help="Comma-separated list of field sort order. "
				482	"Prepend the field name with '-' for reversed sort. "
				483	"Available fields are: {}".format(', '.join(taskdiff_fields)))
				484	parser.add_argument('--multi', action='store_true',
				485	help="Read all buildstats from the given paths and "
				486	"average over them")
				487	parser.add_argument('buildstats1', metavar='BUILDSTATS1', help="'Left' buildstat")
				488	parser.add_argument('buildstats2', metavar='BUILDSTATS2', help="'Right' buildstat")
				489
				490	args = parser.parse_args(argv)
				491
				492	# We do not nedd/want to read all buildstats if we just want to look at the
				493	# package versions
				494	if args.ver_diff:
				495	args.multi = False
				496
				497	# Handle defaults for the filter arguments
				498	if args.min_val is min_val_defaults:
				499	args.min_val = min_val_defaults[args.diff_attr]
				500	if args.min_absdiff is min_absdiff_defaults:
				501	args.min_absdiff = min_absdiff_defaults[args.diff_attr]
				502
				503	return args
				504
				505
				506	def main(argv=None):
				507	"""Script entry point"""
				508	args = parse_args(argv)
				509	if args.debug:
				510	log.setLevel(logging.DEBUG)
				511
				512	# Validate sort fields
				513	sort_by = []
				514	for field in args.sort_by.split(','):
				515	if field.lstrip('-') not in taskdiff_fields:
				516	log.error("Invalid sort field '%s' (must be one of: %s)" %
				517	(field, ', '.join(taskdiff_fields)))
				518	sys.exit(1)
				519	sort_by.append(field)
				520
				521	try:
				522	bs1 = read_buildstats(args.buildstats1, args.multi)
				523	bs2 = read_buildstats(args.buildstats2, args.multi)
				524
				525	if args.ver_diff:
				526	print_ver_diff(bs1, bs2)
				527	else:
				528	print_task_diff(bs1, bs2, args.diff_attr, args.min_val,
				529	args.min_absdiff, sort_by)
				530	except ScriptError as err:
				531	log.error(str(err))
				532	return 1
				533	return 0
				534
				535	if __name__ == "__main__":
				536	sys.exit(main())