#!/usr/bin/python3
#
# Script for comparing buildstats from two different builds
#
# Copyright (c) 2016, Intel Corporation.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms and conditions of the GNU General Public License,
# version 2, as published by the Free Software Foundation.
#
# This program is distributed in the hope it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
import argparse
import glob
import json
import logging
import math
import os
import re
import sys
from collections import namedtuple
from datetime import datetime, timedelta, tzinfo
from operator import attrgetter

# Setup logging
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
log = logging.getLogger()


class ScriptError(Exception):
    """Exception for internal error handling of this script"""
    pass


class TimeZone(tzinfo):
    """Simple fixed-offset tzinfo"""
    def __init__(self, seconds, name):
        self._offset = timedelta(seconds=seconds)
        self._name = name

    def utcoffset(self, dt):
        return self._offset

    def tzname(self, dt):
        return self._name

    def dst(self, dt):
        return None

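# Fixed-offset timezones known to this script; to_datetime_obj() below
# attaches the UTC entry to raw epoch timestamps.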
TIMEZONES = {'UTC': TimeZone(0, 'UTC'),
             'EET': TimeZone(7200, 'EET'),
             'EEST': TimeZone(10800, 'EEST')}

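# One TaskDiff row records how a single metric changed for one task: package
# and task names, add/remove markers (pkg_op/task_op), the value from each
# build, and the absolute and relative difference.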
taskdiff_fields = ('pkg', 'pkg_op', 'task', 'task_op', 'value1', 'value2',
                   'absdiff', 'reldiff')
TaskDiff = namedtuple('TaskDiff', ' '.join(taskdiff_fields))


def to_datetime_obj(obj):
    """Helper for getting timestamps in datetime format"""
    if isinstance(obj, datetime):
        return obj
    else:
        return datetime.utcfromtimestamp(obj).replace(tzinfo=TIMEZONES['UTC'])


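# Buildstats of a single task: a plain dict of the parsed fields, with the
# compared metrics (cputime, walltime, I/O bytes and ops) exposed as
# properties that fold in child process rusage where applicable.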
class BSTask(dict):
    def __init__(self, *args, **kwargs):
        self['start_time'] = None
        self['elapsed_time'] = None
        self['status'] = None
        self['iostat'] = {}
        self['rusage'] = {}
        self['child_rusage'] = {}
        super(BSTask, self).__init__(*args, **kwargs)

    @property
    def cputime(self):
        """Sum of user and system time taken by the task"""
        return self['rusage']['ru_stime'] + self['rusage']['ru_utime'] + \
               self['child_rusage']['ru_stime'] + self['child_rusage']['ru_utime']

    @property
    def walltime(self):
        """Elapsed wall clock time"""
        return self['elapsed_time'].total_seconds()

    @property
    def read_bytes(self):
        """Bytes read from the block layer"""
        return self['iostat']['read_bytes']

    @property
    def write_bytes(self):
        """Bytes written to the block layer"""
        return self['iostat']['write_bytes']

    @property
    def read_ops(self):
        """Number of read operations on the block layer"""
        return self['rusage']['ru_inblock'] + self['child_rusage']['ru_inblock']

    @property
    def write_ops(self):
        """Number of write operations on the block layer"""
        return self['rusage']['ru_oublock'] + self['child_rusage']['ru_oublock']


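# A task buildstat file is parsed as "key: value" lines: Started/Ended epoch
# timestamps, "IO <counter>" fields, rusage and child rusage fields, and a
# Status line.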
def read_buildstats_file(buildstat_file):
    """Convert buildstat text file into dict/json"""
    bs_task = BSTask()
    log.debug("Reading task buildstats from %s", buildstat_file)
    with open(buildstat_file) as fobj:
        for line in fobj.readlines():
            key, val = line.split(':', 1)
            val = val.strip()
            if key == 'Started':
                start_time = to_datetime_obj(float(val))
                bs_task['start_time'] = start_time
            elif key == 'Ended':
                end_time = to_datetime_obj(float(val))
            elif key.startswith('IO '):
                split = key.split()
                bs_task['iostat'][split[1]] = int(val)
            elif key.find('rusage') >= 0:
                split = key.split()
                ru_key = split[-1]
                if ru_key in ('ru_stime', 'ru_utime'):
                    val = float(val)
                else:
                    val = int(val)
                ru_type = 'rusage' if split[0] == 'rusage' else \
                                      'child_rusage'
                bs_task[ru_type][ru_key] = val
            elif key == 'Status':
                bs_task['status'] = val
    bs_task['elapsed_time'] = end_time - start_time
    return bs_task


def read_buildstats_dir(bs_dir):
    """Read buildstats directory"""
    def split_nevr(nevr):
        """Split name and version information from recipe "nevr" string"""
        n_e_v, revision = nevr.rsplit('-', 1)
        match = re.match(r'^(?P<name>\S+)-((?P<epoch>[0-9]{1,5})_)?(?P<version>[0-9]\S*)$',
                         n_e_v)
        if not match:
            # If we're not able to parse a version starting with a number, just
            # take the part after last dash
            match = re.match(r'^(?P<name>\S+)-((?P<epoch>[0-9]{1,5})_)?(?P<version>[^-]+)$',
                             n_e_v)
        name = match.group('name')
        version = match.group('version')
        epoch = match.group('epoch')
        return name, epoch, version, revision

    if not os.path.isfile(os.path.join(bs_dir, 'build_stats')):
        raise ScriptError("{} does not look like a buildstats directory".format(bs_dir))

    log.debug("Reading buildstats directory %s", bs_dir)

    buildstats = {}
    subdirs = os.listdir(bs_dir)
    for dirname in subdirs:
        recipe_dir = os.path.join(bs_dir, dirname)
        if not os.path.isdir(recipe_dir):
            continue
        name, epoch, version, revision = split_nevr(dirname)
        recipe_bs = {'nevr': dirname,
                     'name': name,
                     'epoch': epoch,
                     'version': version,
                     'revision': revision,
                     'tasks': {}}
        for task in os.listdir(recipe_dir):
            recipe_bs['tasks'][task] = [read_buildstats_file(
                os.path.join(recipe_dir, task))]
        if name in buildstats:
            raise ScriptError("Cannot handle multiple versions of the same "
                              "package ({})".format(name))
        buildstats[name] = recipe_bs

    return buildstats


def bs_append(dst, src):
    """Append data from another buildstats"""
    if set(dst.keys()) != set(src.keys()):
        raise ScriptError("Refusing to join buildstats, set of packages is "
                          "different")
    for pkg, data in dst.items():
        if data['nevr'] != src[pkg]['nevr']:
            raise ScriptError("Refusing to join buildstats, package version "
                              "differs: {} vs. {}".format(data['nevr'], src[pkg]['nevr']))
        if set(data['tasks'].keys()) != set(src[pkg]['tasks'].keys()):
            raise ScriptError("Refusing to join buildstats, set of tasks "
                              "in {} differ".format(pkg))
        for taskname, taskdata in data['tasks'].items():
            taskdata.extend(src[pkg]['tasks'][taskname])


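# In JSON form the buildstats are a list of recipe objects, each with
# name/epoch/version/revision fields and a "tasks" dict of per-task data.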
def read_buildstats_json(path):
    """Read buildstats from JSON file"""
    buildstats = {}
    with open(path) as fobj:
        bs_json = json.load(fobj)
    for recipe_bs in bs_json:
        if recipe_bs['name'] in buildstats:
            raise ScriptError("Cannot handle multiple versions of the same "
                              "package ({})".format(recipe_bs['name']))

        if recipe_bs['epoch'] is None:
            recipe_bs['nevr'] = "{}-{}-{}".format(recipe_bs['name'], recipe_bs['version'], recipe_bs['revision'])
        else:
            recipe_bs['nevr'] = "{}-{}_{}-{}".format(recipe_bs['name'], recipe_bs['epoch'], recipe_bs['version'], recipe_bs['revision'])

        for task, data in recipe_bs['tasks'].copy().items():
            recipe_bs['tasks'][task] = [BSTask(data)]

        buildstats[recipe_bs['name']] = recipe_bs

    return buildstats


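# Dispatch on the type of input: a plain file is read as JSON, a directory
# containing a build_stats file as a single buildstats tree, and any other
# directory as a collection of buildstats (use --multi to average several).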
def read_buildstats(path, multi):
    """Read buildstats"""
    if not os.path.exists(path):
        raise ScriptError("No such file or directory: {}".format(path))

    if os.path.isfile(path):
        return read_buildstats_json(path)

    if os.path.isfile(os.path.join(path, 'build_stats')):
        return read_buildstats_dir(path)

    # Handle a non-buildstat directory
    subpaths = sorted(glob.glob(path + '/*'))
    if len(subpaths) > 1:
        if multi:
            log.info("Averaging over {} buildstats from {}".format(
                len(subpaths), path))
        else:
            raise ScriptError("Multiple buildstats found in '{}'. Please give "
                              "a single buildstat directory or use the --multi "
                              "option".format(path))
    bs = None
    for subpath in subpaths:
        if os.path.isfile(subpath):
            tmpbs = read_buildstats_json(subpath)
        else:
            tmpbs = read_buildstats_dir(subpath)
        if not bs:
            bs = tmpbs
        else:
            log.debug("Joining buildstats")
            bs_append(bs, tmpbs)

    if not bs:
        raise ScriptError("No buildstats found under {}".format(path))
    return bs


def print_ver_diff(bs1, bs2):
    """Print package version differences"""
    pkgs1 = set(bs1.keys())
    pkgs2 = set(bs2.keys())
    new_pkgs = pkgs2 - pkgs1
    deleted_pkgs = pkgs1 - pkgs2

    echanged = []
    vchanged = []
    rchanged = []
    unchanged = []
    common_pkgs = pkgs2.intersection(pkgs1)
    if common_pkgs:
        for pkg in common_pkgs:
            if bs1[pkg]['epoch'] != bs2[pkg]['epoch']:
                echanged.append(pkg)
            elif bs1[pkg]['version'] != bs2[pkg]['version']:
                vchanged.append(pkg)
            elif bs1[pkg]['revision'] != bs2[pkg]['revision']:
                rchanged.append(pkg)
            else:
                unchanged.append(pkg)

    maxlen = max([len(pkg) for pkg in pkgs1.union(pkgs2)])
    fmt_str = " {:{maxlen}} ({})"
#    if unchanged:
#        print("\nUNCHANGED PACKAGES:")
#        print("-------------------")
#        maxlen = max([len(pkg) for pkg in unchanged])
#        for pkg in sorted(unchanged):
#            print(fmt_str.format(pkg, bs2[pkg]['nevr'], maxlen=maxlen))

    if new_pkgs:
        print("\nNEW PACKAGES:")
        print("-------------")
        for pkg in sorted(new_pkgs):
            print(fmt_str.format(pkg, bs2[pkg]['nevr'], maxlen=maxlen))

    if deleted_pkgs:
        print("\nDELETED PACKAGES:")
        print("-----------------")
        for pkg in sorted(deleted_pkgs):
            print(fmt_str.format(pkg, bs1[pkg]['nevr'], maxlen=maxlen))

    fmt_str = " {0:{maxlen}} {1:<20} ({2})"
    if rchanged:
        print("\nREVISION CHANGED:")
        print("-----------------")
        for pkg in sorted(rchanged):
            field1 = "{} -> {}".format(bs1[pkg]['revision'], bs2[pkg]['revision'])
            field2 = "{} -> {}".format(bs1[pkg]['nevr'], bs2[pkg]['nevr'])
            print(fmt_str.format(pkg, field1, field2, maxlen=maxlen))

    if vchanged:
        print("\nVERSION CHANGED:")
        print("----------------")
        for pkg in sorted(vchanged):
            field1 = "{} -> {}".format(bs1[pkg]['version'], bs2[pkg]['version'])
            field2 = "{} -> {}".format(bs1[pkg]['nevr'], bs2[pkg]['nevr'])
            print(fmt_str.format(pkg, field1, field2, maxlen=maxlen))

    if echanged:
        print("\nEPOCH CHANGED:")
        print("--------------")
        for pkg in sorted(echanged):
            field1 = "{} -> {}".format(bs1[pkg]['epoch'], bs2[pkg]['epoch'])
            field2 = "{} -> {}".format(bs1[pkg]['nevr'], bs2[pkg]['nevr'])
            print(fmt_str.format(pkg, field1, field2, maxlen=maxlen))


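# In the task diff table a leading '+' marks a package or task present only
# in the second buildstats, '-' one present only in the first; values are
# averaged when a task has several buildstats (see --multi).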
def print_task_diff(bs1, bs2, val_type, min_val=0, min_absdiff=0, sort_by=('absdiff',)):
    """Diff task execution times"""
    def val_to_str(val, human_readable=False):
        """Convert raw value to printable string"""
        def hms_time(secs):
            """Get time in human-readable HH:MM:SS format"""
            h = int(secs / 3600)
            m = int((secs % 3600) / 60)
            s = secs % 60
            if h == 0:
                return "{:02d}:{:04.1f}".format(m, s)
            else:
                return "{:d}:{:02d}:{:04.1f}".format(h, m, s)

        if 'time' in val_type:
            if human_readable:
                return hms_time(val)
            else:
                return "{:.1f}s".format(val)
        elif 'bytes' in val_type and human_readable:
            prefix = ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi']
            dec = int(math.log(val, 2) / 10)
            prec = 1 if dec > 0 else 0
            return "{:.{prec}f}{}B".format(val / (2 ** (10 * dec)),
                                           prefix[dec], prec=prec)
        elif 'ops' in val_type and human_readable:
            prefix = ['', 'k', 'M', 'G', 'T', 'P']
            dec = int(math.log(val, 1000))
            prec = 1 if dec > 0 else 0
            return "{:.{prec}f}{}ops".format(val / (1000 ** dec),
                                             prefix[dec], prec=prec)
        return str(int(val))

    def sum_vals(buildstats):
        """Get cumulative sum of all tasks"""
        total = 0.0
        for recipe_data in buildstats.values():
            for bs_task in recipe_data['tasks'].values():
                total += sum([getattr(b, val_type) for b in bs_task]) / len(bs_task)
        return total

    tasks_diff = []

    if min_val:
        print("Ignoring tasks less than {} ({})".format(
            val_to_str(min_val, True), val_to_str(min_val)))
    if min_absdiff:
        print("Ignoring differences less than {} ({})".format(
            val_to_str(min_absdiff, True), val_to_str(min_absdiff)))

    # Prepare the data
    pkgs = set(bs1.keys()).union(set(bs2.keys()))
    for pkg in pkgs:
        tasks1 = bs1[pkg]['tasks'] if pkg in bs1 else {}
        tasks2 = bs2[pkg]['tasks'] if pkg in bs2 else {}
        if not tasks1:
            pkg_op = '+ '
        elif not tasks2:
            pkg_op = '- '
        else:
            pkg_op = '  '

        for task in set(tasks1.keys()).union(set(tasks2.keys())):
            task_op = '  '
            if task in tasks1:
                # Average over all values
                val1 = [getattr(b, val_type) for b in bs1[pkg]['tasks'][task]]
                val1 = sum(val1) / len(val1)
            else:
                task_op = '+ '
                val1 = 0
            if task in tasks2:
                # Average over all values
                val2 = [getattr(b, val_type) for b in bs2[pkg]['tasks'][task]]
                val2 = sum(val2) / len(val2)
            else:
                val2 = 0
                task_op = '- '

            if val1 == 0:
                reldiff = float('inf')
            else:
                reldiff = 100 * (val2 - val1) / val1

            if max(val1, val2) < min_val:
                log.debug("Filtering out %s:%s (%s)", pkg, task,
                          val_to_str(max(val1, val2)))
                continue
            if abs(val2 - val1) < min_absdiff:
                log.debug("Filtering out %s:%s (difference of %s)", pkg, task,
                          val_to_str(val2-val1))
                continue
            tasks_diff.append(TaskDiff(pkg, pkg_op, task, task_op, val1, val2,
                                       val2-val1, reldiff))

    # Sort our list
    for field in reversed(sort_by):
        if field.startswith('-'):
            field = field[1:]
            reverse = True
        else:
            reverse = False
        tasks_diff = sorted(tasks_diff, key=attrgetter(field), reverse=reverse)

    linedata = [('  ', 'PKG', '  ', 'TASK', 'ABSDIFF', 'RELDIFF',
                 val_type.upper() + '1', val_type.upper() + '2')]
    field_lens = dict([('len_{}'.format(i), len(f)) for i, f in enumerate(linedata[0])])

    # Prepare fields in string format and measure field lengths
    for diff in tasks_diff:
        task_prefix = diff.task_op if diff.pkg_op == '  ' else '  '
        linedata.append((diff.pkg_op, diff.pkg, task_prefix, diff.task,
                         val_to_str(diff.absdiff),
                         '{:+.1f}%'.format(diff.reldiff),
                         val_to_str(diff.value1),
                         val_to_str(diff.value2)))
        for i, field in enumerate(linedata[-1]):
            key = 'len_{}'.format(i)
            if len(field) > field_lens[key]:
                field_lens[key] = len(field)

    # Print data
    print()
    for fields in linedata:
        print("{:{len_0}}{:{len_1}} {:{len_2}}{:{len_3}} {:>{len_4}} {:>{len_5}} {:>{len_6}} -> {:{len_7}}".format(
            *fields, **field_lens))

    # Print summary of the diffs
    total1 = sum_vals(bs1)
    total2 = sum_vals(bs2)
    print("\nCumulative {}:".format(val_type))
    print(" {} {:+.1f}% {} ({}) -> {} ({})".format(
        val_to_str(total2 - total1), 100 * (total2-total1) / total1,
        val_to_str(total1, True), val_to_str(total1),
        val_to_str(total2, True), val_to_str(total2)))


def parse_args(argv):
    """Parse cmdline arguments"""
    description="""
Script for comparing buildstats of two separate builds."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description=description)

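    # These dicts provide per-attribute defaults and double as sentinels:
    # after parsing we check "args.min_val is min_val_defaults" to detect
    # whether the user overrode the value on the command line.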
    min_val_defaults = {'cputime': 3.0,
                        'read_bytes': 524288,
                        'write_bytes': 524288,
                        'read_ops': 500,
                        'write_ops': 500,
                        'walltime': 5}
    min_absdiff_defaults = {'cputime': 1.0,
                            'read_bytes': 131072,
                            'write_bytes': 131072,
                            'read_ops': 50,
                            'write_ops': 50,
                            'walltime': 2}

    parser.add_argument('--debug', '-d', action='store_true',
                        help="Verbose logging")
    parser.add_argument('--ver-diff', action='store_true',
                        help="Show package version differences and exit")
    parser.add_argument('--diff-attr', default='cputime',
                        choices=min_val_defaults.keys(),
                        help="Buildstat attribute to compare")
    parser.add_argument('--min-val', default=min_val_defaults, type=float,
                        help="Filter out tasks less than MIN_VAL. "
                             "Default depends on --diff-attr.")
    parser.add_argument('--min-absdiff', default=min_absdiff_defaults, type=float,
                        help="Filter out tasks whose difference is less than "
                             "MIN_ABSDIFF. Default depends on --diff-attr.")
    parser.add_argument('--sort-by', default='absdiff',
                        help="Comma-separated list of field sort order. "
                             "Prepend the field name with '-' for reversed sort. "
                             "Available fields are: {}".format(', '.join(taskdiff_fields)))
    parser.add_argument('--multi', action='store_true',
                        help="Read all buildstats from the given paths and "
                             "average over them")
    parser.add_argument('buildstats1', metavar='BUILDSTATS1', help="'Left' buildstat")
    parser.add_argument('buildstats2', metavar='BUILDSTATS2', help="'Right' buildstat")

    args = parser.parse_args(argv)

    # We do not need/want to read all buildstats if we just want to look at
    # the package versions
    if args.ver_diff:
        args.multi = False

    # Handle defaults for the filter arguments
    if args.min_val is min_val_defaults:
        args.min_val = min_val_defaults[args.diff_attr]
    if args.min_absdiff is min_absdiff_defaults:
        args.min_absdiff = min_absdiff_defaults[args.diff_attr]

    return args


def main(argv=None):
    """Script entry point"""
    args = parse_args(argv)
    if args.debug:
        log.setLevel(logging.DEBUG)

    # Validate sort fields
    sort_by = []
    for field in args.sort_by.split(','):
        if field.lstrip('-') not in taskdiff_fields:
            log.error("Invalid sort field '%s' (must be one of: %s)" %
                      (field, ', '.join(taskdiff_fields)))
            sys.exit(1)
        sort_by.append(field)

    try:
        bs1 = read_buildstats(args.buildstats1, args.multi)
        bs2 = read_buildstats(args.buildstats2, args.multi)

        if args.ver_diff:
            print_ver_diff(bs1, bs2)
        else:
            print_task_diff(bs1, bs2, args.diff_attr, args.min_val,
                            args.min_absdiff, sort_by)
    except ScriptError as err:
        log.error(str(err))
        return 1
    return 0

if __name__ == "__main__":
    sys.exit(main())