#!/usr/bin/python3
#
# Script for comparing buildstats from two different builds
#
# Copyright (c) 2016, Intel Corporation.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms and conditions of the GNU General Public License,
# version 2, as published by the Free Software Foundation.
#
# This program is distributed in the hope it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
import argparse
import glob
import json
import logging
import math
import os
import re
import sys
from collections import namedtuple
from datetime import datetime, timedelta, tzinfo
from operator import attrgetter

# Setup logging
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
log = logging.getLogger()


class ScriptError(Exception):
    """Exception for internal error handling of this script"""
    pass


class TimeZone(tzinfo):
    """Simple fixed-offset tzinfo"""
    def __init__(self, seconds, name):
        self._offset = timedelta(seconds=seconds)
        self._name = name

    def utcoffset(self, dt):
        return self._offset

    def tzname(self, dt):
        return self._name

    def dst(self, dt):
        return None

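# Fixed-offset timezones known to this script; to_datetime_obj() below
# attaches the UTC entry to raw epoch timestamps.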
TIMEZONES = {'UTC': TimeZone(0, 'UTC'),
             'EET': TimeZone(7200, 'EET'),
             'EEST': TimeZone(10800, 'EEST')}

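# One TaskDiff row records how a single metric changed for one task: package
# and task names, add/remove markers (pkg_op/task_op), the value from each
# build, and the absolute and relative difference.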
taskdiff_fields = ('pkg', 'pkg_op', 'task', 'task_op', 'value1', 'value2',
                   'absdiff', 'reldiff')
TaskDiff = namedtuple('TaskDiff', ' '.join(taskdiff_fields))


def to_datetime_obj(obj):
    """Helper for getting timestamps in datetime format"""
    if isinstance(obj, datetime):
        return obj
    else:
        return datetime.utcfromtimestamp(obj).replace(tzinfo=TIMEZONES['UTC'])


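# Buildstats of a single task: a plain dict of the parsed fields, with the
# compared metrics (cputime, walltime, I/O bytes and ops) exposed as
# properties that fold in child process rusage where applicable.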
class BSTask(dict):
    def __init__(self, *args, **kwargs):
        self['start_time'] = None
        self['elapsed_time'] = None
        self['status'] = None
        self['iostat'] = {}
        self['rusage'] = {}
        self['child_rusage'] = {}
        super(BSTask, self).__init__(*args, **kwargs)

    @property
    def cputime(self):
        """Sum of user and system time taken by the task"""
        return self['rusage']['ru_stime'] + self['rusage']['ru_utime'] + \
               self['child_rusage']['ru_stime'] + self['child_rusage']['ru_utime']

    @property
    def walltime(self):
        """Elapsed wall clock time"""
        return self['elapsed_time'].total_seconds()

    @property
    def read_bytes(self):
        """Bytes read from the block layer"""
        return self['iostat']['read_bytes']

    @property
    def write_bytes(self):
        """Bytes written to the block layer"""
        return self['iostat']['write_bytes']

    @property
    def read_ops(self):
        """Number of read operations on the block layer"""
        return self['rusage']['ru_inblock'] + self['child_rusage']['ru_inblock']

    @property
    def write_ops(self):
        """Number of write operations on the block layer"""
        return self['rusage']['ru_oublock'] + self['child_rusage']['ru_oublock']


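# A task buildstat file is parsed as "key: value" lines: Started/Ended epoch
# timestamps, "IO <counter>" fields, rusage and child rusage fields, and a
# Status line.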
def read_buildstats_file(buildstat_file):
    """Convert buildstat text file into dict/json"""
    bs_task = BSTask()
    log.debug("Reading task buildstats from %s", buildstat_file)
    with open(buildstat_file) as fobj:
        for line in fobj.readlines():
            key, val = line.split(':', 1)
            val = val.strip()
            if key == 'Started':
                start_time = to_datetime_obj(float(val))
                bs_task['start_time'] = start_time
            elif key == 'Ended':
                end_time = to_datetime_obj(float(val))
            elif key.startswith('IO '):
                split = key.split()
                bs_task['iostat'][split[1]] = int(val)
            elif key.find('rusage') >= 0:
                split = key.split()
                ru_key = split[-1]
                if ru_key in ('ru_stime', 'ru_utime'):
                    val = float(val)
                else:
                    val = int(val)
                ru_type = 'rusage' if split[0] == 'rusage' else \
                                      'child_rusage'
                bs_task[ru_type][ru_key] = val
            elif key == 'Status':
                bs_task['status'] = val
    bs_task['elapsed_time'] = end_time - start_time
    return bs_task


def read_buildstats_dir(bs_dir):
    """Read buildstats directory"""
    def split_nevr(nevr):
        """Split name and version information from recipe "nevr" string"""
        n_e_v, revision = nevr.rsplit('-', 1)
        match = re.match(r'^(?P<name>\S+)-((?P<epoch>[0-9]{1,5})_)?(?P<version>[0-9]\S*)$',
                         n_e_v)
        if not match:
            # If we're not able to parse a version starting with a number, just
            # take the part after last dash
            match = re.match(r'^(?P<name>\S+)-((?P<epoch>[0-9]{1,5})_)?(?P<version>[^-]+)$',
                             n_e_v)
        name = match.group('name')
        version = match.group('version')
        epoch = match.group('epoch')
        return name, epoch, version, revision

    if not os.path.isfile(os.path.join(bs_dir, 'build_stats')):
        raise ScriptError("{} does not look like a buildstats directory".format(bs_dir))

    log.debug("Reading buildstats directory %s", bs_dir)

    buildstats = {}
    subdirs = os.listdir(bs_dir)
    for dirname in subdirs:
        recipe_dir = os.path.join(bs_dir, dirname)
        if not os.path.isdir(recipe_dir):
            continue
        name, epoch, version, revision = split_nevr(dirname)
        recipe_bs = {'nevr': dirname,
                     'name': name,
                     'epoch': epoch,
                     'version': version,
                     'revision': revision,
                     'tasks': {}}
        for task in os.listdir(recipe_dir):
            recipe_bs['tasks'][task] = [read_buildstats_file(
                os.path.join(recipe_dir, task))]
        if name in buildstats:
            raise ScriptError("Cannot handle multiple versions of the same "
                              "package ({})".format(name))
        buildstats[name] = recipe_bs

    return buildstats


def bs_append(dst, src):
    """Append data from another buildstats"""
    if set(dst.keys()) != set(src.keys()):
        raise ScriptError("Refusing to join buildstats, set of packages is "
                          "different")
    for pkg, data in dst.items():
        if data['nevr'] != src[pkg]['nevr']:
            raise ScriptError("Refusing to join buildstats, package version "
                              "differs: {} vs. {}".format(data['nevr'], src[pkg]['nevr']))
        if set(data['tasks'].keys()) != set(src[pkg]['tasks'].keys()):
            raise ScriptError("Refusing to join buildstats, set of tasks "
                              "in {} differ".format(pkg))
        for taskname, taskdata in data['tasks'].items():
            taskdata.extend(src[pkg]['tasks'][taskname])


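# In JSON form the buildstats are a list of recipe objects, each with
# name/epoch/version/revision fields and a "tasks" dict of per-task data.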
def read_buildstats_json(path):
    """Read buildstats from JSON file"""
    buildstats = {}
    with open(path) as fobj:
        bs_json = json.load(fobj)
    for recipe_bs in bs_json:
        if recipe_bs['name'] in buildstats:
            raise ScriptError("Cannot handle multiple versions of the same "
                              "package ({})".format(recipe_bs['name']))

        if recipe_bs['epoch'] is None:
            recipe_bs['nevr'] = "{}-{}-{}".format(recipe_bs['name'], recipe_bs['version'], recipe_bs['revision'])
        else:
            recipe_bs['nevr'] = "{}-{}_{}-{}".format(recipe_bs['name'], recipe_bs['epoch'], recipe_bs['version'], recipe_bs['revision'])

        for task, data in recipe_bs['tasks'].copy().items():
            recipe_bs['tasks'][task] = [BSTask(data)]

        buildstats[recipe_bs['name']] = recipe_bs

    return buildstats


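# Dispatch on the type of input: a plain file is read as JSON, a directory
# containing a build_stats file as a single buildstats tree, and any other
# directory as a collection of buildstats (use --multi to average several).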
def read_buildstats(path, multi):
    """Read buildstats"""
    if not os.path.exists(path):
        raise ScriptError("No such file or directory: {}".format(path))

    if os.path.isfile(path):
        return read_buildstats_json(path)

    if os.path.isfile(os.path.join(path, 'build_stats')):
        return read_buildstats_dir(path)

    # Handle a non-buildstat directory
    subpaths = sorted(glob.glob(path + '/*'))
    if len(subpaths) > 1:
        if multi:
            log.info("Averaging over {} buildstats from {}".format(
                len(subpaths), path))
        else:
            raise ScriptError("Multiple buildstats found in '{}'. Please give "
                              "a single buildstat directory or use the --multi "
                              "option".format(path))
    bs = None
    for subpath in subpaths:
        if os.path.isfile(subpath):
            tmpbs = read_buildstats_json(subpath)
        else:
            tmpbs = read_buildstats_dir(subpath)
        if not bs:
            bs = tmpbs
        else:
            log.debug("Joining buildstats")
            bs_append(bs, tmpbs)

    if not bs:
        raise ScriptError("No buildstats found under {}".format(path))
    return bs


def print_ver_diff(bs1, bs2):
    """Print package version differences"""
    pkgs1 = set(bs1.keys())
    pkgs2 = set(bs2.keys())
    new_pkgs = pkgs2 - pkgs1
    deleted_pkgs = pkgs1 - pkgs2

    echanged = []
    vchanged = []
    rchanged = []
    unchanged = []
    common_pkgs = pkgs2.intersection(pkgs1)
    if common_pkgs:
        for pkg in common_pkgs:
            if bs1[pkg]['epoch'] != bs2[pkg]['epoch']:
                echanged.append(pkg)
            elif bs1[pkg]['version'] != bs2[pkg]['version']:
                vchanged.append(pkg)
            elif bs1[pkg]['revision'] != bs2[pkg]['revision']:
                rchanged.append(pkg)
            else:
                unchanged.append(pkg)

    maxlen = max([len(pkg) for pkg in pkgs1.union(pkgs2)])
    fmt_str = " {:{maxlen}} ({})"
#    if unchanged:
#        print("\nUNCHANGED PACKAGES:")
#        print("-------------------")
#        maxlen = max([len(pkg) for pkg in unchanged])
#        for pkg in sorted(unchanged):
#            print(fmt_str.format(pkg, bs2[pkg]['nevr'], maxlen=maxlen))

    if new_pkgs:
        print("\nNEW PACKAGES:")
        print("-------------")
        for pkg in sorted(new_pkgs):
            print(fmt_str.format(pkg, bs2[pkg]['nevr'], maxlen=maxlen))

    if deleted_pkgs:
        print("\nDELETED PACKAGES:")
        print("-----------------")
        for pkg in sorted(deleted_pkgs):
            print(fmt_str.format(pkg, bs1[pkg]['nevr'], maxlen=maxlen))

    fmt_str = " {0:{maxlen}} {1:<20} ({2})"
    if rchanged:
        print("\nREVISION CHANGED:")
        print("-----------------")
        for pkg in sorted(rchanged):
            field1 = "{} -> {}".format(bs1[pkg]['revision'], bs2[pkg]['revision'])
            field2 = "{} -> {}".format(bs1[pkg]['nevr'], bs2[pkg]['nevr'])
            print(fmt_str.format(pkg, field1, field2, maxlen=maxlen))

    if vchanged:
        print("\nVERSION CHANGED:")
        print("----------------")
        for pkg in sorted(vchanged):
            field1 = "{} -> {}".format(bs1[pkg]['version'], bs2[pkg]['version'])
            field2 = "{} -> {}".format(bs1[pkg]['nevr'], bs2[pkg]['nevr'])
            print(fmt_str.format(pkg, field1, field2, maxlen=maxlen))

    if echanged:
        print("\nEPOCH CHANGED:")
        print("--------------")
        for pkg in sorted(echanged):
            field1 = "{} -> {}".format(bs1[pkg]['epoch'], bs2[pkg]['epoch'])
            field2 = "{} -> {}".format(bs1[pkg]['nevr'], bs2[pkg]['nevr'])
            print(fmt_str.format(pkg, field1, field2, maxlen=maxlen))


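# In the task diff table a leading '+' marks a package or task present only
# in the second buildstats, '-' one present only in the first; values are
# averaged when a task has several buildstats (see --multi).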
def print_task_diff(bs1, bs2, val_type, min_val=0, min_absdiff=0, sort_by=('absdiff',)):
    """Diff task execution times"""
    def val_to_str(val, human_readable=False):
        """Convert raw value to printable string"""
        def hms_time(secs):
            """Get time in human-readable HH:MM:SS format"""
            h = int(secs / 3600)
            m = int((secs % 3600) / 60)
            s = secs % 60
            if h == 0:
                return "{:02d}:{:04.1f}".format(m, s)
            else:
                return "{:d}:{:02d}:{:04.1f}".format(h, m, s)

        if 'time' in val_type:
            if human_readable:
                return hms_time(val)
            else:
                return "{:.1f}s".format(val)
        elif 'bytes' in val_type and human_readable:
            prefix = ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi']
            dec = int(math.log(val, 2) / 10)
            prec = 1 if dec > 0 else 0
            return "{:.{prec}f}{}B".format(val / (2 ** (10 * dec)),
                                           prefix[dec], prec=prec)
        elif 'ops' in val_type and human_readable:
            prefix = ['', 'k', 'M', 'G', 'T', 'P']
            dec = int(math.log(val, 1000))
            prec = 1 if dec > 0 else 0
            return "{:.{prec}f}{}ops".format(val / (1000 ** dec),
                                             prefix[dec], prec=prec)
        return str(int(val))

    def sum_vals(buildstats):
        """Get cumulative sum of all tasks"""
        total = 0.0
        for recipe_data in buildstats.values():
            for bs_task in recipe_data['tasks'].values():
                total += sum([getattr(b, val_type) for b in bs_task]) / len(bs_task)
        return total

    tasks_diff = []

    if min_val:
        print("Ignoring tasks less than {} ({})".format(
            val_to_str(min_val, True), val_to_str(min_val)))
    if min_absdiff:
        print("Ignoring differences less than {} ({})".format(
            val_to_str(min_absdiff, True), val_to_str(min_absdiff)))

    # Prepare the data
    pkgs = set(bs1.keys()).union(set(bs2.keys()))
    for pkg in pkgs:
        tasks1 = bs1[pkg]['tasks'] if pkg in bs1 else {}
        tasks2 = bs2[pkg]['tasks'] if pkg in bs2 else {}
        if not tasks1:
            pkg_op = '+ '
        elif not tasks2:
            pkg_op = '- '
        else:
            pkg_op = '  '

        for task in set(tasks1.keys()).union(set(tasks2.keys())):
            task_op = '  '
            if task in tasks1:
                # Average over all values
                val1 = [getattr(b, val_type) for b in bs1[pkg]['tasks'][task]]
                val1 = sum(val1) / len(val1)
            else:
                task_op = '+ '
                val1 = 0
            if task in tasks2:
                # Average over all values
                val2 = [getattr(b, val_type) for b in bs2[pkg]['tasks'][task]]
                val2 = sum(val2) / len(val2)
            else:
                val2 = 0
                task_op = '- '

            if val1 == 0:
                reldiff = float('inf')
            else:
                reldiff = 100 * (val2 - val1) / val1

            if max(val1, val2) < min_val:
                log.debug("Filtering out %s:%s (%s)", pkg, task,
                          val_to_str(max(val1, val2)))
                continue
            if abs(val2 - val1) < min_absdiff:
                log.debug("Filtering out %s:%s (difference of %s)", pkg, task,
                          val_to_str(val2-val1))
                continue
            tasks_diff.append(TaskDiff(pkg, pkg_op, task, task_op, val1, val2,
                                       val2-val1, reldiff))

    # Sort our list
    for field in reversed(sort_by):
        if field.startswith('-'):
            field = field[1:]
            reverse = True
        else:
            reverse = False
        tasks_diff = sorted(tasks_diff, key=attrgetter(field), reverse=reverse)

    linedata = [('  ', 'PKG', '  ', 'TASK', 'ABSDIFF', 'RELDIFF',
                 val_type.upper() + '1', val_type.upper() + '2')]
    field_lens = dict([('len_{}'.format(i), len(f)) for i, f in enumerate(linedata[0])])

    # Prepare fields in string format and measure field lengths
    for diff in tasks_diff:
        task_prefix = diff.task_op if diff.pkg_op == '  ' else '  '
        linedata.append((diff.pkg_op, diff.pkg, task_prefix, diff.task,
                         val_to_str(diff.absdiff),
                         '{:+.1f}%'.format(diff.reldiff),
                         val_to_str(diff.value1),
                         val_to_str(diff.value2)))
        for i, field in enumerate(linedata[-1]):
            key = 'len_{}'.format(i)
            if len(field) > field_lens[key]:
                field_lens[key] = len(field)

    # Print data
    print()
    for fields in linedata:
        print("{:{len_0}}{:{len_1}} {:{len_2}}{:{len_3}} {:>{len_4}} {:>{len_5}} {:>{len_6}} -> {:{len_7}}".format(
            *fields, **field_lens))

    # Print summary of the diffs
    total1 = sum_vals(bs1)
    total2 = sum_vals(bs2)
    print("\nCumulative {}:".format(val_type))
    print(" {} {:+.1f}% {} ({}) -> {} ({})".format(
        val_to_str(total2 - total1), 100 * (total2-total1) / total1,
        val_to_str(total1, True), val_to_str(total1),
        val_to_str(total2, True), val_to_str(total2)))


def parse_args(argv):
    """Parse cmdline arguments"""
    description="""
Script for comparing buildstats of two separate builds."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description=description)

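    # These dicts provide per-attribute defaults and double as sentinels:
    # after parsing we check "args.min_val is min_val_defaults" to detect
    # whether the user overrode the value on the command line.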
    min_val_defaults = {'cputime': 3.0,
                        'read_bytes': 524288,
                        'write_bytes': 524288,
                        'read_ops': 500,
                        'write_ops': 500,
                        'walltime': 5}
    min_absdiff_defaults = {'cputime': 1.0,
                            'read_bytes': 131072,
                            'write_bytes': 131072,
                            'read_ops': 50,
                            'write_ops': 50,
                            'walltime': 2}

    parser.add_argument('--debug', '-d', action='store_true',
                        help="Verbose logging")
    parser.add_argument('--ver-diff', action='store_true',
                        help="Show package version differences and exit")
    parser.add_argument('--diff-attr', default='cputime',
                        choices=min_val_defaults.keys(),
                        help="Buildstat attribute to compare")
    parser.add_argument('--min-val', default=min_val_defaults, type=float,
                        help="Filter out tasks less than MIN_VAL. "
                             "Default depends on --diff-attr.")
    parser.add_argument('--min-absdiff', default=min_absdiff_defaults, type=float,
                        help="Filter out tasks whose difference is less than "
                             "MIN_ABSDIFF. Default depends on --diff-attr.")
    parser.add_argument('--sort-by', default='absdiff',
                        help="Comma-separated list of field sort order. "
                             "Prepend the field name with '-' for reversed sort. "
                             "Available fields are: {}".format(', '.join(taskdiff_fields)))
    parser.add_argument('--multi', action='store_true',
                        help="Read all buildstats from the given paths and "
                             "average over them")
    parser.add_argument('buildstats1', metavar='BUILDSTATS1', help="'Left' buildstat")
    parser.add_argument('buildstats2', metavar='BUILDSTATS2', help="'Right' buildstat")

    args = parser.parse_args(argv)

    # We do not need/want to read all buildstats if we just want to look at
    # the package versions
    if args.ver_diff:
        args.multi = False

    # Handle defaults for the filter arguments
    if args.min_val is min_val_defaults:
        args.min_val = min_val_defaults[args.diff_attr]
    if args.min_absdiff is min_absdiff_defaults:
        args.min_absdiff = min_absdiff_defaults[args.diff_attr]

    return args


def main(argv=None):
    """Script entry point"""
    args = parse_args(argv)
    if args.debug:
        log.setLevel(logging.DEBUG)

    # Validate sort fields
    sort_by = []
    for field in args.sort_by.split(','):
        if field.lstrip('-') not in taskdiff_fields:
            log.error("Invalid sort field '%s' (must be one of: %s)" %
                      (field, ', '.join(taskdiff_fields)))
            sys.exit(1)
        sort_by.append(field)

    try:
        bs1 = read_buildstats(args.buildstats1, args.multi)
        bs2 = read_buildstats(args.buildstats2, args.multi)

        if args.ver_diff:
            print_ver_diff(bs1, bs2)
        else:
            print_task_diff(bs1, bs2, args.diff_attr, args.min_val,
                            args.min_absdiff, sort_by)
    except ScriptError as err:
        log.error(str(err))
        return 1
    return 0

if __name__ == "__main__":
    sys.exit(main())