#!/usr/bin/python3
#
# Examine build performance test results
#
# Copyright (c) 2017, Intel Corporation.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms and conditions of the GNU General Public License,
# version 2, as published by the Free Software Foundation.
#
# This program is distributed in the hope it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
16import argparse
17import json
18import logging
19import os
20import re
21import sys
22from collections import namedtuple, OrderedDict
23from operator import attrgetter
24from xml.etree import ElementTree as ET
25
26# Import oe libs
27scripts_path = os.path.dirname(os.path.realpath(__file__))
28sys.path.append(os.path.join(scripts_path, 'lib'))
29import scriptpath
30from build_perf import print_table
31from build_perf.report import (metadata_xml_to_json, results_xml_to_json,
32 aggregate_data, aggregate_metadata, measurement_stats)
33from build_perf import html
34
35scriptpath.add_oe_lib_path()
36
37from oeqa.utils.git import GitRepo
38
39
# Configure the root logger once at import time; 'log' is the logger used
# by every function in this script
logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.INFO)
log = logging.getLogger('oe-build-perf-report')
43
44
# Container for one tested revision: the commit hash, its commit number and
# the list of result tags (one per test run) recorded for that commit
TestedRev = namedtuple('TestedRev', ['commit', 'commit_number', 'tags'])
47
48
def get_test_runs(repo, tag_name, **kwargs):
    """Get a sorted list of test runs, matching given pattern

    Arguments:
        repo: object whose run_cmd() runs a git command and returns its output
        tag_name: tag name pattern containing '{field}' placeholders
        **kwargs: fixed (string) values for any of the placeholder fields

    Returns a tuple (undef_fields, revs). 'undef_fields' lists the
    placeholder names that were not fixed through kwargs, in pattern order.
    'revs' has one list per matching tag: the values parsed for the undefined
    fields followed by the tag name itself. All values are strings, but the
    rows are ordered numerically on the 'commit_number' and 'tag_number'
    fields so that e.g. run 10 sorts after run 9.
    """
    # First, get field names from the tag name pattern
    field_names = [m.group(1) for m in re.finditer(r'{(\w+)}', tag_name)]
    undef_fields = [f for f in field_names if f not in kwargs]

    # Fields for formatting tag name pattern: wildcard for everything that
    # the caller did not pin down
    str_fields = {f: '*' for f in field_names}
    str_fields.update(kwargs)

    # Get a list of all matching tags
    tag_pattern = tag_name.format(**str_fields)
    tags = repo.run_cmd(['tag', '-l', tag_pattern]).splitlines()
    log.debug("Found %d tags matching pattern '%s'", len(tags), tag_pattern)

    # Parse undefined fields from tag names
    str_fields = {f: r'(?P<{}>[\w\-.()]+)'.format(f) for f in field_names}
    str_fields['branch'] = r'(?P<branch>[\w\-.()/]+)'
    str_fields['commit'] = '(?P<commit>[0-9a-f]{7,40})'
    str_fields['commit_number'] = '(?P<commit_number>[0-9]{1,7})'
    str_fields['tag_number'] = '(?P<tag_number>[0-9]{1,5})'
    # Escape parentheses in fields in order to not mess up the regexp
    fixed_fields = {k: v.replace('(', r'\(').replace(')', r'\)')
                    for k, v in kwargs.items()}
    str_fields.update(fixed_fields)
    tag_re = re.compile(tag_name.format(**str_fields))

    # Parse fields from tags
    revs = []
    for tag in tags:
        m = tag_re.match(tag)
        if not m:
            # The glob given to 'git tag -l' is looser than the regexp (a
            # wildcard may span characters the field patterns reject), so a
            # listed tag is not guaranteed to parse
            log.debug("Ignoring tag '%s', not matching the tag name pattern",
                      tag)
            continue
        groups = m.groupdict()
        revs.append([groups[f] for f in undef_fields] + [tag])

    # The regexps above guarantee that these fields are plain digit strings
    numeric_fields = ('commit_number', 'tag_number')

    def sort_key(rev):
        """Compare numeric fields as integers, everything else as text"""
        key = [int(val) if field in numeric_fields else val
               for field, val in zip(undef_fields, rev)]
        return key + [rev[-1]]

    # Return field names and a sorted list of revs
    return undef_fields, sorted(revs, key=sort_key)
84
def list_test_revs(repo, tag_name, **kwargs):
    """Print a table of all tested revisions

    Arguments are passed straight to get_test_runs(). One row is printed per
    distinct combination of the parsed fields (the 'tag_number' field is
    ignored), with a trailing 'TEST RUNS' column counting the consecutive
    runs sharing that combination. Leading columns that repeat the previous
    row are left blank.
    """
    fields, revs = get_test_runs(repo, tag_name, **kwargs)
    ignore_fields = ['tag_number']
    print_fields = [i for i, f in enumerate(fields) if f not in ignore_fields]

    # Header row
    rows = [[fields[i].upper() for i in print_fields] + ['TEST RUNS']]
    prev = None
    for rev in revs:
        # Only use fields that we want to print
        cols = [rev[i] for i in print_fields]

        if cols == prev:
            # Another run of the revision on the previous row
            rows[-1][-1] += 1
        else:
            # Find the first printed column that differs from the previous
            # row. 'cols' is compacted, so it is indexed by position and not
            # by the original field index.
            first_diff = 0
            if prev is not None:
                first_diff = next(i for i, (new, old)
                                  in enumerate(zip(cols, prev)) if new != old)
            rows.append([''] * first_diff + cols[first_diff:] + [1])
        prev = cols

    print_table(rows)
110
def get_test_revs(repo, tag_name, **kwargs):
    """Get list of all tested revisions

    Arguments are passed straight to get_test_runs(); the tag name pattern
    must leave both the 'commit' and 'commit_number' fields undefined.

    Returns a list of TestedRev tuples, one per distinct commit, ordered by
    ascending commit number. The commit number is kept as a string (as
    parsed from the tag) and 'tags' lists all test run tags of the commit.
    """
    fields, runs = get_test_runs(repo, tag_name, **kwargs)

    revs = {}
    commit_i = fields.index('commit')
    commit_num_i = fields.index('commit_number')
    for run in runs:
        commit = run[commit_i]
        commit_num = run[commit_num_i]
        tag = run[-1]
        if commit not in revs:
            revs[commit] = TestedRev(commit, commit_num, [tag])
        else:
            assert commit_num == revs[commit].commit_number, "Commit numbers do not match"
            revs[commit].tags.append(tag)

    # Return in sorted table. Commit numbers are digit strings, so they have
    # to be compared as integers: as text '1000' would sort before '999'.
    revs = sorted(revs.values(), key=lambda rev: int(rev.commit_number))
    log.debug("Found %d tested revisions:\n %s", len(revs),
              "\n ".join(['{} ({})'.format(rev.commit_number, rev.commit) for rev in revs]))
    return revs
133
def rev_find(revs, attr, val):
    """Return the index of the first item in revs whose 'attr' equals val

    Raises ValueError if no item matches.
    """
    position = next((idx for idx, candidate in enumerate(revs)
                     if getattr(candidate, attr) == val), None)
    # Index 0 is a valid hit, so test against None explicitly
    if position is None:
        raise ValueError("Unable to find '{}' value '{}'".format(attr, val))
    return position
140
def is_xml_format(repo, commit):
    """Tell whether the results stored in commit are in xml format

    Returns True when repo.rev_parse() resolves '<commit>:results.xml',
    otherwise False (json formatted results are then assumed).
    """
    has_xml = bool(repo.rev_parse(commit + ':results.xml'))
    if has_xml:
        log.debug("Detected report in xml format in %s", commit)
    else:
        log.debug("No xml report in %s, assuming json formatted results", commit)
    return has_xml
149
def read_results(repo, tags, xml=True):
    """Read the metadata and results files of the given tags from repo

    Returns a tuple (metadata, results) of two lists holding one entry per
    tag. With xml=True the xml files are read and converted with
    metadata_xml_to_json() / results_xml_to_json(), otherwise the json files
    are loaded as they are.
    """

    def parse_xml_stream(data):
        """Split concatenated XML documents and parse each of them"""
        parsed = []
        pieces = []
        for line in data.splitlines():
            # An xml declaration starts a new document, unless nothing has
            # been collected yet
            if pieces and line.startswith('<?xml version='):
                parsed.append(ET.fromstring("".join(pieces)))
                pieces = [line]
            elif line:
                # Empty lines add nothing to the joined text and must not
                # make 'pieces' look non-empty
                pieces.append(line)
        parsed.append(ET.fromstring("".join(pieces)))
        return parsed

    def parse_json_stream(data):
        """Split concatenated JSON objects and parse each of them"""
        parsed = []
        pieces = []
        for line in data.splitlines():
            if line == '}{':
                # End of one object and start of the next on the same line
                pieces.append('}')
                parsed.append(json.loads("".join(pieces),
                                         object_pairs_hook=OrderedDict))
                pieces = ['{']
            else:
                pieces.append(line)
        parsed.append(json.loads("".join(pieces), object_pairs_hook=OrderedDict))
        return parsed

    num_revs = len(tags)

    # Optimize by reading all data with one git command: all the metadata
    # files first, followed by all the results files
    log.debug("Loading raw result data from %d tags, %s...", num_revs, tags[0])
    if not xml:
        git_objs = [tag + ':metadata.json' for tag in tags]
        git_objs += [tag + ':results.json' for tag in tags]
        data = parse_json_stream(repo.run_cmd(['show'] + git_objs + ['--']))
        return data[0:num_revs], data[num_revs:]

    git_objs = [tag + ':metadata.xml' for tag in tags]
    git_objs += [tag + ':results.xml' for tag in tags]
    data = parse_xml_stream(repo.run_cmd(['show'] + git_objs + ['--']))
    metadata = [metadata_xml_to_json(elem) for elem in data[0:num_revs]]
    results = [results_xml_to_json(elem) for elem in data[num_revs:]]
    return (metadata, results)
193
194
def get_data_item(data, key):
    """Look up a value from nested containers using a dotted key

    Each '.'-separated component of key is used, in turn, as a subscript on
    the value found by the previous component.
    """
    node = data
    for component in key.split('.'):
        node = node[component]
    return node
200
201
def metadata_diff(metadata_l, metadata_r):
    """Prepare a metadata diff for printing

    Returns an OrderedDict with one entry per reported metadata item. Each
    entry is a dict with the 'title' of the item, its value in metadata_l
    as 'value_old' and its value in metadata_r as 'value'. A value that is
    missing from the metadata is reported as '(N/A)'.
    """
    # (title, key in the returned dict, dotted key in the metadata)
    items = (('Hostname', 'hostname', 'hostname'),
             ('Branch', 'branch', 'layers.meta.branch'),
             ('Commit number', 'commit_num', 'layers.meta.commit_count'),
             ('Commit', 'commit', 'layers.meta.commit'),
             ('Number of test runs', 'testrun_count', 'testrun_count'))

    def _lookup(metadata, dotted_key):
        """Get one metadata value, or a placeholder if it is missing"""
        try:
            return get_data_item(metadata, dotted_key)
        except KeyError:
            return '(N/A)'

    diff = OrderedDict()
    for title, name, dotted_key in items:
        diff[name] = {'title': title,
                      'value_old': _lookup(metadata_l, dotted_key),
                      'value': _lookup(metadata_r, dotted_key)}
    return diff
230
231
def print_diff_report(metadata_l, data_l, metadata_r, data_r):
    """Print a plain text comparison of two sets of test results

    The '_l' arguments describe the revision compared against and the '_r'
    arguments the current revision. A metadata table is printed first,
    followed by one section per test with the mean of every measurement on
    both sides and the absolute and relative difference between them.
    """
    # General metadata comes first
    print("\nTEST METADATA:\n==============")
    meta_rows = [['', 'CURRENT COMMIT', 'COMPARING WITH']]
    for key, item in metadata_diff(metadata_l, metadata_r).items():
        current = item['value']
        previous = item['value_old']
        if key == 'commit':
            # Shorten commit hashes
            current = current[:20]
            previous = previous[:20]
        meta_rows.append([item['title'] + ':', current, previous])
    print_table(meta_rows, ['{:{wid}} ', '{:<{wid}} ', '{:<{wid}}'])

    # Then the test results
    print("\nTEST RESULTS:\n=============")

    tests_l = data_l['tests']
    tests_r = data_r['tests']
    test_names = list(tests_l.keys())
    # Tests present only in the 'right' set go last
    only_right = [name for name in tests_r.keys() if name not in test_names]
    test_names += only_right

    # Prepare data to be printed
    row_fmt = ['{:8}', '{:{wid}}', '{:{wid}}', ' {:>{wid}}', ' {:{wid}} ', '{:{wid}}',
               ' {:>{wid}}', ' {:>{wid}}']
    num_cols = len(row_fmt)
    rows = []
    for test in test_names:
        test_l = tests_l[test] if test in tests_l else None
        test_r = tests_r[test] if test in tests_r else None
        # '+' marks a test that was added, '-' one that was removed
        if test_l is None:
            pref = '+'
        elif test_r is None:
            pref = '-'
        else:
            pref = ' '
        descr = test_l['description'] if test_l else test_r['description']
        rows.append(["{} {}: {}".format(pref, test, descr)])

        # Measurements of the left side first, then the ones found only on
        # the right side
        meas_l = test_l['measurements'] if test_l else {}
        meas_r = test_r['measurements'] if test_r else {}
        measurements = list(meas_l.keys())
        measurements += [m for m in meas_r.keys() if m not in measurements]

        for meas in measurements:
            in_left = meas in meas_l
            in_right = meas in meas_r
            stats_l = measurement_stats(meas_l[meas] if in_left else None, 'l.')
            stats_r = measurement_stats(meas_r[meas] if in_right else None, 'r.')
            # A measurement missing from the right side takes precedence
            if not in_right:
                m_pref = '-'
            elif not in_left:
                m_pref = '+'
            else:
                m_pref = ' '
            stats = stats_l.copy()
            stats.update(stats_r)

            absdiff = stats['val_cls'](stats['r.mean'] - stats['l.mean'])
            reldiff = "{:+.1f} %".format(absdiff * 100 / stats['l.mean'])
            # str() only gives a sign for negative values
            grew = stats['r.mean'] > stats['l.mean']
            absdiff = '+' + str(absdiff) if grew else str(absdiff)
            rows.append(['', m_pref, stats['name'] + ' ' + stats['quantity'],
                         str(stats['l.mean']), '->', str(stats['r.mean']),
                         absdiff, reldiff])
        # Empty row between tests
        rows.append([''] * num_cols)

    print_table(rows, row_fmt)

    print()
310
311
def print_html_report(data, id_comp):
    """Print report in html format

    Arguments:
        data: list of (metadata, test data) tuples, one per revision; the
            last item is the revision being reported
        id_comp: index in data of the revision to compare against
    """
    latest_meta, latest_data = data[-1]

    # Handle metadata
    metadata = metadata_diff(data[id_comp][0], latest_meta)

    # Generate list of tests, based on the tests of the latest revision
    tests = []
    latest_tests = latest_data['tests']
    for test in latest_tests.keys():
        test_r = latest_tests[test]
        new_test = {'name': test_r['name'],
                    'description': test_r['description'],
                    'status': test_r['status'],
                    'measurements': [],
                    'err_type': test_r.get('err_type'),
                    }
        # Limit length of err output shown to the last 20 lines
        if 'message' in test_r:
            lines = test_r['message'].splitlines()
            if len(lines) > 20:
                new_test['message'] = '...\n' + '\n'.join(lines[-20:])
            else:
                new_test['message'] = test_r['message']

        # Generate the list of measurements
        for meas in test_r['measurements'].keys():
            meas_r = test_r['measurements'][meas]
            if meas_r['type'] == 'sysres':
                meas_type = 'time'
            else:
                meas_type = 'size'
            new_meas = {'name': meas_r['name'],
                        'legend': meas_r['legend'],
                        'description': meas_r['name'] + ' ' + meas_type,
                        }

            # Run through all revisions in our data, one sample for each
            samples = []
            for meta, test_data in data:
                available = (test in test_data['tests'] and
                             meas in test_data['tests'][test]['measurements'])
                if not available:
                    # Measurement not done for this revision
                    samples.append(measurement_stats(None))
                    continue
                meas_i = test_data['tests'][test]['measurements'][meas]
                commit_num = get_data_item(meta, 'layers.meta.commit_count')
                sample = measurement_stats(meas_i)
                sample['commit_num'] = commit_num
                samples.append(sample)

            latest = samples[-1]
            baseline = samples[id_comp]
            absdiff = latest['val_cls'](latest['mean'] - baseline['mean'])
            new_meas['absdiff'] = absdiff
            new_meas['absdiff_str'] = str(absdiff) if absdiff < 0 else '+' + str(absdiff)
            new_meas['reldiff'] = "{:+.1f} %".format(absdiff * 100 / baseline['mean'])
            new_meas['samples'] = samples
            new_meas['value'] = latest
            new_meas['value_type'] = latest['val_cls']

            new_test['measurements'].append(new_meas)
        tests.append(new_test)

    # Chart options: horizontal axis spans the commit numbers in data
    first_commit_num = get_data_item(data[0][0], 'layers.meta.commit_count')
    last_commit_num = get_data_item(data[-1][0], 'layers.meta.commit_count')
    chart_opts = {'haxis': {'min': first_commit_num,
                            'max': last_commit_num}
                  }

    print(html.template.render(metadata=metadata, test_data=tests, chart_opts=chart_opts))
380
381
def auto_args(repo, args):
    """Guess arguments, if not defined by the user

    The body of the latest commit found in the branches and remotes of repo
    is scanned for 'hostname: <value>' and 'branch: <value>' lines. A value
    found is stored in args.hostname / args.branch only when that argument
    is still unset (empty or None), so nothing given by the user is
    overridden.
    """
    # Get the latest commit in the repo
    log.debug("Guessing arguments from the latest commit")
    msg = repo.run_cmd(['log', '-1', '--branches', '--remotes', '--format=%b'])
    for line in msg.splitlines():
        key, separator, val = line.partition(':')
        if not separator:
            # Not a 'key: value' line
            continue

        val = val.strip()
        if key == 'hostname' and not args.hostname:
            log.debug("Using hostname %s", val)
            args.hostname = val
        elif key == 'branch' and not args.branch:
            log.debug("Using branch %s", val)
            args.branch = val
400
401
def parse_args(argv):
    """Parse command line arguments

    argv is the list of argument strings to parse; the parsed arguments
    are returned as an argparse namespace.
    """
    parser = argparse.ArgumentParser(
        description="\nExamine build performance test results from a Git repository",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # General options
    parser.add_argument('--debug', '-d', action='store_true',
                        help="Verbose logging")
    parser.add_argument('--repo', '-r', required=True,
                        help="Results repository (local git clone)")
    parser.add_argument('--list', '-l', action='store_true',
                        help="List available test runs")
    parser.add_argument('--html', action='store_true',
                        help="Generate report in html format")

    # Options for selecting the test results to examine
    rev_group = parser.add_argument_group('Tag and revision')
    add_rev_arg = rev_group.add_argument
    add_rev_arg('--tag-name', '-t',
                default='{hostname}/{branch}/{machine}/{commit_number}-g{commit}/{tag_number}',
                help="Tag name (pattern) for finding results")
    add_rev_arg('--hostname', '-H')
    add_rev_arg('--branch', '-B', default='master')
    add_rev_arg('--machine', default='qemux86')
    add_rev_arg('--history-length', default=25, type=int,
                help="Number of tested revisions to plot in html report")
    add_rev_arg('--commit',
                help="Revision to search for")
    add_rev_arg('--commit-number',
                help="Revision number to search for, redundant if --commit is specified")
    add_rev_arg('--commit2',
                help="Revision to compare with")
    add_rev_arg('--commit-number2',
                help="Revision number to compare with, redundant if --commit2 is specified")

    return parser.parse_args(argv)
439
440
def main(argv=None):
    """Script entry point

    argv is the list of command line arguments, passed on to parse_args().
    Returns the exit status of the script: 0 on success and 1 if the report
    cannot be generated.
    """
    args = parse_args(argv)
    if args.debug:
        log.setLevel(logging.DEBUG)

    repo = GitRepo(args.repo)

    if args.list:
        list_test_revs(repo, args.tag_name)
        return 0

    # Determine hostname which to use
    if not args.hostname:
        auto_args(repo, args)
    if not args.hostname:
        # Searching for tags with an undefined hostname cannot work
        log.error("Unable to determine the hostname, use --hostname to "
                  "specify it")
        return 1

    revs = get_test_revs(repo, args.tag_name, hostname=args.hostname,
                         branch=args.branch, machine=args.machine)
    if len(revs) < 2:
        log.error("%d tester revisions found, unable to generate report",
                  len(revs))
        return 1

    # Pick revisions
    if args.commit:
        if args.commit_number:
            log.warning("Ignoring --commit-number as --commit was specified")
        index1 = rev_find(revs, 'commit', args.commit)
    elif args.commit_number:
        index1 = rev_find(revs, 'commit_number', args.commit_number)
    else:
        # Default to the latest tested revision
        index1 = len(revs) - 1

    if args.commit2:
        if args.commit_number2:
            log.warning("Ignoring --commit-number2 as --commit2 was specified")
        index2 = rev_find(revs, 'commit', args.commit2)
    elif args.commit_number2:
        index2 = rev_find(revs, 'commit_number', args.commit_number2)
    else:
        # Default to the revision tested just before the first one
        if index1 > 0:
            index2 = index1 - 1
        else:
            log.error("Unable to determine the other commit, use "
                      "--commit2 or --commit-number2 to specify it")
            return 1

    # The 'left' revision is always the older one
    index_l = min(index1, index2)
    index_r = max(index1, index2)

    rev_l = revs[index_l]
    rev_r = revs[index_r]
    log.debug("Using 'left' revision %s (%s), %s test runs:\n %s",
              rev_l.commit_number, rev_l.commit, len(rev_l.tags),
              '\n '.join(rev_l.tags))
    log.debug("Using 'right' revision %s (%s), %s test runs:\n %s",
              rev_r.commit_number, rev_r.commit, len(rev_r.tags),
              '\n '.join(rev_r.tags))

    # Check report format used in the repo (assume all reports in the same fmt)
    xml = is_xml_format(repo, revs[index_r].tags[-1])

    if args.html:
        # Plot the history leading to the 'right' revision. The range has to
        # reach back to the 'left' revision, too, as it is looked up from
        # the same table.
        index_0 = max(0, min(index_l, index_r - args.history_length))
        rev_range = range(index_0, index_r + 1)
        data_l = index_l - index_0
        data_r = index_r - index_0
    else:
        # We do not need range of commits for text report (no graphs), only
        # the two revisions are read, whether they are adjacent or not
        index_0 = index_l
        rev_range = (index_l, index_r)
        data_l, data_r = 0, 1

    # Read raw data
    log.debug("Reading %d revisions, starting from %s (%s)",
              len(rev_range), revs[index_0].commit_number, revs[index_0].commit)
    raw_data = [read_results(repo, revs[i].tags, xml) for i in rev_range]

    data = []
    for raw_m, raw_d in raw_data:
        data.append((aggregate_metadata(raw_m), aggregate_data(raw_d)))

    # Print report, data_l and data_r being the positions of the 'left' and
    # 'right' revisions in the data table
    if not args.html:
        print_diff_report(data[data_l][0], data[data_l][1],
                          data[data_r][0], data[data_r][1])
    else:
        print_html_report(data, data_l)

    return 0
532
if __name__ == "__main__":
    # Use the return value of main() as the exit status of the process
    exit_status = main()
    sys.exit(exit_status)