blob: 004d6fb9538d40a94577051fcc9b2386cda88db3 [file] [log] [blame]
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001# This file is part of pybootchartgui.
2
3# pybootchartgui is free software: you can redistribute it and/or modify
4# it under the terms of the GNU General Public License as published by
5# the Free Software Foundation, either version 3 of the License, or
6# (at your option) any later version.
7
8# pybootchartgui is distributed in the hope that it will be useful,
9# but WITHOUT ANY WARRANTY; without even the implied warranty of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11# GNU General Public License for more details.
12
13# You should have received a copy of the GNU General Public License
14# along with pybootchartgui. If not, see <http://www.gnu.org/licenses/>.
15
import os
import re
import string
import struct
import sys
import tarfile
import time

from collections import defaultdict
from functools import reduce

from .samples import *
from .process_tree import ProcessTree
27
# Python 3 removed the separate 'long' type; alias it to 'int' so the
# taskstats parser can call long() on either major version.
if sys.version_info[0] >= 3:
    long = int
30
31# Parsing produces as its end result a 'Trace'
32
class Trace:
    """End result of parsing: collected statistics, headers and the
    process tree for one bootchart (or bitbake buildstats) capture.

    When *paths* is non-empty the capture is parsed immediately; an
    empty *paths* (as used by split_res) only initialises the fields.
    """

    def __init__(self, writer, paths, options):
        self.processes = {}
        self.start = {}
        self.end = {}
        self.min = None
        self.max = None
        self.headers = None
        self.disk_stats = []
        self.ps_stats = None
        self.taskstats = None
        self.cpu_stats = []
        self.cmdline = None
        self.kernel = None
        self.kernel_tree = None
        self.filename = None
        self.parent_map = None
        self.mem_stats = []
        self.monitor_disk = None
        self.cpu_pressure = []
        self.io_pressure = []
        self.times = [] # Always empty, but expected by draw.py when drawing system charts.

        if len(paths):
            parse_paths (writer, self, paths)
            if not self.valid():
                raise ParseError("empty state: '%s' does not contain a valid bootchart" % ", ".join(paths))

            if options.full_time:
                self.min = min(self.start.keys())
                self.max = max(self.end.keys())

            # Rendering system charts depends on start and end
            # time. Provide them where the original drawing code expects
            # them, i.e. in proc_tree.
            class BitbakeProcessTree:
                def __init__(self, start_time, end_time):
                    self.start_time = start_time
                    self.end_time = end_time
                    self.duration = self.end_time - self.start_time
            self.proc_tree = BitbakeProcessTree(min(self.start.keys()),
                                                max(self.end.keys()))

            return

        # Turn that parsed information into something more useful
        # link processes into a tree of pointers, calculate statistics
        self.compile(writer)

        # Crop the chart to the end of the first idle period after the given
        # process
        if options.crop_after:
            idle = self.crop (writer, options.crop_after)
        else:
            idle = None

        # Annotate other times as the first start point of given process lists
        self.times = [ idle ]
        if options.annotate:
            for procnames in options.annotate:
                names = [x[:15] for x in procnames.split(",")]
                for proc in self.ps_stats.process_map.values():
                    if proc.cmd in names:
                        self.times.append(proc.start_time)
                        break
                else:
                    self.times.append(None)

        self.proc_tree = ProcessTree(writer, self.kernel, self.ps_stats,
                                     self.ps_stats.sample_period,
                                     self.headers.get("profile.process"),
                                     options.prune, idle, self.taskstats,
                                     self.parent_map is not None)

        if self.kernel is not None:
            self.kernel_tree = ProcessTree(writer, self.kernel, None, 0,
                                           self.headers.get("profile.process"),
                                           False, None, None, True)

    def valid(self):
        """A trace is valid once at least one process/task was recorded."""
        # NOTE: the original carried a second, unreachable 'return' checking
        # headers/disk/ps/cpu stats after this one; dropped as dead code.
        return len(self.processes) != 0

    def add_process(self, process, start, end):
        """Record a named process/task together with its start and end
        times, indexing it by both timestamps."""
        self.processes[process] = [start, end]
        if start not in self.start:
            self.start[start] = []
        if process not in self.start[start]:
            self.start[start].append(process)
        if end not in self.end:
            self.end[end] = []
        if process not in self.end[end]:
            self.end[end].append(process)

    def compile(self, writer):
        """Link parsed processes into a tree of pointers and compute
        per-process statistics."""

        def find_parent_id_for(pid):
            # Fixed: the original used 'pid is 0' — an identity test on an
            # int literal that only works by CPython small-int caching.
            if pid == 0:
                return 0
            ppid = self.parent_map.get(pid)
            if ppid:
                # many of these double forks are so short lived
                # that we have no samples, or process info for them
                # so climb the parent hierarcy to find one
                if int (ppid * 1000) not in self.ps_stats.process_map:
                    ppid = find_parent_id_for (ppid)
            else:
                return 0
            return ppid

        # merge in the cmdline data
        if self.cmdline is not None:
            for proc in self.ps_stats.process_map.values():
                rpid = int (proc.pid // 1000)
                if rpid in self.cmdline:
                    cmd = self.cmdline[rpid]
                    proc.exe = cmd['exe']
                    proc.args = cmd['args']

        # re-parent any stray orphans if we can
        if self.parent_map is not None:
            for process in self.ps_stats.process_map.values():
                ppid = find_parent_id_for (int(process.pid // 1000))
                if ppid:
                    process.ppid = ppid * 1000

        # stitch the tree together with pointers
        for process in self.ps_stats.process_map.values():
            process.set_parent (self.ps_stats.process_map)

        # count on fingers variously
        for process in self.ps_stats.process_map.values():
            process.calc_stats (self.ps_stats.sample_period)

    def crop(self, writer, crop_after):
        """Trim all samples recorded after the first idle period that
        follows the process(es) named in *crop_after*; returns the idle
        timestamp, or None if the system never went idle."""

        def is_idle_at(util, start, j):
            # idle if instantaneous load < 0.25 and the 300-unit window
            # starting at 'start' also averages < 0.25
            k = j + 1
            while k < len(util) and util[k][0] < start + 300:
                k += 1
            k = min(k, len(util)-1)

            if util[j][1] >= 0.25:
                return False

            avgload = sum(u[1] for u in util[j:k+1]) / (k-j+1)
            return avgload < 0.25

        def is_idle(util, start):
            # evaluate idleness at the first sample at/after 'start'
            for j in range(0, len(util)):
                if util[j][0] < start:
                    continue
                return is_idle_at(util, start, j)
            return False

        names = [x[:15] for x in crop_after.split(",")]
        for proc in self.ps_stats.process_map.values():
            if proc.cmd in names or proc.exe in names:
                writer.info("selected proc '%s' from list (start %d)"
                            % (proc.cmd, proc.start_time))
                break
        else:
            # Fixed: the original tested 'proc is None' after the loop,
            # which can never be true (the loop variable keeps the last
            # value); for/else makes the warning actually fire.
            writer.warn("no selected crop proc '%s' in list" % crop_after)

        cpu_util = [(sample.time, sample.user + sample.sys + sample.io) for sample in self.cpu_stats]
        disk_util = [(sample.time, sample.util) for sample in self.disk_stats]

        idle = None
        for i in range(0, len(cpu_util)):
            if cpu_util[i][0] < proc.start_time:
                continue
            if is_idle_at(cpu_util, cpu_util[i][0], i) \
               and is_idle(disk_util, cpu_util[i][0]):
                idle = cpu_util[i][0]
                break

        if idle is None:
            writer.warn ("not idle after proc '%s'" % crop_after)
            return None

        crop_at = idle + 300
        writer.info ("cropping at time %d" % crop_at)
        while len (self.cpu_stats) \
              and self.cpu_stats[-1].time > crop_at:
            self.cpu_stats.pop()
        while len (self.disk_stats) \
              and self.disk_stats[-1].time > crop_at:
            self.disk_stats.pop()

        self.ps_stats.end_time = crop_at

        cropped_map = {}
        for key, value in self.ps_stats.process_map.items():
            if (value.start_time <= crop_at):
                cropped_map[key] = value

        for proc in cropped_map.values():
            proc.duration = min (proc.duration, crop_at - proc.start_time)
            while len (proc.samples) \
                  and proc.samples[-1].time > crop_at:
                proc.samples.pop()

        self.ps_stats.process_map = cropped_map

        return idle
251
252
253
class ParseError(Exception):
    """Represents errors during parse of the bootchart."""

    def __init__(self, value):
        # keep the message on .value, as callers read it directly
        self.value = value

    def __str__(self):
        return self.value
261
262def _parse_headers(file):
263 """Parses the headers of the bootchart."""
264 def parse(acc, line):
265 (headers, last) = acc
266 if '=' in line:
267 last, value = map (lambda x: x.strip(), line.split('=', 1))
268 else:
269 value = line.strip()
270 headers[last] += value
271 return headers, last
Brad Bishopc342db32019-05-15 21:57:59 -0400272 return reduce(parse, file.read().split('\n'), (defaultdict(str),''))[0]
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500273
274def _parse_timed_blocks(file):
275 """Parses (ie., splits) a file into so-called timed-blocks. A
276 timed-block consists of a timestamp on a line by itself followed
277 by zero or more lines of data for that point in time."""
278 def parse(block):
279 lines = block.split('\n')
280 if not lines:
281 raise ParseError('expected a timed-block consisting a timestamp followed by data lines')
282 try:
283 return (int(lines[0]), lines[1:])
284 except ValueError:
285 raise ParseError("expected a timed-block, but timestamp '%s' is not an integer" % lines[0])
Brad Bishopc342db32019-05-15 21:57:59 -0400286 blocks = file.read().split('\n\n')
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500287 return [parse(block) for block in blocks if block.strip() and not block.endswith(' not running\n')]
288
def _parse_proc_ps_log(writer, file):
    """
    * See proc(5) for details.
    *
    * {pid, comm, state, ppid, pgrp, session, tty_nr, tpgid, flags, minflt, cminflt, majflt, cmajflt, utime, stime,
    *  cutime, cstime, priority, nice, 0, itrealvalue, starttime, vsize, rss, rlim, startcode, endcode, startstack,
    *  kstkesp, kstkeip}
    """
    process_map = {}
    last_time = 0
    blocks = _parse_timed_blocks(file)
    for time, lines in blocks:
        for line in lines:
            if not line:
                continue
            tokens = line.split(' ')
            if len(tokens) < 21:
                continue

            # comm may contain spaces: locate the token that closes '(comm)'
            offset = [index for index, token in enumerate(tokens[1:]) if token[-1] == ')'][0]
            pid = int(tokens[0])
            cmd = ' '.join(tokens[1:2+offset])
            state = tokens[2+offset]
            ppid = int(tokens[3+offset])
            user_cpu = int(tokens[13+offset])
            sys_cpu = int(tokens[14+offset])
            stime = int(tokens[21+offset])

            # magic fixed point-ness ...
            pid *= 1000
            ppid *= 1000
            if pid in process_map:
                process = process_map[pid]
                process.cmd = cmd.strip('()') # why rename after latest name??
            else:
                process = Process(writer, pid, cmd.strip('()'), ppid, min(time, stime))
                process_map[pid] = process

            if process.last_user_cpu_time is not None and process.last_sys_cpu_time is not None and last_time is not None:
                user_load, sys_load = process.calc_load(user_cpu, sys_cpu, max(1, time - last_time))
                cpu_sample = CPUSample('null', user_load, sys_load, 0.0)
                process.samples.append(ProcessSample(time, state, cpu_sample))

            process.last_user_cpu_time = user_cpu
            process.last_sys_cpu_time = sys_cpu
        last_time = time

    if len(blocks) < 2:
        return None

    start_time = blocks[0][0]
    avg_sample_length = (last_time - start_time) / (len(blocks) - 1)

    return ProcessStats(writer, process_map, len(blocks), avg_sample_length, start_time, last_time)
337
def _parse_taskstats_log(writer, file):
    """
    * See bootchart-collector.c for details.
    *
    * { pid, ppid, comm, cpu_run_real_total, blkio_delay_total, swapin_delay_total }
    *
    """
    process_map = {}
    pid_rewrites = {}
    last_time = None
    blocks = _parse_timed_blocks(file)
    for time, lines in blocks:
        # we have no 'stime' from taskstats, so prep 'init'
        if last_time is None:
            process = Process(writer, 1, '[init]', 0, 0)
            process_map[1000] = process
            last_time = time
        for line in lines:
            if not line:
                continue
            tokens = line.split(' ')
            if len(tokens) != 6:
                continue

            opid = int(tokens[0])
            ppid = int(tokens[1])
            cmd = tokens[2]
            cpu_ns = long(tokens[-3])
            blkio_delay_ns = long(tokens[-2])
            swapin_delay_ns = long(tokens[-1])

            # make space for trees of pids
            opid *= 1000
            ppid *= 1000

            # when the process name changes, we re-write the pid.
            pid = pid_rewrites.get(opid, opid)

            cmd = cmd.strip('(').strip(')')
            if pid in process_map:
                process = process_map[pid]
                if process.cmd != cmd:
                    pid += 1
                    pid_rewrites[opid] = pid
                    process = process.split (writer, pid, cmd, ppid, time)
                    process_map[pid] = process
                else:
                    process.cmd = cmd
            else:
                process = Process(writer, pid, cmd, ppid, time)
                process_map[pid] = process

            delta_cpu_ns = float(cpu_ns - process.last_cpu_ns)
            delta_blkio_delay_ns = float(blkio_delay_ns - process.last_blkio_delay_ns)
            delta_swapin_delay_ns = float(swapin_delay_ns - process.last_swapin_delay_ns)

            # make up some state data ...
            if delta_cpu_ns > 0:
                state = "R"
            elif delta_blkio_delay_ns + delta_swapin_delay_ns > 0:
                state = "D"
            else:
                state = "S"

            # retain the ns timing information into a CPUSample - that tries
            # with the old-style to be a %age of CPU used in this time-slice.
            if delta_cpu_ns + delta_blkio_delay_ns + delta_swapin_delay_ns > 0:
                cpu_sample = CPUSample('null', delta_cpu_ns, 0.0,
                                       delta_blkio_delay_ns,
                                       delta_swapin_delay_ns)
                process.samples.append(ProcessSample(time, state, cpu_sample))

            process.last_cpu_ns = cpu_ns
            process.last_blkio_delay_ns = blkio_delay_ns
            process.last_swapin_delay_ns = swapin_delay_ns
        last_time = time

    if len(blocks) < 2:
        return None

    start_time = blocks[0][0]
    avg_sample_length = (last_time - start_time) / (len(blocks) - 1)

    return ProcessStats(writer, process_map, len(blocks), avg_sample_length, start_time, last_time)
423
def _parse_proc_stat_log(file):
    """Parse /proc/stat snapshots into per-interval CPUSample deltas
    (user, system, io-wait fractions of the elapsed jiffies)."""
    samples = []
    prev = None
    for time, lines in _parse_timed_blocks(file):
        # skip emtpy lines
        if not lines:
            continue
        # CPU times {user, nice, system, idle, io_wait, irq, softirq}
        cur = [int(token) for token in lines[0].split()[1:]]
        if prev:
            user = float((cur[0] + cur[1]) - (prev[0] + prev[1]))
            system = float((cur[2] + cur[5] + cur[6]) - (prev[2] + prev[5] + prev[6]))
            idle = float(cur[3] - prev[3])
            iowait = float(cur[4] - prev[4])

            total = max(user + system + idle + iowait, 1)
            samples.append( CPUSample(time, user/total, system/total, iowait/total) )

        prev = cur
    # skip the rest of statistics lines
    return samples
446
def _parse_reduced_log(file, sample_class):
    """Parse a pre-reduced log: each timed block carries one line of
    whitespace-separated floats fed straight into *sample_class*."""
    return [sample_class(time, *map(float, lines[0].split()))
            for time, lines in _parse_timed_blocks(file)]
452
def _parse_proc_disk_stat_log(file):
    """
    Parse file for disk stats, but only look at the whole device, eg. sda,
    not sda1, sda2 etc. The format of relevant lines should be:
    {major minor name rio rmerge rsect ruse wio wmerge wsect wuse running use aveq}
    """
    disk_regex_re = re.compile ('^([hsv]d.|mtdblock\d|mmcblk\d|cciss/c\d+d\d+.*)$')

    def is_relevant_line(linetokens):
        # this gets called an awful lot; whole-device names only
        return len(linetokens) == 14 and disk_regex_re.match(linetokens[2])

    raw_samples = []
    for time, lines in _parse_timed_blocks(file):
        sample = DiskStatSample(time)
        for linetokens in (l.split() for l in lines):
            if not is_relevant_line(linetokens):
                continue
            rsect = int(linetokens[5])
            wsect = int(linetokens[9])
            use = int(linetokens[12])
            sample.add_diskdata([rsect, wsect, use])
        raw_samples.append(sample)

    disk_stats = []
    for earlier, later in zip(raw_samples[:-1], raw_samples[1:]):
        interval = earlier.time - later.time
        if interval == 0:
            interval = 1
        deltas = [a - b for a, b in zip(earlier.diskdata, later.diskdata)]
        read_tput = deltas[0] / 2.0 * 100.0 / interval
        write_tput = deltas[1] / 2.0 * 100.0 / interval
        util = float(deltas[2]) / 10 / interval
        util = max(0.0, min(1.0, util))
        disk_stats.append(DiskSample(later.time, read_tput, write_tput, util))

    return disk_stats
493
def _parse_reduced_proc_meminfo_log(file):
    """
    Parse file for global memory statistics with
    'MemTotal', 'MemFree', 'Buffers', 'Cached', 'SwapTotal', 'SwapFree' values
    (in that order) directly stored on one line.
    """
    used_values = ('MemTotal', 'MemFree', 'Buffers', 'Cached', 'SwapTotal', 'SwapFree',)

    mem_stats = []
    for time, lines in _parse_timed_blocks(file):
        sample = MemSample(time)
        for name, raw in zip(used_values, lines[0].split()):
            sample.add_value(name, int(raw))
        if sample.valid():
            mem_stats.append(DrawMemSample(sample))
    return mem_stats
512
def _parse_proc_meminfo_log(file):
    """
    Parse file for global memory statistics.
    The format of relevant lines should be: ^key: value( unit)?

    Every key found is stored on the sample; DrawMemSample picks the
    fields it needs.  (The original defined an unused 'used_values'
    tuple here — removed as dead code.)
    """
    mem_stats = []
    meminfo_re = re.compile(r'([^ \t:]+):\s*(\d+).*')

    for time, lines in _parse_timed_blocks(file):
        sample = MemSample(time)

        for line in lines:
            match = meminfo_re.match(line)
            if not match:
                raise ParseError("Invalid meminfo line \"%s\"" % line)
            sample.add_value(match.group(1), int(match.group(2)))

        if sample.valid():
            mem_stats.append(DrawMemSample(sample))

    return mem_stats
536
def _parse_monitor_disk_log(file):
    """
    Parse file with information about amount of diskspace used.
    The format of relevant lines should be: ^volume path: number-of-bytes?
    """
    diskinfo_re = re.compile(r'^(.+):\s*(\d+)$')

    disk_stats = []
    for time, lines in _parse_timed_blocks(file):
        sample = DiskSpaceSample(time)
        for line in lines:
            m = diskinfo_re.match(line)
            if m is None:
                raise ParseError("Invalid monitor_disk line \"%s\"" % line)
            sample.add_value(m.group(1), int(m.group(2)))
        if sample.valid():
            disk_stats.append(sample)
    return disk_stats
558
def _parse_pressure_logs(file, filename):
    """
    Parse file for "some" pressure with 'avg10', 'avg60' 'avg300' and delta total values
    (in that order) directly stored on one line for both CPU and IO, based on filename.
    """
    sample_cls = CPUPressureSample if filename == "cpu.log" else IOPressureSample
    pressure_stats = []
    for time, lines in _parse_timed_blocks(file):
        for line in lines:
            if not line:
                continue
            fields = line.split()
            pressure_stats.append(sample_cls(time,
                                             float(fields[0]),   # avg10
                                             float(fields[1]),   # avg60
                                             float(fields[2]),   # avg300
                                             float(fields[3])))  # delta total
    return pressure_stats
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500580
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500581# if we boot the kernel with: initcall_debug printk.time=1 we can
582# get all manner of interesting data from the dmesg output
583# We turn this into a pseudo-process tree: each event is
584# characterised by a
585# we don't try to detect a "kernel finished" state - since the kernel
586# continues to do interesting things after init is called.
587#
588# sample input:
589# [ 0.000000] ACPI: FACP 3f4fc000 000F4 (v04 INTEL Napa 00000001 MSFT 01000013)
590# ...
591# [ 0.039993] calling migration_init+0x0/0x6b @ 1
592# [ 0.039993] initcall migration_init+0x0/0x6b returned 1 after 0 usecs
def _parse_dmesg(writer, file):
    """Turn 'initcall_debug printk.time=1' dmesg output into a pseudo
    process tree of kernel init calls, rooted at a synthetic 'k-boot'
    process; returns the map's values."""
    timestamp_re = re.compile ("^\[\s*(\d+\.\d+)\s*]\s+(.*)$")
    split_re = re.compile ("^(\S+)\s+([\S\+_-]+) (.*)$")
    process_map = {}
    idx = 0
    inc = 1.0 / 1000000
    kernel = Process(writer, idx, "k-boot", 0, 0.1)
    process_map['k-boot'] = kernel
    base_ts = False
    max_ts = 0
    for line in file.read().split('\n'):
        t = timestamp_re.match (line)
        if t is None:
            continue

        time_ms = float (t.group(1)) * 1000
        # looks like we may have a huge diff after the clock
        # has been set up. This could lead to huge graph:
        # so huge we will be killed by the OOM.
        # So instead of using the plain timestamp we will
        # use a delta to first one and skip the first one
        # for convenience
        if max_ts == 0 and not base_ts and time_ms > 1000:
            base_ts = time_ms
            continue
        max_ts = max(time_ms, max_ts)
        if base_ts:
            time_ms -= base_ts

        m = split_re.match (t.group(2))
        if m is None:
            continue
        kind = m.group(1)    # renamed from 'type' (shadowed the builtin)
        func = m.group(2)
        rest = m.group(3)

        if t.group(2).startswith ('Write protecting the') or \
           t.group(2).startswith ('Freeing unused kernel memory'):
            kernel.duration = time_ms / 10
            continue

        if kind == "calling":
            ppid = kernel.pid
            p = re.match ("\@ (\d+)", rest)
            if p is not None:
                ppid = float (p.group(1)) // 1000
            name = func.split ('+', 1) [0]
            idx += inc
            process_map[func] = Process(writer, ppid + idx, name, ppid, time_ms / 10)
        elif kind == "initcall":
            if func in process_map:
                process = process_map[func]
                process.duration = (time_ms / 10) - process.start_time
            else:
                print("corrupted init call for %s" % (func))
        elif kind == "async_waiting" or kind == "async_continuing":
            continue # ignore

    return process_map.values()
659
660#
661# Parse binary pacct accounting file output if we have one
662# cf. /usr/include/linux/acct.h
663#
664def _parse_pacct(writer, file):
665 # read LE int32
666 def _read_le_int32(file):
667 byts = file.read(4)
668 return (ord(byts[0])) | (ord(byts[1]) << 8) | \
669 (ord(byts[2]) << 16) | (ord(byts[3]) << 24)
670
671 parent_map = {}
672 parent_map[0] = 0
673 while file.read(1) != "": # ignore flags
674 ver = file.read(1)
675 if ord(ver) < 3:
676 print("Invalid version 0x%x" % (ord(ver)))
677 return None
678
679 file.seek (14, 1) # user, group etc.
680 pid = _read_le_int32 (file)
681 ppid = _read_le_int32 (file)
682# print "Parent of %d is %d" % (pid, ppid)
683 parent_map[pid] = ppid
684 file.seek (4 + 4 + 16, 1) # timings
685 file.seek (16, 1) # acct_comm
686 return parent_map
687
688def _parse_paternity_log(writer, file):
689 parent_map = {}
690 parent_map[0] = 0
Brad Bishopc342db32019-05-15 21:57:59 -0400691 for line in file.read().split('\n'):
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500692 if not line:
693 continue
694 elems = line.split(' ') # <Child> <Parent>
695 if len (elems) >= 2:
696# print "paternity of %d is %d" % (int(elems[0]), int(elems[1]))
697 parent_map[int(elems[0])] = int(elems[1])
698 else:
699 print("Odd paternity line '%s'" % (line))
700 return parent_map
701
702def _parse_cmdline_log(writer, file):
703 cmdLines = {}
Brad Bishopc342db32019-05-15 21:57:59 -0400704 for block in file.read().split('\n\n'):
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500705 lines = block.split('\n')
706 if len (lines) >= 3:
707# print "Lines '%s'" % (lines[0])
708 pid = int (lines[0])
709 values = {}
710 values['exe'] = lines[1].lstrip(':')
711 args = lines[2].lstrip(':').split('\0')
712 args.pop()
713 values['args'] = args
714 cmdLines[pid] = values
715 return cmdLines
716
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500717def _parse_bitbake_buildstats(writer, state, filename, file):
718 paths = filename.split("/")
719 task = paths[-1]
720 pn = paths[-2]
721 start = None
722 end = None
723 for line in file:
724 if line.startswith("Started:"):
725 start = int(float(line.split()[-1]))
726 elif line.startswith("Ended:"):
727 end = int(float(line.split()[-1]))
728 if start and end:
729 state.add_process(pn + ":" + task, start, end)
730
def get_num_cpus(headers):
    """Get the number of CPUs from the system.cpu header property. As the
    CPU utilization graphs are relative, the number of CPUs currently makes
    no difference."""
    if headers is None:
        return 1
    explicit = headers.get("system.cpu.num")
    if explicit:
        return max(int(explicit), 1)
    cpu_model = headers.get("system.cpu")
    if cpu_model is None:
        return 1
    mat = re.match(".*\\((\\d+)\\)", cpu_model)
    return max(int(mat.group(1)), 1) if mat else 1
746
def _do_parse(writer, state, filename, file):
    """Dispatch *file* to the parser matching its basename, store the
    result on the matching *state* attribute and return *state*.
    Anything not ending in '.log' is treated as a bitbake buildstats
    task file."""
    writer.info("parsing '%s'" % filename)
    t1 = time.process_time()
    name = os.path.basename(filename)
    dispatch = {
        "proc_diskstats.log":         ("disk_stats",   lambda: _parse_proc_disk_stat_log(file)),
        "reduced_proc_diskstats.log": ("disk_stats",   lambda: _parse_reduced_log(file, DiskSample)),
        "proc_stat.log":              ("cpu_stats",    lambda: _parse_proc_stat_log(file)),
        "reduced_proc_stat.log":      ("cpu_stats",    lambda: _parse_reduced_log(file, CPUSample)),
        "proc_meminfo.log":           ("mem_stats",    lambda: _parse_proc_meminfo_log(file)),
        "reduced_proc_meminfo.log":   ("mem_stats",    lambda: _parse_reduced_proc_meminfo_log(file)),
        "cmdline2.log":               ("cmdline",      lambda: _parse_cmdline_log(writer, file)),
        "monitor_disk.log":           ("monitor_disk", lambda: _parse_monitor_disk_log(file)),
        # pressure logs are in a subdirectory
        "cpu.log":                    ("cpu_pressure", lambda: _parse_pressure_logs(file, name)),
        "io.log":                     ("io_pressure",  lambda: _parse_pressure_logs(file, name)),
    }
    if name in dispatch:
        attr, parse = dispatch[name]
        setattr(state, attr, parse())
    elif not filename.endswith('.log'):
        _parse_bitbake_buildstats(writer, state, filename, file)
    t2 = time.process_time()
    writer.info(" %s seconds" % str(t2-t1))
    return state
777
def parse_file(writer, state, filename):
    """Open *filename* as text and feed it through _do_parse."""
    if state.filename is None:
        state.filename = filename
    with open(filename, "r") as f:
        return _do_parse(writer, state, filename, f)
784
def parse_paths(writer, state, paths):
    """Parse every entry in *paths* into *state*: plain files directly,
    directories recursively, and (optionally gzipped) tarballs member
    by member.  Returns the updated state."""
    for path in paths:
        if state.filename is None:
            state.filename = path
        root, extension = os.path.splitext(path)
        if not os.path.exists(path):
            writer.warn("warning: path '%s' does not exist, ignoring." % path)
            continue
        if os.path.isdir(path):
            children = sorted([os.path.join(path, f) for f in os.listdir(path)])
            state = parse_paths(writer, state, children)
        elif extension in [".tar", ".tgz", ".gz"]:
            if extension == ".gz":
                root, extension = os.path.splitext(root)
                if extension != ".tar":
                    writer.warn("warning: can only handle zipped tar files, not zipped '%s'-files; ignoring" % extension)
                    continue
            tf = None
            try:
                writer.status("parsing '%s'" % path)
                tf = tarfile.open(path, 'r:*')
                for name in tf.getnames():
                    state = _do_parse(writer, state, name, tf.extractfile(name))
            except tarfile.ReadError as error:
                raise ParseError("error: could not read tarfile '%s': %s." % (path, error))
            finally:
                if tf is not None:
                    tf.close()
        else:
            state = parse_file(writer, state, path)
    return state
817
def split_res(res, options):
    """ Split the res into n pieces """
    if options.num <= 1:
        return [res]

    res_list = []
    start_keys = sorted(res.start.keys())
    # fragment size, rounded up so no tail entries are dropped
    raw = len(start_keys) / float(options.num)
    frag_size = int(raw) + 1 if raw > int(raw) else int(raw)

    lo = 0
    hi = frag_size
    while lo < hi:
        state = Trace(None, [], None)
        if options.full_time:
            state.min = min(res.start.keys())
            state.max = max(res.end.keys())
        for i in range(lo, hi):
            for p in res.start[start_keys[i]]:
                state.add_process(p, start_keys[i], res.processes[p][1])
        lo = hi
        hi = min(hi + frag_size, len(start_keys))
        res_list.append(state)
    return res_list