Blame - poky/scripts/pybootchartgui/pybootchartgui/parsing.py - openbmc/openbmc

blob: 63a53b6b8855a8b1e4919dd9950aab03f98f012a [file] [log] [blame]

Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	1	# This file is part of pybootchartgui.
				2
				3	# pybootchartgui is free software: you can redistribute it and/or modify
				4	# it under the terms of the GNU General Public License as published by
				5	# the Free Software Foundation, either version 3 of the License, or
				6	# (at your option) any later version.
				7
				8	# pybootchartgui is distributed in the hope that it will be useful,
				9	# but WITHOUT ANY WARRANTY; without even the implied warranty of
				10	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				11	# GNU General Public License for more details.
				12
				13	# You should have received a copy of the GNU General Public License
				14	# along with pybootchartgui. If not, see <http://www.gnu.org/licenses/>.
				15
Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	16	import os
				17	import string
				18	import re
				19	import sys
				20	import tarfile
Andrew Geissler	82c905d	2020-04-13 13:39:40 -0500	[diff] [blame]	21	import time
Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	22	from collections import defaultdict
				23	from functools import reduce
				24
				25	from .samples import *
				26	from .process_tree import ProcessTree
				27
				28	if sys.version_info >= (3, 0):
				29	long = int
				30
				31	# Parsing produces as its end result a 'Trace'
				32
				33	class Trace:
				34	def __init__(self, writer, paths, options):
				35	self.processes = {}
				36	self.start = {}
				37	self.end = {}
				38	self.min = None
				39	self.max = None
				40	self.headers = None
Brad Bishop	6e60e8b	2018-02-01 10:27:11 -0500	[diff] [blame]	41	self.disk_stats = []
Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	42	self.ps_stats = None
				43	self.taskstats = None
Brad Bishop	6e60e8b	2018-02-01 10:27:11 -0500	[diff] [blame]	44	self.cpu_stats = []
Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	45	self.cmdline = None
				46	self.kernel = None
				47	self.kernel_tree = None
				48	self.filename = None
				49	self.parent_map = None
Brad Bishop	6e60e8b	2018-02-01 10:27:11 -0500	[diff] [blame]	50	self.mem_stats = []
				51	self.monitor_disk = None
Andrew Geissler	615f2f1	2022-07-15 14:00:58 -0500	[diff] [blame]	52	self.cpu_pressure = []
				53	self.io_pressure = []
Patrick Williams	db4c27e	2022-08-05 08:10:29 -0500	[diff] [blame]	54	self.mem_pressure = []
Brad Bishop	6e60e8b	2018-02-01 10:27:11 -0500	[diff] [blame]	55	self.times = [] # Always empty, but expected by draw.py when drawing system charts.
Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	56
				57	if len(paths):
				58	parse_paths (writer, self, paths)
				59	if not self.valid():
				60	raise ParseError("empty state: '%s' does not contain a valid bootchart" % ", ".join(paths))
				61
				62	if options.full_time:
				63	self.min = min(self.start.keys())
				64	self.max = max(self.end.keys())
				65
Brad Bishop	6e60e8b	2018-02-01 10:27:11 -0500	[diff] [blame]	66
				67	# Rendering system charts depends on start and end
				68	# time. Provide them where the original drawing code expects
				69	# them, i.e. in proc_tree.
				70	class BitbakeProcessTree:
				71	def __init__(self, start_time, end_time):
				72	self.start_time = start_time
				73	self.end_time = end_time
				74	self.duration = self.end_time - self.start_time
				75	self.proc_tree = BitbakeProcessTree(min(self.start.keys()),
				76	max(self.end.keys()))
				77
				78
Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	79	return
				80
				81	# Turn that parsed information into something more useful
				82	# link processes into a tree of pointers, calculate statistics
				83	self.compile(writer)
				84
				85	# Crop the chart to the end of the first idle period after the given
				86	# process
				87	if options.crop_after:
				88	idle = self.crop (writer, options.crop_after)
				89	else:
				90	idle = None
				91
				92	# Annotate other times as the first start point of given process lists
				93	self.times = [ idle ]
				94	if options.annotate:
				95	for procnames in options.annotate:
				96	names = [x[:15] for x in procnames.split(",")]
				97	for proc in self.ps_stats.process_map.values():
				98	if proc.cmd in names:
				99	self.times.append(proc.start_time)
				100	break
				101	else:
				102	self.times.append(None)
				103
				104	self.proc_tree = ProcessTree(writer, self.kernel, self.ps_stats,
				105	self.ps_stats.sample_period,
				106	self.headers.get("profile.process"),
				107	options.prune, idle, self.taskstats,
				108	self.parent_map is not None)
				109
				110	if self.kernel is not None:
				111	self.kernel_tree = ProcessTree(writer, self.kernel, None, 0,
				112	self.headers.get("profile.process"),
				113	False, None, None, True)
				114
				115	def valid(self):
				116	return len(self.processes) != 0
				117	return self.headers != None and self.disk_stats != None and \
				118	self.ps_stats != None and self.cpu_stats != None
				119
				120	def add_process(self, process, start, end):
				121	self.processes[process] = [start, end]
				122	if start not in self.start:
				123	self.start[start] = []
				124	if process not in self.start[start]:
				125	self.start[start].append(process)
				126	if end not in self.end:
				127	self.end[end] = []
				128	if process not in self.end[end]:
				129	self.end[end].append(process)
				130
				131	def compile(self, writer):
				132
				133	def find_parent_id_for(pid):
Andrew Geissler	fc113ea	2023-03-31 09:59:46 -0500	[diff] [blame]	134	if pid == 0:
Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	135	return 0
				136	ppid = self.parent_map.get(pid)
				137	if ppid:
				138	# many of these double forks are so short lived
				139	# that we have no samples, or process info for them
				140	# so climb the parent hierarcy to find one
				141	if int (ppid * 1000) not in self.ps_stats.process_map:
				142	# print "Pid '%d' short lived with no process" % ppid
				143	ppid = find_parent_id_for (ppid)
				144	# else:
				145	# print "Pid '%d' has an entry" % ppid
				146	else:
				147	# print "Pid '%d' missing from pid map" % pid
				148	return 0
				149	return ppid
				150
				151	# merge in the cmdline data
				152	if self.cmdline is not None:
				153	for proc in self.ps_stats.process_map.values():
				154	rpid = int (proc.pid // 1000)
				155	if rpid in self.cmdline:
				156	cmd = self.cmdline[rpid]
				157	proc.exe = cmd['exe']
				158	proc.args = cmd['args']
				159	# else:
				160	# print "proc %d '%s' not in cmdline" % (rpid, proc.exe)
				161
				162	# re-parent any stray orphans if we can
				163	if self.parent_map is not None:
				164	for process in self.ps_stats.process_map.values():
				165	ppid = find_parent_id_for (int(process.pid // 1000))
				166	if ppid:
				167	process.ppid = ppid * 1000
				168
				169	# stitch the tree together with pointers
				170	for process in self.ps_stats.process_map.values():
				171	process.set_parent (self.ps_stats.process_map)
				172
				173	# count on fingers variously
				174	for process in self.ps_stats.process_map.values():
				175	process.calc_stats (self.ps_stats.sample_period)
				176
				177	def crop(self, writer, crop_after):
				178
				179	def is_idle_at(util, start, j):
				180	k = j + 1
				181	while k < len(util) and util[k][0] < start + 300:
				182	k += 1
				183	k = min(k, len(util)-1)
				184
				185	if util[j][1] >= 0.25:
				186	return False
				187
				188	avgload = sum(u[1] for u in util[j:k+1]) / (k-j+1)
				189	if avgload < 0.25:
				190	return True
				191	else:
				192	return False
				193	def is_idle(util, start):
				194	for j in range(0, len(util)):
				195	if util[j][0] < start:
				196	continue
				197	return is_idle_at(util, start, j)
				198	else:
				199	return False
				200
				201	names = [x[:15] for x in crop_after.split(",")]
				202	for proc in self.ps_stats.process_map.values():
				203	if proc.cmd in names or proc.exe in names:
				204	writer.info("selected proc '%s' from list (start %d)"
				205	% (proc.cmd, proc.start_time))
				206	break
				207	if proc is None:
				208	writer.warn("no selected crop proc '%s' in list" % crop_after)
				209
				210
				211	cpu_util = [(sample.time, sample.user + sample.sys + sample.io) for sample in self.cpu_stats]
				212	disk_util = [(sample.time, sample.util) for sample in self.disk_stats]
				213
				214	idle = None
				215	for i in range(0, len(cpu_util)):
				216	if cpu_util[i][0] < proc.start_time:
				217	continue
				218	if is_idle_at(cpu_util, cpu_util[i][0], i) \
				219	and is_idle(disk_util, cpu_util[i][0]):
				220	idle = cpu_util[i][0]
				221	break
				222
				223	if idle is None:
				224	writer.warn ("not idle after proc '%s'" % crop_after)
				225	return None
				226
				227	crop_at = idle + 300
				228	writer.info ("cropping at time %d" % crop_at)
				229	while len (self.cpu_stats) \
				230	and self.cpu_stats[-1].time > crop_at:
				231	self.cpu_stats.pop()
				232	while len (self.disk_stats) \
				233	and self.disk_stats[-1].time > crop_at:
				234	self.disk_stats.pop()
				235
				236	self.ps_stats.end_time = crop_at
				237
				238	cropped_map = {}
				239	for key, value in self.ps_stats.process_map.items():
				240	if (value.start_time <= crop_at):
				241	cropped_map[key] = value
				242
				243	for proc in cropped_map.values():
				244	proc.duration = min (proc.duration, crop_at - proc.start_time)
				245	while len (proc.samples) \
				246	and proc.samples[-1].time > crop_at:
				247	proc.samples.pop()
				248
				249	self.ps_stats.process_map = cropped_map
				250
				251	return idle
				252
				253
				254
				255	class ParseError(Exception):
				256	"""Represents errors during parse of the bootchart."""
				257	def __init__(self, value):
				258	self.value = value
				259
				260	def __str__(self):
				261	return self.value
				262
				263	def _parse_headers(file):
				264	"""Parses the headers of the bootchart."""
				265	def parse(acc, line):
				266	(headers, last) = acc
				267	if '=' in line:
				268	last, value = map (lambda x: x.strip(), line.split('=', 1))
				269	else:
				270	value = line.strip()
				271	headers[last] += value
				272	return headers, last
Brad Bishop	c342db3	2019-05-15 21:57:59 -0400	[diff] [blame]	273	return reduce(parse, file.read().split('\n'), (defaultdict(str),''))[0]
Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	274
				275	def _parse_timed_blocks(file):
				276	"""Parses (ie., splits) a file into so-called timed-blocks. A
				277	timed-block consists of a timestamp on a line by itself followed
				278	by zero or more lines of data for that point in time."""
				279	def parse(block):
				280	lines = block.split('\n')
				281	if not lines:
				282	raise ParseError('expected a timed-block consisting a timestamp followed by data lines')
				283	try:
				284	return (int(lines[0]), lines[1:])
				285	except ValueError:
				286	raise ParseError("expected a timed-block, but timestamp '%s' is not an integer" % lines[0])
Brad Bishop	c342db3	2019-05-15 21:57:59 -0400	[diff] [blame]	287	blocks = file.read().split('\n\n')
Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	288	return [parse(block) for block in blocks if block.strip() and not block.endswith(' not running\n')]
				289
				290	def _parse_proc_ps_log(writer, file):
				291	"""
				292	* See proc(5) for details.
				293	*
				294	* {pid, comm, state, ppid, pgrp, session, tty_nr, tpgid, flags, minflt, cminflt, majflt, cmajflt, utime, stime,
				295	* cutime, cstime, priority, nice, 0, itrealvalue, starttime, vsize, rss, rlim, startcode, endcode, startstack,
				296	* kstkesp, kstkeip}
				297	"""
				298	processMap = {}
				299	ltime = 0
				300	timed_blocks = _parse_timed_blocks(file)
				301	for time, lines in timed_blocks:
				302	for line in lines:
				303	if not line: continue
				304	tokens = line.split(' ')
				305	if len(tokens) < 21:
				306	continue
				307
				308	offset = [index for index, token in enumerate(tokens[1:]) if token[-1] == ')'][0]
				309	pid, cmd, state, ppid = int(tokens[0]), ' '.join(tokens[1:2+offset]), tokens[2+offset], int(tokens[3+offset])
				310	userCpu, sysCpu, stime = int(tokens[13+offset]), int(tokens[14+offset]), int(tokens[21+offset])
				311
				312	# magic fixed point-ness ...
				313	pid *= 1000
				314	ppid *= 1000
				315	if pid in processMap:
				316	process = processMap[pid]
				317	process.cmd = cmd.strip('()') # why rename after latest name??
				318	else:
				319	process = Process(writer, pid, cmd.strip('()'), ppid, min(time, stime))
				320	processMap[pid] = process
				321
				322	if process.last_user_cpu_time is not None and process.last_sys_cpu_time is not None and ltime is not None:
				323	userCpuLoad, sysCpuLoad = process.calc_load(userCpu, sysCpu, max(1, time - ltime))
				324	cpuSample = CPUSample('null', userCpuLoad, sysCpuLoad, 0.0)
				325	process.samples.append(ProcessSample(time, state, cpuSample))
				326
				327	process.last_user_cpu_time = userCpu
				328	process.last_sys_cpu_time = sysCpu
				329	ltime = time
				330
				331	if len (timed_blocks) < 2:
				332	return None
				333
				334	startTime = timed_blocks[0][0]
				335	avgSampleLength = (ltime - startTime)/(len (timed_blocks) - 1)
				336
				337	return ProcessStats (writer, processMap, len (timed_blocks), avgSampleLength, startTime, ltime)
				338
				339	def _parse_taskstats_log(writer, file):
				340	"""
				341	* See bootchart-collector.c for details.
				342	*
				343	* { pid, ppid, comm, cpu_run_real_total, blkio_delay_total, swapin_delay_total }
				344	*
				345	"""
				346	processMap = {}
				347	pidRewrites = {}
				348	ltime = None
				349	timed_blocks = _parse_timed_blocks(file)
				350	for time, lines in timed_blocks:
				351	# we have no 'stime' from taskstats, so prep 'init'
				352	if ltime is None:
				353	process = Process(writer, 1, '[init]', 0, 0)
				354	processMap[1000] = process
				355	ltime = time
				356	# continue
				357	for line in lines:
				358	if not line: continue
				359	tokens = line.split(' ')
				360	if len(tokens) != 6:
				361	continue
				362
				363	opid, ppid, cmd = int(tokens[0]), int(tokens[1]), tokens[2]
				364	cpu_ns, blkio_delay_ns, swapin_delay_ns = long(tokens[-3]), long(tokens[-2]), long(tokens[-1]),
				365
				366	# make space for trees of pids
				367	opid *= 1000
				368	ppid *= 1000
				369
				370	# when the process name changes, we re-write the pid.
				371	if opid in pidRewrites:
				372	pid = pidRewrites[opid]
				373	else:
				374	pid = opid
				375
				376	cmd = cmd.strip('(').strip(')')
				377	if pid in processMap:
				378	process = processMap[pid]
				379	if process.cmd != cmd:
				380	pid += 1
				381	pidRewrites[opid] = pid
				382	# print "process mutation ! '%s' vs '%s' pid %s -> pid %s\n" % (process.cmd, cmd, opid, pid)
				383	process = process.split (writer, pid, cmd, ppid, time)
				384	processMap[pid] = process
				385	else:
				386	process.cmd = cmd;
				387	else:
				388	process = Process(writer, pid, cmd, ppid, time)
				389	processMap[pid] = process
				390
				391	delta_cpu_ns = (float) (cpu_ns - process.last_cpu_ns)
				392	delta_blkio_delay_ns = (float) (blkio_delay_ns - process.last_blkio_delay_ns)
				393	delta_swapin_delay_ns = (float) (swapin_delay_ns - process.last_swapin_delay_ns)
				394
				395	# make up some state data ...
				396	if delta_cpu_ns > 0:
				397	state = "R"
				398	elif delta_blkio_delay_ns + delta_swapin_delay_ns > 0:
				399	state = "D"
				400	else:
				401	state = "S"
				402
				403	# retain the ns timing information into a CPUSample - that tries
				404	# with the old-style to be a %age of CPU used in this time-slice.
				405	if delta_cpu_ns + delta_blkio_delay_ns + delta_swapin_delay_ns > 0:
				406	# print "proc %s cpu_ns %g delta_cpu %g" % (cmd, cpu_ns, delta_cpu_ns)
				407	cpuSample = CPUSample('null', delta_cpu_ns, 0.0,
				408	delta_blkio_delay_ns,
				409	delta_swapin_delay_ns)
				410	process.samples.append(ProcessSample(time, state, cpuSample))
				411
				412	process.last_cpu_ns = cpu_ns
				413	process.last_blkio_delay_ns = blkio_delay_ns
				414	process.last_swapin_delay_ns = swapin_delay_ns
				415	ltime = time
				416
				417	if len (timed_blocks) < 2:
				418	return None
				419
				420	startTime = timed_blocks[0][0]
				421	avgSampleLength = (ltime - startTime)/(len(timed_blocks)-1)
				422
				423	return ProcessStats (writer, processMap, len (timed_blocks), avgSampleLength, startTime, ltime)
				424
				425	def _parse_proc_stat_log(file):
				426	samples = []
				427	ltimes = None
				428	for time, lines in _parse_timed_blocks(file):
				429	# skip emtpy lines
				430	if not lines:
				431	continue
				432	# CPU times {user, nice, system, idle, io_wait, irq, softirq}
				433	tokens = lines[0].split()
				434	times = [ int(token) for token in tokens[1:] ]
				435	if ltimes:
				436	user = float((times[0] + times[1]) - (ltimes[0] + ltimes[1]))
				437	system = float((times[2] + times[5] + times[6]) - (ltimes[2] + ltimes[5] + ltimes[6]))
				438	idle = float(times[3] - ltimes[3])
				439	iowait = float(times[4] - ltimes[4])
				440
				441	aSum = max(user + system + idle + iowait, 1)
				442	samples.append( CPUSample(time, user/aSum, system/aSum, iowait/aSum) )
				443
				444	ltimes = times
				445	# skip the rest of statistics lines
				446	return samples
				447
Brad Bishop	6e60e8b	2018-02-01 10:27:11 -0500	[diff] [blame]	448	def _parse_reduced_log(file, sample_class):
				449	samples = []
				450	for time, lines in _parse_timed_blocks(file):
				451	samples.append(sample_class(time, *[float(x) for x in lines[0].split()]))
				452	return samples
				453
				454	def _parse_proc_disk_stat_log(file):
Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	455	"""
				456	Parse file for disk stats, but only look at the whole device, eg. sda,
				457	not sda1, sda2 etc. The format of relevant lines should be:
				458	{major minor name rio rmerge rsect ruse wio wmerge wsect wuse running use aveq}
				459	"""
				460	disk_regex_re = re.compile ('^([hsv]d.\|mtdblock\d\|mmcblk\d\|cciss/c\d+d\d+.*)$')
				461
				462	# this gets called an awful lot.
				463	def is_relevant_line(linetokens):
				464	if len(linetokens) != 14:
				465	return False
				466	disk = linetokens[2]
				467	return disk_regex_re.match(disk)
				468
				469	disk_stat_samples = []
				470
				471	for time, lines in _parse_timed_blocks(file):
				472	sample = DiskStatSample(time)
				473	relevant_tokens = [linetokens for linetokens in map (lambda x: x.split(),lines) if is_relevant_line(linetokens)]
				474
				475	for tokens in relevant_tokens:
				476	disk, rsect, wsect, use = tokens[2], int(tokens[5]), int(tokens[9]), int(tokens[12])
				477	sample.add_diskdata([rsect, wsect, use])
				478
				479	disk_stat_samples.append(sample)
				480
				481	disk_stats = []
				482	for sample1, sample2 in zip(disk_stat_samples[:-1], disk_stat_samples[1:]):
				483	interval = sample1.time - sample2.time
				484	if interval == 0:
				485	interval = 1
				486	sums = [ a - b for a, b in zip(sample1.diskdata, sample2.diskdata) ]
				487	readTput = sums[0] / 2.0 * 100.0 / interval
				488	writeTput = sums[1] / 2.0 * 100.0 / interval
Brad Bishop	6e60e8b	2018-02-01 10:27:11 -0500	[diff] [blame]	489	util = float( sums[2] ) / 10 / interval
Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	490	util = max(0.0, min(1.0, util))
				491	disk_stats.append(DiskSample(sample2.time, readTput, writeTput, util))
				492
				493	return disk_stats
				494
Brad Bishop	6e60e8b	2018-02-01 10:27:11 -0500	[diff] [blame]	495	def _parse_reduced_proc_meminfo_log(file):
				496	"""
				497	Parse file for global memory statistics with
				498	'MemTotal', 'MemFree', 'Buffers', 'Cached', 'SwapTotal', 'SwapFree' values
				499	(in that order) directly stored on one line.
				500	"""
				501	used_values = ('MemTotal', 'MemFree', 'Buffers', 'Cached', 'SwapTotal', 'SwapFree',)
				502
				503	mem_stats = []
				504	for time, lines in _parse_timed_blocks(file):
				505	sample = MemSample(time)
				506	for name, value in zip(used_values, lines[0].split()):
				507	sample.add_value(name, int(value))
				508
				509	if sample.valid():
				510	mem_stats.append(DrawMemSample(sample))
				511
				512	return mem_stats
				513
Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	514	def _parse_proc_meminfo_log(file):
				515	"""
				516	Parse file for global memory statistics.
				517	The format of relevant lines should be: ^key: value( unit)?
				518	"""
				519	used_values = ('MemTotal', 'MemFree', 'Buffers', 'Cached', 'SwapTotal', 'SwapFree',)
				520
				521	mem_stats = []
				522	meminfo_re = re.compile(r'([^ \t:]+):\s(\d+).')
				523
				524	for time, lines in _parse_timed_blocks(file):
				525	sample = MemSample(time)
				526
				527	for line in lines:
				528	match = meminfo_re.match(line)
				529	if not match:
Brad Bishop	6e60e8b	2018-02-01 10:27:11 -0500	[diff] [blame]	530	raise ParseError("Invalid meminfo line \"%s\"" % line)
Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	531	sample.add_value(match.group(1), int(match.group(2)))
				532
				533	if sample.valid():
Brad Bishop	6e60e8b	2018-02-01 10:27:11 -0500	[diff] [blame]	534	mem_stats.append(DrawMemSample(sample))
Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	535
				536	return mem_stats
				537
Brad Bishop	6e60e8b	2018-02-01 10:27:11 -0500	[diff] [blame]	538	def _parse_monitor_disk_log(file):
				539	"""
				540	Parse file with information about amount of diskspace used.
				541	The format of relevant lines should be: ^volume path: number-of-bytes?
				542	"""
				543	disk_stats = []
				544	diskinfo_re = re.compile(r'^(.+):\s*(\d+)$')
				545
				546	for time, lines in _parse_timed_blocks(file):
				547	sample = DiskSpaceSample(time)
				548
				549	for line in lines:
				550	match = diskinfo_re.match(line)
				551	if not match:
				552	raise ParseError("Invalid monitor_disk line \"%s\"" % line)
				553	sample.add_value(match.group(1), int(match.group(2)))
				554
				555	if sample.valid():
				556	disk_stats.append(sample)
				557
				558	return disk_stats
				559
Andrew Geissler	615f2f1	2022-07-15 14:00:58 -0500	[diff] [blame]	560	def _parse_pressure_logs(file, filename):
				561	"""
				562	Parse file for "some" pressure with 'avg10', 'avg60' 'avg300' and delta total values
				563	(in that order) directly stored on one line for both CPU and IO, based on filename.
				564	"""
				565	pressure_stats = []
				566	if filename == "cpu.log":
				567	SamplingClass = CPUPressureSample
Patrick Williams	db4c27e	2022-08-05 08:10:29 -0500	[diff] [blame]	568	elif filename == "memory.log":
				569	SamplingClass = MemPressureSample
Andrew Geissler	615f2f1	2022-07-15 14:00:58 -0500	[diff] [blame]	570	else:
				571	SamplingClass = IOPressureSample
				572	for time, lines in _parse_timed_blocks(file):
				573	for line in lines:
				574	if not line: continue
				575	tokens = line.split()
				576	avg10 = float(tokens[0])
				577	avg60 = float(tokens[1])
				578	avg300 = float(tokens[2])
				579	delta = float(tokens[3])
				580	pressure_stats.append(SamplingClass(time, avg10, avg60, avg300, delta))
				581
				582	return pressure_stats
Brad Bishop	6e60e8b	2018-02-01 10:27:11 -0500	[diff] [blame]	583
Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	584	# if we boot the kernel with: initcall_debug printk.time=1 we can
				585	# get all manner of interesting data from the dmesg output
				586	# We turn this into a pseudo-process tree: each event is
				587	# characterised by a
				588	# we don't try to detect a "kernel finished" state - since the kernel
				589	# continues to do interesting things after init is called.
				590	#
				591	# sample input:
				592	# [ 0.000000] ACPI: FACP 3f4fc000 000F4 (v04 INTEL Napa 00000001 MSFT 01000013)
				593	# ...
				594	# [ 0.039993] calling migration_init+0x0/0x6b @ 1
				595	# [ 0.039993] initcall migration_init+0x0/0x6b returned 1 after 0 usecs
				596	def _parse_dmesg(writer, file):
				597	timestamp_re = re.compile ("^\[\s(\d+\.\d+)\s]\s+(.*)$")
				598	split_re = re.compile ("^(\S+)\s+([\S\+_-]+) (.*)$")
				599	processMap = {}
				600	idx = 0
				601	inc = 1.0 / 1000000
				602	kernel = Process(writer, idx, "k-boot", 0, 0.1)
				603	processMap['k-boot'] = kernel
				604	base_ts = False
				605	max_ts = 0
Brad Bishop	c342db3	2019-05-15 21:57:59 -0400	[diff] [blame]	606	for line in file.read().split('\n'):
Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	607	t = timestamp_re.match (line)
				608	if t is None:
				609	# print "duff timestamp " + line
				610	continue
				611
				612	time_ms = float (t.group(1)) * 1000
				613	# looks like we may have a huge diff after the clock
				614	# has been set up. This could lead to huge graph:
				615	# so huge we will be killed by the OOM.
				616	# So instead of using the plain timestamp we will
				617	# use a delta to first one and skip the first one
				618	# for convenience
				619	if max_ts == 0 and not base_ts and time_ms > 1000:
				620	base_ts = time_ms
				621	continue
				622	max_ts = max(time_ms, max_ts)
				623	if base_ts:
				624	# print "fscked clock: used %f instead of %f" % (time_ms - base_ts, time_ms)
				625	time_ms -= base_ts
				626	m = split_re.match (t.group(2))
				627
				628	if m is None:
				629	continue
				630	# print "match: '%s'" % (m.group(1))
				631	type = m.group(1)
				632	func = m.group(2)
				633	rest = m.group(3)
				634
				635	if t.group(2).startswith ('Write protecting the') or \
				636	t.group(2).startswith ('Freeing unused kernel memory'):
				637	kernel.duration = time_ms / 10
				638	continue
				639
				640	# print "foo: '%s' '%s' '%s'" % (type, func, rest)
				641	if type == "calling":
				642	ppid = kernel.pid
				643	p = re.match ("\@ (\d+)", rest)
				644	if p is not None:
				645	ppid = float (p.group(1)) // 1000
				646	# print "match: '%s' ('%g') at '%s'" % (func, ppid, time_ms)
				647	name = func.split ('+', 1) [0]
				648	idx += inc
				649	processMap[func] = Process(writer, ppid + idx, name, ppid, time_ms / 10)
				650	elif type == "initcall":
				651	# print "finished: '%s' at '%s'" % (func, time_ms)
				652	if func in processMap:
				653	process = processMap[func]
				654	process.duration = (time_ms / 10) - process.start_time
				655	else:
				656	print("corrupted init call for %s" % (func))
				657
				658	elif type == "async_waiting" or type == "async_continuing":
				659	continue # ignore
				660
				661	return processMap.values()
				662
				663	#
				664	# Parse binary pacct accounting file output if we have one
				665	# cf. /usr/include/linux/acct.h
				666	#
				667	def _parse_pacct(writer, file):
				668	# read LE int32
				669	def _read_le_int32(file):
				670	byts = file.read(4)
				671	return (ord(byts[0])) \| (ord(byts[1]) << 8) \| \
				672	(ord(byts[2]) << 16) \| (ord(byts[3]) << 24)
				673
				674	parent_map = {}
				675	parent_map[0] = 0
				676	while file.read(1) != "": # ignore flags
				677	ver = file.read(1)
				678	if ord(ver) < 3:
				679	print("Invalid version 0x%x" % (ord(ver)))
				680	return None
				681
				682	file.seek (14, 1) # user, group etc.
				683	pid = _read_le_int32 (file)
				684	ppid = _read_le_int32 (file)
				685	# print "Parent of %d is %d" % (pid, ppid)
				686	parent_map[pid] = ppid
				687	file.seek (4 + 4 + 16, 1) # timings
				688	file.seek (16, 1) # acct_comm
				689	return parent_map
				690
				691	def _parse_paternity_log(writer, file):
				692	parent_map = {}
				693	parent_map[0] = 0
Brad Bishop	c342db3	2019-05-15 21:57:59 -0400	[diff] [blame]	694	for line in file.read().split('\n'):
Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	695	if not line:
				696	continue
				697	elems = line.split(' ') # <Child> <Parent>
				698	if len (elems) >= 2:
				699	# print "paternity of %d is %d" % (int(elems[0]), int(elems[1]))
				700	parent_map[int(elems[0])] = int(elems[1])
				701	else:
				702	print("Odd paternity line '%s'" % (line))
				703	return parent_map
				704
				705	def _parse_cmdline_log(writer, file):
				706	cmdLines = {}
Brad Bishop	c342db3	2019-05-15 21:57:59 -0400	[diff] [blame]	707	for block in file.read().split('\n\n'):
Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	708	lines = block.split('\n')
				709	if len (lines) >= 3:
				710	# print "Lines '%s'" % (lines[0])
				711	pid = int (lines[0])
				712	values = {}
				713	values['exe'] = lines[1].lstrip(':')
				714	args = lines[2].lstrip(':').split('\0')
				715	args.pop()
				716	values['args'] = args
				717	cmdLines[pid] = values
				718	return cmdLines
				719
Brad Bishop	6e60e8b	2018-02-01 10:27:11 -0500	[diff] [blame]	720	def _parse_bitbake_buildstats(writer, state, filename, file):
				721	paths = filename.split("/")
				722	task = paths[-1]
				723	pn = paths[-2]
				724	start = None
				725	end = None
				726	for line in file:
				727	if line.startswith("Started:"):
				728	start = int(float(line.split()[-1]))
				729	elif line.startswith("Ended:"):
				730	end = int(float(line.split()[-1]))
				731	if start and end:
				732	state.add_process(pn + ":" + task, start, end)
				733
Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	734	def get_num_cpus(headers):
				735	"""Get the number of CPUs from the system.cpu header property. As the
				736	CPU utilization graphs are relative, the number of CPUs currently makes
				737	no difference."""
				738	if headers is None:
				739	return 1
				740	if headers.get("system.cpu.num"):
				741	return max (int (headers.get("system.cpu.num")), 1)
				742	cpu_model = headers.get("system.cpu")
				743	if cpu_model is None:
				744	return 1
				745	mat = re.match(".*\$(\\d+)\$", cpu_model)
				746	if mat is None:
				747	return 1
				748	return max (int(mat.group(1)), 1)
				749
				750	def _do_parse(writer, state, filename, file):
				751	writer.info("parsing '%s'" % filename)
Andrew Geissler	82c905d	2020-04-13 13:39:40 -0500	[diff] [blame]	752	t1 = time.process_time()
Brad Bishop	6e60e8b	2018-02-01 10:27:11 -0500	[diff] [blame]	753	name = os.path.basename(filename)
				754	if name == "proc_diskstats.log":
				755	state.disk_stats = _parse_proc_disk_stat_log(file)
				756	elif name == "reduced_proc_diskstats.log":
				757	state.disk_stats = _parse_reduced_log(file, DiskSample)
				758	elif name == "proc_stat.log":
				759	state.cpu_stats = _parse_proc_stat_log(file)
				760	elif name == "reduced_proc_stat.log":
				761	state.cpu_stats = _parse_reduced_log(file, CPUSample)
				762	elif name == "proc_meminfo.log":
				763	state.mem_stats = _parse_proc_meminfo_log(file)
				764	elif name == "reduced_proc_meminfo.log":
				765	state.mem_stats = _parse_reduced_proc_meminfo_log(file)
				766	elif name == "cmdline2.log":
				767	state.cmdline = _parse_cmdline_log(writer, file)
				768	elif name == "monitor_disk.log":
				769	state.monitor_disk = _parse_monitor_disk_log(file)
Andrew Geissler	615f2f1	2022-07-15 14:00:58 -0500	[diff] [blame]	770	#pressure logs are in a subdirectory
				771	elif name == "cpu.log":
				772	state.cpu_pressure = _parse_pressure_logs(file, name)
				773	elif name == "io.log":
				774	state.io_pressure = _parse_pressure_logs(file, name)
Patrick Williams	db4c27e	2022-08-05 08:10:29 -0500	[diff] [blame]	775	elif name == "memory.log":
				776	state.mem_pressure = _parse_pressure_logs(file, name)
Brad Bishop	6e60e8b	2018-02-01 10:27:11 -0500	[diff] [blame]	777	elif not filename.endswith('.log'):
				778	_parse_bitbake_buildstats(writer, state, filename, file)
Andrew Geissler	82c905d	2020-04-13 13:39:40 -0500	[diff] [blame]	779	t2 = time.process_time()
Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	780	writer.info(" %s seconds" % str(t2-t1))
				781	return state
				782
				783	def parse_file(writer, state, filename):
				784	if state.filename is None:
				785	state.filename = filename
				786	basename = os.path.basename(filename)
Brad Bishop	c342db3	2019-05-15 21:57:59 -0400	[diff] [blame]	787	with open(filename, "r") as file:
Patrick Williams	c124f4f	2015-09-15 14:41:29 -0500	[diff] [blame]	788	return _do_parse(writer, state, filename, file)
				789
				790	def parse_paths(writer, state, paths):
				791	for path in paths:
				792	if state.filename is None:
				793	state.filename = path
				794	root, extension = os.path.splitext(path)
				795	if not(os.path.exists(path)):
				796	writer.warn("warning: path '%s' does not exist, ignoring." % path)
				797	continue
				798	#state.filename = path
				799	if os.path.isdir(path):
				800	files = sorted([os.path.join(path, f) for f in os.listdir(path)])
				801	state = parse_paths(writer, state, files)
				802	elif extension in [".tar", ".tgz", ".gz"]:
				803	if extension == ".gz":
				804	root, extension = os.path.splitext(root)
				805	if extension != ".tar":
				806	writer.warn("warning: can only handle zipped tar files, not zipped '%s'-files; ignoring" % extension)
				807	continue
				808	tf = None
				809	try:
				810	writer.status("parsing '%s'" % path)
				811	tf = tarfile.open(path, 'r:*')
				812	for name in tf.getnames():
				813	state = _do_parse(writer, state, name, tf.extractfile(name))
				814	except tarfile.ReadError as error:
				815	raise ParseError("error: could not read tarfile '%s': %s." % (path, error))
				816	finally:
				817	if tf != None:
				818	tf.close()
				819	else:
				820	state = parse_file(writer, state, path)
				821	return state
				822
				823	def split_res(res, options):
				824	""" Split the res into n pieces """
				825	res_list = []
				826	if options.num > 1:
				827	s_list = sorted(res.start.keys())
				828	frag_size = len(s_list) / float(options.num)
				829	# Need the top value
				830	if frag_size > int(frag_size):
				831	frag_size = int(frag_size + 1)
				832	else:
				833	frag_size = int(frag_size)
				834
				835	start = 0
				836	end = frag_size
				837	while start < end:
				838	state = Trace(None, [], None)
				839	if options.full_time:
				840	state.min = min(res.start.keys())
				841	state.max = max(res.end.keys())
				842	for i in range(start, end):
				843	# Add this line for reference
				844	#state.add_process(pn + ":" + task, start, end)
				845	for p in res.start[s_list[i]]:
				846	state.add_process(p, s_list[i], res.processes[p][1])
				847	start = end
				848	end = end + frag_size
				849	if end > len(s_list):
				850	end = len(s_list)
				851	res_list.append(state)
				852	else:
				853	res_list.append(res)
				854	return res_list