#
# SPDX-License-Identifier: GPL-2.0-only
#

"""
BitBake code parser

Parses actual code (i.e. python and shell) for functions and in-line
expressions. Used mainly to determine dependencies on other functions
and variables within the BitBake metadata. Also provides a cache for
this information in order to speed up processing.

(Not to be confused with the code that parses the metadata itself,
see lib/bb/parse/ for that).

NOTE: if you change how the parsers gather information you will almost
certainly need to increment CodeParserCache.CACHE_VERSION below so that
any existing codeparser cache gets invalidated. Additionally you'll need
to increment __cache_version__ in cache.py in order to ensure that old
recipe caches don't trigger "Taskhash mismatch" errors.

"""

import ast
import sys
import codegen
import logging
import pickle
import bb.pysh as pysh
import os.path
import bb.utils, bb.data
import hashlib
from itertools import chain
from bb.pysh import pyshyacc, pyshlex, sherrors
from bb.cache import MultiProcessCache

logger = logging.getLogger('BitBake.CodeParser')

def bbhash(s):
    return hashlib.sha256(s.encode("utf-8")).hexdigest()

def check_indent(codestr):
    """If the code is indented, add a top level piece of code to 'remove' the indentation"""

    i = 0
    while codestr[i] in ["\n", "\t", " "]:
        i = i + 1

    if i == 0:
        return codestr

    if codestr[i-1] == "\t" or codestr[i-1] == " ":
        if codestr[0] == "\n":
            # Since we're adding a line, we need to remove one line of any empty padding
            # to ensure line numbers are correct
            codestr = codestr[1:]
        return "if 1:\n" + codestr

    return codestr

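# A small illustration of check_indent() (the snippet below is invented for
# this comment): an indented fragment is wrapped in a top-level "if 1:" so
# that compile() accepts it, and one leading blank line is dropped to keep
# line numbers stable:
#
#   >>> check_indent("\n    d.setVar('A', '1')\n")
#   "if 1:\n    d.setVar('A', '1')\n"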

# pickle (in Python 2.7.3 at least) handles duplicated data poorly when
# pickling and unpickling. Combined with duplicate objects, the result is a
# mess.
#
# When the sets are originally created, Python calls intern() on the set keys,
# which significantly improves memory usage. Sadly the pickle/unpickle process
# doesn't call intern() on the keys, so the same strings end up duplicated in
# memory. It also means pickle saves the same string multiple times in the
# cache file.
#
# By giving the shell and python cacheline objects getstate/setstate methods,
# we force object creation through our own routine, where we can call intern
# (via internSet).
#
# We also use hashable frozensets and ensure we use references to these so that
# duplicates can be removed, both in memory and in the resulting pickled data.
#
# By playing these games, the cache file shrinks dramatically, meaning faster
# load times, and the reloaded cache files also consume much less memory.
# Smaller cache files, faster load times and lower memory usage are all good.
#
# A custom getstate/setstate using tuples reduces the cache size by about 15%
# by avoiding duplication of the attribute names.

class SetCache(object):
    def __init__(self):
        self.setcache = {}

    def internSet(self, items):

        new = []
        for i in items:
            new.append(sys.intern(i))
        s = frozenset(new)
        h = hash(s)
        if h in self.setcache:
            return self.setcache[h]
        self.setcache[h] = s
        return s

codecache = SetCache()
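# Illustration (the values below are invented for this comment): equal inputs,
# regardless of ordering, intern to the same frozenset object, so only one
# copy ends up in memory and in the pickled cache:
#
#   >>> a = codecache.internSet(["FOO", "BAR"])
#   >>> b = codecache.internSet(["BAR", "FOO"])
#   >>> a is b
#   True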

class pythonCacheLine(object):
    def __init__(self, refs, execs, contains):
        self.refs = codecache.internSet(refs)
        self.execs = codecache.internSet(execs)
        self.contains = {}
        for c in contains:
            self.contains[c] = codecache.internSet(contains[c])

    def __getstate__(self):
        return (self.refs, self.execs, self.contains)

    def __setstate__(self, state):
        (refs, execs, contains) = state
        self.__init__(refs, execs, contains)
    def __hash__(self):
        l = (hash(self.refs), hash(self.execs))
        for c in sorted(self.contains.keys()):
            l = l + (c, hash(self.contains[c]))
        return hash(l)
    def __repr__(self):
        return " ".join([str(self.refs), str(self.execs), str(self.contains)])


class shellCacheLine(object):
    def __init__(self, execs):
        self.execs = codecache.internSet(execs)

    def __getstate__(self):
        return (self.execs)

    def __setstate__(self, state):
        (execs) = state
        self.__init__(execs)
    def __hash__(self):
        return hash(self.execs)
    def __repr__(self):
        return str(self.execs)

class CodeParserCache(MultiProcessCache):
    cache_file_name = "bb_codeparser.dat"
    # NOTE: you must increment this if you change how the parsers gather information,
    # so that an existing cache gets invalidated. Additionally you'll need
    # to increment __cache_version__ in cache.py in order to ensure that old
    # recipe caches don't trigger "Taskhash mismatch" errors.
    CACHE_VERSION = 11

    def __init__(self):
        MultiProcessCache.__init__(self)
        self.pythoncache = self.cachedata[0]
        self.shellcache = self.cachedata[1]
        self.pythoncacheextras = self.cachedata_extras[0]
        self.shellcacheextras = self.cachedata_extras[1]

        # To avoid duplication in the codeparser cache, keep
        # a lookup of hashes of objects we already have
        self.pythoncachelines = {}
        self.shellcachelines = {}

    def newPythonCacheLine(self, refs, execs, contains):
        cacheline = pythonCacheLine(refs, execs, contains)
        h = hash(cacheline)
        if h in self.pythoncachelines:
            return self.pythoncachelines[h]
        self.pythoncachelines[h] = cacheline
        return cacheline

    def newShellCacheLine(self, execs):
        cacheline = shellCacheLine(execs)
        h = hash(cacheline)
        if h in self.shellcachelines:
            return self.shellcachelines[h]
        self.shellcachelines[h] = cacheline
        return cacheline

    def init_cache(self, d):
        # Check if we already have the caches
        if self.pythoncache:
            return

        MultiProcessCache.init_cache(self, d)

        # cachedata gets re-assigned in the parent
        self.pythoncache = self.cachedata[0]
        self.shellcache = self.cachedata[1]

    def create_cachedata(self):
        data = [{}, {}]
        return data

codeparsercache = CodeParserCache()
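# Illustration (example values invented for this comment): cache lines with
# equal contents hash identically, so a repeated request returns the object
# already held by the cache rather than a new one:
#
#   >>> first = codeparsercache.newPythonCacheLine({"FOO"}, set(), {})
#   >>> second = codeparsercache.newPythonCacheLine({"FOO"}, set(), {})
#   >>> first is second
#   True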

def parser_cache_init(d):
    codeparsercache.init_cache(d)

def parser_cache_save():
    codeparsercache.save_extras()

def parser_cache_savemerge():
    codeparsercache.save_merge()

Logger = logging.getLoggerClass()
class BufferedLogger(Logger):
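    # Collects log records in memory; flush() forwards them to the target
    # logger (honouring its level) and then empties the buffer, so warnings
    # gathered while parsing can be emitted later.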
    def __init__(self, name, level=0, target=None):
        Logger.__init__(self, name)
        self.setLevel(level)
        self.buffer = []
        self.target = target

    def handle(self, record):
        self.buffer.append(record)

    def flush(self):
        for record in self.buffer:
            if self.target.isEnabledFor(record.levelno):
                self.target.handle(record)
        self.buffer = []

class PythonParser():
    getvars = (".getVar", ".appendVar", ".prependVar", "oe.utils.conditional")
    getvarflags = (".getVarFlag", ".appendVarFlag", ".prependVarFlag")
    containsfuncs = ("bb.utils.contains", "base_contains")
    containsanyfuncs = ("bb.utils.contains_any", "bb.utils.filter")
    execfuncs = ("bb.build.exec_func", "bb.build.exec_task")

    def warn(self, func, arg):
        """Warn about calls of bitbake APIs which pass a non-literal
        argument for the variable name, as we're not able to track such
        a reference.
        """

        try:
            funcstr = codegen.to_source(func)
            argstr = codegen.to_source(arg)
        except TypeError:
            self.log.debug(2, 'Failed to convert function and argument to source form')
        else:
            self.log.debug(1, self.unhandled_message % (funcstr, argstr))

    def visit_Call(self, node):
        name = self.called_node_name(node.func)
        if name and (name.endswith(self.getvars) or name.endswith(self.getvarflags) or name in self.containsfuncs or name in self.containsanyfuncs):
            if isinstance(node.args[0], ast.Str):
                varname = node.args[0].s
                if name in self.containsfuncs and isinstance(node.args[1], ast.Str):
                    if varname not in self.contains:
                        self.contains[varname] = set()
                    self.contains[varname].add(node.args[1].s)
                elif name in self.containsanyfuncs and isinstance(node.args[1], ast.Str):
                    if varname not in self.contains:
                        self.contains[varname] = set()
                    self.contains[varname].update(node.args[1].s.split())
                elif name.endswith(self.getvarflags):
                    if isinstance(node.args[1], ast.Str):
                        self.references.add('%s[%s]' % (varname, node.args[1].s))
                    else:
                        self.warn(node.func, node.args[1])
                else:
                    self.references.add(varname)
            else:
                self.warn(node.func, node.args[0])
        elif name and name.endswith(".expand"):
            if isinstance(node.args[0], ast.Str):
                value = node.args[0].s
                d = bb.data.init()
                parser = d.expandWithRefs(value, self.name)
                self.references |= parser.references
                self.execs |= parser.execs
                for varname in parser.contains:
                    if varname not in self.contains:
                        self.contains[varname] = set()
                    self.contains[varname] |= parser.contains[varname]
        elif name in self.execfuncs:
            if isinstance(node.args[0], ast.Str):
                self.var_execs.add(node.args[0].s)
            else:
                self.warn(node.func, node.args[0])
        elif name and isinstance(node.func, (ast.Name, ast.Attribute)):
            self.execs.add(name)

    def called_node_name(self, node):
        """Given a called node, return its original string form"""
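        # For example, the func node of a call to bb.utils.contains(...) is
        # Attribute(Attribute(Name('bb'), 'utils'), 'contains') and is
        # returned here as the dotted string "bb.utils.contains".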
        components = []
        while node:
            if isinstance(node, ast.Attribute):
                components.append(node.attr)
                node = node.value
            elif isinstance(node, ast.Name):
                components.append(node.id)
                return '.'.join(reversed(components))
            else:
                break

    def __init__(self, name, log):
        self.name = name
        self.var_execs = set()
        self.contains = {}
        self.execs = set()
        self.references = set()
        self.log = BufferedLogger('BitBake.Data.PythonParser', logging.DEBUG, log)

        self.unhandled_message = "in call of %s, argument '%s' is not a string literal"
        self.unhandled_message = "while parsing %s, %s" % (name, self.unhandled_message)

    def parse_python(self, node, lineno=0, filename="<string>"):
        if not node or not node.strip():
            return

        h = bbhash(str(node))

        if h in codeparsercache.pythoncache:
            self.references = set(codeparsercache.pythoncache[h].refs)
            self.execs = set(codeparsercache.pythoncache[h].execs)
            self.contains = {}
            for i in codeparsercache.pythoncache[h].contains:
                self.contains[i] = set(codeparsercache.pythoncache[h].contains[i])
            return

        if h in codeparsercache.pythoncacheextras:
            self.references = set(codeparsercache.pythoncacheextras[h].refs)
            self.execs = set(codeparsercache.pythoncacheextras[h].execs)
            self.contains = {}
            for i in codeparsercache.pythoncacheextras[h].contains:
                self.contains[i] = set(codeparsercache.pythoncacheextras[h].contains[i])
            return

        # We can't pass a starting line number to compile(), but we can pad the
        # code with blank lines so that reported line numbers are correct
        node = "\n" * int(lineno) + node
        code = compile(check_indent(str(node)), filename, "exec",
                       ast.PyCF_ONLY_AST)

        for n in ast.walk(code):
            if n.__class__.__name__ == "Call":
                self.visit_Call(n)

        self.execs.update(self.var_execs)

        codeparsercache.pythoncacheextras[h] = codeparsercache.newPythonCacheLine(self.references, self.execs, self.contains)

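# Illustrative use of PythonParser (the task name, variable names and logger
# below are invented for this comment); parsing records variable references
# and executed functions without running the code:
#
#   >>> p = PythonParser("do_example", logging.getLogger("BitBake"))
#   >>> p.parse_python("d.getVar('FOO')\nbb.build.exec_func('do_other', d)\n")
#   >>> sorted(p.references)
#   ['FOO']
#   >>> sorted(p.execs)
#   ['do_other']
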
class ShellParser():
    def __init__(self, name, log):
        self.funcdefs = set()
        self.allexecs = set()
        self.execs = set()
        self.log = BufferedLogger('BitBake.Data.%s' % name, logging.DEBUG, log)
        self.unhandled_template = "unable to handle non-literal command '%s'"
        self.unhandled_template = "while parsing %s, %s" % (name, self.unhandled_template)

    def parse_shell(self, value):
        """Parse the supplied shell code in a string, returning the external
        commands it executes.
        """

        h = bbhash(str(value))

        if h in codeparsercache.shellcache:
            self.execs = set(codeparsercache.shellcache[h].execs)
            return self.execs

        if h in codeparsercache.shellcacheextras:
            self.execs = set(codeparsercache.shellcacheextras[h].execs)
            return self.execs

        self._parse_shell(value)
        self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs)

        codeparsercache.shellcacheextras[h] = codeparsercache.newShellCacheLine(self.execs)

        return self.execs

    def _parse_shell(self, value):
        try:
            tokens, _ = pyshyacc.parse(value, eof=True, debug=False)
        except Exception:
            bb.error('Error while parsing shell code; the last 5 lines are:\n%s' % '\n'.join(value.split('\n')[-5:]))
            raise

        self.process_tokens(tokens)

    def process_tokens(self, tokens):
        """Process a supplied portion of the syntax tree as returned by
        pyshyacc.parse.
        """
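        # Each token is a (name, value) pair. The handlers below map a token
        # name to a function returning (more_tokens, words): nested tokens are
        # recursed into and words are scanned for executed commands.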

        def function_definition(value):
            self.funcdefs.add(value.name)
            return [value.body], None

        def case_clause(value):
            # Element 0 of each item in the case is the list of patterns, and
            # Element 1 of each item in the case is the list of commands to be
            # executed when that pattern matches.
            words = chain(*[item[0] for item in value.items])
            cmds = chain(*[item[1] for item in value.items])
            return cmds, words

        def if_clause(value):
            main = chain(value.cond, value.if_cmds)
            rest = value.else_cmds
            if isinstance(rest, tuple) and rest[0] == "elif":
                return chain(main, if_clause(rest[1]))
            else:
                return chain(main, rest)

        def simple_command(value):
            return None, chain(value.words, (assign[1] for assign in value.assigns))

        token_handlers = {
            "and_or": lambda x: ((x.left, x.right), None),
            "async": lambda x: ([x], None),
            "brace_group": lambda x: (x.cmds, None),
            "for_clause": lambda x: (x.cmds, x.items),
            "function_definition": function_definition,
            "if_clause": lambda x: (if_clause(x), None),
            "pipeline": lambda x: (x.commands, None),
            "redirect_list": lambda x: ([x.cmd], None),
            "subshell": lambda x: (x.cmds, None),
            "while_clause": lambda x: (chain(x.condition, x.cmds), None),
            "until_clause": lambda x: (chain(x.condition, x.cmds), None),
            "simple_command": simple_command,
            "case_clause": case_clause,
        }

        def process_token_list(tokens):
            for token in tokens:
                if isinstance(token, list):
                    process_token_list(token)
                    continue
                name, value = token
                try:
                    more_tokens, words = token_handlers[name](value)
                except KeyError:
                    raise NotImplementedError("Unsupported token type " + name)

                if more_tokens:
                    self.process_tokens(more_tokens)

                if words:
                    self.process_words(words)

        process_token_list(tokens)

    def process_words(self, words):
        """Process a set of 'words' in pyshyacc parlance, which includes
        extraction of executed commands from $() blocks, as well as grabbing
        the command name argument.
        """
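        # For example, a word such as "$(gzip -9 logfile)" (invented for this
        # comment) is re-parsed so that "gzip" also ends up in self.allexecs.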

        words = list(words)
        for word in list(words):
            wtree = pyshlex.make_wordtree(word[1])
            for part in wtree:
                if not isinstance(part, list):
                    continue

                if part[0] in ('`', '$('):
                    command = pyshlex.wordtree_as_string(part[1:-1])
                    self._parse_shell(command)

                    if word[0] in ("cmd_name", "cmd_word"):
                        if word in words:
                            words.remove(word)

        usetoken = False
        for word in words:
            if word[0] in ("cmd_name", "cmd_word") or \
               (usetoken and word[0] == "TOKEN"):
                if "=" in word[1]:
                    usetoken = True
                    continue

                cmd = word[1]
                if cmd.startswith("$"):
                    self.log.debug(1, self.unhandled_template % cmd)
                elif cmd == "eval":
                    command = " ".join(word for _, word in words[1:])
                    self._parse_shell(command)
                else:
                    self.allexecs.add(cmd)
                break
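
# Illustrative use of ShellParser (the function name and commands below are
# invented for this comment): shell function definitions are recorded but
# excluded from the returned set of external commands:
#
#   >>> s = ShellParser("do_example", logging.getLogger("BitBake"))
#   >>> sorted(s.parse_shell("do_deploy() {\n    install -d ${D}\n}\ndo_deploy\n"))
#   ['install']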