| Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 1 | import ast | 
|  | 2 | import codegen | 
|  | 3 | import logging | 
|  | 4 | import os.path | 
|  | 5 | import bb.utils, bb.data | 
|  | 6 | from itertools import chain | 
|  | 7 | from pysh import pyshyacc, pyshlex, sherrors | 
|  | 8 | from bb.cache import MultiProcessCache | 
|  | 9 |  | 
|  | 10 |  | 
|  | 11 | logger = logging.getLogger('BitBake.CodeParser') | 
|  | 12 |  | 
|  | 13 | try: | 
|  | 14 | import cPickle as pickle | 
|  | 15 | except ImportError: | 
|  | 16 | import pickle | 
|  | 17 | logger.info('Importing cPickle failed.  Falling back to a very slow implementation.') | 
|  | 18 |  | 
|  | 19 |  | 
def check_indent(codestr):
    """If the code is indented, add a top level piece of code to 'remove' the indentation"""

    i = 0
    # Bounds check: an empty or all-whitespace string must not run off the
    # end of codestr (the unguarded loop raised IndexError for such input)
    while i < len(codestr) and codestr[i] in ["\n", "\t", " "]:
        i = i + 1

    if i == 0:
        return codestr

    # All-whitespace code: nothing meaningful to re-indent, return as-is
    if i == len(codestr):
        return codestr

    if codestr[i - 1] == "\t" or codestr[i - 1] == " ":
        if codestr[0] == "\n":
            # Since we're adding a line, we need to remove one line of any empty padding
            # to ensure line numbers are correct
            codestr = codestr[1:]
        return "if 1:\n" + codestr

    return codestr
|  | 38 |  | 
|  | 39 |  | 
|  | 40 | # Basically pickle, in python 2.7.3 at least, does badly with data duplication | 
|  | 41 | # upon pickling and unpickling. Combine this with duplicate objects and things | 
|  | 42 | # are a mess. | 
|  | 43 | # | 
|  | 44 | # When the sets are originally created, python calls intern() on the set keys | 
|  | 45 | # which significantly improves memory usage. Sadly the pickle/unpickle process | 
|  | 46 | # doesn't call intern() on the keys and results in the same strings being duplicated | 
|  | 47 | # in memory. This also means pickle will save the same string multiple times in | 
|  | 48 | # the cache file. | 
|  | 49 | # | 
|  | 50 | # By having shell and python cacheline objects with setstate/getstate, we force | 
|  | 51 | # the object creation through our own routine where we can call intern (via internSet). | 
|  | 52 | # | 
|  | 53 | # We also use hashable frozensets and ensure we use references to these so that | 
|  | 54 | # duplicates can be removed, both in memory and in the resulting pickled data. | 
|  | 55 | # | 
|  | 56 | # By playing these games, the size of the cache file shrinks dramatically | 
|  | 57 | # meaning faster load times and the reloaded cache files also consume much less | 
|  | 58 | # memory. Smaller cache files, faster load times and lower memory usage is good. | 
|  | 59 | # | 
|  | 60 | # A custom getstate/setstate using tuples is actually worth 15% cachesize by | 
|  | 61 | # avoiding duplication of the attribute names! | 
|  | 62 |  | 
class SetCache(object):
    """Cache of interned frozensets so that equal sets share one object.

    Sharing a single object per distinct set keeps both in-memory usage and
    the pickled cache file small (see the comment block above).
    """
    def __init__(self):
        # Maps frozenset -> the canonical instance of that frozenset
        self.setcache = {}

    def internSet(self, items):
        """Return the canonical interned frozenset equal to frozenset(items).

        Each item string is intern()ed so duplicate strings share storage.
        """
        new = []
        for i in items:
            new.append(intern(i))
        s = frozenset(new)
        # Key on the set itself rather than hash(s): dict lookup resolves
        # hash collisions via equality, whereas a raw-hash key would silently
        # return the wrong set if two distinct sets ever shared a hash.
        return self.setcache.setdefault(s, s)
|  | 77 |  | 
# Module-level shared cache of interned sets, used by all cacheline objects
codecache = SetCache()
|  | 79 |  | 
class pythonCacheLine(object):
    """One cached python parse result: the variable references, executed
    function names and contains-checks found in a piece of python code.

    All sets are interned via codecache so duplicates share one object,
    and getstate/setstate use a bare tuple to keep pickles small (see the
    comment block above).
    """

    def __init__(self, refs, execs, contains):
        self.refs = codecache.internSet(refs)
        self.execs = codecache.internSet(execs)
        # Re-intern each per-variable value set as well
        self.contains = dict((var, codecache.internSet(vals))
                             for var, vals in contains.items())

    def __getstate__(self):
        # Plain tuple: avoids pickling attribute names per instance
        return (self.refs, self.execs, self.contains)

    def __setstate__(self, state):
        (refs, execs, contains) = state
        # Route unpickling through __init__ so everything is re-interned
        self.__init__(refs, execs, contains)

    def __hash__(self):
        # Fold refs, execs and the sorted contains entries into one tuple
        parts = [hash(self.refs), hash(self.execs)]
        for var in sorted(self.contains.keys()):
            parts.extend((var, hash(self.contains[var])))
        return hash(tuple(parts))

    def __repr__(self):
        return "%s %s %s" % (str(self.refs), str(self.execs), str(self.contains))
|  | 102 |  | 
class shellCacheLine(object):
    """One cached shell parse result: the set of commands a fragment executes.

    The execs set is interned via codecache; getstate/setstate route through
    __init__ so unpickled instances are re-interned too.
    """

    def __init__(self, execs):
        self.execs = codecache.internSet(execs)

    def __getstate__(self):
        # Pickle the execs set directly (note: deliberately not a tuple)
        return self.execs

    def __setstate__(self, state):
        self.__init__(state)

    def __hash__(self):
        return hash(self.execs)

    def __repr__(self):
        return str(self.execs)
|  | 117 |  | 
class CodeParserCache(MultiProcessCache):
    """Multi-process persistent cache of python and shell parse results.

    Slot 0 of the cache data holds python results, slot 1 shell results;
    '*extras' are the entries newly added by this process.
    """
    cache_file_name = "bb_codeparser.dat"
    CACHE_VERSION = 7

    def __init__(self):
        MultiProcessCache.__init__(self)
        self.pythoncache = self.cachedata[0]
        self.shellcache = self.cachedata[1]
        self.pythoncacheextras = self.cachedata_extras[0]
        self.shellcacheextras = self.cachedata_extras[1]

        # To avoid duplication in the codeparser cache, keep
        # a lookup of hashes of objects we already have
        self.pythoncachelines = {}
        self.shellcachelines = {}

    def newPythonCacheLine(self, refs, execs, contains):
        """Return a deduplicated pythonCacheLine for the given data."""
        candidate = pythonCacheLine(refs, execs, contains)
        # Reuse an existing equal cacheline if one was already created
        return self.pythoncachelines.setdefault(hash(candidate), candidate)

    def newShellCacheLine(self, execs):
        """Return a deduplicated shellCacheLine for the given execs set."""
        candidate = shellCacheLine(execs)
        return self.shellcachelines.setdefault(hash(candidate), candidate)

    def init_cache(self, d):
        # Nothing to do if the caches were already loaded
        if self.pythoncache:
            return

        MultiProcessCache.init_cache(self, d)

        # cachedata gets re-assigned in the parent
        self.pythoncache = self.cachedata[0]
        self.shellcache = self.cachedata[1]

    def create_cachedata(self):
        # One dict for python results, one for shell results
        return [{}, {}]
|  | 164 |  | 
# Module-level singleton cache instance shared by all parsers in this process
codeparsercache = CodeParserCache()

def parser_cache_init(d):
    """Load the on-disk codeparser cache (thin wrapper over the singleton)."""
    codeparsercache.init_cache(d)

def parser_cache_save():
    """Write out the cache entries newly added by this process."""
    codeparsercache.save_extras()

def parser_cache_savemerge():
    """Merge the per-process extras files back into the main cache file."""
    codeparsercache.save_merge()
| Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 175 |  | 
# Subclass whatever logger class is currently registered with logging
Logger = logging.getLoggerClass()

class BufferedLogger(Logger):
    """A logger that queues records in memory and only forwards them to a
    target logger when flush() is called."""

    def __init__(self, name, level=0, target=None):
        Logger.__init__(self, name)
        self.setLevel(level)
        self.buffer = []
        self.target = target

    def handle(self, record):
        # Queue the record instead of emitting it immediately
        self.buffer.append(record)

    def flush(self):
        # Forward everything queued so far to the target, then reset
        for queued in self.buffer:
            self.target.handle(queued)
        self.buffer = []
|  | 191 |  | 
class PythonParser():
    """Walk the AST of a python code fragment and record the datastore
    variables it references (self.references), the functions it executes
    (self.execs) and the bb.utils.contains()-style checks it performs
    (self.contains)."""

    # Datastore accessor method suffixes whose first argument names a variable
    getvars = (".getVar", ".appendVar", ".prependVar")
    # Functions whose (variable, value) string arguments express a 'contains' check
    containsfuncs = ("bb.utils.contains", "base_contains", "bb.utils.contains_any")
    # Functions which execute another bitbake function/task named by their argument
    execfuncs = ("bb.build.exec_func", "bb.build.exec_task")

    def warn(self, func, arg):
        """Warn about calls of bitbake APIs which pass a non-literal
        argument for the variable name, as we're not able to track such
        a reference.
        """

        try:
            funcstr = codegen.to_source(func)
            argstr = codegen.to_source(arg)
        except TypeError:
            self.log.debug(2, 'Failed to convert function and argument to source form')
        else:
            self.log.debug(1, self.unhandled_message % (funcstr, argstr))

    def visit_Call(self, node):
        """Record the effect of a single ast.Call node on this parser's state."""
        name = self.called_node_name(node.func)
        # Precedence note: this parses as
        # (name and name.endswith(getvars)) or (name in containsfuncs)
        if name and name.endswith(self.getvars) or name in self.containsfuncs:
            if isinstance(node.args[0], ast.Str):
                varname = node.args[0].s
                if name in self.containsfuncs and isinstance(node.args[1], ast.Str):
                    # contains-style call: record variable -> checked value
                    if varname not in self.contains:
                        self.contains[varname] = set()
                    self.contains[varname].add(node.args[1].s)
                else:
                    self.references.add(node.args[0].s)
            else:
                # Variable name isn't a string literal - can't track it
                self.warn(node.func, node.args[0])
        elif name and name.endswith(".expand"):
            if isinstance(node.args[0], ast.Str):
                # Expand the literal argument and absorb whatever
                # references/execs/contains the expansion itself performs
                value = node.args[0].s
                d = bb.data.init()
                parser = d.expandWithRefs(value, self.name)
                self.references |= parser.references
                self.execs |= parser.execs
                for varname in parser.contains:
                    if varname not in self.contains:
                        self.contains[varname] = set()
                    self.contains[varname] |= parser.contains[varname]
        elif name in self.execfuncs:
            if isinstance(node.args[0], ast.Str):
                # Function/task executed indirectly by name
                self.var_execs.add(node.args[0].s)
            else:
                self.warn(node.func, node.args[0])
        elif name and isinstance(node.func, (ast.Name, ast.Attribute)):
            # Any other direct call: record the dotted called name itself
            self.execs.add(name)

    def called_node_name(self, node):
        """Given a called node, return its original string form"""
        components = []
        # Walk Attribute chains down to the base Name, collecting parts;
        # returns None (implicitly) for call targets that aren't plain
        # dotted names (e.g. subscripts or call results)
        while node:
            if isinstance(node, ast.Attribute):
                components.append(node.attr)
                node = node.value
            elif isinstance(node, ast.Name):
                components.append(node.id)
                return '.'.join(reversed(components))
            else:
                break

    def __init__(self, name, log):
        self.name = name
        self.var_execs = set()
        self.contains = {}
        self.execs = set()
        self.references = set()
        # Buffer log messages; the caller decides when/whether to flush them
        self.log = BufferedLogger('BitBake.Data.PythonParser', logging.DEBUG, log)

        self.unhandled_message = "in call of %s, argument '%s' is not a string literal"
        self.unhandled_message = "while parsing %s, %s" % (name, self.unhandled_message)

    def parse_python(self, node, lineno=0, filename="<string>"):
        """Parse the python code in 'node', populating self.references,
        self.execs and self.contains. Results are memoized in the
        codeparser cache, keyed on a hash of the code string."""
        if not node or not node.strip():
            return

        h = hash(str(node))

        # Previously-seen code: reuse the cached result (copy the sets so
        # callers can mutate them without corrupting the cache)
        if h in codeparsercache.pythoncache:
            self.references = set(codeparsercache.pythoncache[h].refs)
            self.execs = set(codeparsercache.pythoncache[h].execs)
            self.contains = {}
            for i in codeparsercache.pythoncache[h].contains:
                self.contains[i] = set(codeparsercache.pythoncache[h].contains[i])
            return

        if h in codeparsercache.pythoncacheextras:
            self.references = set(codeparsercache.pythoncacheextras[h].refs)
            self.execs = set(codeparsercache.pythoncacheextras[h].execs)
            self.contains = {}
            for i in codeparsercache.pythoncacheextras[h].contains:
                self.contains[i] = set(codeparsercache.pythoncacheextras[h].contains[i])
            return

        # We can't add to the linenumbers for compile, we can pad to the correct number of blank lines though
        node = "\n" * int(lineno) + node
        code = compile(check_indent(str(node)), filename, "exec",
            ast.PyCF_ONLY_AST)

        # Only Call nodes matter for reference/exec extraction
        for n in ast.walk(code):
            if n.__class__.__name__ == "Call":
                self.visit_Call(n)

        self.execs.update(self.var_execs)

        # Record the fresh result in this process's extras cache
        codeparsercache.pythoncacheextras[h] = codeparsercache.newPythonCacheLine(self.references, self.execs, self.contains)
|  | 301 |  | 
class ShellParser():
    """Parse shell code (via the pysh parser) and collect the names of the
    external commands it executes."""

    def __init__(self, name, log):
        # Names of shell functions defined in the code (calls to these are
        # not external commands)
        self.funcdefs = set()
        # Every command name seen, including calls to locally defined functions
        self.allexecs = set()
        # allexecs minus funcdefs: the externally executed commands
        self.execs = set()
        # Buffer log messages; the caller decides when/whether to flush them
        self.log = BufferedLogger('BitBake.Data.%s' % name, logging.DEBUG, log)
        self.unhandled_template = "unable to handle non-literal command '%s'"
        self.unhandled_template = "while parsing %s, %s" % (name, self.unhandled_template)

    def parse_shell(self, value):
        """Parse the supplied shell code in a string, returning the external
        commands it executes.
        """

        h = hash(str(value))

        # Memoized results from the main cache or this process's extras
        if h in codeparsercache.shellcache:
            self.execs = set(codeparsercache.shellcache[h].execs)
            return self.execs

        if h in codeparsercache.shellcacheextras:
            self.execs = set(codeparsercache.shellcacheextras[h].execs)
            return self.execs

        self._parse_shell(value)
        self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs)

        codeparsercache.shellcacheextras[h] = codeparsercache.newShellCacheLine(self.execs)

        return self.execs

    def _parse_shell(self, value):
        """Run the pysh parser over value and process the resulting tokens.

        Raises sherrors.ShellSyntaxError on an incomplete (EOF-truncated)
        shell fragment."""
        try:
            tokens, _ = pyshyacc.parse(value, eof=True, debug=False)
        except pyshlex.NeedMore:
            raise sherrors.ShellSyntaxError("Unexpected EOF")

        for token in tokens:
            self.process_tokens(token)

    def process_tokens(self, tokens):
        """Process a supplied portion of the syntax tree as returned by
        pyshyacc.parse.
        """

        def function_definition(value):
            # Remember the function name so later calls to it aren't
            # counted as external commands
            self.funcdefs.add(value.name)
            return [value.body], None

        def case_clause(value):
            # Element 0 of each item in the case is the list of patterns, and
            # Element 1 of each item in the case is the list of commands to be
            # executed when that pattern matches.
            words = chain(*[item[0] for item in value.items])
            cmds  = chain(*[item[1] for item in value.items])
            return cmds, words

        def if_clause(value):
            main = chain(value.cond, value.if_cmds)
            rest = value.else_cmds
            if isinstance(rest, tuple) and rest[0] == "elif":
                # elif chains nest: recurse into the nested if_clause
                return chain(main, if_clause(rest[1]))
            else:
                return chain(main, rest)

        def simple_command(value):
            return None, chain(value.words, (assign[1] for assign in value.assigns))

        # Each handler maps a token to (more_tokens, words): sub-trees to
        # recurse into, and word lists to scan for command names
        token_handlers = {
            "and_or": lambda x: ((x.left, x.right), None),
            "async": lambda x: ([x], None),
            "brace_group": lambda x: (x.cmds, None),
            "for_clause": lambda x: (x.cmds, x.items),
            "function_definition": function_definition,
            "if_clause": lambda x: (if_clause(x), None),
            "pipeline": lambda x: (x.commands, None),
            "redirect_list": lambda x: ([x.cmd], None),
            "subshell": lambda x: (x.cmds, None),
            "while_clause": lambda x: (chain(x.condition, x.cmds), None),
            "until_clause": lambda x: (chain(x.condition, x.cmds), None),
            "simple_command": simple_command,
            "case_clause": case_clause,
        }

        for token in tokens:
            name, value = token
            try:
                more_tokens, words = token_handlers[name](value)
            except KeyError:
                raise NotImplementedError("Unsupported token type " + name)

            if more_tokens:
                self.process_tokens(more_tokens)

            if words:
                self.process_words(words)

    def process_words(self, words):
        """Process a set of 'words' in pyshyacc parlance, which includes
        extraction of executed commands from $() blocks, as well as grabbing
        the command name argument.
        """

        words = list(words)
        # Iterate a copy, since words may be mutated (removals) below
        for word in list(words):
            wtree = pyshlex.make_wordtree(word[1])
            for part in wtree:
                if not isinstance(part, list):
                    continue

                if part[0] in ('`', '$('):
                    # Command substitution: parse its contents as shell too
                    command = pyshlex.wordtree_as_string(part[1:-1])
                    self._parse_shell(command)

                    # A command name built from a substitution can't be
                    # resolved statically - drop it from the scan below
                    if word[0] in ("cmd_name", "cmd_word"):
                        if word in words:
                            words.remove(word)

        # Leading VAR=value assignments mean the real command name arrives
        # as a later TOKEN word; usetoken tracks that state
        usetoken = False
        for word in words:
            if word[0] in ("cmd_name", "cmd_word") or \
               (usetoken and word[0] == "TOKEN"):
                if "=" in word[1]:
                    usetoken = True
                    continue

                cmd = word[1]
                if cmd.startswith("$"):
                    # Command name comes from a variable - can't resolve it
                    self.log.debug(1, self.unhandled_template % cmd)
                elif cmd == "eval":
                    # eval: re-parse the remaining words as shell code
                    command = " ".join(word for _, word in words[1:])
                    self._parse_shell(command)
                else:
                    self.allexecs.add(cmd)
                break