import ast
import codegen
import logging
import os.path
import bb.utils, bb.data
from itertools import chain
from pysh import pyshyacc, pyshlex, sherrors
from bb.cache import MultiProcessCache


logger = logging.getLogger('BitBake.CodeParser')

try:
    import cPickle as pickle
except ImportError:
    import pickle
    logger.info('Importing cPickle failed. Falling back to a very slow implementation.')


def check_indent(codestr):
    """If the code is indented, prepend a dummy top-level block ("if 1:") so
    the indented code parses without error."""

    i = 0
    while codestr[i] in ["\n", "\t", " "]:
        i = i + 1

    if i == 0:
        return codestr

    if codestr[i-1] == "\t" or codestr[i-1] == " ":
        return "if 1:\n" + codestr

    return codestr
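
# Example (illustrative, not part of the original module): a fragment that
# arrives already indented would be a SyntaxError at top level, so it gains
# a dummy guard:
#
#   check_indent("    d.setVar('FOO', '1')\n")
#   -> "if 1:\n    d.setVar('FOO', '1')\n"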


# Pickle, at least in Python 2.7.3, handles duplicated data badly on
# pickling and unpickling. Combine this with duplicate objects and things
# are a mess.
#
# When the sets are originally created, python calls intern() on the set keys
# which significantly improves memory usage. Sadly the pickle/unpickle process
# doesn't call intern() on the keys, so the same strings end up duplicated in
# memory. This also means pickle saves the same string multiple times in
# the cache file.
#
# By giving the shell and python cacheline objects a __setstate__/__getstate__,
# we force object creation through our own routine where we can call intern()
# (via internSet).
#
# We also use hashable frozensets and ensure we use references to these so that
# duplicates can be removed, both in memory and in the resulting pickled data.
#
# By playing these games, the size of the cache file shrinks dramatically,
# meaning faster load times, and the reloaded cache files also consume much
# less memory. Smaller cache files, faster load times and lower memory usage
# are all good.
#
# A custom __getstate__/__setstate__ using tuples is worth about 15% of the
# cache size on its own, by avoiding duplication of the attribute names!

class SetCache(object):
    def __init__(self):
        self.setcache = {}

    def internSet(self, items):
        new = []
        for i in items:
            new.append(intern(i))
        s = frozenset(new)
        if hash(s) in self.setcache:
            return self.setcache[hash(s)]
        self.setcache[hash(s)] = s
        return s

codecache = SetCache()
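
# Example (illustrative, not part of the original module): equal sets collapse
# to a single shared frozenset, so later pickling stores one object instead of
# many copies:
#
#   a = codecache.internSet(["FOO", "BAR"])
#   b = codecache.internSet(["BAR", "FOO"])
#   assert a is b    # the same interned frozenset object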

class pythonCacheLine(object):
    def __init__(self, refs, execs, contains):
        self.refs = codecache.internSet(refs)
        self.execs = codecache.internSet(execs)
        self.contains = {}
        for c in contains:
            self.contains[c] = codecache.internSet(contains[c])

    def __getstate__(self):
        return (self.refs, self.execs, self.contains)

    def __setstate__(self, state):
        (refs, execs, contains) = state
        self.__init__(refs, execs, contains)

    def __hash__(self):
        l = (hash(self.refs), hash(self.execs))
        for c in sorted(self.contains.keys()):
            l = l + (c, hash(self.contains[c]))
        return hash(l)

    def __repr__(self):
        return " ".join([str(self.refs), str(self.execs), str(self.contains)])
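
# Example (illustrative, not part of the original module): the pickled state is
# a bare tuple, so attribute names are not stored per instance (the ~15%
# saving mentioned above):
#
#   line = pythonCacheLine(["FOO"], ["do_fetch"], {})
#   line.__getstate__()
#   -> (frozenset(['FOO']), frozenset(['do_fetch']), {})
#
# Unpickling routes back through __init__, re-interning every string.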


class shellCacheLine(object):
    def __init__(self, execs):
        self.execs = codecache.internSet(execs)

    def __getstate__(self):
        return self.execs

    def __setstate__(self, state):
        self.__init__(state)

    def __hash__(self):
        return hash(self.execs)

    def __repr__(self):
        return str(self.execs)


class CodeParserCache(MultiProcessCache):
    cache_file_name = "bb_codeparser.dat"
    CACHE_VERSION = 7

    def __init__(self):
        MultiProcessCache.__init__(self)
        self.pythoncache = self.cachedata[0]
        self.shellcache = self.cachedata[1]
        self.pythoncacheextras = self.cachedata_extras[0]
        self.shellcacheextras = self.cachedata_extras[1]

        # To avoid duplication in the codeparser cache, keep
        # a lookup of hashes of objects we already have
        self.pythoncachelines = {}
        self.shellcachelines = {}

    def newPythonCacheLine(self, refs, execs, contains):
        cacheline = pythonCacheLine(refs, execs, contains)
        h = hash(cacheline)
        if h in self.pythoncachelines:
            return self.pythoncachelines[h]
        self.pythoncachelines[h] = cacheline
        return cacheline

    def newShellCacheLine(self, execs):
        cacheline = shellCacheLine(execs)
        h = hash(cacheline)
        if h in self.shellcachelines:
            return self.shellcachelines[h]
        self.shellcachelines[h] = cacheline
        return cacheline
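
    # Example (illustrative, not part of the original module): identical
    # parse results share one cache line object via the hash lookup above:
    #
    #   l1 = codeparsercache.newPythonCacheLine(["FOO"], ["do_fetch"], {})
    #   l2 = codeparsercache.newPythonCacheLine(["FOO"], ["do_fetch"], {})
    #   assert l1 is l2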

    def init_cache(self, d):
        MultiProcessCache.init_cache(self, d)

        # cachedata gets re-assigned in the parent
        self.pythoncache = self.cachedata[0]
        self.shellcache = self.cachedata[1]

    def create_cachedata(self):
        data = [{}, {}]
        return data

codeparsercache = CodeParserCache()

def parser_cache_init(d):
    codeparsercache.init_cache(d)

def parser_cache_save(d):
    codeparsercache.save_extras(d)

def parser_cache_savemerge(d):
    codeparsercache.save_merge(d)

Logger = logging.getLoggerClass()
class BufferedLogger(Logger):
    def __init__(self, name, level=0, target=None):
        Logger.__init__(self, name)
        self.setLevel(level)
        self.buffer = []
        self.target = target

    def handle(self, record):
        self.buffer.append(record)

    def flush(self):
        for record in self.buffer:
            self.target.handle(record)
        self.buffer = []
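
# Example (illustrative, not part of the original module): records queue up
# until flush(), so messages from a parse that later hits the cache can be
# dropped without ever reaching the real logger:
#
#   buffered = BufferedLogger('BitBake.Data.PythonParser', logging.DEBUG, logger)
#   buffered.warning('queued, not yet emitted')
#   buffered.flush()    # now forwarded to the target logger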

class PythonParser():
    getvars = (".getVar", ".appendVar", ".prependVar")
    containsfuncs = ("bb.utils.contains", "base_contains", "bb.utils.contains_any")
    execfuncs = ("bb.build.exec_func", "bb.build.exec_task")

    def warn(self, func, arg):
        """Warn about calls of bitbake APIs which pass a non-literal
        argument for the variable name, as we're not able to track such
        a reference.
        """

        try:
            funcstr = codegen.to_source(func)
            argstr = codegen.to_source(arg)
        except TypeError:
            self.log.debug(2, 'Failed to convert function and argument to source form')
        else:
            self.log.debug(1, self.unhandled_message % (funcstr, argstr))

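    # Example (illustrative, not part of the original module): a literal
    # variable name can be tracked, a computed one cannot and only triggers
    # warn():
    #
    #   d.getVar('FOO', True)          # 'FOO' lands in self.references
    #   d.getVar('FOO_' + arch, True)  # non-literal: warn() fires instead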
    def visit_Call(self, node):
        name = self.called_node_name(node.func)
        if (name and name.endswith(self.getvars)) or name in self.containsfuncs:
            if isinstance(node.args[0], ast.Str):
                varname = node.args[0].s
                if name in self.containsfuncs and isinstance(node.args[1], ast.Str):
                    if varname not in self.contains:
                        self.contains[varname] = set()
                    self.contains[varname].add(node.args[1].s)
                else:
                    self.references.add(node.args[0].s)
            else:
                self.warn(node.func, node.args[0])
        elif name in self.execfuncs:
            if isinstance(node.args[0], ast.Str):
                self.var_execs.add(node.args[0].s)
            else:
                self.warn(node.func, node.args[0])
        elif name and isinstance(node.func, (ast.Name, ast.Attribute)):
            self.execs.add(name)

    def called_node_name(self, node):
        """Given a called node, return its original string form"""
        components = []
        while node:
            if isinstance(node, ast.Attribute):
                components.append(node.attr)
                node = node.value
            elif isinstance(node, ast.Name):
                components.append(node.id)
                return '.'.join(reversed(components))
            else:
                break
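
    # Example (illustrative, not part of the original module): the attribute
    # chain of a call is walked from the outside in, then reversed, so the AST
    # for bb.utils.contains(...) collects ['contains', 'utils', 'bb'] and
    # yields "bb.utils.contains". Calls with no Name at the root (e.g.
    # funcs[0]()) break out of the loop and return None.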

    def __init__(self, name, log):
        self.var_execs = set()
        self.contains = {}
        self.execs = set()
        self.references = set()
        self.log = BufferedLogger('BitBake.Data.PythonParser', logging.DEBUG, log)

        self.unhandled_message = "in call of %s, argument '%s' is not a string literal"
        self.unhandled_message = "while parsing %s, %s" % (name, self.unhandled_message)

    def parse_python(self, node):
        if not node or not node.strip():
            return

        h = hash(str(node))

        if h in codeparsercache.pythoncache:
            self.references = set(codeparsercache.pythoncache[h].refs)
            self.execs = set(codeparsercache.pythoncache[h].execs)
            self.contains = {}
            for i in codeparsercache.pythoncache[h].contains:
                self.contains[i] = set(codeparsercache.pythoncache[h].contains[i])
            return

        if h in codeparsercache.pythoncacheextras:
            self.references = set(codeparsercache.pythoncacheextras[h].refs)
            self.execs = set(codeparsercache.pythoncacheextras[h].execs)
            self.contains = {}
            for i in codeparsercache.pythoncacheextras[h].contains:
                self.contains[i] = set(codeparsercache.pythoncacheextras[h].contains[i])
            return

        code = compile(check_indent(str(node)), "<string>", "exec",
                       ast.PyCF_ONLY_AST)

        for n in ast.walk(code):
            if n.__class__.__name__ == "Call":
                self.visit_Call(n)

        self.execs.update(self.var_execs)

        codeparsercache.pythoncacheextras[h] = codeparsercache.newPythonCacheLine(self.references, self.execs, self.contains)
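
# Example (illustrative, not part of the original module): parsing a small
# fragment populates the result sets:
#
#   p = PythonParser("do_example", logger)
#   p.parse_python("d.getVar('FOO', True)\nbb.build.exec_func('do_fetch', d)\n")
#   p.references  -> set(['FOO'])
#   p.execs       -> set(['do_fetch'])   # via var_execs, merged at the end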


class ShellParser():
    def __init__(self, name, log):
        self.funcdefs = set()
        self.allexecs = set()
        self.execs = set()
        self.log = BufferedLogger('BitBake.Data.%s' % name, logging.DEBUG, log)
        self.unhandled_template = "unable to handle non-literal command '%s'"
        self.unhandled_template = "while parsing %s, %s" % (name, self.unhandled_template)

    def parse_shell(self, value):
        """Parse the supplied shell code in a string, returning the external
        commands it executes.
        """

        h = hash(str(value))

        if h in codeparsercache.shellcache:
            self.execs = set(codeparsercache.shellcache[h].execs)
            return self.execs

        if h in codeparsercache.shellcacheextras:
            self.execs = set(codeparsercache.shellcacheextras[h].execs)
            return self.execs

        self._parse_shell(value)
        self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs)

        codeparsercache.shellcacheextras[h] = codeparsercache.newShellCacheLine(self.execs)

        return self.execs
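
    # Example (illustrative, not part of the original module): shell functions
    # defined within the fragment are excluded from the result:
    #
    #   s = ShellParser("do_example", logger)
    #   s.parse_shell("helper() { sed -e s/a/b/; }\nhelper | tar xf -\n")
    #   -> set(['sed', 'tar'])    # 'helper' is in funcdefs, not execs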

    def _parse_shell(self, value):
        try:
            tokens, _ = pyshyacc.parse(value, eof=True, debug=False)
        except pyshlex.NeedMore:
            raise sherrors.ShellSyntaxError("Unexpected EOF")

        for token in tokens:
            self.process_tokens(token)

    def process_tokens(self, tokens):
        """Process a supplied portion of the syntax tree as returned by
        pyshyacc.parse.
        """

        def function_definition(value):
            self.funcdefs.add(value.name)
            return [value.body], None

        def case_clause(value):
            # Element 0 of each item in the case is the list of patterns, and
            # element 1 of each item in the case is the list of commands to be
            # executed when that pattern matches.
            words = chain(*[item[0] for item in value.items])
            cmds = chain(*[item[1] for item in value.items])
            return cmds, words

        def if_clause(value):
            main = chain(value.cond, value.if_cmds)
            rest = value.else_cmds
            if isinstance(rest, tuple) and rest[0] == "elif":
                return chain(main, if_clause(rest[1]))
            else:
                return chain(main, rest)

        def simple_command(value):
            return None, chain(value.words, (assign[1] for assign in value.assigns))

        token_handlers = {
            "and_or": lambda x: ((x.left, x.right), None),
            "async": lambda x: ([x], None),
            "brace_group": lambda x: (x.cmds, None),
            "for_clause": lambda x: (x.cmds, x.items),
            "function_definition": function_definition,
            "if_clause": lambda x: (if_clause(x), None),
            "pipeline": lambda x: (x.commands, None),
            "redirect_list": lambda x: ([x.cmd], None),
            "subshell": lambda x: (x.cmds, None),
            "while_clause": lambda x: (chain(x.condition, x.cmds), None),
            "until_clause": lambda x: (chain(x.condition, x.cmds), None),
            "simple_command": simple_command,
            "case_clause": case_clause,
        }

        for token in tokens:
            name, value = token
            try:
                more_tokens, words = token_handlers[name](value)
            except KeyError:
                raise NotImplementedError("Unsupported token type " + name)

            if more_tokens:
                self.process_tokens(more_tokens)

            if words:
                self.process_words(words)

    def process_words(self, words):
        """Process a set of 'words' in pyshyacc parlance, which includes
        extraction of executed commands from $() blocks, as well as grabbing
        the command name argument.
        """

        words = list(words)
        for word in list(words):
            wtree = pyshlex.make_wordtree(word[1])
            for part in wtree:
                if not isinstance(part, list):
                    continue

                if part[0] in ('`', '$('):
                    command = pyshlex.wordtree_as_string(part[1:-1])
                    self._parse_shell(command)

                    if word[0] in ("cmd_name", "cmd_word"):
                        if word in words:
                            words.remove(word)

        usetoken = False
        for word in words:
            if word[0] in ("cmd_name", "cmd_word") or \
               (usetoken and word[0] == "TOKEN"):
                if "=" in word[1]:
                    usetoken = True
                    continue

                cmd = word[1]
                if cmd.startswith("$"):
                    self.log.debug(1, self.unhandled_template % cmd)
                elif cmd == "eval":
                    command = " ".join(word for _, word in words[1:])
                    self._parse_shell(command)
                else:
                    self.allexecs.add(cmd)
                break
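
# Example (illustrative, not part of the original module): command
# substitutions are parsed recursively and the substituted word is dropped
# from the word list, so only concrete commands survive:
#
#   s = ShellParser("do_example", logger)
#   s.parse_shell('FILES=$(find . -name "*.patch")\n')
#   -> set(['find'])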