blob: ddd1b97dcb1a41e751427d6b98fe6a93456a4b7b [file] [log] [blame]
"""
BitBake code parser

Parses actual code (i.e. python and shell) for functions and in-line
expressions. Used mainly to determine dependencies on other functions
and variables within the BitBake metadata. Also provides a cache for
this information in order to speed up processing.

(Not to be confused with the code that parses the metadata itself,
see lib/bb/parse/ for that).

NOTE: if you change how the parsers gather information you will almost
certainly need to increment CodeParserCache.CACHE_VERSION below so that
any existing codeparser cache gets invalidated. Additionally you'll need
to increment __cache_version__ in cache.py in order to ensure that old
recipe caches don't trigger "Taskhash mismatch" errors.

"""
19
Patrick Williamsc124f4f2015-09-15 14:41:29 -050020import ast
Patrick Williamsc0f7c042017-02-23 20:41:17 -060021import sys
Patrick Williamsc124f4f2015-09-15 14:41:29 -050022import codegen
23import logging
Patrick Williamsc0f7c042017-02-23 20:41:17 -060024import pickle
25import bb.pysh as pysh
Patrick Williamsc124f4f2015-09-15 14:41:29 -050026import os.path
27import bb.utils, bb.data
Patrick Williamsc0f7c042017-02-23 20:41:17 -060028import hashlib
Patrick Williamsc124f4f2015-09-15 14:41:29 -050029from itertools import chain
Patrick Williamsc0f7c042017-02-23 20:41:17 -060030from bb.pysh import pyshyacc, pyshlex, sherrors
Patrick Williamsc124f4f2015-09-15 14:41:29 -050031from bb.cache import MultiProcessCache
32
Patrick Williamsc124f4f2015-09-15 14:41:29 -050033logger = logging.getLogger('BitBake.CodeParser')
34
def bbhash(s):
    """Return the hexadecimal MD5 digest of the string ``s`` encoded as UTF-8."""
    encoded = s.encode("utf-8")
    digest = hashlib.md5(encoded)
    return digest.hexdigest()
Patrick Williamsc124f4f2015-09-15 14:41:29 -050037
def check_indent(codestr):
    """If the code is indented, add a top level piece of code to 'remove' the indentation.

    The leading run of newlines, tabs and spaces is measured. When that run
    ends in a tab or a space (i.e. the first real line is indented), the code
    is wrapped in an ``if 1:`` block so that it can be compiled. Otherwise the
    string is returned unchanged.

    An empty or whitespace-only string contains no code to wrap and is
    returned unchanged.
    """

    body = codestr.lstrip("\n\t ")
    # Number of leading whitespace characters before the first real token.
    i = len(codestr) - len(body)

    # Nothing to do if the code starts in column 0, or if there is no code at
    # all (wrapping pure whitespace in "if 1:" would not even compile).
    if i == 0 or not body:
        return codestr

    if codestr[i - 1] in ("\t", " "):
        if codestr[0] == "\n":
            # Since we're adding a line, we need to remove one line of any empty padding
            # to ensure line numbers are correct
            codestr = codestr[1:]
        return "if 1:\n" + codestr

    return codestr
56
57
# Basically pickle, in python 2.7.3 at least, does badly with data duplication
# upon pickling and unpickling. Combine this with duplicate objects and things
# are a mess.
#
# When the sets are originally created, python calls intern() on the set keys
# which significantly improves memory usage. Sadly the pickle/unpickle process
# doesn't call intern() on the keys and results in the same strings being duplicated
# in memory. This also means pickle will save the same string multiple times in
# the cache file.
#
# By having shell and python cacheline objects with setstate/getstate, we force
# the object creation through our own routine where we can call intern (via internSet).
#
# We also use hashable frozensets and ensure we use references to these so that
# duplicates can be removed, both in memory and in the resulting pickled data.
#
# By playing these games, the size of the cache file shrinks dramatically
# meaning faster load times and the reloaded cache files also consume much less
# memory. Smaller cache files, faster load times and lower memory usage are good.
#
# A custom getstate/setstate using tuples is actually worth 15% cachesize by
# avoiding duplication of the attribute names!
80
class SetCache(object):
    """Pool of frozensets of interned strings.

    Equal sets requested through internSet() are returned as one shared
    object, so duplicates can be removed both in memory and in pickled data.
    """

    def __init__(self):
        # Maps each pooled frozenset to itself. Keying on the set (rather
        # than on its bare hash value) means two different sets whose hashes
        # happen to collide can never be mistaken for one another.
        self.setcache = {}

    def internSet(self, items):
        """Return the pooled frozenset holding the interned strings of ``items``.

        Every element is passed through sys.intern(). If an equal frozenset
        has been pooled before, that earlier object is returned; otherwise
        the new frozenset is pooled and returned.
        """
        s = frozenset(sys.intern(i) for i in items)
        return self.setcache.setdefault(s, s)

codecache = SetCache()
98
class pythonCacheLine(object):
    """Cache entry for a parsed python fragment: refs, execs and contains.

    All sets are routed through codecache.internSet() on construction, and
    again on unpickling because __setstate__ re-runs __init__.
    """

    def __init__(self, refs, execs, contains):
        self.refs = codecache.internSet(refs)
        self.execs = codecache.internSet(execs)
        self.contains = {c: codecache.internSet(contains[c]) for c in contains}

    def __getstate__(self):
        # A plain tuple avoids storing the attribute names in the pickle.
        return (self.refs, self.execs, self.contains)

    def __setstate__(self, state):
        refs, execs, contains = state
        self.__init__(refs, execs, contains)

    def __hash__(self):
        # Flat tuple: hash(refs), hash(execs), then key / hash(value) pairs
        # of "contains" in sorted key order.
        parts = [hash(self.refs), hash(self.execs)]
        for key in sorted(self.contains):
            parts.extend((key, hash(self.contains[key])))
        return hash(tuple(parts))

    def __repr__(self):
        fields = (self.refs, self.execs, self.contains)
        return " ".join(str(field) for field in fields)
120
121
class shellCacheLine(object):
    """Cache entry for a parsed shell fragment: the set of commands it executes.

    The set is routed through codecache.internSet() on construction, and
    again on unpickling because __setstate__ re-runs __init__.
    """

    def __init__(self, execs):
        self.execs = codecache.internSet(execs)

    def __getstate__(self):
        # The state is the bare set itself, not a one-element tuple.
        return self.execs

    def __setstate__(self, state):
        self.__init__(state)

    def __hash__(self):
        return hash(self.execs)

    def __repr__(self):
        return str(self.execs)
136
class CodeParserCache(MultiProcessCache):
    """Cache of python and shell parse results, built on MultiProcessCache."""

    cache_file_name = "bb_codeparser.dat"
    # NOTE: you must increment this if you change how the parsers gather information,
    # so that an existing cache gets invalidated. Additionally you'll need
    # to increment __cache_version__ in cache.py in order to ensure that old
    # recipe caches don't trigger "Taskhash mismatch" errors.
    CACHE_VERSION = 10

    def __init__(self):
        MultiProcessCache.__init__(self)
        # Slot 0 of the cache data holds python entries, slot 1 shell entries
        # (see create_cachedata()).
        self.pythoncache = self.cachedata[0]
        self.shellcache = self.cachedata[1]
        self.pythoncacheextras = self.cachedata_extras[0]
        self.shellcacheextras = self.cachedata_extras[1]

        # To avoid duplication in the codeparser cache, keep
        # a lookup of the objects we already have, keyed by their content
        self.pythoncachelines = {}
        self.shellcachelines = {}

    def newPythonCacheLine(self, refs, execs, contains):
        """Return a pythonCacheLine for the given data, reusing an equal one if present.

        The lookup key is the cache line's content rather than its bare hash
        value, so two different cache lines whose hashes collide are never
        confused with each other.
        """
        cacheline = pythonCacheLine(refs, execs, contains)
        key = (cacheline.refs, cacheline.execs, frozenset(cacheline.contains.items()))
        return self.pythoncachelines.setdefault(key, cacheline)

    def newShellCacheLine(self, execs):
        """Return a shellCacheLine for ``execs``, reusing an equal one if present.

        As with newPythonCacheLine(), the lookup is keyed by content.
        """
        cacheline = shellCacheLine(execs)
        return self.shellcachelines.setdefault(cacheline.execs, cacheline)

    def init_cache(self, d):
        """Initialise the cache via MultiProcessCache unless python entries are already present."""
        # Check if we already have the caches
        if self.pythoncache:
            return

        MultiProcessCache.init_cache(self, d)

        # cachedata gets re-assigned in the parent
        self.pythoncache = self.cachedata[0]
        self.shellcache = self.cachedata[1]

    def create_cachedata(self):
        """Return a fresh, empty cache layout: [python entries, shell entries]."""
        return [{}, {}]

codeparsercache = CodeParserCache()
189
def parser_cache_init(d):
    """Initialise the module-level codeparsercache by calling its init_cache(d)."""
    codeparsercache.init_cache(d)
192
def parser_cache_save():
    """Call save_extras() on the module-level codeparsercache."""
    codeparsercache.save_extras()
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500195
def parser_cache_savemerge():
    """Call save_merge() on the module-level codeparsercache."""
    codeparsercache.save_merge()
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500198
Logger = logging.getLoggerClass()
class BufferedLogger(Logger):
    """Logger that stores records instead of emitting them.

    Records are held in ``self.buffer`` until flush() forwards them to the
    ``target`` logger.
    """

    def __init__(self, name, level=0, target=None):
        Logger.__init__(self, name)
        self.setLevel(level)
        self.buffer = []
        self.target = target

    def handle(self, record):
        """Store the record for later instead of dispatching it to handlers."""
        self.buffer.append(record)

    def flush(self):
        """Forward buffered records to the target logger, then empty the buffer.

        Only records whose level is enabled on the target are forwarded. When
        no target was supplied there is nowhere to forward to, so the buffered
        records are simply discarded rather than failing on ``None``.
        """
        target = self.target
        if target is not None:
            for record in self.buffer:
                if target.isEnabledFor(record.levelno):
                    target.handle(record)
        self.buffer = []
215
class PythonParser():
    """Collect variable references and executed functions from python code.

    After parse_python() the results are available as:
      references - variable names (or 'name[flag]') passed as string literals
                   to the getVar / getVarFlag style calls
      execs      - names of called functions, plus the literal function names
                   passed to bb.build.exec_func / exec_task
      contains   - dict of variable name -> set of values checked through the
                   contains / contains_any style calls
    """

    getvars = (".getVar", ".appendVar", ".prependVar", "oe.utils.conditional")
    getvarflags = (".getVarFlag", ".appendVarFlag", ".prependVarFlag")
    containsfuncs = ("bb.utils.contains", "base_contains")
    containsanyfuncs = ("bb.utils.contains_any", "bb.utils.filter")
    execfuncs = ("bb.build.exec_func", "bb.build.exec_task")

    def __init__(self, name, log):
        self.name = name
        self.var_execs = set()
        self.contains = {}
        self.execs = set()
        self.references = set()
        self.log = BufferedLogger('BitBake.Data.PythonParser', logging.DEBUG, log)

        self.unhandled_message = "in call of %s, argument '%s' is not a string literal"
        self.unhandled_message = "while parsing %s, %s" % (name, self.unhandled_message)

    @staticmethod
    def _literal_string(node):
        """Return the text of a string-literal AST node, or None for anything else.

        String literals are ast.Constant nodes from Python 3.8 onwards.
        ast.Str is deprecated there and later removed, so it is only
        consulted on older interpreters.
        """
        if sys.version_info >= (3, 8):
            if isinstance(node, ast.Constant) and isinstance(node.value, str):
                return node.value
            return None
        if isinstance(node, ast.Str):
            return node.s
        return None

    def warn(self, func, arg):
        """Warn about calls of bitbake APIs which pass a non-literal
        argument for the variable name, as we're not able to track such
        a reference.
        """

        try:
            funcstr = codegen.to_source(func)
            argstr = codegen.to_source(arg)
        except TypeError:
            self.log.debug(2, 'Failed to convert function and argument to source form')
        else:
            self.log.debug(1, self.unhandled_message % (funcstr, argstr))

    def visit_Call(self, node):
        """Record what a single ast.Call node references or executes.

        Calls that lack the positional argument(s) needed to identify a
        variable, flag or function name carry nothing that can be tracked
        and are skipped instead of raising IndexError.
        """
        name = self.called_node_name(node.func)
        if not name:
            return

        args = node.args
        first = args[0] if args else None
        second = args[1] if len(args) > 1 else None

        if (name.endswith(self.getvars) or name.endswith(self.getvarflags)
                or name in self.containsfuncs or name in self.containsanyfuncs):
            if first is None:
                return
            varname = self._literal_string(first)
            if varname is None:
                self.warn(node.func, first)
                return

            value = self._literal_string(second)
            if name in self.containsfuncs and value is not None:
                self.contains.setdefault(varname, set()).add(value)
            elif name in self.containsanyfuncs and value is not None:
                self.contains.setdefault(varname, set()).update(value.split())
            elif name.endswith(self.getvarflags):
                if value is not None:
                    self.references.add('%s[%s]' % (varname, value))
                elif second is not None:
                    self.warn(node.func, second)
            else:
                self.references.add(varname)
        elif name.endswith(".expand"):
            value = self._literal_string(first)
            if value is not None:
                # Expand against an empty datastore purely to collect what
                # the string refers to.
                d = bb.data.init()
                parser = d.expandWithRefs(value, self.name)
                self.references |= parser.references
                self.execs |= parser.execs
                for varname in parser.contains:
                    self.contains.setdefault(varname, set())
                    self.contains[varname] |= parser.contains[varname]
        elif name in self.execfuncs:
            funcname = self._literal_string(first)
            if funcname is not None:
                self.var_execs.add(funcname)
            elif first is not None:
                self.warn(node.func, first)
        elif isinstance(node.func, (ast.Name, ast.Attribute)):
            self.execs.add(name)

    def called_node_name(self, node):
        """Given a called node, return its original string form.

        Returns the dotted name for a chain of attribute accesses ending in a
        plain name, and None for anything else.
        """
        components = []
        while node:
            if isinstance(node, ast.Attribute):
                components.append(node.attr)
                node = node.value
            elif isinstance(node, ast.Name):
                components.append(node.id)
                return '.'.join(reversed(components))
            else:
                break

    def parse_python(self, node, lineno=0, filename="<string>"):
        """Parse the python source string ``node`` and populate the result sets.

        Results are taken from the codeparser cache when the source has been
        seen before; otherwise the code is compiled to an AST, every call is
        inspected, and the outcome is stored in the cache extras.
        """
        if not node or not node.strip():
            return

        h = bbhash(str(node))

        # The main cache takes precedence over the extras.
        for cache in (codeparsercache.pythoncache, codeparsercache.pythoncacheextras):
            if h in cache:
                cached = cache[h]
                self.references = set(cached.refs)
                self.execs = set(cached.execs)
                self.contains = {}
                for i in cached.contains:
                    self.contains[i] = set(cached.contains[i])
                return

        # We can't add to the linenumbers for compile, we can pad to the correct number of blank lines though
        node = "\n" * int(lineno) + node
        code = compile(check_indent(str(node)), filename, "exec",
                       ast.PyCF_ONLY_AST)

        for n in ast.walk(code):
            if isinstance(n, ast.Call):
                self.visit_Call(n)

        self.execs.update(self.var_execs)

        codeparsercache.pythoncacheextras[h] = codeparsercache.newPythonCacheLine(self.references, self.execs, self.contains)
336
class ShellParser():
    """Collect the external commands executed by a piece of shell code.

    Commands found while parsing accumulate in ``allexecs`` and shell function
    names defined by the code in ``funcdefs``; ``execs`` is the commands that
    are not such locally defined functions.
    """

    def __init__(self, name, log):
        self.funcdefs = set()
        self.allexecs = set()
        self.execs = set()
        self.log = BufferedLogger('BitBake.Data.%s' % name, logging.DEBUG, log)
        self.unhandled_template = "unable to handle non-literal command '%s'"
        self.unhandled_template = "while parsing %s, %s" % (name, self.unhandled_template)

    def parse_shell(self, value):
        """Parse the supplied shell code in a string, returning the external
        commands it executes.
        """

        h = bbhash(str(value))

        # The main cache takes precedence over the extras.
        for cache in (codeparsercache.shellcache, codeparsercache.shellcacheextras):
            if h in cache:
                self.execs = set(cache[h].execs)
                return self.execs

        self._parse_shell(value)
        self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs)

        codeparsercache.shellcacheextras[h] = codeparsercache.newShellCacheLine(self.execs)

        return self.execs

    def _parse_shell(self, value):
        """Run pyshyacc over ``value`` and process the resulting tokens.

        Raises sherrors.ShellSyntaxError when the lexer reports that more
        input is needed.
        """
        try:
            tokens, _ = pyshyacc.parse(value, eof=True, debug=False)
        except pyshlex.NeedMore:
            raise sherrors.ShellSyntaxError("Unexpected EOF")

        self.process_tokens(tokens)

    def process_tokens(self, tokens):
        """Process a supplied portion of the syntax tree as returned by
        pyshyacc.parse.

        Raises NotImplementedError for a token type without a handler.
        """

        # Each handler returns a pair: (further tokens to process, words to
        # process); either element may be None.

        def function_definition(value):
            self.funcdefs.add(value.name)
            return [value.body], None

        def case_clause(value):
            # Element 0 of each item in the case is the list of patterns, and
            # Element 1 of each item in the case is the list of commands to be
            # executed when that pattern matches.
            words = chain(*[item[0] for item in value.items])
            cmds = chain(*[item[1] for item in value.items])
            return cmds, words

        def if_clause(value):
            main = chain(value.cond, value.if_cmds)
            rest = value.else_cmds
            if isinstance(rest, tuple) and rest[0] == "elif":
                return chain(main, if_clause(rest[1]))
            else:
                return chain(main, rest)

        def simple_command(value):
            return None, chain(value.words, (assign[1] for assign in value.assigns))

        token_handlers = {
            "and_or": lambda x: ((x.left, x.right), None),
            "async": lambda x: ([x], None),
            "brace_group": lambda x: (x.cmds, None),
            "for_clause": lambda x: (x.cmds, x.items),
            "function_definition": function_definition,
            "if_clause": lambda x: (if_clause(x), None),
            "pipeline": lambda x: (x.commands, None),
            "redirect_list": lambda x: ([x.cmd], None),
            "subshell": lambda x: (x.cmds, None),
            "while_clause": lambda x: (chain(x.condition, x.cmds), None),
            "until_clause": lambda x: (chain(x.condition, x.cmds), None),
            "simple_command": simple_command,
            "case_clause": case_clause,
        }

        def process_token_list(tokens):
            for token in tokens:
                if isinstance(token, list):
                    process_token_list(token)
                    continue
                name, value = token
                # Only the lookup is guarded: a KeyError raised inside a
                # handler is a different failure and must not be reported as
                # an unsupported token type.
                try:
                    handler = token_handlers[name]
                except KeyError:
                    raise NotImplementedError("Unsupported token type " + name)
                more_tokens, words = handler(value)

                if more_tokens:
                    self.process_tokens(more_tokens)

                if words:
                    self.process_words(words)

        process_token_list(tokens)

    def process_words(self, words):
        """Process a set of 'words' in pyshyacc parlance, which includes
        extraction of executed commands from $() blocks, as well as grabbing
        the command name argument.
        """

        words = list(words)
        # Iterate over a copy because entries may be removed from ``words``.
        for word in list(words):
            wtree = pyshlex.make_wordtree(word[1])
            for part in wtree:
                if not isinstance(part, list):
                    continue

                if part[0] in ('`', '$('):
                    command = pyshlex.wordtree_as_string(part[1:-1])
                    self._parse_shell(command)

                    # A command word that is itself a substitution is dropped
                    # so the second pass does not treat it as a command name.
                    if word[0] in ("cmd_name", "cmd_word"):
                        if word in words:
                            words.remove(word)

        usetoken = False
        for word in words:
            if word[0] in ("cmd_name", "cmd_word") or \
               (usetoken and word[0] == "TOKEN"):
                if "=" in word[1]:
                    # Looks like an assignment; the command may be in a
                    # following TOKEN word.
                    usetoken = True
                    continue

                cmd = word[1]
                if cmd.startswith("$"):
                    self.log.debug(1, self.unhandled_template % cmd)
                elif cmd == "eval":
                    command = " ".join(text for _, text in words[1:])
                    self._parse_shell(command)
                else:
                    self.allexecs.add(cmd)
                break
476 break