blob: fd2c4734f061378684c21cb26fcdf5d8f63db03b [file] [log] [blame]
Brad Bishopc342db32019-05-15 21:57:59 -04001#
2# SPDX-License-Identifier: GPL-2.0-only
3#
4
Brad Bishop6e60e8b2018-02-01 10:27:11 -05005"""
6BitBake code parser
7
8Parses actual code (i.e. python and shell) for functions and in-line
9expressions. Used mainly to determine dependencies on other functions
10and variables within the BitBake metadata. Also provides a cache for
11this information in order to speed up processing.
12
13(Not to be confused with the code that parses the metadata itself,
14see lib/bb/parse/ for that).
15
16NOTE: if you change how the parsers gather information you will almost
17certainly need to increment CodeParserCache.CACHE_VERSION below so that
18any existing codeparser cache gets invalidated. Additionally you'll need
19to increment __cache_version__ in cache.py in order to ensure that old
20recipe caches don't trigger "Taskhash mismatch" errors.
21
22"""
23
Patrick Williamsc124f4f2015-09-15 14:41:29 -050024import ast
Patrick Williamsc0f7c042017-02-23 20:41:17 -060025import sys
Patrick Williamsc124f4f2015-09-15 14:41:29 -050026import codegen
27import logging
Patrick Williamsc0f7c042017-02-23 20:41:17 -060028import pickle
29import bb.pysh as pysh
Patrick Williamsc124f4f2015-09-15 14:41:29 -050030import os.path
31import bb.utils, bb.data
Patrick Williamsc0f7c042017-02-23 20:41:17 -060032import hashlib
Patrick Williamsc124f4f2015-09-15 14:41:29 -050033from itertools import chain
Patrick Williamsc0f7c042017-02-23 20:41:17 -060034from bb.pysh import pyshyacc, pyshlex, sherrors
Patrick Williamsc124f4f2015-09-15 14:41:29 -050035from bb.cache import MultiProcessCache
36
Patrick Williamsc124f4f2015-09-15 14:41:29 -050037logger = logging.getLogger('BitBake.CodeParser')
38
def bbhash(s):
    """Return the hexadecimal SHA-256 digest of the string s, encoded as UTF-8."""
    digest = hashlib.sha256()
    digest.update(s.encode("utf-8"))
    return digest.hexdigest()
Patrick Williamsc124f4f2015-09-15 14:41:29 -050041
def check_indent(codestr):
    """If the code is indented, add a top level piece of code to 'remove' the indentation

    The code is considered indented when the leading run of newlines, tabs
    and spaces ends in a tab or a space. In that case "if 1:\\n" is
    prepended so that the indented body becomes valid at module level.

    Empty or whitespace-only input is returned unchanged (there is nothing
    to wrap), as is input without leading indentation.
    """

    # Length of the leading run of newline/tab/space characters.
    stripped = codestr.lstrip("\n\t ")
    i = len(codestr) - len(stripped)

    # No leading whitespace at all, or nothing but whitespace: leave as is.
    if i == 0 or not stripped:
        return codestr

    if codestr[i - 1] in ("\t", " "):
        if codestr[0] == "\n":
            # Since we're adding a line, we need to remove one line of any empty padding
            # to ensure line numbers are correct
            codestr = codestr[1:]
        return "if 1:\n" + codestr

    return codestr
60
61
62# Basically pickle, in python 2.7.3 at least, does badly with data duplication
63# upon pickling and unpickling. Combine this with duplicate objects and things
64# are a mess.
65#
66# When the sets are originally created, python calls intern() on the set keys
67# which significantly improves memory usage. Sadly the pickle/unpickle process
68# doesn't call intern() on the keys and results in the same strings being duplicated
69# in memory. This also means pickle will save the same string multiple times in
70# the cache file.
71#
72# By having shell and python cacheline objects with setstate/getstate, we force
73# the object creation through our own routine where we can call intern (via internSet).
74#
75# We also use hashable frozensets and ensure we use references to these so that
76# duplicates can be removed, both in memory and in the resulting pickled data.
77#
78# By playing these games, the size of the cache file shrinks dramatically
79# meaning faster load times and the reloaded cache files also consume much less
80# memory. Smaller cache files, faster load times and lower memory usage is good.
81#
82# A custom getstate/setstate using tuples is actually worth 15% cachesize by
83# avoiding duplication of the attribute names!
84
class SetCache(object):
    """Deduplicating store of frozensets of interned strings.

    Equal sets passed to internSet() come back as one shared frozenset
    object, so duplicates are stored only once.
    """

    def __init__(self):
        # Maps each stored frozenset to itself. Keying by the set (rather
        # than by its hash value) means two different sets whose hashes
        # happen to collide can never be confused with each other.
        self.setcache = {}

    def internSet(self, items):
        """Return the shared frozenset holding the interned strings in items.

        items is any iterable of str. Every string is passed through
        sys.intern(); the resulting frozenset is returned, reusing a
        previously stored equal frozenset when there is one.
        """
        s = frozenset(sys.intern(i) for i in items)
        # setdefault() returns the already stored equal set, or stores and
        # returns this one.
        return self.setcache.setdefault(s, s)


codecache = SetCache()
102
class pythonCacheLine(object):
    """One cached python parse result: refs, execs and contains.

    refs and execs are stored as frozensets obtained from
    codecache.internSet(); contains is a dict whose values are such
    frozensets. Pickling uses a plain (refs, execs, contains) tuple and
    unpickling goes back through __init__ so the sets are interned again.
    """

    def __init__(self, refs, execs, contains):
        intern_set = codecache.internSet
        self.refs = intern_set(refs)
        self.execs = intern_set(execs)
        self.contains = {}
        for key in contains:
            self.contains[key] = intern_set(contains[key])

    def __getstate__(self):
        # A tuple avoids storing the attribute names in the pickle.
        return (self.refs, self.execs, self.contains)

    def __setstate__(self, state):
        refs, execs, contains = state
        # Rebuild through __init__ so the strings and sets get interned.
        self.__init__(refs, execs, contains)

    def __hash__(self):
        # Hash over refs, execs and the contains entries in sorted key order.
        parts = [hash(self.refs), hash(self.execs)]
        for key in sorted(self.contains):
            parts.append(key)
            parts.append(hash(self.contains[key]))
        return hash(tuple(parts))

    def __repr__(self):
        return "%s %s %s" % (str(self.refs), str(self.execs), str(self.contains))
124
125
class shellCacheLine(object):
    """One cached shell parse result: the set of executed commands.

    execs is stored as a frozenset obtained from codecache.internSet().
    The pickled state is that frozenset itself; unpickling goes back
    through __init__ so the set is interned again.
    """

    def __init__(self, execs):
        self.execs = codecache.internSet(execs)

    def __getstate__(self):
        return self.execs

    def __setstate__(self, state):
        # Rebuild through __init__ so the strings and the set get interned.
        self.__init__(state)

    def __hash__(self):
        return hash(self.execs)

    def __repr__(self):
        return str(self.execs)
140
class CodeParserCache(MultiProcessCache):
    """Cache of python and shell code parsing results.

    Index 0 of the cache data holds the python entries and index 1 the
    shell entries; the same layout is used for the "extras" data.
    """

    cache_file_name = "bb_codeparser.dat"
    # NOTE: you must increment this if you change how the parsers gather information,
    # so that an existing cache gets invalidated. Additionally you'll need
    # to increment __cache_version__ in cache.py in order to ensure that old
    # recipe caches don't trigger "Taskhash mismatch" errors.
    CACHE_VERSION = 11

    def __init__(self):
        MultiProcessCache.__init__(self)
        self._bind_main_caches()
        self.pythoncacheextras = self.cachedata_extras[0]
        self.shellcacheextras = self.cachedata_extras[1]

        # To avoid duplication in the codeparser cache, keep
        # a lookup of hashes of objects we already have
        self.pythoncachelines = {}
        self.shellcachelines = {}

    def _bind_main_caches(self):
        """Point pythoncache/shellcache at the current cachedata entries."""
        self.pythoncache = self.cachedata[0]
        self.shellcache = self.cachedata[1]

    def newPythonCacheLine(self, refs, execs, contains):
        """Return a pythonCacheLine for the data, reusing one with the same hash."""
        candidate = pythonCacheLine(refs, execs, contains)
        return self.pythoncachelines.setdefault(hash(candidate), candidate)

    def newShellCacheLine(self, execs):
        """Return a shellCacheLine for the data, reusing one with the same hash."""
        candidate = shellCacheLine(execs)
        return self.shellcachelines.setdefault(hash(candidate), candidate)

    def init_cache(self, d):
        """Initialise the cache via MultiProcessCache unless already populated."""
        # Only initialise when we don't already have the caches
        if not self.pythoncache:
            MultiProcessCache.init_cache(self, d)

            # cachedata gets re-assigned in the parent
            self._bind_main_caches()

    def create_cachedata(self):
        """Return empty cache data: one dict for python, one for shell."""
        return [{}, {}]


codeparsercache = CodeParserCache()
193
def parser_cache_init(d):
    """Initialise the module-level code parser cache by calling its init_cache(d)."""
    cache = codeparsercache
    cache.init_cache(d)
196
def parser_cache_save():
    """Call save_extras() on the module-level code parser cache."""
    cache = codeparsercache
    cache.save_extras()
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500199
def parser_cache_savemerge():
    """Call save_merge() on the module-level code parser cache."""
    cache = codeparsercache
    cache.save_merge()
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500202
Logger = logging.getLoggerClass()


class BufferedLogger(Logger):
    """Logger that queues records until flush() hands them to a target logger."""

    def __init__(self, name, level=0, target=None):
        Logger.__init__(self, name)
        self.setLevel(level)
        # Records received so far, in arrival order.
        self.buffer = []
        # Logger that receives the buffered records on flush().
        self.target = target

    def handle(self, record):
        """Queue the record instead of dispatching it to handlers."""
        self.buffer.append(record)

    def flush(self):
        """Pass every queued record the target is enabled for to it, then empty the queue."""
        for queued in self.buffer:
            if not self.target.isEnabledFor(queued.levelno):
                continue
            self.target.handle(queued)
        self.buffer = []
219
class PythonParser():
    """Collect variable references and called functions from python code.

    After parse_python() the results are available as:
      references - names of variables accessed with a literal name
                   ("VAR", or "VAR[flag]" for flag accesses)
      execs      - names of called functions, plus the literal function
                   names passed to the execfuncs
      contains   - dict mapping a variable name to the set of literal
                   values it is checked against by the contains functions
    """

    getvars = (".getVar", ".appendVar", ".prependVar", "oe.utils.conditional")
    getvarflags = (".getVarFlag", ".appendVarFlag", ".prependVarFlag")
    containsfuncs = ("bb.utils.contains", "base_contains")
    containsanyfuncs = ("bb.utils.contains_any", "bb.utils.filter")
    execfuncs = ("bb.build.exec_func", "bb.build.exec_task")

    def __init__(self, name, log):
        """name identifies what is being parsed (used in messages and passed
        to expandWithRefs); log is the logger that buffered messages are
        flushed to.
        """
        self.name = name
        self.var_execs = set()
        self.contains = {}
        self.execs = set()
        self.references = set()
        self.log = BufferedLogger('BitBake.Data.PythonParser', logging.DEBUG, log)

        self.unhandled_message = "in call of %s, argument '%s' is not a string literal"
        self.unhandled_message = "while parsing %s, %s" % (name, self.unhandled_message)

    @staticmethod
    def _literal_str(node):
        """Return the value of a string literal AST node, or None otherwise.

        From Python 3.8 on, string literals are parsed as ast.Constant and
        ast.Str is deprecated (and removed in later releases), so ast.Str
        is only consulted on older interpreters.
        """
        if sys.version_info >= (3, 8):
            if isinstance(node, ast.Constant) and isinstance(node.value, str):
                return node.value
            return None
        if isinstance(node, ast.Str):
            return node.s
        return None

    def warn(self, func, arg):
        """Warn about calls of bitbake APIs which pass a non-literal
        argument for the variable name, as we're not able to track such
        a reference.
        """

        try:
            funcstr = codegen.to_source(func)
            argstr = codegen.to_source(arg)
        except TypeError:
            self.log.debug(2, 'Failed to convert function and argument to source form')
        else:
            self.log.debug(1, self.unhandled_message % (funcstr, argstr))

    def visit_Call(self, node):
        """Record what the ast.Call node tells us about dependencies.

        Variable accessors and contains helpers update references/contains,
        ".expand" calls are expanded to gather their own references, the
        execfuncs record the executed function name, and any other call of
        a plain or dotted name is added to execs.
        """
        name = self.called_node_name(node.func)
        if name and (name.endswith(self.getvars) or name.endswith(self.getvarflags) or name in self.containsfuncs or name in self.containsanyfuncs):
            varname = self._literal_str(node.args[0])
            if varname is None:
                # Non-literal variable name: the reference can't be tracked.
                self.warn(node.func, node.args[0])
            elif name in self.containsfuncs and self._literal_str(node.args[1]) is not None:
                self.contains.setdefault(varname, set()).add(self._literal_str(node.args[1]))
            elif name in self.containsanyfuncs and self._literal_str(node.args[1]) is not None:
                # These take a whitespace separated list of values to check.
                self.contains.setdefault(varname, set()).update(self._literal_str(node.args[1]).split())
            elif name.endswith(self.getvarflags):
                flag = self._literal_str(node.args[1])
                if flag is not None:
                    self.references.add('%s[%s]' % (varname, flag))
                else:
                    self.warn(node.func, node.args[1])
            else:
                self.references.add(varname)
        elif name and name.endswith(".expand"):
            value = self._literal_str(node.args[0])
            if value is not None:
                # Expand the literal in a fresh datastore purely to collect
                # the references, execs and contains it implies.
                d = bb.data.init()
                parser = d.expandWithRefs(value, self.name)
                self.references |= parser.references
                self.execs |= parser.execs
                for varname in parser.contains:
                    if varname not in self.contains:
                        self.contains[varname] = set()
                    self.contains[varname] |= parser.contains[varname]
        elif name in self.execfuncs:
            funcname = self._literal_str(node.args[0])
            if funcname is not None:
                self.var_execs.add(funcname)
            else:
                self.warn(node.func, node.args[0])
        elif name and isinstance(node.func, (ast.Name, ast.Attribute)):
            self.execs.add(name)

    def called_node_name(self, node):
        """Given a called node, return its original string form

        Returns the dotted name for chains of attribute accesses ending in
        a plain name (e.g. "bb.utils.contains"), and None for anything else.
        """
        components = []
        while node:
            if isinstance(node, ast.Attribute):
                components.append(node.attr)
                node = node.value
            elif isinstance(node, ast.Name):
                components.append(node.id)
                return '.'.join(reversed(components))
            else:
                break

    def parse_python(self, node, lineno=0, filename="<string>"):
        """Parse the python code in the string node and fill in references,
        execs and contains.

        Results are taken from the code parser cache when the code has been
        seen before, otherwise the code is compiled to an AST, every call is
        inspected and the result is stored in the cache extras. lineno and
        filename only affect positions reported by compile(). Empty or
        whitespace-only code is ignored.
        """
        if not node or not node.strip():
            return

        h = bbhash(str(node))

        # Main cache first, then the entries added during this run.
        for cache in (codeparsercache.pythoncache, codeparsercache.pythoncacheextras):
            if h in cache:
                cacheline = cache[h]
                # Copy into mutable containers; the cached ones are shared.
                self.references = set(cacheline.refs)
                self.execs = set(cacheline.execs)
                self.contains = {}
                for i in cacheline.contains:
                    self.contains[i] = set(cacheline.contains[i])
                return

        # We can't add to the linenumbers for compile, we can pad to the correct number of blank lines though
        node = "\n" * int(lineno) + node
        code = compile(check_indent(str(node)), filename, "exec",
                       ast.PyCF_ONLY_AST)

        for n in ast.walk(code):
            if isinstance(n, ast.Call):
                self.visit_Call(n)

        self.execs.update(self.var_execs)

        codeparsercache.pythoncacheextras[h] = codeparsercache.newPythonCacheLine(self.references, self.execs, self.contains)
340
class ShellParser():
    """Extract the external commands executed by a piece of shell code.

    funcdefs collects the names of shell functions defined in the code,
    allexecs every command name seen, and execs the commands that are not
    functions defined in the code itself.
    """

    def __init__(self, name, log):
        self.funcdefs = set()
        self.allexecs = set()
        self.execs = set()
        self.log = BufferedLogger('BitBake.Data.%s' % name, logging.DEBUG, log)
        template = "unable to handle non-literal command '%s'"
        self.unhandled_template = "while parsing %s, %s" % (name, template)

    def parse_shell(self, value):
        """Parse the supplied shell code in a string, returning the external
        commands it executes.
        """

        h = bbhash(str(value))

        # Main cache first, then the entries added during this run.
        for cache in (codeparsercache.shellcache, codeparsercache.shellcacheextras):
            if h in cache:
                self.execs = set(cache[h].execs)
                return self.execs

        self._parse_shell(value)
        # Commands defined as functions in the code itself are not external.
        self.execs = self.allexecs.difference(self.funcdefs)

        codeparsercache.shellcacheextras[h] = codeparsercache.newShellCacheLine(self.execs)

        return self.execs

    def _parse_shell(self, value):
        """Run the shell parser over value and process the resulting tokens."""
        try:
            tokens, _ = pyshyacc.parse(value, eof=True, debug=False)
        except Exception:
            tail = value.split('\n')[-5:]
            bb.error('Error during parse shell code, the last 5 lines are:\n%s' % '\n'.join(tail))
            raise
        else:
            self.process_tokens(tokens)

    def process_tokens(self, tokens):
        """Process a supplied portion of the syntax tree as returned by
        pyshyacc.parse.
        """

        # Each handler returns a (more_tokens, words) pair: further tokens to
        # descend into and words to scan for commands; either may be None.

        def function_definition(value):
            self.funcdefs.add(value.name)
            return [value.body], None

        def case_clause(value):
            # Element 0 of each item in the case is the list of patterns, and
            # Element 1 of each item in the case is the list of commands to be
            # executed when that pattern matches.
            patterns = chain.from_iterable([item[0] for item in value.items])
            commands = chain.from_iterable([item[1] for item in value.items])
            return commands, patterns

        def if_clause(value):
            # Walk the if/elif chain, collecting conditions and bodies in
            # order, and finish with the final else commands.
            parts = []
            while True:
                parts.append(value.cond)
                parts.append(value.if_cmds)
                tail = value.else_cmds
                if isinstance(tail, tuple) and tail[0] == "elif":
                    value = tail[1]
                    continue
                parts.append(tail)
                return chain(*parts)

        def simple_command(value):
            return None, chain(value.words, (assign[1] for assign in value.assigns))

        token_handlers = {
            "and_or": lambda v: ((v.left, v.right), None),
            "async": lambda v: ([v], None),
            "brace_group": lambda v: (v.cmds, None),
            "for_clause": lambda v: (v.cmds, v.items),
            "function_definition": function_definition,
            "if_clause": lambda v: (if_clause(v), None),
            "pipeline": lambda v: (v.commands, None),
            "redirect_list": lambda v: ([v.cmd], None),
            "subshell": lambda v: (v.cmds, None),
            "while_clause": lambda v: (chain(v.condition, v.cmds), None),
            "until_clause": lambda v: (chain(v.condition, v.cmds), None),
            "simple_command": simple_command,
            "case_clause": case_clause,
        }

        def process_token_list(token_list):
            for entry in token_list:
                # Nested lists of tokens are flattened recursively.
                if isinstance(entry, list):
                    process_token_list(entry)
                    continue
                name, value = entry
                try:
                    more_tokens, words = token_handlers[name](value)
                except KeyError:
                    raise NotImplementedError("Unsupported token type " + name)

                if more_tokens:
                    self.process_tokens(more_tokens)

                if words:
                    self.process_words(words)

        process_token_list(tokens)

    def process_words(self, words):
        """Process a set of 'words' in pyshyacc parlance, which includes
        extraction of executed commands from $() blocks, as well as grabbing
        the command name argument.
        """

        command_kinds = ("cmd_name", "cmd_word")

        words = list(words)
        # Iterate over a copy since words may be removed along the way.
        for word in list(words):
            for part in pyshlex.make_wordtree(word[1]):
                if not isinstance(part, list):
                    continue

                if part[0] in ('`', '$('):
                    # Command substitution: parse the embedded command too.
                    command = pyshlex.wordtree_as_string(part[1:-1])
                    self._parse_shell(command)

                    # A command name produced by a substitution isn't a
                    # literal command, so drop it from further consideration.
                    if word[0] in command_kinds and word in words:
                        words.remove(word)

        usetoken = False
        for word in words:
            kind = word[0]
            if not (kind in command_kinds or (usetoken and kind == "TOKEN")):
                continue

            text = word[1]
            if "=" in text:
                # Looks like an assignment; the command may be a later TOKEN.
                usetoken = True
                continue

            if text.startswith("$"):
                self.log.debug(1, self.unhandled_template % text)
            elif text == "eval":
                command = " ".join(arg for _, arg in words[1:])
                self._parse_shell(command)
            else:
                self.allexecs.add(text)
            break