blob: cd39409434b25f4d879237b8186e3285643c82d8 [file] [log] [blame]
#
# Copyright BitBake Contributors
#
# SPDX-License-Identifier: GPL-2.0-only
#

"""
BitBake code parser

Parses actual code (i.e. python and shell) for functions and in-line
expressions. Used mainly to determine dependencies on other functions
and variables within the BitBake metadata. Also provides a cache for
this information in order to speed up processing.

(Not to be confused with the code that parses the metadata itself,
see lib/bb/parse/ for that).

NOTE: if you change how the parsers gather information you will almost
certainly need to increment CodeParserCache.CACHE_VERSION below so that
any existing codeparser cache gets invalidated. Additionally you'll need
to increment __cache_version__ in cache.py in order to ensure that old
recipe caches don't trigger "Taskhash mismatch" errors.

"""
25
Patrick Williamsc124f4f2015-09-15 14:41:29 -050026import ast
Patrick Williamsc0f7c042017-02-23 20:41:17 -060027import sys
Patrick Williamsc124f4f2015-09-15 14:41:29 -050028import codegen
29import logging
Andrew Geissler517393d2023-01-13 08:55:19 -060030import inspect
Patrick Williamsc0f7c042017-02-23 20:41:17 -060031import bb.pysh as pysh
Patrick Williamsc124f4f2015-09-15 14:41:29 -050032import bb.utils, bb.data
Patrick Williamsc0f7c042017-02-23 20:41:17 -060033import hashlib
Patrick Williamsc124f4f2015-09-15 14:41:29 -050034from itertools import chain
Andrew Geissler82c905d2020-04-13 13:39:40 -050035from bb.pysh import pyshyacc, pyshlex
Patrick Williamsc124f4f2015-09-15 14:41:29 -050036from bb.cache import MultiProcessCache
37
# Module-level logger handed to the parsers created in this file.
logger = logging.getLogger('BitBake.CodeParser')
39
def bbhash(s):
    """Return the hexadecimal SHA-256 digest of the UTF-8 encoding of string s."""
    digest = hashlib.sha256()
    digest.update(s.encode("utf-8"))
    return digest.hexdigest()
Patrick Williamsc124f4f2015-09-15 14:41:29 -050042
def check_indent(codestr):
    """If the code is indented, add a top level piece of code to 'remove' the indentation.

    Leading newlines, tabs and spaces are skipped. If the first real
    character is preceded by a tab or space (i.e. the first code line is
    indented), the code is wrapped in an "if 1:" block so it compiles.

    Empty or whitespace-only input is returned unchanged: there is no code
    to re-indent, and wrapping it would produce an "if 1:" with no body.
    """
    length = len(codestr)

    # Find the index of the first non-whitespace character (bounded, so an
    # empty or all-whitespace string cannot run off the end).
    i = 0
    while i < length and codestr[i] in ("\n", "\t", " "):
        i += 1

    if i == 0 or i == length:
        return codestr

    if codestr[i - 1] in ("\t", " "):
        if codestr[0] == "\n":
            # Since we're adding a line, we need to remove one line of any empty padding
            # to ensure line numbers are correct
            codestr = codestr[1:]
        return "if 1:\n" + codestr

    return codestr
61
# Maps "namespace.function" to [references, execs, var_execs, contains] as
# recorded by add_module_functions().
modulecode_deps = dict()
63
def add_module_functions(fn, functions, namespace):
    """Record the parsed dependencies of python module functions.

    fn is the path of the file the functions come from, functions is
    indexed by function name, and namespace is the prefix used to build the
    "namespace.function" keys stored in modulecode_deps.

    Each function is first looked up in the parser cache under a key made of
    the file name, size, mtime and function name; the source text is only
    fetched (via inspect) when that lookup raises KeyError.
    """
    import os

    st = os.stat(fn)
    hash_prefix = ":".join((fn, str(st.st_size), str(st.st_mtime)))

    for f in functions:
        name = "%s.%s" % (namespace, f)
        key = hash_prefix + f
        parser = PythonParser(name, logger)
        try:
            # No source text supplied: succeeds only on a cache hit
            parser.parse_python(None, filename=fn, lineno=1, fixedhash=key)
        except KeyError:
            lines, lineno = inspect.getsourcelines(functions[f])
            parser.parse_python("".join(lines), filename=fn, lineno=lineno, fixedhash=key)

        # Expand internal module exec references
        execs = {namespace + "." + e if e in functions else e for e in parser.execs}

        modulecode_deps[name] = [
            parser.references.copy(),
            execs,
            parser.var_execs.copy(),
            parser.contains.copy(),
        ]
Andrew Geissler517393d2023-01-13 08:55:19 -060087
def update_module_dependencies(d):
    """Remove each module function's "vardepsexclude" names from its dependencies.

    For every entry in modulecode_deps the "vardepsexclude" flag is read
    from d; when it lists any names they are subtracted from the first
    three elements of the entry. The fourth element is kept as it is.
    """
    for mod in modulecode_deps:
        flagvalue = d.getVarFlag(mod, "vardepsexclude") or ""
        excludes = set(flagvalue.split())
        if not excludes:
            continue
        deps = modulecode_deps[mod]
        modulecode_deps[mod] = [
            deps[0] - excludes,
            deps[1] - excludes,
            deps[2] - excludes,
            deps[3],
        ]
93
# A custom getstate/setstate using tuples is actually worth 15% cachesize by
# avoiding duplication of the attribute names!
class SetCache(object):
    """Pool of shared frozensets of interned strings.

    Equal sets requested through internSet() are represented by one shared
    frozenset object, and their member strings are interned.
    """
    def __init__(self):
        # Maps each canonical frozenset to itself
        self.setcache = {}

    def internSet(self, items):
        """Return the canonical frozenset holding the strings in items.

        Every item is passed through sys.intern(). The first frozenset seen
        with a given content is stored and returned for all later requests
        with equal content.
        """
        s = frozenset(sys.intern(i) for i in items)
        # Key on the set itself rather than on hash(s): two different sets
        # may share a hash value, and must not be mistaken for one another.
        return self.setcache.setdefault(s, s)
111
# Shared set pool used by the cache line classes below.
codecache = SetCache()
113
class pythonCacheLine(object):
    """Cached result of parsing one piece of python code.

    Holds the referenced names (refs), the executed names (execs) and the
    per-variable "contains" values, all stored as sets obtained from
    codecache.internSet().
    """
    def __init__(self, refs, execs, contains):
        intern_set = codecache.internSet
        self.refs = intern_set(refs)
        self.execs = intern_set(execs)
        self.contains = {var: intern_set(contains[var]) for var in contains}

    def __getstate__(self):
        # Pickle as a plain tuple so attribute names are not stored per entry
        return (self.refs, self.execs, self.contains)

    def __setstate__(self, state):
        # Re-run __init__ so the restored sets are interned again
        refs, execs, contains = state
        self.__init__(refs, execs, contains)

    def __hash__(self):
        # Content-based hash; "contains" is walked in sorted key order so
        # the result does not depend on dict insertion order.
        parts = [hash(self.refs), hash(self.execs)]
        for var in sorted(self.contains):
            parts.append(var)
            parts.append(hash(self.contains[var]))
        return hash(tuple(parts))

    def __repr__(self):
        return " ".join(map(str, (self.refs, self.execs, self.contains)))
135
136
class shellCacheLine(object):
    """Cached result of parsing one piece of shell code: the executed commands."""
    def __init__(self, execs):
        self.execs = codecache.internSet(execs)

    def __getstate__(self):
        # NOTE: the pickled state is the bare set itself, not a one-element tuple
        return self.execs

    def __setstate__(self, state):
        execs = state
        self.__init__(execs)

    def __hash__(self):
        return hash(self.execs)

    def __repr__(self):
        return str(self.execs)
151
class CodeParserCache(MultiProcessCache):
    """Cache of python and shell parse results, keyed by code hash.

    Slot 0 of the cache data holds python cache lines and slot 1 holds
    shell cache lines; the same layout is used for the "extras" data.
    """
    cache_file_name = "bb_codeparser.dat"
    # NOTE: you must increment this if you change how the parsers gather information,
    # so that an existing cache gets invalidated. Additionally you'll need
    # to increment __cache_version__ in cache.py in order to ensure that old
    # recipe caches don't trigger "Taskhash mismatch" errors.
    CACHE_VERSION = 11

    def __init__(self):
        super().__init__()
        self.pythoncache, self.shellcache = self.cachedata[0], self.cachedata[1]
        self.pythoncacheextras, self.shellcacheextras = self.cachedata_extras[0], self.cachedata_extras[1]

        # To avoid duplication in the codeparser cache, keep
        # a lookup of hashes of objects we already have
        self.pythoncachelines = {}
        self.shellcachelines = {}

    def newPythonCacheLine(self, refs, execs, contains):
        """Return a pythonCacheLine for the data, reusing an existing one with the same hash."""
        candidate = pythonCacheLine(refs, execs, contains)
        return self.pythoncachelines.setdefault(hash(candidate), candidate)

    def newShellCacheLine(self, execs):
        """Return a shellCacheLine for the data, reusing an existing one with the same hash."""
        candidate = shellCacheLine(execs)
        return self.shellcachelines.setdefault(hash(candidate), candidate)

    def init_cache(self, cachedir):
        """Load the cache from cachedir unless the python cache is already populated."""
        # Only load if we don't already have the caches
        if not self.pythoncache:
            super().init_cache(cachedir)

            # cachedata gets re-assigned in the parent
            self.pythoncache = self.cachedata[0]
            self.shellcache = self.cachedata[1]

    def create_cachedata(self):
        """Return empty cache data: one dict for python, one for shell."""
        return [{}, {}]
202
# Module-wide cache instance used by PythonParser and ShellParser.
codeparsercache = CodeParserCache()
204
def parser_cache_init(cachedir):
    """Initialise the module-wide code parser cache from cachedir."""
    codeparsercache.init_cache(cachedir)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500207
def parser_cache_save():
    """Call save_extras() on the module-wide code parser cache."""
    codeparsercache.save_extras()
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500210
def parser_cache_savemerge():
    """Call save_merge() on the module-wide code parser cache."""
    codeparsercache.save_merge()
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500213
Logger = logging.getLoggerClass()
class BufferedLogger(Logger):
    """Logger which stores records until flush() forwards them to a target.

    name and level are handled as for a normal logger. target is the
    logger-like object (providing isEnabledFor() and handle()) that
    receives the buffered records on flush().
    """
    def __init__(self, name, level=0, target=None):
        Logger.__init__(self, name)
        self.setLevel(level)
        self.buffer = []
        self.target = target

    def handle(self, record):
        """Store the record instead of emitting it."""
        self.buffer.append(record)

    def flush(self):
        """Forward buffered records to the target, then empty the buffer.

        Only records whose level the target is enabled for are forwarded.
        Without a target (the constructor default) there is nowhere to send
        them, so the records stay buffered rather than raising
        AttributeError on None.
        """
        if self.target is None:
            return
        for record in self.buffer:
            if self.target.isEnabledFor(record.levelno):
                self.target.handle(record)
        self.buffer = []
230
class DummyLogger():
    """Placeholder log object whose flush() does nothing."""
    def flush(self):
        pass
234
class PythonParser():
    """Extract variable references and executed functions from python code.

    After parse_python() the results are available as:
      references - names passed as string literals to the getVar-style calls
      execs      - names of called functions (plus var_execs)
      var_execs  - names passed to bb.build.exec_func / exec_task
      contains   - per variable, the values tested via the contains functions
    """
    getvars = (".getVar", ".appendVar", ".prependVar", "oe.utils.conditional")
    getvarflags = (".getVarFlag", ".appendVarFlag", ".prependVarFlag")
    containsfuncs = ("bb.utils.contains", "base_contains")
    containsanyfuncs = ("bb.utils.contains_any", "bb.utils.filter")
    execfuncs = ("bb.build.exec_func", "bb.build.exec_task")

    def warn(self, func, arg):
        """Warn about calls of bitbake APIs which pass a non-literal
        argument for the variable name, as we're not able to track such
        a reference.
        """

        try:
            funcstr = codegen.to_source(func)
            argstr = codegen.to_source(arg)
        except TypeError:
            self.log.debug2('Failed to convert function and argument to source form')
        else:
            self.log.debug(self.unhandled_message % (funcstr, argstr))

    @staticmethod
    def _literal_str(node, index):
        """Return the string literal passed as positional argument 'index'
        of the call 'node', or None if that argument is missing or is not a
        string literal.
        """
        if index >= len(node.args):
            return None
        arg = node.args[index]
        # ast.Constant/.value replaces the deprecated ast.Str/.s
        if isinstance(arg, ast.Constant) and isinstance(arg.value, str):
            return arg.value
        return None

    def _warn_arg(self, node, index):
        """Warn about positional argument 'index' of 'node', if it exists."""
        if index < len(node.args):
            self.warn(node.func, node.args[index])

    def visit_Call(self, node):
        """Record what a single ast.Call node references or executes."""
        name = self.called_node_name(node.func)
        if name and (name.endswith(self.getvars) or name.endswith(self.getvarflags) or name in self.containsfuncs or name in self.containsanyfuncs):
            varname = self._literal_str(node, 0)
            if varname is None:
                self._warn_arg(node, 0)
                return

            second = self._literal_str(node, 1)
            if name in self.containsfuncs and second is not None:
                self.contains.setdefault(varname, set()).add(second)
            elif name in self.containsanyfuncs and second is not None:
                self.contains.setdefault(varname, set()).update(second.split())
            elif name.endswith(self.getvarflags):
                if second is not None:
                    self.references.add('%s[%s]' % (varname, second))
                else:
                    self._warn_arg(node, 1)
            else:
                self.references.add(varname)
        elif name and name.endswith(".expand"):
            value = self._literal_str(node, 0)
            if value is not None:
                d = bb.data.init()
                parser = d.expandWithRefs(value, self.name)
                self.references |= parser.references
                self.execs |= parser.execs
                for varname in parser.contains:
                    self.contains.setdefault(varname, set())
                    self.contains[varname] |= parser.contains[varname]
        elif name in self.execfuncs:
            funcname = self._literal_str(node, 0)
            if funcname is not None:
                self.var_execs.add(funcname)
            else:
                self._warn_arg(node, 0)
        elif name and isinstance(node.func, (ast.Name, ast.Attribute)):
            self.execs.add(name)

    def called_node_name(self, node):
        """Given a called node, return its original string form"""
        components = []
        while node:
            if isinstance(node, ast.Attribute):
                components.append(node.attr)
                node = node.value
            elif isinstance(node, ast.Name):
                components.append(node.id)
                return '.'.join(reversed(components))
            else:
                # Not a plain dotted name (e.g. a subscript or a call result)
                break
        return None

    def __init__(self, name, log):
        self.name = name
        self.var_execs = set()
        self.contains = {}
        self.execs = set()
        self.references = set()
        self._log = log
        # Defer init as expensive
        self.log = DummyLogger()

        self.unhandled_message = "in call of %s, argument '%s' is not a string literal"
        self.unhandled_message = "while parsing %s, %s" % (name, self.unhandled_message)

    # For the python module code it is expensive to have the function text so it
    # uses a different fixedhash to cache against. We can take the hit on obtaining the
    # text if it isn't in the cache.
    def parse_python(self, node, lineno=0, filename="<string>", fixedhash=None):
        """Parse python source text and record its references and execs.

        node is the source text (may be None when fixedhash is given),
        lineno and filename position the code for compile(), and fixedhash,
        when set, is used as the cache key instead of a hash of the text.

        Results are taken from the parser cache when the key is present
        there. Raises KeyError if fixedhash is given without source text
        and the key is not cached.
        """
        if not fixedhash and (not node or not node.strip()):
            return

        h = fixedhash if fixedhash else bbhash(str(node))

        for cache in (codeparsercache.pythoncache, codeparsercache.pythoncacheextras):
            if h in cache:
                cacheline = cache[h]
                self.references = set(cacheline.refs)
                self.execs = set(cacheline.execs)
                self.contains = {}
                for i in cacheline.contains:
                    self.contains[i] = set(cacheline.contains[i])
                return

        if fixedhash and not node:
            raise KeyError

        # Need to parse so take the hit on the real log buffer
        self.log = BufferedLogger('BitBake.Data.PythonParser', logging.DEBUG, self._log)

        # We can't add to the linenumbers for compile, we can pad to the correct number of blank lines though
        node = "\n" * int(lineno) + node
        code = compile(check_indent(str(node)), filename, "exec",
                       ast.PyCF_ONLY_AST)

        for n in ast.walk(code):
            if isinstance(n, ast.Call):
                self.visit_Call(n)

        self.execs.update(self.var_execs)

        codeparsercache.pythoncacheextras[h] = codeparsercache.newPythonCacheLine(self.references, self.execs, self.contains)
369
class ShellParser():
    """Extract the commands executed by a piece of shell code.

    After parse_shell() the execs attribute holds the commands the code
    runs, excluding the shell functions it defines itself.
    """
    def __init__(self, name, log):
        self.funcdefs = set()
        self.allexecs = set()
        self.execs = set()
        self._name = name
        self._log = log
        # Defer init as expensive
        self.log = DummyLogger()

        self.unhandled_template = "unable to handle non-literal command '%s'"
        self.unhandled_template = "while parsing %s, %s" % (name, self.unhandled_template)

    def parse_shell(self, value):
        """Parse the supplied shell code in a string, returning the external
        commands it executes.
        """

        h = bbhash(str(value))

        if h in codeparsercache.shellcache:
            self.execs = set(codeparsercache.shellcache[h].execs)
            return self.execs

        if h in codeparsercache.shellcacheextras:
            self.execs = set(codeparsercache.shellcacheextras[h].execs)
            return self.execs

        # Need to parse so take the hit on the real log buffer
        self.log = BufferedLogger('BitBake.Data.%s' % self._name, logging.DEBUG, self._log)

        self._parse_shell(value)
        self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs)

        codeparsercache.shellcacheextras[h] = codeparsercache.newShellCacheLine(self.execs)

        return self.execs

    def _parse_shell(self, value):
        """Parse shell code and process the resulting tokens.

        A parse failure is reported through bb.error (with the last five
        lines of the code) and then re-raised.
        """
        try:
            tokens, _ = pyshyacc.parse(value, eof=True, debug=False)
        except Exception:
            bb.error('Error during parse shell code, the last 5 lines are:\n%s' % '\n'.join(value.split('\n')[-5:]))
            raise

        self.process_tokens(tokens)

    def process_tokens(self, tokens):
        """Process a supplied portion of the syntax tree as returned by
        pyshyacc.parse.

        Raises NotImplementedError for a token type with no handler.
        """

        # Each handler returns a pair (more_tokens, words): nested tokens to
        # recurse into and words to scan for commands. Either may be None.

        def function_definition(value):
            self.funcdefs.add(value.name)
            return [value.body], None

        def case_clause(value):
            # Element 0 of each item in the case is the list of patterns, and
            # Element 1 of each item in the case is the list of commands to be
            # executed when that pattern matches.
            words = chain(*[item[0] for item in value.items])
            cmds = chain(*[item[1] for item in value.items])
            return cmds, words

        def if_clause(value):
            main = chain(value.cond, value.if_cmds)
            rest = value.else_cmds
            if isinstance(rest, tuple) and rest[0] == "elif":
                return chain(main, if_clause(rest[1]))
            else:
                return chain(main, rest)

        def simple_command(value):
            return None, chain(value.words, (assign[1] for assign in value.assigns))

        token_handlers = {
            "and_or": lambda x: ((x.left, x.right), None),
            "async": lambda x: ([x], None),
            "brace_group": lambda x: (x.cmds, None),
            "for_clause": lambda x: (x.cmds, x.items),
            "function_definition": function_definition,
            "if_clause": lambda x: (if_clause(x), None),
            "pipeline": lambda x: (x.commands, None),
            "redirect_list": lambda x: ([x.cmd], None),
            "subshell": lambda x: (x.cmds, None),
            "while_clause": lambda x: (chain(x.condition, x.cmds), None),
            "until_clause": lambda x: (chain(x.condition, x.cmds), None),
            "simple_command": simple_command,
            "case_clause": case_clause,
        }

        def process_token_list(tokens):
            for token in tokens:
                if isinstance(token, list):
                    process_token_list(token)
                    continue
                name, value = token

                # Look the handler up separately from calling it, so that a
                # KeyError raised inside a handler is not misreported as an
                # unsupported token type.
                handler = token_handlers.get(name)
                if handler is None:
                    raise NotImplementedError("Unsupported token type " + name)
                more_tokens, words = handler(value)

                if more_tokens:
                    self.process_tokens(more_tokens)

                if words:
                    self.process_words(words)

        process_token_list(tokens)

    def process_words(self, words):
        """Process a set of 'words' in pyshyacc parlance, which includes
        extraction of executed commands from $() blocks, as well as grabbing
        the command name argument.
        """

        words = list(words)
        # Iterate over a copy: words may be removed from the list below
        for word in list(words):
            wtree = pyshlex.make_wordtree(word[1])
            for part in wtree:
                if not isinstance(part, list):
                    continue

                if part[0] in ('`', '$('):
                    # Command substitution: parse the embedded command
                    command = pyshlex.wordtree_as_string(part[1:-1])
                    self._parse_shell(command)

                    if word[0] in ("cmd_name", "cmd_word"):
                        if word in words:
                            words.remove(word)

        usetoken = False
        for word in words:
            if word[0] in ("cmd_name", "cmd_word") or \
               (usetoken and word[0] == "TOKEN"):
                if "=" in word[1]:
                    # Looks like an assignment; the command may be a later TOKEN
                    usetoken = True
                    continue

                cmd = word[1]
                if cmd.startswith("$"):
                    self.log.debug(self.unhandled_template % cmd)
                elif cmd == "eval":
                    command = " ".join(word for _, word in words[1:])
                    self._parse_shell(command)
                else:
                    self.allexecs.add(cmd)
                break