blob: d6b81025854e2518dd7c2edb44771cc717a84391 [file] [log] [blame]
#
# Copyright BitBake Contributors
#
# SPDX-License-Identifier: GPL-2.0-only
#
6
"""
BitBake code parser

Parses actual code (i.e. python and shell) for functions and in-line
expressions. Used mainly to determine dependencies on other functions
and variables within the BitBake metadata. Also provides a cache for
this information in order to speed up processing.

(Not to be confused with the code that parses the metadata itself,
see lib/bb/parse/ for that).

NOTE: if you change how the parsers gather information you will almost
certainly need to increment CodeParserCache.CACHE_VERSION below so that
any existing codeparser cache gets invalidated. Additionally you'll need
to increment __cache_version__ in cache.py in order to ensure that old
recipe caches don't trigger "Taskhash mismatch" errors.

"""
25
Patrick Williamsc124f4f2015-09-15 14:41:29 -050026import ast
Patrick Williamsc0f7c042017-02-23 20:41:17 -060027import sys
Patrick Williamsc124f4f2015-09-15 14:41:29 -050028import codegen
29import logging
Andrew Geissler517393d2023-01-13 08:55:19 -060030import inspect
Patrick Williamsc0f7c042017-02-23 20:41:17 -060031import bb.pysh as pysh
Patrick Williamsc124f4f2015-09-15 14:41:29 -050032import bb.utils, bb.data
Patrick Williamsc0f7c042017-02-23 20:41:17 -060033import hashlib
Patrick Williamsc124f4f2015-09-15 14:41:29 -050034from itertools import chain
Andrew Geissler82c905d2020-04-13 13:39:40 -050035from bb.pysh import pyshyacc, pyshlex
Patrick Williamsc124f4f2015-09-15 14:41:29 -050036from bb.cache import MultiProcessCache
37
# Module-level logger; handed to the PythonParser instances created in
# add_module_functions().
logger = logging.getLogger('BitBake.CodeParser')
39
def bbhash(s):
    """Return the hexadecimal SHA-256 digest of the UTF-8 encoding of string s."""
    digest = hashlib.sha256()
    digest.update(s.encode("utf-8"))
    return digest.hexdigest()
Patrick Williamsc124f4f2015-09-15 14:41:29 -050042
def check_indent(codestr):
    """If the code is indented, add a top level piece of code to 'remove' the indentation.

    Returns codestr unchanged when its first non-whitespace character is not
    preceded by a space or tab (i.e. the first statement starts at column 0),
    and also when codestr is empty or consists only of whitespace. Otherwise
    returns the code wrapped in an "if 1:" block so that it compiles.
    """

    stripped = codestr.lstrip("\n\t ")
    if not stripped:
        # Empty or whitespace-only input: there is no statement to wrap, and
        # scanning for one would run off the end of the string.
        return codestr

    # Index of the first character which is not a newline, tab or space
    i = len(codestr) - len(stripped)
    if i == 0:
        return codestr

    if codestr[i - 1] in ("\t", " "):
        if codestr[0] == "\n":
            # Since we're adding a line, we need to remove one line of any empty padding
            # to ensure line numbers are correct
            codestr = codestr[1:]
        return "if 1:\n" + codestr

    return codestr
61
# Maps "<namespace>.<function>" to a four element list
# [references, execs, var_execs, contains], filled in by add_module_functions()
# and filtered by update_module_dependencies().
modulecode_deps = {}
63
def add_module_functions(fn, functions, namespace):
    """Record the dependencies of python module functions in modulecode_deps.

    fn is the path of the file the functions come from, functions maps
    function names to the function objects and namespace is the prefix used
    to build the "<namespace>.<function>" keys of modulecode_deps.

    The cache key for each function is built from the file's path, size and
    mtime plus the function name, so the source text only has to be fetched
    (through inspect) when there is no cache entry for that key.
    """
    # The module's import block does not import os, so bring it into scope here.
    import os

    fstat = os.stat(fn)
    fixedhash = fn + ":" + str(fstat.st_size) + ":" + str(fstat.st_mtime)
    for f in functions:
        name = "%s.%s" % (namespace, f)
        parser = PythonParser(name, logger)
        try:
            # With no source text, parse_python() raises KeyError on a cache miss
            parser.parse_python(None, filename=fn, lineno=1, fixedhash=fixedhash+f)
        except KeyError:
            lines, lineno = inspect.getsourcelines(functions[f])
            src = "".join(lines)
            parser.parse_python(src, filename=fn, lineno=lineno, fixedhash=fixedhash+f)
        execs = parser.execs.copy()
        # Expand internal module exec references
        for e in parser.execs:
            if e in functions:
                execs.remove(e)
                execs.add(namespace + "." + e)
        modulecode_deps[name] = [parser.references.copy(), execs, parser.var_execs.copy(), parser.contains.copy()]
86
def update_module_dependencies(d):
    """Drop names listed in each module function's "vardepsexclude" flag.

    For every key of modulecode_deps the flag is read from d; when it is
    non-empty, the named entries are removed from the first three sets of
    that key's entry. The fourth element is kept as it is.
    """
    for mod, deps in modulecode_deps.items():
        flagvalue = d.getVarFlag(mod, "vardepsexclude") or ""
        excludes = set(flagvalue.split())
        if not excludes:
            continue
        # Replacing the value of an existing key is safe while iterating
        modulecode_deps[mod] = [part - excludes for part in deps[:3]] + [deps[3]]
92
# A custom getstate/setstate using tuples is actually worth 15% cachesize by
# avoiding duplication of the attribute names!
class SetCache(object):
    """Intern sets of strings so that equal sets share a single frozenset."""

    def __init__(self):
        # Maps each interned frozenset to itself
        self.setcache = {}

    def internSet(self, items):
        """Return the shared frozenset holding the strings in items.

        Every string is interned with sys.intern(). The first frozenset seen
        with a given content is stored and returned for all later requests
        with equal content.
        """
        s = frozenset(sys.intern(i) for i in items)
        # Key on the set itself rather than on hash(s): a dict lookup compares
        # for equality as well, so two different sets whose hashes happen to
        # collide can never be mistaken for each other.
        return self.setcache.setdefault(s, s)
110
# Shared SetCache instance; pythonCacheLine and shellCacheLine intern their
# sets through it.
codecache = SetCache()
112
class pythonCacheLine(object):
    """Cache entry for parsed python code: refs, execs and contains.

    All sets are interned through the module-level codecache. The state used
    for pickling is a plain tuple rather than an attribute dictionary.
    """

    def __init__(self, refs, execs, contains):
        intern_set = codecache.internSet
        self.refs = intern_set(refs)
        self.execs = intern_set(execs)
        self.contains = {c: intern_set(contains[c]) for c in contains}

    def __getstate__(self):
        return (self.refs, self.execs, self.contains)

    def __setstate__(self, state):
        # Going through __init__ re-interns the sets
        refs, execs, contains = state
        self.__init__(refs, execs, contains)

    def __hash__(self):
        # Hash over refs, execs and then each contains key (in sorted order)
        # paired with the hash of its set
        parts = [hash(self.refs), hash(self.execs)]
        for key in sorted(self.contains):
            parts.append(key)
            parts.append(hash(self.contains[key]))
        return hash(tuple(parts))

    def __repr__(self):
        return "%s %s %s" % (self.refs, self.execs, self.contains)
134
135
class shellCacheLine(object):
    """Cache entry for parsed shell code: the set of executed commands.

    The set is interned through the module-level codecache and is itself
    used as the pickled state.
    """

    def __init__(self, execs):
        self.execs = codecache.internSet(execs)

    def __getstate__(self):
        return self.execs

    def __setstate__(self, state):
        # Going through __init__ re-interns the set
        self.__init__(state)

    def __hash__(self):
        return hash(self.execs)

    def __repr__(self):
        return "%s" % (self.execs,)
150
class CodeParserCache(MultiProcessCache):
    """MultiProcessCache holding the results of python and shell code parsing.

    Element 0 of the cache data holds python entries and element 1 holds
    shell entries; the same layout is used for the "extras" data.
    """
    cache_file_name = "bb_codeparser.dat"
    # NOTE: you must increment this if you change how the parsers gather information,
    # so that an existing cache gets invalidated. Additionally you'll need
    # to increment __cache_version__ in cache.py in order to ensure that old
    # recipe caches don't trigger "Taskhash mismatch" errors.
    CACHE_VERSION = 11

    def __init__(self):
        MultiProcessCache.__init__(self)
        data, extras = self.cachedata, self.cachedata_extras
        self.pythoncache = data[0]
        self.shellcache = data[1]
        self.pythoncacheextras = extras[0]
        self.shellcacheextras = extras[1]

        # To avoid duplication in the codeparser cache, keep
        # a lookup of hashes of objects we already have
        self.pythoncachelines = {}
        self.shellcachelines = {}

    def newPythonCacheLine(self, refs, execs, contains):
        """Return a pythonCacheLine for the data, reusing an earlier one with the same hash."""
        cacheline = pythonCacheLine(refs, execs, contains)
        return self.pythoncachelines.setdefault(hash(cacheline), cacheline)

    def newShellCacheLine(self, execs):
        """Return a shellCacheLine for execs, reusing an earlier one with the same hash."""
        cacheline = shellCacheLine(execs)
        return self.shellcachelines.setdefault(hash(cacheline), cacheline)

    def init_cache(self, cachedir):
        """Initialise the cache from cachedir unless python cache data is already present."""
        # Only load when we don't already have the caches
        if not self.pythoncache:
            MultiProcessCache.init_cache(self, cachedir)

            # cachedata gets re-assigned in the parent
            self.pythoncache = self.cachedata[0]
            self.shellcache = self.cachedata[1]

    def create_cachedata(self):
        """Return empty cache data: one dict for python code, one for shell code."""
        return [{}, {}]
201
# Module-wide cache instance consulted and updated by PythonParser.parse_python()
# and ShellParser.parse_shell().
codeparsercache = CodeParserCache()
203
def parser_cache_init(cachedir):
    """Initialise the module-level codeparsercache from cachedir."""
    codeparsercache.init_cache(cachedir)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500206
def parser_cache_save():
    """Call save_extras() on the module-level codeparsercache."""
    codeparsercache.save_extras()
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500209
def parser_cache_savemerge():
    """Call save_merge() on the module-level codeparsercache."""
    codeparsercache.save_merge()
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500212
Logger = logging.getLoggerClass()
class BufferedLogger(Logger):
    """Logger which stores its records and replays them onto a target logger.

    Records handled by this logger are appended to self.buffer. flush()
    passes each buffered record the target is enabled for to target.handle()
    and then empties the buffer.
    """

    def __init__(self, name, level=0, target=None):
        Logger.__init__(self, name)
        self.setLevel(level)
        self.buffer = []
        self.target = target

    def handle(self, record):
        # Hold the record back until flush() is called
        self.buffer.append(record)

    def flush(self):
        for record in self.buffer:
            if not self.target.isEnabledFor(record.levelno):
                continue
            self.target.handle(record)
        self.buffer = []
229
class DummyLogger():
    """Placeholder logger object whose flush() does nothing."""

    def flush(self):
        return None
233
class PythonParser():
    """Collect variable references and executed functions from python code.

    parse_python() fills in these attributes:
      references - variable names (or "VAR[flag]") passed as string literals
                   to the datastore accessors listed in getvars/getvarflags
      execs      - dotted names of the functions which are called; after a
                   fresh parse this also includes everything in var_execs
      var_execs  - function names passed as string literals to the
                   functions listed in execfuncs
      contains   - variable name -> set of values it is tested against with
                   the functions listed in containsfuncs/containsanyfuncs
    """
    getvars = (".getVar", ".appendVar", ".prependVar", "oe.utils.conditional")
    getvarflags = (".getVarFlag", ".appendVarFlag", ".prependVarFlag")
    containsfuncs = ("bb.utils.contains", "base_contains")
    containsanyfuncs = ("bb.utils.contains_any", "bb.utils.filter")
    execfuncs = ("bb.build.exec_func", "bb.build.exec_task")

    def warn(self, func, arg):
        """Warn about calls of bitbake APIs which pass a non-literal
        argument for the variable name, as we're not able to track such
        a reference.
        """

        try:
            funcstr = codegen.to_source(func)
            argstr = codegen.to_source(arg)
        except TypeError:
            self.log.debug2('Failed to convert function and argument to source form')
        else:
            self.log.debug(self.unhandled_message % (funcstr, argstr))

    @staticmethod
    def _literal_str(node, index):
        """Return the string literal passed as positional argument index of
        the call node, or None if that argument is missing or is not a
        string literal.
        """
        if index >= len(node.args):
            return None
        arg = node.args[index]
        # ast.Str is deprecated; string literals are ast.Constant nodes
        # carrying a str value.
        if isinstance(arg, ast.Constant) and isinstance(arg.value, str):
            return arg.value
        return None

    def _warn_arg(self, node, index):
        """Warn about positional argument index of the call node, if present."""
        if index < len(node.args):
            self.warn(node.func, node.args[index])

    def _add_contains(self, varname, values):
        """Add values to the set of values varname is tested against."""
        self.contains.setdefault(varname, set()).update(values)

    def visit_Call(self, node):
        """Record what the call node tells us about variables and functions.

        Calls lacking the positional arguments we look at are skipped rather
        than raising IndexError; there is nothing to track for them.
        """
        name = self.called_node_name(node.func)
        if name and (name.endswith(self.getvars) or name.endswith(self.getvarflags) or name in self.containsfuncs or name in self.containsanyfuncs):
            varname = self._literal_str(node, 0)
            if varname is None:
                self._warn_arg(node, 0)
                return

            second = self._literal_str(node, 1)
            if name in self.containsfuncs and second is not None:
                self._add_contains(varname, [second])
            elif name in self.containsanyfuncs and second is not None:
                # The second argument is a whitespace separated list of values
                self._add_contains(varname, second.split())
            elif name.endswith(self.getvarflags):
                if second is not None:
                    self.references.add('%s[%s]' % (varname, second))
                else:
                    self._warn_arg(node, 1)
            else:
                self.references.add(varname)
        elif name and name.endswith(".expand"):
            value = self._literal_str(node, 0)
            if value is not None:
                # Expand the literal in an empty datastore purely to collect
                # what the expression refers to
                d = bb.data.init()
                parser = d.expandWithRefs(value, self.name)
                self.references |= parser.references
                self.execs |= parser.execs
                for varname in parser.contains:
                    self._add_contains(varname, parser.contains[varname])
        elif name in self.execfuncs:
            funcname = self._literal_str(node, 0)
            if funcname is not None:
                self.var_execs.add(funcname)
            else:
                self._warn_arg(node, 0)
        elif name and isinstance(node.func, (ast.Name, ast.Attribute)):
            self.execs.add(name)

    def called_node_name(self, node):
        """Given a called node, return its original string form

        Returns None when the node is not a plain name or a chain of
        attribute accesses ending in a plain name.
        """
        components = []
        while node:
            if isinstance(node, ast.Attribute):
                components.append(node.attr)
                node = node.value
            elif isinstance(node, ast.Name):
                components.append(node.id)
                return '.'.join(reversed(components))
            else:
                break

    def __init__(self, name, log):
        self.name = name
        self.var_execs = set()
        self.contains = {}
        self.execs = set()
        self.references = set()
        self._log = log
        # Defer init as expensive
        self.log = DummyLogger()

        self.unhandled_message = "in call of %s, argument '%s' is not a string literal"
        self.unhandled_message = "while parsing %s, %s" % (name, self.unhandled_message)

    def _load_cacheline(self, cacheline):
        """Copy the data of a cache entry into this parser."""
        self.references = set(cacheline.refs)
        self.execs = set(cacheline.execs)
        self.contains = {}
        for i in cacheline.contains:
            self.contains[i] = set(cacheline.contains[i])

    # For the python module code it is expensive to have the function text so it
    # uses a different fixedhash to cache against. We can take the hit on obtaining the
    # text if it isn't in the cache.
    def parse_python(self, node, lineno=0, filename="<string>", fixedhash=None):
        """Parse python code, using the codeparser cache where possible.

        node is the source text, lineno the number of blank lines to pad it
        with so that reported line numbers are right, and filename the name
        passed to compile(). When fixedhash is given it is used as the cache
        key in place of a hash of the text; in that case node may be None and
        KeyError is raised if there is no cache entry.
        """
        if not fixedhash and (not node or not node.strip()):
            return

        h = fixedhash if fixedhash else bbhash(str(node))

        for cache in (codeparsercache.pythoncache, codeparsercache.pythoncacheextras):
            if h in cache:
                self._load_cacheline(cache[h])
                return

        if fixedhash and not node:
            raise KeyError

        # Need to parse so take the hit on the real log buffer
        self.log = BufferedLogger('BitBake.Data.PythonParser', logging.DEBUG, self._log)

        # We can't add to the linenumbers for compile, we can pad to the correct number of blank lines though
        node = "\n" * int(lineno) + node
        code = compile(check_indent(str(node)), filename, "exec",
                       ast.PyCF_ONLY_AST)

        for n in ast.walk(code):
            if isinstance(n, ast.Call):
                self.visit_Call(n)

        self.execs.update(self.var_execs)

        codeparsercache.pythoncacheextras[h] = codeparsercache.newPythonCacheLine(self.references, self.execs, self.contains)
367 codeparsercache.pythoncacheextras[h] = codeparsercache.newPythonCacheLine(self.references, self.execs, self.contains)
368
369class ShellParser():
370 def __init__(self, name, log):
371 self.funcdefs = set()
372 self.allexecs = set()
373 self.execs = set()
Andrew Geissler9aee5002022-03-30 16:27:02 +0000374 self._name = name
375 self._log = log
376 # Defer init as expensive
377 self.log = DummyLogger()
378
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500379 self.unhandled_template = "unable to handle non-literal command '%s'"
380 self.unhandled_template = "while parsing %s, %s" % (name, self.unhandled_template)
381
382 def parse_shell(self, value):
383 """Parse the supplied shell code in a string, returning the external
384 commands it executes.
385 """
386
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600387 h = bbhash(str(value))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500388
389 if h in codeparsercache.shellcache:
390 self.execs = set(codeparsercache.shellcache[h].execs)
391 return self.execs
392
393 if h in codeparsercache.shellcacheextras:
394 self.execs = set(codeparsercache.shellcacheextras[h].execs)
395 return self.execs
396
Andrew Geissler9aee5002022-03-30 16:27:02 +0000397 # Need to parse so take the hit on the real log buffer
398 self.log = BufferedLogger('BitBake.Data.%s' % self._name, logging.DEBUG, self._log)
399
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500400 self._parse_shell(value)
401 self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs)
402
403 codeparsercache.shellcacheextras[h] = codeparsercache.newShellCacheLine(self.execs)
404
405 return self.execs
406
407 def _parse_shell(self, value):
408 try:
409 tokens, _ = pyshyacc.parse(value, eof=True, debug=False)
Brad Bishop19323692019-04-05 15:28:33 -0400410 except Exception:
411 bb.error('Error during parse shell code, the last 5 lines are:\n%s' % '\n'.join(value.split('\n')[-5:]))
412 raise
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500413
Brad Bishop37a0e4d2017-12-04 01:01:44 -0500414 self.process_tokens(tokens)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500415
416 def process_tokens(self, tokens):
417 """Process a supplied portion of the syntax tree as returned by
418 pyshyacc.parse.
419 """
420
421 def function_definition(value):
422 self.funcdefs.add(value.name)
423 return [value.body], None
424
425 def case_clause(value):
426 # Element 0 of each item in the case is the list of patterns, and
427 # Element 1 of each item in the case is the list of commands to be
428 # executed when that pattern matches.
429 words = chain(*[item[0] for item in value.items])
430 cmds = chain(*[item[1] for item in value.items])
431 return cmds, words
432
433 def if_clause(value):
434 main = chain(value.cond, value.if_cmds)
435 rest = value.else_cmds
436 if isinstance(rest, tuple) and rest[0] == "elif":
437 return chain(main, if_clause(rest[1]))
438 else:
439 return chain(main, rest)
440
441 def simple_command(value):
442 return None, chain(value.words, (assign[1] for assign in value.assigns))
443
444 token_handlers = {
445 "and_or": lambda x: ((x.left, x.right), None),
446 "async": lambda x: ([x], None),
447 "brace_group": lambda x: (x.cmds, None),
448 "for_clause": lambda x: (x.cmds, x.items),
449 "function_definition": function_definition,
450 "if_clause": lambda x: (if_clause(x), None),
451 "pipeline": lambda x: (x.commands, None),
452 "redirect_list": lambda x: ([x.cmd], None),
453 "subshell": lambda x: (x.cmds, None),
454 "while_clause": lambda x: (chain(x.condition, x.cmds), None),
455 "until_clause": lambda x: (chain(x.condition, x.cmds), None),
456 "simple_command": simple_command,
457 "case_clause": case_clause,
458 }
459
Brad Bishop37a0e4d2017-12-04 01:01:44 -0500460 def process_token_list(tokens):
461 for token in tokens:
462 if isinstance(token, list):
463 process_token_list(token)
464 continue
465 name, value = token
466 try:
467 more_tokens, words = token_handlers[name](value)
468 except KeyError:
469 raise NotImplementedError("Unsupported token type " + name)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500470
Brad Bishop37a0e4d2017-12-04 01:01:44 -0500471 if more_tokens:
472 self.process_tokens(more_tokens)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500473
Brad Bishop37a0e4d2017-12-04 01:01:44 -0500474 if words:
475 self.process_words(words)
476
477 process_token_list(tokens)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500478
479 def process_words(self, words):
480 """Process a set of 'words' in pyshyacc parlance, which includes
481 extraction of executed commands from $() blocks, as well as grabbing
482 the command name argument.
483 """
484
485 words = list(words)
486 for word in list(words):
487 wtree = pyshlex.make_wordtree(word[1])
488 for part in wtree:
489 if not isinstance(part, list):
490 continue
491
492 if part[0] in ('`', '$('):
493 command = pyshlex.wordtree_as_string(part[1:-1])
494 self._parse_shell(command)
495
496 if word[0] in ("cmd_name", "cmd_word"):
497 if word in words:
498 words.remove(word)
499
500 usetoken = False
501 for word in words:
502 if word[0] in ("cmd_name", "cmd_word") or \
503 (usetoken and word[0] == "TOKEN"):
504 if "=" in word[1]:
505 usetoken = True
506 continue
507
508 cmd = word[1]
509 if cmd.startswith("$"):
Andrew Geissler95ac1b82021-03-31 14:34:31 -0500510 self.log.debug(self.unhandled_template % cmd)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500511 elif cmd == "eval":
512 command = " ".join(word for _, word in words[1:])
513 self._parse_shell(command)
514 else:
515 self.allexecs.add(cmd)
516 break