blob: 9d66d3ae412c9647dc986490b608b7855b530bcd [file] [log] [blame]
#
# Copyright BitBake Contributors
#
# SPDX-License-Identifier: GPL-2.0-only
#
6
"""
BitBake code parser

Parses actual code (i.e. python and shell) for functions and in-line
expressions. Used mainly to determine dependencies on other functions
and variables within the BitBake metadata. Also provides a cache for
this information in order to speed up processing.

(Not to be confused with the code that parses the metadata itself,
see lib/bb/parse/ for that).

NOTE: if you change how the parsers gather information you will almost
certainly need to increment CodeParserCache.CACHE_VERSION below so that
any existing codeparser cache gets invalidated. Additionally you'll need
to increment __cache_version__ in cache.py in order to ensure that old
recipe caches don't trigger "Taskhash mismatch" errors.

"""
25
Patrick Williamsc124f4f2015-09-15 14:41:29 -050026import ast
Patrick Williamsc0f7c042017-02-23 20:41:17 -060027import sys
Patrick Williamsc124f4f2015-09-15 14:41:29 -050028import codegen
29import logging
Patrick Williamsc0f7c042017-02-23 20:41:17 -060030import bb.pysh as pysh
Patrick Williamsc124f4f2015-09-15 14:41:29 -050031import bb.utils, bb.data
Patrick Williamsc0f7c042017-02-23 20:41:17 -060032import hashlib
Patrick Williamsc124f4f2015-09-15 14:41:29 -050033from itertools import chain
Andrew Geissler82c905d2020-04-13 13:39:40 -050034from bb.pysh import pyshyacc, pyshlex
Patrick Williamsc124f4f2015-09-15 14:41:29 -050035from bb.cache import MultiProcessCache
36
# Module-level logger for the code parser
logger = logging.getLogger("BitBake.CodeParser")
38
def bbhash(s):
    """Return the hexadecimal SHA-256 digest of the string s (encoded as UTF-8)."""
    digest = hashlib.sha256()
    digest.update(s.encode("utf-8"))
    return digest.hexdigest()
Patrick Williamsc124f4f2015-09-15 14:41:29 -050041
def check_indent(codestr):
    """If the code is indented, add a top level piece of code to 'remove' the indentation

    The leading run of newlines, tabs and spaces is examined. When that run
    ends in a tab or space (i.e. the first real line is indented) the code is
    wrapped in an "if 1:" block so that it compiles. Otherwise, including for
    empty or whitespace-only input, the string is returned unchanged.
    """

    # Number of leading newline/tab/space characters
    remainder = codestr.lstrip("\n\t ")
    i = len(codestr) - len(remainder)

    # No leading whitespace, or nothing but whitespace: there is no indented
    # code to wrap (and wrapping would produce an empty "if" body)
    if i == 0 or not remainder:
        return codestr

    if codestr[i-1] in ("\t", " "):
        if codestr[0] == "\n":
            # Since we're adding a line, we need to remove one line of any empty padding
            # to ensure line numbers are correct
            codestr = codestr[1:]
        return "if 1:\n" + codestr

    return codestr
60
# A custom getstate/setstate using tuples is actually worth 15% cachesize by
# avoiding duplication of the attribute names!
63
Andrew Geissler82c905d2020-04-13 13:39:40 -050064
class SetCache(object):
    """Pool of frozensets of interned strings.

    Equal sets requested through internSet() are returned as one shared
    object so that duplicates do not have to be stored more than once.
    """
    def __init__(self):
        # Maps each pooled frozenset to itself. Keying on the set (rather
        # than only on its hash value) means two different sets whose hashes
        # happen to collide can never be mistaken for one another.
        self.setcache = {}

    def internSet(self, items):
        """Return the pooled frozenset equal to the strings in items.

        Every string is passed through sys.intern() first. The first set
        seen with given contents becomes the pooled one and is returned for
        every later request with equal contents.
        """
        s = frozenset(sys.intern(i) for i in items)
        return self.setcache.setdefault(s, s)


codecache = SetCache()
82
class pythonCacheLine(object):
    """Cache entry holding the parse results for one piece of python code.

    refs and execs are stored as pooled frozensets, contains as a dict
    mapping a key to a pooled frozenset (all via codecache.internSet()).
    """
    def __init__(self, refs, execs, contains):
        self.refs = codecache.internSet(refs)
        self.execs = codecache.internSet(execs)
        self.contains = {}
        for key, values in contains.items():
            self.contains[key] = codecache.internSet(values)

    def __getstate__(self):
        # Pickle as a bare tuple rather than an attribute dict
        return (self.refs, self.execs, self.contains)

    def __setstate__(self, state):
        # Re-run __init__ so the unpickled sets go through the set pool again
        refs, execs, contains = state
        self.__init__(refs, execs, contains)

    def __hash__(self):
        # Hash over refs, execs and the contains entries in sorted key order
        parts = [hash(self.refs), hash(self.execs)]
        for key in sorted(self.contains):
            parts.append(key)
            parts.append(hash(self.contains[key]))
        return hash(tuple(parts))

    def __repr__(self):
        return " ".join(map(str, (self.refs, self.execs, self.contains)))
104
105
class shellCacheLine(object):
    """Cache entry holding the commands executed by one piece of shell code.

    execs is stored as a pooled frozenset obtained from codecache.internSet().
    """
    def __init__(self, execs):
        self.execs = codecache.internSet(execs)

    def __getstate__(self):
        # The pickled state is the execs set itself (not wrapped in a tuple)
        return self.execs

    def __setstate__(self, state):
        # Re-run __init__ so the unpickled set goes through the set pool again
        self.__init__(state)

    def __hash__(self):
        return hash(self.execs)

    def __repr__(self):
        return str(self.execs)
120
class CodeParserCache(MultiProcessCache):
    """Cache of python and shell parse results, keyed by the hash of the code.

    Slot 0 of the cache data holds python entries and slot 1 holds shell
    entries; the same layout is used for the "extras" data.
    """
    cache_file_name = "bb_codeparser.dat"
    # NOTE: you must increment this if you change how the parsers gather information,
    # so that an existing cache gets invalidated. Additionally you'll need
    # to increment __cache_version__ in cache.py in order to ensure that old
    # recipe caches don't trigger "Taskhash mismatch" errors.
    CACHE_VERSION = 11

    def __init__(self):
        super().__init__()
        self._bind_cachedata()
        self.pythoncacheextras = self.cachedata_extras[0]
        self.shellcacheextras = self.cachedata_extras[1]

        # To avoid duplication in the codeparser cache, keep
        # a lookup of hashes of objects we already have
        self.pythoncachelines = {}
        self.shellcachelines = {}

    def _bind_cachedata(self):
        """Point the python/shell shortcuts at the current cachedata slots"""
        self.pythoncache = self.cachedata[0]
        self.shellcache = self.cachedata[1]

    def newPythonCacheLine(self, refs, execs, contains):
        """Return a pythonCacheLine for the given data, reusing an earlier
        one registered under the same hash if there is one"""
        candidate = pythonCacheLine(refs, execs, contains)
        return self.pythoncachelines.setdefault(hash(candidate), candidate)

    def newShellCacheLine(self, execs):
        """Return a shellCacheLine for the given data, reusing an earlier
        one registered under the same hash if there is one"""
        candidate = shellCacheLine(execs)
        return self.shellcachelines.setdefault(hash(candidate), candidate)

    def init_cache(self, d):
        # Check if we already have the caches
        if self.pythoncache:
            return

        super().init_cache(d)

        # cachedata gets re-assigned in the parent
        self._bind_cachedata()

    def create_cachedata(self):
        """Return fresh, empty cache data: one dict for python, one for shell"""
        return [{}, {}]


codeparsercache = CodeParserCache()
173
def parser_cache_init(d):
    """Initialise the module-wide code parser cache, passing d to its init_cache()"""
    codeparsercache.init_cache(d)
176
def parser_cache_save():
    """Call save_extras() on the module-wide code parser cache"""
    codeparsercache.save_extras()
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500179
def parser_cache_savemerge():
    """Call save_merge() on the module-wide code parser cache"""
    codeparsercache.save_merge()
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500182
Logger = logging.getLoggerClass()


class BufferedLogger(Logger):
    """Logger which stores records instead of emitting them.

    Records accumulate in self.buffer until flush() is called, which passes
    them on to the target logger.

    name   - logger name
    level  - level set on this logger
    target - logger that receives the buffered records on flush(); may be None
    """
    def __init__(self, name, level=0, target=None):
        Logger.__init__(self, name)
        self.setLevel(level)
        self.buffer = []
        self.target = target

    def handle(self, record):
        """Store the record for later instead of dispatching it to handlers"""
        self.buffer.append(record)

    def flush(self):
        """Forward the buffered records to the target and empty the buffer.

        Only records whose level is enabled on the target are forwarded.
        Without a target (the constructor default) there is nowhere to send
        the records, so they are simply discarded.
        """
        target = self.target
        if target is not None:
            for record in self.buffer:
                if target.isEnabledFor(record.levelno):
                    target.handle(record)
        self.buffer = []
199
class DummyLogger():
    """Stand-in logger object offering only a flush() that does nothing"""
    def flush(self):
        """Nothing is buffered, so there is nothing to do"""
        return None
203
class PythonParser():
    """Collect variable references and executed functions from python code.

    After parse_python() the results are available as:
      references - variable names passed as string literals to the getvars
                   calls, plus 'VAR[flag]' entries for the getvarflags calls
      execs      - names of other called functions, plus function names
                   passed as string literals to the execfuncs
      contains   - maps a variable name to the set of values it is checked
                   against by the containsfuncs/containsanyfuncs calls
    """
    getvars = (".getVar", ".appendVar", ".prependVar", "oe.utils.conditional")
    getvarflags = (".getVarFlag", ".appendVarFlag", ".prependVarFlag")
    containsfuncs = ("bb.utils.contains", "base_contains")
    containsanyfuncs = ("bb.utils.contains_any", "bb.utils.filter")
    execfuncs = ("bb.build.exec_func", "bb.build.exec_task")

    def warn(self, func, arg):
        """Warn about calls of bitbake APIs which pass a non-literal
        argument for the variable name, as we're not able to track such
        a reference.
        """

        try:
            funcstr = codegen.to_source(func)
            argstr = codegen.to_source(arg)
        except TypeError:
            self.log.debug2('Failed to convert function and argument to source form')
        else:
            self.log.debug(self.unhandled_message % (funcstr, argstr))

    @staticmethod
    def _positional(node, index):
        """Return positional argument number index of the call node, or None
        when the call has fewer positional arguments than that"""
        args = node.args
        if index < len(args):
            return args[index]
        return None

    @staticmethod
    def _str_value(node):
        """Return the text of a string literal AST node, or None for any
        other node (including None itself)"""
        if isinstance(node, ast.Constant) and isinstance(node.value, str):
            return node.value
        # Parsers before Python 3.8 produce ast.Str rather than ast.Constant;
        # ast.Str is deprecated afterwards, so only touch it on old versions
        if sys.version_info < (3, 8) and isinstance(node, ast.Str):
            return node.s
        return None

    def visit_Call(self, node):
        """Record what a single ast.Call node tells us about references,
        contains checks and executed functions"""
        name = self.called_node_name(node.func)
        first = self._positional(node, 0)
        second = self._positional(node, 1)

        if name and (name.endswith(self.getvars) or name.endswith(self.getvarflags) or name in self.containsfuncs or name in self.containsanyfuncs):
            if first is None:
                # No positional variable name at all, so nothing to track
                return
            varname = self._str_value(first)
            if varname is None:
                self.warn(node.func, first)
                return

            value = self._str_value(second)
            if name in self.containsfuncs and value is not None:
                self.contains.setdefault(varname, set()).add(value)
            elif name in self.containsanyfuncs and value is not None:
                self.contains.setdefault(varname, set()).update(value.split())
            elif name.endswith(self.getvarflags):
                if value is not None:
                    self.references.add('%s[%s]' % (varname, value))
                elif second is not None:
                    self.warn(node.func, second)
            else:
                self.references.add(varname)
        elif name and name.endswith(".expand"):
            value = self._str_value(first)
            if value is not None:
                # Expand the literal in an empty datastore purely to find
                # out what the expression refers to
                d = bb.data.init()
                parser = d.expandWithRefs(value, self.name)
                self.references |= parser.references
                self.execs |= parser.execs
                for varname in parser.contains:
                    if varname not in self.contains:
                        self.contains[varname] = set()
                    self.contains[varname] |= parser.contains[varname]
        elif name in self.execfuncs:
            funcname = self._str_value(first)
            if funcname is not None:
                self.var_execs.add(funcname)
            elif first is not None:
                self.warn(node.func, first)
        elif name and isinstance(node.func, (ast.Name, ast.Attribute)):
            self.execs.add(name)

    def called_node_name(self, node):
        """Given a called node, return its original string form

        Returns the dotted name for a plain name or attribute chain rooted
        at a name (e.g. "bb.utils.contains"), and None for anything else.
        """
        components = []
        while node:
            if isinstance(node, ast.Attribute):
                components.append(node.attr)
                node = node.value
            elif isinstance(node, ast.Name):
                components.append(node.id)
                return '.'.join(reversed(components))
            else:
                break
        return None

    def __init__(self, name, log):
        self.name = name
        self.var_execs = set()
        self.contains = {}
        self.execs = set()
        self.references = set()
        self._log = log
        # Defer init as expensive
        self.log = DummyLogger()

        self.unhandled_message = "in call of %s, argument '%s' is not a string literal"
        self.unhandled_message = "while parsing %s, %s" % (name, self.unhandled_message)

    def _load_cacheline(self, cacheline):
        """Take the results from a cache entry, copying its sets so that
        later changes to our results leave the cached entry alone"""
        self.references = set(cacheline.refs)
        self.execs = set(cacheline.execs)
        self.contains = {}
        for key in cacheline.contains:
            self.contains[key] = set(cacheline.contains[key])

    def parse_python(self, node, lineno=0, filename="<string>"):
        """Parse the python code in the string node and fill in references,
        execs and contains.

        lineno and filename are only used to give compile() the right
        location. Results are taken from the code parser cache when the
        code has been seen before; otherwise they are added to it.
        """
        if not node or not node.strip():
            return

        h = bbhash(str(node))

        for cache in (codeparsercache.pythoncache, codeparsercache.pythoncacheextras):
            if h in cache:
                self._load_cacheline(cache[h])
                return

        # Need to parse so take the hit on the real log buffer
        self.log = BufferedLogger('BitBake.Data.PythonParser', logging.DEBUG, self._log)

        # We can't add to the linenumbers for compile, we can pad to the correct number of blank lines though
        node = "\n" * int(lineno) + node
        code = compile(check_indent(str(node)), filename, "exec",
                       ast.PyCF_ONLY_AST)

        for n in ast.walk(code):
            if isinstance(n, ast.Call):
                self.visit_Call(n)

        self.execs.update(self.var_execs)

        codeparsercache.pythoncacheextras[h] = codeparsercache.newPythonCacheLine(self.references, self.execs, self.contains)
329
class ShellParser():
    """Collect the external commands executed by a piece of shell code.

    funcdefs - names of shell functions defined in the parsed code
    allexecs - every command name seen
    execs    - allexecs without the functions defined in the code itself
    """
    def __init__(self, name, log):
        self.funcdefs = set()
        self.allexecs = set()
        self.execs = set()
        self._name = name
        self._log = log
        # Defer init as expensive
        self.log = DummyLogger()

        self.unhandled_template = "unable to handle non-literal command '%s'"
        self.unhandled_template = "while parsing %s, %s" % (name, self.unhandled_template)

    def parse_shell(self, value):
        """Parse the supplied shell code in a string, returning the external
        commands it executes.
        """

        h = bbhash(str(value))

        for cache in (codeparsercache.shellcache, codeparsercache.shellcacheextras):
            if h in cache:
                self.execs = set(cache[h].execs)
                return self.execs

        # Need to parse so take the hit on the real log buffer
        self.log = BufferedLogger('BitBake.Data.%s' % self._name, logging.DEBUG, self._log)

        self._parse_shell(value)
        # Functions defined by the code itself are not external commands
        self.execs = self.allexecs - self.funcdefs

        codeparsercache.shellcacheextras[h] = codeparsercache.newShellCacheLine(self.execs)

        return self.execs

    def _parse_shell(self, value):
        """Run the shell grammar over value and process the resulting tokens.

        A parse failure is reported through bb.error() along with the last
        five lines of the code, then re-raised.
        """
        try:
            tokens, _ = pyshyacc.parse(value, eof=True, debug=False)
        except Exception:
            bb.error('Error during parse shell code, the last 5 lines are:\n%s' % '\n'.join(value.split('\n')[-5:]))
            raise

        self.process_tokens(tokens)

    def process_tokens(self, tokens):
        """Process a supplied portion of the syntax tree as returned by
        pyshyacc.parse.

        Raises NotImplementedError for a token type without a handler.
        """

        def function_definition(value):
            self.funcdefs.add(value.name)
            return [value.body], None

        def case_clause(value):
            # Element 0 of each item in the case is the list of patterns, and
            # Element 1 of each item in the case is the list of commands to be
            # executed when that pattern matches.
            words = chain(*[item[0] for item in value.items])
            cmds = chain(*[item[1] for item in value.items])
            return cmds, words

        def if_clause(value):
            main = chain(value.cond, value.if_cmds)
            rest = value.else_cmds
            # An "elif" is carried as an ("elif", clause) tuple in else_cmds
            if isinstance(rest, tuple) and rest[0] == "elif":
                return chain(main, if_clause(rest[1]))
            else:
                return chain(main, rest)

        def simple_command(value):
            return None, chain(value.words, (assign[1] for assign in value.assigns))

        # Each handler returns (further tokens to process, words to process)
        token_handlers = {
            "and_or": lambda x: ((x.left, x.right), None),
            "async": lambda x: ([x], None),
            "brace_group": lambda x: (x.cmds, None),
            "for_clause": lambda x: (x.cmds, x.items),
            "function_definition": function_definition,
            "if_clause": lambda x: (if_clause(x), None),
            "pipeline": lambda x: (x.commands, None),
            "redirect_list": lambda x: ([x.cmd], None),
            "subshell": lambda x: (x.cmds, None),
            "while_clause": lambda x: (chain(x.condition, x.cmds), None),
            "until_clause": lambda x: (chain(x.condition, x.cmds), None),
            "simple_command": simple_command,
            "case_clause": case_clause,
        }

        def process_token_list(tokens):
            for token in tokens:
                if isinstance(token, list):
                    process_token_list(token)
                    continue
                name, value = token
                # Only the table lookup is guarded, so that a KeyError coming
                # out of a handler is not mistaken for an unknown token type
                try:
                    handler = token_handlers[name]
                except KeyError:
                    raise NotImplementedError("Unsupported token type " + name)
                more_tokens, words = handler(value)

                if more_tokens:
                    self.process_tokens(more_tokens)

                if words:
                    self.process_words(words)

        process_token_list(tokens)

    def process_words(self, words):
        """Process a set of 'words' in pyshyacc parlance, which includes
        extraction of executed commands from $() blocks, as well as grabbing
        the command name argument.
        """

        words = list(words)
        # Iterate over a copy as words may be removed from the list below
        for word in list(words):
            wtree = pyshlex.make_wordtree(word[1])
            for part in wtree:
                if not isinstance(part, list):
                    continue

                if part[0] in ('`', '$('):
                    command = pyshlex.wordtree_as_string(part[1:-1])
                    self._parse_shell(command)

                    # A command word built from a substitution is dropped so
                    # that the loop below does not treat it as a command name
                    if word[0] in ("cmd_name", "cmd_word"):
                        if word in words:
                            words.remove(word)

        usetoken = False
        for word in words:
            if word[0] in ("cmd_name", "cmd_word") or \
               (usetoken and word[0] == "TOKEN"):
                if "=" in word[1]:
                    # Looks like an assignment; let a following TOKEN
                    # count as the command
                    usetoken = True
                    continue

                cmd = word[1]
                if cmd.startswith("$"):
                    self.log.debug(self.unhandled_template % cmd)
                elif cmd == "eval":
                    command = " ".join(word for _, word in words[1:])
                    self._parse_shell(command)
                else:
                    self.allexecs.add(cmd)
                break