blob: 3b3c3b41ff8159e512317f493d56d3c69dac6ef6 [file] [log] [blame]
Brad Bishopc342db32019-05-15 21:57:59 -04001#
2# SPDX-License-Identifier: GPL-2.0-only
3#
4
"""
BitBake code parser

Parses actual code (i.e. python and shell) for functions and in-line
expressions. Used mainly to determine dependencies on other functions
and variables within the BitBake metadata. Also provides a cache for
this information in order to speed up processing.

(Not to be confused with the code that parses the metadata itself,
see lib/bb/parse/ for that).

NOTE: if you change how the parsers gather information you will almost
certainly need to increment CodeParserCache.CACHE_VERSION below so that
any existing codeparser cache gets invalidated. Additionally you'll need
to increment __cache_version__ in cache.py in order to ensure that old
recipe caches don't trigger "Taskhash mismatch" errors.

"""
23
Patrick Williamsc124f4f2015-09-15 14:41:29 -050024import ast
Patrick Williamsc0f7c042017-02-23 20:41:17 -060025import sys
Patrick Williamsc124f4f2015-09-15 14:41:29 -050026import codegen
27import logging
Patrick Williamsc0f7c042017-02-23 20:41:17 -060028import bb.pysh as pysh
Patrick Williamsc124f4f2015-09-15 14:41:29 -050029import bb.utils, bb.data
Patrick Williamsc0f7c042017-02-23 20:41:17 -060030import hashlib
Patrick Williamsc124f4f2015-09-15 14:41:29 -050031from itertools import chain
Andrew Geissler82c905d2020-04-13 13:39:40 -050032from bb.pysh import pyshyacc, pyshlex
Patrick Williamsc124f4f2015-09-15 14:41:29 -050033from bb.cache import MultiProcessCache
34
Patrick Williamsc124f4f2015-09-15 14:41:29 -050035logger = logging.getLogger('BitBake.CodeParser')
36
def bbhash(s):
    """Return the hexadecimal SHA-256 digest of the string s, encoded as UTF-8."""
    digest = hashlib.sha256()
    digest.update(s.encode("utf-8"))
    return digest.hexdigest()
Patrick Williamsc124f4f2015-09-15 14:41:29 -050039
def check_indent(codestr):
    """If the code is indented, add a top level piece of code to 'remove' the indentation

    The leading run of newlines, tabs and spaces is inspected. When that run
    ends in a tab or a space (i.e. the first real line is indented), the code
    is wrapped in an "if 1:" block so that it compiles. A single leading
    newline is dropped in that case to compensate for the added line.

    Empty or whitespace-only input is returned unchanged.
    """

    whitespace = ("\n", "\t", " ")
    length = len(codestr)

    # Bounded scan: never index past the end of an all-whitespace string
    i = 0
    while i < length and codestr[i] in whitespace:
        i = i + 1

    # Nothing leading, or nothing but whitespace: there is no code to re-indent
    if i == 0 or i == length:
        return codestr

    if codestr[i-1] in ("\t", " "):
        if codestr[0] == "\n":
            # Since we're adding a line, we need to remove one line of any empty padding
            # to ensure line numbers are correct
            codestr = codestr[1:]
        return "if 1:\n" + codestr

    return codestr
58
Patrick Williamsc124f4f2015-09-15 14:41:29 -050059# A custom getstate/setstate using tuples is actually worth 15% cachesize by
60# avoiding duplication of the attribute names!
61
Andrew Geissler82c905d2020-04-13 13:39:40 -050062
class SetCache(object):
    """Intern table for sets of strings.

    internSet() returns one shared frozenset object for every collection
    holding the same strings, so that equal sets are only stored once.
    """

    def __init__(self):
        # Maps each interned frozenset to itself. Keying on the set (rather
        # than on its hash value) means two different sets whose hashes
        # happen to collide can never be mistaken for each other.
        self.setcache = {}

    def internSet(self, items):
        """Return the shared frozenset holding the (interned) strings in items."""
        s = frozenset(sys.intern(i) for i in items)
        return self.setcache.setdefault(s, s)

codecache = SetCache()
80
class pythonCacheLine(object):
    """Cached result of parsing one piece of python code.

    Holds the interned sets 'refs' and 'execs' plus 'contains', a dict
    mapping a name to an interned set. The pickled state is a plain tuple
    (see the comment above about cache size).
    """

    def __init__(self, refs, execs, contains):
        self.refs = codecache.internSet(refs)
        self.execs = codecache.internSet(execs)
        self.contains = {key: codecache.internSet(contains[key]) for key in contains}

    def __getstate__(self):
        return (self.refs, self.execs, self.contains)

    def __setstate__(self, state):
        # Re-run __init__ so the restored sets are interned again
        refs, execs, contains = state
        self.__init__(refs, execs, contains)

    def __hash__(self):
        # Flat tuple: hash(refs), hash(execs), then key/hash pairs in sorted key order
        parts = [hash(self.refs), hash(self.execs)]
        for key in sorted(self.contains):
            parts.append(key)
            parts.append(hash(self.contains[key]))
        return hash(tuple(parts))

    def __repr__(self):
        return " ".join(str(field) for field in (self.refs, self.execs, self.contains))
102
103
class shellCacheLine(object):
    """Cached result of parsing one piece of shell code: the interned set 'execs'."""

    def __init__(self, execs):
        self.execs = codecache.internSet(execs)

    def __getstate__(self):
        # The state is the bare set itself, not a 1-tuple
        return self.execs

    def __setstate__(self, state):
        # Re-run __init__ so the restored set is interned again
        self.__init__(state)

    def __hash__(self):
        return hash(self.execs)

    def __repr__(self):
        return str(self.execs)
118
class CodeParserCache(MultiProcessCache):
    """Cache of python and shell parse results, built on MultiProcessCache.

    Slot 0 of cachedata / cachedata_extras holds python entries and slot 1
    holds shell entries (both lists are provided by the base class).
    """
    cache_file_name = "bb_codeparser.dat"
    # NOTE: you must increment this if you change how the parsers gather information,
    # so that an existing cache gets invalidated. Additionally you'll need
    # to increment __cache_version__ in cache.py in order to ensure that old
    # recipe caches don't trigger "Taskhash mismatch" errors.
    CACHE_VERSION = 11

    def __init__(self):
        super().__init__()
        self.pythoncache = self.cachedata[0]
        self.shellcache = self.cachedata[1]
        self.pythoncacheextras = self.cachedata_extras[0]
        self.shellcacheextras = self.cachedata_extras[1]

        # To avoid duplication in the codeparser cache, keep
        # a lookup of hashes of objects we already have
        self.pythoncachelines = {}
        self.shellcachelines = {}

    def newPythonCacheLine(self, refs, execs, contains):
        """Return a pythonCacheLine for the given data, reusing an earlier one with the same hash."""
        candidate = pythonCacheLine(refs, execs, contains)
        return self.pythoncachelines.setdefault(hash(candidate), candidate)

    def newShellCacheLine(self, execs):
        """Return a shellCacheLine for the given data, reusing an earlier one with the same hash."""
        candidate = shellCacheLine(execs)
        return self.shellcachelines.setdefault(hash(candidate), candidate)

    def init_cache(self, d):
        """Load the caches through the base class unless the python cache is already populated."""
        if not self.pythoncache:
            MultiProcessCache.init_cache(self, d)

            # cachedata gets re-assigned in the parent
            self.pythoncache = self.cachedata[0]
            self.shellcache = self.cachedata[1]

    def create_cachedata(self):
        """Return empty cache data: one dict for python entries, one for shell entries."""
        return [{}, {}]

codeparsercache = CodeParserCache()
171
def parser_cache_init(d):
    """Initialise the module-level codeparsercache; d is passed straight to its init_cache()."""
    codeparsercache.init_cache(d)
174
def parser_cache_save():
    """Call save_extras() on the module-level codeparsercache."""
    codeparsercache.save_extras()
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500177
def parser_cache_savemerge():
    """Call save_merge() on the module-level codeparsercache."""
    codeparsercache.save_merge()
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500180
Logger = logging.getLoggerClass()
class BufferedLogger(Logger):
    """Logger which holds its records in memory until flush() is called.

    handle() stores each record instead of dispatching it; flush() passes
    the stored records to 'target' (when target is enabled for the record's
    level) and then empties the buffer.
    """

    def __init__(self, name, level=0, target=None):
        super().__init__(name)
        self.setLevel(level)
        self.buffer = []
        self.target = target

    def handle(self, record):
        self.buffer.append(record)

    def flush(self):
        destination = self.target
        for pending in self.buffer:
            if not destination.isEnabledFor(pending.levelno):
                continue
            destination.handle(pending)
        self.buffer = []
197
class DummyLogger():
    """Placeholder used before a real log buffer is needed; its flush() does nothing."""

    def flush(self):
        return None
201
class PythonParser():
    """Extract variable references and executed functions from python code.

    After parse_python() the results are available as:
      references - names passed as string literals to the getvars calls, and
                   'name[flag]' entries from the getvarflags calls
      execs      - dotted names of the functions that are called, plus the
                   string literals passed to the execfuncs calls
      contains   - dict mapping a name to the set of values it is checked
                   against by the containsfuncs / containsanyfuncs calls
    """
    getvars = (".getVar", ".appendVar", ".prependVar", "oe.utils.conditional")
    getvarflags = (".getVarFlag", ".appendVarFlag", ".prependVarFlag")
    containsfuncs = ("bb.utils.contains", "base_contains")
    containsanyfuncs = ("bb.utils.contains_any", "bb.utils.filter")
    execfuncs = ("bb.build.exec_func", "bb.build.exec_task")

    def warn(self, func, arg):
        """Warn about calls of bitbake APIs which pass a non-literal
        argument for the variable name, as we're not able to track such
        a reference.
        """

        try:
            funcstr = codegen.to_source(func)
            argstr = codegen.to_source(arg)
        except TypeError:
            self.log.debug2('Failed to convert function and argument to source form')
        else:
            self.log.debug(self.unhandled_message % (funcstr, argstr))

    @staticmethod
    def _literal_str(node):
        """Return the value of node if it is a string literal, otherwise None.

        String literals are ast.Constant nodes from Python 3.8 onwards;
        ast.Str is deprecated and later removed, so it is only consulted on
        older interpreters, where the parser still produces it.
        """
        if sys.version_info >= (3, 8):
            if isinstance(node, ast.Constant) and isinstance(node.value, str):
                return node.value
            return None
        if isinstance(node, ast.Str):
            return node.s
        return None

    @staticmethod
    def _positional_arg(call, index):
        """Return positional argument number index of the call node, or None if absent."""
        if len(call.args) > index:
            return call.args[index]
        return None

    def visit_Call(self, node):
        """Record what a single ast.Call node references, checks or executes.

        Calls which match one of the tracked APIs but do not supply the
        positional arguments needed are skipped: there is nothing to track
        and no argument to report.
        """
        name = self.called_node_name(node.func)
        first = self._positional_arg(node, 0)
        second = self._positional_arg(node, 1)

        if name and (name.endswith(self.getvars) or name.endswith(self.getvarflags) or name in self.containsfuncs or name in self.containsanyfuncs):
            if first is None:
                return
            varname = self._literal_str(first)
            if varname is None:
                self.warn(node.func, first)
                return

            value = self._literal_str(second)
            if name in self.containsfuncs and value is not None:
                self.contains.setdefault(varname, set()).add(value)
            elif name in self.containsanyfuncs and value is not None:
                # The second argument is a whitespace separated list of values
                self.contains.setdefault(varname, set()).update(value.split())
            elif name.endswith(self.getvarflags):
                if value is not None:
                    self.references.add('%s[%s]' % (varname, value))
                elif second is not None:
                    self.warn(node.func, second)
            else:
                self.references.add(varname)
        elif name and name.endswith(".expand"):
            value = self._literal_str(first)
            if value is not None:
                # Expand the literal against an empty datastore purely to
                # collect what the expression itself refers to
                d = bb.data.init()
                parser = d.expandWithRefs(value, self.name)
                self.references |= parser.references
                self.execs |= parser.execs
                for varname in parser.contains:
                    self.contains.setdefault(varname, set())
                    self.contains[varname] |= parser.contains[varname]
        elif name in self.execfuncs:
            funcname = self._literal_str(first)
            if funcname is not None:
                self.var_execs.add(funcname)
            elif first is not None:
                self.warn(node.func, first)
        elif name and isinstance(node.func, (ast.Name, ast.Attribute)):
            self.execs.add(name)

    def called_node_name(self, node):
        """Given a called node, return its original string form

        Returns None when the call target is not a plain (dotted) name.
        """
        components = []
        while node:
            if isinstance(node, ast.Attribute):
                components.append(node.attr)
                node = node.value
            elif isinstance(node, ast.Name):
                components.append(node.id)
                return '.'.join(reversed(components))
            else:
                break
        return None

    def __init__(self, name, log):
        self.name = name
        self.var_execs = set()
        self.contains = {}
        self.execs = set()
        self.references = set()
        self._log = log
        # Defer init as expensive
        self.log = DummyLogger()

        self.unhandled_message = "in call of %s, argument '%s' is not a string literal"
        self.unhandled_message = "while parsing %s, %s" % (name, self.unhandled_message)

    def parse_python(self, node, lineno=0, filename="<string>"):
        """Parse the python code in the string node and fill in references,
        execs and contains.

        Results are taken from the codeparser cache when the code has been
        seen before, otherwise the code is compiled to an AST, every call in
        it is examined and the result is added to the cache extras. lineno
        and filename are only used to give the compiled code its position.
        """
        if not node or not node.strip():
            return

        h = bbhash(str(node))

        for cache in (codeparsercache.pythoncache, codeparsercache.pythoncacheextras):
            if h in cache:
                line = cache[h]
                # Copy, so the shared cached sets are never modified
                self.references = set(line.refs)
                self.execs = set(line.execs)
                self.contains = {}
                for i in line.contains:
                    self.contains[i] = set(line.contains[i])
                return

        # Need to parse so take the hit on the real log buffer
        self.log = BufferedLogger('BitBake.Data.PythonParser', logging.DEBUG, self._log)

        # We can't add to the linenumbers for compile, we can pad to the correct number of blank lines though
        node = "\n" * int(lineno) + node
        code = compile(check_indent(str(node)), filename, "exec",
                       ast.PyCF_ONLY_AST)

        for n in ast.walk(code):
            if isinstance(n, ast.Call):
                self.visit_Call(n)

        self.execs.update(self.var_execs)

        codeparsercache.pythoncacheextras[h] = codeparsercache.newPythonCacheLine(self.references, self.execs, self.contains)
327
class ShellParser():
    """Work out which commands a piece of shell code executes.

    funcdefs collects the names of functions defined in the code, allexecs
    every command name seen, and execs the commands which are executed but
    not defined in the code itself.
    """

    def __init__(self, name, log):
        self.funcdefs = set()
        self.allexecs = set()
        self.execs = set()
        self._name = name
        self._log = log
        # Defer init as expensive
        self.log = DummyLogger()

        self.unhandled_template = "while parsing %s, %s" % (name, "unable to handle non-literal command '%s'")

    def parse_shell(self, value):
        """Parse the supplied shell code in a string, returning the external
        commands it executes.
        """

        h = bbhash(str(value))

        # Previously seen code: hand back a copy of the cached result
        for cache in (codeparsercache.shellcache, codeparsercache.shellcacheextras):
            if h in cache:
                self.execs = set(cache[h].execs)
                return self.execs

        # Need to parse so take the hit on the real log buffer
        self.log = BufferedLogger('BitBake.Data.%s' % self._name, logging.DEBUG, self._log)

        self._parse_shell(value)
        # Commands defined as functions in the code itself are not external
        self.execs = self.allexecs - self.funcdefs

        codeparsercache.shellcacheextras[h] = codeparsercache.newShellCacheLine(self.execs)

        return self.execs

    def _parse_shell(self, value):
        """Run the shell parser over value and process the resulting tokens.

        A parse failure is reported with the last five lines of the code and
        then re-raised.
        """
        try:
            tokens, _ = pyshyacc.parse(value, eof=True, debug=False)
        except Exception:
            tail = '\n'.join(value.split('\n')[-5:])
            bb.error('Error during parse shell code, the last 5 lines are:\n%s' % tail)
            raise
        else:
            self.process_tokens(tokens)

    def process_tokens(self, tokens):
        """Process a supplied portion of the syntax tree as returned by
        pyshyacc.parse.
        """

        # Each handler returns a pair: (further tokens to process, words to process)

        def function_definition(value):
            self.funcdefs.add(value.name)
            return [value.body], None

        def case_clause(value):
            # Element 0 of each item in the case is the list of patterns, and
            # Element 1 of each item in the case is the list of commands to be
            # executed when that pattern matches.
            patterns = chain.from_iterable([item[0] for item in value.items])
            commands = chain.from_iterable([item[1] for item in value.items])
            return commands, patterns

        def if_clause(value):
            head = chain(value.cond, value.if_cmds)
            tail = value.else_cmds
            # An elif is stored as a nested if clause
            is_elif = isinstance(tail, tuple) and tail[0] == "elif"
            return chain(head, if_clause(tail[1]) if is_elif else tail)

        def simple_command(value):
            return None, chain(value.words, (assign[1] for assign in value.assigns))

        token_handlers = {
            "and_or": lambda x: ((x.left, x.right), None),
            "async": lambda x: ([x], None),
            "brace_group": lambda x: (x.cmds, None),
            "for_clause": lambda x: (x.cmds, x.items),
            "function_definition": function_definition,
            "if_clause": lambda x: (if_clause(x), None),
            "pipeline": lambda x: (x.commands, None),
            "redirect_list": lambda x: ([x.cmd], None),
            "subshell": lambda x: (x.cmds, None),
            "while_clause": lambda x: (chain(x.condition, x.cmds), None),
            "until_clause": lambda x: (chain(x.condition, x.cmds), None),
            "simple_command": simple_command,
            "case_clause": case_clause,
        }

        def process_token_list(token_list):
            for token in token_list:
                if isinstance(token, list):
                    process_token_list(token)
                    continue
                kind, payload = token
                try:
                    nested, words = token_handlers[kind](payload)
                except KeyError:
                    raise NotImplementedError("Unsupported token type " + kind)

                if nested:
                    self.process_tokens(nested)

                if words:
                    self.process_words(words)

        process_token_list(tokens)

    def process_words(self, words):
        """Process a set of 'words' in pyshyacc parlance, which includes
        extraction of executed commands from $() blocks, as well as grabbing
        the command name argument.
        """

        command_kinds = ("cmd_name", "cmd_word")

        entries = list(words)
        # Iterate over a copy, as entries may be removed while we go
        for entry in list(entries):
            for part in pyshlex.make_wordtree(entry[1]):
                if not isinstance(part, list) or part[0] not in ('`', '$('):
                    continue

                # Command substitution: parse the embedded command as well
                self._parse_shell(pyshlex.wordtree_as_string(part[1:-1]))

                if entry[0] in command_kinds and entry in entries:
                    entries.remove(entry)

        accept_token = False
        for entry in entries:
            is_command = entry[0] in command_kinds or \
                         (accept_token and entry[0] == "TOKEN")
            if not is_command:
                continue

            if "=" in entry[1]:
                # An assignment; the command may be in a following TOKEN
                accept_token = True
                continue

            cmd = entry[1]
            if cmd.startswith("$"):
                self.log.debug(self.unhandled_template % cmd)
            elif cmd == "eval":
                self._parse_shell(" ".join(text for _, text in entries[1:]))
            else:
                self.allexecs.add(cmd)
            break