blob: 0cec452c00527631883bc0f4d0d1b2e0cb805d2f [file] [log] [blame]
#
# SPDX-License-Identifier: GPL-2.0-only
#

"""
BitBake code parser

Parses actual code (i.e. python and shell) for functions and in-line
expressions. Used mainly to determine dependencies on other functions
and variables within the BitBake metadata. Also provides a cache for
this information in order to speed up processing.

(Not to be confused with the code that parses the metadata itself,
see lib/bb/parse/ for that).

NOTE: if you change how the parsers gather information you will almost
certainly need to increment CodeParserCache.CACHE_VERSION below so that
any existing codeparser cache gets invalidated. Additionally you'll need
to increment __cache_version__ in cache.py in order to ensure that old
recipe caches don't trigger "Taskhash mismatch" errors.

"""

Patrick Williamsc124f4f2015-09-15 14:41:29 -050024import ast
Patrick Williamsc0f7c042017-02-23 20:41:17 -060025import sys
Patrick Williamsc124f4f2015-09-15 14:41:29 -050026import codegen
27import logging
Patrick Williamsc0f7c042017-02-23 20:41:17 -060028import bb.pysh as pysh
Patrick Williamsc124f4f2015-09-15 14:41:29 -050029import bb.utils, bb.data
Patrick Williamsc0f7c042017-02-23 20:41:17 -060030import hashlib
Patrick Williamsc124f4f2015-09-15 14:41:29 -050031from itertools import chain
Andrew Geissler82c905d2020-04-13 13:39:40 -050032from bb.pysh import pyshyacc, pyshlex
Patrick Williamsc124f4f2015-09-15 14:41:29 -050033from bb.cache import MultiProcessCache
34
Patrick Williamsc124f4f2015-09-15 14:41:29 -050035logger = logging.getLogger('BitBake.CodeParser')
36
def bbhash(s):
    """Return the hexadecimal SHA-256 digest of the string s (UTF-8 encoded)."""
    digest = hashlib.sha256()
    digest.update(s.encode("utf-8"))
    return digest.hexdigest()
Patrick Williamsc124f4f2015-09-15 14:41:29 -050039
def check_indent(codestr):
    """If the code is indented, add a top level piece of code to 'remove' the indentation

    When the first line of real code in codestr starts with a tab or a
    space, an "if 1:" line is put in front of the code so that the
    indentation becomes valid. If codestr begins with an empty line, that
    line is dropped in exchange so the line numbers stay correct.

    Code which is not indented is returned unchanged, and so is a string
    which is empty or contains nothing but whitespace.
    """

    body = codestr.lstrip("\n\t ")
    if not body:
        # Empty or whitespace-only input: there is no code to wrap, and
        # looking for the first code character would run off the end of
        # the string.
        return codestr

    # Index of the first character which is not a newline, tab or space
    i = len(codestr) - len(body)
    if i == 0:
        return codestr

    if codestr[i - 1] in ("\t", " "):
        if codestr[0] == "\n":
            # Since we're adding a line, we need to remove one line of any empty padding
            # to ensure line numbers are correct
            codestr = codestr[1:]
        return "if 1:\n" + codestr

    return codestr
58
# A custom getstate/setstate using tuples is actually worth 15% cachesize by
# avoiding duplication of the attribute names!
61
Andrew Geissler82c905d2020-04-13 13:39:40 -050062
class SetCache(object):
    """Pool of frozensets of interned strings.

    Sets handed out by internSet() are remembered in self.setcache under
    their hash() value, so asking again for the same items gives back the
    object that was stored the first time.
    """

    def __init__(self):
        # hash(frozenset) -> frozenset
        self.setcache = {}

    def internSet(self, items):
        """Return a frozenset of the (interned) strings in items.

        If a set with the same hash has been stored before, that stored
        set is returned; otherwise the new set is stored and returned.
        """
        candidate = frozenset([sys.intern(item) for item in items])
        return self.setcache.setdefault(hash(candidate), candidate)

codecache = SetCache()
80
class pythonCacheLine(object):
    """Cache entry for a piece of python code.

    Holds the refs and execs sets and the contains mapping (name -> set),
    all stored as sets obtained from codecache.internSet().
    """

    def __init__(self, refs, execs, contains):
        self.refs = codecache.internSet(refs)
        self.execs = codecache.internSet(execs)
        self.contains = {}
        for key in contains:
            self.contains[key] = codecache.internSet(contains[key])

    def __getstate__(self):
        # Pickle as a plain tuple rather than an attribute dictionary
        return (self.refs, self.execs, self.contains)

    def __setstate__(self, state):
        # Going through __init__ re-interns the sets after unpickling
        refs, execs, contains = state
        self.__init__(refs, execs, contains)

    def __hash__(self):
        # Walk contains in sorted key order so the result does not depend
        # on the order in which the dictionary was filled
        parts = [hash(self.refs), hash(self.execs)]
        for key in sorted(self.contains.keys()):
            parts.append(key)
            parts.append(hash(self.contains[key]))
        return hash(tuple(parts))

    def __repr__(self):
        fields = (self.refs, self.execs, self.contains)
        return " ".join(str(field) for field in fields)
102
103
class shellCacheLine(object):
    """Cache entry for a piece of shell code: the set of commands it executes."""

    def __init__(self, execs):
        self.execs = codecache.internSet(execs)

    def __getstate__(self):
        # The pickled state is the execs set itself (not wrapped in a tuple)
        return self.execs

    def __setstate__(self, state):
        # Going through __init__ re-interns the set after unpickling
        self.__init__(state)

    def __hash__(self):
        return hash(self.execs)

    def __repr__(self):
        return str(self.execs)
118
class CodeParserCache(MultiProcessCache):
    """Cache of code parsing results, built on MultiProcessCache.

    Element 0 of the cache data holds the entries for python code and
    element 1 the entries for shell code; the same layout is used for the
    "extras" data.
    """

    cache_file_name = "bb_codeparser.dat"
    # NOTE: you must increment this if you change how the parsers gather information,
    # so that an existing cache gets invalidated. Additionally you'll need
    # to increment __cache_version__ in cache.py in order to ensure that old
    # recipe caches don't trigger "Taskhash mismatch" errors.
    CACHE_VERSION = 11

    def __init__(self):
        super().__init__()
        self.pythoncache = self.cachedata[0]
        self.shellcache = self.cachedata[1]
        self.pythoncacheextras = self.cachedata_extras[0]
        self.shellcacheextras = self.cachedata_extras[1]

        # To avoid duplication in the codeparser cache, keep
        # a lookup of hashes of objects we already have
        self.pythoncachelines = {}
        self.shellcachelines = {}

    def newPythonCacheLine(self, refs, execs, contains):
        """Return a pythonCacheLine for the given data.

        A line with the same hash which was created earlier is reused
        instead of the new one.
        """
        line = pythonCacheLine(refs, execs, contains)
        return self.pythoncachelines.setdefault(hash(line), line)

    def newShellCacheLine(self, execs):
        """Return a shellCacheLine for the given data.

        A line with the same hash which was created earlier is reused
        instead of the new one.
        """
        line = shellCacheLine(execs)
        return self.shellcachelines.setdefault(hash(line), line)

    def init_cache(self, d):
        """Initialise the cache through MultiProcessCache.init_cache(),
        unless the python cache already has content."""
        # Only initialise if we don't already have the caches
        if not self.pythoncache:
            MultiProcessCache.init_cache(self, d)

            # cachedata gets re-assigned in the parent
            self.pythoncache = self.cachedata[0]
            self.shellcache = self.cachedata[1]

    def create_cachedata(self):
        """Return empty cache data: one dictionary for python, one for shell."""
        return [{}, {}]

codeparsercache = CodeParserCache()
171
def parser_cache_init(d):
    """Initialise the module level codeparsercache, passing d to its init_cache()."""
    codeparsercache.init_cache(d)
174
def parser_cache_save():
    """Call save_extras() on the module level codeparsercache."""
    codeparsercache.save_extras()
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500177
def parser_cache_savemerge():
    """Call save_merge() on the module level codeparsercache."""
    codeparsercache.save_merge()
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500180
Logger = logging.getLoggerClass()
class BufferedLogger(Logger):
    """Logger which stores its records until flush() hands them to a target.

    name   - name of this logger
    level  - level set on this logger
    target - logger which receives the stored records on flush()
    """

    def __init__(self, name, level=0, target=None):
        super().__init__(name)
        self.setLevel(level)
        self.buffer = []
        self.target = target

    def handle(self, record):
        # Hold on to the record instead of passing it to any handlers
        self.buffer.append(record)

    def flush(self):
        """Pass the stored records for which the target is enabled to the
        target, then empty the buffer."""
        target = self.target
        for record in self.buffer:
            if not target.isEnabledFor(record.levelno):
                continue
            target.handle(record)
        self.buffer = []
197
class PythonParser():
    """Find the variables and functions a piece of python code depends on.

    parse_python() fills in:
      references - variable names (and "name[flag]" entries) passed as
                   string literals to the getvars/getvarflags style calls
      execs      - names of the other functions which are called
      contains   - variable name -> set of values it is checked against
                   through the containsfuncs/containsanyfuncs helpers
    """

    # Calls matched by the end of their dotted name
    getvars = (".getVar", ".appendVar", ".prependVar", "oe.utils.conditional")
    getvarflags = (".getVarFlag", ".appendVarFlag", ".prependVarFlag")
    # Calls matched by their complete dotted name
    containsfuncs = ("bb.utils.contains", "base_contains")
    containsanyfuncs = ("bb.utils.contains_any", "bb.utils.filter")
    execfuncs = ("bb.build.exec_func", "bb.build.exec_task")

    def warn(self, func, arg):
        """Warn about calls of bitbake APIs which pass a non-literal
        argument for the variable name, as we're not able to track such
        a reference.
        """

        try:
            funcstr = codegen.to_source(func)
            argstr = codegen.to_source(arg)
        except TypeError:
            self.log.debug2('Failed to convert function and argument to source form')
        else:
            self.log.debug(self.unhandled_message % (funcstr, argstr))

    @staticmethod
    def _literal_str(node):
        """Return the value of node if it is a string literal, else None.

        String literals are ast.Constant nodes holding a str. The
        ast.Str class used for them by older python versions is
        deprecated, so such nodes are recognised by class name rather
        than by referring to ast.Str.
        """
        if isinstance(node, ast.Constant):
            return node.value if isinstance(node.value, str) else None
        if node.__class__.__name__ == "Str":
            return node.s
        return None

    def visit_Call(self, node):
        """Record what the call in node tells us about references,
        contains and execs.

        Calls which lack the positional argument that would carry the
        name are skipped, as there is nothing to track (or warn about).
        """
        name = self.called_node_name(node.func)
        if not name:
            return

        args = node.args
        if name.endswith(self.getvars) or name.endswith(self.getvarflags) or name in self.containsfuncs or name in self.containsanyfuncs:
            if not args:
                return
            varname = self._literal_str(args[0])
            if varname is None:
                self.warn(node.func, args[0])
                return

            # Value of the second argument if it is a string literal
            second = self._literal_str(args[1]) if len(args) > 1 else None
            if name in self.containsfuncs and second is not None:
                self.contains.setdefault(varname, set()).add(second)
            elif name in self.containsanyfuncs and second is not None:
                # The second argument is a whitespace separated list of values
                self.contains.setdefault(varname, set()).update(second.split())
            elif name.endswith(self.getvarflags):
                if second is not None:
                    self.references.add('%s[%s]' % (varname, second))
                elif len(args) > 1:
                    self.warn(node.func, args[1])
            else:
                self.references.add(varname)
        elif name.endswith(".expand"):
            value = self._literal_str(args[0]) if args else None
            if value is not None:
                # Merge in whatever the expression being expanded refers to
                d = bb.data.init()
                parser = d.expandWithRefs(value, self.name)
                self.references |= parser.references
                self.execs |= parser.execs
                for varname in parser.contains:
                    self.contains.setdefault(varname, set()).update(parser.contains[varname])
        elif name in self.execfuncs:
            if not args:
                return
            funcname = self._literal_str(args[0])
            if funcname is not None:
                self.var_execs.add(funcname)
            else:
                self.warn(node.func, args[0])
        elif isinstance(node.func, (ast.Name, ast.Attribute)):
            self.execs.add(name)

    def called_node_name(self, node):
        """Given a called node, return its original string form

        Returns None if the node is anything other than a chain of
        attribute accesses ending in a plain name.
        """
        components = []
        while node:
            if isinstance(node, ast.Attribute):
                components.append(node.attr)
                node = node.value
            elif isinstance(node, ast.Name):
                components.append(node.id)
                return '.'.join(reversed(components))
            else:
                break
        return None

    def __init__(self, name, log):
        """name is used in messages and passed on when expanding
        expressions; log is the logger which receives the buffered
        messages when self.log is flushed."""
        self.name = name
        self.var_execs = set()
        self.contains = {}
        self.execs = set()
        self.references = set()
        self.log = BufferedLogger('BitBake.Data.PythonParser', logging.DEBUG, log)

        self.unhandled_message = "in call of %s, argument '%s' is not a string literal"
        self.unhandled_message = "while parsing %s, %s" % (name, self.unhandled_message)

    def parse_python(self, node, lineno=0, filename="<string>"):
        """Parse the python code in the string node and fill in
        references, execs and contains.

        The result is taken from the codeparser cache when the code has
        been seen before, and added to the cache extras otherwise.
        lineno and filename are used when compiling the code. Nothing is
        done for empty or whitespace-only code.
        """
        if not node or not node.strip():
            return

        h = bbhash(str(node))

        for cache in (codeparsercache.pythoncache, codeparsercache.pythoncacheextras):
            if h in cache:
                cacheline = cache[h]
                # Copy, so the cached sets are never modified by the caller
                self.references = set(cacheline.refs)
                self.execs = set(cacheline.execs)
                self.contains = {}
                for i in cacheline.contains:
                    self.contains[i] = set(cacheline.contains[i])
                return

        # We can't add to the linenumbers for compile, we can pad to the correct number of blank lines though
        node = "\n" * int(lineno) + node
        code = compile(check_indent(str(node)), filename, "exec",
                       ast.PyCF_ONLY_AST)

        for n in ast.walk(code):
            if isinstance(n, ast.Call):
                self.visit_Call(n)

        self.execs.update(self.var_execs)

        codeparsercache.pythoncacheextras[h] = codeparsercache.newPythonCacheLine(self.references, self.execs, self.contains)
318
class ShellParser():
    """Find the external commands which a piece of shell code executes.

    funcdefs collects the names of the shell functions defined in the
    code, allexecs every command name found, and execs the commands
    which are not functions defined by the code itself.
    """

    def __init__(self, name, log):
        """name is used in messages and in the name of the buffering
        logger; log is the logger which receives the buffered messages
        when self.log is flushed."""
        self.funcdefs = set()
        self.allexecs = set()
        self.execs = set()
        self.log = BufferedLogger('BitBake.Data.%s' % name, logging.DEBUG, log)
        self.unhandled_template = "unable to handle non-literal command '%s'"
        self.unhandled_template = "while parsing %s, %s" % (name, self.unhandled_template)

    def parse_shell(self, value):
        """Parse the supplied shell code in a string, returning the external
        commands it executes.
        """

        h = bbhash(str(value))

        # Code which has been seen before is answered from the cache
        for cache in (codeparsercache.shellcache, codeparsercache.shellcacheextras):
            if h in cache:
                self.execs = set(cache[h].execs)
                return self.execs

        self._parse_shell(value)
        # Functions defined by the code itself are not external commands
        self.execs = self.allexecs.difference(self.funcdefs)

        codeparsercache.shellcacheextras[h] = codeparsercache.newShellCacheLine(self.execs)

        return self.execs

    def _parse_shell(self, value):
        """Parse the shell code in value and process the resulting tokens.

        A parse failure is reported through bb.error() together with the
        last five lines of the code, and the exception is re-raised.
        """
        try:
            tokens, _ = pyshyacc.parse(value, eof=True, debug=False)
        except Exception:
            last_lines = value.split('\n')[-5:]
            bb.error('Error during parse shell code, the last 5 lines are:\n%s' % '\n'.join(last_lines))
            raise

        self.process_tokens(tokens)

    def process_tokens(self, tokens):
        """Process a supplied portion of the syntax tree as returned by
        pyshyacc.parse.
        """

        # Every handler returns a pair: the tokens nested inside the
        # construct (handled by process_tokens) and its words (handled
        # by process_words). Either may be None.

        def function_definition(value):
            self.funcdefs.add(value.name)
            return [value.body], None

        def case_clause(value):
            # Element 0 of each item in the case is the list of patterns, and
            # Element 1 of each item in the case is the list of commands to be
            # executed when that pattern matches.
            patterns = [item[0] for item in value.items]
            commands = [item[1] for item in value.items]
            return chain.from_iterable(commands), chain.from_iterable(patterns)

        def if_clause(value):
            # Condition and body, followed by either the nested "elif"
            # clause or the commands of the else branch
            rest = value.else_cmds
            if isinstance(rest, tuple) and rest[0] == "elif":
                rest = if_clause(rest[1])
            return chain(chain(value.cond, value.if_cmds), rest)

        def simple_command(value):
            assigned = (assign[1] for assign in value.assigns)
            return None, chain(value.words, assigned)

        def loop_clause(value):
            return chain(value.condition, value.cmds), None

        token_handlers = {
            "and_or": lambda x: ((x.left, x.right), None),
            "async": lambda x: ([x], None),
            "brace_group": lambda x: (x.cmds, None),
            "for_clause": lambda x: (x.cmds, x.items),
            "function_definition": function_definition,
            "if_clause": lambda x: (if_clause(x), None),
            "pipeline": lambda x: (x.commands, None),
            "redirect_list": lambda x: ([x.cmd], None),
            "subshell": lambda x: (x.cmds, None),
            "while_clause": loop_clause,
            "until_clause": loop_clause,
            "simple_command": simple_command,
            "case_clause": case_clause,
        }

        def walk(entries):
            for entry in entries:
                if isinstance(entry, list):
                    # Nested list of tokens
                    walk(entry)
                    continue

                kind, value = entry
                try:
                    more_tokens, words = token_handlers[kind](value)
                except KeyError:
                    raise NotImplementedError("Unsupported token type " + kind)

                if more_tokens:
                    self.process_tokens(more_tokens)

                if words:
                    self.process_words(words)

        walk(tokens)

    def process_words(self, words):
        """Process a set of 'words' in pyshyacc parlance, which includes
        extraction of executed commands from $() blocks, as well as grabbing
        the command name argument.
        """

        command_kinds = ("cmd_name", "cmd_word")

        words = list(words)
        # First pass: parse the code inside `...` and $(...) substitutions.
        # Iterate over a copy, as entries may be removed from words.
        for word in list(words):
            for part in pyshlex.make_wordtree(word[1]):
                if not isinstance(part, list):
                    continue
                if part[0] not in ('`', '$('):
                    continue

                self._parse_shell(pyshlex.wordtree_as_string(part[1:-1]))

                # A command word containing a substitution is dropped
                # from the words examined by the second pass
                if word[0] in command_kinds and word in words:
                    words.remove(word)

        # Second pass: find the command name
        usetoken = False
        for word in words:
            if word[0] not in command_kinds and not (usetoken and word[0] == "TOKEN"):
                continue

            if "=" in word[1]:
                # This word contains "="; a following TOKEN may then be
                # taken as the command
                usetoken = True
                continue

            cmd = word[1]
            if cmd.startswith("$"):
                self.log.debug(self.unhandled_template % cmd)
            elif cmd == "eval":
                # Parse the text of the remaining words as shell code
                command = " ".join(text for _, text in words[1:])
                self._parse_shell(command)
            else:
                self.allexecs.add(cmd)
            break