blob: 3ee4d5622bb8199717a0dcb46ab9db73cdaa40d4 [file] [log] [blame]
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001import ast
2import codegen
3import logging
4import os.path
5import bb.utils, bb.data
6from itertools import chain
7from pysh import pyshyacc, pyshlex, sherrors
8from bb.cache import MultiProcessCache
9
10
11logger = logging.getLogger('BitBake.CodeParser')
12
13try:
14 import cPickle as pickle
15except ImportError:
16 import pickle
17 logger.info('Importing cPickle failed. Falling back to a very slow implementation.')
18
19
def check_indent(codestr):
    """If the code is indented, add a top level piece of code to 'remove' the indentation.

    Returns codestr unchanged when it starts at column 0 (or is empty /
    all whitespace); otherwise wraps space/tab indented code in an
    "if 1:" block so it compiles, stripping one leading newline to keep
    reported line numbers correct.
    """

    i = 0
    # Bounds check guards against empty or whitespace-only input, which
    # would otherwise raise IndexError.
    while i < len(codestr) and codestr[i] in ["\n", "\t", " "]:
        i = i + 1

    if i == 0:
        return codestr

    if i == len(codestr):
        # Nothing but whitespace: there is no code to re-indent.
        return codestr

    if codestr[i-1] == "\t" or codestr[i-1] == " ":
        if codestr[0] == "\n":
            # Since we're adding a line, we need to remove one line of any empty padding
            # to ensure line numbers are correct
            codestr = codestr[1:]
        return "if 1:\n" + codestr

    return codestr
38
39
40# Basically pickle, in python 2.7.3 at least, does badly with data duplication
41# upon pickling and unpickling. Combine this with duplicate objects and things
42# are a mess.
43#
44# When the sets are originally created, python calls intern() on the set keys
45# which significantly improves memory usage. Sadly the pickle/unpickle process
46# doesn't call intern() on the keys and results in the same strings being duplicated
47# in memory. This also means pickle will save the same string multiple times in
48# the cache file.
49#
50# By having shell and python cacheline objects with setstate/getstate, we force
51# the object creation through our own routine where we can call intern (via internSet).
52#
53# We also use hashable frozensets and ensure we use references to these so that
54# duplicates can be removed, both in memory and in the resulting pickled data.
55#
56# By playing these games, the size of the cache file shrinks dramatically
57# meaning faster load times and the reloaded cache files also consume much less
58# memory. Smaller cache files, faster load times and lower memory usage is good.
59#
60# A custom getstate/setstate using tuples is actually worth 15% cachesize by
61# avoiding duplication of the attribute names!
62
class SetCache(object):
    """Deduplicates frozensets of interned strings.

    Equal sets share one canonical frozenset object, so duplicates are
    removed both in memory and in the resulting pickled cache data.
    """

    def __init__(self):
        # Maps each frozenset to its canonical (first-seen) instance.
        self.setcache = {}

    def internSet(self, items):
        """Return the canonical frozenset of the interned strings in items."""
        try:
            # Python 2: intern() is a builtin.
            intern_ = intern
        except NameError:
            # Python 3: it moved to sys.intern().
            from sys import intern as intern_

        s = frozenset(intern_(i) for i in items)
        # Key by the set itself rather than hash(s): the dict resolves
        # hash collisions via equality, so two distinct sets that happen
        # to share a hash can no longer be conflated.
        return self.setcache.setdefault(s, s)

codecache = SetCache()
79
class pythonCacheLine(object):
    """One cached python-parse result: the referenced variables, the
    executed functions and the contains-mappings, all held as interned
    frozensets shared via the module-level codecache."""

    def __init__(self, refs, execs, contains):
        self.refs = codecache.internSet(refs)
        self.execs = codecache.internSet(execs)
        self.contains = dict(
            (varname, codecache.internSet(contains[varname]))
            for varname in contains)

    def __getstate__(self):
        # Pickle as a bare tuple: avoiding attribute names in the stream
        # keeps the cache file significantly smaller.
        return (self.refs, self.execs, self.contains)

    def __setstate__(self, state):
        refs, execs, contains = state
        # Route through __init__ so strings are re-interned on unpickle.
        self.__init__(refs, execs, contains)

    def __hash__(self):
        # Fold the contains mapping in sorted key order so the hash is
        # deterministic regardless of dict ordering.
        acc = (hash(self.refs), hash(self.execs))
        for varname in sorted(self.contains.keys()):
            acc = acc + (varname, hash(self.contains[varname]))
        return hash(acc)

    def __repr__(self):
        return " ".join([str(self.refs), str(self.execs), str(self.contains)])
101
102
class shellCacheLine(object):
    """One cached shell-parse result: the set of executed commands,
    interned and deduplicated via the module-level codecache."""

    def __init__(self, execs):
        self.execs = codecache.internSet(execs)

    def __getstate__(self):
        # The frozenset itself is the pickled state (the original wrote
        # "(self.execs)", which is not a tuple), and __setstate__ below
        # expects exactly that.
        return self.execs

    def __setstate__(self, state):
        # Re-intern the command strings on unpickle.
        self.__init__(state)

    def __hash__(self):
        return hash(self.execs)

    def __repr__(self):
        return str(self.execs)
117
class CodeParserCache(MultiProcessCache):
    """Persistent cache of python and shell parse results, shared across
    bitbake's parser processes via the MultiProcessCache base class.

    cachedata[0] holds python results, cachedata[1] shell results; the
    *_extras dicts collect this process's new entries for later merging.
    """
    cache_file_name = "bb_codeparser.dat"
    # Bump whenever the cacheline layout or parse results change format.
    CACHE_VERSION = 7

    def __init__(self):
        MultiProcessCache.__init__(self)
        # Convenience aliases into the structures the parent set up.
        self.pythoncache = self.cachedata[0]
        self.shellcache = self.cachedata[1]
        self.pythoncacheextras = self.cachedata_extras[0]
        self.shellcacheextras = self.cachedata_extras[1]

        # To avoid duplication in the codeparser cache, keep
        # a lookup of hashes of objects we already have
        self.pythoncachelines = {}
        self.shellcachelines = {}

    def newPythonCacheLine(self, refs, execs, contains):
        """Return a deduplicated pythonCacheLine for the given data.

        NOTE(review): dedup is keyed on hash() alone (pythonCacheLine has
        no __eq__), so a hash collision would return the wrong line —
        presumably considered acceptable here; verify.
        """
        cacheline = pythonCacheLine(refs, execs, contains)
        h = hash(cacheline)
        if h in self.pythoncachelines:
            return self.pythoncachelines[h]
        self.pythoncachelines[h] = cacheline
        return cacheline

    def newShellCacheLine(self, execs):
        """Return a deduplicated shellCacheLine for the given execs.

        Same hash-keyed dedup caveat as newPythonCacheLine.
        """
        cacheline = shellCacheLine(execs)
        h = hash(cacheline)
        if h in self.shellcachelines:
            return self.shellcachelines[h]
        self.shellcachelines[h] = cacheline
        return cacheline

    def init_cache(self, d):
        """Load the on-disk cache once; subsequent calls are no-ops."""
        # Check if we already have the caches
        if self.pythoncache:
            return

        MultiProcessCache.init_cache(self, d)

        # cachedata gets re-assigned in the parent
        self.pythoncache = self.cachedata[0]
        self.shellcache = self.cachedata[1]

    def create_cachedata(self):
        """Shape of an empty cache: [python results, shell results]."""
        data = [{}, {}]
        return data

codeparsercache = CodeParserCache()
166
def parser_cache_init(d):
    """Initialise the global codeparser cache (delegates to init_cache)."""
    codeparsercache.init_cache(d)
169
def parser_cache_save():
    """Delegate to the global codeparsercache's save_extras()."""
    codeparsercache.save_extras()
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500172
def parser_cache_savemerge():
    """Delegate to the global codeparsercache's save_merge()."""
    codeparsercache.save_merge()
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500175
Logger = logging.getLoggerClass()
class BufferedLogger(Logger):
    """A logger that queues records in memory until flush(), at which
    point they are replayed, in order, into the target's handle()."""

    def __init__(self, name, level=0, target=None):
        Logger.__init__(self, name)
        self.setLevel(level)
        self.buffer = []
        self.target = target

    def handle(self, record):
        # Queue instead of emitting immediately.
        self.buffer.append(record)

    def flush(self):
        # Replay in arrival order, then drop the queue. The queue is only
        # cleared after a full replay, matching the original behavior.
        for queued in self.buffer:
            self.target.handle(queued)
        self.buffer = []
191
class PythonParser():
    """Walks the AST of a piece of python code and records which bitbake
    variables it references (self.references), which functions it executes
    (self.execs) and any contains-style queries (self.contains).

    Results are memoised by the hash of the code string in the global
    codeparsercache.
    """

    # Method-name suffixes whose first argument names a variable reference.
    getvars = (".getVar", ".appendVar", ".prependVar")
    # Functions whose (variable, value) string arguments populate self.contains.
    containsfuncs = ("bb.utils.contains", "base_contains", "bb.utils.contains_any")
    # Functions whose first argument names a bitbake function being executed.
    execfuncs = ("bb.build.exec_func", "bb.build.exec_task")

    def warn(self, func, arg):
        """Warn about calls of bitbake APIs which pass a non-literal
        argument for the variable name, as we're not able to track such
        a reference.
        """

        try:
            funcstr = codegen.to_source(func)
            argstr = codegen.to_source(arg)
        except TypeError:
            # NOTE(review): debug(2, ...) relies on bb's logger accepting a
            # numeric verbosity level as the first argument — confirm.
            self.log.debug(2, 'Failed to convert function and argument to source form')
        else:
            self.log.debug(1, self.unhandled_message % (funcstr, argstr))

    def visit_Call(self, node):
        """Inspect one Call node, updating references/execs/contains."""
        name = self.called_node_name(node.func)
        # Precedence note: parsed as (name and name.endswith(...)) or (name in ...).
        if name and name.endswith(self.getvars) or name in self.containsfuncs:
            if isinstance(node.args[0], ast.Str):
                varname = node.args[0].s
                if name in self.containsfuncs and isinstance(node.args[1], ast.Str):
                    if varname not in self.contains:
                        self.contains[varname] = set()
                    self.contains[varname].add(node.args[1].s)
                else:
                    self.references.add(node.args[0].s)
            else:
                # Non-literal variable name: we can't track it, just warn.
                self.warn(node.func, node.args[0])
        elif name and name.endswith(".expand"):
            if isinstance(node.args[0], ast.Str):
                # Expand the literal argument and absorb whatever the
                # expansion itself references, executes and contains.
                value = node.args[0].s
                d = bb.data.init()
                parser = d.expandWithRefs(value, self.name)
                self.references |= parser.references
                self.execs |= parser.execs
                for varname in parser.contains:
                    if varname not in self.contains:
                        self.contains[varname] = set()
                    self.contains[varname] |= parser.contains[varname]
        elif name in self.execfuncs:
            if isinstance(node.args[0], ast.Str):
                # Held separately until parse completes; see parse_python.
                self.var_execs.add(node.args[0].s)
            else:
                self.warn(node.func, node.args[0])
        elif name and isinstance(node.func, (ast.Name, ast.Attribute)):
            # Any other direct call by (dotted) name counts as an exec.
            self.execs.add(name)

    def called_node_name(self, node):
        """Given a called node, return its original string form"""
        components = []
        # Walk Attribute chains down to the root Name, collecting parts;
        # returns None (implicitly) for anything not a plain dotted name.
        while node:
            if isinstance(node, ast.Attribute):
                components.append(node.attr)
                node = node.value
            elif isinstance(node, ast.Name):
                components.append(node.id)
                return '.'.join(reversed(components))
            else:
                break

    def __init__(self, name, log):
        # name: the bitbake variable/function being parsed (used in messages
        # and as the expansion context for .expand() handling).
        self.name = name
        self.var_execs = set()
        self.contains = {}
        self.execs = set()
        self.references = set()
        # Buffer log records; the caller decides when/where to flush them.
        self.log = BufferedLogger('BitBake.Data.PythonParser', logging.DEBUG, log)

        self.unhandled_message = "in call of %s, argument '%s' is not a string literal"
        self.unhandled_message = "while parsing %s, %s" % (name, self.unhandled_message)

    def parse_python(self, node, lineno=0, filename="<string>"):
        """Parse the python code in node, populating self.references,
        self.execs and self.contains, consulting and updating the global
        codeparsercache keyed on the code string's hash.
        """
        # Nothing to do for empty / whitespace-only code.
        if not node or not node.strip():
            return

        h = hash(str(node))

        if h in codeparsercache.pythoncache:
            self.references = set(codeparsercache.pythoncache[h].refs)
            self.execs = set(codeparsercache.pythoncache[h].execs)
            self.contains = {}
            for i in codeparsercache.pythoncache[h].contains:
                self.contains[i] = set(codeparsercache.pythoncache[h].contains[i])
            return

        if h in codeparsercache.pythoncacheextras:
            self.references = set(codeparsercache.pythoncacheextras[h].refs)
            self.execs = set(codeparsercache.pythoncacheextras[h].execs)
            self.contains = {}
            for i in codeparsercache.pythoncacheextras[h].contains:
                self.contains[i] = set(codeparsercache.pythoncacheextras[h].contains[i])
            return

        # We can't add to the linenumbers for compile, we can pad to the correct number of blank lines though
        node = "\n" * int(lineno) + node
        # PyCF_ONLY_AST: we only need the tree, not executable bytecode.
        code = compile(check_indent(str(node)), filename, "exec",
                       ast.PyCF_ONLY_AST)

        for n in ast.walk(code):
            if n.__class__.__name__ == "Call":
                self.visit_Call(n)

        # Functions named via bb.build.exec_func/exec_task count as execs too.
        self.execs.update(self.var_execs)

        codeparsercache.pythoncacheextras[h] = codeparsercache.newPythonCacheLine(self.references, self.execs, self.contains)
301
class ShellParser():
    """Extracts the external commands executed by a piece of shell code,
    parsing with pysh and caching results in the global codeparsercache."""

    def __init__(self, name, log):
        # Names of shell functions defined in the code; calls to these are
        # not external commands even though they appear in command position.
        self.funcdefs = set()
        # Every command seen in command position (including local funcdefs).
        self.allexecs = set()
        # Final result: allexecs minus funcdefs (set by parse_shell).
        self.execs = set()
        self.log = BufferedLogger('BitBake.Data.%s' % name, logging.DEBUG, log)
        self.unhandled_template = "unable to handle non-literal command '%s'"
        self.unhandled_template = "while parsing %s, %s" % (name, self.unhandled_template)

    def parse_shell(self, value):
        """Parse the supplied shell code in a string, returning the external
        commands it executes.
        """

        h = hash(str(value))

        # Consult the loaded cache, then this process's extras.
        if h in codeparsercache.shellcache:
            self.execs = set(codeparsercache.shellcache[h].execs)
            return self.execs

        if h in codeparsercache.shellcacheextras:
            self.execs = set(codeparsercache.shellcacheextras[h].execs)
            return self.execs

        self._parse_shell(value)
        self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs)

        codeparsercache.shellcacheextras[h] = codeparsercache.newShellCacheLine(self.execs)

        return self.execs

    def _parse_shell(self, value):
        """Run pysh over value and walk the resulting token tree."""
        try:
            tokens, _ = pyshyacc.parse(value, eof=True, debug=False)
        except pyshlex.NeedMore:
            # Incomplete input (e.g. unterminated construct).
            raise sherrors.ShellSyntaxError("Unexpected EOF")

        for token in tokens:
            self.process_tokens(token)

    def process_tokens(self, tokens):
        """Process a supplied portion of the syntax tree as returned by
        pyshyacc.parse.

        Each handler returns (more_tokens, words): nested token trees to
        recurse into, and word lists to scan for commands.
        """

        def function_definition(value):
            # Remember the name so later calls to it aren't counted as execs.
            self.funcdefs.add(value.name)
            return [value.body], None

        def case_clause(value):
            # Element 0 of each item in the case is the list of patterns, and
            # Element 1 of each item in the case is the list of commands to be
            # executed when that pattern matches.
            words = chain(*[item[0] for item in value.items])
            cmds = chain(*[item[1] for item in value.items])
            return cmds, words

        def if_clause(value):
            main = chain(value.cond, value.if_cmds)
            rest = value.else_cmds
            if isinstance(rest, tuple) and rest[0] == "elif":
                # Flatten elif arms recursively into one command chain.
                return chain(main, if_clause(rest[1]))
            else:
                return chain(main, rest)

        def simple_command(value):
            # Assignment values may embed command substitutions, so they
            # are scanned as words alongside the command's own words.
            return None, chain(value.words, (assign[1] for assign in value.assigns))

        # Dispatch table: pysh token type -> handler.
        token_handlers = {
            "and_or": lambda x: ((x.left, x.right), None),
            "async": lambda x: ([x], None),
            "brace_group": lambda x: (x.cmds, None),
            "for_clause": lambda x: (x.cmds, x.items),
            "function_definition": function_definition,
            "if_clause": lambda x: (if_clause(x), None),
            "pipeline": lambda x: (x.commands, None),
            "redirect_list": lambda x: ([x.cmd], None),
            "subshell": lambda x: (x.cmds, None),
            "while_clause": lambda x: (chain(x.condition, x.cmds), None),
            "until_clause": lambda x: (chain(x.condition, x.cmds), None),
            "simple_command": simple_command,
            "case_clause": case_clause,
        }

        for token in tokens:
            name, value = token
            try:
                more_tokens, words = token_handlers[name](value)
            except KeyError:
                raise NotImplementedError("Unsupported token type " + name)

            if more_tokens:
                self.process_tokens(more_tokens)

            if words:
                self.process_words(words)

    def process_words(self, words):
        """Process a set of 'words' in pyshyacc parlance, which includes
        extraction of executed commands from $() blocks, as well as grabbing
        the command name argument.
        """

        # Iterate a copy so cmd_name/cmd_word entries can be removed from
        # the working list as they are consumed.
        words = list(words)
        for word in list(words):
            wtree = pyshlex.make_wordtree(word[1])
            for part in wtree:
                if not isinstance(part, list):
                    continue

                if part[0] in ('`', '$('):
                    # Recursively parse backtick / $() command substitutions.
                    command = pyshlex.wordtree_as_string(part[1:-1])
                    self._parse_shell(command)

            if word[0] in ("cmd_name", "cmd_word"):
                if word in words:
                    words.remove(word)

        usetoken = False
        for word in words:
            if word[0] in ("cmd_name", "cmd_word") or \
               (usetoken and word[0] == "TOKEN"):
                if "=" in word[1]:
                    # Leading VAR=value assignment: the real command name may
                    # follow as a plain TOKEN, so keep scanning.
                    usetoken = True
                    continue

                cmd = word[1]
                if cmd.startswith("$"):
                    # Variable command name: cannot be tracked statically.
                    self.log.debug(1, self.unhandled_template % cmd)
                elif cmd == "eval":
                    # Treat everything after "eval" as shell code to parse.
                    command = " ".join(word for _, word in words[1:])
                    self._parse_shell(command)
                else:
                    self.allexecs.add(cmd)
                break