blob: 872333d7fdf8e2f8fd91c9a789755f118c7c2988 [file] [log] [blame]
Brad Bishopc342db32019-05-15 21:57:59 -04001#
2# SPDX-License-Identifier: GPL-2.0-only
3#
4
Patrick Williamsc124f4f2015-09-15 14:41:29 -05005import hashlib
6import logging
7import os
8import re
9import tempfile
Patrick Williamsc0f7c042017-02-23 20:41:17 -060010import pickle
Patrick Williamsc124f4f2015-09-15 14:41:29 -050011import bb.data
Brad Bishop6e60e8b2018-02-01 10:27:11 -050012import difflib
13import simplediff
Patrick Williamsd8c66bc2016-06-20 12:57:21 -050014from bb.checksum import FileChecksumCache
Brad Bishop08902b02019-08-20 09:16:51 -040015from bb import runqueue
Brad Bishopa34c0302019-09-23 22:34:48 -040016import hashserv
Patrick Williamsc124f4f2015-09-15 14:41:29 -050017
# Module-level loggers: one for general signature-generation messages and a
# dedicated child logger for hash-equivalence traffic so that it can be
# filtered or made more verbose independently.
logger = logging.getLogger('BitBake.SigGen')
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')
Patrick Williamsc124f4f2015-09-15 14:41:29 -050020
def init(d):
    """Instantiate the signature generator selected by BB_SIGNATURE_HANDLER.

    Scans this module's globals for SignatureGenerator subclasses and returns
    an instance of the one whose ``name`` matches the configured handler.
    Falls back to the no-op generator (logging an error) when the requested
    handler does not exist.
    """
    siggens = [obj for obj in globals().values()
               if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)
    # No generator matched the requested name: report and fall back to noop.
    # (The original code had an unreachable `break` after the `return` above;
    # it has been removed.)
    logger.error("Invalid signature generator '%s', using default 'noop'\n"
                 "Available generators: %s", desired,
                 ', '.join(obj.name for obj in siggens))
    return SignatureGenerator(d)
35
class SignatureGenerator(object):
    """Base "noop" signature generator.

    Defines the interface every signature generator must provide.  The
    hashes it produces are derived from the task identifier alone and carry
    no dependency information.
    """
    name = "noop"

    # If the derived class supports multiconfig datacaches, set this to True
    # The default is False for backward compatibility with derived signature
    # generators that do not understand multiconfig caches
    supports_multiconfig_datacaches = False

    def __init__(self, data):
        # All of these are keyed by task identifier (tid).
        for attr in ('basehash', 'taskhash', 'unihash', 'runtaskdeps',
                     'file_checksum_values', 'taints', 'unitaskhashes',
                     'tidtopn'):
            setattr(self, attr, {})
        self.setscenetasks = set()

    def finalise(self, fn, d, varient):
        return

    def postparsing_clean_cache(self):
        return

    def get_unihash(self, tid):
        # The noop generator has no equivalence data; unihash == taskhash.
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        return

    def get_taskhash(self, tid, deps, dataCaches):
        # Hash only the task identifier - dependencies are ignored here.
        h = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        return h

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        stamp = "%s.%s.%s" % (stampbase, taskname, extrainfo)
        return stamp.rstrip('.')

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        stamp = "%s.%s.%s" % (stampbase, taskname, extrainfo)
        return stamp.rstrip('.')

    def dump_sigtask(self, fn, task, stampbase, runtime):
        return

    def invalidate_task(self, task, d, fn):
        bb.build.del_stamp(task, d, fn)

    def dump_sigs(self, dataCache, options):
        return

    def get_taskdata(self):
        # NOTE: tuple order is the wire format shared with set_taskdata().
        return (self.runtaskdeps, self.taskhash, self.unihash,
                self.file_checksum_values, self.taints, self.basehash,
                self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        (self.runtaskdeps, self.taskhash, self.unihash,
         self.file_checksum_values, self.taints, self.basehash,
         self.unitaskhashes, self.tidtopn, self.setscenetasks) = data

    def reset(self, data):
        self.__init__(data)

    def get_taskhashes(self):
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        return

    def set_setscene_tasks(self, setscene_tasks):
        return

    @classmethod
    def get_data_caches(cls, dataCaches, mc):
        """
        This function returns the datacaches that should be passed to signature
        generator functions. If the signature generator supports multiconfig
        caches, the entire dictionary of data caches is sent, otherwise a
        special proxy is sent that support both index access to all
        multiconfigs, and also direct access for the default multiconfig.

        The proxy class allows code in this class itself to always use
        multiconfig aware code (to ease maintenance), but derived classes that
        are unaware of multiconfig data caches can still access the default
        multiconfig as expected.

        Do not override this function in derived classes; it will be removed in
        the future when support for multiconfig data caches is mandatory
        """
        if cls.supports_multiconfig_datacaches:
            return dataCaches

        class DataCacheProxy(object):
            # Index access reaches any multiconfig's cache...
            def __getitem__(self, key):
                return dataCaches[key]

            # ...while plain attribute access falls through to the default
            # multiconfig (mc) captured from the enclosing call.
            def __getattr__(self, name):
                return getattr(dataCaches[mc], name)

        return DataCacheProxy()
144
class SignatureGeneratorBasic(SignatureGenerator):
    """Signature generator that hashes each task's variable dependencies,
    run-time task dependencies and file checksums.
    """
    name = "basic"

    def __init__(self, data):
        # Per-tid hashes and metadata
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        # Per-recipe (fn-keyed) parse-time dependency data
        self.taskdeps = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.gendeps = {}
        self.lookupcache = {}
        self.setscenetasks = set()
        # Variables excluded from all hashes (BB_HASHBASE_WHITELIST)
        self.basewhitelist = set((data.getVar("BB_HASHBASE_WHITELIST") or "").split())
        self.taskwhitelist = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            # Persistent file checksum cache shared between parses
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        # Persistent (taskhash, unihash) cache used by the unihash mixin
        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        # Directory names skipped when checksumming local source trees
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        # BB_HASHTASK_WHITELIST is a regex controlling which inter-recipe
        # dependencies are dropped in rundep_check()
        self.taskwhitelist = data.getVar("BB_HASHTASK_WHITELIST") or None
        if self.taskwhitelist:
            self.twl = re.compile(self.taskwhitelist)
        else:
            self.twl = None

    def _build_data(self, fn, d):
        """Compute variable dependency data and base hashes for all tasks
        of recipe fn, warning when a reparse changes a base hash."""

        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basewhitelist)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basewhitelist, fn)

        for task in tasklist:
            tid = fn + ":" + task
            # A changed basehash on reparse means the metadata is not
            # deterministic - surface it loudly with reproduction hints.
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before run printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        self.taskdeps[fn] = taskdeps
        self.gendeps[fn] = gendeps
        self.lookupcache[fn] = lookupcache

        return taskdeps

    def set_setscene_tasks(self, setscene_tasks):
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):
        """Finalise hash data for a parsed recipe, exporting per-task
        BB_BASEHASH_task-* variables into the datastore."""

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        if variant or mc:
            fn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps = self._build_data(fn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % fn)
            raise

        #Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[fn]:
        #    self.dump_sigtask(fn, task, d.getVar("STAMP"), False)

        for task in taskdeps:
            d.setVar("BB_BASEHASH_task-%s" % task, self.basehash[fn + ":" + task])

    def postparsing_clean_cache(self):
        #
        # After parsing we can remove some things from memory to reduce our memory footprint
        #
        self.gendeps = {}
        self.lookupcache = {}
        self.taskdeps = {}

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the whitelist
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True

    def read_taint(self, fn, task, stampbase):
        """Return the contents of the task's .taint stamp file, or None if
        no taint file exists."""
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint

    def prep_taskhash(self, tid, deps, dataCaches):
        """Gather everything get_taskhash() needs for tid: the base hash,
        filtered run-time dependencies, file checksums and any taints.
        Must be called before get_taskhash()."""

        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[fn]

        self.tidtopn[tid] = recipename

        for dep in sorted(deps, key=clean_basepath):
            (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            depname = dataCaches[depmc].pkg_fn[depmcfn]
            if not self.supports_multiconfig_datacaches and mc != depmc:
                # If the signature generator doesn't understand multiconfig
                # data caches, any dependency not in the same multiconfig must
                # be skipped for backward compatibility
                continue
            if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches):
                continue
            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
            self.runtaskdeps[tid].append(dep)

        if task in dataCaches[mc].file_checksums[fn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            for (f,cs) in checksums:
                self.file_checksum_values[tid].append((f,cs))

        taskdep = dataCaches[mc].task_deps[fn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):
        """Compute and store the task hash for tid by hashing the base hash,
        each dependency's unihash (or taskhash), file checksums and taints.
        NOTE: the concatenation order below defines the hash - do not
        reorder."""

        data = self.basehash[tid]
        for dep in self.runtaskdeps[tid]:
            if dep in self.unihash:
                if self.unihash[dep] is None:
                    # A None unihash means "use the plain taskhash"
                    data = data + self.taskhash[dep]
                else:
                    data = data + self.unihash[dep]
            else:
                data = data + self.get_unihash(dep)

        for (f, cs) in self.file_checksum_values[tid]:
            if cs:
                data = data + cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                # Strip the "nostamp:" prefix, hash only the random taint
                data = data + self.taints[tid][8:]
            else:
                data = data + self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH_task-%s" % task, taskhash[task])
        return h

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        self.unihash_cache.save(self.unitaskhashes)

    def dump_sigtask(self, fn, task, stampbase, runtime):
        """Write the signature data (sigdata/sigbasedata file) for one task,
        cross-checking the stored hashes against freshly computed ones."""

        tid = fn + ":" + task
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            # "customfile:<stamp>" - write to stampbase, read taints from the
            # stamp given after the prefix
            sigfile = stampbase
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basewhitelist'] = self.basewhitelist
        data['taskwhitelist'] = self.taskwhitelist
        data['taskdeps'] = self.taskdeps[fn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.lookupcache[fn][task]
        for dep in self.taskdeps[fn][task]:
            if dep in self.basewhitelist:
                continue
            data['gendeps'][dep] = self.gendeps[fn][dep]
            data['varvals'][dep] = self.lookupcache[fn][dep]

        if runtime and tid in self.taskhash:
            # Runtime data is only available once taskhashes were computed
            data['runtaskdeps'] = self.runtaskdeps[tid]
            data['file_checksum_values'] = [(os.path.basename(f), cs) for f,cs in self.file_checksum_values[tid]]
            data['runtaskhashes'] = {}
            for dep in data['runtaskdeps']:
                data['runtaskhashes'][dep] = self.get_unihash(dep)
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(fn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        # Sanity check: recompute the hashes from the dumped data and make
        # sure they agree with what we stored.
        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        # Write atomically: dump to a temp file, then rename into place
        fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with os.fdopen(fd, "wb") as stream:
                p = pickle.dump(data, stream, -1)
                stream.flush()
            os.chmod(tmpfile, 0o664)
            os.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err

    def dump_sigfn(self, fn, dataCaches, options):
        """Dump signature data for every hashed task of recipe fn."""
        if fn in self.taskdeps:
            for task in self.taskdeps[fn]:
                tid = fn + ":" + task
                mc = bb.runqueue.mc_from_tid(tid)
                if tid not in self.taskhash:
                    continue
                if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
                    bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
                    bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
                self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500428
class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    """Variant of the basic generator which embeds the task hash in stamp
    file names, so a hash change invalidates the stamp."""
    name = "basichash"

    def get_stampfile_hash(self, tid):
        """Prefer the computed taskhash; fall back to the base hash."""
        # If task is not in basehash, then error
        return self.taskhash[tid] if tid in self.taskhash else self.basehash[tid]

    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
        """Build the stamp file name including the task hash, or '*' when a
        clean/glob mask is requested."""
        realtask = taskname
        # Setscene stamps share the hash of the underlying real task
        if realtask.endswith("_setscene") and realtask != "do_setscene":
            realtask = realtask[:-len("_setscene")]
        tid = fn + ":" + realtask
        h = "*" if clean else self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
        """Glob mask matching the stamp regardless of its hash."""
        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, d, fn):
        """Force a rebuild of the task by writing a taint file."""
        bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
        bb.build.write_taint(task, d, fn)
457
Brad Bishop08902b02019-08-20 09:16:51 -0400458class SignatureGeneratorUniHashMixIn(object):
    def __init__(self, data):
        """Initialise hash-equivalence state on top of the base generator."""
        # Optional per-tid suffix appended to self.method when querying or
        # reporting unihashes (see get_unihash / report_unihash).
        self.extramethod = {}
        super().__init__(data)
462
Brad Bishop08902b02019-08-20 09:16:51 -0400463 def get_taskdata(self):
Andrew Geissler82c905d2020-04-13 13:39:40 -0500464 return (self.server, self.method, self.extramethod) + super().get_taskdata()
Brad Bishop08902b02019-08-20 09:16:51 -0400465
466 def set_taskdata(self, data):
Andrew Geissler82c905d2020-04-13 13:39:40 -0500467 self.server, self.method, self.extramethod = data[:3]
468 super().set_taskdata(data[3:])
Brad Bishop08902b02019-08-20 09:16:51 -0400469
Brad Bishopa34c0302019-09-23 22:34:48 -0400470 def client(self):
471 if getattr(self, '_client', None) is None:
472 self._client = hashserv.create_client(self.server)
473 return self._client
474
Brad Bishop08902b02019-08-20 09:16:51 -0400475 def get_stampfile_hash(self, tid):
476 if tid in self.taskhash:
477 # If a unique hash is reported, use it as the stampfile hash. This
478 # ensures that if a task won't be re-run if the taskhash changes,
479 # but it would result in the same output hash
Andrew Geissler82c905d2020-04-13 13:39:40 -0500480 unihash = self._get_unihash(tid)
Brad Bishop08902b02019-08-20 09:16:51 -0400481 if unihash is not None:
482 return unihash
483
484 return super().get_stampfile_hash(tid)
485
486 def set_unihash(self, tid, unihash):
Andrew Geissler82c905d2020-04-13 13:39:40 -0500487 (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
488 key = mc + ":" + self.tidtopn[tid] + ":" + taskname
489 self.unitaskhashes[key] = (self.taskhash[tid], unihash)
490 self.unihash[tid] = unihash
491
    def _get_unihash(self, tid, checkkey=None):
        """Look up the locally cached unihash for tid.

        The cache is keyed on mc:PN:taskname and stores a
        (taskhash, unihash) pair.  The entry is only honoured if the stored
        taskhash matches checkkey (defaulting to tid's current taskhash);
        otherwise None is returned.
        """
        if tid not in self.tidtopn:
            return None
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        if key not in self.unitaskhashes:
            return None
        if not checkkey:
            checkkey = self.taskhash[tid]
        (key, unihash) = self.unitaskhashes[key]
        if key != checkkey:
            # Entry was recorded against a different taskhash - stale
            return None
        return unihash
Brad Bishop08902b02019-08-20 09:16:51 -0400505
    def get_unihash(self, tid):
        """Return the unique hash for tid.

        Non-setscene tasks simply use their taskhash.  For setscene tasks
        the local cache is consulted first, then the hash equivalence
        server; if neither knows a unihash, the taskhash itself is used and
        cached.
        """
        taskhash = self.taskhash[tid]

        # If its not a setscene task we can return
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # for each task
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) The value is easy for multiple independent builders to derive the
        #    same unique hash from the same input. This means that if the
        #    independent builders find the same taskhash, but it isn't reported
        #    to the server, there is a better chance that they will agree on
        #    the unique hash.
        unihash = taskhash

        try:
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported it at debug level 2. If they differ, that
                # is much more interesting, so it is reported at debug level 1
                hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug(2, 'No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except hashserv.client.HashConnectionError as e:
            # Best effort: fall back to the taskhash if the server is down
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash
555
    def report_unihash(self, path, task, d):
        """Compute the task's output hash (via the configured method) and
        report it to the hash equivalence server, updating BB_UNIHASH if
        the server returns a different unihash.  Runs at task completion.
        """
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        fn = d.getVar('BB_FILENAME')
        tid = fn + ':do_' + task
        key = tid + ':' + taskhash

        # Only setscene-capable tasks participate in hash equivalence
        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just abort
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            # self.method may be "module.function" or a bare callable name
            # resolvable by better_eval()
            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                if report_taskdata:
                    # Rewind so the signature data written by the method
                    # above can be re-read and attached to the report
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    # Server resolved this task to an existing unihash -
                    # propagate it to the runqueue and the datastore
                    hashequiv_logger.debug(1, 'Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug(1, 'Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except hashserv.client.HashConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                # Keep a stable "depsig.do_<task>" symlink pointing at the
                # most recent per-pid signature dump
                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass
641
Andrew Geissler82c905d2020-04-13 13:39:40 -0500642 def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
643 try:
644 extra_data = {}
645 method = self.method
646 if tid in self.extramethod:
647 method = method + self.extramethod[tid]
648
649 data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
650 hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))
651
652 if data is None:
653 bb.warn("Server unable to handle unihash report")
654 return False
655
656 finalunihash = data['unihash']
657
658 if finalunihash == current_unihash:
659 hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
660 elif finalunihash == wanted_unihash:
661 hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
662 self.set_unihash(tid, finalunihash)
663 return True
664 else:
665 # TODO: What to do here?
666 hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))
667
668 except hashserv.client.HashConnectionError as e:
669 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
670
671 return False
Brad Bishop08902b02019-08-20 09:16:51 -0400672
673#
674# Dummy class used for bitbake-selftest
675#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    """Hash-equivalence-enabled signature generator used only by bitbake-selftest."""
    name = "TestEquivHash"

    def init_rundepcheck(self, data):
        # On top of the base initialisation, configure the hash equivalence
        # server address and the output-hash method used for reporting.
        super().init_rundepcheck(data)
        self.server = data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"
682
Andrew Geissler5a43b432020-06-13 10:46:56 -0500683#
684# Dummy class used for bitbake-selftest
685#
class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash):
    """Signature generator used only by bitbake-selftest to exercise the
    multiconfig datacaches code paths (supports_multiconfig_datacaches)."""
    name = "TestMulticonfigDepends"
    # Opt in to being passed per-multiconfig datacaches by the runqueue
    supports_multiconfig_datacaches = True
Brad Bishop08902b02019-08-20 09:16:51 -0400689
def dump_this_task(outfile, d):
    """Dump the signature data for the currently executing task to outfile.

    Reads the recipe file and task name from the datastore d and delegates
    to the active signature generator's dump_sigtask().
    """
    import bb.parse
    recipefile = d.getVar("BB_FILENAME")
    taskname = "do_" + d.getVar("BB_CURRENTTASK")
    # Use the stamp as a reference so the siginfo file is tied to this run
    refstamp = bb.build.stamp_internal(taskname, d, None, True)
    bb.parse.siggen.dump_sigtask(recipefile, taskname, outfile, "customfile:" + refstamp)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500696
def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    # ANSI escape sequences for each semantic colour role
    palette = {'color_title': '\033[1m',
               'color_default': '\033[0m',
               'color_add': '\033[0;32m',
               'color_remove': '\033[0;31m',
               }
    if enable_color:
        return palette
    # Colour disabled: keep every key present but with empty values so
    # format strings using them still work
    return dict.fromkeys(palette, '')
710
def worddiff_str(oldstr, newstr, colors=None):
    """Return a word-level inline diff of two space-separated strings.

    Added words are rendered as {+word+} and removed words as [-word-],
    wrapped in the colour codes from init_colors(). When the strings differ
    only in whitespace, a "(whitespace changed)" note is appended.
    """
    if not colors:
        colors = init_colors(False)
    pieces = []
    for change, words in simplediff.diff(oldstr.split(' '), newstr.split(' ')):
        chunk = ' '.join(words)
        if change == '=':
            pieces.append(chunk)
        elif change == '+':
            pieces.append('{color_add}{{+{value}+}}{color_default}'.format(value=chunk, **colors))
        elif change == '-':
            pieces.append('{color_remove}[-{value}-]{color_default}'.format(value=chunk, **colors))
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(pieces), whitespace_note)
730
def list_inline_diff(oldlist, newlist, colors=None):
    """Render an inline diff of two lists as a bracketed string.

    Unchanged entries are quoted; additions are prefixed with '+' and
    removals with '-', using the colour codes from init_colors().
    """
    if not colors:
        colors = init_colors(False)
    rendered = []
    for change, words in simplediff.diff(oldlist, newlist):
        chunk = ' '.join(words)
        if change == '=':
            rendered.append("'%s'" % chunk)
        elif change == '+':
            rendered.append('{color_add}+{value}{color_default}'.format(value=chunk, **colors))
        elif change == '-':
            rendered.append('{color_remove}-{value}{color_default}'.format(value=chunk, **colors))
    return '[%s]' % (', '.join(rendered))
747
def clean_basepath(a):
    """Shorten a task key for display.

    Keeps only the last directory component and the recipe filename (plus
    the task suffix), re-appending any "virtual:" prefix information and
    any multiconfig name as trailing ":..." qualifiers.
    """
    mc = None
    if a.startswith("mc:"):
        # Peel off the multiconfig prefix, remembering the config name
        _, mc, a = a.split(":", 2)
    parts = a.rsplit("/", 2)
    b = "%s/%s" % (parts[1], parts[2])
    if a.startswith("virtual:"):
        b += ":" + a.rsplit(":", 1)[0]
    if mc:
        b += ":mc:" + mc
    return b
758
def clean_basepaths(a):
    """Return a copy of dict a with every key shortened via clean_basepath()."""
    return {clean_basepath(x): a[x] for x in a}
764
def clean_basepaths_list(a):
    """Return a list with clean_basepath() applied to every entry of a."""
    return [clean_basepath(x) for x in a]
770
def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
    """
    Compare two pickled siginfo/sigdata files and return a list of
    human-readable strings describing the differences.

    Arguments:
        a, b      -- paths of the two signature files to compare
        recursecb -- optional callback(key, hash_a, hash_b) used to recurse
                     into dependent tasks whose hashes changed; returns a
                     list of output lines or None
        color     -- emit ANSI colour escape sequences in the output
        collapsed -- condensed output; suppresses detail lines that a
                     recursive comparison would repeat
    """
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    # Load both pickled signature dictionaries
    with open(a, 'rb') as f:
        p1 = pickle.Unpickler(f)
        a_data = p1.load()
    with open(b, 'rb') as f:
        p2 = pickle.Unpickler(f)
        b_data = p2.load()

    def dict_diff(a, b, whitelist=set()):
        # Return (changed, added, removed) key sets between dicts a and b,
        # ignoring value changes for keys in whitelist.
        # NOTE: the mutable default is safe here since whitelist is only read.
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in whitelist:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed

    def file_checksums_diff(a, b):
        # Diff two file-checksum collections; returns (changed, added,
        # removed) where changed entries are (file, old_cs, new_cs) tuples
        # and added/removed are lists of filenames.
        from collections import Counter
        # Handle old siginfo format
        if isinstance(a, dict):
            a = [(os.path.basename(f), cs) for f, cs in a.items()]
        if isinstance(b, dict):
            b = [(os.path.basename(f), cs) for f, cs in b.items()]
        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                for y in added:
                    if y[0] == x[0]:
                        # Same filename, different checksum: report as changed
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed

    # Compare the variable whitelists used when generating the hashes
    if 'basewhitelist' in a_data and a_data['basewhitelist'] != b_data['basewhitelist']:
        output.append(color_format("{color_title}basewhitelist changed{color_default} from '%s' to '%s'") % (a_data['basewhitelist'], b_data['basewhitelist']))
        if a_data['basewhitelist'] and b_data['basewhitelist']:
            output.append("changed items: %s" % a_data['basewhitelist'].symmetric_difference(b_data['basewhitelist']))

    if 'taskwhitelist' in a_data and a_data['taskwhitelist'] != b_data['taskwhitelist']:
        output.append(color_format("{color_title}taskwhitelist changed{color_default} from '%s' to '%s'") % (a_data['taskwhitelist'], b_data['taskwhitelist']))
        if a_data['taskwhitelist'] and b_data['taskwhitelist']:
            output.append("changed items: %s" % a_data['taskwhitelist'].symmetric_difference(b_data['taskwhitelist']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    # Differences in which variables each variable depends on, ignoring
    # variables whitelisted in both files
    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basewhitelist'] & b_data['basewhitelist'])
    if changed:
        for dep in changed:
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in added:
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in removed:
            output.append(color_format("{color_title}Dependency on Variable %s was removed") % (dep))


    # Differences in variable values; multi-line values get a unified diff,
    # multi-word values a word diff, everything else an old/new line
    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in changed:
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    # Older sigfiles may lack these fields; default them so the diffs work
    if not 'file_checksum_values' in a_data:
        a_data['file_checksum_values'] = {}
    if not 'file_checksum_values' in b_data:
        b_data['file_checksum_values'] = {}

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    if not 'runtaskdeps' in a_data:
        a_data['runtaskdeps'] = {}
    if not 'runtaskdeps' in b_data:
        b_data['runtaskdeps'] = {}

    if not collapsed:
        # Positional comparison of runtime task dependency lists; per-entry
        # hash comparison is only meaningful when the lengths match
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))

        if changed:
            clean_a = clean_basepaths_list(a_data['runtaskdeps'])
            clean_b = clean_basepaths_list(b_data['runtaskdeps'])
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
            output.append("\n".join(changed))


    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = a_data['runtaskhashes']
        b = b_data['runtaskhashes']
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in added:
                # Suppress the "added" report when an identical hash exists
                # among the removed entries (task renamed, not really added)
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (clean_basepath(dep), b[dep]))
        if removed:
            for dep in removed:
                # Mirror of the check above for removals
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (clean_basepath(dep), a[dep]))
        if changed:
            for dep in changed:
                if not collapsed:
                    output.append(color_format("{color_title}Hash for dependent task %s changed{color_default} from %s to %s") % (clean_basepath(dep), a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
                            # that hash since in all likelyhood, they're the same changes this task also saw.
                            output = [output[-1]] + recout

    # Taint differences (forced or invalidated tasks); nostamp taints embed
    # a random uuid, so label them rather than show a meaningless value diff
    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output
988
989
def calc_basehash(sigdata):
    """Recompute the base hash from a loaded siginfo dictionary.

    Concatenates the task's own variable value with each dependency name
    and its value (when set), then returns the sha256 hex digest.
    """
    task = sigdata['task']
    base = sigdata['varvals'][task]
    # A task with no value contributes an empty string
    chunks = ['' if base is None else base]

    for dep in sigdata['taskdeps']:
        chunks.append(dep)
        val = sigdata['varvals'][dep]
        if val is not None:
            chunks.append(str(val))

    return hashlib.sha256(''.join(chunks).encode("utf-8")).hexdigest()
Patrick Williamsd8c66bc2016-06-20 12:57:21 -05001005
def calc_taskhash(sigdata):
    """Recompute the task hash from a loaded siginfo dictionary.

    Folds the base hash, each runtime dependency's task hash, every file
    checksum and any taint value into a sha256 hex digest.
    """
    parts = [sigdata['basehash']]

    for dep in sigdata['runtaskdeps']:
        parts.append(sigdata['runtaskhashes'][dep])

    for c in sigdata['file_checksum_values']:
        # Entries without a checksum contribute nothing
        if c[1]:
            parts.append(c[1])

    if 'taint' in sigdata:
        taint = sigdata['taint']
        # nostamp taints drop the 'nostamp:' prefix before hashing
        parts.append(taint[8:] if 'nostamp:' in taint else taint)

    return hashlib.sha256(''.join(parts).encode("utf-8")).hexdigest()
Patrick Williamsd8c66bc2016-06-20 12:57:21 -05001023
1024
def dump_sigfile(a):
    """Render the contents of the pickled siginfo/sigdata file a as a list
    of human-readable strings, including recomputed base and task hashes
    for cross-checking against the stored values."""
    output = []

    with open(a, 'rb') as f:
        data = pickle.Unpickler(f).load()

    output.append("basewhitelist: %s" % (data['basewhitelist']))
    output.append("taskwhitelist: %s" % (data['taskwhitelist']))
    output.append("Task dependencies: %s" % (sorted(data['taskdeps'])))
    output.append("basehash: %s" % (data['basehash']))

    for dep in data['gendeps']:
        output.append("List of dependencies for variable %s is %s" % (dep, data['gendeps'][dep]))

    for dep in data['varvals']:
        output.append("Variable %s value is %s" % (dep, data['varvals'][dep]))

    # The following fields only exist in newer / task-level signature files
    if 'runtaskdeps' in data:
        output.append("Tasks this task depends on: %s" % (data['runtaskdeps']))

    if 'file_checksum_values' in data:
        output.append("This task depends on the checksums of files: %s" % (data['file_checksum_values']))

    if 'runtaskhashes' in data:
        for dep in data['runtaskhashes']:
            output.append("Hash for dependent task %s is %s" % (dep, data['runtaskhashes'][dep]))

    if 'taint' in data:
        # nostamp taints embed a random uuid; label it instead of printing it raw
        if data['taint'].startswith('nostamp:'):
            msg = data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in data:
        computed_basehash = calc_basehash(data)
        output.append("Computed base hash is %s and from file %s" % (computed_basehash, data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output