blob: 3f9fe5064250aa896948a3bd9252c968207a1719 [file] [log] [blame]
Brad Bishopc342db32019-05-15 21:57:59 -04001#
2# SPDX-License-Identifier: GPL-2.0-only
3#
4
Patrick Williamsc124f4f2015-09-15 14:41:29 -05005import hashlib
6import logging
7import os
8import re
9import tempfile
Patrick Williamsc0f7c042017-02-23 20:41:17 -060010import pickle
Patrick Williamsc124f4f2015-09-15 14:41:29 -050011import bb.data
Brad Bishop6e60e8b2018-02-01 10:27:11 -050012import difflib
13import simplediff
Patrick Williamsd8c66bc2016-06-20 12:57:21 -050014from bb.checksum import FileChecksumCache
Brad Bishop08902b02019-08-20 09:16:51 -040015from bb import runqueue
Brad Bishopa34c0302019-09-23 22:34:48 -040016import hashserv
Andrew Geissler475cb722020-07-10 16:00:51 -050017import hashserv.client
Patrick Williamsc124f4f2015-09-15 14:41:29 -050018
19logger = logging.getLogger('BitBake.SigGen')
Andrew Geissler82c905d2020-04-13 13:39:40 -050020hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')
Patrick Williamsc124f4f2015-09-15 14:41:29 -050021
def init(d):
    """
    Create the signature generator selected by BB_SIGNATURE_HANDLER.

    Every SignatureGenerator subclass present in this module's globals is a
    candidate. The candidate whose ``name`` attribute equals the value of
    BB_SIGNATURE_HANDLER (``"noop"`` when the variable is unset or empty) is
    instantiated with ``d`` and returned.

    If no candidate matches, an error listing the available generator names
    is logged and a plain SignatureGenerator is returned instead.
    """
    siggens = [obj for obj in globals().values()
               if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)

    # Only reached when no generator matched: the loop above leaves solely
    # through its return statement.
    logger.error("Invalid signature generator '%s', using default 'noop'\n"
                 "Available generators: %s", desired,
                 ', '.join(obj.name for obj in siggens))
    return SignatureGenerator(d)
36
class SignatureGenerator(object):
    """
    The "noop" signature generator and base class of the other generators.

    The task hash is the SHA-256 of the task id; most other hooks are
    do-nothing defaults meant to be overridden by derived generators.
    """
    name = "noop"

    # Derived classes that understand multiconfig datacaches set this to
    # True. It defaults to False for backward compatibility with derived
    # signature generators that do not understand multiconfig caches.
    supports_multiconfig_datacaches = False

    def __init__(self, data):
        # All state starts out empty; ``data`` is not consulted here.
        self.taskhash = {}
        self.basehash = {}
        self.unihash = {}
        self.unitaskhashes = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.tidtopn = {}
        self.setscenetasks = set()

    def finalise(self, fn, d, varient):
        """Hook with no effect in the base class."""

    def postparsing_clean_cache(self):
        """Hook with no effect in the base class."""

    def get_unihash(self, tid):
        """Return the stored task hash of ``tid`` (KeyError if unknown)."""
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        """Hook with no effect in the base class."""

    def get_taskhash(self, tid, deps, dataCaches):
        """Store and return the SHA-256 hex digest of the UTF-8 encoded tid."""
        digest = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        self.taskhash[tid] = digest
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        """Return "<stampbase>.<taskname>.<extrainfo>" minus trailing dots."""
        stamp = "{}.{}.{}".format(stampbase, taskname, extrainfo)
        return stamp.rstrip('.')

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        """Return "<stampbase>.<taskname>.<extrainfo>" minus trailing dots."""
        mask = "{}.{}.{}".format(stampbase, taskname, extrainfo)
        return mask.rstrip('.')

    def dump_sigtask(self, fn, task, stampbase, runtime):
        """Hook with no effect in the base class."""

    def invalidate_task(self, task, d, fn):
        """Invalidate ``task`` by deleting its stamp via bb.build.del_stamp()."""
        bb.build.del_stamp(task, d, fn)

    def dump_sigs(self, dataCache, options):
        """Hook with no effect in the base class."""

    def get_taskdata(self):
        """Return the generator state as a 9-tuple, see set_taskdata()."""
        return (
            self.runtaskdeps,
            self.taskhash,
            self.unihash,
            self.file_checksum_values,
            self.taints,
            self.basehash,
            self.unitaskhashes,
            self.tidtopn,
            self.setscenetasks,
        )

    def set_taskdata(self, data):
        """Restore the state from a 9-tuple as produced by get_taskdata()."""
        (
            self.runtaskdeps,
            self.taskhash,
            self.unihash,
            self.file_checksum_values,
            self.taints,
            self.basehash,
            self.unitaskhashes,
            self.tidtopn,
            self.setscenetasks,
        ) = data

    def reset(self, data):
        """Re-run __init__ on this instance with ``data``."""
        self.__init__(data)

    def get_taskhashes(self):
        """Return (taskhash, unihash, unitaskhashes, tidtopn)."""
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        """Restore the four mappings returned by get_taskhashes()."""
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        """Hook with no effect in the base class."""

    def set_setscene_tasks(self, setscene_tasks):
        """Hook with no effect in the base class."""

    @classmethod
    def get_data_caches(cls, dataCaches, mc):
        """
        This function returns the datacaches that should be passed to signature
        generator functions. If the signature generator supports multiconfig
        caches, the entire dictionary of data caches is sent, otherwise a
        special proxy is sent that supports both index access to all
        multiconfigs, and also direct access for the default multiconfig.

        The proxy class allows code in this class itself to always use
        multiconfig aware code (to ease maintenance), but derived classes that
        are unaware of multiconfig data caches can still access the default
        multiconfig as expected.

        Do not override this function in derived classes; it will be removed in
        the future when support for multiconfig data caches is mandatory
        """
        class DataCacheProxy(object):
            # proxy[key] reaches any multiconfig's cache ...
            def __getitem__(self, key):
                return dataCaches[key]

            # ... while plain attribute access is forwarded to the cache
            # of the multiconfig ``mc``.
            def __getattr__(self, name):
                return getattr(dataCaches[mc], name)

        return dataCaches if cls.supports_multiconfig_datacaches else DataCacheProxy()
145
class SignatureGeneratorBasic(SignatureGenerator):
    """
    Signature generator that hashes each task's variable dependencies.

    Base hashes are produced by bb.data.generate_dependency_hash(). A task
    hash is the SHA-256 of the base hash concatenated with the unihashes of
    the task's runtime dependencies, its file checksums and any taint.
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.taskdeps = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.gendeps = {}
        self.lookupcache = {}
        self.setscenetasks = set()
        self.basewhitelist = set((data.getVar("BB_HASHBASE_WHITELIST") or "").split())
        self.taskwhitelist = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        """Compile BB_HASHTASK_WHITELIST into self.twl (None when unset)."""
        self.taskwhitelist = data.getVar("BB_HASHTASK_WHITELIST") or None
        if self.taskwhitelist:
            self.twl = re.compile(self.taskwhitelist)
        else:
            self.twl = None

    def _build_data(self, fn, d):
        """
        Compute and record the base hashes of every task of recipe ``fn``.

        Stores the task dependencies, generic dependencies and lookup cache
        for ``fn`` and returns the task dependencies. Unless
        BB_HASH_IGNORE_MISMATCH is "1", a base hash that differs from a
        previously recorded one is reported via bb.error().
        """
        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basewhitelist)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basewhitelist, fn)

        for task in tasklist:
            tid = fn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before run printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        self.taskdeps[fn] = taskdeps
        self.gendeps[fn] = gendeps
        self.lookupcache[fn] = lookupcache

        return taskdeps

    def set_setscene_tasks(self, setscene_tasks):
        """Remember the setscene tasks as a set."""
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):
        """
        Build the hash data for ``fn`` and publish each task's base hash in
        ``d`` as BB_BASEHASH:task-<task>.

        bb.parse.SkipRecipe propagates silently; any other exception is
        announced with a warning and then re-raised.
        """
        mc = d.getVar("__BBMULTICONFIG", False) or ""
        if variant or mc:
            fn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps = self._build_data(fn, d)
        except bb.parse.SkipRecipe:
            raise
        except Exception:
            # Ordinary errors get a hint about which recipe failed; control
            # flow exceptions such as KeyboardInterrupt pass through untouched.
            bb.warn("Error during finalise of %s" % fn)
            raise

        #Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[fn]:
        #    self.dump_sigtask(fn, task, d.getVar("STAMP"), False)

        for task in taskdeps:
            d.setVar("BB_BASEHASH:task-%s" % task, self.basehash[fn + ":" + task])

    def postparsing_clean_cache(self):
        #
        # After parsing we can remove some things from memory to reduce our memory footprint
        #
        self.gendeps = {}
        self.lookupcache = {}
        self.taskdeps = {}

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        """
        Return True if we should keep the dependency, False to drop it.

        Dependencies are only dropped for recipes that do not themselves
        match the task whitelist, and only when the dependency's recipe
        name does match it.
        """
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True

    def read_taint(self, fn, task, stampbase):
        """Return the content of "<stampbase>.<task>.taint", or None if unreadable."""
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint

    def prep_taskhash(self, tid, deps, dataCaches):
        """
        Collect everything get_taskhash() needs for ``tid``: its base hash,
        the retained runtime dependencies, file checksums and taint.

        Calls bb.fatal() if a retained dependency has no task hash yet.
        """
        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[fn]

        self.tidtopn[tid] = recipename

        for dep in sorted(deps, key=clean_basepath):
            (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            depname = dataCaches[depmc].pkg_fn[depmcfn]
            if not self.supports_multiconfig_datacaches and mc != depmc:
                # If the signature generator doesn't understand multiconfig
                # data caches, any dependency not in the same multiconfig must
                # be skipped for backward compatibility
                continue
            if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches):
                continue
            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
            self.runtaskdeps[tid].append(dep)

        if task in dataCaches[mc].file_checksums[fn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            self.file_checksum_values[tid].extend((f, cs) for (f, cs) in checksums)

        taskdep = dataCaches[mc].task_deps[fn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):
        """
        Compute, store and return the task hash of ``tid``.

        The hashed string is, in order: the base hash, the unihash of each
        recorded runtime dependency, each non-empty file checksum, and the
        taint (without its "nostamp:" prefix, if it has one).
        """
        data = self.basehash[tid]
        for dep in self.runtaskdeps[tid]:
            data = data + self.get_unihash(dep)

        for (f, cs) in self.file_checksum_values[tid]:
            if cs:
                data = data + cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                data = data + self.taints[tid][len("nostamp:"):]
            else:
                data = data + self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        return h

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        """Persist self.unitaskhashes through the unihash cache."""
        self.unihash_cache.save(self.unitaskhashes)

    def dump_sigtask(self, fn, task, stampbase, runtime):
        """
        Pickle the signature data of ``fn``:``task`` into a sigdata file.

        The target is ``stampbase`` itself when ``runtime`` is a string
        starting with "customfile" (the text after "customfile:" is then
        the stamp used to look up the taint). Otherwise it is
        "<stampbase>.<task>.sigdata.<unihash>" for a runtime dump of a task
        with a known task hash, or "<stampbase>.<task>.sigbasedata.<basehash>".

        The data is written to a temporary file in the target directory and
        then renamed into place. The temporary file is removed again if
        anything goes wrong, and the exception is re-raised.
        """
        tid = fn + ":" + task
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            referencestamp = runtime[len("customfile:"):]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        with bb.utils.umask(0o002):
            bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basewhitelist'] = self.basewhitelist
        data['taskwhitelist'] = self.taskwhitelist
        data['taskdeps'] = self.taskdeps[fn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.lookupcache[fn][task]
        for dep in self.taskdeps[fn][task]:
            if dep in self.basewhitelist:
                continue
            data['gendeps'][dep] = self.gendeps[fn][dep]
            data['varvals'][dep] = self.lookupcache[fn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = self.runtaskdeps[tid]
            data['file_checksum_values'] = [(os.path.basename(f), cs) for f, cs in self.file_checksum_values[tid]]
            data['runtaskhashes'] = {}
            for dep in data['runtaskdeps']:
                data['runtaskhashes'][dep] = self.get_unihash(dep)
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(fn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        # Cross-check the stored hashes against ones recomputed from the
        # data that is about to be written.
        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with os.fdopen(fd, "wb") as stream:
                pickle.dump(data, stream, -1)
                stream.flush()
            os.chmod(tmpfile, 0o664)
            bb.utils.rename(tmpfile, sigfile)
        except Exception:
            # Whatever failed (I/O or pickling), do not leave the temporary
            # file behind; the original exception is re-raised unchanged.
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise

    def dump_sigfn(self, fn, dataCaches, options):
        """
        Dump runtime signature data for every task of ``fn`` that has a
        task hash, reporting any mismatch between the cached base hash and
        the one held by this generator.
        """
        if fn in self.taskdeps:
            for task in self.taskdeps[fn]:
                tid = fn + ":" + task
                mc = bb.runqueue.mc_from_tid(tid)
                if tid not in self.taskhash:
                    continue
                if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
                    bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
                    bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
                self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500424
class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    """
    Variant of the basic generator that embeds a hash in stamp file names
    and invalidates tasks by writing a taint instead of deleting stamps.
    """
    name = "basichash"

    def get_stampfile_hash(self, tid):
        """
        Return the task hash of ``tid`` when known, else its base hash.

        A tid present in neither mapping raises KeyError.
        """
        hashes = self.taskhash if tid in self.taskhash else self.basehash
        return hashes[tid]

    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
        """
        Return "<stampbase>.<taskname>.<hash>.<extrainfo>" with trailing
        dots stripped.

        For "<task>_setscene" names (other than "do_setscene" itself) the
        hash is looked up under the task name without that suffix. With
        ``clean`` set, "*" takes the place of the hash.
        """
        suffix = "_setscene"
        if taskname != "do_setscene" and taskname.endswith(suffix):
            hashed_task = taskname[:-len(suffix)]
        else:
            hashed_task = taskname
        tid = fn + ":" + hashed_task

        h = "*" if clean else self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
        """Return the stamp file name with "*" in place of the hash."""
        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, d, fn):
        """Force a rebuild of ``task`` by writing a taint for it."""
        message = "Tainting hash to force rebuild of task %s, %s" % (fn, task)
        bb.note(message)
        bb.build.write_taint(task, d, fn)
453
Brad Bishop08902b02019-08-20 09:16:51 -0400454class SignatureGeneratorUniHashMixIn(object):
def __init__(self, data):
    """
    Start with an empty ``extramethod`` mapping, then run the next
    initialiser in the method resolution order with the same ``data``.
    """
    self.extramethod = dict()
    super().__init__(data)
458
def get_taskdata(self):
    """
    Return (server, method, extramethod) followed by the task data tuple
    of the next class in the method resolution order.
    """
    own_state = (self.server, self.method, self.extramethod)
    return own_state + super().get_taskdata()
Brad Bishop08902b02019-08-20 09:16:51 -0400461
def set_taskdata(self, data):
    """
    Restore state from a tuple as returned by get_taskdata(): the first
    three items are server, method and extramethod, the remainder is
    handed to the next class in the method resolution order.
    """
    own_state, inherited_state = data[:3], data[3:]
    self.server, self.method, self.extramethod = own_state
    super().set_taskdata(inherited_state)
Brad Bishop08902b02019-08-20 09:16:51 -0400465
def client(self):
    """
    Return the hash equivalence client, creating it from ``self.server``
    with hashserv.create_client() on first use and caching it in
    ``self._client``.
    """
    existing = getattr(self, '_client', None)
    if existing is not None:
        return existing
    self._client = hashserv.create_client(self.server)
    return self._client
470
def get_stampfile_hash(self, tid):
    """
    Return the hash to embed in the stamp file name of ``tid``.

    If the task has a task hash and a unique hash is cached for it, that
    unihash is used, so a changed taskhash that still maps to the same
    unihash does not cause a re-run. In every other case the decision is
    delegated to the next class in the method resolution order.
    """
    if tid not in self.taskhash:
        return super().get_stampfile_hash(tid)

    reported = self._get_unihash(tid)
    if reported is None:
        return super().get_stampfile_hash(tid)
    return reported
481
def set_unihash(self, tid, unihash):
    """
    Record ``unihash`` for ``tid``.

    The unihash cache entry is keyed by "<mc>:<recipe name>:<task name>"
    and stores the pair (current task hash, unihash); ``self.unihash`` is
    updated as well.
    """
    mc, _, taskname, _ = bb.runqueue.split_tid_mcfn(tid)
    cache_key = ":".join((mc, self.tidtopn[tid], taskname))
    self.unitaskhashes[cache_key] = (self.taskhash[tid], unihash)
    self.unihash[tid] = unihash
487
def _get_unihash(self, tid, checkkey=None):
    """
    Look up the cached unihash of ``tid``.

    Returns None when the tid has no recorded recipe name, when nothing is
    cached under "<mc>:<recipe name>:<task name>", or when the cached entry
    was stored for a task hash other than ``checkkey`` (which defaults to
    the current task hash of ``tid``).
    """
    if tid not in self.tidtopn:
        return None

    mc, _, taskname, _ = bb.runqueue.split_tid_mcfn(tid)
    cache_key = ":".join((mc, self.tidtopn[tid], taskname))
    if cache_key not in self.unitaskhashes:
        return None

    expected = checkkey or self.taskhash[tid]
    cached_taskhash, unihash = self.unitaskhashes[cache_key]
    if cached_taskhash != expected:
        return None
    return unihash
Brad Bishop08902b02019-08-20 09:16:51 -0400501
def get_unihash(self, tid):
    """
    Return the unique hash of ``tid``, asking the hash equivalence server
    when it is not cached.

    When a set of setscene tasks is known and ``tid`` is not in it, the
    task hash is returned and ``self.unihash[tid]`` is set to None.
    Otherwise a cached unihash is used if present. Failing that, the
    server is queried through self.client(); if it reports nothing, or it
    cannot be reached (ConnectionError, reported as a warning), the task
    hash itself becomes the unihash. The result is recorded with
    set_unihash().

    Raises KeyError if ``tid`` has no task hash yet.
    """
    taskhash = self.taskhash[tid]

    # If its not a setscene task we can return
    if self.setscenetasks and tid not in self.setscenetasks:
        self.unihash[tid] = None
        return taskhash

    # TODO: This cache can grow unbounded. It probably only needs to keep
    # for each task
    unihash = self._get_unihash(tid)
    if unihash is not None:
        self.unihash[tid] = unihash
        return unihash

    # In the absence of being able to discover a unique hash from the
    # server, make it be equivalent to the taskhash. The unique "hash" only
    # really needs to be a unique string (not even necessarily a hash), but
    # making it match the taskhash has a few advantages:
    #
    # 1) All of the sstate code that assumes hashes can be the same
    # 2) It provides maximal compatibility with builders that don't use
    #    an equivalency server
    # 3) The value is easy for multiple independent builders to derive the
    #    same unique hash from the same input. This means that if the
    #    independent builders find the same taskhash, but it isn't reported
    #    to the server, there is a better chance that they will agree on
    #    the unique hash.
    unihash = taskhash

    try:
        method = self.method
        if tid in self.extramethod:
            method = method + self.extramethod[tid]
        data = self.client().get_unihash(method, self.taskhash[tid])
        if data:
            unihash = data
            message = 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server)
            # A unique hash equal to the taskhash is not very interesting,
            # so it is reported at debug level 2. If they differ, that
            # is much more interesting, so it is reported at debug level 1.
            # The logger methods take the message as their first argument,
            # so the level is chosen by method rather than passed as one.
            if unihash == taskhash:
                hashequiv_logger.debug2(message)
            else:
                hashequiv_logger.debug(message)
        else:
            hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
    except ConnectionError as e:
        bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

    self.set_unihash(tid, unihash)
    # Kept even though set_unihash() normally stores the same value, so the
    # mapping is filled in even if set_unihash() is overridden.
    self.unihash[tid] = unihash
    return unihash
551
def report_unihash(self, path, task, d):
    """
    Compute the output hash of ``task`` and report it to the hash
    equivalence server.

    Nothing happens when the task is not among the known setscene tasks
    or when BB_TASKHASH differs from the recorded task hash. Otherwise
    the output hash is produced by evaluating ``self.method`` with
    (path, sigfile, task, d), where sigfile is a file
    "depsig.do_<task>.<pid>" opened in the directory given by T. If the
    server answers with a different unihash, an event is fired, the new
    value is recorded and BB_UNIHASH is updated. A ConnectionError while
    talking to the server is only warned about. Finally the sigfile is
    closed and "depsig.do_<task>" is re-created as a symlink to it.
    """
    import importlib

    taskhash = d.getVar('BB_TASKHASH')
    unihash = d.getVar('BB_UNIHASH')
    report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
    tempdir = d.getVar('T')
    fn = d.getVar('BB_FILENAME')
    tid = fn + ':do_' + task
    key = tid + ':' + taskhash

    if self.setscenetasks and tid not in self.setscenetasks:
        return

    # This can happen if locked sigs are in action. Detect and just abort
    if taskhash != self.taskhash[tid]:
        return

    # Sanity checks
    cache_unihash = self._get_unihash(tid, checkkey=taskhash)
    if cache_unihash is None:
        bb.fatal('%s not in unihash cache. Please report this error' % key)

    if cache_unihash != unihash:
        bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

    sigfile = None
    sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
    sigfile_link = "depsig.do_%s" % task

    try:
        sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

        eval_context = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

        # A dotted method names a function inside a module to import;
        # anything else is evaluated as an expression prefix as-is.
        if "." in self.method:
            module_name, function_name = self.method.rsplit('.', 1)
            eval_context['method'] = getattr(importlib.import_module(module_name), function_name)
            expression = 'method(path, sigfile, task, d)'
        else:
            expression = self.method + '(path, sigfile, task, d)'
        outhash = bb.utils.better_eval(expression, eval_context)

        try:
            extra_data = {}

            owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
            if owner:
                extra_data['owner'] = owner

            if report_taskdata:
                # Rewind so the signature information just written by the
                # output hash method can be read back
                sigfile.seek(0)

                extra_data.update({
                    'PN': d.getVar('PN'),
                    'PV': d.getVar('PV'),
                    'PR': d.getVar('PR'),
                    'task': task,
                    'outhash_siginfo': sigfile.read().decode('utf-8'),
                })

            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]

            reply = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
            new_unihash = reply['unihash']

            if new_unihash == unihash:
                hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            else:
                hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                bb.event.fire(bb.runqueue.taskUniHashUpdate(tid, new_unihash), d)
                self.set_unihash(tid, new_unihash)
                d.setVar('BB_UNIHASH', new_unihash)
        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
    finally:
        if sigfile:
            sigfile.close()

            sigfile_link_path = os.path.join(tempdir, sigfile_link)
            bb.utils.remove(sigfile_link_path)

            try:
                os.symlink(sigfile_name, sigfile_link_path)
            except OSError:
                pass
637
def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
    """
    Report to the hash equivalence server that taskhash should map to
    wanted_unihash, and adopt the server's answer if it agrees.

    The hash method sent to the server is self.method, with
    self.extramethod[tid] appended when tid has an entry there.

    Returns True only when the server replies with wanted_unihash and that
    value differs from current_unihash; in that case it is stored through
    self.set_unihash(tid, ...). Returns False when the server gives no
    reply, keeps current_unihash, answers with some other hash, or when a
    ConnectionError is raised (which is reported with bb.warn).

    The datacaches argument is not used by this method.
    """
    try:
        method = self.method
        if tid in self.extramethod:
            method = method + self.extramethod[tid]

        reply = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, {})
        hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(reply)))

        if reply is None:
            bb.warn("Server unable to handle unihash report")
            return False

        server_unihash = reply['unihash']

        # The "unchanged" case is tested first, so a server answer equal to
        # both current_unihash and wanted_unihash counts as unchanged.
        if server_unihash == current_unihash:
            hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, server_unihash))
            return False

        if server_unihash != wanted_unihash:
            # TODO: What to do here?
            hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, server_unihash))
            return False

        hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, server_unihash))
        self.set_unihash(tid, server_unihash)
        return True

    except ConnectionError as e:
        bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

    return False
Brad Bishop08902b02019-08-20 09:16:51 -0400668
669#
670# Dummy class used for bitbake-selftest
671#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    """
    Signature generator registered under the name "TestEquivHash".

    Combines the unihash mix-in with the basic hash generator and fills in
    the two attributes the mix-in code in this file reads: self.server and
    self.method.
    """
    name = "TestEquivHash"

    def init_rundepcheck(self, data):
        """
        Run the inherited initialisation, then take the server address from
        the BB_HASHSERVE variable and fix the output hash method name.
        """
        super().init_rundepcheck(data)
        self.server = data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"
678
Andrew Geissler5a43b432020-06-13 10:46:56 -0500679#
680# Dummy class used for bitbake-selftest
681#
class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash):
    """
    Basic hash signature generator registered under the name
    "TestMulticonfigDepends" that declares multiconfig datacache support.
    """
    supports_multiconfig_datacaches = True
    name = "TestMulticonfigDepends"
Brad Bishop08902b02019-08-20 09:16:51 -0400685
def dump_this_task(outfile, d):
    """
    Dump the signature data of the task currently described by d.

    The recipe file comes from BB_FILENAME and the task name is "do_" plus
    BB_CURRENTTASK. The stamp returned by bb.build.stamp_internal() is
    passed to the active signature generator's dump_sigtask(), prefixed
    with "customfile:", together with outfile.
    """
    import bb.parse
    recipe_file = d.getVar("BB_FILENAME")
    task_name = "do_" + d.getVar("BB_CURRENTTASK")
    stamp = bb.build.stamp_internal(task_name, d, None, True)
    bb.parse.siggen.dump_sigtask(recipe_file, task_name, outfile, "customfile:" + stamp)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500692
def init_colors(enable_color):
    """
    Build the colour dict for passing to compare_sigfiles().

    The returned dict always has the keys color_title, color_default,
    color_add and color_remove. The values are ANSI escape sequences when
    enable_color is true and empty strings otherwise, so format strings
    that name these keys work in both modes.
    """
    palette = {
        'color_title': '\033[1m',
        'color_default': '\033[0m',
        'color_add': '\033[0;32m',
        'color_remove': '\033[0;31m',
    }
    if enable_color:
        return palette
    # Same keys, in the same order, with every escape sequence blanked out
    return dict.fromkeys(palette, '')
706
def worddiff_str(oldstr, newstr, colors=None):
    """
    Return a quoted, word-level inline diff between oldstr and newstr.

    Both strings are split on single spaces and compared with
    simplediff.diff(). Unchanged runs are emitted as-is, additions as
    {+...+} and removals as [-...-], wrapped in the add/remove colours from
    colors (a dict as returned by init_colors(); colours are disabled when
    it is empty or None). If the strings differ only in whitespace, the
    note " (whitespace changed)" follows the closing quote.
    """
    if not colors:
        colors = init_colors(False)

    markup = {
        '+': '{color_add}{{+{value}+}}{color_default}',
        '-': '{color_remove}[-{value}-]{color_default}',
    }

    pieces = []
    for op, words in simplediff.diff(oldstr.split(' '), newstr.split(' ')):
        text = ' '.join(words)
        if op == '=':
            pieces.append(text)
        elif op in markup:
            pieces.append(markup[op].format(value=text, **colors))

    # Whitespace-separated tokens contain no blanks, so comparing the token
    # lists is the same as comparing the strings re-joined with single spaces.
    if oldstr != newstr and oldstr.split() == newstr.split():
        note = ' (whitespace changed)'
    else:
        note = ''
    return '"%s"%s' % (' '.join(pieces), note)
726
def list_inline_diff(oldlist, newlist, colors=None):
    """
    Return a bracketed, comma separated inline diff between two lists.

    The lists are compared with simplediff.diff(). Each unchanged run is
    shown in single quotes, each added run is prefixed with "+" and each
    removed run with "-", the latter two wrapped in the add/remove colours
    from colors (a dict as returned by init_colors(); colours are disabled
    when it is empty or None). The items of a run are joined with spaces.
    """
    if not colors:
        colors = init_colors(False)

    markup = {
        '+': '{color_add}+{value}{color_default}',
        '-': '{color_remove}-{value}{color_default}',
    }

    pieces = []
    for op, items in simplediff.diff(oldlist, newlist):
        text = ' '.join(items)
        if op == '=':
            pieces.append("'%s'" % text)
        elif op in markup:
            pieces.append(markup[op].format(value=text, **colors))
    return '[%s]' % (', '.join(pieces))
743
def clean_basepath(basepath):
    """
    Shorten a task path to "<dir>/<recipe_task>" plus any prefix markers.

    The input is split at its last two "/" separators. The last two
    components are always kept. Whatever preceded them is dropped if it is
    an absolute path; otherwise a leading "mc:<name>:" is turned into a
    trailing ":mc:<name>" suffix, and any remaining prefix up to the first
    ":/" (something like "virtual:a[:b[:c]]") is appended after a ":".

    Raises ValueError if the input has fewer than two "/" separators, and
    IndexError if the part before the last two components is empty.
    """
    prefix, directory, recipe_task = basepath.rsplit("/", 2)
    result = '/'.join((directory, recipe_task))

    if prefix[0] == '/':
        return result

    mc_suffix = ''
    if prefix.startswith("mc:") and prefix.count(':') >= 2:
        _, mc_name, prefix = prefix.split(":", 2)
        mc_suffix = ':mc:' + mc_name

    # The mc part is gone now. Anything still in front of the recipe path
    # (which starts at ':/') becomes the first suffix.
    if prefix[0] != '/':
        result += ':' + prefix.partition(':/')[0]

    return result + mc_suffix
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500764
def clean_basepaths(a):
    """
    Return a new dict with every key of a passed through clean_basepath().

    Values are carried over unchanged. If two keys clean to the same
    string, the one iterated later wins.
    """
    return {clean_basepath(path): a[path] for path in a}
770
def clean_basepaths_list(a):
    """
    Return a new list holding clean_basepath() of every item of a, in order.
    """
    return [clean_basepath(path) for path in a]
776
def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
    """
    Compare two pickled signature data files and describe the differences.

    a, b      -- paths of the two files; each is opened in binary mode and
                 unpickled into a dict of signature data
    recursecb -- optional callable, invoked as recursecb(dep, a_hash, b_hash)
                 for every dependent task whose hash differs between the two
                 files. A truthy return value is merged into the output (it
                 is concatenated with / extended onto a list, so presumably
                 it is a list of strings)
    color     -- when true, ANSI colour sequences from init_colors() are
                 inserted into the messages
    collapsed -- when true, the basehash, runtaskdeps and "Hash for dependent
                 task" messages are left out and the recursecb results are
                 appended to the output instead of replacing it

    Returns a list of human readable strings, one per detected difference.
    """
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    # NOTE(review): unpickling can execute code embedded in the file. This
    # presumably only ever sees locally generated signature files - confirm
    # before pointing it at data from an untrusted source.
    with open(a, 'rb') as f:
        p1 = pickle.Unpickler(f)
        a_data = p1.load()
    with open(b, 'rb') as f:
        p2 = pickle.Unpickler(f)
        b_data = p2.load()

    # Returns (changed, added, removed) key sets going from dict a to dict b.
    # Keys in whitelist are never reported as changed. The set() default is
    # only read, never mutated, so sharing it between calls is harmless.
    def dict_diff(a, b, whitelist=set()):
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in whitelist:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed

    # Returns (changed, added, removed) for two collections of
    # (filename, checksum) pairs: changed holds (filename, old, new) tuples,
    # added and removed hold bare filenames.
    def file_checksums_diff(a, b):
        from collections import Counter
        # Handle old siginfo format
        if isinstance(a, dict):
            a = [(os.path.basename(f), cs) for f, cs in a.items()]
        if isinstance(b, dict):
            b = [(os.path.basename(f), cs) for f, cs in b.items()]
        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                # A removed entry whose filename also shows up among the
                # added ones is really a checksum change for that file
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    # No added entry with the same filename: truly removed
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed

    if 'basewhitelist' in a_data and a_data['basewhitelist'] != b_data['basewhitelist']:
        output.append(color_format("{color_title}basewhitelist changed{color_default} from '%s' to '%s'") % (a_data['basewhitelist'], b_data['basewhitelist']))
        if a_data['basewhitelist'] and b_data['basewhitelist']:
            output.append("changed items: %s" % a_data['basewhitelist'].symmetric_difference(b_data['basewhitelist']))

    if 'taskwhitelist' in a_data and a_data['taskwhitelist'] != b_data['taskwhitelist']:
        output.append(color_format("{color_title}taskwhitelist changed{color_default} from '%s' to '%s'") % (a_data['taskwhitelist'], b_data['taskwhitelist']))
        if a_data['taskwhitelist'] and b_data['taskwhitelist']:
            output.append("changed items: %s" % a_data['taskwhitelist'].symmetric_difference(b_data['taskwhitelist']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    # Variables present in both basewhitelists are not reported as changed.
    # NOTE: unlike the check further up, 'basewhitelist' is read here without
    # first testing that the key exists.
    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basewhitelist'] & b_data['basewhitelist'])
    if changed:
        for dep in changed:
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in added:
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in removed:
            output.append(color_format("{color_title}Dependency on Variable %s was removed") % (dep))


    # Variable values: multi-line values get a unified diff, values with
    # spaces a word diff, everything else a plain old/new message.
    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in changed:
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    # A missing key is treated as "no file checksums"
    if not 'file_checksum_values' in a_data:
        a_data['file_checksum_values'] = {}
    if not 'file_checksum_values' in b_data:
        b_data['file_checksum_values'] = {}

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    # A missing key is treated as "no runtime task dependencies"
    if not 'runtaskdeps' in a_data:
        a_data['runtaskdeps'] = {}
    if not 'runtaskdeps' in b_data:
        b_data['runtaskdeps'] = {}

    if not collapsed:
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            # Same length: compare the dependencies position by position.
            # From here on a and b name the dependency at that position,
            # no longer the two file paths passed in.
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))

        if changed:
            clean_a = clean_basepaths_list(a_data['runtaskdeps'])
            clean_b = clean_basepaths_list(b_data['runtaskdeps'])
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
                output.append("\n".join(changed))


    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = a_data['runtaskhashes']
        b = b_data['runtaskhashes']
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in added:
                # An added dependency whose hash equals that of a removed
                # one is not reported
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (clean_basepath(dep), b[dep]))
        if removed:
            for dep in removed:
                # Likewise, a removed dependency whose hash equals that of
                # an added one is not reported
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (clean_basepath(dep), a[dep]))
        if changed:
            for dep in changed:
                if not collapsed:
                    output.append(color_format("{color_title}Hash for dependent task %s changed{color_default} from %s to %s") % (clean_basepath(dep), a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
                            # that hash since in all likelihood, they're the same changes this task also saw.
                            # Note that this drops everything collected so far except that last line.
                            output = [output[-1]] + recout

    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        # The comparison above uses the raw values; the prefix is only
        # rewritten for display
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output
994
995
def calc_basehash(sigdata):
    """
    Recompute the base hash from a signature data dict.

    The hashed text is the value of the task itself (sigdata['varvals']
    looked up by sigdata['task'], with None treated as empty) followed, for
    every name in sigdata['taskdeps'], by that name and then its value from
    sigdata['varvals'] converted with str(); a None value contributes
    nothing beyond the name.

    Returns the SHA-256 hex digest of that text encoded as UTF-8.
    """
    task = sigdata['task']
    task_value = sigdata['varvals'][task]

    parts = ['' if task_value is None else task_value]
    for dep in sigdata['taskdeps']:
        parts.append(dep)
        dep_value = sigdata['varvals'][dep]
        if dep_value is not None:
            parts.append(str(dep_value))

    return hashlib.sha256(''.join(parts).encode("utf-8")).hexdigest()
Patrick Williamsd8c66bc2016-06-20 12:57:21 -05001011
def calc_taskhash(sigdata):
    """
    Recompute the task hash from a signature data dict.

    The hashed text is built, in this order, from:
      - sigdata['basehash']
      - sigdata['runtaskhashes'][dep] for every dep in sigdata['runtaskdeps']
      - the second element of every entry in
        sigdata['file_checksum_values'], skipping empty/None ones
      - sigdata['taint'] if present, with a leading "nostamp:" removed

    Returns the SHA-256 hex digest of that text encoded as UTF-8.
    Raises KeyError if one of the required keys is missing.
    """
    parts = [sigdata['basehash']]

    parts.extend(sigdata['runtaskhashes'][dep] for dep in sigdata['runtaskdeps'])

    parts.extend(c[1] for c in sigdata['file_checksum_values'] if c[1])

    if 'taint' in sigdata:
        taint = sigdata['taint']
        # Only strip "nostamp:" when it really is a prefix. A plain
        # substring test would chop the first 8 characters off a taint that
        # merely contains "nostamp:" somewhere else. The other taint checks
        # in this file use startswith() as well.
        if taint.startswith('nostamp:'):
            parts.append(taint[len('nostamp:'):])
        else:
            parts.append(taint)

    return hashlib.sha256(''.join(parts).encode("utf-8")).hexdigest()
Patrick Williamsd8c66bc2016-06-20 12:57:21 -05001029
1030
def dump_sigfile(a):
    """
    Return the contents of a pickled signature data file as text lines.

    a is the path of the file; it is opened in binary mode and unpickled.
    The returned list holds one string per item: the whitelists, task
    dependencies and base hash, the dependencies and value of every
    variable, then - where the file has them - the runtime task
    dependencies, file checksums, dependent task hashes and taint. The
    list ends with the base hash recomputed by calc_basehash() (only when
    the file records its task name) and the task hash recomputed by
    calc_taskhash().
    """
    with open(a, 'rb') as f:
        sigdata = pickle.Unpickler(f).load()

    lines = [
        "basewhitelist: %s" % (sigdata['basewhitelist']),
        "taskwhitelist: %s" % (sigdata['taskwhitelist']),
        "Task dependencies: %s" % (sorted(sigdata['taskdeps'])),
        "basehash: %s" % (sigdata['basehash']),
    ]

    for var, deps in sigdata['gendeps'].items():
        lines.append("List of dependencies for variable %s is %s" % (var, deps))

    for var, value in sigdata['varvals'].items():
        lines.append("Variable %s value is %s" % (var, value))

    if 'runtaskdeps' in sigdata:
        lines.append("Tasks this task depends on: %s" % (sigdata['runtaskdeps']))

    if 'file_checksum_values' in sigdata:
        lines.append("This task depends on the checksums of files: %s" % (sigdata['file_checksum_values']))

    if 'runtaskhashes' in sigdata:
        for dep, dep_hash in sigdata['runtaskhashes'].items():
            lines.append("Hash for dependent task %s is %s" % (dep, dep_hash))

    if 'taint' in sigdata:
        taint = sigdata['taint']
        if taint.startswith('nostamp:'):
            taint = taint.replace('nostamp:', 'nostamp(uuid4):')
        lines.append("Tainted (by forced/invalidated task): %s" % taint)

    if 'task' in sigdata:
        lines.append("Computed base hash is %s and from file %s" % (calc_basehash(sigdata), sigdata['basehash']))
    else:
        lines.append("Unable to compute base hash")

    lines.append("Computed task hash is %s" % calc_taskhash(sigdata))

    return lines