#
# SPDX-License-Identifier: GPL-2.0-only
#

import hashlib
import logging
import os
import re
import tempfile
import pickle
import bb.data
import difflib
import simplediff
from bb.checksum import FileChecksumCache
from bb import runqueue
import hashserv
import hashserv.client

logger = logging.getLogger('BitBake.SigGen')
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')

def init(d):
    siggens = [obj for obj in globals().values()
                      if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)
            break
    else:
        logger.error("Invalid signature generator '%s', using default 'noop'\n"
                     "Available generators: %s", desired,
                     ', '.join(obj.name for obj in siggens))
        return SignatureGenerator(d)
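
# Added editorial note (not part of the original module): the generator is
# chosen via the BB_SIGNATURE_HANDLER variable, matched against each class's
# "name" attribute. For example, a configuration file might set
#
#   BB_SIGNATURE_HANDLER = "basichash"
#
# Layers can provide additional generator classes (OpenEmbedded, for instance,
# ships its own handlers); if the requested name is unknown, the no-op
# generator below is used as the fallback, as init() shows.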

class SignatureGenerator(object):
    """
    """
    name = "noop"

    # If the derived class supports multiconfig datacaches, set this to True
    # The default is False for backward compatibility with derived signature
    # generators that do not understand multiconfig caches
    supports_multiconfig_datacaches = False

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.unitaskhashes = {}
        self.tidtopn = {}
        self.setscenetasks = set()

    def finalise(self, fn, d, variant):
        return

    def postparsing_clean_cache(self):
        return

    def get_unihash(self, tid):
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        return

    def get_taskhash(self, tid, deps, dataCaches):
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, fn, task, stampbase, runtime):
        return

    def invalidate_task(self, task, d, fn):
        bb.build.del_stamp(task, d, fn)

    def dump_sigs(self, dataCache, options):
        return

    def get_taskdata(self):
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        self.__init__(data)

    def get_taskhashes(self):
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        return

    def set_setscene_tasks(self, setscene_tasks):
        return

    @classmethod
    def get_data_caches(cls, dataCaches, mc):
        """
        This function returns the datacaches that should be passed to signature
        generator functions. If the signature generator supports multiconfig
        caches, the entire dictionary of data caches is sent, otherwise a
        special proxy is sent that supports both index access to all
        multiconfigs and direct access for the default multiconfig.

        The proxy class allows code in this class itself to always use
        multiconfig aware code (to ease maintenance), but derived classes that
        are unaware of multiconfig data caches can still access the default
        multiconfig as expected.

        Do not override this function in derived classes; it will be removed in
        the future when support for multiconfig data caches is mandatory.
        """
        class DataCacheProxy(object):
            def __init__(self):
                pass

            def __getitem__(self, key):
                return dataCaches[key]

            def __getattr__(self, name):
                return getattr(dataCaches[mc], name)

        if cls.supports_multiconfig_datacaches:
            return dataCaches

        return DataCacheProxy()
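
    # Added illustrative sketch (editorial, not original code): with the proxy
    # returned above, a non-multiconfig-aware generator can write
    #
    #   dataCaches.pkg_fn[fn]          # attribute access -> the default multiconfig 'mc'
    #
    # while multiconfig-aware code indexes explicitly:
    #
    #   dataCaches[depmc].pkg_fn[fn]   # item access -> a specific multiconfig
    #
    # Both forms resolve to the same underlying per-multiconfig cache objects.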

class SignatureGeneratorBasic(SignatureGenerator):
    """
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.taskdeps = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.gendeps = {}
        self.lookupcache = {}
        self.setscenetasks = set()
        self.basewhitelist = set((data.getVar("BB_HASHBASE_WHITELIST") or "").split())
        self.taskwhitelist = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        self.taskwhitelist = data.getVar("BB_HASHTASK_WHITELIST") or None
        if self.taskwhitelist:
            self.twl = re.compile(self.taskwhitelist)
        else:
            self.twl = None

    def _build_data(self, fn, d):

        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basewhitelist)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basewhitelist, fn)

        for task in tasklist:
            tid = fn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before running printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        self.taskdeps[fn] = taskdeps
        self.gendeps[fn] = gendeps
        self.lookupcache[fn] = lookupcache

        return taskdeps

    def set_setscene_tasks(self, setscene_tasks):
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        if variant or mc:
            fn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps = self._build_data(fn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % fn)
            raise

        #Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[fn]:
        #    self.dump_sigtask(fn, task, d.getVar("STAMP"), False)

        for task in taskdeps:
            d.setVar("BB_BASEHASH_task-%s" % task, self.basehash[fn + ":" + task])

    def postparsing_clean_cache(self):
        #
        # After parsing we can remove some things from memory to reduce our memory footprint
        #
        self.gendeps = {}
        self.lookupcache = {}
        self.taskdeps = {}

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the whitelist
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True
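
    # Added note (not in the original source): BB_HASHTASK_WHITELIST is a
    # regular expression. With, say, BB_HASHTASK_WHITELIST = ".*-cross.*"
    # (a hypothetical value), a recipe whose name does not match the
    # expression has any dependency whose name does match dropped from its
    # task signatures by rundep_check() above, while matching recipes keep
    # all of their dependencies.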

    def read_taint(self, fn, task, stampbase):
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint

    def prep_taskhash(self, tid, deps, dataCaches):

        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[fn]

        self.tidtopn[tid] = recipename

        for dep in sorted(deps, key=clean_basepath):
            (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            depname = dataCaches[depmc].pkg_fn[depmcfn]
            if not self.supports_multiconfig_datacaches and mc != depmc:
                # If the signature generator doesn't understand multiconfig
                # data caches, any dependency not in the same multiconfig must
                # be skipped for backward compatibility
                continue
            if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches):
                continue
            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
            self.runtaskdeps[tid].append(dep)

        if task in dataCaches[mc].file_checksums[fn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            for (f,cs) in checksums:
                self.file_checksum_values[tid].append((f,cs))

        taskdep = dataCaches[mc].task_deps[fn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):

        data = self.basehash[tid]
        for dep in self.runtaskdeps[tid]:
            if dep in self.unihash:
                if self.unihash[dep] is None:
                    data = data + self.taskhash[dep]
                else:
                    data = data + self.unihash[dep]
            else:
                data = data + self.get_unihash(dep)

        for (f, cs) in self.file_checksum_values[tid]:
            if cs:
                data = data + cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                data = data + self.taints[tid][8:]
            else:
                data = data + self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH_task-%s" % task, taskhash[task])
        return h
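
    # Added summary comment: the task hash computed above is, conceptually,
    #
    #   sha256(basehash + unihash(dep1) + unihash(dep2) + ...
    #          + file checksums + optional taint)
    #
    # so a change in any dependency's (uni)hash, in a watched file, or a
    # forced taint produces a new taskhash and therefore a rebuild.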

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        self.unihash_cache.save(self.unitaskhashes)

    def dump_sigtask(self, fn, task, stampbase, runtime):

        tid = fn + ":" + task
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basewhitelist'] = self.basewhitelist
        data['taskwhitelist'] = self.taskwhitelist
        data['taskdeps'] = self.taskdeps[fn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.lookupcache[fn][task]
        for dep in self.taskdeps[fn][task]:
            if dep in self.basewhitelist:
                continue
            data['gendeps'][dep] = self.gendeps[fn][dep]
            data['varvals'][dep] = self.lookupcache[fn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = self.runtaskdeps[tid]
            data['file_checksum_values'] = [(os.path.basename(f), cs) for f,cs in self.file_checksum_values[tid]]
            data['runtaskhashes'] = {}
            for dep in data['runtaskdeps']:
                data['runtaskhashes'][dep] = self.get_unihash(dep)
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(fn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with os.fdopen(fd, "wb") as stream:
                p = pickle.dump(data, stream, -1)
                stream.flush()
            os.chmod(tmpfile, 0o664)
            os.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err

    def dump_sigfn(self, fn, dataCaches, options):
        if fn in self.taskdeps:
            for task in self.taskdeps[fn]:
                tid = fn + ":" + task
                mc = bb.runqueue.mc_from_tid(tid)
                if tid not in self.taskhash:
                    continue
                if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
                    bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
                    bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
                self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)

class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    name = "basichash"

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            return self.taskhash[tid]

        # If the task is not in taskhash, fall back to the basehash
        return self.basehash[tid]

    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
        if taskname != "do_setscene" and taskname.endswith("_setscene"):
            tid = fn + ":" + taskname[:-9]
        else:
            tid = fn + ":" + taskname
        if clean:
            h = "*"
        else:
            h = self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')
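
    # Added note: stamp files produced by this generator therefore look like
    #   <stampbase>.<taskname>.<hash>.<extrainfo>
    # e.g. (hypothetical) ".../stamps/foo-1.0-r0.do_compile.<sha256>" when
    # extrainfo is empty, with "*" substituted for the hash when building a
    # clean mask.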

    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, d, fn):
        bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
        bb.build.write_taint(task, d, fn)

class SignatureGeneratorUniHashMixIn(object):
    def __init__(self, data):
        self.extramethod = {}
        super().__init__(data)

    def get_taskdata(self):
        return (self.server, self.method, self.extramethod) + super().get_taskdata()

    def set_taskdata(self, data):
        self.server, self.method, self.extramethod = data[:3]
        super().set_taskdata(data[3:])

    def client(self):
        if getattr(self, '_client', None) is None:
            self._client = hashserv.create_client(self.server)
        return self._client
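
    # Added note: self.server comes from BB_HASHSERVE (see the selftest class
    # further below). hashserv.create_client() is expected to accept an
    # address such as a "host:port" string or a unix socket path; the exact
    # accepted forms are defined by the hashserv module, not here.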

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            # If a unique hash is reported, use it as the stampfile hash. This
            # ensures that a task won't be re-run if the taskhash changes but
            # it would still result in the same output hash
            unihash = self._get_unihash(tid)
            if unihash is not None:
                return unihash

        return super().get_stampfile_hash(tid)

    def set_unihash(self, tid, unihash):
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        self.unitaskhashes[key] = (self.taskhash[tid], unihash)
        self.unihash[tid] = unihash

    def _get_unihash(self, tid, checkkey=None):
        if tid not in self.tidtopn:
            return None
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        if key not in self.unitaskhashes:
            return None
        if not checkkey:
            checkkey = self.taskhash[tid]
        (key, unihash) = self.unitaskhashes[key]
        if key != checkkey:
            return None
        return unihash
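
    # Added note: the unitaskhashes cache is keyed by "<mc>:<pn>:<taskname>"
    # and stores (taskhash, unihash) pairs, e.g. (hypothetical)
    #   ":zlib:do_install" -> ("ab12...", "cd34...")   # default multiconfig
    # _get_unihash() only returns the cached unihash when the stored taskhash
    # still matches the current one (or the supplied checkkey).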

    def get_unihash(self, tid):
        taskhash = self.taskhash[tid]

        # If it's not a setscene task we can return the taskhash
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # for each task
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) The value is easy for multiple independent builders to derive the
        #    same unique hash from the same input. This means that if the
        #    independent builders find the same taskhash, but it isn't reported
        #    to the server, there is a better chance that they will agree on
        #    the unique hash.
        unihash = taskhash

        try:
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported at debug level 2. If they differ, that
                # is much more interesting, so it is reported at debug level 1
                hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug(2, 'No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except hashserv.client.HashConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash

    def report_unihash(self, path, task, d):
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        fn = d.getVar('BB_FILENAME')
        tid = fn + ':do_' + task
        key = tid + ':' + taskhash

        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just abort
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                if report_taskdata:
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    hashequiv_logger.debug(1, 'Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug(1, 'Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except hashserv.client.HashConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass

    def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
        try:
            extra_data = {}
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]

            data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
            hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))

            if data is None:
                bb.warn("Server unable to handle unihash report")
                return False

            finalunihash = data['unihash']

            if finalunihash == current_unihash:
                hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
            elif finalunihash == wanted_unihash:
                hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
                self.set_unihash(tid, finalunihash)
                return True
            else:
                # TODO: What to do here?
                hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))

        except hashserv.client.HashConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        return False

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    name = "TestEquivHash"
    def init_rundepcheck(self, data):
        super().init_rundepcheck(data)
        self.server = data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash):
    name = "TestMulticonfigDepends"
    supports_multiconfig_datacaches = True

def dump_this_task(outfile, d):
    import bb.parse
    fn = d.getVar("BB_FILENAME")
    task = "do_" + d.getVar("BB_CURRENTTASK")
    referencestamp = bb.build.stamp_internal(task, d, None, True)
    bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile:" + referencestamp)

def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    # First set up the colours
    colors = {'color_title':   '\033[1m',
              'color_default': '\033[0m',
              'color_add':     '\033[0;32m',
              'color_remove':  '\033[0;31m',
             }
    # Leave all keys present but clear the values
    if not enable_color:
        for k in colors.keys():
            colors[k] = ''
    return colors

def worddiff_str(oldstr, newstr, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldstr.split(' '), newstr.split(' '))
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append(value)
        elif change == '+':
            item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors)
            ret.append(item)
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(ret), whitespace_note)
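
# Added example (illustrative): worddiff_str("a b c", "a B c") returns a
# string along the lines of
#   "a [-b-] {+B+} c"
# with ANSI colour codes wrapped around the added/removed words when a colours
# dict from init_colors(True) is passed in. The exact ordering of the +/-
# chunks depends on simplediff's output.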

def list_inline_diff(oldlist, newlist, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldlist, newlist)
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append("'%s'" % value)
        elif change == '+':
            item = '{color_add}+{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}-{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
    return '[%s]' % (', '.join(ret))

def clean_basepath(a):
    mc = None
    if a.startswith("mc:"):
        _, mc, a = a.split(":", 2)
    b = a.rsplit("/", 2)[1] + '/' + a.rsplit("/", 2)[2]
    if a.startswith("virtual:"):
        b = b + ":" + a.rsplit(":", 2)[0]
    if mc:
        b = b + ":mc:" + mc
    return b
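
# Added example (illustrative): clean_basepath() shortens a full task/recipe
# identifier to its last two path components, e.g.
#   "/src/meta/recipes-core/zlib/zlib_1.2.11.bb:do_compile"
# becomes
#   "zlib/zlib_1.2.11.bb:do_compile"
# with any "virtual:..." and "mc:<name>:" prefixes re-appended as suffixes so
# that different variants still compare as distinct.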

def clean_basepaths(a):
    b = {}
    for x in a:
        b[clean_basepath(x)] = a[x]
    return b

def clean_basepaths_list(a):
    b = []
    for x in a:
        b.append(clean_basepath(x))
    return b

def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    with open(a, 'rb') as f:
        p1 = pickle.Unpickler(f)
        a_data = p1.load()
    with open(b, 'rb') as f:
        p2 = pickle.Unpickler(f)
        b_data = p2.load()

    def dict_diff(a, b, whitelist=set()):
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in whitelist:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed

    def file_checksums_diff(a, b):
        from collections import Counter
        # Handle old siginfo format
        if isinstance(a, dict):
            a = [(os.path.basename(f), cs) for f, cs in a.items()]
        if isinstance(b, dict):
            b = [(os.path.basename(f), cs) for f, cs in b.items()]
        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed

    if 'basewhitelist' in a_data and a_data['basewhitelist'] != b_data['basewhitelist']:
        output.append(color_format("{color_title}basewhitelist changed{color_default} from '%s' to '%s'") % (a_data['basewhitelist'], b_data['basewhitelist']))
        if a_data['basewhitelist'] and b_data['basewhitelist']:
            output.append("changed items: %s" % a_data['basewhitelist'].symmetric_difference(b_data['basewhitelist']))

    if 'taskwhitelist' in a_data and a_data['taskwhitelist'] != b_data['taskwhitelist']:
        output.append(color_format("{color_title}taskwhitelist changed{color_default} from '%s' to '%s'") % (a_data['taskwhitelist'], b_data['taskwhitelist']))
        if a_data['taskwhitelist'] and b_data['taskwhitelist']:
            output.append("changed items: %s" % a_data['taskwhitelist'].symmetric_difference(b_data['taskwhitelist']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basewhitelist'] & b_data['basewhitelist'])
    if changed:
        for dep in changed:
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in added:
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in removed:
            output.append(color_format("{color_title}Dependency on variable %s was removed") % (dep))


    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in changed:
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    if not 'file_checksum_values' in a_data:
        a_data['file_checksum_values'] = {}
    if not 'file_checksum_values' in b_data:
        b_data['file_checksum_values'] = {}

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    if not 'runtaskdeps' in a_data:
        a_data['runtaskdeps'] = {}
    if not 'runtaskdeps' in b_data:
        b_data['runtaskdeps'] = {}

    if not collapsed:
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))

        if changed:
            clean_a = clean_basepaths_list(a_data['runtaskdeps'])
            clean_b = clean_basepaths_list(b_data['runtaskdeps'])
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
            output.append("\n".join(changed))


    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = a_data['runtaskhashes']
        b = b_data['runtaskhashes']
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in added:
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (clean_basepath(dep), b[dep]))
        if removed:
            for dep in removed:
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (clean_basepath(dep), a[dep]))
        if changed:
            for dep in changed:
                if not collapsed:
                    output.append(color_format("{color_title}Hash for dependent task %s changed{color_default} from %s to %s") % (clean_basepath(dep), a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
                            # that hash since in all likelihood, they're the same changes this task also saw.
                            output = [output[-1]] + recout

    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output


def calc_basehash(sigdata):
    task = sigdata['task']
    basedata = sigdata['varvals'][task]

    if basedata is None:
        basedata = ''

    alldeps = sigdata['taskdeps']
    for dep in alldeps:
        basedata = basedata + dep
        val = sigdata['varvals'][dep]
        if val is not None:
            basedata = basedata + str(val)

    return hashlib.sha256(basedata.encode("utf-8")).hexdigest()

def calc_taskhash(sigdata):
    data = sigdata['basehash']

    for dep in sigdata['runtaskdeps']:
        data = data + sigdata['runtaskhashes'][dep]

    for c in sigdata['file_checksum_values']:
        if c[1]:
            data = data + c[1]

    if 'taint' in sigdata:
        if 'nostamp:' in sigdata['taint']:
            data = data + sigdata['taint'][8:]
        else:
            data = data + sigdata['taint']

    return hashlib.sha256(data.encode("utf-8")).hexdigest()
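
# Added note: calc_basehash() and calc_taskhash() recompute the hashes from a
# dumped siginfo dictionary using the same scheme as SignatureGeneratorBasic
# (variable values for the base hash; base hash plus dependency hashes, file
# checksums and taint for the task hash), which is how dump_sigtask() above
# cross-checks the values it is about to write out.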

def dump_sigfile(a):
    output = []

    with open(a, 'rb') as f:
        p1 = pickle.Unpickler(f)
        a_data = p1.load()

    output.append("basewhitelist: %s" % (a_data['basewhitelist']))

    output.append("taskwhitelist: %s" % (a_data['taskwhitelist']))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep in a_data['gendeps']:
        output.append("List of dependencies for variable %s is %s" % (dep, a_data['gendeps'][dep]))

    for dep in a_data['varvals']:
        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))

    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (a_data['runtaskdeps']))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (a_data['file_checksum_values']))

    if 'runtaskhashes' in a_data:
        for dep in a_data['runtaskhashes']:
            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))

    if 'taint' in a_data:
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(a_data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output