blob: 86e0e16f390b4272ff2b6a2757866a8475720981 [file] [log] [blame]
Brad Bishopc342db32019-05-15 21:57:59 -04001#
2# SPDX-License-Identifier: GPL-2.0-only
3#
4
Patrick Williamsc124f4f2015-09-15 14:41:29 -05005import hashlib
6import logging
7import os
8import re
9import tempfile
Patrick Williamsc0f7c042017-02-23 20:41:17 -060010import pickle
Patrick Williamsc124f4f2015-09-15 14:41:29 -050011import bb.data
Brad Bishop6e60e8b2018-02-01 10:27:11 -050012import difflib
13import simplediff
Patrick Williamsd8c66bc2016-06-20 12:57:21 -050014from bb.checksum import FileChecksumCache
Brad Bishop08902b02019-08-20 09:16:51 -040015from bb import runqueue
Brad Bishopa34c0302019-09-23 22:34:48 -040016import hashserv
Andrew Geissler475cb722020-07-10 16:00:51 -050017import hashserv.client
Patrick Williamsc124f4f2015-09-15 14:41:29 -050018
19logger = logging.getLogger('BitBake.SigGen')
Andrew Geissler82c905d2020-04-13 13:39:40 -050020hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')
Patrick Williamsc124f4f2015-09-15 14:41:29 -050021
def init(d):
    """Instantiate the signature generator selected by BB_SIGNATURE_HANDLER.

    Scans this module's globals for SignatureGenerator subclasses and returns
    an instance of the one whose ``name`` matches the configured handler
    (default "noop").  If no generator matches, logs an error listing the
    available names and falls back to the no-op SignatureGenerator.
    """
    siggens = [obj for obj in globals().values()
               if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)
    # Fix: the original had an unreachable `break` directly after the
    # `return` above (dead code) and used a for/else; since the loop body
    # can only exit via `return`, a plain fall-through is equivalent.
    logger.error("Invalid signature generator '%s', using default 'noop'\n"
                 "Available generators: %s", desired,
                 ', '.join(obj.name for obj in siggens))
    return SignatureGenerator(d)
36
37class SignatureGenerator(object):
38 """
39 """
40 name = "noop"
41
Andrew Geissler5a43b432020-06-13 10:46:56 -050042 # If the derived class supports multiconfig datacaches, set this to True
43 # The default is False for backward compatibility with derived signature
44 # generators that do not understand multiconfig caches
45 supports_multiconfig_datacaches = False
46
Patrick Williamsc124f4f2015-09-15 14:41:29 -050047 def __init__(self, data):
Brad Bishop37a0e4d2017-12-04 01:01:44 -050048 self.basehash = {}
Patrick Williamsc124f4f2015-09-15 14:41:29 -050049 self.taskhash = {}
Andrew Geissler82c905d2020-04-13 13:39:40 -050050 self.unihash = {}
Patrick Williamsc124f4f2015-09-15 14:41:29 -050051 self.runtaskdeps = {}
52 self.file_checksum_values = {}
Patrick Williamsd8c66bc2016-06-20 12:57:21 -050053 self.taints = {}
Brad Bishop08902b02019-08-20 09:16:51 -040054 self.unitaskhashes = {}
Andrew Geissler82c905d2020-04-13 13:39:40 -050055 self.tidtopn = {}
56 self.setscenetasks = set()
Patrick Williamsc124f4f2015-09-15 14:41:29 -050057
58 def finalise(self, fn, d, varient):
59 return
60
Andrew Geissler82c905d2020-04-13 13:39:40 -050061 def postparsing_clean_cache(self):
62 return
63
Brad Bishop08902b02019-08-20 09:16:51 -040064 def get_unihash(self, tid):
65 return self.taskhash[tid]
Brad Bishop19323692019-04-05 15:28:33 -040066
Andrew Geissler5a43b432020-06-13 10:46:56 -050067 def prep_taskhash(self, tid, deps, dataCaches):
Andrew Geissler82c905d2020-04-13 13:39:40 -050068 return
69
Andrew Geissler5a43b432020-06-13 10:46:56 -050070 def get_taskhash(self, tid, deps, dataCaches):
Brad Bishop08902b02019-08-20 09:16:51 -040071 self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
72 return self.taskhash[tid]
Patrick Williamsc124f4f2015-09-15 14:41:29 -050073
Patrick Williamsd8c66bc2016-06-20 12:57:21 -050074 def writeout_file_checksum_cache(self):
75 """Write/update the file checksum cache onto disk"""
Patrick Williamsc124f4f2015-09-15 14:41:29 -050076 return
77
78 def stampfile(self, stampbase, file_name, taskname, extrainfo):
79 return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')
80
81 def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
82 return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')
83
84 def dump_sigtask(self, fn, task, stampbase, runtime):
85 return
86
87 def invalidate_task(self, task, d, fn):
88 bb.build.del_stamp(task, d, fn)
89
90 def dump_sigs(self, dataCache, options):
91 return
92
93 def get_taskdata(self):
Andrew Geissler82c905d2020-04-13 13:39:40 -050094 return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)
Patrick Williamsc124f4f2015-09-15 14:41:29 -050095
96 def set_taskdata(self, data):
Andrew Geissler82c905d2020-04-13 13:39:40 -050097 self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data
Patrick Williamsc124f4f2015-09-15 14:41:29 -050098
Brad Bishopd7bf8c12018-02-25 22:55:05 -050099 def reset(self, data):
100 self.__init__(data)
101
Brad Bishop08902b02019-08-20 09:16:51 -0400102 def get_taskhashes(self):
Andrew Geissler82c905d2020-04-13 13:39:40 -0500103 return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn
Brad Bishop08902b02019-08-20 09:16:51 -0400104
105 def set_taskhashes(self, hashes):
Andrew Geissler82c905d2020-04-13 13:39:40 -0500106 self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes
Brad Bishop08902b02019-08-20 09:16:51 -0400107
108 def save_unitaskhashes(self):
109 return
110
Brad Bishopa34c0302019-09-23 22:34:48 -0400111 def set_setscene_tasks(self, setscene_tasks):
112 return
Brad Bishopd7bf8c12018-02-25 22:55:05 -0500113
Andrew Geissler5a43b432020-06-13 10:46:56 -0500114 @classmethod
115 def get_data_caches(cls, dataCaches, mc):
116 """
117 This function returns the datacaches that should be passed to signature
118 generator functions. If the signature generator supports multiconfig
119 caches, the entire dictionary of data caches is sent, otherwise a
120 special proxy is sent that support both index access to all
121 multiconfigs, and also direct access for the default multiconfig.
122
123 The proxy class allows code in this class itself to always use
124 multiconfig aware code (to ease maintenance), but derived classes that
125 are unaware of multiconfig data caches can still access the default
126 multiconfig as expected.
127
128 Do not override this function in derived classes; it will be removed in
129 the future when support for multiconfig data caches is mandatory
130 """
131 class DataCacheProxy(object):
132 def __init__(self):
133 pass
134
135 def __getitem__(self, key):
136 return dataCaches[key]
137
138 def __getattr__(self, name):
139 return getattr(dataCaches[mc], name)
140
141 if cls.supports_multiconfig_datacaches:
142 return dataCaches
143
144 return DataCacheProxy()
145
class SignatureGeneratorBasic(SignatureGenerator):
    """
    Signature generator that computes per-task "base" hashes from variable
    dependencies and full task hashes that additionally cover runtime
    dependencies, file checksums and taints.
    """
    name = "basic"

    def __init__(self, data):
        # Hash state keyed by task id ("<fn>:<task>").
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        # Parse-time metadata, keyed by recipe filename (fn).
        self.taskdeps = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.gendeps = {}
        self.lookupcache = {}
        self.setscenetasks = set()
        # Variables excluded from base hashes entirely.
        self.basewhitelist = set((data.getVar("BB_HASHBASE_WHITELIST") or "").split())
        self.taskwhitelist = None
        self.init_rundepcheck(data)
        # Optional persistent file checksum cache; when unset the fetcher
        # code paths are used instead (see writeout_file_checksum_cache()).
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        # Persistent unihash cache, written back by save_unitaskhashes().
        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        # Directory names skipped when checksumming local file trees.
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        # BB_HASHTASK_WHITELIST (a regex, if set) is compiled here and
        # consulted by rundep_check() below.
        self.taskwhitelist = data.getVar("BB_HASHTASK_WHITELIST") or None
        if self.taskwhitelist:
            self.twl = re.compile(self.taskwhitelist)
        else:
            self.twl = None

    def _build_data(self, fn, d):
        """Compute and record the base hash for every task of recipe fn."""
        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basewhitelist)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basewhitelist, fn)

        for task in tasklist:
            tid = fn + ":" + task
            # A different basehash on reparse means the metadata is not
            # deterministic; complain loudly unless explicitly ignored.
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before run printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        self.taskdeps[fn] = taskdeps
        self.gendeps[fn] = gendeps
        self.lookupcache[fn] = lookupcache

        return taskdeps

    def set_setscene_tasks(self, setscene_tasks):
        # Remember which tids are setscene tasks (consulted by derived
        # generators such as the unihash mixin).
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):
        """Compute base hashes for fn once parsing has finished and export
        them as BB_BASEHASH_task-<task> datastore variables."""
        mc = d.getVar("__BBMULTICONFIG", False) or ""
        if variant or mc:
            fn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps = self._build_data(fn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            # Add context then re-raise; the failure is not swallowed.
            bb.warn("Error during finalise of %s" % fn)
            raise

        #Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[fn]:
        #    self.dump_sigtask(fn, task, d.getVar("STAMP"), False)

        for task in taskdeps:
            d.setVar("BB_BASEHASH_task-%s" % task, self.basehash[fn + ":" + task])

    def postparsing_clean_cache(self):
        #
        # After parsing we can remove some things from memory to reduce our memory footprint
        #
        self.gendeps = {}
        self.lookupcache = {}
        self.taskdeps = {}

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the whitelist
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True

    def read_taint(self, fn, task, stampbase):
        """Return the contents of the task's ".taint" stamp file, or None."""
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            # A missing taint file simply means the task was never tainted.
            pass
        return taint

    def prep_taskhash(self, tid, deps, dataCaches):
        """Gather the inputs (runtime deps, file checksums, taints) that
        get_taskhash() will fold into the task hash for tid."""
        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[fn]

        self.tidtopn[tid] = recipename

        for dep in sorted(deps, key=clean_basepath):
            (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            depname = dataCaches[depmc].pkg_fn[depmcfn]
            if not self.supports_multiconfig_datacaches and mc != depmc:
                # If the signature generator doesn't understand multiconfig
                # data caches, any dependency not in the same multiconfig must
                # be skipped for backward compatibility
                continue
            if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches):
                continue
            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
            self.runtaskdeps[tid].append(dep)

        if task in dataCaches[mc].file_checksums[fn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            for (f,cs) in checksums:
                self.file_checksum_values[tid].append((f,cs))

        taskdep = dataCaches[mc].task_deps[fn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):
        """Combine basehash, dependency hashes, file checksums and taints into
        the final task hash for tid (prep_taskhash() must have run first)."""
        data = self.basehash[tid]
        for dep in self.runtaskdeps[tid]:
            # Prefer a known unihash for each dependency; a stored None means
            # "no unihash — use the plain taskhash".
            if dep in self.unihash:
                if self.unihash[dep] is None:
                    data = data + self.taskhash[dep]
                else:
                    data = data + self.unihash[dep]
            else:
                data = data + self.get_unihash(dep)

        for (f, cs) in self.file_checksum_values[tid]:
            if cs:
                data = data + cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                # Drop the "nostamp:" prefix; only the random part is hashed.
                data = data + self.taints[tid][8:]
            else:
                data = data + self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH_task-%s" % task, taskhash[task])
        return h

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        # Persist the unihash cache (bb_unihashes.dat) to disk.
        self.unihash_cache.save(self.unitaskhashes)

    def dump_sigtask(self, fn, task, stampbase, runtime):
        """Write a task's signature data to a sigdata/sigbasedata file.

        runtime may be falsy (base data only), truthy (full runtime data), or
        a "customfile:<stamp>" string directing output to a specific file.
        """
        tid = fn + ":" + task
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            # Strip the "customfile:" prefix to recover the real stamp base.
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        with bb.utils.umask(0o002):
            bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basewhitelist'] = self.basewhitelist
        data['taskwhitelist'] = self.taskwhitelist
        data['taskdeps'] = self.taskdeps[fn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.lookupcache[fn][task]
        for dep in self.taskdeps[fn][task]:
            if dep in self.basewhitelist:
                continue
            data['gendeps'][dep] = self.gendeps[fn][dep]
            data['varvals'][dep] = self.lookupcache[fn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = self.runtaskdeps[tid]
            data['file_checksum_values'] = [(os.path.basename(f), cs) for f,cs in self.file_checksum_values[tid]]
            data['runtaskhashes'] = {}
            for dep in data['runtaskdeps']:
                data['runtaskhashes'][dep] = self.get_unihash(dep)
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(fn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        # Self-check: recomputing the hashes from the dumped data must agree
        # with what was previously calculated.
        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        # Write atomically: dump to a temp file in the target directory, then
        # rename over the final name; remove the temp file on failure.
        fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with os.fdopen(fd, "wb") as stream:
                # NOTE(review): pickle.dump() returns None, so the 'p'
                # binding is unused.
                p = pickle.dump(data, stream, -1)
                stream.flush()
            os.chmod(tmpfile, 0o664)
            os.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err

    def dump_sigfn(self, fn, dataCaches, options):
        """Dump signature data for every hashed task of recipe fn."""
        if fn in self.taskdeps:
            for task in self.taskdeps[fn]:
                tid = fn + ":" + task
                mc = bb.runqueue.mc_from_tid(tid)
                if tid not in self.taskhash:
                    continue
                if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
                    bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
                    bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
                self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500430
class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    """
    Signature generator that embeds the task hash in stamp file names, so a
    changed hash automatically invalidates the corresponding stamp.
    """
    name = "basichash"

    def get_stampfile_hash(self, tid):
        # Use the full task hash when it has been computed; otherwise fall
        # back to the base hash (a missing basehash entry raises KeyError).
        try:
            return self.taskhash[tid]
        except KeyError:
            return self.basehash[tid]

    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
        # A "_setscene" variant shares the stamp of its underlying task.
        realtask = taskname
        if taskname != "do_setscene" and taskname.endswith("_setscene"):
            realtask = taskname[:-9]
        tid = fn + ":" + realtask
        h = "*" if clean else self.get_stampfile_hash(tid)
        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
        # The clean mask is just the stamp name with a wildcard hash.
        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, d, fn):
        bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
        bb.build.write_taint(task, d, fn)
459
Brad Bishop08902b02019-08-20 09:16:51 -0400460class SignatureGeneratorUniHashMixIn(object):
    def __init__(self, data):
        # Per-tid method-name suffixes appended to self.method when talking
        # to the hash equivalence server (see get_unihash/report_unihash).
        self.extramethod = {}
        super().__init__(data)
464
    def get_taskdata(self):
        # Prepend the mixin's server/method configuration to the base class
        # state tuple (unpacked again by set_taskdata()).
        return (self.server, self.method, self.extramethod) + super().get_taskdata()
Brad Bishop08902b02019-08-20 09:16:51 -0400467
    def set_taskdata(self, data):
        # The first three entries belong to this mixin; the remainder is
        # handed to the base class.
        self.server, self.method, self.extramethod = data[:3]
        super().set_taskdata(data[3:])
Brad Bishop08902b02019-08-20 09:16:51 -0400471
Brad Bishopa34c0302019-09-23 22:34:48 -0400472 def client(self):
473 if getattr(self, '_client', None) is None:
474 self._client = hashserv.create_client(self.server)
475 return self._client
476
Brad Bishop08902b02019-08-20 09:16:51 -0400477 def get_stampfile_hash(self, tid):
478 if tid in self.taskhash:
479 # If a unique hash is reported, use it as the stampfile hash. This
480 # ensures that if a task won't be re-run if the taskhash changes,
481 # but it would result in the same output hash
Andrew Geissler82c905d2020-04-13 13:39:40 -0500482 unihash = self._get_unihash(tid)
Brad Bishop08902b02019-08-20 09:16:51 -0400483 if unihash is not None:
484 return unihash
485
486 return super().get_stampfile_hash(tid)
487
488 def set_unihash(self, tid, unihash):
Andrew Geissler82c905d2020-04-13 13:39:40 -0500489 (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
490 key = mc + ":" + self.tidtopn[tid] + ":" + taskname
491 self.unitaskhashes[key] = (self.taskhash[tid], unihash)
492 self.unihash[tid] = unihash
493
494 def _get_unihash(self, tid, checkkey=None):
495 if tid not in self.tidtopn:
496 return None
497 (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
498 key = mc + ":" + self.tidtopn[tid] + ":" + taskname
499 if key not in self.unitaskhashes:
500 return None
501 if not checkkey:
502 checkkey = self.taskhash[tid]
503 (key, unihash) = self.unitaskhashes[key]
504 if key != checkkey:
505 return None
506 return unihash
Brad Bishop08902b02019-08-20 09:16:51 -0400507
    def get_unihash(self, tid):
        """Return the unique (equivalence) hash for tid.

        Consults the local cache first, then the hash equivalence server,
        and finally falls back to the plain taskhash.
        """
        taskhash = self.taskhash[tid]

        # Only setscene tasks participate in hash equivalence; everything
        # else uses its taskhash directly (the stored None marks "no unihash"
        # for readers such as SignatureGeneratorBasic.get_taskhash()).
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # the most recent unihash per task.
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) The value is easy for multiple independent builders to derive the
        #    same unique hash from the same input. This means that if the
        #    independent builders find the same taskhash, but it isn't reported
        #    to the server, there is a better chance that they will agree on
        #    the unique hash.
        unihash = taskhash

        try:
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported at debug level 2. If they differ, that
                # is much more interesting, so it is reported at debug level 1
                hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug(2, 'No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except hashserv.client.HashConnectionError as e:
            # A server outage is treated as "no equivalence known" rather
            # than failing the build.
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash
557
    def report_unihash(self, path, task, d):
        """Compute the output hash of a completed task and report the
        taskhash -> (outhash, unihash) mapping to the equivalence server.

        Adopts a different unihash if the server reports one.
        """
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        fn = d.getVar('BB_FILENAME')
        tid = fn + ':do_' + task
        key = tid + ':' + taskhash

        # Only setscene tasks are reported.
        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just abort
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            # self.method names the outhash function. A dotted name is
            # resolved by importing its module; otherwise the name must
            # already be visible to better_eval().
            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                if report_taskdata:
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    # The server chose a different (equivalent) hash: adopt it
                    # and notify the runqueue so stamps can be updated.
                    hashequiv_logger.debug(1, 'Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug(1, 'Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except hashserv.client.HashConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                # Maintain a stable "depsig.do_<task>" symlink pointing at
                # the most recent per-pid signature dump.
                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass
643
Andrew Geissler82c905d2020-04-13 13:39:40 -0500644 def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
645 try:
646 extra_data = {}
647 method = self.method
648 if tid in self.extramethod:
649 method = method + self.extramethod[tid]
650
651 data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
652 hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))
653
654 if data is None:
655 bb.warn("Server unable to handle unihash report")
656 return False
657
658 finalunihash = data['unihash']
659
660 if finalunihash == current_unihash:
661 hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
662 elif finalunihash == wanted_unihash:
663 hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
664 self.set_unihash(tid, finalunihash)
665 return True
666 else:
667 # TODO: What to do here?
668 hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))
669
670 except hashserv.client.HashConnectionError as e:
671 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
672
673 return False
Brad Bishop08902b02019-08-20 09:16:51 -0400674
675#
676# Dummy class used for bitbake-selftest
677#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    """Hash-equivalence signature generator used only by bitbake-selftest."""
    name = "TestEquivHash"

    def init_rundepcheck(self, data):
        # On top of the base setup, point at the test hash server and use
        # the sstate output hash method.
        super().init_rundepcheck(data)
        self.server = data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"
684
Andrew Geissler5a43b432020-06-13 10:46:56 -0500685#
686# Dummy class used for bitbake-selftest
687#
class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash):
    """Signature generator used only by bitbake-selftest to exercise
    multiconfig datacache handling."""
    name = "TestMulticonfigDepends"
    supports_multiconfig_datacaches = True
Brad Bishop08902b02019-08-20 09:16:51 -0400691
def dump_this_task(outfile, d):
    """Dump the signature information of the currently executing task to *outfile*."""
    import bb.parse
    filename = d.getVar("BB_FILENAME")
    taskname = "do_" + d.getVar("BB_CURRENTTASK")
    # Use a reference stamp so the dump does not depend on the real stamp dir
    referencestamp = bb.build.stamp_internal(taskname, d, None, True)
    bb.parse.siggen.dump_sigtask(filename, taskname, outfile, "customfile:" + referencestamp)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500698
def init_colors(enable_color):
    """Build the ANSI colour-escape dict consumed by compare_sigfiles().

    When *enable_color* is false, every key is still present but maps to
    the empty string so format strings keep working uncoloured.
    """
    colors = {
        'color_title': '\033[1m',
        'color_default': '\033[0m',
        'color_add': '\033[0;32m',
        'color_remove': '\033[0;31m',
    }
    if enable_color:
        return colors
    # Colour disabled: keep the keys, blank the escape sequences
    return {key: '' for key in colors}
712
def worddiff_str(oldstr, newstr, colors=None):
    """Render a word-level inline diff between two strings.

    Added words appear as {+word+} and removed words as [-word-],
    optionally wrapped in ANSI colours from *colors*. A note is appended
    when the only difference is whitespace.
    """
    if not colors:
        colors = init_colors(False)
    pieces = []
    for op, words in simplediff.diff(oldstr.split(' '), newstr.split(' ')):
        joined = ' '.join(words)
        if op == '=':
            pieces.append(joined)
        elif op == '+':
            pieces.append('{color_add}{{+{value}+}}{color_default}'.format(value=joined, **colors))
        elif op == '-':
            pieces.append('{color_remove}[-{value}-]{color_default}'.format(value=joined, **colors))
    # A pure whitespace change would otherwise be invisible in the output
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(pieces), whitespace_note)
732
def list_inline_diff(oldlist, newlist, colors=None):
    """Render an inline diff of two lists of strings.

    Unchanged entries are quoted, additions are prefixed with '+' and
    removals with '-', optionally wrapped in ANSI colours from *colors*.
    """
    if not colors:
        colors = init_colors(False)
    rendered = []
    for op, words in simplediff.diff(oldlist, newlist):
        joined = ' '.join(words)
        if op == '=':
            rendered.append("'%s'" % joined)
        elif op == '+':
            rendered.append('{color_add}+{value}{color_default}'.format(value=joined, **colors))
        elif op == '-':
            rendered.append('{color_remove}-{value}{color_default}'.format(value=joined, **colors))
    return '[%s]' % (', '.join(rendered))
749
def clean_basepath(basepath):
    """Reduce a full recipe/task path to 'recipe/task' plus decorations.

    Strips the leading directories from a stamp/signature basepath,
    keeping the recipe file and task components. Any 'virtual:...'
    prefix is re-appended as a ':virtual:...' suffix and any 'mc:<name>:'
    prefix as a trailing ':mc:<name>'.
    """
    prefix, recipe_dir, recipe_task = basepath.rsplit("/", 2)
    cleaned = recipe_dir + '/' + recipe_task

    # Absolute path with no decorations: nothing more to keep
    if prefix[0] == '/':
        return cleaned

    if prefix.startswith("mc:"):
        _, mc_name, prefix = prefix.split(":", 2)
        mc_suffix = ':mc:' + mc_name
    else:
        mc_suffix = ''

    # mc stuff now removed from prefix. Whatever was next, if present, is the
    # first suffix. ':/', the recipe path start, marks the end of it — e.g.
    # 'virtual:a[:b[:c]]:/path...' (b and c being optional).
    if prefix[0] != '/':
        cleaned += ':' + prefix.split(':/', 1)[0]

    return cleaned + mc_suffix
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500770
def clean_basepaths(a):
    """Return a copy of dict *a* with every key passed through clean_basepath()."""
    return {clean_basepath(key): value for key, value in a.items()}
776
def clean_basepaths_list(a):
    """Return a list with every entry of *a* passed through clean_basepath()."""
    return [clean_basepath(entry) for entry in a]
782
def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
    """Compare two pickled signature (siginfo) files.

    Returns a list of human-readable strings describing the differences
    between the signature data in file *a* and file *b*.

    recursecb: optional callback (dep, hash_a, hash_b) invoked for each
               dependent task whose hash changed; its returned lines are
               spliced into the output.
    color:     when True, wrap output in ANSI colour escapes.
    collapsed: when True, suppress some detail (basehash line, per-dep
               hash lines) and merge recursive output into the result.
    """
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    # Load both signature dumps (pickle-format siginfo files)
    with open(a, 'rb') as f:
        p1 = pickle.Unpickler(f)
        a_data = p1.load()
    with open(b, 'rb') as f:
        p2 = pickle.Unpickler(f)
        b_data = p2.load()

    def dict_diff(a, b, whitelist=set()):
        # Return (changed, added, removed) key sets between dicts a and b,
        # ignoring changes to keys listed in whitelist.
        # NOTE: the shared default set is never mutated here.
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in whitelist:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed

    def file_checksums_diff(a, b):
        # Return (changed, added, removed) for the file checksum lists,
        # where changed entries are (filename, old_cs, new_cs) tuples.
        from collections import Counter
        # Handle old siginfo format
        if isinstance(a, dict):
            a = [(os.path.basename(f), cs) for f, cs in a.items()]
        if isinstance(b, dict):
            b = [(os.path.basename(f), cs) for f, cs in b.items()]
        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed

    # Whitelist changes ('in' checks keep compatibility with older dumps)
    if 'basewhitelist' in a_data and a_data['basewhitelist'] != b_data['basewhitelist']:
        output.append(color_format("{color_title}basewhitelist changed{color_default} from '%s' to '%s'") % (a_data['basewhitelist'], b_data['basewhitelist']))
        if a_data['basewhitelist'] and b_data['basewhitelist']:
            output.append("changed items: %s" % a_data['basewhitelist'].symmetric_difference(b_data['basewhitelist']))

    if 'taskwhitelist' in a_data and a_data['taskwhitelist'] != b_data['taskwhitelist']:
        output.append(color_format("{color_title}taskwhitelist changed{color_default} from '%s' to '%s'") % (a_data['taskwhitelist'], b_data['taskwhitelist']))
        if a_data['taskwhitelist'] and b_data['taskwhitelist']:
            output.append("changed items: %s" % a_data['taskwhitelist'].symmetric_difference(b_data['taskwhitelist']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    # Variable dependency graph changes (gendeps), ignoring whitelisted vars
    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basewhitelist'] & b_data['basewhitelist'])
    if changed:
        for dep in changed:
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in added:
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in removed:
            output.append(color_format("{color_title}Dependency on Variable %s was removed") % (dep))


    # Variable value changes; multi-line values get a unified diff,
    # multi-word values a word diff, everything else a plain before/after
    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in changed:
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    # Older dumps may lack these keys; normalise before diffing
    if not 'file_checksum_values' in a_data:
        a_data['file_checksum_values'] = {}
    if not 'file_checksum_values' in b_data:
        b_data['file_checksum_values'] = {}

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    if not 'runtaskdeps' in a_data:
        a_data['runtaskdeps'] = {}
    if not 'runtaskdeps' in b_data:
        b_data['runtaskdeps'] = {}

    # Runtime task dependency changes; positional comparison is only valid
    # when both lists have the same length
    if not collapsed:
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))

        if changed:
            clean_a = clean_basepaths_list(a_data['runtaskdeps'])
            clean_b = clean_basepaths_list(b_data['runtaskdeps'])
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
            output.append("\n".join(changed))


    # Dependent task hash changes; added/removed pairs with identical hashes
    # are treated as renames and suppressed
    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = a_data['runtaskhashes']
        b = b_data['runtaskhashes']
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in added:
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (clean_basepath(dep), b[dep]))
        if removed:
            for dep in removed:
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (clean_basepath(dep), a[dep]))
        if changed:
            for dep in changed:
                if not collapsed:
                    output.append(color_format("{color_title}Hash for dependent task %s changed{color_default} from %s to %s") % (clean_basepath(dep), a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
                            # that hash since in all likelyhood, they're the same changes this task also saw.
                            output = [output[-1]] + recout

    # Taint changes (forced or invalidated tasks); hide the random uuid4
    # value so two nostamp taints don't look like a meaningful change
    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output
1000
1001
def calc_basehash(sigdata):
    """Recompute the base hash from a dumped signature dictionary.

    The base hash is the sha256 of the task's own variable value followed
    by each dependency variable name and (if set) its value, in taskdeps
    order.
    """
    task = sigdata['task']
    taskvalue = sigdata['varvals'][task]
    if taskvalue is None:
        taskvalue = ''

    chunks = [taskvalue]
    for dep in sigdata['taskdeps']:
        chunks.append(dep)
        val = sigdata['varvals'][dep]
        if val is not None:
            chunks.append(str(val))

    return hashlib.sha256(''.join(chunks).encode("utf-8")).hexdigest()
Patrick Williamsd8c66bc2016-06-20 12:57:21 -05001017
def calc_taskhash(sigdata):
    """Recompute the task hash from a dumped signature dictionary.

    The task hash is the sha256 of the base hash, the hashes of all
    runtime task dependencies, any non-empty file checksums, and the
    taint (with the 'nostamp:' prefix stripped when present).
    """
    parts = [sigdata['basehash']]

    for dep in sigdata['runtaskdeps']:
        parts.append(sigdata['runtaskhashes'][dep])

    for c in sigdata['file_checksum_values']:
        if c[1]:
            parts.append(c[1])

    if 'taint' in sigdata:
        taint = sigdata['taint']
        # nostamp taints carry a uuid after the 8-char 'nostamp:' prefix;
        # only the uuid part feeds the hash
        if 'nostamp:' in taint:
            parts.append(taint[8:])
        else:
            parts.append(taint)

    return hashlib.sha256(''.join(parts).encode("utf-8")).hexdigest()
Patrick Williamsd8c66bc2016-06-20 12:57:21 -05001035
1036
def dump_sigfile(a):
    """Load a pickled signature (siginfo) file and render its contents.

    Returns a list of human-readable strings describing the stored
    whitelists, dependencies, variable values, hashes and taint, plus the
    recomputed base and task hashes for cross-checking.
    """
    output = []

    with open(a, 'rb') as f:
        a_data = pickle.Unpickler(f).load()

    output.append("basewhitelist: %s" % (a_data['basewhitelist']))

    output.append("taskwhitelist: %s" % (a_data['taskwhitelist']))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep, deps in a_data['gendeps'].items():
        output.append("List of dependencies for variable %s is %s" % (dep, deps))

    for var, val in a_data['varvals'].items():
        output.append("Variable %s value is %s" % (var, val))

    # The remaining keys are optional in older dump formats
    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (a_data['runtaskdeps']))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (a_data['file_checksum_values']))

    if 'runtaskhashes' in a_data:
        for dep, taskhash in a_data['runtaskhashes'].items():
            output.append("Hash for dependent task %s is %s" % (dep, taskhash))

    if 'taint' in a_data:
        taint = a_data['taint']
        # Hide the random uuid4 value carried by nostamp taints
        if taint.startswith('nostamp:'):
            msg = taint.replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = taint
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(a_data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output