blob: e0ec736dff72d8fc352291101e0b63e68ed40d40 [file] [log] [blame]
Brad Bishopc342db32019-05-15 21:57:59 -04001#
2# SPDX-License-Identifier: GPL-2.0-only
3#
4
Patrick Williamsc124f4f2015-09-15 14:41:29 -05005import hashlib
6import logging
7import os
8import re
9import tempfile
Patrick Williamsc0f7c042017-02-23 20:41:17 -060010import pickle
Patrick Williamsc124f4f2015-09-15 14:41:29 -050011import bb.data
Brad Bishop6e60e8b2018-02-01 10:27:11 -050012import difflib
13import simplediff
Andrew Geisslereff27472021-10-29 15:35:00 -050014import json
15import bb.compress.zstd
Patrick Williamsd8c66bc2016-06-20 12:57:21 -050016from bb.checksum import FileChecksumCache
Brad Bishop08902b02019-08-20 09:16:51 -040017from bb import runqueue
Brad Bishopa34c0302019-09-23 22:34:48 -040018import hashserv
Andrew Geissler475cb722020-07-10 16:00:51 -050019import hashserv.client
Patrick Williamsc124f4f2015-09-15 14:41:29 -050020
# Module-level loggers: one for general signature-generation messages and a
# dedicated child logger for hash-equivalence traffic so its verbosity can be
# tuned independently of the rest of the signature code.
logger = logging.getLogger('BitBake.SigGen')
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')
Patrick Williamsc124f4f2015-09-15 14:41:29 -050023
Andrew Geisslereff27472021-10-29 15:35:00 -050024class SetEncoder(json.JSONEncoder):
25 def default(self, obj):
26 if isinstance(obj, set):
27 return dict(_set_object=list(sorted(obj)))
28 return json.JSONEncoder.default(self, obj)
29
def SetDecoder(dct):
    """json object_hook that reverses SetEncoder.

    A dict carrying a '_set_object' key is converted back into a set of the
    listed elements; any other dict is returned unchanged.
    """
    try:
        members = dct['_set_object']
    except KeyError:
        return dct
    return set(members)
34
Patrick Williamsc124f4f2015-09-15 14:41:29 -050035def init(d):
Patrick Williamsc0f7c042017-02-23 20:41:17 -060036 siggens = [obj for obj in globals().values()
Patrick Williamsc124f4f2015-09-15 14:41:29 -050037 if type(obj) is type and issubclass(obj, SignatureGenerator)]
38
Brad Bishop6e60e8b2018-02-01 10:27:11 -050039 desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
Patrick Williamsc124f4f2015-09-15 14:41:29 -050040 for sg in siggens:
41 if desired == sg.name:
42 return sg(d)
43 break
44 else:
45 logger.error("Invalid signature generator '%s', using default 'noop'\n"
46 "Available generators: %s", desired,
47 ', '.join(obj.name for obj in siggens))
48 return SignatureGenerator(d)
49
class SignatureGenerator(object):
    """No-op signature generator ("noop") and base class for real generators.

    Defines the interface BitBake uses to compute, store and serialize task
    hashes. The base implementation keeps the bookkeeping dictionaries but
    does no real hashing work beyond a fallback hash of the task id.
    """
    name = "noop"

    # If the derived class supports multiconfig datacaches, set this to True
    # The default is False for backward compatibility with derived signature
    # generators that do not understand multiconfig caches
    supports_multiconfig_datacaches = False

    def __init__(self, data):
        # All maps below are keyed by task id ("<fn>:<taskname>") unless noted.
        self.basehash = {}             # hash of the task's own metadata
        self.taskhash = {}             # hash including dependencies
        self.unihash = {}              # unique hash (hash equivalence)
        self.runtaskdeps = {}          # runtime dependency lists
        self.file_checksum_values = {} # (file, checksum) pairs per task
        self.taints = {}               # taint values forcing reruns
        self.unitaskhashes = {}        # persistent unihash cache
        self.tidtopn = {}              # task id -> recipe name (PN)
        self.setscenetasks = set()     # task ids that are setscene tasks

    def finalise(self, fn, d, varient):
        # Called after a recipe is parsed; no-op here. (NB: "varient" typo is
        # part of the long-standing interface; do not rename.)
        return

    def postparsing_clean_cache(self):
        # Hook to drop parse-time state after parsing completes; no-op here.
        return

    def get_unihash(self, tid):
        # Without a hash equivalence mechanism the unique hash is the taskhash.
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        # Hook to gather per-task data before get_taskhash(); no-op here.
        return

    def get_taskhash(self, tid, deps, dataCaches):
        # Fallback: derive a deterministic hash from the task id alone.
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        # Stamp path without any hash component; rstrip drops a trailing dot
        # when extrainfo is empty.
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        # Glob pattern used to clean stamps; identical to stampfile() here.
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, fn, task, stampbase, runtime):
        # Hook to write signature data for one task; no-op here.
        return

    def invalidate_task(self, task, d, fn):
        # Force a rerun by deleting the task's stamp.
        bb.build.del_stamp(task, d, fn)

    def dump_sigs(self, dataCache, options):
        # Hook to dump signatures for all tasks; no-op here.
        return

    def get_taskdata(self):
        # Serialized state passed to worker processes. The tuple order is a
        # wire format shared with set_taskdata() (and extended by mixins);
        # never reorder it.
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        # Inverse of get_taskdata(); must match its tuple order exactly.
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        # Reinitialize in place; relies on __init__ being safe to call again.
        self.__init__(data)

    def get_taskhashes(self):
        # Subset of state exchanged with workers when only hashes are needed;
        # tuple order shared with set_taskhashes().
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        # Hook to persist the unihash cache; no-op here.
        return

    def set_setscene_tasks(self, setscene_tasks):
        # Hook to record which tasks are setscene tasks; no-op here.
        return

    @classmethod
    def get_data_caches(cls, dataCaches, mc):
        """
        This function returns the datacaches that should be passed to signature
        generator functions. If the signature generator supports multiconfig
        caches, the entire dictionary of data caches is sent, otherwise a
        special proxy is sent that support both index access to all
        multiconfigs, and also direct access for the default multiconfig.

        The proxy class allows code in this class itself to always use
        multiconfig aware code (to ease maintenance), but derived classes that
        are unaware of multiconfig data caches can still access the default
        multiconfig as expected.

        Do not override this function in derived classes; it will be removed in
        the future when support for multiconfig data caches is mandatory
        """
        class DataCacheProxy(object):
            def __init__(self):
                pass

            def __getitem__(self, key):
                # Index access reaches any multiconfig's cache.
                return dataCaches[key]

            def __getattr__(self, name):
                # Attribute access is forwarded to the current multiconfig.
                return getattr(dataCaches[mc], name)

        if cls.supports_multiconfig_datacaches:
            return dataCaches

        return DataCacheProxy()
158
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500159class SignatureGeneratorBasic(SignatureGenerator):
160 """
161 """
162 name = "basic"
163
164 def __init__(self, data):
165 self.basehash = {}
166 self.taskhash = {}
Andrew Geissler82c905d2020-04-13 13:39:40 -0500167 self.unihash = {}
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500168 self.taskdeps = {}
169 self.runtaskdeps = {}
170 self.file_checksum_values = {}
Patrick Williamsf1e5d692016-03-30 15:21:19 -0500171 self.taints = {}
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500172 self.gendeps = {}
173 self.lookupcache = {}
Andrew Geissler82c905d2020-04-13 13:39:40 -0500174 self.setscenetasks = set()
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500175 self.basewhitelist = set((data.getVar("BB_HASHBASE_WHITELIST") or "").split())
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500176 self.taskwhitelist = None
177 self.init_rundepcheck(data)
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500178 checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500179 if checksum_cache_file:
180 self.checksum_cache = FileChecksumCache()
181 self.checksum_cache.init_cache(data, checksum_cache_file)
182 else:
183 self.checksum_cache = None
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500184
Andrew Geissler82c905d2020-04-13 13:39:40 -0500185 self.unihash_cache = bb.cache.SimpleCache("3")
Brad Bishop08902b02019-08-20 09:16:51 -0400186 self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
Andrew Geissler82c905d2020-04-13 13:39:40 -0500187 self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
188 self.tidtopn = {}
Brad Bishop08902b02019-08-20 09:16:51 -0400189
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500190 def init_rundepcheck(self, data):
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500191 self.taskwhitelist = data.getVar("BB_HASHTASK_WHITELIST") or None
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500192 if self.taskwhitelist:
193 self.twl = re.compile(self.taskwhitelist)
194 else:
195 self.twl = None
196
197 def _build_data(self, fn, d):
198
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500199 ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
Andrew Geissler82c905d2020-04-13 13:39:40 -0500200 tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basewhitelist)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500201
Brad Bishop1a4b7ee2018-12-16 17:11:34 -0800202 taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basewhitelist, fn)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500203
204 for task in tasklist:
Brad Bishop08902b02019-08-20 09:16:51 -0400205 tid = fn + ":" + task
206 if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
207 bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
Brad Bishopc342db32019-05-15 21:57:59 -0400208 bb.error("The following commands may help:")
209 cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
210 # Make sure sigdata is dumped before run printdiff
211 bb.error("%s -Snone" % cmd)
212 bb.error("Then:")
213 bb.error("%s -Sprintdiff\n" % cmd)
Brad Bishop08902b02019-08-20 09:16:51 -0400214 self.basehash[tid] = basehash[tid]
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500215
216 self.taskdeps[fn] = taskdeps
217 self.gendeps[fn] = gendeps
218 self.lookupcache[fn] = lookupcache
219
220 return taskdeps
221
Brad Bishopa34c0302019-09-23 22:34:48 -0400222 def set_setscene_tasks(self, setscene_tasks):
Andrew Geissler82c905d2020-04-13 13:39:40 -0500223 self.setscenetasks = set(setscene_tasks)
Brad Bishopa34c0302019-09-23 22:34:48 -0400224
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500225 def finalise(self, fn, d, variant):
226
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600227 mc = d.getVar("__BBMULTICONFIG", False) or ""
228 if variant or mc:
229 fn = bb.cache.realfn2virtual(fn, variant, mc)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500230
231 try:
232 taskdeps = self._build_data(fn, d)
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500233 except bb.parse.SkipRecipe:
234 raise
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500235 except:
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500236 bb.warn("Error during finalise of %s" % fn)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500237 raise
238
239 #Slow but can be useful for debugging mismatched basehashes
240 #for task in self.taskdeps[fn]:
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500241 # self.dump_sigtask(fn, task, d.getVar("STAMP"), False)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500242
243 for task in taskdeps:
Patrick Williams213cb262021-08-07 19:21:33 -0500244 d.setVar("BB_BASEHASH:task-%s" % task, self.basehash[fn + ":" + task])
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500245
Andrew Geissler82c905d2020-04-13 13:39:40 -0500246 def postparsing_clean_cache(self):
247 #
248 # After parsing we can remove some things from memory to reduce our memory footprint
249 #
250 self.gendeps = {}
251 self.lookupcache = {}
252 self.taskdeps = {}
253
Andrew Geissler5a43b432020-06-13 10:46:56 -0500254 def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500255 # Return True if we should keep the dependency, False to drop it
256 # We only manipulate the dependencies for packages not in the whitelist
257 if self.twl and not self.twl.search(recipename):
258 # then process the actual dependencies
259 if self.twl.search(depname):
260 return False
261 return True
262
263 def read_taint(self, fn, task, stampbase):
264 taint = None
265 try:
266 with open(stampbase + '.' + task + '.taint', 'r') as taintf:
267 taint = taintf.read()
268 except IOError:
269 pass
270 return taint
271
Andrew Geissler5a43b432020-06-13 10:46:56 -0500272 def prep_taskhash(self, tid, deps, dataCaches):
Brad Bishop1a4b7ee2018-12-16 17:11:34 -0800273
Brad Bishop08902b02019-08-20 09:16:51 -0400274 (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)
Brad Bishop1a4b7ee2018-12-16 17:11:34 -0800275
Andrew Geissler5a43b432020-06-13 10:46:56 -0500276 self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
Brad Bishop08902b02019-08-20 09:16:51 -0400277 self.runtaskdeps[tid] = []
278 self.file_checksum_values[tid] = []
Andrew Geissler5a43b432020-06-13 10:46:56 -0500279 recipename = dataCaches[mc].pkg_fn[fn]
Andrew Geissler82c905d2020-04-13 13:39:40 -0500280
281 self.tidtopn[tid] = recipename
282
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500283 for dep in sorted(deps, key=clean_basepath):
Andrew Geissler5a43b432020-06-13 10:46:56 -0500284 (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
285 depname = dataCaches[depmc].pkg_fn[depmcfn]
286 if not self.supports_multiconfig_datacaches and mc != depmc:
287 # If the signature generator doesn't understand multiconfig
288 # data caches, any dependency not in the same multiconfig must
289 # be skipped for backward compatibility
Andrew Geissler99467da2019-02-25 18:54:23 -0600290 continue
Andrew Geissler5a43b432020-06-13 10:46:56 -0500291 if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches):
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500292 continue
293 if dep not in self.taskhash:
Brad Bishop1a4b7ee2018-12-16 17:11:34 -0800294 bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
Brad Bishop08902b02019-08-20 09:16:51 -0400295 self.runtaskdeps[tid].append(dep)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500296
Andrew Geissler5a43b432020-06-13 10:46:56 -0500297 if task in dataCaches[mc].file_checksums[fn]:
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500298 if self.checksum_cache:
Andrew Geissler5a43b432020-06-13 10:46:56 -0500299 checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500300 else:
Andrew Geissler5a43b432020-06-13 10:46:56 -0500301 checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500302 for (f,cs) in checksums:
Brad Bishop08902b02019-08-20 09:16:51 -0400303 self.file_checksum_values[tid].append((f,cs))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500304
Andrew Geissler5a43b432020-06-13 10:46:56 -0500305 taskdep = dataCaches[mc].task_deps[fn]
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500306 if 'nostamp' in taskdep and task in taskdep['nostamp']:
307 # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
Andrew Geissler82c905d2020-04-13 13:39:40 -0500308 if tid in self.taints and self.taints[tid].startswith("nostamp:"):
309 # Don't reset taint value upon every call
310 pass
311 else:
312 import uuid
313 taint = str(uuid.uuid4())
314 self.taints[tid] = "nostamp:" + taint
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500315
Andrew Geissler5a43b432020-06-13 10:46:56 -0500316 taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn])
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500317 if taint:
Brad Bishop08902b02019-08-20 09:16:51 -0400318 self.taints[tid] = taint
319 logger.warning("%s is tainted from a forced run" % tid)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500320
Andrew Geissler82c905d2020-04-13 13:39:40 -0500321 return
322
Andrew Geissler5a43b432020-06-13 10:46:56 -0500323 def get_taskhash(self, tid, deps, dataCaches):
Andrew Geissler82c905d2020-04-13 13:39:40 -0500324
325 data = self.basehash[tid]
326 for dep in self.runtaskdeps[tid]:
Andrew Geissler6ce62a22020-11-30 19:58:47 -0600327 data = data + self.get_unihash(dep)
Andrew Geissler82c905d2020-04-13 13:39:40 -0500328
329 for (f, cs) in self.file_checksum_values[tid]:
330 if cs:
Andrew Geissler595f6302022-01-24 19:11:47 +0000331 if "/./" in f:
332 data = data + "./" + f.split("/./")[1]
Andrew Geissler82c905d2020-04-13 13:39:40 -0500333 data = data + cs
334
335 if tid in self.taints:
336 if self.taints[tid].startswith("nostamp:"):
337 data = data + self.taints[tid][8:]
338 else:
339 data = data + self.taints[tid]
340
Brad Bishop19323692019-04-05 15:28:33 -0400341 h = hashlib.sha256(data.encode("utf-8")).hexdigest()
Brad Bishop08902b02019-08-20 09:16:51 -0400342 self.taskhash[tid] = h
Patrick Williams213cb262021-08-07 19:21:33 -0500343 #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task])
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500344 return h
345
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500346 def writeout_file_checksum_cache(self):
347 """Write/update the file checksum cache onto disk"""
348 if self.checksum_cache:
349 self.checksum_cache.save_extras()
350 self.checksum_cache.save_merge()
351 else:
352 bb.fetch2.fetcher_parse_save()
353 bb.fetch2.fetcher_parse_done()
354
Brad Bishop08902b02019-08-20 09:16:51 -0400355 def save_unitaskhashes(self):
356 self.unihash_cache.save(self.unitaskhashes)
357
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500358 def dump_sigtask(self, fn, task, stampbase, runtime):
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500359
Brad Bishop08902b02019-08-20 09:16:51 -0400360 tid = fn + ":" + task
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500361 referencestamp = stampbase
362 if isinstance(runtime, str) and runtime.startswith("customfile"):
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500363 sigfile = stampbase
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500364 referencestamp = runtime[11:]
Brad Bishop08902b02019-08-20 09:16:51 -0400365 elif runtime and tid in self.taskhash:
Brad Bishop00e122a2019-10-05 11:10:57 -0400366 sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500367 else:
Brad Bishop08902b02019-08-20 09:16:51 -0400368 sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500369
Andrew Geisslerc3d88e42020-10-02 09:45:00 -0500370 with bb.utils.umask(0o002):
371 bb.utils.mkdirhier(os.path.dirname(sigfile))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500372
373 data = {}
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500374 data['task'] = task
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500375 data['basewhitelist'] = self.basewhitelist
376 data['taskwhitelist'] = self.taskwhitelist
377 data['taskdeps'] = self.taskdeps[fn][task]
Brad Bishop08902b02019-08-20 09:16:51 -0400378 data['basehash'] = self.basehash[tid]
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500379 data['gendeps'] = {}
380 data['varvals'] = {}
381 data['varvals'][task] = self.lookupcache[fn][task]
382 for dep in self.taskdeps[fn][task]:
383 if dep in self.basewhitelist:
384 continue
385 data['gendeps'][dep] = self.gendeps[fn][dep]
386 data['varvals'][dep] = self.lookupcache[fn][dep]
387
Brad Bishop08902b02019-08-20 09:16:51 -0400388 if runtime and tid in self.taskhash:
389 data['runtaskdeps'] = self.runtaskdeps[tid]
Andrew Geissler595f6302022-01-24 19:11:47 +0000390 data['file_checksum_values'] = []
391 for f,cs in self.file_checksum_values[tid]:
392 if "/./" in f:
393 data['file_checksum_values'].append(("./" + f.split("/./")[1], cs))
394 else:
395 data['file_checksum_values'].append((os.path.basename(f), cs))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500396 data['runtaskhashes'] = {}
397 for dep in data['runtaskdeps']:
Brad Bishop19323692019-04-05 15:28:33 -0400398 data['runtaskhashes'][dep] = self.get_unihash(dep)
Brad Bishop08902b02019-08-20 09:16:51 -0400399 data['taskhash'] = self.taskhash[tid]
Brad Bishop00e122a2019-10-05 11:10:57 -0400400 data['unihash'] = self.get_unihash(tid)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500401
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500402 taint = self.read_taint(fn, task, referencestamp)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500403 if taint:
404 data['taint'] = taint
405
Brad Bishop08902b02019-08-20 09:16:51 -0400406 if runtime and tid in self.taints:
407 if 'nostamp:' in self.taints[tid]:
408 data['taint'] = self.taints[tid]
Patrick Williamsf1e5d692016-03-30 15:21:19 -0500409
Brad Bishop37a0e4d2017-12-04 01:01:44 -0500410 computed_basehash = calc_basehash(data)
Brad Bishop08902b02019-08-20 09:16:51 -0400411 if computed_basehash != self.basehash[tid]:
412 bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
413 if runtime and tid in self.taskhash:
Brad Bishop37a0e4d2017-12-04 01:01:44 -0500414 computed_taskhash = calc_taskhash(data)
Brad Bishop08902b02019-08-20 09:16:51 -0400415 if computed_taskhash != self.taskhash[tid]:
416 bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
417 sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)
Brad Bishop37a0e4d2017-12-04 01:01:44 -0500418
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500419 fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
420 try:
Andrew Geisslereff27472021-10-29 15:35:00 -0500421 with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f:
422 json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder)
423 f.flush()
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600424 os.chmod(tmpfile, 0o664)
Andrew Geisslerc926e172021-05-07 16:11:35 -0500425 bb.utils.rename(tmpfile, sigfile)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500426 except (OSError, IOError) as err:
427 try:
428 os.unlink(tmpfile)
429 except OSError:
430 pass
431 raise err
432
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500433 def dump_sigfn(self, fn, dataCaches, options):
434 if fn in self.taskdeps:
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500435 for task in self.taskdeps[fn]:
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600436 tid = fn + ":" + task
Brad Bishop08902b02019-08-20 09:16:51 -0400437 mc = bb.runqueue.mc_from_tid(tid)
438 if tid not in self.taskhash:
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500439 continue
Brad Bishop08902b02019-08-20 09:16:51 -0400440 if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
441 bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
442 bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600443 self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500444
class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    """The "basichash" signature generator: like "basic", but embeds the task
    hash in stamp file names so hash changes invalidate stamps."""
    name = "basichash"

    def get_stampfile_hash(self, tid):
        """Return the hash used in stamp names: the task hash when known,
        otherwise the base hash (a missing base hash is a hard error)."""
        try:
            return self.taskhash[tid]
        except KeyError:
            # If task is not in basehash, then error
            return self.basehash[tid]

    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
        """Build the stamp path for a task, embedding its hash.

        A "*_setscene" task (other than do_setscene itself) maps onto the
        hash of the underlying real task. With clean=True the hash component
        becomes a "*" wildcard suitable for glob-based cleanup.
        """
        real_task = taskname
        if taskname.endswith("_setscene") and taskname != "do_setscene":
            real_task = taskname[:-len("_setscene")]
        tid = "%s:%s" % (fn, real_task)

        h = "*" if clean else self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
        """Return a glob pattern matching any hash variant of the stamp."""
        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, d, fn):
        """Force the task to rerun by writing a taint (changes its hash)."""
        bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
        bb.build.write_taint(task, d, fn)
473
Brad Bishop08902b02019-08-20 09:16:51 -0400474class SignatureGeneratorUniHashMixIn(object):
Andrew Geissler82c905d2020-04-13 13:39:40 -0500475 def __init__(self, data):
476 self.extramethod = {}
477 super().__init__(data)
478
Brad Bishop08902b02019-08-20 09:16:51 -0400479 def get_taskdata(self):
Andrew Geissler82c905d2020-04-13 13:39:40 -0500480 return (self.server, self.method, self.extramethod) + super().get_taskdata()
Brad Bishop08902b02019-08-20 09:16:51 -0400481
    def set_taskdata(self, data):
        # Inverse of get_taskdata(): consume our three leading fields, pass
        # the remainder to the base class.
        self.server, self.method, self.extramethod = data[:3]
        super().set_taskdata(data[3:])
Brad Bishop08902b02019-08-20 09:16:51 -0400485
Brad Bishopa34c0302019-09-23 22:34:48 -0400486 def client(self):
487 if getattr(self, '_client', None) is None:
488 self._client = hashserv.create_client(self.server)
489 return self._client
490
Brad Bishop08902b02019-08-20 09:16:51 -0400491 def get_stampfile_hash(self, tid):
492 if tid in self.taskhash:
493 # If a unique hash is reported, use it as the stampfile hash. This
494 # ensures that if a task won't be re-run if the taskhash changes,
495 # but it would result in the same output hash
Andrew Geissler82c905d2020-04-13 13:39:40 -0500496 unihash = self._get_unihash(tid)
Brad Bishop08902b02019-08-20 09:16:51 -0400497 if unihash is not None:
498 return unihash
499
500 return super().get_stampfile_hash(tid)
501
    def set_unihash(self, tid, unihash):
        # Record the unihash both in the persistent cache — keyed by
        # "mc:PN:taskname" and validated against the current taskhash — and
        # in the in-memory per-tid map.
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        self.unitaskhashes[key] = (self.taskhash[tid], unihash)
        self.unihash[tid] = unihash
507
    def _get_unihash(self, tid, checkkey=None):
        """Look up a cached unihash for 'tid'.

        Returns None when the tid is unknown, there is no cache entry, or the
        entry was recorded against a different taskhash than 'checkkey'
        (defaults to the current taskhash) — i.e. stale entries are ignored.
        """
        if tid not in self.tidtopn:
            return None
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        if key not in self.unitaskhashes:
            return None
        if not checkkey:
            checkkey = self.taskhash[tid]
        (key, unihash) = self.unitaskhashes[key]
        if key != checkkey:
            return None
        return unihash
Brad Bishop08902b02019-08-20 09:16:51 -0400521
    def get_unihash(self, tid):
        """Return the unique hash for 'tid'.

        Order of preference: plain taskhash for non-setscene tasks, then the
        local unihash cache, then a query to the hash-equivalence server;
        falls back to the taskhash when the server has no answer or the
        connection fails. The result is always recorded via set_unihash().
        """
        taskhash = self.taskhash[tid]

        # If its not a setscene task we can return
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # for each task
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) The value is easy for multiple independent builders to derive the
        #    same unique hash from the same input. This means that if the
        #    independent builders find the same taskhash, but it isn't reported
        #    to the server, there is a better chance that they will agree on
        #    the unique hash.
        unihash = taskhash

        try:
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported it at debug level 2. If they differ, that
                # is much more interesting, so it is reported at debug level 1
                hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except ConnectionError as e:
            # Best-effort: a dead server must not break the build.
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash
571
    def report_unihash(self, path, task, d):
        """
        Compute the output hash for a completed task and report the
        taskhash -> unihash mapping to the hash equivalence server.

        Arguments:
            path -- path passed through to the configured output hash method
            task -- task name without the "do_" prefix
            d    -- the task's datastore

        Side effects: writes a "depsig.do_<task>.<pid>" signature dump file
        under T (with a stable "depsig.do_<task>" symlink), may update the
        unihash cache and BB_UNIHASH, and fires taskUniHashUpdate when the
        server returns a different unihash.
        """
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        fn = d.getVar('BB_FILENAME')
        tid = fn + ':do_' + task
        key = tid + ':' + taskhash

        # When a setscene task list is known, only tasks in it are reported.
        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just abort
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        # Per-pid dump file name; exposed via a stable symlink name (below).
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            # self.method names the output hash function: a dotted name is
            # resolved as module.attribute, otherwise it is looked up in locs.
            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                # Optionally include task metadata plus the full signature
                # dump in the report sent to the server.
                if report_taskdata:
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                # Per-tid method suffixes (if any) extend the base method name.
                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    # Server returned an equivalent unihash: propagate it to
                    # the runqueue, the local cache and the datastore.
                    hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except ConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

            # Repoint the stable symlink at the freshly written dump file.
            sigfile_link_path = os.path.join(tempdir, sigfile_link)
            bb.utils.remove(sigfile_link_path)

            try:
                os.symlink(sigfile_name, sigfile_link_path)
            except OSError:
                pass
657
Andrew Geissler82c905d2020-04-13 13:39:40 -0500658 def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
659 try:
660 extra_data = {}
661 method = self.method
662 if tid in self.extramethod:
663 method = method + self.extramethod[tid]
664
665 data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
666 hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))
667
668 if data is None:
669 bb.warn("Server unable to handle unihash report")
670 return False
671
672 finalunihash = data['unihash']
673
674 if finalunihash == current_unihash:
675 hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
676 elif finalunihash == wanted_unihash:
677 hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
678 self.set_unihash(tid, finalunihash)
679 return True
680 else:
681 # TODO: What to do here?
682 hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))
683
Andrew Geisslerc926e172021-05-07 16:11:35 -0500684 except ConnectionError as e:
Andrew Geissler82c905d2020-04-13 13:39:40 -0500685 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
686
687 return False
Brad Bishop08902b02019-08-20 09:16:51 -0400688
689#
690# Dummy class used for bitbake-selftest
691#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    """Hash-equivalence signature generator used only by bitbake-selftest."""
    name = "TestEquivHash"

    def init_rundepcheck(self, data):
        """Configure the output hash method and server endpoint from the datastore."""
        super().init_rundepcheck(data)
        # Selftests point BB_HASHSERVE at their own hash equivalence server.
        self.method = "sstate_output_hash"
        self.server = data.getVar('BB_HASHSERVE')
698
Andrew Geissler5a43b432020-06-13 10:46:56 -0500699#
700# Dummy class used for bitbake-selftest
701#
class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash):
    """Multiconfig-aware signature generator used only by bitbake-selftest."""
    supports_multiconfig_datacaches = True
    name = "TestMulticonfigDepends"
Brad Bishop08902b02019-08-20 09:16:51 -0400705
def dump_this_task(outfile, d):
    """Dump the signature data for the currently executing task to outfile."""
    import bb.parse
    recipefile = d.getVar("BB_FILENAME")
    taskname = "do_" + d.getVar("BB_CURRENTTASK")
    # Use the task's stamp as the reference so the dump is tied to this run.
    refstamp = bb.build.stamp_internal(taskname, d, None, True)
    bb.parse.siggen.dump_sigtask(recipefile, taskname, outfile, "customfile:" + refstamp)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500712
def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    # ANSI escape codes for the highlight roles used by the diff output.
    colors = {'color_title': '\033[1m',
              'color_default': '\033[0m',
              'color_add': '\033[0;32m',
              'color_remove': '\033[0;31m',
             }
    if enable_color:
        return colors
    # Colour disabled: keep every key present but map each to an empty string.
    return dict.fromkeys(colors, '')
726
def worddiff_str(oldstr, newstr, colors=None):
    """
    Produce a word-by-word diff of two strings, marking additions as {+word+}
    and removals as [-word-] (optionally colourised). A note is appended when
    the strings differ only in whitespace.
    """
    if not colors:
        colors = init_colors(False)
    rendered = []
    for op, words in simplediff.diff(oldstr.split(' '), newstr.split(' ')):
        text = ' '.join(words)
        if op == '=':
            rendered.append(text)
        elif op == '+':
            rendered.append('{color_add}{{+{value}+}}{color_default}'.format(value=text, **colors))
        elif op == '-':
            rendered.append('{color_remove}[-{value}-]{color_default}'.format(value=text, **colors))
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(rendered), whitespace_note)
746
def list_inline_diff(oldlist, newlist, colors=None):
    """Render an inline, optionally colourised, diff of two lists as one string."""
    if not colors:
        colors = init_colors(False)
    rendered = []
    for op, chunk in simplediff.diff(oldlist, newlist):
        text = ' '.join(chunk)
        if op == '=':
            rendered.append("'%s'" % text)
        elif op == '+':
            rendered.append('{color_add}+{value}{color_default}'.format(value=text, **colors))
        elif op == '-':
            rendered.append('{color_remove}-{value}{color_default}'.format(value=text, **colors))
    return '[%s]' % (', '.join(rendered))
763
def clean_basepath(basepath):
    """
    Reduce a full task path to '<dir>/<recipe>:<task>' form, keeping any
    'virtual:...' decoration as a suffix and any multiconfig prefix as a
    trailing ':mc:<name>' marker.
    """
    prefix, taskdir, recipe_task = basepath.rsplit("/", 2)
    cleaned = taskdir + '/' + recipe_task

    # An absolute prefix means no virtual/multiconfig decoration was present.
    if prefix[0] == '/':
        return cleaned

    if prefix.startswith("mc:") and prefix.count(':') >= 2:
        _, mc_name, prefix = prefix.split(":", 2)
        mc_suffix = ':mc:' + mc_name
    else:
        mc_suffix = ''

    # mc stuff now removed from prefix. Whatever was next, if present will be
    # the first suffix. ':/', recipe path start, marks the end of this.
    # Something like 'virtual:a[:b[:c]]:/path...' (b and c being optional)
    if prefix[0] != '/':
        cleaned += ':' + prefix.split(':/', 1)[0]

    return cleaned + mc_suffix
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500784
def clean_basepaths(a):
    """Return a copy of dict a with every key passed through clean_basepath()."""
    return {clean_basepath(x): a[x] for x in a}
790
def clean_basepaths_list(a):
    """Return a list with clean_basepath() applied to every entry of a."""
    return [clean_basepath(x) for x in a]
796
def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
    """
    Compare two zstd-compressed JSON siginfo files and return a list of
    human-readable strings describing the differences.

    Arguments:
        a, b      -- paths to the two siginfo files
        recursecb -- optional callback recursecb(key, hash_a, hash_b) used to
                     recurse into dependent tasks whose hashes changed; it
                     returns extra output lines (or None)
        color     -- if True, wrap highlights in ANSI colour escape codes
        collapsed -- if True, suppress the basehash/dependent-hash detail
                     lines and splice recursive output in directly
    """
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)
    with bb.compress.zstd.open(b, "rt", encoding="utf-8", num_threads=1) as f:
        b_data = json.load(f, object_hook=SetDecoder)

    # Return (changed, added, removed) key sets for two dicts; keys in
    # whitelist are never reported as changed (whitelist is only read here).
    def dict_diff(a, b, whitelist=set()):
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in whitelist:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed

    # Diff two (filename, checksum) lists, pairing same-named files into
    # 'changed' entries and tolerating duplicate filenames.
    def file_checksums_diff(a, b):
        from collections import Counter

        # Convert lists back to tuples
        a = [(f[0], f[1]) for f in a]
        b = [(f[0], f[1]) for f in b]

        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed

    # Whitelist (variable exclusion) differences.
    if 'basewhitelist' in a_data and a_data['basewhitelist'] != b_data['basewhitelist']:
        output.append(color_format("{color_title}basewhitelist changed{color_default} from '%s' to '%s'") % (a_data['basewhitelist'], b_data['basewhitelist']))
        if a_data['basewhitelist'] and b_data['basewhitelist']:
            output.append("changed items: %s" % a_data['basewhitelist'].symmetric_difference(b_data['basewhitelist']))

    if 'taskwhitelist' in a_data and a_data['taskwhitelist'] != b_data['taskwhitelist']:
        output.append(color_format("{color_title}taskwhitelist changed{color_default} from '%s' to '%s'") % (a_data['taskwhitelist'], b_data['taskwhitelist']))
        if a_data['taskwhitelist'] and b_data['taskwhitelist']:
            output.append("changed items: %s" % a_data['taskwhitelist'].symmetric_difference(b_data['taskwhitelist']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    # Differences in which variables each variable depends on (keys common to
    # both whitelists are excluded from the 'changed' report).
    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basewhitelist'] & b_data['basewhitelist'])
    if changed:
        for dep in sorted(changed):
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in sorted(added):
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in sorted(removed):
            output.append(color_format("{color_title}Dependency on Variable %s was removed") % (dep))


    # Differences in the values of the variables the task depends on;
    # multi-line values get a unified diff, single-line values a word diff.
    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in sorted(changed):
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    # Older siginfo files may lack these fields; default them to empty.
    if not 'file_checksum_values' in a_data:
        a_data['file_checksum_values'] = []
    if not 'file_checksum_values' in b_data:
        b_data['file_checksum_values'] = []

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    if not 'runtaskdeps' in a_data:
        a_data['runtaskdeps'] = {}
    if not 'runtaskdeps' in b_data:
        b_data['runtaskdeps'] = {}

    # Ordered comparison of the tasks this task depends on.
    if not collapsed:
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))

        if changed:
            clean_a = clean_basepaths_list(a_data['runtaskdeps'])
            clean_b = clean_basepaths_list(b_data['runtaskdeps'])
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
            output.append("\n".join(changed))


    # Hash changes of dependent tasks. An added+removed pair with the same
    # hash is treated as a rename and not reported.
    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = a_data['runtaskhashes']
        b = b_data['runtaskhashes']
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in sorted(added):
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (clean_basepath(dep), b[dep]))
        if removed:
            for dep in sorted(removed):
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (clean_basepath(dep), a[dep]))
        if changed:
            for dep in sorted(changed):
                if not collapsed:
                    output.append(color_format("{color_title}Hash for dependent task %s changed{color_default} from %s to %s") % (clean_basepath(dep), a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
                            # that hash since in all likelyhood, they're the same changes this task also saw.
                            output = [output[-1]] + recout

    # Taint changes (forced reruns / nostamp tasks).
    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output
1012
1013
def calc_basehash(sigdata):
    """
    Recompute a task's base hash from parsed siginfo data: the sha256 of the
    task's own variable value concatenated with each dependent variable name
    and its value (a None value contributes nothing).
    """
    varvals = sigdata['varvals']
    taskval = varvals[sigdata['task']]
    parts = [taskval if taskval is not None else '']

    for dep in sigdata['taskdeps']:
        parts.append(dep)
        depval = varvals[dep]
        if depval is not None:
            parts.append(str(depval))

    return hashlib.sha256(''.join(parts).encode("utf-8")).hexdigest()
Patrick Williamsd8c66bc2016-06-20 12:57:21 -05001029
def calc_taskhash(sigdata):
    """
    Recompute a task's taskhash from parsed siginfo data: the sha256 of the
    base hash, the hashes of dependent tasks, the relevant file checksums and
    any taint value.
    """
    chunks = [sigdata['basehash']]

    for dep in sigdata['runtaskdeps']:
        chunks.append(sigdata['runtaskhashes'][dep])

    for fname, csum in sigdata['file_checksum_values']:
        if csum:
            # Relative ("./") paths are themselves part of the hash input
            if "./" in fname:
                chunks.append(fname)
            chunks.append(csum)

    if 'taint' in sigdata:
        taint = sigdata['taint']
        # For nostamp taints only the uuid after the 'nostamp:' prefix is hashed
        chunks.append(taint[8:] if 'nostamp:' in taint else taint)

    return hashlib.sha256(''.join(chunks).encode("utf-8")).hexdigest()
Patrick Williamsd8c66bc2016-06-20 12:57:21 -05001049
1050
def dump_sigfile(a):
    """
    Load the zstd-compressed JSON siginfo file 'a' and return a list of
    human-readable strings describing its contents, including recomputed
    base and task hashes for cross-checking against the stored values.
    """
    output = []

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)

    output.append("basewhitelist: %s" % (sorted(a_data['basewhitelist'])))

    # taskwhitelist may be stored as None, hence the 'or []'
    output.append("taskwhitelist: %s" % (sorted(a_data['taskwhitelist'] or [])))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep in sorted(a_data['gendeps']):
        output.append("List of dependencies for variable %s is %s" % (dep, sorted(a_data['gendeps'][dep])))

    for dep in sorted(a_data['varvals']):
        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))

    # The run-time fields below are optional in a siginfo file.
    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (sorted(a_data['runtaskdeps'])))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (sorted(a_data['file_checksum_values'])))

    if 'runtaskhashes' in a_data:
        for dep in sorted(a_data['runtaskhashes']):
            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))

    if 'taint' in a_data:
        # Make explicit that a nostamp taint value is a uuid4
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(a_data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output