blob: 578ba5d6612d6e249c2f9ad46874b65ea8c65dea [file] [log] [blame]
Brad Bishopc342db32019-05-15 21:57:59 -04001#
2# SPDX-License-Identifier: GPL-2.0-only
3#
4
Patrick Williamsc124f4f2015-09-15 14:41:29 -05005import hashlib
6import logging
7import os
8import re
9import tempfile
Patrick Williamsc0f7c042017-02-23 20:41:17 -060010import pickle
Patrick Williamsc124f4f2015-09-15 14:41:29 -050011import bb.data
Brad Bishop6e60e8b2018-02-01 10:27:11 -050012import difflib
13import simplediff
Andrew Geisslereff27472021-10-29 15:35:00 -050014import json
15import bb.compress.zstd
Patrick Williamsd8c66bc2016-06-20 12:57:21 -050016from bb.checksum import FileChecksumCache
Brad Bishop08902b02019-08-20 09:16:51 -040017from bb import runqueue
Brad Bishopa34c0302019-09-23 22:34:48 -040018import hashserv
Andrew Geissler475cb722020-07-10 16:00:51 -050019import hashserv.client
Patrick Williamsc124f4f2015-09-15 14:41:29 -050020
# Module-level loggers: general signature-generation messages, plus a
# dedicated child logger for hash equivalence server traffic.
logger = logging.getLogger('BitBake.SigGen')
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')
Patrick Williamsc124f4f2015-09-15 14:41:29 -050023
class SetEncoder(json.JSONEncoder):
    """JSON encoder that serializes sets as {"_set_object": [...]} wrappers.

    The inverse transformation is performed by SetDecoder. Set members are
    emitted in sorted order so the output is deterministic.
    """

    def default(self, obj):
        # Anything that is not a set falls through to the stock encoder
        # (which raises TypeError for unserializable objects).
        if not isinstance(obj, set):
            return super().default(obj)
        return {"_set_object": sorted(obj)}
29
def SetDecoder(dct):
    """json object_hook that rebuilds sets encoded by SetEncoder.

    Dicts wrapping a "_set_object" member list become real sets; every
    other dict is returned unchanged.
    """
    try:
        members = dct['_set_object']
    except KeyError:
        return dct
    return set(members)
34
def init(d):
    """Instantiate the signature generator selected by BB_SIGNATURE_HANDLER.

    Scans this module for SignatureGenerator subclasses and returns an
    instance of the one whose ``name`` attribute matches the configured
    handler. An unknown handler name logs an error and falls back to the
    no-op base generator.
    """
    siggens = [obj for obj in globals().values()
               if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)
    # Fix: the original had an unreachable `break` after the `return` and
    # relied on for/else for the fallback; plain fall-through is equivalent.
    logger.error("Invalid signature generator '%s', using default 'noop'\n"
                 "Available generators: %s", desired,
                 ', '.join(obj.name for obj in siggens))
    return SignatureGenerator(d)
49
class SignatureGenerator(object):
    """
    A no-op signature generator and the base class/interface for all
    signature generator implementations.

    State is keyed by "tid" (task id) strings of the form "<fn>:<task>".
    This base implementation stores no useful hash data and produces stamp
    file names without a hash component.
    """
    name = "noop"

    # If the derived class supports multiconfig datacaches, set this to True
    # The default is False for backward compatibility with derived signature
    # generators that do not understand multiconfig caches
    supports_multiconfig_datacaches = False

    def __init__(self, data):
        self.basehash = {}              # tid -> base (metadata-only) hash
        self.taskhash = {}              # tid -> task hash (includes dependency hashes)
        self.unihash = {}               # tid -> unique/equivalence hash
        self.runtaskdeps = {}           # tid -> list of runtime dependency tids
        self.file_checksum_values = {}  # tid -> list of (file, checksum) pairs
        self.taints = {}                # tid -> taint string (forced runs, nostamp tasks)
        self.unitaskhashes = {}         # persistent unihash cache contents
        self.tidtopn = {}               # tid -> recipe name (PN)
        self.setscenetasks = set()      # tids which have setscene variants

    def finalise(self, fn, d, varient):
        # Called once a recipe's datastore is complete; the base generator has
        # nothing to compute. NOTE(review): 'varient' (sic) is the historical
        # spelling, kept as-is for API compatibility.
        return

    def postparsing_clean_cache(self):
        # Hook to drop parse-time caches once parsing has finished.
        return

    def get_unihash(self, tid):
        # Without a hash equivalence service the unique hash is the task hash.
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        # Hook to gather data needed before get_taskhash() is called.
        return

    def get_taskhash(self, tid, deps, dataCaches):
        # Dummy hash: just a digest of the task id string itself.
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        # No hash component in stamp names for the noop generator.
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        # Mask used when globbing for stale stamps; same shape as stampfile().
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, fn, task, stampbase, runtime):
        # Hook to write signature (sigdata/sigbasedata) files; nothing to dump.
        return

    def invalidate_task(self, task, d, fn):
        # Force the task to re-run by removing its stamp.
        bb.build.del_stamp(task, d, fn)

    def dump_sigs(self, dataCache, options):
        # Hook to dump signatures for all tasks (e.g. for bitbake -S).
        return

    def get_taskdata(self):
        # Serializable snapshot of generator state; the tuple ordering must
        # mirror set_taskdata() exactly.
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        # Restore state captured by get_taskdata(); ordering must match.
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        # Reinitialise in place (used when the datastore is replaced).
        self.__init__(data)

    def get_taskhashes(self):
        # Hash-related subset of the state, mirrored by set_taskhashes().
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        # Hook to persist the unihash cache; the base class keeps none.
        return

    def set_setscene_tasks(self, setscene_tasks):
        # Hook to record which tasks have setscene variants.
        return

    @classmethod
    def get_data_caches(cls, dataCaches, mc):
        """
        This function returns the datacaches that should be passed to signature
        generator functions. If the signature generator supports multiconfig
        caches, the entire dictionary of data caches is sent, otherwise a
        special proxy is sent that support both index access to all
        multiconfigs, and also direct access for the default multiconfig.

        The proxy class allows code in this class itself to always use
        multiconfig aware code (to ease maintenance), but derived classes that
        are unaware of multiconfig data caches can still access the default
        multiconfig as expected.

        Do not override this function in derived classes; it will be removed in
        the future when support for multiconfig data caches is mandatory
        """
        class DataCacheProxy(object):
            def __init__(self):
                pass

            def __getitem__(self, key):
                # Index access reaches any multiconfig's cache.
                return dataCaches[key]

            def __getattr__(self, name):
                # Attribute access is forwarded to the default ('mc') cache.
                return getattr(dataCaches[mc], name)

        if cls.supports_multiconfig_datacaches:
            return dataCaches

        return DataCacheProxy()
158
class SignatureGeneratorBasic(SignatureGenerator):
    """
    Signature generator that computes real per-task hashes from variable
    dependencies, runtime task dependencies, file checksums and taints.
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.taskdeps = {}              # fn -> task -> variable dependencies
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.gendeps = {}               # fn -> variable -> dependent variables
        self.lookupcache = {}           # fn -> variable -> value
        self.setscenetasks = set()
        # Variables whose values never contribute to any hash.
        self.basewhitelist = set((data.getVar("BB_HASHBASE_WHITELIST") or "").split())
        self.taskwhitelist = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        # Persistent cache holding previously seen (taskhash, unihash) pairs.
        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        # Compile the optional whitelist regex consulted by rundep_check()
        # when filtering runtime dependencies out of the task hash.
        self.taskwhitelist = data.getVar("BB_HASHTASK_WHITELIST") or None
        if self.taskwhitelist:
            self.twl = re.compile(self.taskwhitelist)
        else:
            self.twl = None

    def _build_data(self, fn, d):
        """Compute variable dependencies and base hashes for every task in fn,
        flagging non-deterministic metadata when a reparse changes a hash."""
        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basewhitelist)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basewhitelist, fn)

        for task in tasklist:
            tid = fn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before run printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        self.taskdeps[fn] = taskdeps
        self.gendeps[fn] = gendeps
        self.lookupcache[fn] = lookupcache

        return taskdeps

    def set_setscene_tasks(self, setscene_tasks):
        # Record which tasks have setscene (from-sstate) variants.
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):
        """Compute base hashes for fn once its datastore is final and publish
        them into the datastore as BB_BASEHASH:task-* variables."""
        mc = d.getVar("__BBMULTICONFIG", False) or ""
        if variant or mc:
            fn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps = self._build_data(fn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % fn)
            raise

        #Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[fn]:
        #    self.dump_sigtask(fn, task, d.getVar("STAMP"), False)

        for task in taskdeps:
            d.setVar("BB_BASEHASH:task-%s" % task, self.basehash[fn + ":" + task])

    def postparsing_clean_cache(self):
        #
        # After parsing we can remove some things from memory to reduce our memory footprint
        #
        self.gendeps = {}
        self.lookupcache = {}
        self.taskdeps = {}

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the whitelist
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True

    def read_taint(self, fn, task, stampbase):
        """Return the contents of the task's .taint stamp file, or None if
        the file does not exist (or cannot be read)."""
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint

    def prep_taskhash(self, tid, deps, dataCaches):
        """Collect the runtime dependencies, file checksums and taints for
        tid; must be called before get_taskhash()."""
        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[fn]

        self.tidtopn[tid] = recipename

        for dep in sorted(deps, key=clean_basepath):
            (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            depname = dataCaches[depmc].pkg_fn[depmcfn]
            if not self.supports_multiconfig_datacaches and mc != depmc:
                # If the signature generator doesn't understand multiconfig
                # data caches, any dependency not in the same multiconfig must
                # be skipped for backward compatibility
                continue
            if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches):
                continue
            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
            self.runtaskdeps[tid].append(dep)

        if task in dataCaches[mc].file_checksums[fn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            for (f,cs) in checksums:
                self.file_checksum_values[tid].append((f,cs))

        taskdep = dataCaches[mc].task_deps[fn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):
        """Combine the base hash with dependency unihashes, file checksums
        and any taints into the final task hash for tid."""
        data = self.basehash[tid]
        for dep in self.runtaskdeps[tid]:
            data = data + self.get_unihash(dep)

        for (f, cs) in self.file_checksum_values[tid]:
            if cs:
                data = data + cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                # Skip the "nostamp:" prefix; only the random component matters.
                data = data + self.taints[tid][8:]
            else:
                data = data + self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task])
        return h

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        # Persist the unihash cache to bb_unihashes.dat.
        self.unihash_cache.save(self.unitaskhashes)

    def dump_sigtask(self, fn, task, stampbase, runtime):
        """Write a sigdata/sigbasedata file describing every input that went
        into the task's signature, cross-checking the recorded hashes."""
        tid = fn + ":" + task
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            # Strip the "customfile:" prefix to recover the real stamp base.
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        with bb.utils.umask(0o002):
            bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basewhitelist'] = self.basewhitelist
        data['taskwhitelist'] = self.taskwhitelist
        data['taskdeps'] = self.taskdeps[fn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.lookupcache[fn][task]
        for dep in self.taskdeps[fn][task]:
            if dep in self.basewhitelist:
                continue
            data['gendeps'][dep] = self.gendeps[fn][dep]
            data['varvals'][dep] = self.lookupcache[fn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = self.runtaskdeps[tid]
            data['file_checksum_values'] = [(os.path.basename(f), cs) for f,cs in self.file_checksum_values[tid]]
            data['runtaskhashes'] = {}
            for dep in data['runtaskdeps']:
                data['runtaskhashes'][dep] = self.get_unihash(dep)
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(fn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        # Sanity check: recomputing the hashes from the dumped data should
        # reproduce the values recorded earlier.
        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        # Write atomically: dump to a temp file in the same directory, then
        # rename into place; clean up the temp file on failure.
        fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f:
                json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder)
                f.flush()
            os.chmod(tmpfile, 0o664)
            bb.utils.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err

    def dump_sigfn(self, fn, dataCaches, options):
        """Dump signature data for every hashed task of fn, warning when the
        cached base hash disagrees with the freshly generated one."""
        if fn in self.taskdeps:
            for task in self.taskdeps[fn]:
                tid = fn + ":" + task
                mc = bb.runqueue.mc_from_tid(tid)
                if tid not in self.taskhash:
                    continue
                if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
                    bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
                    bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
                self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)
class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    """Basic signature generator that embeds the task hash in stamp names."""
    name = "basichash"

    def get_stampfile_hash(self, tid):
        # Prefer the full task hash; fall back to the base hash when the task
        # hash has not been computed (a missing base hash is a hard error).
        try:
            return self.taskhash[tid]
        except KeyError:
            return self.basehash[tid]

    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
        # A setscene variant shares the stamp of its underlying real task.
        suffix = "_setscene"
        realtask = taskname
        if taskname.endswith(suffix) and taskname != "do_setscene":
            realtask = taskname[:-len(suffix)]
        tid = fn + ":" + realtask
        h = "*" if clean else self.get_stampfile_hash(tid)
        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
        # A cleaning mask is simply a stamp name with a wildcard hash field.
        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, d, fn):
        bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
        bb.build.write_taint(task, d, fn)
466
Brad Bishop08902b02019-08-20 09:16:51 -0400467class SignatureGeneratorUniHashMixIn(object):
    def __init__(self, data):
        # Optional per-tid suffix appended to self.method when querying or
        # reporting unihashes to the hash equivalence server.
        self.extramethod = {}
        super().__init__(data)
471
Brad Bishop08902b02019-08-20 09:16:51 -0400472 def get_taskdata(self):
Andrew Geissler82c905d2020-04-13 13:39:40 -0500473 return (self.server, self.method, self.extramethod) + super().get_taskdata()
Brad Bishop08902b02019-08-20 09:16:51 -0400474
475 def set_taskdata(self, data):
Andrew Geissler82c905d2020-04-13 13:39:40 -0500476 self.server, self.method, self.extramethod = data[:3]
477 super().set_taskdata(data[3:])
Brad Bishop08902b02019-08-20 09:16:51 -0400478
Brad Bishopa34c0302019-09-23 22:34:48 -0400479 def client(self):
480 if getattr(self, '_client', None) is None:
481 self._client = hashserv.create_client(self.server)
482 return self._client
483
Brad Bishop08902b02019-08-20 09:16:51 -0400484 def get_stampfile_hash(self, tid):
485 if tid in self.taskhash:
486 # If a unique hash is reported, use it as the stampfile hash. This
487 # ensures that if a task won't be re-run if the taskhash changes,
488 # but it would result in the same output hash
Andrew Geissler82c905d2020-04-13 13:39:40 -0500489 unihash = self._get_unihash(tid)
Brad Bishop08902b02019-08-20 09:16:51 -0400490 if unihash is not None:
491 return unihash
492
493 return super().get_stampfile_hash(tid)
494
495 def set_unihash(self, tid, unihash):
Andrew Geissler82c905d2020-04-13 13:39:40 -0500496 (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
497 key = mc + ":" + self.tidtopn[tid] + ":" + taskname
498 self.unitaskhashes[key] = (self.taskhash[tid], unihash)
499 self.unihash[tid] = unihash
500
501 def _get_unihash(self, tid, checkkey=None):
502 if tid not in self.tidtopn:
503 return None
504 (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
505 key = mc + ":" + self.tidtopn[tid] + ":" + taskname
506 if key not in self.unitaskhashes:
507 return None
508 if not checkkey:
509 checkkey = self.taskhash[tid]
510 (key, unihash) = self.unitaskhashes[key]
511 if key != checkkey:
512 return None
513 return unihash
Brad Bishop08902b02019-08-20 09:16:51 -0400514
    def get_unihash(self, tid):
        """Return the unique (equivalence) hash for tid.

        Checks the local cache first, then asks the hash equivalence server;
        when neither yields a value, the taskhash itself is used. The result
        is recorded in self.unihash (and, via set_unihash, the persistent
        cache) as a side effect.
        """
        taskhash = self.taskhash[tid]

        # If its not a setscene task we can return
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # for each task
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) The value is easy for multiple independent builders to derive the
        #    same unique hash from the same input. This means that if the
        #    independent builders find the same taskhash, but it isn't reported
        #    to the server, there is a better chance that they will agree on
        #    the unique hash.
        unihash = taskhash

        try:
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported it at debug level 2. If they differ, that
                # is much more interesting, so it is reported at debug level 1
                hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash
564
565 def report_unihash(self, path, task, d):
Brad Bishop08902b02019-08-20 09:16:51 -0400566 import importlib
567
568 taskhash = d.getVar('BB_TASKHASH')
569 unihash = d.getVar('BB_UNIHASH')
570 report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
571 tempdir = d.getVar('T')
572 fn = d.getVar('BB_FILENAME')
Brad Bishop00e122a2019-10-05 11:10:57 -0400573 tid = fn + ':do_' + task
Andrew Geissler82c905d2020-04-13 13:39:40 -0500574 key = tid + ':' + taskhash
Brad Bishop00e122a2019-10-05 11:10:57 -0400575
576 if self.setscenetasks and tid not in self.setscenetasks:
577 return
Brad Bishop08902b02019-08-20 09:16:51 -0400578
Andrew Geissler82c905d2020-04-13 13:39:40 -0500579 # This can happen if locked sigs are in action. Detect and just abort
580 if taskhash != self.taskhash[tid]:
581 return
582
Brad Bishop08902b02019-08-20 09:16:51 -0400583 # Sanity checks
Andrew Geissler82c905d2020-04-13 13:39:40 -0500584 cache_unihash = self._get_unihash(tid, checkkey=taskhash)
Brad Bishop08902b02019-08-20 09:16:51 -0400585 if cache_unihash is None:
586 bb.fatal('%s not in unihash cache. Please report this error' % key)
587
588 if cache_unihash != unihash:
589 bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))
590
591 sigfile = None
592 sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
593 sigfile_link = "depsig.do_%s" % task
594
595 try:
596 sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')
597
598 locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}
599
600 if "." in self.method:
601 (module, method) = self.method.rsplit('.', 1)
602 locs['method'] = getattr(importlib.import_module(module), method)
603 outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
604 else:
605 outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)
606
607 try:
Brad Bishopa34c0302019-09-23 22:34:48 -0400608 extra_data = {}
609
610 owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
611 if owner:
612 extra_data['owner'] = owner
Brad Bishop08902b02019-08-20 09:16:51 -0400613
614 if report_taskdata:
615 sigfile.seek(0)
616
Brad Bishopa34c0302019-09-23 22:34:48 -0400617 extra_data['PN'] = d.getVar('PN')
618 extra_data['PV'] = d.getVar('PV')
619 extra_data['PR'] = d.getVar('PR')
620 extra_data['task'] = task
621 extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')
Brad Bishop08902b02019-08-20 09:16:51 -0400622
Andrew Geissler82c905d2020-04-13 13:39:40 -0500623 method = self.method
624 if tid in self.extramethod:
625 method = method + self.extramethod[tid]
626
627 data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
Brad Bishopa34c0302019-09-23 22:34:48 -0400628 new_unihash = data['unihash']
Brad Bishop08902b02019-08-20 09:16:51 -0400629
630 if new_unihash != unihash:
Andrew Geisslerd1e89492021-02-12 15:35:20 -0600631 hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
Brad Bishop08902b02019-08-20 09:16:51 -0400632 bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
Andrew Geissler82c905d2020-04-13 13:39:40 -0500633 self.set_unihash(tid, new_unihash)
634 d.setVar('BB_UNIHASH', new_unihash)
Brad Bishop08902b02019-08-20 09:16:51 -0400635 else:
Andrew Geisslerd1e89492021-02-12 15:35:20 -0600636 hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
Andrew Geisslerc926e172021-05-07 16:11:35 -0500637 except ConnectionError as e:
Brad Bishopa34c0302019-09-23 22:34:48 -0400638 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
Brad Bishop08902b02019-08-20 09:16:51 -0400639 finally:
640 if sigfile:
641 sigfile.close()
642
643 sigfile_link_path = os.path.join(tempdir, sigfile_link)
644 bb.utils.remove(sigfile_link_path)
645
646 try:
647 os.symlink(sigfile_name, sigfile_link_path)
648 except OSError:
649 pass
650
Andrew Geissler82c905d2020-04-13 13:39:40 -0500651 def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
652 try:
653 extra_data = {}
654 method = self.method
655 if tid in self.extramethod:
656 method = method + self.extramethod[tid]
657
658 data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
659 hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))
660
661 if data is None:
662 bb.warn("Server unable to handle unihash report")
663 return False
664
665 finalunihash = data['unihash']
666
667 if finalunihash == current_unihash:
668 hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
669 elif finalunihash == wanted_unihash:
670 hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
671 self.set_unihash(tid, finalunihash)
672 return True
673 else:
674 # TODO: What to do here?
675 hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))
676
Andrew Geisslerc926e172021-05-07 16:11:35 -0500677 except ConnectionError as e:
Andrew Geissler82c905d2020-04-13 13:39:40 -0500678 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
679
680 return False
Brad Bishop08902b02019-08-20 09:16:51 -0400681
682#
683# Dummy class used for bitbake-selftest
684#
685class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
686 name = "TestEquivHash"
687 def init_rundepcheck(self, data):
688 super().init_rundepcheck(data)
Brad Bishopa34c0302019-09-23 22:34:48 -0400689 self.server = data.getVar('BB_HASHSERVE')
Brad Bishop08902b02019-08-20 09:16:51 -0400690 self.method = "sstate_output_hash"
691
Andrew Geissler5a43b432020-06-13 10:46:56 -0500692#
693# Dummy class used for bitbake-selftest
694#
695class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash):
696 name = "TestMulticonfigDepends"
697 supports_multiconfig_datacaches = True
Brad Bishop08902b02019-08-20 09:16:51 -0400698
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500699def dump_this_task(outfile, d):
700 import bb.parse
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500701 fn = d.getVar("BB_FILENAME")
702 task = "do_" + d.getVar("BB_CURRENTTASK")
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500703 referencestamp = bb.build.stamp_internal(task, d, None, True)
704 bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile:" + referencestamp)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500705
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500706def init_colors(enable_color):
707 """Initialise colour dict for passing to compare_sigfiles()"""
708 # First set up the colours
Brad Bishop1a4b7ee2018-12-16 17:11:34 -0800709 colors = {'color_title': '\033[1m',
710 'color_default': '\033[0m',
711 'color_add': '\033[0;32m',
712 'color_remove': '\033[0;31m',
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500713 }
714 # Leave all keys present but clear the values
715 if not enable_color:
716 for k in colors.keys():
717 colors[k] = ''
718 return colors
719
720def worddiff_str(oldstr, newstr, colors=None):
721 if not colors:
722 colors = init_colors(False)
723 diff = simplediff.diff(oldstr.split(' '), newstr.split(' '))
724 ret = []
725 for change, value in diff:
726 value = ' '.join(value)
727 if change == '=':
728 ret.append(value)
729 elif change == '+':
730 item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors)
731 ret.append(item)
732 elif change == '-':
733 item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors)
734 ret.append(item)
735 whitespace_note = ''
736 if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
737 whitespace_note = ' (whitespace changed)'
738 return '"%s"%s' % (' '.join(ret), whitespace_note)
739
740def list_inline_diff(oldlist, newlist, colors=None):
741 if not colors:
742 colors = init_colors(False)
743 diff = simplediff.diff(oldlist, newlist)
744 ret = []
745 for change, value in diff:
746 value = ' '.join(value)
747 if change == '=':
748 ret.append("'%s'" % value)
749 elif change == '+':
750 item = '{color_add}+{value}{color_default}'.format(value=value, **colors)
751 ret.append(item)
752 elif change == '-':
753 item = '{color_remove}-{value}{color_default}'.format(value=value, **colors)
754 ret.append(item)
755 return '[%s]' % (', '.join(ret))
756
Andrew Geisslerc3d88e42020-10-02 09:45:00 -0500757def clean_basepath(basepath):
758 basepath, dir, recipe_task = basepath.rsplit("/", 2)
759 cleaned = dir + '/' + recipe_task
760
761 if basepath[0] == '/':
762 return cleaned
763
Andrew Geisslerd1e89492021-02-12 15:35:20 -0600764 if basepath.startswith("mc:") and basepath.count(':') >= 2:
Andrew Geisslerc3d88e42020-10-02 09:45:00 -0500765 mc, mc_name, basepath = basepath.split(":", 2)
766 mc_suffix = ':mc:' + mc_name
767 else:
768 mc_suffix = ''
769
770 # mc stuff now removed from basepath. Whatever was next, if present will be the first
771 # suffix. ':/', recipe path start, marks the end of this. Something like
772 # 'virtual:a[:b[:c]]:/path...' (b and c being optional)
773 if basepath[0] != '/':
774 cleaned += ':' + basepath.split(':/', 1)[0]
775
776 return cleaned + mc_suffix
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500777
778def clean_basepaths(a):
779 b = {}
780 for x in a:
781 b[clean_basepath(x)] = a[x]
782 return b
783
784def clean_basepaths_list(a):
785 b = []
786 for x in a:
787 b.append(clean_basepath(x))
788 return b
789
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500790def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500791 output = []
792
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500793 colors = init_colors(color)
794 def color_format(formatstr, **values):
795 """
796 Return colour formatted string.
797 NOTE: call with the format string, not an already formatted string
798 containing values (otherwise you could have trouble with { and }
799 characters)
800 """
801 if not formatstr.endswith('{color_default}'):
802 formatstr += '{color_default}'
803 # In newer python 3 versions you can pass both of these directly,
804 # but we only require 3.4 at the moment
805 formatparams = {}
806 formatparams.update(colors)
807 formatparams.update(values)
808 return formatstr.format(**formatparams)
809
Andrew Geisslereff27472021-10-29 15:35:00 -0500810 with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
811 a_data = json.load(f, object_hook=SetDecoder)
812 with bb.compress.zstd.open(b, "rt", encoding="utf-8", num_threads=1) as f:
813 b_data = json.load(f, object_hook=SetDecoder)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500814
815 def dict_diff(a, b, whitelist=set()):
816 sa = set(a.keys())
817 sb = set(b.keys())
818 common = sa & sb
819 changed = set()
820 for i in common:
821 if a[i] != b[i] and i not in whitelist:
822 changed.add(i)
823 added = sb - sa
824 removed = sa - sb
825 return changed, added, removed
826
827 def file_checksums_diff(a, b):
828 from collections import Counter
Andrew Geisslereff27472021-10-29 15:35:00 -0500829
830 # Convert lists back to tuples
831 a = [(f[0], f[1]) for f in a]
832 b = [(f[0], f[1]) for f in b]
833
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500834 # Compare lists, ensuring we can handle duplicate filenames if they exist
835 removedcount = Counter(a)
836 removedcount.subtract(b)
837 addedcount = Counter(b)
838 addedcount.subtract(a)
839 added = []
840 for x in b:
841 if addedcount[x] > 0:
842 addedcount[x] -= 1
843 added.append(x)
844 removed = []
845 changed = []
846 for x in a:
847 if removedcount[x] > 0:
848 removedcount[x] -= 1
849 for y in added:
850 if y[0] == x[0]:
851 changed.append((x[0], x[1], y[1]))
852 added.remove(y)
853 break
854 else:
855 removed.append(x)
856 added = [x[0] for x in added]
857 removed = [x[0] for x in removed]
858 return changed, added, removed
859
860 if 'basewhitelist' in a_data and a_data['basewhitelist'] != b_data['basewhitelist']:
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500861 output.append(color_format("{color_title}basewhitelist changed{color_default} from '%s' to '%s'") % (a_data['basewhitelist'], b_data['basewhitelist']))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500862 if a_data['basewhitelist'] and b_data['basewhitelist']:
863 output.append("changed items: %s" % a_data['basewhitelist'].symmetric_difference(b_data['basewhitelist']))
864
865 if 'taskwhitelist' in a_data and a_data['taskwhitelist'] != b_data['taskwhitelist']:
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500866 output.append(color_format("{color_title}taskwhitelist changed{color_default} from '%s' to '%s'") % (a_data['taskwhitelist'], b_data['taskwhitelist']))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500867 if a_data['taskwhitelist'] and b_data['taskwhitelist']:
868 output.append("changed items: %s" % a_data['taskwhitelist'].symmetric_difference(b_data['taskwhitelist']))
869
870 if a_data['taskdeps'] != b_data['taskdeps']:
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500871 output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500872
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500873 if a_data['basehash'] != b_data['basehash'] and not collapsed:
874 output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500875
876 changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basewhitelist'] & b_data['basewhitelist'])
877 if changed:
Patrick Williams93c203f2021-10-06 16:15:23 -0500878 for dep in sorted(changed):
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500879 output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500880 if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
881 output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
882 if added:
Patrick Williams93c203f2021-10-06 16:15:23 -0500883 for dep in sorted(added):
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500884 output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500885 if removed:
Patrick Williams93c203f2021-10-06 16:15:23 -0500886 for dep in sorted(removed):
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500887 output.append(color_format("{color_title}Dependency on Variable %s was removed") % (dep))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500888
889
890 changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
891 if changed:
Patrick Williams93c203f2021-10-06 16:15:23 -0500892 for dep in sorted(changed):
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500893 oldval = a_data['varvals'][dep]
894 newval = b_data['varvals'][dep]
895 if newval and oldval and ('\n' in oldval or '\n' in newval):
896 diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
897 # Cut off the first two lines, since we aren't interested in
898 # the old/new filename (they are blank anyway in this case)
899 difflines = list(diff)[2:]
900 if color:
901 # Add colour to diff output
902 for i, line in enumerate(difflines):
903 if line.startswith('+'):
904 line = color_format('{color_add}{line}', line=line)
905 difflines[i] = line
906 elif line.startswith('-'):
907 line = color_format('{color_remove}{line}', line=line)
908 difflines[i] = line
909 output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
910 elif newval and oldval and (' ' in oldval or ' ' in newval):
911 output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
912 else:
913 output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500914
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600915 if not 'file_checksum_values' in a_data:
Andrew Geisslereff27472021-10-29 15:35:00 -0500916 a_data['file_checksum_values'] = []
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600917 if not 'file_checksum_values' in b_data:
Andrew Geisslereff27472021-10-29 15:35:00 -0500918 b_data['file_checksum_values'] = []
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600919
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500920 changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
921 if changed:
922 for f, old, new in changed:
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500923 output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500924 if added:
925 for f in added:
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500926 output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500927 if removed:
928 for f in removed:
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500929 output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500930
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600931 if not 'runtaskdeps' in a_data:
932 a_data['runtaskdeps'] = {}
933 if not 'runtaskdeps' in b_data:
934 b_data['runtaskdeps'] = {}
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500935
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500936 if not collapsed:
937 if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
938 changed = ["Number of task dependencies changed"]
939 else:
940 changed = []
941 for idx, task in enumerate(a_data['runtaskdeps']):
942 a = a_data['runtaskdeps'][idx]
943 b = b_data['runtaskdeps'][idx]
944 if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
945 changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500946
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500947 if changed:
948 clean_a = clean_basepaths_list(a_data['runtaskdeps'])
949 clean_b = clean_basepaths_list(b_data['runtaskdeps'])
950 if clean_a != clean_b:
951 output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
952 else:
953 output.append(color_format("{color_title}runtaskdeps changed:"))
954 output.append("\n".join(changed))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500955
956
957 if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
958 a = a_data['runtaskhashes']
959 b = b_data['runtaskhashes']
960 changed, added, removed = dict_diff(a, b)
961 if added:
Patrick Williams93c203f2021-10-06 16:15:23 -0500962 for dep in sorted(added):
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500963 bdep_found = False
964 if removed:
965 for bdep in removed:
966 if b[dep] == a[bdep]:
967 #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
968 bdep_found = True
969 if not bdep_found:
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500970 output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (clean_basepath(dep), b[dep]))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500971 if removed:
Patrick Williams93c203f2021-10-06 16:15:23 -0500972 for dep in sorted(removed):
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500973 adep_found = False
974 if added:
975 for adep in added:
976 if b[adep] == a[dep]:
977 #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
978 adep_found = True
979 if not adep_found:
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500980 output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (clean_basepath(dep), a[dep]))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500981 if changed:
Patrick Williams93c203f2021-10-06 16:15:23 -0500982 for dep in sorted(changed):
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500983 if not collapsed:
984 output.append(color_format("{color_title}Hash for dependent task %s changed{color_default} from %s to %s") % (clean_basepath(dep), a[dep], b[dep]))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500985 if callable(recursecb):
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500986 recout = recursecb(dep, a[dep], b[dep])
987 if recout:
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500988 if collapsed:
989 output.extend(recout)
990 else:
Brad Bishop1a4b7ee2018-12-16 17:11:34 -0800991 # If a dependent hash changed, might as well print the line above and then defer to the changes in
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500992 # that hash since in all likelyhood, they're the same changes this task also saw.
993 output = [output[-1]] + recout
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500994
995 a_taint = a_data.get('taint', None)
996 b_taint = b_data.get('taint', None)
997 if a_taint != b_taint:
Brad Bishop96ff1982019-08-19 13:50:42 -0400998 if a_taint and a_taint.startswith('nostamp:'):
Brad Bishopc342db32019-05-15 21:57:59 -0400999 a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
Brad Bishop96ff1982019-08-19 13:50:42 -04001000 if b_taint and b_taint.startswith('nostamp:'):
Brad Bishopc342db32019-05-15 21:57:59 -04001001 b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
Brad Bishop6e60e8b2018-02-01 10:27:11 -05001002 output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001003
1004 return output
1005
1006
Patrick Williamsd8c66bc2016-06-20 12:57:21 -05001007def calc_basehash(sigdata):
1008 task = sigdata['task']
1009 basedata = sigdata['varvals'][task]
1010
1011 if basedata is None:
1012 basedata = ''
1013
1014 alldeps = sigdata['taskdeps']
1015 for dep in alldeps:
1016 basedata = basedata + dep
1017 val = sigdata['varvals'][dep]
1018 if val is not None:
1019 basedata = basedata + str(val)
1020
Brad Bishop19323692019-04-05 15:28:33 -04001021 return hashlib.sha256(basedata.encode("utf-8")).hexdigest()
Patrick Williamsd8c66bc2016-06-20 12:57:21 -05001022
1023def calc_taskhash(sigdata):
1024 data = sigdata['basehash']
1025
1026 for dep in sigdata['runtaskdeps']:
1027 data = data + sigdata['runtaskhashes'][dep]
1028
1029 for c in sigdata['file_checksum_values']:
Brad Bishop37a0e4d2017-12-04 01:01:44 -05001030 if c[1]:
1031 data = data + c[1]
Patrick Williamsd8c66bc2016-06-20 12:57:21 -05001032
1033 if 'taint' in sigdata:
1034 if 'nostamp:' in sigdata['taint']:
1035 data = data + sigdata['taint'][8:]
1036 else:
1037 data = data + sigdata['taint']
1038
Brad Bishop19323692019-04-05 15:28:33 -04001039 return hashlib.sha256(data.encode("utf-8")).hexdigest()
Patrick Williamsd8c66bc2016-06-20 12:57:21 -05001040
1041
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001042def dump_sigfile(a):
1043 output = []
1044
Andrew Geisslereff27472021-10-29 15:35:00 -05001045 with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
1046 a_data = json.load(f, object_hook=SetDecoder)
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001047
1048 output.append("basewhitelist: %s" % (a_data['basewhitelist']))
1049
1050 output.append("taskwhitelist: %s" % (a_data['taskwhitelist']))
1051
1052 output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))
1053
1054 output.append("basehash: %s" % (a_data['basehash']))
1055
1056 for dep in a_data['gendeps']:
1057 output.append("List of dependencies for variable %s is %s" % (dep, a_data['gendeps'][dep]))
1058
1059 for dep in a_data['varvals']:
1060 output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))
1061
1062 if 'runtaskdeps' in a_data:
1063 output.append("Tasks this task depends on: %s" % (a_data['runtaskdeps']))
1064
1065 if 'file_checksum_values' in a_data:
1066 output.append("This task depends on the checksums of files: %s" % (a_data['file_checksum_values']))
1067
1068 if 'runtaskhashes' in a_data:
1069 for dep in a_data['runtaskhashes']:
1070 output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))
1071
1072 if 'taint' in a_data:
Brad Bishopc342db32019-05-15 21:57:59 -04001073 if a_data['taint'].startswith('nostamp:'):
1074 msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
1075 else:
1076 msg = a_data['taint']
1077 output.append("Tainted (by forced/invalidated task): %s" % msg)
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001078
Patrick Williamsd8c66bc2016-06-20 12:57:21 -05001079 if 'task' in a_data:
1080 computed_basehash = calc_basehash(a_data)
1081 output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
1082 else:
1083 output.append("Unable to compute base hash")
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001084
Patrick Williamsd8c66bc2016-06-20 12:57:21 -05001085 computed_taskhash = calc_taskhash(a_data)
1086 output.append("Computed task hash is %s" % computed_taskhash)
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001087
1088 return output