blob: ebba4525924c1ad65517f2f760878be10a8ba948 [file] [log] [blame]
Brad Bishopc342db32019-05-15 21:57:59 -04001#
2# SPDX-License-Identifier: GPL-2.0-only
3#
4
Patrick Williamsc124f4f2015-09-15 14:41:29 -05005import hashlib
6import logging
7import os
8import re
9import tempfile
Patrick Williamsc0f7c042017-02-23 20:41:17 -060010import pickle
Patrick Williamsc124f4f2015-09-15 14:41:29 -050011import bb.data
Brad Bishop6e60e8b2018-02-01 10:27:11 -050012import difflib
13import simplediff
Andrew Geisslereff27472021-10-29 15:35:00 -050014import json
15import bb.compress.zstd
Patrick Williamsd8c66bc2016-06-20 12:57:21 -050016from bb.checksum import FileChecksumCache
Brad Bishop08902b02019-08-20 09:16:51 -040017from bb import runqueue
Brad Bishopa34c0302019-09-23 22:34:48 -040018import hashserv
Andrew Geissler475cb722020-07-10 16:00:51 -050019import hashserv.client
Patrick Williamsc124f4f2015-09-15 14:41:29 -050020
21logger = logging.getLogger('BitBake.SigGen')
Andrew Geissler82c905d2020-04-13 13:39:40 -050022hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')
Patrick Williamsc124f4f2015-09-15 14:41:29 -050023
class SetEncoder(json.JSONEncoder):
    """JSON encoder that serialises Python sets.

    A set is emitted as the marker object ``{"_set_object": [...]}`` with its
    members sorted, so output is deterministic and SetDecoder can round-trip
    it back into a real set.
    """

    def default(self, obj):
        # Only sets get special treatment; everything else defers to the
        # base class, which raises TypeError for unserialisable objects.
        if not isinstance(obj, set):
            return super().default(obj)
        return {"_set_object": sorted(obj)}
29
def SetDecoder(dct):
    """object_hook for json.load/loads that reverses SetEncoder.

    Dicts of the form ``{"_set_object": [...]}`` become real sets; every
    other dict is passed through untouched.
    """
    try:
        return set(dct['_set_object'])
    except KeyError:
        return dct
34
def init(d):
    """Instantiate the signature generator selected by BB_SIGNATURE_HANDLER.

    Scans this module's globals for SignatureGenerator subclasses and returns
    an instance of the one whose ``name`` matches the configured handler.
    If no generator matches, logs an error listing the available names and
    falls back to the no-op base SignatureGenerator.
    """
    siggens = [obj for obj in globals().values()
               if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)
    # Reached only when no generator matched (the loop returns on a match);
    # the original unreachable break/else construct has been removed.
    logger.error("Invalid signature generator '%s', using default 'noop'\n"
                 "Available generators: %s", desired,
                 ', '.join(obj.name for obj in siggens))
    return SignatureGenerator(d)
49
class SignatureGenerator(object):
    """
    Base "noop" signature generator.

    Provides the interface BitBake's runqueue expects of a signature
    generator; most hooks here do nothing, and task hashes are simply a
    sha256 of the task id. Real implementations subclass this (see
    SignatureGeneratorBasic and friends).
    """
    name = "noop"

    # If the derived class supports multiconfig datacaches, set this to True
    # The default is False for backward compatibility with derived signature
    # generators that do not understand multiconfig caches
    supports_multiconfig_datacaches = False

    def __init__(self, data):
        # Per-tid bookkeeping dictionaries; keys are task ids ("fn:task").
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.unitaskhashes = {}
        self.tidtopn = {}
        self.setscenetasks = set()

    def finalise(self, fn, d, varient):
        # Hook called when a recipe finishes parsing; no-op here.
        # (Note: "varient" spelling is part of the established interface.)
        return

    def postparsing_clean_cache(self):
        # Hook to drop parse-time caches after parsing completes; no-op here.
        return

    def get_unihash(self, tid):
        # Without a hash-equivalence server the unique hash is the task hash.
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        # Hook to gather per-task data before get_taskhash(); no-op here.
        return

    def get_taskhash(self, tid, deps, dataCaches):
        # Noop generator: hash only the task id itself (deps are ignored).
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        # Stamp path without any hash component; rstrip drops a trailing
        # "." when extrainfo is empty.
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        # Glob mask used when cleaning stamps; same form as stampfile() here.
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, fn, task, stampbase, runtime):
        # Hook to write signature data for one task; no-op here.
        return

    def invalidate_task(self, task, d, fn):
        # Force a task to re-run by deleting its stamp.
        bb.build.del_stamp(task, d, fn)

    def dump_sigs(self, dataCache, options):
        # Hook to dump signatures for all tasks; no-op here.
        return

    def get_taskdata(self):
        # Serialise generator state for transfer between processes.
        # NOTE: tuple order must match set_taskdata() exactly.
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        # Restore state produced by get_taskdata(); order must match.
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        # Re-initialise in place, discarding all accumulated state.
        self.__init__(data)

    def get_taskhashes(self):
        # Hash-only subset of the state; order must match set_taskhashes().
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        # Hook to persist the unihash cache; no-op here.
        return

    def set_setscene_tasks(self, setscene_tasks):
        # Hook to record which tasks are setscene tasks; no-op here.
        return

    @classmethod
    def get_data_caches(cls, dataCaches, mc):
        """
        This function returns the datacaches that should be passed to signature
        generator functions. If the signature generator supports multiconfig
        caches, the entire dictionary of data caches is sent, otherwise a
        special proxy is sent that support both index access to all
        multiconfigs, and also direct access for the default multiconfig.

        The proxy class allows code in this class itself to always use
        multiconfig aware code (to ease maintenance), but derived classes that
        are unaware of multiconfig data caches can still access the default
        multiconfig as expected.

        Do not override this function in derived classes; it will be removed in
        the future when support for multiconfig data caches is mandatory
        """
        class DataCacheProxy(object):
            def __init__(self):
                pass

            def __getitem__(self, key):
                # Index access reaches any multiconfig's cache.
                return dataCaches[key]

            def __getattr__(self, name):
                # Attribute access falls through to the 'mc' multiconfig.
                return getattr(dataCaches[mc], name)

        if cls.supports_multiconfig_datacaches:
            return dataCaches

        return DataCacheProxy()
158
class SignatureGeneratorBasic(SignatureGenerator):
    """
    The "basic" signature generator.

    Computes real per-task hashes: a basehash from the recipe's variable
    dependency graph (via bb.data), extended with dependent tasks' hashes,
    file checksums and taints to form the taskhash. Also knows how to dump
    the data behind a signature to a compressed .sigdata/.sigbasedata file
    for later comparison.
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.taskdeps = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.gendeps = {}
        self.lookupcache = {}
        self.setscenetasks = set()
        # Variables excluded from basehash computation (BB_BASEHASH_IGNORE_VARS).
        self.basehash_ignore_vars = set((data.getVar("BB_BASEHASH_IGNORE_VARS") or "").split())
        self.taskhash_ignore_tasks = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        # Persistent cache mapping tasks to (taskhash, unihash) pairs.
        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        # Directory names skipped when checksumming local files (SCM metadata etc.).
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        # Compile the BB_TASKHASH_IGNORE_TASKS pattern used by rundep_check()
        # to drop inter-recipe dependencies from taskhash computation.
        self.taskhash_ignore_tasks = data.getVar("BB_TASKHASH_IGNORE_TASKS") or None
        if self.taskhash_ignore_tasks:
            self.twl = re.compile(self.taskhash_ignore_tasks)
        else:
            self.twl = None

    def _build_data(self, fn, d):
        # Build the variable dependency data and basehashes for every task
        # in recipe 'fn', caching the results on self.

        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basehash_ignore_vars)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basehash_ignore_vars, fn)

        for task in tasklist:
            tid = fn + ":" + task
            # A basehash that changes on reparse means the metadata is not
            # deterministic; report it loudly (unless explicitly ignored).
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before run printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        self.taskdeps[fn] = taskdeps
        self.gendeps[fn] = gendeps
        self.lookupcache[fn] = lookupcache

        return taskdeps

    def set_setscene_tasks(self, setscene_tasks):
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):
        # Called at the end of recipe parsing: compute basehashes and export
        # them into the datastore as BB_BASEHASH:task-* variables.

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        if variant or mc:
            fn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps = self._build_data(fn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            # Annotate which recipe failed before propagating the error.
            bb.warn("Error during finalise of %s" % fn)
            raise

        #Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[fn]:
        #    self.dump_sigtask(fn, task, d.getVar("STAMP"), False)

        for task in taskdeps:
            d.setVar("BB_BASEHASH:task-%s" % task, self.basehash[fn + ":" + task])

    def postparsing_clean_cache(self):
        #
        # After parsing we can remove some things from memory to reduce our memory footprint
        #
        self.gendeps = {}
        self.lookupcache = {}
        self.taskdeps = {}

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the ignore
        # list
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True

    def read_taint(self, fn, task, stampbase):
        # Read a task's .taint stamp file if present; returns the taint
        # string, or None when no taint file exists.
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint

    def prep_taskhash(self, tid, deps, dataCaches):
        # Collect everything get_taskhash() needs for 'tid': basehash,
        # filtered runtime dependencies, file checksums and taints.

        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[fn]

        self.tidtopn[tid] = recipename

        # Sort deps for a deterministic hash regardless of discovery order.
        for dep in sorted(deps, key=clean_basepath):
            (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            depname = dataCaches[depmc].pkg_fn[depmcfn]
            if not self.supports_multiconfig_datacaches and mc != depmc:
                # If the signature generator doesn't understand multiconfig
                # data caches, any dependency not in the same multiconfig must
                # be skipped for backward compatibility
                continue
            if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches):
                continue
            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
            self.runtaskdeps[tid].append(dep)

        if task in dataCaches[mc].file_checksums[fn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            for (f,cs) in checksums:
                self.file_checksum_values[tid].append((f,cs))

        taskdep = dataCaches[mc].task_deps[fn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):
        # Fold the basehash, dependency unihashes, file checksums and taints
        # into a single sha256 taskhash. prep_taskhash() must run first.

        data = self.basehash[tid]
        for dep in self.runtaskdeps[tid]:
            data = data + self.get_unihash(dep)

        for (f, cs) in self.file_checksum_values[tid]:
            if cs:
                if "/./" in f:
                    # "/./" marks a path split point; hash the relative part
                    # so the hash is stable across build directories.
                    data = data + "./" + f.split("/./")[1]
                data = data + cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                # Strip the "nostamp:" prefix; only the uuid matters.
                data = data + self.taints[tid][8:]
            else:
                data = data + self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task])
        return h

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        # Persist the taskhash->unihash cache to disk.
        self.unihash_cache.save(self.unitaskhashes)

    def dump_sigtask(self, fn, task, stampbase, runtime):
        # Write the signature input data for one task to a compressed JSON
        # .sigdata/.sigbasedata file next to its stamp (or to a custom file
        # when runtime starts with "customfile:").

        tid = fn + ":" + task
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            # Path after the "customfile:" prefix is the real stamp base.
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        # Group-writable so shared sstate/stamp dirs work.
        with bb.utils.umask(0o002):
            bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basehash_ignore_vars'] = self.basehash_ignore_vars
        data['taskhash_ignore_tasks'] = self.taskhash_ignore_tasks
        data['taskdeps'] = self.taskdeps[fn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.lookupcache[fn][task]
        for dep in self.taskdeps[fn][task]:
            if dep in self.basehash_ignore_vars:
                continue
            data['gendeps'][dep] = self.gendeps[fn][dep]
            data['varvals'][dep] = self.lookupcache[fn][dep]

        if runtime and tid in self.taskhash:
            # Runtime dump: include full taskhash inputs, not just basehash.
            data['runtaskdeps'] = self.runtaskdeps[tid]
            data['file_checksum_values'] = []
            for f,cs in self.file_checksum_values[tid]:
                if "/./" in f:
                    # Same relative-path normalisation as get_taskhash().
                    data['file_checksum_values'].append(("./" + f.split("/./")[1], cs))
                else:
                    data['file_checksum_values'].append((os.path.basename(f), cs))
            data['runtaskhashes'] = {}
            for dep in data['runtaskdeps']:
                data['runtaskhashes'][dep] = self.get_unihash(dep)
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(fn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        # Sanity-check that the recorded hashes can be recomputed from the
        # data we are about to dump.
        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        # Write to a temp file and rename for an atomic update of sigfile.
        fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f:
                json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder)
                f.flush()
            os.chmod(tmpfile, 0o664)
            bb.utils.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err

    def dump_sigfn(self, fn, dataCaches, options):
        # Dump signature data for every hashed task of recipe 'fn', warning
        # if the cached basehash disagrees with the freshly generated one.
        if fn in self.taskdeps:
            for task in self.taskdeps[fn]:
                tid = fn + ":" + task
                mc = bb.runqueue.mc_from_tid(tid)
                if tid not in self.taskhash:
                    continue
                if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
                    bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
                    bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
                self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500445
class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    """Like "basic", but embeds the task hash into stamp file names so a
    changed hash automatically invalidates the stamp."""
    name = "basichash"

    def get_stampfile_hash(self, tid):
        # Prefer the full taskhash; fall back to the basehash when the
        # taskhash has not been computed yet.
        if tid in self.taskhash:
            return self.taskhash[tid]

        # If task is not in basehash, then error
        return self.basehash[tid]

    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
        # Setscene variants share the stamp of the underlying task, so strip
        # the "_setscene" suffix (9 chars) before building the tid.
        if taskname != "do_setscene" and taskname.endswith("_setscene"):
            tid = fn + ":" + taskname[:-9]
        else:
            tid = fn + ":" + taskname
        if clean:
            # "*" yields a glob mask matching any hash (see stampcleanmask).
            h = "*"
        else:
            h = self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
        # Glob pattern matching this task's stamp for any hash value.
        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, d, fn):
        # Write a taint instead of deleting the stamp: the taint feeds into
        # the hash, forcing the task (and its dependents) to re-run.
        bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
        bb.build.write_taint(task, d, fn)
474
Brad Bishop08902b02019-08-20 09:16:51 -0400475class SignatureGeneratorUniHashMixIn(object):
Andrew Geissler82c905d2020-04-13 13:39:40 -0500476 def __init__(self, data):
477 self.extramethod = {}
478 super().__init__(data)
479
Brad Bishop08902b02019-08-20 09:16:51 -0400480 def get_taskdata(self):
Andrew Geissler82c905d2020-04-13 13:39:40 -0500481 return (self.server, self.method, self.extramethod) + super().get_taskdata()
Brad Bishop08902b02019-08-20 09:16:51 -0400482
483 def set_taskdata(self, data):
Andrew Geissler82c905d2020-04-13 13:39:40 -0500484 self.server, self.method, self.extramethod = data[:3]
485 super().set_taskdata(data[3:])
Brad Bishop08902b02019-08-20 09:16:51 -0400486
Brad Bishopa34c0302019-09-23 22:34:48 -0400487 def client(self):
488 if getattr(self, '_client', None) is None:
489 self._client = hashserv.create_client(self.server)
490 return self._client
491
Brad Bishop08902b02019-08-20 09:16:51 -0400492 def get_stampfile_hash(self, tid):
493 if tid in self.taskhash:
494 # If a unique hash is reported, use it as the stampfile hash. This
495 # ensures that if a task won't be re-run if the taskhash changes,
496 # but it would result in the same output hash
Andrew Geissler82c905d2020-04-13 13:39:40 -0500497 unihash = self._get_unihash(tid)
Brad Bishop08902b02019-08-20 09:16:51 -0400498 if unihash is not None:
499 return unihash
500
501 return super().get_stampfile_hash(tid)
502
503 def set_unihash(self, tid, unihash):
Andrew Geissler82c905d2020-04-13 13:39:40 -0500504 (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
505 key = mc + ":" + self.tidtopn[tid] + ":" + taskname
506 self.unitaskhashes[key] = (self.taskhash[tid], unihash)
507 self.unihash[tid] = unihash
508
509 def _get_unihash(self, tid, checkkey=None):
510 if tid not in self.tidtopn:
511 return None
512 (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
513 key = mc + ":" + self.tidtopn[tid] + ":" + taskname
514 if key not in self.unitaskhashes:
515 return None
516 if not checkkey:
517 checkkey = self.taskhash[tid]
518 (key, unihash) = self.unitaskhashes[key]
519 if key != checkkey:
520 return None
521 return unihash
Brad Bishop08902b02019-08-20 09:16:51 -0400522
523 def get_unihash(self, tid):
Brad Bishop08902b02019-08-20 09:16:51 -0400524 taskhash = self.taskhash[tid]
525
Brad Bishopa34c0302019-09-23 22:34:48 -0400526 # If its not a setscene task we can return
527 if self.setscenetasks and tid not in self.setscenetasks:
Andrew Geissler82c905d2020-04-13 13:39:40 -0500528 self.unihash[tid] = None
Brad Bishopa34c0302019-09-23 22:34:48 -0400529 return taskhash
530
Brad Bishop08902b02019-08-20 09:16:51 -0400531 # TODO: This cache can grow unbounded. It probably only needs to keep
532 # for each task
Andrew Geissler82c905d2020-04-13 13:39:40 -0500533 unihash = self._get_unihash(tid)
Brad Bishop08902b02019-08-20 09:16:51 -0400534 if unihash is not None:
Andrew Geissler82c905d2020-04-13 13:39:40 -0500535 self.unihash[tid] = unihash
Brad Bishop08902b02019-08-20 09:16:51 -0400536 return unihash
537
538 # In the absence of being able to discover a unique hash from the
539 # server, make it be equivalent to the taskhash. The unique "hash" only
540 # really needs to be a unique string (not even necessarily a hash), but
541 # making it match the taskhash has a few advantages:
542 #
543 # 1) All of the sstate code that assumes hashes can be the same
544 # 2) It provides maximal compatibility with builders that don't use
545 # an equivalency server
546 # 3) The value is easy for multiple independent builders to derive the
547 # same unique hash from the same input. This means that if the
548 # independent builders find the same taskhash, but it isn't reported
549 # to the server, there is a better chance that they will agree on
550 # the unique hash.
551 unihash = taskhash
552
553 try:
Andrew Geissler82c905d2020-04-13 13:39:40 -0500554 method = self.method
555 if tid in self.extramethod:
556 method = method + self.extramethod[tid]
557 data = self.client().get_unihash(method, self.taskhash[tid])
Brad Bishopa34c0302019-09-23 22:34:48 -0400558 if data:
559 unihash = data
Brad Bishop08902b02019-08-20 09:16:51 -0400560 # A unique hash equal to the taskhash is not very interesting,
561 # so it is reported it at debug level 2. If they differ, that
562 # is much more interesting, so it is reported at debug level 1
Andrew Geissler82c905d2020-04-13 13:39:40 -0500563 hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
Brad Bishop08902b02019-08-20 09:16:51 -0400564 else:
Andrew Geisslerd1e89492021-02-12 15:35:20 -0600565 hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
Andrew Geisslerc926e172021-05-07 16:11:35 -0500566 except ConnectionError as e:
Brad Bishopa34c0302019-09-23 22:34:48 -0400567 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
Brad Bishop08902b02019-08-20 09:16:51 -0400568
Andrew Geissler82c905d2020-04-13 13:39:40 -0500569 self.set_unihash(tid, unihash)
570 self.unihash[tid] = unihash
Brad Bishop08902b02019-08-20 09:16:51 -0400571 return unihash
572
    def report_unihash(self, path, task, d):
        """
        Compute the task's output hash and report the taskhash/outhash pair
        to the hash equivalence server, adopting the server's unihash (and
        updating BB_UNIHASH) if it differs from the current one.

        path: directory passed to the output-hash method
        task: task name without the "do_" prefix
        d:    the task's datastore
        """
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        fn = d.getVar('BB_FILENAME')
        tid = fn + ':do_' + task
        key = tid + ':' + taskhash

        # Only report tasks tracked as setscene tasks (when that set is known)
        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just exit
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        # Write to a pid-suffixed file; the stable name is symlinked to it below
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            # self.method is either a dotted "module.function" path or a name
            # already resolvable in the eval context
            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                # Optionally attach task metadata and the signature dump to the report
                if report_taskdata:
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    # Server returned a different (equivalent) unihash: adopt it
                    # and notify the runqueue
                    hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except ConnectionError as e:
                # Reporting is best-effort; an unreachable server must not fail the task
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

            sigfile_link_path = os.path.join(tempdir, sigfile_link)
            bb.utils.remove(sigfile_link_path)

            try:
                os.symlink(sigfile_name, sigfile_link_path)
            except OSError:
                pass
658
Andrew Geissler82c905d2020-04-13 13:39:40 -0500659 def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
660 try:
661 extra_data = {}
662 method = self.method
663 if tid in self.extramethod:
664 method = method + self.extramethod[tid]
665
666 data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
667 hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))
668
669 if data is None:
670 bb.warn("Server unable to handle unihash report")
671 return False
672
673 finalunihash = data['unihash']
674
675 if finalunihash == current_unihash:
676 hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
677 elif finalunihash == wanted_unihash:
678 hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
679 self.set_unihash(tid, finalunihash)
680 return True
681 else:
682 # TODO: What to do here?
683 hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))
684
Andrew Geisslerc926e172021-05-07 16:11:35 -0500685 except ConnectionError as e:
Andrew Geissler82c905d2020-04-13 13:39:40 -0500686 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
687
688 return False
Brad Bishop08902b02019-08-20 09:16:51 -0400689
690#
691# Dummy class used for bitbake-selftest
692#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    """Test-only signature generator combining the basic hash backend with
    hash equivalence support; selected by bitbake-selftest."""
    name = "TestEquivHash"
    def init_rundepcheck(self, data):
        # Pick up the test hash server address and report using the sstate
        # output hash method
        super().init_rundepcheck(data)
        self.server = data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"
699
Andrew Geissler5a43b432020-06-13 10:46:56 -0500700#
701# Dummy class used for bitbake-selftest
702#
class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash):
    """Test-only signature generator that opts in to multiconfig-aware
    datacache handling; selected by bitbake-selftest."""
    name = "TestMulticonfigDepends"
    # Signals that the generator's hooks accept per-multiconfig datacaches
    supports_multiconfig_datacaches = True
Brad Bishop08902b02019-08-20 09:16:51 -0400706
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500707def dump_this_task(outfile, d):
708 import bb.parse
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500709 fn = d.getVar("BB_FILENAME")
710 task = "do_" + d.getVar("BB_CURRENTTASK")
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500711 referencestamp = bb.build.stamp_internal(task, d, None, True)
712 bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile:" + referencestamp)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500713
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500714def init_colors(enable_color):
715 """Initialise colour dict for passing to compare_sigfiles()"""
716 # First set up the colours
Brad Bishop1a4b7ee2018-12-16 17:11:34 -0800717 colors = {'color_title': '\033[1m',
718 'color_default': '\033[0m',
719 'color_add': '\033[0;32m',
720 'color_remove': '\033[0;31m',
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500721 }
722 # Leave all keys present but clear the values
723 if not enable_color:
724 for k in colors.keys():
725 colors[k] = ''
726 return colors
727
def worddiff_str(oldstr, newstr, colors=None):
    """Return a word-level inline diff of two space-separated strings.

    Additions render as {+word+} and removals as [-word-], optionally
    colourised. A note is appended when the only change is whitespace.
    """
    if not colors:
        colors = init_colors(False)
    pieces = []
    for op, words in simplediff.diff(oldstr.split(' '), newstr.split(' ')):
        text = ' '.join(words)
        if op == '=':
            pieces.append(text)
        elif op == '+':
            pieces.append('{color_add}{{+{value}+}}{color_default}'.format(value=text, **colors))
        elif op == '-':
            pieces.append('{color_remove}[-{value}-]{color_default}'.format(value=text, **colors))
    note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(pieces), note)
747
def list_inline_diff(oldlist, newlist, colors=None):
    """Render an element-wise diff of two lists as one bracketed string.

    Unchanged items are quoted; additions are prefixed '+' and removals
    '-', optionally colourised.
    """
    if not colors:
        colors = init_colors(False)
    rendered = []
    for op, elems in simplediff.diff(oldlist, newlist):
        text = ' '.join(elems)
        if op == '=':
            rendered.append("'%s'" % text)
        elif op == '+':
            rendered.append('{color_add}+{value}{color_default}'.format(value=text, **colors))
        elif op == '-':
            rendered.append('{color_remove}-{value}{color_default}'.format(value=text, **colors))
    return '[%s]' % (', '.join(rendered))
764
Andrew Geisslerc3d88e42020-10-02 09:45:00 -0500765def clean_basepath(basepath):
766 basepath, dir, recipe_task = basepath.rsplit("/", 2)
767 cleaned = dir + '/' + recipe_task
768
769 if basepath[0] == '/':
770 return cleaned
771
Andrew Geisslerd1e89492021-02-12 15:35:20 -0600772 if basepath.startswith("mc:") and basepath.count(':') >= 2:
Andrew Geisslerc3d88e42020-10-02 09:45:00 -0500773 mc, mc_name, basepath = basepath.split(":", 2)
774 mc_suffix = ':mc:' + mc_name
775 else:
776 mc_suffix = ''
777
778 # mc stuff now removed from basepath. Whatever was next, if present will be the first
779 # suffix. ':/', recipe path start, marks the end of this. Something like
780 # 'virtual:a[:b[:c]]:/path...' (b and c being optional)
781 if basepath[0] != '/':
782 cleaned += ':' + basepath.split(':/', 1)[0]
783
784 return cleaned + mc_suffix
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500785
def clean_basepaths(a):
    """Return a copy of dict a with every key run through clean_basepath()."""
    return {clean_basepath(key): value for key, value in a.items()}
791
def clean_basepaths_list(a):
    """Return a list with clean_basepath() applied to every element of a."""
    return [clean_basepath(item) for item in a]
797
Andrew Geissler7e0e3c02022-02-25 20:34:39 +0000798# Handled renamed fields
799def handle_renames(data):
800 if 'basewhitelist' in data:
801 data['basehash_ignore_vars'] = data['basewhitelist']
802 del data['basewhitelist']
803 if 'taskwhitelist' in data:
804 data['taskhash_ignore_tasks'] = data['taskwhitelist']
805 del data['taskwhitelist']
806
807
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500808def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500809 output = []
810
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500811 colors = init_colors(color)
812 def color_format(formatstr, **values):
813 """
814 Return colour formatted string.
815 NOTE: call with the format string, not an already formatted string
816 containing values (otherwise you could have trouble with { and }
817 characters)
818 """
819 if not formatstr.endswith('{color_default}'):
820 formatstr += '{color_default}'
821 # In newer python 3 versions you can pass both of these directly,
822 # but we only require 3.4 at the moment
823 formatparams = {}
824 formatparams.update(colors)
825 formatparams.update(values)
826 return formatstr.format(**formatparams)
827
Andrew Geisslereff27472021-10-29 15:35:00 -0500828 with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
829 a_data = json.load(f, object_hook=SetDecoder)
830 with bb.compress.zstd.open(b, "rt", encoding="utf-8", num_threads=1) as f:
831 b_data = json.load(f, object_hook=SetDecoder)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500832
Andrew Geissler7e0e3c02022-02-25 20:34:39 +0000833 for data in [a_data, b_data]:
834 handle_renames(data)
835
836 def dict_diff(a, b, ignored_vars=set()):
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500837 sa = set(a.keys())
838 sb = set(b.keys())
839 common = sa & sb
840 changed = set()
841 for i in common:
Andrew Geissler7e0e3c02022-02-25 20:34:39 +0000842 if a[i] != b[i] and i not in ignored_vars:
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500843 changed.add(i)
844 added = sb - sa
845 removed = sa - sb
846 return changed, added, removed
847
848 def file_checksums_diff(a, b):
849 from collections import Counter
Andrew Geisslereff27472021-10-29 15:35:00 -0500850
851 # Convert lists back to tuples
852 a = [(f[0], f[1]) for f in a]
853 b = [(f[0], f[1]) for f in b]
854
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500855 # Compare lists, ensuring we can handle duplicate filenames if they exist
856 removedcount = Counter(a)
857 removedcount.subtract(b)
858 addedcount = Counter(b)
859 addedcount.subtract(a)
860 added = []
861 for x in b:
862 if addedcount[x] > 0:
863 addedcount[x] -= 1
864 added.append(x)
865 removed = []
866 changed = []
867 for x in a:
868 if removedcount[x] > 0:
869 removedcount[x] -= 1
870 for y in added:
871 if y[0] == x[0]:
872 changed.append((x[0], x[1], y[1]))
873 added.remove(y)
874 break
875 else:
876 removed.append(x)
877 added = [x[0] for x in added]
878 removed = [x[0] for x in removed]
879 return changed, added, removed
880
Andrew Geissler7e0e3c02022-02-25 20:34:39 +0000881 if 'basehash_ignore_vars' in a_data and a_data['basehash_ignore_vars'] != b_data['basehash_ignore_vars']:
882 output.append(color_format("{color_title}basehash_ignore_vars changed{color_default} from '%s' to '%s'") % (a_data['basehash_ignore_vars'], b_data['basehash_ignore_vars']))
883 if a_data['basehash_ignore_vars'] and b_data['basehash_ignore_vars']:
884 output.append("changed items: %s" % a_data['basehash_ignore_vars'].symmetric_difference(b_data['basehash_ignore_vars']))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500885
Andrew Geissler7e0e3c02022-02-25 20:34:39 +0000886 if 'taskhash_ignore_tasks' in a_data and a_data['taskhash_ignore_tasks'] != b_data['taskhash_ignore_tasks']:
887 output.append(color_format("{color_title}taskhash_ignore_tasks changed{color_default} from '%s' to '%s'") % (a_data['taskhash_ignore_tasks'], b_data['taskhash_ignore_tasks']))
888 if a_data['taskhash_ignore_tasks'] and b_data['taskhash_ignore_tasks']:
889 output.append("changed items: %s" % a_data['taskhash_ignore_tasks'].symmetric_difference(b_data['taskhash_ignore_tasks']))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500890
891 if a_data['taskdeps'] != b_data['taskdeps']:
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500892 output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500893
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500894 if a_data['basehash'] != b_data['basehash'] and not collapsed:
895 output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500896
Andrew Geissler7e0e3c02022-02-25 20:34:39 +0000897 changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basehash_ignore_vars'] & b_data['basehash_ignore_vars'])
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500898 if changed:
Patrick Williams93c203f2021-10-06 16:15:23 -0500899 for dep in sorted(changed):
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500900 output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500901 if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
902 output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
903 if added:
Patrick Williams93c203f2021-10-06 16:15:23 -0500904 for dep in sorted(added):
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500905 output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500906 if removed:
Patrick Williams93c203f2021-10-06 16:15:23 -0500907 for dep in sorted(removed):
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500908 output.append(color_format("{color_title}Dependency on Variable %s was removed") % (dep))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500909
910
911 changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
912 if changed:
Patrick Williams93c203f2021-10-06 16:15:23 -0500913 for dep in sorted(changed):
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500914 oldval = a_data['varvals'][dep]
915 newval = b_data['varvals'][dep]
916 if newval and oldval and ('\n' in oldval or '\n' in newval):
917 diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
918 # Cut off the first two lines, since we aren't interested in
919 # the old/new filename (they are blank anyway in this case)
920 difflines = list(diff)[2:]
921 if color:
922 # Add colour to diff output
923 for i, line in enumerate(difflines):
924 if line.startswith('+'):
925 line = color_format('{color_add}{line}', line=line)
926 difflines[i] = line
927 elif line.startswith('-'):
928 line = color_format('{color_remove}{line}', line=line)
929 difflines[i] = line
930 output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
931 elif newval and oldval and (' ' in oldval or ' ' in newval):
932 output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
933 else:
934 output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500935
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600936 if not 'file_checksum_values' in a_data:
Andrew Geisslereff27472021-10-29 15:35:00 -0500937 a_data['file_checksum_values'] = []
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600938 if not 'file_checksum_values' in b_data:
Andrew Geisslereff27472021-10-29 15:35:00 -0500939 b_data['file_checksum_values'] = []
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600940
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500941 changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
942 if changed:
943 for f, old, new in changed:
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500944 output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500945 if added:
946 for f in added:
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500947 output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500948 if removed:
949 for f in removed:
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500950 output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500951
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600952 if not 'runtaskdeps' in a_data:
953 a_data['runtaskdeps'] = {}
954 if not 'runtaskdeps' in b_data:
955 b_data['runtaskdeps'] = {}
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500956
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500957 if not collapsed:
958 if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
959 changed = ["Number of task dependencies changed"]
960 else:
961 changed = []
962 for idx, task in enumerate(a_data['runtaskdeps']):
963 a = a_data['runtaskdeps'][idx]
964 b = b_data['runtaskdeps'][idx]
965 if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
966 changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500967
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500968 if changed:
969 clean_a = clean_basepaths_list(a_data['runtaskdeps'])
970 clean_b = clean_basepaths_list(b_data['runtaskdeps'])
971 if clean_a != clean_b:
972 output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
973 else:
974 output.append(color_format("{color_title}runtaskdeps changed:"))
975 output.append("\n".join(changed))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500976
977
978 if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
979 a = a_data['runtaskhashes']
980 b = b_data['runtaskhashes']
981 changed, added, removed = dict_diff(a, b)
982 if added:
Patrick Williams93c203f2021-10-06 16:15:23 -0500983 for dep in sorted(added):
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500984 bdep_found = False
985 if removed:
986 for bdep in removed:
987 if b[dep] == a[bdep]:
988 #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
989 bdep_found = True
990 if not bdep_found:
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500991 output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (clean_basepath(dep), b[dep]))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500992 if removed:
Patrick Williams93c203f2021-10-06 16:15:23 -0500993 for dep in sorted(removed):
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500994 adep_found = False
995 if added:
996 for adep in added:
997 if b[adep] == a[dep]:
998 #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
999 adep_found = True
1000 if not adep_found:
Brad Bishop6e60e8b2018-02-01 10:27:11 -05001001 output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (clean_basepath(dep), a[dep]))
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001002 if changed:
Patrick Williams93c203f2021-10-06 16:15:23 -05001003 for dep in sorted(changed):
Brad Bishop6e60e8b2018-02-01 10:27:11 -05001004 if not collapsed:
1005 output.append(color_format("{color_title}Hash for dependent task %s changed{color_default} from %s to %s") % (clean_basepath(dep), a[dep], b[dep]))
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001006 if callable(recursecb):
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001007 recout = recursecb(dep, a[dep], b[dep])
1008 if recout:
Brad Bishop6e60e8b2018-02-01 10:27:11 -05001009 if collapsed:
1010 output.extend(recout)
1011 else:
Brad Bishop1a4b7ee2018-12-16 17:11:34 -08001012 # If a dependent hash changed, might as well print the line above and then defer to the changes in
Brad Bishop6e60e8b2018-02-01 10:27:11 -05001013 # that hash since in all likelyhood, they're the same changes this task also saw.
1014 output = [output[-1]] + recout
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001015
1016 a_taint = a_data.get('taint', None)
1017 b_taint = b_data.get('taint', None)
1018 if a_taint != b_taint:
Brad Bishop96ff1982019-08-19 13:50:42 -04001019 if a_taint and a_taint.startswith('nostamp:'):
Brad Bishopc342db32019-05-15 21:57:59 -04001020 a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
Brad Bishop96ff1982019-08-19 13:50:42 -04001021 if b_taint and b_taint.startswith('nostamp:'):
Brad Bishopc342db32019-05-15 21:57:59 -04001022 b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
Brad Bishop6e60e8b2018-02-01 10:27:11 -05001023 output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001024
1025 return output
1026
1027
Patrick Williamsd8c66bc2016-06-20 12:57:21 -05001028def calc_basehash(sigdata):
1029 task = sigdata['task']
1030 basedata = sigdata['varvals'][task]
1031
1032 if basedata is None:
1033 basedata = ''
1034
1035 alldeps = sigdata['taskdeps']
1036 for dep in alldeps:
1037 basedata = basedata + dep
1038 val = sigdata['varvals'][dep]
1039 if val is not None:
1040 basedata = basedata + str(val)
1041
Brad Bishop19323692019-04-05 15:28:33 -04001042 return hashlib.sha256(basedata.encode("utf-8")).hexdigest()
Patrick Williamsd8c66bc2016-06-20 12:57:21 -05001043
def calc_taskhash(sigdata):
    """Recompute the task hash from a parsed siginfo dictionary.

    Folds in the base hash, each dependent task hash, the file checksums
    (including the path itself for relative paths) and any taint value,
    returning the sha256 hex digest.
    """
    parts = [sigdata['basehash']]

    for dep in sigdata['runtaskdeps']:
        parts.append(sigdata['runtaskhashes'][dep])

    for entry in sigdata['file_checksum_values']:
        if entry[1]:
            # Relative paths contribute the path as well as the checksum
            if "./" in entry[0]:
                parts.append(entry[0])
            parts.append(entry[1])

    if 'taint' in sigdata:
        taint = sigdata['taint']
        if 'nostamp:' in taint:
            # Strip the 8-character 'nostamp:' prefix, keeping the suffix
            parts.append(taint[8:])
        else:
            parts.append(taint)

    return hashlib.sha256(''.join(parts).encode("utf-8")).hexdigest()
Patrick Williamsd8c66bc2016-06-20 12:57:21 -05001063
1064
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001065def dump_sigfile(a):
1066 output = []
1067
Andrew Geisslereff27472021-10-29 15:35:00 -05001068 with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
1069 a_data = json.load(f, object_hook=SetDecoder)
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001070
Andrew Geissler7e0e3c02022-02-25 20:34:39 +00001071 handle_renames(a_data)
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001072
Andrew Geissler7e0e3c02022-02-25 20:34:39 +00001073 output.append("basehash_ignore_vars: %s" % (sorted(a_data['basehash_ignore_vars'])))
1074
1075 output.append("taskhash_ignore_tasks: %s" % (sorted(a_data['taskhash_ignore_tasks'] or [])))
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001076
1077 output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))
1078
1079 output.append("basehash: %s" % (a_data['basehash']))
1080
Andrew Geissler595f6302022-01-24 19:11:47 +00001081 for dep in sorted(a_data['gendeps']):
1082 output.append("List of dependencies for variable %s is %s" % (dep, sorted(a_data['gendeps'][dep])))
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001083
Andrew Geissler595f6302022-01-24 19:11:47 +00001084 for dep in sorted(a_data['varvals']):
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001085 output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))
1086
1087 if 'runtaskdeps' in a_data:
Andrew Geissler595f6302022-01-24 19:11:47 +00001088 output.append("Tasks this task depends on: %s" % (sorted(a_data['runtaskdeps'])))
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001089
1090 if 'file_checksum_values' in a_data:
Andrew Geissler595f6302022-01-24 19:11:47 +00001091 output.append("This task depends on the checksums of files: %s" % (sorted(a_data['file_checksum_values'])))
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001092
1093 if 'runtaskhashes' in a_data:
Andrew Geissler595f6302022-01-24 19:11:47 +00001094 for dep in sorted(a_data['runtaskhashes']):
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001095 output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))
1096
1097 if 'taint' in a_data:
Brad Bishopc342db32019-05-15 21:57:59 -04001098 if a_data['taint'].startswith('nostamp:'):
1099 msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
1100 else:
1101 msg = a_data['taint']
1102 output.append("Tainted (by forced/invalidated task): %s" % msg)
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001103
Patrick Williamsd8c66bc2016-06-20 12:57:21 -05001104 if 'task' in a_data:
1105 computed_basehash = calc_basehash(a_data)
1106 output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
1107 else:
1108 output.append("Unable to compute base hash")
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001109
Patrick Williamsd8c66bc2016-06-20 12:57:21 -05001110 computed_taskhash = calc_taskhash(a_data)
1111 output.append("Computed task hash is %s" % computed_taskhash)
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001112
1113 return output