blob: 1612b4efa129a1639e2bee1186e6d6e73319cf40 [file] [log] [blame]
Brad Bishopc342db32019-05-15 21:57:59 -04001#
2# SPDX-License-Identifier: GPL-2.0-only
3#
4
Patrick Williamsc124f4f2015-09-15 14:41:29 -05005import hashlib
6import logging
7import os
8import re
9import tempfile
Patrick Williamsc0f7c042017-02-23 20:41:17 -060010import pickle
Patrick Williamsc124f4f2015-09-15 14:41:29 -050011import bb.data
Brad Bishop6e60e8b2018-02-01 10:27:11 -050012import difflib
13import simplediff
Andrew Geisslereff27472021-10-29 15:35:00 -050014import json
15import bb.compress.zstd
Patrick Williamsd8c66bc2016-06-20 12:57:21 -050016from bb.checksum import FileChecksumCache
Brad Bishop08902b02019-08-20 09:16:51 -040017from bb import runqueue
Brad Bishopa34c0302019-09-23 22:34:48 -040018import hashserv
Andrew Geissler475cb722020-07-10 16:00:51 -050019import hashserv.client
Patrick Williamsc124f4f2015-09-15 14:41:29 -050020
# Module loggers: general signature-generation messages, plus a dedicated
# child logger for hash-equivalence traffic so it can be filtered separately.
logger = logging.getLogger('BitBake.SigGen')
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')
Patrick Williamsc124f4f2015-09-15 14:41:29 -050023
Andrew Geisslereff27472021-10-29 15:35:00 -050024class SetEncoder(json.JSONEncoder):
25 def default(self, obj):
26 if isinstance(obj, set):
27 return dict(_set_object=list(sorted(obj)))
28 return json.JSONEncoder.default(self, obj)
29
def SetDecoder(dct):
    """JSON object hook reversing SetEncoder: turn marked dicts back into sets."""
    if '_set_object' not in dct:
        return dct
    return set(dct['_set_object'])
34
Patrick Williamsc124f4f2015-09-15 14:41:29 -050035def init(d):
Patrick Williamsc0f7c042017-02-23 20:41:17 -060036 siggens = [obj for obj in globals().values()
Patrick Williamsc124f4f2015-09-15 14:41:29 -050037 if type(obj) is type and issubclass(obj, SignatureGenerator)]
38
Brad Bishop6e60e8b2018-02-01 10:27:11 -050039 desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
Patrick Williamsc124f4f2015-09-15 14:41:29 -050040 for sg in siggens:
41 if desired == sg.name:
42 return sg(d)
43 break
44 else:
45 logger.error("Invalid signature generator '%s', using default 'noop'\n"
46 "Available generators: %s", desired,
47 ', '.join(obj.name for obj in siggens))
48 return SignatureGenerator(d)
49
class SignatureGenerator(object):
    """
    Base "noop" signature generator.

    Task hashes are simply the sha256 of the task id and most hooks are
    stubs; subclasses override these to implement real signature handling.
    """
    name = "noop"

    # If the derived class supports multiconfig datacaches, set this to True
    # The default is False for backward compatibility with derived signature
    # generators that do not understand multiconfig caches
    supports_multiconfig_datacaches = False

    def __init__(self, data):
        # All mappings below are keyed by task id ("tid") unless noted.
        self.basehash = {}              # tid -> base (metadata) hash
        self.taskhash = {}              # tid -> task hash
        self.unihash = {}               # tid -> unique/equivalence hash
        self.runtaskdeps = {}           # tid -> runtime dependency tids
        self.file_checksum_values = {}  # tid -> list of (file, checksum)
        self.taints = {}                # tid -> taint string
        self.unitaskhashes = {}         # cache of recorded unique hashes
        self.tidtopn = {}               # tid -> recipe name
        self.setscenetasks = set()      # tids that have setscene variants

    def finalise(self, fn, d, varient):
        # Hook called after recipe parsing; no-op in the base class.
        return

    def postparsing_clean_cache(self):
        # Hook to drop parse-time caches; no-op in the base class.
        return

    def get_unihash(self, tid):
        # Without an equivalence mechanism the unique hash is the taskhash.
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        return

    def get_taskhash(self, tid, deps, dataCaches):
        # Fallback taskhash: sha256 of the task id alone (dependencies ignored).
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        # rstrip('.') drops the trailing dot when extrainfo is empty.
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, fn, task, stampbase, runtime):
        return

    def invalidate_task(self, task, d, fn):
        bb.build.del_stamp(task, d, fn)

    def dump_sigs(self, dataCache, options):
        return

    def get_taskdata(self):
        # NOTE: the tuple order is a serialization contract -- set_taskdata
        # must unpack in exactly this order.
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        # Re-run __init__ to return to a pristine state.
        self.__init__(data)

    def get_taskhashes(self):
        # NOTE: tuple order must match set_taskhashes below.
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        return

    def set_setscene_tasks(self, setscene_tasks):
        return

    @classmethod
    def get_data_caches(cls, dataCaches, mc):
        """
        This function returns the datacaches that should be passed to signature
        generator functions. If the signature generator supports multiconfig
        caches, the entire dictionary of data caches is sent, otherwise a
        special proxy is sent that support both index access to all
        multiconfigs, and also direct access for the default multiconfig.

        The proxy class allows code in this class itself to always use
        multiconfig aware code (to ease maintenance), but derived classes that
        are unaware of multiconfig data caches can still access the default
        multiconfig as expected.

        Do not override this function in derived classes; it will be removed in
        the future when support for multiconfig data caches is mandatory
        """
        class DataCacheProxy(object):
            def __init__(self):
                pass

            def __getitem__(self, key):
                # Index access reaches any multiconfig's cache.
                return dataCaches[key]

            def __getattr__(self, name):
                # Plain attribute access is forwarded to the default (mc) cache.
                return getattr(dataCaches[mc], name)

        if cls.supports_multiconfig_datacaches:
            return dataCaches

        return DataCacheProxy()

    def exit(self):
        return
161
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500162class SignatureGeneratorBasic(SignatureGenerator):
163 """
164 """
165 name = "basic"
166
167 def __init__(self, data):
168 self.basehash = {}
169 self.taskhash = {}
Andrew Geissler82c905d2020-04-13 13:39:40 -0500170 self.unihash = {}
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500171 self.taskdeps = {}
172 self.runtaskdeps = {}
173 self.file_checksum_values = {}
Patrick Williamsf1e5d692016-03-30 15:21:19 -0500174 self.taints = {}
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500175 self.gendeps = {}
176 self.lookupcache = {}
Andrew Geissler82c905d2020-04-13 13:39:40 -0500177 self.setscenetasks = set()
Andrew Geissler7e0e3c02022-02-25 20:34:39 +0000178 self.basehash_ignore_vars = set((data.getVar("BB_BASEHASH_IGNORE_VARS") or "").split())
179 self.taskhash_ignore_tasks = None
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500180 self.init_rundepcheck(data)
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500181 checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500182 if checksum_cache_file:
183 self.checksum_cache = FileChecksumCache()
184 self.checksum_cache.init_cache(data, checksum_cache_file)
185 else:
186 self.checksum_cache = None
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500187
Andrew Geissler82c905d2020-04-13 13:39:40 -0500188 self.unihash_cache = bb.cache.SimpleCache("3")
Brad Bishop08902b02019-08-20 09:16:51 -0400189 self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
Andrew Geissler82c905d2020-04-13 13:39:40 -0500190 self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
191 self.tidtopn = {}
Brad Bishop08902b02019-08-20 09:16:51 -0400192
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500193 def init_rundepcheck(self, data):
Andrew Geissler7e0e3c02022-02-25 20:34:39 +0000194 self.taskhash_ignore_tasks = data.getVar("BB_TASKHASH_IGNORE_TASKS") or None
195 if self.taskhash_ignore_tasks:
196 self.twl = re.compile(self.taskhash_ignore_tasks)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500197 else:
198 self.twl = None
199
200 def _build_data(self, fn, d):
201
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500202 ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
Andrew Geissler7e0e3c02022-02-25 20:34:39 +0000203 tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basehash_ignore_vars)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500204
Andrew Geissler7e0e3c02022-02-25 20:34:39 +0000205 taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basehash_ignore_vars, fn)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500206
207 for task in tasklist:
Brad Bishop08902b02019-08-20 09:16:51 -0400208 tid = fn + ":" + task
209 if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
210 bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
Brad Bishopc342db32019-05-15 21:57:59 -0400211 bb.error("The following commands may help:")
212 cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
213 # Make sure sigdata is dumped before run printdiff
214 bb.error("%s -Snone" % cmd)
215 bb.error("Then:")
216 bb.error("%s -Sprintdiff\n" % cmd)
Brad Bishop08902b02019-08-20 09:16:51 -0400217 self.basehash[tid] = basehash[tid]
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500218
219 self.taskdeps[fn] = taskdeps
220 self.gendeps[fn] = gendeps
221 self.lookupcache[fn] = lookupcache
222
223 return taskdeps
224
Brad Bishopa34c0302019-09-23 22:34:48 -0400225 def set_setscene_tasks(self, setscene_tasks):
Andrew Geissler82c905d2020-04-13 13:39:40 -0500226 self.setscenetasks = set(setscene_tasks)
Brad Bishopa34c0302019-09-23 22:34:48 -0400227
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500228 def finalise(self, fn, d, variant):
229
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600230 mc = d.getVar("__BBMULTICONFIG", False) or ""
231 if variant or mc:
232 fn = bb.cache.realfn2virtual(fn, variant, mc)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500233
234 try:
235 taskdeps = self._build_data(fn, d)
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500236 except bb.parse.SkipRecipe:
237 raise
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500238 except:
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500239 bb.warn("Error during finalise of %s" % fn)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500240 raise
241
242 #Slow but can be useful for debugging mismatched basehashes
243 #for task in self.taskdeps[fn]:
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500244 # self.dump_sigtask(fn, task, d.getVar("STAMP"), False)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500245
246 for task in taskdeps:
Patrick Williams213cb262021-08-07 19:21:33 -0500247 d.setVar("BB_BASEHASH:task-%s" % task, self.basehash[fn + ":" + task])
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500248
Andrew Geissler82c905d2020-04-13 13:39:40 -0500249 def postparsing_clean_cache(self):
250 #
251 # After parsing we can remove some things from memory to reduce our memory footprint
252 #
253 self.gendeps = {}
254 self.lookupcache = {}
255 self.taskdeps = {}
256
Andrew Geissler5a43b432020-06-13 10:46:56 -0500257 def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500258 # Return True if we should keep the dependency, False to drop it
Andrew Geissler7e0e3c02022-02-25 20:34:39 +0000259 # We only manipulate the dependencies for packages not in the ignore
260 # list
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500261 if self.twl and not self.twl.search(recipename):
262 # then process the actual dependencies
263 if self.twl.search(depname):
264 return False
265 return True
266
267 def read_taint(self, fn, task, stampbase):
268 taint = None
269 try:
270 with open(stampbase + '.' + task + '.taint', 'r') as taintf:
271 taint = taintf.read()
272 except IOError:
273 pass
274 return taint
275
Andrew Geissler5a43b432020-06-13 10:46:56 -0500276 def prep_taskhash(self, tid, deps, dataCaches):
Brad Bishop1a4b7ee2018-12-16 17:11:34 -0800277
Brad Bishop08902b02019-08-20 09:16:51 -0400278 (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)
Brad Bishop1a4b7ee2018-12-16 17:11:34 -0800279
Andrew Geissler5a43b432020-06-13 10:46:56 -0500280 self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
Brad Bishop08902b02019-08-20 09:16:51 -0400281 self.runtaskdeps[tid] = []
282 self.file_checksum_values[tid] = []
Andrew Geissler5a43b432020-06-13 10:46:56 -0500283 recipename = dataCaches[mc].pkg_fn[fn]
Andrew Geissler82c905d2020-04-13 13:39:40 -0500284
285 self.tidtopn[tid] = recipename
286
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500287 for dep in sorted(deps, key=clean_basepath):
Andrew Geissler5a43b432020-06-13 10:46:56 -0500288 (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
289 depname = dataCaches[depmc].pkg_fn[depmcfn]
290 if not self.supports_multiconfig_datacaches and mc != depmc:
291 # If the signature generator doesn't understand multiconfig
292 # data caches, any dependency not in the same multiconfig must
293 # be skipped for backward compatibility
Andrew Geissler99467da2019-02-25 18:54:23 -0600294 continue
Andrew Geissler5a43b432020-06-13 10:46:56 -0500295 if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches):
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500296 continue
297 if dep not in self.taskhash:
Brad Bishop1a4b7ee2018-12-16 17:11:34 -0800298 bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
Brad Bishop08902b02019-08-20 09:16:51 -0400299 self.runtaskdeps[tid].append(dep)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500300
Andrew Geissler5a43b432020-06-13 10:46:56 -0500301 if task in dataCaches[mc].file_checksums[fn]:
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500302 if self.checksum_cache:
Andrew Geissler5a43b432020-06-13 10:46:56 -0500303 checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500304 else:
Andrew Geissler5a43b432020-06-13 10:46:56 -0500305 checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500306 for (f,cs) in checksums:
Brad Bishop08902b02019-08-20 09:16:51 -0400307 self.file_checksum_values[tid].append((f,cs))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500308
Andrew Geissler5a43b432020-06-13 10:46:56 -0500309 taskdep = dataCaches[mc].task_deps[fn]
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500310 if 'nostamp' in taskdep and task in taskdep['nostamp']:
311 # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
Andrew Geissler82c905d2020-04-13 13:39:40 -0500312 if tid in self.taints and self.taints[tid].startswith("nostamp:"):
313 # Don't reset taint value upon every call
314 pass
315 else:
316 import uuid
317 taint = str(uuid.uuid4())
318 self.taints[tid] = "nostamp:" + taint
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500319
Andrew Geissler5a43b432020-06-13 10:46:56 -0500320 taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn])
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500321 if taint:
Brad Bishop08902b02019-08-20 09:16:51 -0400322 self.taints[tid] = taint
323 logger.warning("%s is tainted from a forced run" % tid)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500324
Andrew Geissler82c905d2020-04-13 13:39:40 -0500325 return
326
Andrew Geissler5a43b432020-06-13 10:46:56 -0500327 def get_taskhash(self, tid, deps, dataCaches):
Andrew Geissler82c905d2020-04-13 13:39:40 -0500328
329 data = self.basehash[tid]
330 for dep in self.runtaskdeps[tid]:
Andrew Geissler6ce62a22020-11-30 19:58:47 -0600331 data = data + self.get_unihash(dep)
Andrew Geissler82c905d2020-04-13 13:39:40 -0500332
333 for (f, cs) in self.file_checksum_values[tid]:
334 if cs:
Andrew Geissler595f6302022-01-24 19:11:47 +0000335 if "/./" in f:
336 data = data + "./" + f.split("/./")[1]
Andrew Geissler82c905d2020-04-13 13:39:40 -0500337 data = data + cs
338
339 if tid in self.taints:
340 if self.taints[tid].startswith("nostamp:"):
341 data = data + self.taints[tid][8:]
342 else:
343 data = data + self.taints[tid]
344
Brad Bishop19323692019-04-05 15:28:33 -0400345 h = hashlib.sha256(data.encode("utf-8")).hexdigest()
Brad Bishop08902b02019-08-20 09:16:51 -0400346 self.taskhash[tid] = h
Patrick Williams213cb262021-08-07 19:21:33 -0500347 #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task])
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500348 return h
349
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500350 def writeout_file_checksum_cache(self):
351 """Write/update the file checksum cache onto disk"""
352 if self.checksum_cache:
353 self.checksum_cache.save_extras()
354 self.checksum_cache.save_merge()
355 else:
356 bb.fetch2.fetcher_parse_save()
357 bb.fetch2.fetcher_parse_done()
358
Brad Bishop08902b02019-08-20 09:16:51 -0400359 def save_unitaskhashes(self):
360 self.unihash_cache.save(self.unitaskhashes)
361
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500362 def dump_sigtask(self, fn, task, stampbase, runtime):
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500363
Brad Bishop08902b02019-08-20 09:16:51 -0400364 tid = fn + ":" + task
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500365 referencestamp = stampbase
366 if isinstance(runtime, str) and runtime.startswith("customfile"):
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500367 sigfile = stampbase
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500368 referencestamp = runtime[11:]
Brad Bishop08902b02019-08-20 09:16:51 -0400369 elif runtime and tid in self.taskhash:
Brad Bishop00e122a2019-10-05 11:10:57 -0400370 sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500371 else:
Brad Bishop08902b02019-08-20 09:16:51 -0400372 sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500373
Andrew Geisslerc3d88e42020-10-02 09:45:00 -0500374 with bb.utils.umask(0o002):
375 bb.utils.mkdirhier(os.path.dirname(sigfile))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500376
377 data = {}
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500378 data['task'] = task
Andrew Geissler7e0e3c02022-02-25 20:34:39 +0000379 data['basehash_ignore_vars'] = self.basehash_ignore_vars
380 data['taskhash_ignore_tasks'] = self.taskhash_ignore_tasks
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500381 data['taskdeps'] = self.taskdeps[fn][task]
Brad Bishop08902b02019-08-20 09:16:51 -0400382 data['basehash'] = self.basehash[tid]
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500383 data['gendeps'] = {}
384 data['varvals'] = {}
385 data['varvals'][task] = self.lookupcache[fn][task]
386 for dep in self.taskdeps[fn][task]:
Andrew Geissler7e0e3c02022-02-25 20:34:39 +0000387 if dep in self.basehash_ignore_vars:
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500388 continue
389 data['gendeps'][dep] = self.gendeps[fn][dep]
390 data['varvals'][dep] = self.lookupcache[fn][dep]
391
Brad Bishop08902b02019-08-20 09:16:51 -0400392 if runtime and tid in self.taskhash:
393 data['runtaskdeps'] = self.runtaskdeps[tid]
Andrew Geissler595f6302022-01-24 19:11:47 +0000394 data['file_checksum_values'] = []
395 for f,cs in self.file_checksum_values[tid]:
396 if "/./" in f:
397 data['file_checksum_values'].append(("./" + f.split("/./")[1], cs))
398 else:
399 data['file_checksum_values'].append((os.path.basename(f), cs))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500400 data['runtaskhashes'] = {}
401 for dep in data['runtaskdeps']:
Brad Bishop19323692019-04-05 15:28:33 -0400402 data['runtaskhashes'][dep] = self.get_unihash(dep)
Brad Bishop08902b02019-08-20 09:16:51 -0400403 data['taskhash'] = self.taskhash[tid]
Brad Bishop00e122a2019-10-05 11:10:57 -0400404 data['unihash'] = self.get_unihash(tid)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500405
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500406 taint = self.read_taint(fn, task, referencestamp)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500407 if taint:
408 data['taint'] = taint
409
Brad Bishop08902b02019-08-20 09:16:51 -0400410 if runtime and tid in self.taints:
411 if 'nostamp:' in self.taints[tid]:
412 data['taint'] = self.taints[tid]
Patrick Williamsf1e5d692016-03-30 15:21:19 -0500413
Brad Bishop37a0e4d2017-12-04 01:01:44 -0500414 computed_basehash = calc_basehash(data)
Brad Bishop08902b02019-08-20 09:16:51 -0400415 if computed_basehash != self.basehash[tid]:
416 bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
417 if runtime and tid in self.taskhash:
Brad Bishop37a0e4d2017-12-04 01:01:44 -0500418 computed_taskhash = calc_taskhash(data)
Brad Bishop08902b02019-08-20 09:16:51 -0400419 if computed_taskhash != self.taskhash[tid]:
420 bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
421 sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)
Brad Bishop37a0e4d2017-12-04 01:01:44 -0500422
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500423 fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
424 try:
Andrew Geisslereff27472021-10-29 15:35:00 -0500425 with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f:
426 json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder)
427 f.flush()
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600428 os.chmod(tmpfile, 0o664)
Andrew Geisslerc926e172021-05-07 16:11:35 -0500429 bb.utils.rename(tmpfile, sigfile)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500430 except (OSError, IOError) as err:
431 try:
432 os.unlink(tmpfile)
433 except OSError:
434 pass
435 raise err
436
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500437 def dump_sigfn(self, fn, dataCaches, options):
438 if fn in self.taskdeps:
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500439 for task in self.taskdeps[fn]:
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600440 tid = fn + ":" + task
Brad Bishop08902b02019-08-20 09:16:51 -0400441 mc = bb.runqueue.mc_from_tid(tid)
442 if tid not in self.taskhash:
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500443 continue
Brad Bishop08902b02019-08-20 09:16:51 -0400444 if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
445 bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
446 bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600447 self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500448
class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    """Signature generator that embeds the task hash in stamp file names."""

    name = "basichash"

    def get_stampfile_hash(self, tid):
        """Return the hash used in stamp files: the taskhash when known,
        otherwise the base hash."""
        try:
            return self.taskhash[tid]
        except KeyError:
            # Fall back to the base hash; a missing basehash entry is an
            # error and the resulting KeyError is intentional.
            return self.basehash[tid]

    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
        """Build the stamp file path for a task, embedding its hash
        (or '*' when building a clean/glob mask)."""
        realtask = taskname
        if taskname.endswith("_setscene") and taskname != "do_setscene":
            # Strip the "_setscene" suffix to find the underlying task.
            realtask = taskname[:-9]
        tid = fn + ":" + realtask
        h = "*" if clean else self.get_stampfile_hash(tid)
        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
        """Return a wildcard pattern matching any hash variant of the stamp."""
        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, d, fn):
        """Force a task to rerun by writing a taint file."""
        bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
        bb.build.write_taint(task, d, fn)
477
Brad Bishop08902b02019-08-20 09:16:51 -0400478class SignatureGeneratorUniHashMixIn(object):
Andrew Geissler82c905d2020-04-13 13:39:40 -0500479 def __init__(self, data):
480 self.extramethod = {}
481 super().__init__(data)
482
Brad Bishop08902b02019-08-20 09:16:51 -0400483 def get_taskdata(self):
Andrew Geissler82c905d2020-04-13 13:39:40 -0500484 return (self.server, self.method, self.extramethod) + super().get_taskdata()
Brad Bishop08902b02019-08-20 09:16:51 -0400485
486 def set_taskdata(self, data):
Andrew Geissler82c905d2020-04-13 13:39:40 -0500487 self.server, self.method, self.extramethod = data[:3]
488 super().set_taskdata(data[3:])
Brad Bishop08902b02019-08-20 09:16:51 -0400489
Brad Bishopa34c0302019-09-23 22:34:48 -0400490 def client(self):
491 if getattr(self, '_client', None) is None:
492 self._client = hashserv.create_client(self.server)
493 return self._client
494
Andrew Geissler9aee5002022-03-30 16:27:02 +0000495 def reset(self, data):
496 if getattr(self, '_client', None) is not None:
497 self._client.close()
498 self._client = None
499 return super().reset(data)
500
501 def exit(self):
502 if getattr(self, '_client', None) is not None:
503 self._client.close()
504 self._client = None
505 return super().exit()
506
Brad Bishop08902b02019-08-20 09:16:51 -0400507 def get_stampfile_hash(self, tid):
508 if tid in self.taskhash:
509 # If a unique hash is reported, use it as the stampfile hash. This
510 # ensures that if a task won't be re-run if the taskhash changes,
511 # but it would result in the same output hash
Andrew Geissler82c905d2020-04-13 13:39:40 -0500512 unihash = self._get_unihash(tid)
Brad Bishop08902b02019-08-20 09:16:51 -0400513 if unihash is not None:
514 return unihash
515
516 return super().get_stampfile_hash(tid)
517
518 def set_unihash(self, tid, unihash):
Andrew Geissler82c905d2020-04-13 13:39:40 -0500519 (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
520 key = mc + ":" + self.tidtopn[tid] + ":" + taskname
521 self.unitaskhashes[key] = (self.taskhash[tid], unihash)
522 self.unihash[tid] = unihash
523
524 def _get_unihash(self, tid, checkkey=None):
525 if tid not in self.tidtopn:
526 return None
527 (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
528 key = mc + ":" + self.tidtopn[tid] + ":" + taskname
529 if key not in self.unitaskhashes:
530 return None
531 if not checkkey:
532 checkkey = self.taskhash[tid]
533 (key, unihash) = self.unitaskhashes[key]
534 if key != checkkey:
535 return None
536 return unihash
Brad Bishop08902b02019-08-20 09:16:51 -0400537
    def get_unihash(self, tid):
        """Return the unique (equivalence) hash for tid.

        Non-setscene tasks just use their taskhash. For setscene tasks the
        locally cached unihash is used when present; otherwise the hash
        equivalence server is queried, and failing that the unihash defaults
        to the taskhash itself.
        """
        taskhash = self.taskhash[tid]

        # If it's not a setscene task we can return the taskhash directly
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # the most recent entry for each task
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) The value is easy for multiple independent builders to derive the
        #    same unique hash from the same input. This means that if the
        #    independent builders find the same taskhash, but it isn't reported
        #    to the server, there is a better chance that they will agree on
        #    the unique hash.
        unihash = taskhash

        try:
            # Apply any per-tid method suffix before asking the server.
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported it at debug level 2. If they differ, that
                # is much more interesting, so it is reported at debug level 1
                hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except ConnectionError as e:
            # Server unreachable: warn and fall back to taskhash as unihash.
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash
587
    def report_unihash(self, path, task, d):
        """
        Compute the task's output hash and report the
        taskhash -> outhash -> unihash mapping to the hash equivalence server.

        path: sstate output path handed to the configured output-hash method
        task: task name without the "do_" prefix
        d: the task's datastore

        The output-hash method named by self.method is evaluated with a
        signature file open for writing; if the server then returns a
        different unihash, local state, BB_UNIHASH and the runqueue (via a
        taskUniHashUpdate event) are all updated. Connection errors only
        warn; the build continues with the existing unihash.
        """
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        fn = d.getVar('BB_FILENAME')
        tid = fn + ':do_' + task
        key = tid + ':' + taskhash

        # Only setscene tasks are reported when a setscene task list exists
        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just exit
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        # PID-suffixed name keeps concurrent writers apart; a stable symlink
        # (sigfile_link) is re-pointed at the latest file in the finally block
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            # self.method may be "module.function" (imported here) or a name
            # already resolvable by better_eval
            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                # Optionally attach identifying task data plus the signature
                # file contents written by the output-hash method above
                if report_taskdata:
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    # Server assigned a different unihash: propagate to the
                    # runqueue, our caches and the datastore
                    hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except ConnectionError as e:
                # Reporting is best-effort; never fail the task over it
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

            sigfile_link_path = os.path.join(tempdir, sigfile_link)
            bb.utils.remove(sigfile_link_path)

            try:
                os.symlink(sigfile_name, sigfile_link_path)
            except OSError:
                pass
673
Andrew Geissler82c905d2020-04-13 13:39:40 -0500674 def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
675 try:
676 extra_data = {}
677 method = self.method
678 if tid in self.extramethod:
679 method = method + self.extramethod[tid]
680
681 data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
682 hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))
683
684 if data is None:
685 bb.warn("Server unable to handle unihash report")
686 return False
687
688 finalunihash = data['unihash']
689
690 if finalunihash == current_unihash:
691 hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
692 elif finalunihash == wanted_unihash:
693 hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
694 self.set_unihash(tid, finalunihash)
695 return True
696 else:
697 # TODO: What to do here?
698 hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))
699
Andrew Geisslerc926e172021-05-07 16:11:35 -0500700 except ConnectionError as e:
Andrew Geissler82c905d2020-04-13 13:39:40 -0500701 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
702
703 return False
Brad Bishop08902b02019-08-20 09:16:51 -0400704
#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    name = "TestEquivHash"
    def init_rundepcheck(self, data):
        """Point the generator at the selftest hash server and output-hash method."""
        super().init_rundepcheck(data)
        self.method = "sstate_output_hash"
        self.server = data.getVar('BB_HASHSERVE')
714
Andrew Geissler5a43b432020-06-13 10:46:56 -0500715#
716# Dummy class used for bitbake-selftest
717#
718class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash):
719 name = "TestMulticonfigDepends"
720 supports_multiconfig_datacaches = True
Brad Bishop08902b02019-08-20 09:16:51 -0400721
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500722def dump_this_task(outfile, d):
723 import bb.parse
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500724 fn = d.getVar("BB_FILENAME")
725 task = "do_" + d.getVar("BB_CURRENTTASK")
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500726 referencestamp = bb.build.stamp_internal(task, d, None, True)
727 bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile:" + referencestamp)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500728
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500729def init_colors(enable_color):
730 """Initialise colour dict for passing to compare_sigfiles()"""
731 # First set up the colours
Brad Bishop1a4b7ee2018-12-16 17:11:34 -0800732 colors = {'color_title': '\033[1m',
733 'color_default': '\033[0m',
734 'color_add': '\033[0;32m',
735 'color_remove': '\033[0;31m',
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500736 }
737 # Leave all keys present but clear the values
738 if not enable_color:
739 for k in colors.keys():
740 colors[k] = ''
741 return colors
742
def worddiff_str(oldstr, newstr, colors=None):
    """Return a word-level inline diff of two strings.

    Additions render as {+word+} and removals as [-word-], optionally
    colourised via a dict from init_colors(). A note is appended when the
    only difference is whitespace.
    """
    colors = colors or init_colors(False)
    pieces = []
    for op, words in simplediff.diff(oldstr.split(' '), newstr.split(' ')):
        text = ' '.join(words)
        if op == '=':
            pieces.append(text)
        elif op == '+':
            pieces.append('{color_add}{{+{value}+}}{color_default}'.format(value=text, **colors))
        elif op == '-':
            pieces.append('{color_remove}[-{value}-]{color_default}'.format(value=text, **colors))
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(pieces), whitespace_note)
762
def list_inline_diff(oldlist, newlist, colors=None):
    """Return an inline diff of two lists rendered as a bracketed string.

    Unchanged entries are quoted; added entries are prefixed with '+' and
    removed ones with '-', optionally colourised via init_colors() output.
    """
    colors = colors or init_colors(False)
    entries = []
    for op, words in simplediff.diff(oldlist, newlist):
        text = ' '.join(words)
        if op == '=':
            entries.append("'%s'" % text)
        elif op == '+':
            entries.append('{color_add}+{value}{color_default}'.format(value=text, **colors))
        elif op == '-':
            entries.append('{color_remove}-{value}{color_default}'.format(value=text, **colors))
    return '[%s]' % (', '.join(entries))
779
def clean_basepath(basepath):
    """Strip the filesystem prefix from a siginfo task path.

    Turns e.g. '/abs/path/recipe.bb/do_task' into 'recipe.bb/do_task',
    'virtual:native:/p/recipe.bb/do_task' into
    'recipe.bb/do_task:virtual:native', and 'mc:name:/p/recipe.bb/do_task'
    into 'recipe.bb/do_task:mc:name'.
    """
    basepath, parent, recipe_task = basepath.rsplit("/", 2)
    cleaned = parent + '/' + recipe_task

    # Plain absolute path: no class/mc prefixes to carry over
    if basepath[0] == '/':
        return cleaned

    if basepath.startswith("mc:") and basepath.count(':') >= 2:
        _, mc_name, basepath = basepath.split(":", 2)
        mc_suffix = ':mc:' + mc_name
    else:
        mc_suffix = ''

    # mc stuff now removed from basepath. Whatever was next, if present will be the first
    # suffix. ':/', recipe path start, marks the end of this. Something like
    # 'virtual:a[:b[:c]]:/path...' (b and c being optional)
    if basepath[0] != '/':
        cleaned += ':' + basepath.split(':/', 1)[0]

    return cleaned + mc_suffix
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500800
def clean_basepaths(a):
    """Return a copy of dict a with every key run through clean_basepath()."""
    return {clean_basepath(key): a[key] for key in a}
806
def clean_basepaths_list(a):
    """Return a list with every entry of a run through clean_basepath()."""
    return [clean_basepath(entry) for entry in a]
812
# Handled renamed fields
def handle_renames(data):
    """Migrate pre-rename siginfo field names to their current names, in place."""
    renames = {
        'basewhitelist': 'basehash_ignore_vars',
        'taskwhitelist': 'taskhash_ignore_tasks',
    }
    for old_name, new_name in renames.items():
        if old_name in data:
            data[new_name] = data.pop(old_name)
821
822
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500823def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500824 output = []
825
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500826 colors = init_colors(color)
827 def color_format(formatstr, **values):
828 """
829 Return colour formatted string.
830 NOTE: call with the format string, not an already formatted string
831 containing values (otherwise you could have trouble with { and }
832 characters)
833 """
834 if not formatstr.endswith('{color_default}'):
835 formatstr += '{color_default}'
836 # In newer python 3 versions you can pass both of these directly,
837 # but we only require 3.4 at the moment
838 formatparams = {}
839 formatparams.update(colors)
840 formatparams.update(values)
841 return formatstr.format(**formatparams)
842
Andrew Geisslereff27472021-10-29 15:35:00 -0500843 with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
844 a_data = json.load(f, object_hook=SetDecoder)
845 with bb.compress.zstd.open(b, "rt", encoding="utf-8", num_threads=1) as f:
846 b_data = json.load(f, object_hook=SetDecoder)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500847
Andrew Geissler7e0e3c02022-02-25 20:34:39 +0000848 for data in [a_data, b_data]:
849 handle_renames(data)
850
851 def dict_diff(a, b, ignored_vars=set()):
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500852 sa = set(a.keys())
853 sb = set(b.keys())
854 common = sa & sb
855 changed = set()
856 for i in common:
Andrew Geissler7e0e3c02022-02-25 20:34:39 +0000857 if a[i] != b[i] and i not in ignored_vars:
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500858 changed.add(i)
859 added = sb - sa
860 removed = sa - sb
861 return changed, added, removed
862
863 def file_checksums_diff(a, b):
864 from collections import Counter
Andrew Geisslereff27472021-10-29 15:35:00 -0500865
866 # Convert lists back to tuples
867 a = [(f[0], f[1]) for f in a]
868 b = [(f[0], f[1]) for f in b]
869
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500870 # Compare lists, ensuring we can handle duplicate filenames if they exist
871 removedcount = Counter(a)
872 removedcount.subtract(b)
873 addedcount = Counter(b)
874 addedcount.subtract(a)
875 added = []
876 for x in b:
877 if addedcount[x] > 0:
878 addedcount[x] -= 1
879 added.append(x)
880 removed = []
881 changed = []
882 for x in a:
883 if removedcount[x] > 0:
884 removedcount[x] -= 1
885 for y in added:
886 if y[0] == x[0]:
887 changed.append((x[0], x[1], y[1]))
888 added.remove(y)
889 break
890 else:
891 removed.append(x)
892 added = [x[0] for x in added]
893 removed = [x[0] for x in removed]
894 return changed, added, removed
895
Andrew Geissler7e0e3c02022-02-25 20:34:39 +0000896 if 'basehash_ignore_vars' in a_data and a_data['basehash_ignore_vars'] != b_data['basehash_ignore_vars']:
897 output.append(color_format("{color_title}basehash_ignore_vars changed{color_default} from '%s' to '%s'") % (a_data['basehash_ignore_vars'], b_data['basehash_ignore_vars']))
898 if a_data['basehash_ignore_vars'] and b_data['basehash_ignore_vars']:
899 output.append("changed items: %s" % a_data['basehash_ignore_vars'].symmetric_difference(b_data['basehash_ignore_vars']))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500900
Andrew Geissler7e0e3c02022-02-25 20:34:39 +0000901 if 'taskhash_ignore_tasks' in a_data and a_data['taskhash_ignore_tasks'] != b_data['taskhash_ignore_tasks']:
902 output.append(color_format("{color_title}taskhash_ignore_tasks changed{color_default} from '%s' to '%s'") % (a_data['taskhash_ignore_tasks'], b_data['taskhash_ignore_tasks']))
903 if a_data['taskhash_ignore_tasks'] and b_data['taskhash_ignore_tasks']:
904 output.append("changed items: %s" % a_data['taskhash_ignore_tasks'].symmetric_difference(b_data['taskhash_ignore_tasks']))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500905
906 if a_data['taskdeps'] != b_data['taskdeps']:
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500907 output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500908
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500909 if a_data['basehash'] != b_data['basehash'] and not collapsed:
910 output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500911
Andrew Geissler7e0e3c02022-02-25 20:34:39 +0000912 changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basehash_ignore_vars'] & b_data['basehash_ignore_vars'])
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500913 if changed:
Patrick Williams93c203f2021-10-06 16:15:23 -0500914 for dep in sorted(changed):
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500915 output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500916 if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
917 output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
918 if added:
Patrick Williams93c203f2021-10-06 16:15:23 -0500919 for dep in sorted(added):
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500920 output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500921 if removed:
Patrick Williams93c203f2021-10-06 16:15:23 -0500922 for dep in sorted(removed):
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500923 output.append(color_format("{color_title}Dependency on Variable %s was removed") % (dep))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500924
925
926 changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
927 if changed:
Patrick Williams93c203f2021-10-06 16:15:23 -0500928 for dep in sorted(changed):
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500929 oldval = a_data['varvals'][dep]
930 newval = b_data['varvals'][dep]
931 if newval and oldval and ('\n' in oldval or '\n' in newval):
932 diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
933 # Cut off the first two lines, since we aren't interested in
934 # the old/new filename (they are blank anyway in this case)
935 difflines = list(diff)[2:]
936 if color:
937 # Add colour to diff output
938 for i, line in enumerate(difflines):
939 if line.startswith('+'):
940 line = color_format('{color_add}{line}', line=line)
941 difflines[i] = line
942 elif line.startswith('-'):
943 line = color_format('{color_remove}{line}', line=line)
944 difflines[i] = line
945 output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
946 elif newval and oldval and (' ' in oldval or ' ' in newval):
947 output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
948 else:
949 output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500950
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600951 if not 'file_checksum_values' in a_data:
Andrew Geisslereff27472021-10-29 15:35:00 -0500952 a_data['file_checksum_values'] = []
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600953 if not 'file_checksum_values' in b_data:
Andrew Geisslereff27472021-10-29 15:35:00 -0500954 b_data['file_checksum_values'] = []
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600955
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500956 changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
957 if changed:
958 for f, old, new in changed:
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500959 output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500960 if added:
961 for f in added:
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500962 output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500963 if removed:
964 for f in removed:
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500965 output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500966
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600967 if not 'runtaskdeps' in a_data:
968 a_data['runtaskdeps'] = {}
969 if not 'runtaskdeps' in b_data:
970 b_data['runtaskdeps'] = {}
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500971
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500972 if not collapsed:
973 if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
974 changed = ["Number of task dependencies changed"]
975 else:
976 changed = []
977 for idx, task in enumerate(a_data['runtaskdeps']):
978 a = a_data['runtaskdeps'][idx]
979 b = b_data['runtaskdeps'][idx]
980 if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
981 changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500982
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500983 if changed:
984 clean_a = clean_basepaths_list(a_data['runtaskdeps'])
985 clean_b = clean_basepaths_list(b_data['runtaskdeps'])
986 if clean_a != clean_b:
987 output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
988 else:
989 output.append(color_format("{color_title}runtaskdeps changed:"))
990 output.append("\n".join(changed))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500991
992
993 if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
994 a = a_data['runtaskhashes']
995 b = b_data['runtaskhashes']
996 changed, added, removed = dict_diff(a, b)
997 if added:
Patrick Williams93c203f2021-10-06 16:15:23 -0500998 for dep in sorted(added):
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500999 bdep_found = False
1000 if removed:
1001 for bdep in removed:
1002 if b[dep] == a[bdep]:
1003 #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
1004 bdep_found = True
1005 if not bdep_found:
Brad Bishop6e60e8b2018-02-01 10:27:11 -05001006 output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (clean_basepath(dep), b[dep]))
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001007 if removed:
Patrick Williams93c203f2021-10-06 16:15:23 -05001008 for dep in sorted(removed):
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001009 adep_found = False
1010 if added:
1011 for adep in added:
1012 if b[adep] == a[dep]:
1013 #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
1014 adep_found = True
1015 if not adep_found:
Brad Bishop6e60e8b2018-02-01 10:27:11 -05001016 output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (clean_basepath(dep), a[dep]))
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001017 if changed:
Patrick Williams93c203f2021-10-06 16:15:23 -05001018 for dep in sorted(changed):
Brad Bishop6e60e8b2018-02-01 10:27:11 -05001019 if not collapsed:
Andrew Geissler9aee5002022-03-30 16:27:02 +00001020 output.append(color_format("{color_title}Hash for task dependency %s changed{color_default} from %s to %s") % (clean_basepath(dep), a[dep], b[dep]))
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001021 if callable(recursecb):
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001022 recout = recursecb(dep, a[dep], b[dep])
1023 if recout:
Brad Bishop6e60e8b2018-02-01 10:27:11 -05001024 if collapsed:
1025 output.extend(recout)
1026 else:
Brad Bishop1a4b7ee2018-12-16 17:11:34 -08001027 # If a dependent hash changed, might as well print the line above and then defer to the changes in
Brad Bishop6e60e8b2018-02-01 10:27:11 -05001028 # that hash since in all likelyhood, they're the same changes this task also saw.
1029 output = [output[-1]] + recout
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001030
1031 a_taint = a_data.get('taint', None)
1032 b_taint = b_data.get('taint', None)
1033 if a_taint != b_taint:
Brad Bishop96ff1982019-08-19 13:50:42 -04001034 if a_taint and a_taint.startswith('nostamp:'):
Brad Bishopc342db32019-05-15 21:57:59 -04001035 a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
Brad Bishop96ff1982019-08-19 13:50:42 -04001036 if b_taint and b_taint.startswith('nostamp:'):
Brad Bishopc342db32019-05-15 21:57:59 -04001037 b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
Brad Bishop6e60e8b2018-02-01 10:27:11 -05001038 output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001039
1040 return output
1041
1042
def calc_basehash(sigdata):
    """Recompute a task's base hash from dumped signature data.

    Concatenates the task's own variable value with each dependent
    variable name and (when set) its value, in taskdeps order, and
    returns the sha256 hex digest.
    """
    task = sigdata['task']
    basedata = sigdata['varvals'][task]

    if basedata is None:
        basedata = ''

    parts = [basedata]
    for dep in sigdata['taskdeps']:
        parts.append(dep)
        val = sigdata['varvals'][dep]
        if val is not None:
            parts.append(str(val))

    return hashlib.sha256("".join(parts).encode("utf-8")).hexdigest()
Patrick Williamsd8c66bc2016-06-20 12:57:21 -05001058
def calc_taskhash(sigdata):
    """Recompute a task's taskhash from dumped signature data.

    Folds in the basehash, each runtime dependency's hash, each file
    checksum (plus the path itself for workdir-relative "./" paths), and
    any taint, returning the sha256 hex digest.
    """
    data = sigdata['basehash']

    for dep in sigdata['runtaskdeps']:
        data += sigdata['runtaskhashes'][dep]

    for entry in sigdata['file_checksum_values']:
        path, checksum = entry[0], entry[1]
        if checksum:
            if "./" in path:
                data += path
            data += checksum

    if 'taint' in sigdata:
        taint = sigdata['taint']
        if 'nostamp:' in taint:
            # Skip the 'nostamp:' prefix (8 chars), hashing only the uuid
            data += taint[8:]
        else:
            data += taint

    return hashlib.sha256(data.encode("utf-8")).hexdigest()
Patrick Williamsd8c66bc2016-06-20 12:57:21 -05001078
1079
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001080def dump_sigfile(a):
1081 output = []
1082
Andrew Geisslereff27472021-10-29 15:35:00 -05001083 with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
1084 a_data = json.load(f, object_hook=SetDecoder)
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001085
Andrew Geissler7e0e3c02022-02-25 20:34:39 +00001086 handle_renames(a_data)
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001087
Andrew Geissler7e0e3c02022-02-25 20:34:39 +00001088 output.append("basehash_ignore_vars: %s" % (sorted(a_data['basehash_ignore_vars'])))
1089
1090 output.append("taskhash_ignore_tasks: %s" % (sorted(a_data['taskhash_ignore_tasks'] or [])))
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001091
1092 output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))
1093
1094 output.append("basehash: %s" % (a_data['basehash']))
1095
Andrew Geissler595f6302022-01-24 19:11:47 +00001096 for dep in sorted(a_data['gendeps']):
1097 output.append("List of dependencies for variable %s is %s" % (dep, sorted(a_data['gendeps'][dep])))
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001098
Andrew Geissler595f6302022-01-24 19:11:47 +00001099 for dep in sorted(a_data['varvals']):
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001100 output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))
1101
1102 if 'runtaskdeps' in a_data:
Andrew Geissler595f6302022-01-24 19:11:47 +00001103 output.append("Tasks this task depends on: %s" % (sorted(a_data['runtaskdeps'])))
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001104
1105 if 'file_checksum_values' in a_data:
Andrew Geissler595f6302022-01-24 19:11:47 +00001106 output.append("This task depends on the checksums of files: %s" % (sorted(a_data['file_checksum_values'])))
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001107
1108 if 'runtaskhashes' in a_data:
Andrew Geissler595f6302022-01-24 19:11:47 +00001109 for dep in sorted(a_data['runtaskhashes']):
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001110 output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))
1111
1112 if 'taint' in a_data:
Brad Bishopc342db32019-05-15 21:57:59 -04001113 if a_data['taint'].startswith('nostamp:'):
1114 msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
1115 else:
1116 msg = a_data['taint']
1117 output.append("Tainted (by forced/invalidated task): %s" % msg)
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001118
Patrick Williamsd8c66bc2016-06-20 12:57:21 -05001119 if 'task' in a_data:
1120 computed_basehash = calc_basehash(a_data)
1121 output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
1122 else:
1123 output.append("Unable to compute base hash")
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001124
Patrick Williamsd8c66bc2016-06-20 12:57:21 -05001125 computed_taskhash = calc_taskhash(a_data)
1126 output.append("Computed task hash is %s" % computed_taskhash)
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001127
1128 return output