#
# Copyright BitBake Contributors
#
# SPDX-License-Identifier: GPL-2.0-only
#

import hashlib
import logging
import os
import re
import tempfile
import pickle
import bb.data
import difflib
import simplediff
import json
import bb.compress.zstd
from bb.checksum import FileChecksumCache
from bb import runqueue
import hashserv
import hashserv.client

logger = logging.getLogger('BitBake.SigGen')
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')

class SetEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, set):
            return dict(_set_object=list(sorted(obj)))
        return json.JSONEncoder.default(self, obj)

def SetDecoder(dct):
    if '_set_object' in dct:
        return set(dct['_set_object'])
    return dct
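
# Minimal round-trip sketch of the encoder/decoder pair above (hypothetical
# data); this is how dumped siginfo files preserve Python sets in JSON:
#
#   encoded = json.dumps({"deps": {"a", "b"}}, cls=SetEncoder)
#   decoded = json.loads(encoded, object_hook=SetDecoder)
#   # decoded == {"deps": {"a", "b"}}  (the set survives the round trip)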

def init(d):
    siggens = [obj for obj in globals().values()
                      if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)
    else:
        logger.error("Invalid signature generator '%s', using default 'noop'\n"
                     "Available generators: %s", desired,
                     ', '.join(obj.name for obj in siggens))
        return SignatureGenerator(d)
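
# For example (hypothetical configuration): with BB_SIGNATURE_HANDLER set to
# "basichash", init(d) returns a SignatureGeneratorBasicHash instance; an
# unknown handler name logs an error and falls back to the "noop"
# SignatureGenerator below.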

class SignatureGenerator(object):
    """
    Dummy "noop" signature generator. It computes no real task signatures
    and is used as the fallback when no matching handler is found.
    """
    name = "noop"

    # If the derived class supports multiconfig datacaches, set this to True
    # The default is False for backward compatibility with derived signature
    # generators that do not understand multiconfig caches
    supports_multiconfig_datacaches = False

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.unitaskhashes = {}
        self.tidtopn = {}
        self.setscenetasks = set()

    def finalise(self, fn, d, varient):
        return

    def postparsing_clean_cache(self):
        return

    def get_unihash(self, tid):
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        return

    def get_taskhash(self, tid, deps, dataCaches):
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, fn, task, stampbase, runtime):
        return

    def invalidate_task(self, task, d, fn):
        bb.build.del_stamp(task, d, fn)

    def dump_sigs(self, dataCache, options):
        return

    def get_taskdata(self):
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        self.__init__(data)

    def get_taskhashes(self):
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        return

    def copy_unitaskhashes(self, targetdir):
        return

    def set_setscene_tasks(self, setscene_tasks):
        return

    @classmethod
    def get_data_caches(cls, dataCaches, mc):
        """
        This function returns the datacaches that should be passed to signature
        generator functions. If the signature generator supports multiconfig
        caches, the entire dictionary of data caches is sent, otherwise a
        special proxy is sent that supports both index access to all
        multiconfigs, and also direct access for the default multiconfig.

        The proxy class allows code in this class itself to always use
        multiconfig aware code (to ease maintenance), but derived classes that
        are unaware of multiconfig data caches can still access the default
        multiconfig as expected.

        Do not override this function in derived classes; it will be removed in
        the future when support for multiconfig data caches is mandatory
        """
        class DataCacheProxy(object):
            def __init__(self):
                pass

            def __getitem__(self, key):
                return dataCaches[key]

            def __getattr__(self, name):
                return getattr(dataCaches[mc], name)

        if cls.supports_multiconfig_datacaches:
            return dataCaches

        return DataCacheProxy()
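
    # Sketch of the effect (hypothetical names): a generator that is not
    # multiconfig-aware receives the proxy, so both dataCaches.pkg_fn[fn]
    # (attribute access, routed to the default multiconfig) and
    # dataCaches['mc1'].pkg_fn[fn] (explicit index) resolve without the
    # derived class needing to know about multiconfigs.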

    def exit(self):
        return

class SignatureGeneratorBasic(SignatureGenerator):
    """
    Basic signature generator: computes base hashes from each task's variable
    dependencies and task hashes from run-time dependencies, file checksums
    and taints.
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.taskdeps = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.gendeps = {}
        self.lookupcache = {}
        self.setscenetasks = set()
        self.basehash_ignore_vars = set((data.getVar("BB_BASEHASH_IGNORE_VARS") or "").split())
        self.taskhash_ignore_tasks = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        self.taskhash_ignore_tasks = data.getVar("BB_TASKHASH_IGNORE_TASKS") or None
        if self.taskhash_ignore_tasks:
            self.twl = re.compile(self.taskhash_ignore_tasks)
        else:
            self.twl = None

    def _build_data(self, fn, d):

        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basehash_ignore_vars)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basehash_ignore_vars, fn)

        for task in tasklist:
            tid = fn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before running printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        self.taskdeps[fn] = taskdeps
        self.gendeps[fn] = gendeps
        self.lookupcache[fn] = lookupcache

        return taskdeps

    def set_setscene_tasks(self, setscene_tasks):
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        if variant or mc:
            fn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps = self._build_data(fn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % fn)
            raise

        #Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[fn]:
        #    self.dump_sigtask(fn, task, d.getVar("STAMP"), False)

        for task in taskdeps:
            d.setVar("BB_BASEHASH:task-%s" % task, self.basehash[fn + ":" + task])

    def postparsing_clean_cache(self):
        #
        # After parsing we can remove some things from memory to reduce our memory footprint
        #
        self.gendeps = {}
        self.lookupcache = {}
        self.taskdeps = {}

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the ignore
        # list
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True

    def read_taint(self, fn, task, stampbase):
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint

    def prep_taskhash(self, tid, deps, dataCaches):

        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[fn]

        self.tidtopn[tid] = recipename

        for dep in sorted(deps, key=clean_basepath):
            (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            depname = dataCaches[depmc].pkg_fn[depmcfn]
            if not self.supports_multiconfig_datacaches and mc != depmc:
                # If the signature generator doesn't understand multiconfig
                # data caches, any dependency not in the same multiconfig must
                # be skipped for backward compatibility
                continue
            if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches):
                continue
            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
            self.runtaskdeps[tid].append(dep)

        if task in dataCaches[mc].file_checksums[fn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            for (f,cs) in checksums:
                self.file_checksum_values[tid].append((f,cs))

        taskdep = dataCaches[mc].task_deps[fn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):

        data = self.basehash[tid]
        for dep in self.runtaskdeps[tid]:
            data = data + self.get_unihash(dep)

        for (f, cs) in self.file_checksum_values[tid]:
            if cs:
                if "/./" in f:
                    data = data + "./" + f.split("/./")[1]
                data = data + cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                data = data + self.taints[tid][8:]
            else:
                data = data + self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task])
        return h
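
    # Note: calc_taskhash() near the bottom of this file recomputes this value
    # from a dumped siginfo dictionary, and dump_sigtask() uses it as a
    # consistency check, so the two must stay in sync.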

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        self.unihash_cache.save(self.unitaskhashes)

    def copy_unitaskhashes(self, targetdir):
        self.unihash_cache.copyfile(targetdir)

    def dump_sigtask(self, fn, task, stampbase, runtime):

        tid = fn + ":" + task
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        with bb.utils.umask(0o002):
            bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basehash_ignore_vars'] = self.basehash_ignore_vars
        data['taskhash_ignore_tasks'] = self.taskhash_ignore_tasks
        data['taskdeps'] = self.taskdeps[fn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.lookupcache[fn][task]
        for dep in self.taskdeps[fn][task]:
            if dep in self.basehash_ignore_vars:
                continue
            data['gendeps'][dep] = self.gendeps[fn][dep]
            data['varvals'][dep] = self.lookupcache[fn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = self.runtaskdeps[tid]
            data['file_checksum_values'] = []
            for f,cs in self.file_checksum_values[tid]:
                if "/./" in f:
                    data['file_checksum_values'].append(("./" + f.split("/./")[1], cs))
                else:
                    data['file_checksum_values'].append((os.path.basename(f), cs))
            data['runtaskhashes'] = {}
            for dep in data['runtaskdeps']:
                data['runtaskhashes'][dep] = self.get_unihash(dep)
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(fn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        fd, tmpfile = bb.utils.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f:
                json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder)
                f.flush()
            os.chmod(tmpfile, 0o664)
            bb.utils.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err

    def dump_sigfn(self, fn, dataCaches, options):
        if fn in self.taskdeps:
            for task in self.taskdeps[fn]:
                tid = fn + ":" + task
                mc = bb.runqueue.mc_from_tid(tid)
                if tid not in self.taskhash:
                    continue
                if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
                    bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
                    bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
                self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)

class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    name = "basichash"

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            return self.taskhash[tid]

        # If task is not in basehash, then error
        return self.basehash[tid]

    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
        if taskname != "do_setscene" and taskname.endswith("_setscene"):
            tid = fn + ":" + taskname[:-9]
        else:
            tid = fn + ":" + taskname
        if clean:
            h = "*"
        else:
            h = self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')
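
    # For example (hypothetical values): stampbase '/stamps/foo-1.0-r0',
    # taskname 'do_compile', hash 'abc123' and empty extrainfo give
    # '/stamps/foo-1.0-r0.do_compile.abc123'.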

    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, d, fn):
        bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
        bb.build.write_taint(task, d, fn)

class SignatureGeneratorUniHashMixIn(object):
    def __init__(self, data):
        self.extramethod = {}
        super().__init__(data)

    def get_taskdata(self):
        return (self.server, self.method, self.extramethod) + super().get_taskdata()

    def set_taskdata(self, data):
        self.server, self.method, self.extramethod = data[:3]
        super().set_taskdata(data[3:])

    def client(self):
        if getattr(self, '_client', None) is None:
            self._client = hashserv.create_client(self.server)
        return self._client

    def reset(self, data):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().reset(data)

    def exit(self):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().exit()

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            # If a unique hash is reported, use it as the stampfile hash. This
            # ensures that a task won't be re-run if the taskhash changes,
            # but it would result in the same output hash
            unihash = self._get_unihash(tid)
            if unihash is not None:
                return unihash

        return super().get_stampfile_hash(tid)

    def set_unihash(self, tid, unihash):
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        self.unitaskhashes[key] = (self.taskhash[tid], unihash)
        self.unihash[tid] = unihash

    def _get_unihash(self, tid, checkkey=None):
        if tid not in self.tidtopn:
            return None
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        if key not in self.unitaskhashes:
            return None
        if not checkkey:
            checkkey = self.taskhash[tid]
        (key, unihash) = self.unitaskhashes[key]
        if key != checkkey:
            return None
        return unihash

    def get_unihash(self, tid):
        taskhash = self.taskhash[tid]

        # If it's not a setscene task we can return
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # for each task
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) The value is easy for multiple independent builders to derive the
        #    same unique hash from the same input. This means that if the
        #    independent builders find the same taskhash, but it isn't reported
        #    to the server, there is a better chance that they will agree on
        #    the unique hash.
        unihash = taskhash

        try:
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported at debug level 2. If they differ, that
                # is much more interesting, so it is reported at debug level 1
                hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash

    def report_unihash(self, path, task, d):
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        fn = d.getVar('BB_FILENAME')
        tid = fn + ':do_' + task
        key = tid + ':' + taskhash

        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just exit
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                if report_taskdata:
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except ConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass

    def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
        try:
            extra_data = {}
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]

            data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
            hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))

            if data is None:
                bb.warn("Server unable to handle unihash report")
                return False

            finalunihash = data['unihash']

            if finalunihash == current_unihash:
                hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
            elif finalunihash == wanted_unihash:
                hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
                self.set_unihash(tid, finalunihash)
                return True
            else:
                # TODO: What to do here?
                hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))

        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        return False

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    name = "TestEquivHash"
    def init_rundepcheck(self, data):
        super().init_rundepcheck(data)
        self.server = data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash):
    name = "TestMulticonfigDepends"
    supports_multiconfig_datacaches = True

def dump_this_task(outfile, d):
    import bb.parse
    fn = d.getVar("BB_FILENAME")
    task = "do_" + d.getVar("BB_CURRENTTASK")
    referencestamp = bb.build.stamp_internal(task, d, None, True)
    bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile:" + referencestamp)

def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    # First set up the colours
    colors = {'color_title':   '\033[1m',
              'color_default': '\033[0m',
              'color_add':     '\033[0;32m',
              'color_remove':  '\033[0;31m',
             }
    # Leave all keys present but clear the values
    if not enable_color:
        for k in colors.keys():
            colors[k] = ''
    return colors

def worddiff_str(oldstr, newstr, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldstr.split(' '), newstr.split(' '))
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append(value)
        elif change == '+':
            item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors)
            ret.append(item)
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(ret), whitespace_note)

def list_inline_diff(oldlist, newlist, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldlist, newlist)
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append("'%s'" % value)
        elif change == '+':
            item = '{color_add}+{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}-{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
    return '[%s]' % (', '.join(ret))

def clean_basepath(basepath):
    basepath, dir, recipe_task = basepath.rsplit("/", 2)
    cleaned = dir + '/' + recipe_task

    if basepath[0] == '/':
        return cleaned

    if basepath.startswith("mc:") and basepath.count(':') >= 2:
        mc, mc_name, basepath = basepath.split(":", 2)
        mc_suffix = ':mc:' + mc_name
    else:
        mc_suffix = ''

    # mc stuff now removed from basepath. Whatever was next, if present will be the first
    # suffix. ':/', recipe path start, marks the end of this. Something like
    # 'virtual:a[:b[:c]]:/path...' (b and c being optional)
    if basepath[0] != '/':
        cleaned += ':' + basepath.split(':/', 1)[0]

    return cleaned + mc_suffix
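
# Sketch of the cleanup above (hypothetical paths):
#   'virtual:native:/work/meta/recipes/foo/foo_1.0.bb:do_compile'
#       -> 'foo/foo_1.0.bb:do_compile:virtual:native'
#   'mc:conf1:/work/meta/recipes/foo/foo_1.0.bb:do_compile'
#       -> 'foo/foo_1.0.bb:do_compile:mc:conf1'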

def clean_basepaths(a):
    b = {}
    for x in a:
        b[clean_basepath(x)] = a[x]
    return b

def clean_basepaths_list(a):
    b = []
    for x in a:
        b.append(clean_basepath(x))
    return b

# Handle renamed fields
def handle_renames(data):
    if 'basewhitelist' in data:
        data['basehash_ignore_vars'] = data['basewhitelist']
        del data['basewhitelist']
    if 'taskwhitelist' in data:
        data['taskhash_ignore_tasks'] = data['taskwhitelist']
        del data['taskwhitelist']


def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)
    with bb.compress.zstd.open(b, "rt", encoding="utf-8", num_threads=1) as f:
        b_data = json.load(f, object_hook=SetDecoder)

    for data in [a_data, b_data]:
        handle_renames(data)

    def dict_diff(a, b, ignored_vars=set()):
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in ignored_vars:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed

    def file_checksums_diff(a, b):
        from collections import Counter

        # Convert lists back to tuples
        a = [(f[0], f[1]) for f in a]
        b = [(f[0], f[1]) for f in b]

        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed

    if 'basehash_ignore_vars' in a_data and a_data['basehash_ignore_vars'] != b_data['basehash_ignore_vars']:
        output.append(color_format("{color_title}basehash_ignore_vars changed{color_default} from '%s' to '%s'") % (a_data['basehash_ignore_vars'], b_data['basehash_ignore_vars']))
        if a_data['basehash_ignore_vars'] and b_data['basehash_ignore_vars']:
            output.append("changed items: %s" % a_data['basehash_ignore_vars'].symmetric_difference(b_data['basehash_ignore_vars']))

    if 'taskhash_ignore_tasks' in a_data and a_data['taskhash_ignore_tasks'] != b_data['taskhash_ignore_tasks']:
        output.append(color_format("{color_title}taskhash_ignore_tasks changed{color_default} from '%s' to '%s'") % (a_data['taskhash_ignore_tasks'], b_data['taskhash_ignore_tasks']))
        if a_data['taskhash_ignore_tasks'] and b_data['taskhash_ignore_tasks']:
            output.append("changed items: %s" % a_data['taskhash_ignore_tasks'].symmetric_difference(b_data['taskhash_ignore_tasks']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basehash_ignore_vars'] & b_data['basehash_ignore_vars'])
    if changed:
        for dep in sorted(changed):
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in sorted(added):
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in sorted(removed):
            output.append(color_format("{color_title}Dependency on Variable %s was removed") % (dep))


    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in sorted(changed):
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    if not 'file_checksum_values' in a_data:
        a_data['file_checksum_values'] = []
    if not 'file_checksum_values' in b_data:
        b_data['file_checksum_values'] = []

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    if not 'runtaskdeps' in a_data:
        a_data['runtaskdeps'] = {}
    if not 'runtaskdeps' in b_data:
        b_data['runtaskdeps'] = {}

    if not collapsed:
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))

        if changed:
            clean_a = clean_basepaths_list(a_data['runtaskdeps'])
            clean_b = clean_basepaths_list(b_data['runtaskdeps'])
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
            output.append("\n".join(changed))


    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = clean_basepaths(a_data['runtaskhashes'])
        b = clean_basepaths(b_data['runtaskhashes'])
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in sorted(added):
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (dep, b[dep]))
        if removed:
            for dep in sorted(removed):
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (dep, a[dep]))
        if changed:
            for dep in sorted(changed):
                if not collapsed:
                    output.append(color_format("{color_title}Hash for task dependency %s changed{color_default} from %s to %s") % (dep, a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
                            # that hash since in all likelihood, they're the same changes this task also saw.
                            output = [output[-1]] + recout
                            break

    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output


def calc_basehash(sigdata):
    task = sigdata['task']
    basedata = sigdata['varvals'][task]

    if basedata is None:
        basedata = ''

    alldeps = sigdata['taskdeps']
    for dep in alldeps:
        basedata = basedata + dep
        val = sigdata['varvals'][dep]
        if val is not None:
            basedata = basedata + str(val)

    return hashlib.sha256(basedata.encode("utf-8")).hexdigest()

def calc_taskhash(sigdata):
    data = sigdata['basehash']

    for dep in sigdata['runtaskdeps']:
        data = data + sigdata['runtaskhashes'][dep]

    for c in sigdata['file_checksum_values']:
        if c[1]:
            if "./" in c[0]:
                data = data + c[0]
            data = data + c[1]

    if 'taint' in sigdata:
        if 'nostamp:' in sigdata['taint']:
            data = data + sigdata['taint'][8:]
        else:
            data = data + sigdata['taint']

    return hashlib.sha256(data.encode("utf-8")).hexdigest()
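
# calc_basehash() and calc_taskhash() mirror the hashing done by
# SignatureGeneratorBasic; dump_sigtask() above and dump_sigfile() below use
# them to cross-check that a dumped siginfo file is self-consistent.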


def dump_sigfile(a):
    output = []

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)

    handle_renames(a_data)

    output.append("basehash_ignore_vars: %s" % (sorted(a_data['basehash_ignore_vars'])))

    output.append("taskhash_ignore_tasks: %s" % (sorted(a_data['taskhash_ignore_tasks'] or [])))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep in sorted(a_data['gendeps']):
        output.append("List of dependencies for variable %s is %s" % (dep, sorted(a_data['gendeps'][dep])))

    for dep in sorted(a_data['varvals']):
        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))

    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (sorted(a_data['runtaskdeps'])))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (sorted(a_data['file_checksum_values'])))

    if 'runtaskhashes' in a_data:
        for dep in sorted(a_data['runtaskhashes']):
            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))

    if 'taint' in a_data:
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(a_data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output