blob: 3f3d6df54d5615c05d8f34cacde78b85cfff706b [file] [log] [blame]
Brad Bishopc342db32019-05-15 21:57:59 -04001#
2# SPDX-License-Identifier: GPL-2.0-only
3#
4
Patrick Williamsc124f4f2015-09-15 14:41:29 -05005import hashlib
6import logging
7import os
8import re
9import tempfile
Patrick Williamsc0f7c042017-02-23 20:41:17 -060010import pickle
Patrick Williamsc124f4f2015-09-15 14:41:29 -050011import bb.data
Brad Bishop6e60e8b2018-02-01 10:27:11 -050012import difflib
13import simplediff
Andrew Geisslereff27472021-10-29 15:35:00 -050014import json
15import bb.compress.zstd
Patrick Williamsd8c66bc2016-06-20 12:57:21 -050016from bb.checksum import FileChecksumCache
Brad Bishop08902b02019-08-20 09:16:51 -040017from bb import runqueue
Brad Bishopa34c0302019-09-23 22:34:48 -040018import hashserv
Andrew Geissler475cb722020-07-10 16:00:51 -050019import hashserv.client
Patrick Williamsc124f4f2015-09-15 14:41:29 -050020
21logger = logging.getLogger('BitBake.SigGen')
Andrew Geissler82c905d2020-04-13 13:39:40 -050022hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')
Patrick Williamsc124f4f2015-09-15 14:41:29 -050023
class SetEncoder(json.JSONEncoder):
    """JSON encoder that can serialize Python set objects.

    Sets are not natively JSON-serializable; they are written as a marker
    dictionary ``{"_set_object": [sorted members]}`` so that SetDecoder can
    reconstruct them on load.  Sorting keeps the output deterministic, which
    matters because these documents feed signature comparison.
    """
    def default(self, obj):
        if isinstance(obj, set):
            # sorted() already returns a new list; the extra list() copy the
            # old code made was redundant.
            return dict(_set_object=sorted(obj))
        # Base implementation raises TypeError for unsupported types.
        return super().default(obj)
29
def SetDecoder(dct):
    """object_hook companion to SetEncoder: turn a ``{"_set_object": [...]}``
    marker dictionary back into a set; pass every other dict through."""
    if '_set_object' not in dct:
        return dct
    return set(dct['_set_object'])
34
def init(d):
    """Instantiate the signature generator selected by BB_SIGNATURE_HANDLER.

    Scans this module's globals for SignatureGenerator subclasses and returns
    an instance of the one whose ``name`` matches the configured handler.
    Falls back to the noop SignatureGenerator (with an error logged) when the
    requested handler does not exist.
    """
    available = [obj for obj in globals().values()
                 if type(obj) is type and issubclass(obj, SignatureGenerator)]

    wanted = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for candidate in available:
        if candidate.name == wanted:
            return candidate(d)

    logger.error("Invalid signature generator '%s', using default 'noop'\n"
                 "Available generators: %s", wanted,
                 ', '.join(obj.name for obj in available))
    return SignatureGenerator(d)
48
class SignatureGenerator(object):
    """
    Base "noop" signature generator.

    Defines the interface BitBake's runqueue and cooker expect from a
    signature generator, with do-nothing (or trivial) implementations.
    Concrete generators such as SignatureGeneratorBasic override these hooks
    to compute real task signatures.
    """
    name = "noop"

    # If the derived class supports multiconfig datacaches, set this to True
    # The default is False for backward compatibility with derived signature
    # generators that do not understand multiconfig caches
    supports_multiconfig_datacaches = False

    def __init__(self, data):
        # All of these dictionaries are keyed by task identifier (tid).
        self.basehash = {}              # per-task hash of the task's own metadata
        self.taskhash = {}              # per-task hash (incl. dependencies in subclasses)
        self.unihash = {}               # per-task unified/equivalence hash
        self.runtaskdeps = {}           # tid -> list of runtime dependency tids
        self.file_checksum_values = {}  # tid -> list of (file, checksum) pairs
        self.taints = {}                # tid -> taint string (forced or nostamp runs)
        self.unitaskhashes = {}         # cache of reported unihashes
        self.tidtopn = {}               # tid -> recipe name (PN)
        self.setscenetasks = set()      # tids that have setscene variants

    def finalise(self, fn, d, varient):
        """Hook called when a recipe finishes parsing; no-op here.
        NOTE(review): 'varient' spelling is historical — kept for API compat."""
        return

    def postparsing_clean_cache(self):
        """Hook to drop parse-time caches after parsing completes; no-op here."""
        return

    def get_unihash(self, tid):
        # Without an equivalence server the unified hash is just the taskhash.
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        """Hook to gather per-task data before get_taskhash(); no-op here."""
        return

    def get_taskhash(self, tid, deps, dataCaches):
        # Noop generator: derive a stable hash from the tid alone (ignores
        # dependencies and metadata entirely).
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        # No hash component in noop stamps; rstrip handles empty extrainfo.
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        # Identical to stampfile() since there is no hash field to wildcard.
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, fn, task, stampbase, runtime):
        """Hook to write a signature (siginfo/sigdata) file; no-op here."""
        return

    def invalidate_task(self, task, d, fn):
        """Force a task to re-run by deleting its stamp."""
        bb.build.del_stamp(task, d, fn)

    def dump_sigs(self, dataCache, options):
        """Hook to dump all signatures (bitbake -S); no-op here."""
        return

    def get_taskdata(self):
        # Serialization protocol used to ship generator state between
        # processes; set_taskdata() must unpack in exactly this order.
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        # Inverse of get_taskdata(); tuple order must match it exactly.
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        # Re-run __init__ to drop all accumulated state.
        self.__init__(data)

    def get_taskhashes(self):
        # Smaller state bundle than get_taskdata(); paired with set_taskhashes().
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        """Hook to persist the unihash cache; no-op here."""
        return

    def copy_unitaskhashes(self, targetdir):
        """Hook to copy the unihash cache file into targetdir; no-op here."""
        return

    def set_setscene_tasks(self, setscene_tasks):
        """Hook to record which tasks have setscene variants; no-op here."""
        return

    @classmethod
    def get_data_caches(cls, dataCaches, mc):
        """
        This function returns the datacaches that should be passed to signature
        generator functions. If the signature generator supports multiconfig
        caches, the entire dictionary of data caches is sent, otherwise a
        special proxy is sent that support both index access to all
        multiconfigs, and also direct access for the default multiconfig.

        The proxy class allows code in this class itself to always use
        multiconfig aware code (to ease maintenance), but derived classes that
        are unaware of multiconfig data caches can still access the default
        multiconfig as expected.

        Do not override this function in derived classes; it will be removed in
        the future when support for multiconfig data caches is mandatory
        """
        class DataCacheProxy(object):
            def __init__(self):
                pass

            def __getitem__(self, key):
                # Index access: behave like the full multiconfig dict.
                return dataCaches[key]

            def __getattr__(self, name):
                # Attribute access: behave like the single 'mc' datacache.
                return getattr(dataCaches[mc], name)

        if cls.supports_multiconfig_datacaches:
            return dataCaches

        return DataCacheProxy()

    def exit(self):
        """Hook for shutdown-time cleanup (e.g. closing clients); no-op here."""
        return
163
class SignatureGeneratorBasic(SignatureGenerator):
    """
    Standard signature generator: computes per-task base hashes from variable
    dependencies, then task hashes that also fold in runtime dependencies,
    file checksums and taints, and can dump the inputs to siginfo files.
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}              # tid -> hash of the task's own metadata
        self.taskhash = {}              # tid -> full task hash (deps included)
        self.unihash = {}               # tid -> unified hash (used by subclasses)
        self.taskdeps = {}              # fn -> per-task variable dependency info
        self.runtaskdeps = {}           # tid -> runtime dependency tids
        self.file_checksum_values = {}  # tid -> list of (file, checksum)
        self.taints = {}                # tid -> taint string
        self.gendeps = {}               # fn -> generated variable dependencies
        self.lookupcache = {}           # fn -> variable value cache
        self.setscenetasks = set()
        # Variables excluded from base hashes (e.g. paths, timestamps).
        self.basehash_ignore_vars = set((data.getVar("BB_BASEHASH_IGNORE_VARS") or "").split())
        self.taskhash_ignore_tasks = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        # Directory names skipped when checksumming local file trees.
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        """Compile the BB_TASKHASH_IGNORE_TASKS pattern used by rundep_check()."""
        self.taskhash_ignore_tasks = data.getVar("BB_TASKHASH_IGNORE_TASKS") or None
        if self.taskhash_ignore_tasks:
            self.twl = re.compile(self.taskhash_ignore_tasks)
        else:
            self.twl = None

    def _build_data(self, fn, d):
        """Compute variable dependencies and base hashes for every task in fn,
        warning loudly if a reparse produced a different base hash (i.e. the
        metadata is non-deterministic)."""
        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basehash_ignore_vars)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basehash_ignore_vars, fn)

        for task in tasklist:
            tid = fn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before run printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        self.taskdeps[fn] = taskdeps
        self.gendeps[fn] = gendeps
        self.lookupcache[fn] = lookupcache

        return taskdeps

    def set_setscene_tasks(self, setscene_tasks):
        """Record which tids have setscene variants (copied into a set)."""
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):
        """Compute and store base hashes for fn once parsing is finished, and
        export them into the datastore as BB_BASEHASH:task-* variables."""
        mc = d.getVar("__BBMULTICONFIG", False) or ""
        if variant or mc:
            fn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps = self._build_data(fn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % fn)
            raise

        #Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[fn]:
        #    self.dump_sigtask(fn, task, d.getVar("STAMP"), False)

        for task in taskdeps:
            d.setVar("BB_BASEHASH:task-%s" % task, self.basehash[fn + ":" + task])

    def postparsing_clean_cache(self):
        #
        # After parsing we can remove some things from memory to reduce our memory footprint
        #
        self.gendeps = {}
        self.lookupcache = {}
        self.taskdeps = {}

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the ignore
        # list
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True

    def read_taint(self, fn, task, stampbase):
        """Return the taint string from the task's .taint stamp file, or None
        if the file does not exist (missing file is the normal case)."""
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint

    def prep_taskhash(self, tid, deps, dataCaches):
        """Collect everything get_taskhash() needs for tid: base hash, kept
        runtime dependencies, file checksums and any taint values."""
        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[fn]

        self.tidtopn[tid] = recipename

        # Sorted iteration keeps the dependency order (and thus the hash)
        # deterministic across runs.
        for dep in sorted(deps, key=clean_basepath):
            (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            depname = dataCaches[depmc].pkg_fn[depmcfn]
            if not self.supports_multiconfig_datacaches and mc != depmc:
                # If the signature generator doesn't understand multiconfig
                # data caches, any dependency not in the same multiconfig must
                # be skipped for backward compatibility
                continue
            if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches):
                continue
            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
            self.runtaskdeps[tid].append(dep)

        if task in dataCaches[mc].file_checksums[fn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            for (f,cs) in checksums:
                self.file_checksum_values[tid].append((f,cs))

        taskdep = dataCaches[mc].task_deps[fn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):
        """Fold dependency unihashes, file checksums and taints into the base
        hash and return the resulting sha256 task hash.  prep_taskhash() must
        have been called for tid first."""
        data = self.basehash[tid]
        for dep in self.runtaskdeps[tid]:
            data = data + self.get_unihash(dep)

        for (f, cs) in self.file_checksum_values[tid]:
            if cs:
                if "/./" in f:
                    # Path contains a "/./" marker: include the (relative)
                    # path after it so renames are visible in the hash.
                    data = data + "./" + f.split("/./")[1]
                data = data + cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                # Strip the "nostamp:" prefix; only the random part matters.
                data = data + self.taints[tid][8:]
            else:
                data = data + self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task])
        return h

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        """Persist the unihash cache to disk."""
        self.unihash_cache.save(self.unitaskhashes)

    def copy_unitaskhashes(self, targetdir):
        """Copy the unihash cache file into targetdir."""
        self.unihash_cache.copyfile(targetdir)

    def dump_sigtask(self, fn, task, stampbase, runtime):
        """Write the signature input data for one task to a sigdata (runtime)
        or sigbasedata file as zstd-compressed JSON, cross-checking that the
        recomputed hashes match the stored ones."""
        tid = fn + ":" + task
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            # "customfile:<stamp>" form: write to the given path directly.
            sigfile = stampbase
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        with bb.utils.umask(0o002):
            bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basehash_ignore_vars'] = self.basehash_ignore_vars
        data['taskhash_ignore_tasks'] = self.taskhash_ignore_tasks
        data['taskdeps'] = self.taskdeps[fn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.lookupcache[fn][task]
        for dep in self.taskdeps[fn][task]:
            if dep in self.basehash_ignore_vars:
                continue
            data['gendeps'][dep] = self.gendeps[fn][dep]
            data['varvals'][dep] = self.lookupcache[fn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = self.runtaskdeps[tid]
            data['file_checksum_values'] = []
            for f,cs in self.file_checksum_values[tid]:
                if "/./" in f:
                    # Keep the path relative to the "/./" marker, matching
                    # what get_taskhash() hashed.
                    data['file_checksum_values'].append(("./" + f.split("/./")[1], cs))
                else:
                    data['file_checksum_values'].append((os.path.basename(f), cs))
            data['runtaskhashes'] = {}
            for dep in data['runtaskdeps']:
                data['runtaskhashes'][dep] = self.get_unihash(dep)
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(fn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        # Sanity-check: recomputing the hashes from the dumped data must give
        # the stored values, otherwise the dump itself is inconsistent.
        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        # Write atomically: temp file in the same directory, then rename.
        fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f:
                json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder)
                f.flush()
            os.chmod(tmpfile, 0o664)
            bb.utils.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err

    def dump_sigfn(self, fn, dataCaches, options):
        """Dump sigdata files for every hashed task of fn, warning when the
        cached base hash disagrees with the freshly generated one."""
        if fn in self.taskdeps:
            for task in self.taskdeps[fn]:
                tid = fn + ":" + task
                mc = bb.runqueue.mc_from_tid(tid)
                if tid not in self.taskhash:
                    continue
                if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
                    bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
                    bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
                self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)
class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    """Variant of the basic generator that embeds the task hash in stamp
    file names, so stamps are naturally invalidated when signatures change."""
    name = "basichash"

    def get_stampfile_hash(self, tid):
        # Prefer the full task hash; fall back to the base hash when the
        # task hash has not been computed yet.  A tid absent from both
        # dictionaries raises KeyError, which is an error condition.
        try:
            return self.taskhash[tid]
        except KeyError:
            return self.basehash[tid]

    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
        """Build the stamp file name, with the hash replaced by '*' when a
        wildcard clean mask is requested."""
        # Setscene variants share the stamp identity of their parent task,
        # so strip the "_setscene" suffix (never from "do_setscene" itself).
        realtask = taskname
        if taskname.endswith("_setscene") and taskname != "do_setscene":
            realtask = taskname[:-9]
        tid = fn + ":" + realtask

        hashpart = "*" if clean else self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, hashpart, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
        # A clean mask is simply a stampfile with a wildcard in the hash slot.
        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, d, fn):
        """Force a rebuild of the task by writing a taint stamp."""
        bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
        bb.build.write_taint(task, d, fn)
Brad Bishop08902b02019-08-20 09:16:51 -0400483class SignatureGeneratorUniHashMixIn(object):
    def __init__(self, data):
        # Per-tid extra method suffixes appended to self.method when querying
        # the hash equivalence server.
        self.extramethod = {}
        # NOTE(review): self.server and self.method are expected to be set by
        # the concrete class this mixin is combined with (not visible here).
        super().__init__(data)
487
    def get_taskdata(self):
        # Prepend the hash-equivalence state to the base class state tuple;
        # set_taskdata() strips these three fields off again.
        return (self.server, self.method, self.extramethod) + super().get_taskdata()
Brad Bishop08902b02019-08-20 09:16:51 -0400490
    def set_taskdata(self, data):
        # Inverse of get_taskdata(): first three entries are ours, the rest
        # belong to the base class.
        self.server, self.method, self.extramethod = data[:3]
        super().set_taskdata(data[3:])
Brad Bishop08902b02019-08-20 09:16:51 -0400494
    def client(self):
        """Return the hash equivalence server client, creating it lazily on
        first use (connection details come from self.server)."""
        if getattr(self, '_client', None) is None:
            self._client = hashserv.create_client(self.server)
        return self._client
499
    def reset(self, data):
        """Close any open hash server connection before resetting state."""
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().reset(data)
505
    def exit(self):
        """Close any open hash server connection on shutdown."""
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().exit()
511
    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            # If a unique hash is reported, use it as the stampfile hash. This
            # ensures that if a task won't be re-run if the taskhash changes,
            # but it would result in the same output hash
            unihash = self._get_unihash(tid)
            if unihash is not None:
                return unihash

        # No cached unihash: fall back to the plain taskhash/basehash logic.
        return super().get_stampfile_hash(tid)
522
    def set_unihash(self, tid, unihash):
        """Record a unihash for tid, both in the per-tid map and in the
        persistent unitaskhashes cache (keyed by mc:PN:taskname so the entry
        survives recipe filename changes)."""
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        # Store the taskhash alongside so stale cache entries can be detected.
        self.unitaskhashes[key] = (self.taskhash[tid], unihash)
        self.unihash[tid] = unihash
528
    def _get_unihash(self, tid, checkkey=None):
        """Return the cached unihash for tid, or None when there is no cache
        entry or the entry was recorded against a different taskhash
        (checkkey defaults to the current taskhash)."""
        if tid not in self.tidtopn:
            return None
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        if key not in self.unitaskhashes:
            return None
        if not checkkey:
            checkkey = self.taskhash[tid]
        # Cache entries are (taskhash, unihash) pairs; a stale taskhash means
        # the cached unihash no longer applies.
        (key, unihash) = self.unitaskhashes[key]
        if key != checkkey:
            return None
        return unihash
Brad Bishop08902b02019-08-20 09:16:51 -0400542
    def get_unihash(self, tid):
        """Return the unified hash for tid: the locally cached value, the one
        reported by the hash equivalence server, or the taskhash itself as a
        fallback."""
        taskhash = self.taskhash[tid]

        # If its not a setscene task we can return
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # for each task
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) The value is easy for multiple independent builders to derive the
        #    same unique hash from the same input. This means that if the
        #    independent builders find the same taskhash, but it isn't reported
        #    to the server, there is a better chance that they will agree on
        #    the unique hash.
        unihash = taskhash

        try:
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported it at debug level 2. If they differ, that
                # is much more interesting, so it is reported at debug level 1
                hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except ConnectionError as e:
            # Server being unreachable is non-fatal: fall back to taskhash.
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash
592
    def report_unihash(self, path, task, d):
        """Compute the task's output hash and report it to the hash
        equivalence server, pairing it with the task's unihash.

        path: directory whose contents the configured hashing method examines
        task: task name without the 'do_' prefix
        d: the task's datastore

        A depsig.do_<task>.<pid> signature dump is written into T (with a
        depsig.do_<task> symlink pointing at the latest one) for debugging.
        If the server responds with a different unihash, BB_UNIHASH is
        updated and a taskUniHashUpdate event is fired so the runqueue
        picks up the new value.
        """
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        fn = d.getVar('BB_FILENAME')
        tid = fn + ':do_' + task
        key = tid + ':' + taskhash

        # If a setscene task list is configured, only report tasks on it
        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just exit
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            # The hashing method may be a plain function name or a dotted
            # 'module.function' reference that needs importing first
            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                # Optionally attach the task metadata and the signature dump
                # itself so the server can record how the hash was produced
                if report_taskdata:
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    # Server already knew an equivalent output; adopt its unihash
                    hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except ConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                # Repoint the stable symlink at the freshly written dump
                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass
678
Andrew Geissler82c905d2020-04-13 13:39:40 -0500679 def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
680 try:
681 extra_data = {}
682 method = self.method
683 if tid in self.extramethod:
684 method = method + self.extramethod[tid]
685
686 data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
687 hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))
688
689 if data is None:
690 bb.warn("Server unable to handle unihash report")
691 return False
692
693 finalunihash = data['unihash']
694
695 if finalunihash == current_unihash:
696 hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
697 elif finalunihash == wanted_unihash:
698 hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
699 self.set_unihash(tid, finalunihash)
700 return True
701 else:
702 # TODO: What to do here?
703 hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))
704
Andrew Geisslerc926e172021-05-07 16:11:35 -0500705 except ConnectionError as e:
Andrew Geissler82c905d2020-04-13 13:39:40 -0500706 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
707
708 return False
Brad Bishop08902b02019-08-20 09:16:51 -0400709
710#
711# Dummy class used for bitbake-selftest
712#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    """Hash-equivalence signature generator used only by bitbake-selftest."""
    name = "TestEquivHash"
    def init_rundepcheck(self, data):
        super().init_rundepcheck(data)
        # Point the unihash mixin at the test server and hashing method
        self.server = data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"
719
Andrew Geissler5a43b432020-06-13 10:46:56 -0500720#
721# Dummy class used for bitbake-selftest
722#
class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash):
    """Multiconfig-aware signature generator used only by bitbake-selftest."""
    name = "TestMulticonfigDepends"
    supports_multiconfig_datacaches = True
Brad Bishop08902b02019-08-20 09:16:51 -0400726
def dump_this_task(outfile, d):
    """Dump the signature data of the currently executing task to outfile."""
    import bb.parse
    recipe_file = d.getVar("BB_FILENAME")
    taskname = "do_" + d.getVar("BB_CURRENTTASK")
    refstamp = bb.build.stamp_internal(taskname, d, None, True)
    bb.parse.siggen.dump_sigtask(recipe_file, taskname, outfile, "customfile:" + refstamp)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500733
def init_colors(enable_color):
    """Build the colour escape-code dict passed to compare_sigfiles().

    When enable_color is false every key remains present but maps to the
    empty string, so format strings referencing the keys still work.
    """
    codes = {
        'color_title': '\033[1m',
        'color_default': '\033[0m',
        'color_add': '\033[0;32m',
        'color_remove': '\033[0;31m',
    }
    if enable_color:
        return codes
    # Same keys, colourless values
    return {key: '' for key in codes}
747
def worddiff_str(oldstr, newstr, colors=None):
    """Render a word-level inline diff between two strings.

    Additions appear as {+word+} and removals as [-word-], wrapped in the
    given colour codes. A trailing note is added when only whitespace
    differs.
    """
    if not colors:
        colors = init_colors(False)
    pieces = []
    for op, words in simplediff.diff(oldstr.split(' '), newstr.split(' ')):
        chunk = ' '.join(words)
        if op == '=':
            pieces.append(chunk)
        elif op == '+':
            pieces.append('{color_add}{{+{value}+}}{color_default}'.format(value=chunk, **colors))
        elif op == '-':
            pieces.append('{color_remove}[-{value}-]{color_default}'.format(value=chunk, **colors))
    note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(pieces), note)
767
def list_inline_diff(oldlist, newlist, colors=None):
    """Render an inline diff of two lists as a bracketed, comma-joined string.

    Unchanged entries are quoted; additions are prefixed with '+' and
    removals with '-', wrapped in the given colour codes.
    """
    if not colors:
        colors = init_colors(False)
    rendered = []
    for op, words in simplediff.diff(oldlist, newlist):
        chunk = ' '.join(words)
        if op == '=':
            rendered.append("'%s'" % chunk)
        elif op == '+':
            rendered.append('{color_add}+{value}{color_default}'.format(value=chunk, **colors))
        elif op == '-':
            rendered.append('{color_remove}-{value}{color_default}'.format(value=chunk, **colors))
    return '[%s]' % (', '.join(rendered))
784
def clean_basepath(basepath):
    """Shorten a task identifier by dropping the absolute recipe path prefix.

    Keeps the recipe's directory and 'file:task' component, re-appending any
    'virtual:...' prefix and a ':mc:<name>' multiconfig suffix when present.
    """
    prefix, recipe_dir, recipe_task = basepath.rsplit("/", 2)
    cleaned = recipe_dir + '/' + recipe_task

    # Plain absolute path: nothing further to preserve
    if prefix[0] == '/':
        return cleaned

    mc_suffix = ''
    if prefix.startswith("mc:") and prefix.count(':') >= 2:
        _, mc_name, prefix = prefix.split(":", 2)
        mc_suffix = ':mc:' + mc_name

    # mc stuff now removed from the prefix. Whatever was next, if present will
    # be the first suffix. ':/', recipe path start, marks the end of this.
    # Something like 'virtual:a[:b[:c]]:/path...' (b and c being optional)
    if prefix[0] != '/':
        cleaned += ':' + prefix.split(':/', 1)[0]

    return cleaned + mc_suffix
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500805
def clean_basepaths(a):
    """Return a copy of dict *a* with every key passed through clean_basepath()."""
    return {clean_basepath(key): value for key, value in a.items()}
811
def clean_basepaths_list(a):
    """Return a list with clean_basepath() applied to every entry of *a*."""
    return [clean_basepath(entry) for entry in a]
817
# Handle renamed fields
def handle_renames(data):
    """Migrate legacy siginfo field names to their current names, in place."""
    renames = {
        'basewhitelist': 'basehash_ignore_vars',
        'taskwhitelist': 'taskhash_ignore_tasks',
    }
    for old_name, new_name in renames.items():
        if old_name in data:
            data[new_name] = data.pop(old_name)
826
827
def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
    """Compare two siginfo files and describe their differences.

    a, b: paths to zstd-compressed JSON siginfo files
    recursecb: optional callback (dep, hash_a, hash_b) -> list of lines,
        used to recurse into changed dependent-task signatures
    color: emit ANSI colour codes in the output
    collapsed: suppress detail lines that only restate the hash change

    Returns a list of human-readable description lines (empty if the
    signatures are equivalent).
    """
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)
    with bb.compress.zstd.open(b, "rt", encoding="utf-8", num_threads=1) as f:
        b_data = json.load(f, object_hook=SetDecoder)

    # Map legacy field names onto the current ones before comparing
    for data in [a_data, b_data]:
        handle_renames(data)

    def dict_diff(a, b, ignored_vars=set()):
        # Return (changed, added, removed) key sets between two dicts,
        # skipping value changes for keys listed in ignored_vars
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in ignored_vars:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed

    def file_checksums_diff(a, b):
        # Diff two (filename, checksum) lists, pairing entries by filename
        # so a changed checksum is reported as a change rather than as a
        # removal plus an addition
        from collections import Counter

        # Convert lists back to tuples
        a = [(f[0], f[1]) for f in a]
        b = [(f[0], f[1]) for f in b]

        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed

    # Differences in the ignored-variable / ignored-task configuration
    if 'basehash_ignore_vars' in a_data and a_data['basehash_ignore_vars'] != b_data['basehash_ignore_vars']:
        output.append(color_format("{color_title}basehash_ignore_vars changed{color_default} from '%s' to '%s'") % (a_data['basehash_ignore_vars'], b_data['basehash_ignore_vars']))
        if a_data['basehash_ignore_vars'] and b_data['basehash_ignore_vars']:
            output.append("changed items: %s" % a_data['basehash_ignore_vars'].symmetric_difference(b_data['basehash_ignore_vars']))

    if 'taskhash_ignore_tasks' in a_data and a_data['taskhash_ignore_tasks'] != b_data['taskhash_ignore_tasks']:
        output.append(color_format("{color_title}taskhash_ignore_tasks changed{color_default} from '%s' to '%s'") % (a_data['taskhash_ignore_tasks'], b_data['taskhash_ignore_tasks']))
        if a_data['taskhash_ignore_tasks'] and b_data['taskhash_ignore_tasks']:
            output.append("changed items: %s" % a_data['taskhash_ignore_tasks'].symmetric_difference(b_data['taskhash_ignore_tasks']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    # Differences in per-variable dependency lists (ignoring variables both
    # sides agree should be ignored)
    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basehash_ignore_vars'] & b_data['basehash_ignore_vars'])
    if changed:
        for dep in sorted(changed):
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in sorted(added):
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in sorted(removed):
            output.append(color_format("{color_title}Dependency on Variable %s was removed") % (dep))


    # Differences in variable values, rendered as a unified diff for
    # multi-line values and a word diff for single-line values with spaces
    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in sorted(changed):
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    if not 'file_checksum_values' in a_data:
        a_data['file_checksum_values'] = []
    if not 'file_checksum_values' in b_data:
        b_data['file_checksum_values'] = []

    # Differences in the checksums of files the task depends on
    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    if not 'runtaskdeps' in a_data:
        a_data['runtaskdeps'] = {}
    if not 'runtaskdeps' in b_data:
        b_data['runtaskdeps'] = {}

    # Positional comparison of the runtime task dependency lists
    if not collapsed:
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))

        if changed:
            clean_a = clean_basepaths_list(a_data['runtaskdeps'])
            clean_b = clean_basepaths_list(b_data['runtaskdeps'])
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
            output.append("\n".join(changed))


    # Differences in the hashes of dependent tasks, optionally recursing into
    # each changed dependency via recursecb
    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = clean_basepaths(a_data['runtaskhashes'])
        b = clean_basepaths(b_data['runtaskhashes'])
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in sorted(added):
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (dep, b[dep]))
        if removed:
            for dep in sorted(removed):
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (dep, a[dep]))
        if changed:
            for dep in sorted(changed):
                if not collapsed:
                    output.append(color_format("{color_title}Hash for task dependency %s changed{color_default} from %s to %s") % (dep, a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
                            # that hash since in all likelihood, they're the same changes this task also saw.
                            output = [output[-1]] + recout
                            break

    # Taint markers (forced or invalidated tasks); rewrite nostamp UUIDs so
    # the output notes they are random per-run values
    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output
1047
1048
def calc_basehash(sigdata):
    """Recompute the base hash from a loaded siginfo dictionary.

    Concatenates the task's own variable value with each dependent variable
    name and (non-None) value, returning the SHA-256 hex digest.
    """
    varvals = sigdata['varvals']

    base = varvals[sigdata['task']]
    if base is None:
        base = ''

    parts = [base]
    for dep in sigdata['taskdeps']:
        parts.append(dep)
        depval = varvals[dep]
        if depval is not None:
            parts.append(str(depval))

    return hashlib.sha256(''.join(parts).encode("utf-8")).hexdigest()
Patrick Williamsd8c66bc2016-06-20 12:57:21 -05001064
def calc_taskhash(sigdata):
    """Recompute the task hash from a loaded siginfo dictionary.

    Folds the base hash, the hashes of dependent tasks, the file checksums
    (relative paths also contribute their name) and any taint value into a
    SHA-256 hex digest.
    """
    chunks = [sigdata['basehash']]

    for dep in sigdata['runtaskdeps']:
        chunks.append(sigdata['runtaskhashes'][dep])

    for entry in sigdata['file_checksum_values']:
        if entry[1]:
            if "./" in entry[0]:
                chunks.append(entry[0])
            chunks.append(entry[1])

    if 'taint' in sigdata:
        taint = sigdata['taint']
        # nostamp taints contribute only the part after the 8-char 'nostamp:' marker
        chunks.append(taint[8:] if 'nostamp:' in taint else taint)

    return hashlib.sha256(''.join(chunks).encode("utf-8")).hexdigest()
Patrick Williamsd8c66bc2016-06-20 12:57:21 -05001084
1085
def dump_sigfile(a):
    """Render the contents of a single siginfo file as readable text.

    a: path to a zstd-compressed JSON siginfo file.

    Returns a list of description lines covering the ignored variables and
    tasks, dependencies, variable values, file checksums, dependent task
    hashes, taint, and the recomputed base/task hashes for cross-checking
    against the stored values.
    """
    output = []

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)

    # Map legacy field names onto the current ones
    handle_renames(a_data)

    output.append("basehash_ignore_vars: %s" % (sorted(a_data['basehash_ignore_vars'])))

    output.append("taskhash_ignore_tasks: %s" % (sorted(a_data['taskhash_ignore_tasks'] or [])))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep in sorted(a_data['gendeps']):
        output.append("List of dependencies for variable %s is %s" % (dep, sorted(a_data['gendeps'][dep])))

    for dep in sorted(a_data['varvals']):
        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))

    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (sorted(a_data['runtaskdeps'])))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (sorted(a_data['file_checksum_values'])))

    if 'runtaskhashes' in a_data:
        for dep in sorted(a_data['runtaskhashes']):
            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))

    if 'taint' in a_data:
        # Mark nostamp taints as random per-run UUIDs in the output
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    # Recompute the hashes so a reader can spot mismatches with the stored ones
    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(a_data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output