blob: 9fa568f614206d8987a145b7537a8ea8f2e05041 [file] [log] [blame]
Brad Bishopc342db32019-05-15 21:57:59 -04001#
2# SPDX-License-Identifier: GPL-2.0-only
3#
4
Patrick Williamsc124f4f2015-09-15 14:41:29 -05005import hashlib
6import logging
7import os
8import re
9import tempfile
Patrick Williamsc0f7c042017-02-23 20:41:17 -060010import pickle
Patrick Williamsc124f4f2015-09-15 14:41:29 -050011import bb.data
Brad Bishop6e60e8b2018-02-01 10:27:11 -050012import difflib
13import simplediff
Andrew Geisslereff27472021-10-29 15:35:00 -050014import json
15import bb.compress.zstd
Patrick Williamsd8c66bc2016-06-20 12:57:21 -050016from bb.checksum import FileChecksumCache
Brad Bishop08902b02019-08-20 09:16:51 -040017from bb import runqueue
Brad Bishopa34c0302019-09-23 22:34:48 -040018import hashserv
Andrew Geissler475cb722020-07-10 16:00:51 -050019import hashserv.client
Patrick Williamsc124f4f2015-09-15 14:41:29 -050020
# Module-level loggers: general signature-generation messages, plus a
# dedicated child logger for hash-equivalence traffic so it can be
# filtered independently.
logger = logging.getLogger('BitBake.SigGen')
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')
Patrick Williamsc124f4f2015-09-15 14:41:29 -050023
class SetEncoder(json.JSONEncoder):
    """JSON encoder that serializes Python sets.

    Sets are not JSON-serializable by default; they are encoded as a
    ``{"_set_object": [sorted elements]}`` dict so that SetDecoder can
    round-trip them back into sets.  Sorting keeps the output
    deterministic, which matters because these documents feed hashes.
    """
    def default(self, obj):
        if isinstance(obj, set):
            return dict(_set_object=list(sorted(obj)))
        return json.JSONEncoder.default(self, obj)
29
def SetDecoder(dct):
    """json.load object_hook that reverses SetEncoder.

    Any dict shaped like ``{"_set_object": [...]}`` is turned back into a
    set; every other dict is returned unchanged.
    """
    if '_set_object' in dct:
        return set(dct['_set_object'])
    return dct
34
def init(d):
    """Instantiate the signature generator selected by BB_SIGNATURE_HANDLER.

    Scans this module's globals for SignatureGenerator subclasses and
    returns an instance of the one whose ``name`` matches the configured
    handler.  If no generator matches, logs an error listing the available
    generators and falls back to the no-op base class.
    """
    siggens = [obj for obj in globals().values()
                      if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)
    else:
        # for/else: only reached when the loop did not return a match.
        logger.error("Invalid signature generator '%s', using default 'noop'\n"
                     "Available generators: %s", desired,
                     ', '.join(obj.name for obj in siggens))
        return SignatureGenerator(d)
48
class SignatureGenerator(object):
    """No-op signature generator ("noop").

    Defines the interface all signature generators implement.  This base
    implementation does essentially nothing: task hashes are a plain
    sha256 of the task id and stamp files carry no hash component.
    """
    name = "noop"

    # If the derived class supports multiconfig datacaches, set this to True
    # The default is False for backward compatibility with derived signature
    # generators that do not understand multiconfig caches
    supports_multiconfig_datacaches = False

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.unitaskhashes = {}
        self.tidtopn = {}
        self.setscenetasks = set()

    def finalise(self, fn, d, varient):
        # NOTE(review): "varient" spelling kept for interface compatibility.
        return

    def postparsing_clean_cache(self):
        return

    def get_unihash(self, tid):
        """Return the unique hash for a task; the noop generator simply
        returns the task hash itself."""
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        return

    def get_taskhash(self, tid, deps, dataCaches):
        """Compute and record the task hash: a sha256 of the task id."""
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        # rstrip('.') drops the trailing dot when extrainfo is empty.
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, fn, task, stampbase, runtime):
        return

    def invalidate_task(self, task, d, fn):
        """Force a task to rerun by deleting its stamp."""
        bb.build.del_stamp(task, d, fn)

    def dump_sigs(self, dataCache, options):
        return

    def get_taskdata(self):
        """Return the generator state as a tuple (mirrored by set_taskdata)."""
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        self.__init__(data)

    def get_taskhashes(self):
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        return

    def set_setscene_tasks(self, setscene_tasks):
        return

    @classmethod
    def get_data_caches(cls, dataCaches, mc):
        """
        This function returns the datacaches that should be passed to signature
        generator functions. If the signature generator supports multiconfig
        caches, the entire dictionary of data caches is sent, otherwise a
        special proxy is sent that support both index access to all
        multiconfigs, and also direct access for the default multiconfig.

        The proxy class allows code in this class itself to always use
        multiconfig aware code (to ease maintenance), but derived classes that
        are unaware of multiconfig data caches can still access the default
        multiconfig as expected.

        Do not override this function in derived classes; it will be removed in
        the future when support for multiconfig data caches is mandatory
        """
        class DataCacheProxy(object):
            def __init__(self):
                pass

            def __getitem__(self, key):
                return dataCaches[key]

            def __getattr__(self, name):
                return getattr(dataCaches[mc], name)

        if cls.supports_multiconfig_datacaches:
            return dataCaches

        return DataCacheProxy()

    def exit(self):
        return
160
class SignatureGeneratorBasic(SignatureGenerator):
    """The "basic" signature generator.

    Computes real task hashes from the metadata: per-variable dependency
    hashes (basehash), run-time dependencies between tasks, file checksums
    and taints are all folded into a sha256 task hash, and full signature
    data can be dumped to disk for debugging.
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.taskdeps = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.gendeps = {}
        self.lookupcache = {}
        self.setscenetasks = set()
        self.basehash_ignore_vars = set((data.getVar("BB_BASEHASH_IGNORE_VARS") or "").split())
        self.taskhash_ignore_tasks = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        """Compile the BB_TASKHASH_IGNORE_TASKS pattern used by rundep_check()."""
        self.taskhash_ignore_tasks = data.getVar("BB_TASKHASH_IGNORE_TASKS") or None
        if self.taskhash_ignore_tasks:
            self.twl = re.compile(self.taskhash_ignore_tasks)
        else:
            self.twl = None

    def _build_data(self, fn, d):
        """Generate variable dependencies and base hashes for all tasks in fn,
        erroring (unless BB_HASH_IGNORE_MISMATCH) if a reparse changed a hash."""
        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basehash_ignore_vars)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basehash_ignore_vars, fn)

        for task in tasklist:
            tid = fn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before run printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        self.taskdeps[fn] = taskdeps
        self.gendeps[fn] = gendeps
        self.lookupcache[fn] = lookupcache

        return taskdeps

    def set_setscene_tasks(self, setscene_tasks):
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):
        """Finalise hashing for a recipe: compute base hashes and export
        them as BB_BASEHASH:task-* variables."""
        mc = d.getVar("__BBMULTICONFIG", False) or ""
        if variant or mc:
            fn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps = self._build_data(fn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % fn)
            raise

        #Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[fn]:
        #    self.dump_sigtask(fn, task, d.getVar("STAMP"), False)

        for task in taskdeps:
            d.setVar("BB_BASEHASH:task-%s" % task, self.basehash[fn + ":" + task])

    def postparsing_clean_cache(self):
        #
        # After parsing we can remove some things from memory to reduce our memory footprint
        #
        self.gendeps = {}
        self.lookupcache = {}
        self.taskdeps = {}

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the ignore
        # list
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True

    def read_taint(self, fn, task, stampbase):
        """Return the task's taint string from its .taint stamp file, or
        None if the file does not exist."""
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint

    def prep_taskhash(self, tid, deps, dataCaches):
        """Collect everything get_taskhash() needs for tid: base hash,
        filtered run-time dependencies, file checksums and taints."""
        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[fn]

        self.tidtopn[tid] = recipename

        for dep in sorted(deps, key=clean_basepath):
            (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            depname = dataCaches[depmc].pkg_fn[depmcfn]
            if not self.supports_multiconfig_datacaches and mc != depmc:
                # If the signature generator doesn't understand multiconfig
                # data caches, any dependency not in the same multiconfig must
                # be skipped for backward compatibility
                continue
            if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches):
                continue
            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
            self.runtaskdeps[tid].append(dep)

        if task in dataCaches[mc].file_checksums[fn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            for (f,cs) in checksums:
                self.file_checksum_values[tid].append((f,cs))

        taskdep = dataCaches[mc].task_deps[fn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):
        """Fold the base hash, dependency unihashes, file checksums and any
        taint into the task's sha256 hash (prep_taskhash must run first)."""
        data = self.basehash[tid]
        for dep in self.runtaskdeps[tid]:
            data = data + self.get_unihash(dep)

        for (f, cs) in self.file_checksum_values[tid]:
            if cs:
                if "/./" in f:
                    # Paths containing /./ are hashed relative to that marker
                    # so signatures stay stable across build locations.
                    data = data + "./" + f.split("/./")[1]
                data = data + cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                # Skip the "nostamp:" prefix, hash only the uuid payload.
                data = data + self.taints[tid][8:]
            else:
                data = data + self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task])
        return h

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        self.unihash_cache.save(self.unitaskhashes)

    def dump_sigtask(self, fn, task, stampbase, runtime):
        """Write the task's signature data (zstd-compressed JSON) next to
        its stamp, verifying the recorded base/task hashes on the way."""
        tid = fn + ":" + task
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        with bb.utils.umask(0o002):
            bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basehash_ignore_vars'] = self.basehash_ignore_vars
        data['taskhash_ignore_tasks'] = self.taskhash_ignore_tasks
        data['taskdeps'] = self.taskdeps[fn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.lookupcache[fn][task]
        for dep in self.taskdeps[fn][task]:
            if dep in self.basehash_ignore_vars:
                continue
            data['gendeps'][dep] = self.gendeps[fn][dep]
            data['varvals'][dep] = self.lookupcache[fn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = self.runtaskdeps[tid]
            data['file_checksum_values'] = []
            for f,cs in self.file_checksum_values[tid]:
                if "/./" in f:
                    data['file_checksum_values'].append(("./" + f.split("/./")[1], cs))
                else:
                    data['file_checksum_values'].append((os.path.basename(f), cs))
            data['runtaskhashes'] = {}
            for dep in data['runtaskdeps']:
                data['runtaskhashes'][dep] = self.get_unihash(dep)
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(fn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        # Write atomically: dump to a temp file in the target directory,
        # then rename into place; clean up the temp file on failure.
        fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f:
                json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder)
                f.flush()
            os.chmod(tmpfile, 0o664)
            bb.utils.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err

    def dump_sigfn(self, fn, dataCaches, options):
        """Dump signature data for every hashed task of fn, warning when the
        cached base hash disagrees with the freshly generated one."""
        if fn in self.taskdeps:
            for task in self.taskdeps[fn]:
                tid = fn + ":" + task
                mc = bb.runqueue.mc_from_tid(tid)
                if tid not in self.taskhash:
                    continue
                if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
                    bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
                    bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
                self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500447
class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    """The "basichash" signature generator: like "basic", but embeds the
    task hash in stamp file names so a hash change invalidates the stamp."""
    name = "basichash"

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            return self.taskhash[tid]

        # If task is not in basehash, then error
        return self.basehash[tid]

    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
        """Return the stamp file path; with clean=True the hash component is
        replaced by "*" (a glob usable for cleaning)."""
        if taskname != "do_setscene" and taskname.endswith("_setscene"):
            # Setscene variants share the stamp hash of their base task.
            tid = fn + ":" + taskname[:-9]
        else:
            tid = fn + ":" + taskname
        if clean:
            h = "*"
        else:
            h = self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, d, fn):
        """Force the task to rerun by writing a taint (which changes its hash)."""
        bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
        bb.build.write_taint(task, d, fn)
476
Brad Bishop08902b02019-08-20 09:16:51 -0400477class SignatureGeneratorUniHashMixIn(object):
Andrew Geissler82c905d2020-04-13 13:39:40 -0500478 def __init__(self, data):
479 self.extramethod = {}
480 super().__init__(data)
481
Brad Bishop08902b02019-08-20 09:16:51 -0400482 def get_taskdata(self):
Andrew Geissler82c905d2020-04-13 13:39:40 -0500483 return (self.server, self.method, self.extramethod) + super().get_taskdata()
Brad Bishop08902b02019-08-20 09:16:51 -0400484
485 def set_taskdata(self, data):
Andrew Geissler82c905d2020-04-13 13:39:40 -0500486 self.server, self.method, self.extramethod = data[:3]
487 super().set_taskdata(data[3:])
Brad Bishop08902b02019-08-20 09:16:51 -0400488
Brad Bishopa34c0302019-09-23 22:34:48 -0400489 def client(self):
490 if getattr(self, '_client', None) is None:
491 self._client = hashserv.create_client(self.server)
492 return self._client
493
Andrew Geissler9aee5002022-03-30 16:27:02 +0000494 def reset(self, data):
495 if getattr(self, '_client', None) is not None:
496 self._client.close()
497 self._client = None
498 return super().reset(data)
499
500 def exit(self):
501 if getattr(self, '_client', None) is not None:
502 self._client.close()
503 self._client = None
504 return super().exit()
505
Brad Bishop08902b02019-08-20 09:16:51 -0400506 def get_stampfile_hash(self, tid):
507 if tid in self.taskhash:
508 # If a unique hash is reported, use it as the stampfile hash. This
509 # ensures that if a task won't be re-run if the taskhash changes,
510 # but it would result in the same output hash
Andrew Geissler82c905d2020-04-13 13:39:40 -0500511 unihash = self._get_unihash(tid)
Brad Bishop08902b02019-08-20 09:16:51 -0400512 if unihash is not None:
513 return unihash
514
515 return super().get_stampfile_hash(tid)
516
517 def set_unihash(self, tid, unihash):
Andrew Geissler82c905d2020-04-13 13:39:40 -0500518 (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
519 key = mc + ":" + self.tidtopn[tid] + ":" + taskname
520 self.unitaskhashes[key] = (self.taskhash[tid], unihash)
521 self.unihash[tid] = unihash
522
523 def _get_unihash(self, tid, checkkey=None):
524 if tid not in self.tidtopn:
525 return None
526 (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
527 key = mc + ":" + self.tidtopn[tid] + ":" + taskname
528 if key not in self.unitaskhashes:
529 return None
530 if not checkkey:
531 checkkey = self.taskhash[tid]
532 (key, unihash) = self.unitaskhashes[key]
533 if key != checkkey:
534 return None
535 return unihash
Brad Bishop08902b02019-08-20 09:16:51 -0400536
537 def get_unihash(self, tid):
Brad Bishop08902b02019-08-20 09:16:51 -0400538 taskhash = self.taskhash[tid]
539
Brad Bishopa34c0302019-09-23 22:34:48 -0400540 # If its not a setscene task we can return
541 if self.setscenetasks and tid not in self.setscenetasks:
Andrew Geissler82c905d2020-04-13 13:39:40 -0500542 self.unihash[tid] = None
Brad Bishopa34c0302019-09-23 22:34:48 -0400543 return taskhash
544
Brad Bishop08902b02019-08-20 09:16:51 -0400545 # TODO: This cache can grow unbounded. It probably only needs to keep
546 # for each task
Andrew Geissler82c905d2020-04-13 13:39:40 -0500547 unihash = self._get_unihash(tid)
Brad Bishop08902b02019-08-20 09:16:51 -0400548 if unihash is not None:
Andrew Geissler82c905d2020-04-13 13:39:40 -0500549 self.unihash[tid] = unihash
Brad Bishop08902b02019-08-20 09:16:51 -0400550 return unihash
551
552 # In the absence of being able to discover a unique hash from the
553 # server, make it be equivalent to the taskhash. The unique "hash" only
554 # really needs to be a unique string (not even necessarily a hash), but
555 # making it match the taskhash has a few advantages:
556 #
557 # 1) All of the sstate code that assumes hashes can be the same
558 # 2) It provides maximal compatibility with builders that don't use
559 # an equivalency server
560 # 3) The value is easy for multiple independent builders to derive the
561 # same unique hash from the same input. This means that if the
562 # independent builders find the same taskhash, but it isn't reported
563 # to the server, there is a better chance that they will agree on
564 # the unique hash.
565 unihash = taskhash
566
567 try:
Andrew Geissler82c905d2020-04-13 13:39:40 -0500568 method = self.method
569 if tid in self.extramethod:
570 method = method + self.extramethod[tid]
571 data = self.client().get_unihash(method, self.taskhash[tid])
Brad Bishopa34c0302019-09-23 22:34:48 -0400572 if data:
573 unihash = data
Brad Bishop08902b02019-08-20 09:16:51 -0400574 # A unique hash equal to the taskhash is not very interesting,
575 # so it is reported it at debug level 2. If they differ, that
576 # is much more interesting, so it is reported at debug level 1
Andrew Geissler82c905d2020-04-13 13:39:40 -0500577 hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
Brad Bishop08902b02019-08-20 09:16:51 -0400578 else:
Andrew Geisslerd1e89492021-02-12 15:35:20 -0600579 hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
Andrew Geisslerc926e172021-05-07 16:11:35 -0500580 except ConnectionError as e:
Brad Bishopa34c0302019-09-23 22:34:48 -0400581 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
Brad Bishop08902b02019-08-20 09:16:51 -0400582
Andrew Geissler82c905d2020-04-13 13:39:40 -0500583 self.set_unihash(tid, unihash)
584 self.unihash[tid] = unihash
Brad Bishop08902b02019-08-20 09:16:51 -0400585 return unihash
586
    def report_unihash(self, path, task, d):
        """
        Compute the output hash of a finished task and report the
        (taskhash, outhash, unihash) triple to the hash equivalence server.

        path: output tree to hash (passed to the configured hash method)
        task: task name without the "do_" prefix
        d: the task's datastore

        Side effects: writes a "depsig.do_<task>.<pid>" signature dump in T
        and re-points the "depsig.do_<task>" symlink at it; may update the
        task's unihash (cache, datastore and a taskUniHashUpdate event) if
        the server returns a different one.
        """
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        fn = d.getVar('BB_FILENAME')
        tid = fn + ':do_' + task
        key = tid + ':' + taskhash

        # Only report setscene tasks when a setscene task list is in use
        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just exit
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        # Per-pid file name so concurrent tasks can't clobber each other
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            # self.method may be "module.function" (import and call) or a
            # bare name expected to be resolvable by better_eval()
            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                if report_taskdata:
                    # Rewind so the signature dump written above can be read back
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    # Server chose a different unihash; propagate it everywhere
                    hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except ConnectionError as e:
                # Reporting is best-effort: warn but do not fail the task
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

            # Refresh the stable symlink to point at this run's dump
            sigfile_link_path = os.path.join(tempdir, sigfile_link)
            bb.utils.remove(sigfile_link_path)

            try:
                os.symlink(sigfile_name, sigfile_link_path)
            except OSError:
                pass
672
Andrew Geissler82c905d2020-04-13 13:39:40 -0500673 def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
674 try:
675 extra_data = {}
676 method = self.method
677 if tid in self.extramethod:
678 method = method + self.extramethod[tid]
679
680 data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
681 hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))
682
683 if data is None:
684 bb.warn("Server unable to handle unihash report")
685 return False
686
687 finalunihash = data['unihash']
688
689 if finalunihash == current_unihash:
690 hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
691 elif finalunihash == wanted_unihash:
692 hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
693 self.set_unihash(tid, finalunihash)
694 return True
695 else:
696 # TODO: What to do here?
697 hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))
698
Andrew Geisslerc926e172021-05-07 16:11:35 -0500699 except ConnectionError as e:
Andrew Geissler82c905d2020-04-13 13:39:40 -0500700 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
701
702 return False
Brad Bishop08902b02019-08-20 09:16:51 -0400703
704#
705# Dummy class used for bitbake-selftest
706#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    """Hash-equivalence signature generator, only used by bitbake-selftest."""
    name = "TestEquivHash"

    def init_rundepcheck(self, data):
        super().init_rundepcheck(data)
        self.method = "sstate_output_hash"
        self.server = data.getVar('BB_HASHSERVE')
713
Andrew Geissler5a43b432020-06-13 10:46:56 -0500714#
715# Dummy class used for bitbake-selftest
716#
class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash):
    """Multiconfig-aware signature generator, only used by bitbake-selftest."""
    supports_multiconfig_datacaches = True
    name = "TestMulticonfigDepends"
Brad Bishop08902b02019-08-20 09:16:51 -0400720
def dump_this_task(outfile, d):
    """Write the signature data for the currently executing task to outfile."""
    import bb.parse
    recipefile = d.getVar("BB_FILENAME")
    taskname = "do_" + d.getVar("BB_CURRENTTASK")
    refstamp = bb.build.stamp_internal(taskname, d, None, True)
    bb.parse.siggen.dump_sigtask(recipefile, taskname, outfile, "customfile:" + refstamp)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500727
def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    codes = {
        'color_title': '\033[1m',
        'color_default': '\033[0m',
        'color_add': '\033[0;32m',
        'color_remove': '\033[0;31m',
    }
    if enable_color:
        return codes
    # Keep every key present but blank out the escape sequences
    return {key: '' for key in codes}
741
def worddiff_str(oldstr, newstr, colors=None):
    """Return a quoted word-level diff of two strings, marking added and removed words."""
    if not colors:
        colors = init_colors(False)
    pieces = []
    for op, words in simplediff.diff(oldstr.split(' '), newstr.split(' ')):
        chunk = ' '.join(words)
        if op == '+':
            pieces.append('{color_add}{{+{value}+}}{color_default}'.format(value=chunk, **colors))
        elif op == '-':
            pieces.append('{color_remove}[-{value}-]{color_default}'.format(value=chunk, **colors))
        elif op == '=':
            pieces.append(chunk)
    whitespace_note = ''
    # Flag the case where the only difference is whitespace
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(pieces), whitespace_note)
761
def list_inline_diff(oldlist, newlist, colors=None):
    """Return an inline diff of two lists rendered as a bracketed string."""
    if not colors:
        colors = init_colors(False)
    formatted = []
    for op, words in simplediff.diff(oldlist, newlist):
        chunk = ' '.join(words)
        if op == '=':
            formatted.append("'%s'" % chunk)
        elif op == '+':
            formatted.append('{color_add}+{value}{color_default}'.format(value=chunk, **colors))
        elif op == '-':
            formatted.append('{color_remove}-{value}{color_default}'.format(value=chunk, **colors))
    return '[%s]' % (', '.join(formatted))
778
def clean_basepath(basepath):
    """
    Reduce a full recipe task path (optionally prefixed with "mc:<name>:"
    and/or "virtual:..." markers) to "<dir>/<recipe>:<task>" plus any
    virtual prefix and an ":mc:<name>" suffix.
    """
    basepath, parent_dir, recipe_task = basepath.rsplit("/", 2)
    cleaned = parent_dir + '/' + recipe_task

    # Plain absolute path: nothing further to preserve
    if basepath[0] == '/':
        return cleaned

    mc_suffix = ''
    if basepath.startswith("mc:") and basepath.count(':') >= 2:
        _, mc_name, basepath = basepath.split(":", 2)
        mc_suffix = ':mc:' + mc_name

    # mc stuff now removed from basepath. Whatever was next, if present will be the first
    # suffix. ':/', recipe path start, marks the end of this. Something like
    # 'virtual:a[:b[:c]]:/path...' (b and c being optional)
    if basepath[0] != '/':
        cleaned += ':' + basepath.split(':/', 1)[0]

    return cleaned + mc_suffix
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500799
def clean_basepaths(a):
    """Return a copy of dict a with every key passed through clean_basepath()."""
    return {clean_basepath(key): a[key] for key in a}
805
def clean_basepaths_list(a):
    """Return a list with every entry of a passed through clean_basepath()."""
    return [clean_basepath(entry) for entry in a]
811
Andrew Geissler7e0e3c02022-02-25 20:34:39 +0000812# Handled renamed fields
def handle_renames(data):
    """Rename legacy signature-data fields to their current names, in place."""
    if 'basewhitelist' in data:
        data['basehash_ignore_vars'] = data.pop('basewhitelist')
    if 'taskwhitelist' in data:
        data['taskhash_ignore_tasks'] = data.pop('taskwhitelist')
820
821
def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
    """
    Compare two siginfo files (zstd-compressed JSON) and return a list of
    human-readable strings describing the differences.

    a, b: paths to the two siginfo files
    recursecb: optional callback(dep, hash_a, hash_b) used to recurse into
        changed dependent-task hashes; should return a list of strings
    color: emit ANSI colour codes in the output
    collapsed: suppress per-hash detail lines (used when summarising)
    """
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    # Load both siginfo files (zstd-compressed JSON; sets are encoded via SetDecoder)
    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)
    with bb.compress.zstd.open(b, "rt", encoding="utf-8", num_threads=1) as f:
        b_data = json.load(f, object_hook=SetDecoder)

    # Map legacy field names so old and new siginfo files compare cleanly
    for data in [a_data, b_data]:
        handle_renames(data)

    def dict_diff(a, b, ignored_vars=set()):
        """Return (changed, added, removed) key sets between two dicts,
        skipping changed keys listed in ignored_vars.  The shared default
        set is never mutated, so it is safe as a default argument."""
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in ignored_vars:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed

    def file_checksums_diff(a, b):
        """Diff two (filename, checksum) lists, returning changed triples
        (name, old, new) plus added/removed filename lists."""
        from collections import Counter

        # Convert lists back to tuples
        a = [(f[0], f[1]) for f in a]
        b = [(f[0], f[1]) for f in b]

        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                # Pair a removed entry with an added one of the same name => changed
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed

    # Differences in the ignored-variable / ignored-task configuration
    if 'basehash_ignore_vars' in a_data and a_data['basehash_ignore_vars'] != b_data['basehash_ignore_vars']:
        output.append(color_format("{color_title}basehash_ignore_vars changed{color_default} from '%s' to '%s'") % (a_data['basehash_ignore_vars'], b_data['basehash_ignore_vars']))
        if a_data['basehash_ignore_vars'] and b_data['basehash_ignore_vars']:
            output.append("changed items: %s" % a_data['basehash_ignore_vars'].symmetric_difference(b_data['basehash_ignore_vars']))

    if 'taskhash_ignore_tasks' in a_data and a_data['taskhash_ignore_tasks'] != b_data['taskhash_ignore_tasks']:
        output.append(color_format("{color_title}taskhash_ignore_tasks changed{color_default} from '%s' to '%s'") % (a_data['taskhash_ignore_tasks'], b_data['taskhash_ignore_tasks']))
        if a_data['taskhash_ignore_tasks'] and b_data['taskhash_ignore_tasks']:
            output.append("changed items: %s" % a_data['taskhash_ignore_tasks'].symmetric_difference(b_data['taskhash_ignore_tasks']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    # Variable dependency (gendeps) differences, ignoring vars excluded by both sides
    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basehash_ignore_vars'] & b_data['basehash_ignore_vars'])
    if changed:
        for dep in sorted(changed):
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in sorted(added):
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in sorted(removed):
            output.append(color_format("{color_title}Dependency on Variable %s was removed") % (dep))


    # Variable value differences, with diff formatting for multi-line/multi-word values
    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in sorted(changed):
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    # File checksum differences (field may be absent in older siginfo files)
    if not 'file_checksum_values' in a_data:
        a_data['file_checksum_values'] = []
    if not 'file_checksum_values' in b_data:
        b_data['file_checksum_values'] = []

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    if not 'runtaskdeps' in a_data:
        a_data['runtaskdeps'] = {}
    if not 'runtaskdeps' in b_data:
        b_data['runtaskdeps'] = {}

    # Run-time task dependency differences (positional comparison of the dep lists)
    if not collapsed:
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))

        if changed:
            clean_a = clean_basepaths_list(a_data['runtaskdeps'])
            clean_b = clean_basepaths_list(b_data['runtaskdeps'])
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
            output.append("\n".join(changed))


    # Dependent-task hash differences; tries to suppress noise where a dep was
    # merely renamed but kept the same hash
    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = clean_basepaths(a_data['runtaskhashes'])
        b = clean_basepaths(b_data['runtaskhashes'])
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in sorted(added):
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (dep, b[dep]))
        if removed:
            for dep in sorted(removed):
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (dep, a[dep]))
        if changed:
            for dep in sorted(changed):
                if not collapsed:
                    output.append(color_format("{color_title}Hash for task dependency %s changed{color_default} from %s to %s") % (dep, a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
                            # that hash since in all likelyhood, they're the same changes this task also saw.
                            output = [output[-1]] + recout

    # Taint differences (forced/invalidated tasks); nostamp taints carry a uuid4
    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output
1040
1041
def calc_basehash(sigdata):
    """Recompute the base hash (sha256) from dumped signature data."""
    task = sigdata['task']
    basedata = sigdata['varvals'][task]
    if basedata is None:
        basedata = ''

    # Fold in each task dependency name followed by its variable value
    parts = [basedata]
    for dep in sigdata['taskdeps']:
        parts.append(dep)
        val = sigdata['varvals'][dep]
        if val is not None:
            parts.append(str(val))

    return hashlib.sha256("".join(parts).encode("utf-8")).hexdigest()
Patrick Williamsd8c66bc2016-06-20 12:57:21 -05001057
def calc_taskhash(sigdata):
    """Recompute the task hash (sha256) from dumped signature data."""
    parts = [sigdata['basehash']]

    for dep in sigdata['runtaskdeps']:
        parts.append(sigdata['runtaskhashes'][dep])

    for entry in sigdata['file_checksum_values']:
        if entry[1]:
            # Relative paths contribute their name as well as their checksum
            if "./" in entry[0]:
                parts.append(entry[0])
            parts.append(entry[1])

    if 'taint' in sigdata:
        taint = sigdata['taint']
        if 'nostamp:' in taint:
            # Skip the 'nostamp:' prefix so the uuid alone is hashed
            parts.append(taint[8:])
        else:
            parts.append(taint)

    return hashlib.sha256("".join(parts).encode("utf-8")).hexdigest()
Patrick Williamsd8c66bc2016-06-20 12:57:21 -05001077
1078
def dump_sigfile(a):
    """
    Load the siginfo file at path a (zstd-compressed JSON) and return a
    list of human-readable strings describing its contents, including
    recomputed base and task hashes for cross-checking.
    """
    output = []

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)

    # Map legacy field names to their current equivalents
    handle_renames(a_data)

    output.append("basehash_ignore_vars: %s" % (sorted(a_data['basehash_ignore_vars'])))

    output.append("taskhash_ignore_tasks: %s" % (sorted(a_data['taskhash_ignore_tasks'] or [])))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep in sorted(a_data['gendeps']):
        output.append("List of dependencies for variable %s is %s" % (dep, sorted(a_data['gendeps'][dep])))

    for dep in sorted(a_data['varvals']):
        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))

    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (sorted(a_data['runtaskdeps'])))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (sorted(a_data['file_checksum_values'])))

    if 'runtaskhashes' in a_data:
        for dep in sorted(a_data['runtaskhashes']):
            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))

    if 'taint' in a_data:
        # nostamp taints embed a uuid4; annotate that in the output
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    # Recompute the hashes from the dumped data as a consistency check
    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(a_data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output