blob: 137db81a5b422872394f0ca43dbf86aee7bac908 [file] [log] [blame]
Andrew Geissler5199d832021-09-24 16:47:35 -05001#
2# SPDX-License-Identifier: GPL-2.0-only
3#
4
# Directory the SPDX sstate tasks deploy into (see the sstate-outputdirs flags
# of do_create_spdx and do_create_runtime_spdx below).
DEPLOY_DIR_SPDX ??= "${DEPLOY_DIR}/spdx/${MACHINE}"

# The product name that the CVE database uses. Defaults to BPN, but may need to
# be overridden per recipe (for example tiff.bb sets CVE_PRODUCT=libtiff).
CVE_PRODUCT ??= "${BPN}"
CVE_VERSION ??= "${PV}"

# Per-recipe scratch locations: SPDXDEPLOY is the sstate input directory of
# do_create_spdx, SPDXWORK is where spdx_get_src unpacks and patches sources.
SPDXDIR ??= "${WORKDIR}/spdx"
SPDXDEPLOY = "${SPDXDIR}/deploy"
SPDXWORK = "${SPDXDIR}/work"

# Used to build the "Tool: <name> - <version>" annotator string.
SPDX_TOOL_NAME ??= "oe-spdx-creator"
SPDX_TOOL_VERSION ??= "1.0"

# sstate input directory of do_create_runtime_spdx.
SPDXRUNTIMEDEPLOY = "${SPDXDIR}/runtime-deploy"

# Feature switches; each is enabled only when set to exactly "1".
# SPDX_INCLUDE_SOURCES: scan the recipe sources in do_create_spdx.
# SPDX_ARCHIVE_SOURCES / SPDX_ARCHIVE_PACKAGED: also write a .tar.zst archive
# of the scanned source / packaged files.
SPDX_INCLUDE_SOURCES ??= "0"
SPDX_INCLUDE_PACKAGED ??= "0"
SPDX_ARCHIVE_SOURCES ??= "0"
SPDX_ARCHIVE_PACKAGED ??= "0"

# Inputs to get_doc_namespace(): the DNS name hashed into the namespace UUID
# and the URL prefix of every document namespace.
SPDX_UUID_NAMESPACE ??= "sbom.openembedded.org"
SPDX_NAMESPACE_PREFIX ??= "http://spdx.org/spdxdoc"

# JSON license list that the anonymous python function loads into
# SPDX_LICENSE_DATA.
SPDX_LICENSES ??= "${COREBASE}/meta/files/spdx-licenses.json"

# Organization recorded in the "creators" list of every generated document.
SPDX_ORG ??= "OpenEmbedded ()"
SPDX_SUPPLIER ??= "Organization: ${SPDX_ORG}"
SPDX_SUPPLIER[doc] = "The SPDX PackageSupplier field for SPDX packages created from \
    this recipe. For SPDX documents create using this class during the build, this \
    is the contact information for the person or organization who is doing the \
    build."

# Image completion requires the kernel's SPDX document to have been created.
do_image_complete[depends] = "virtual/kernel:do_create_spdx"
39
def extract_licenses(filename):
    """
    Collect the SPDX-License-Identifier tags found near the top of a file.

    Only the first 15000 bytes of filename are scanned. Returns the list of
    license expressions (as str) in the order they appear, or None when no
    tag was found or the file could not be read (a warning is logged in the
    latter case).
    """
    import re

    tag_pattern = re.compile(rb'^\W*SPDX-License-Identifier:\s*([ \w\d.()+-]+?)(?:\s+\W*)?$', re.MULTILINE)

    try:
        with open(filename, 'rb') as src:
            limit = min(15000, os.stat(filename).st_size)
            head = src.read(limit)
            matches = tag_pattern.findall(head)
            if matches:
                return [m.decode('ascii') for m in matches]
    except Exception as e:
        # Best effort: an unreadable file must not fail the whole task.
        bb.warn(f"Exception reading {filename}: {e}")
    return None
56
def get_doc_namespace(d, doc):
    """
    Build the documentNamespace URI for an SPDX document.

    The URI is "<SPDX_NAMESPACE_PREFIX>/<doc.name>-<uuid>", where the UUID is
    a version 5 UUID of doc.name inside a namespace that is itself derived
    from SPDX_UUID_NAMESPACE, so the same document name always maps to the
    same URI.
    """
    import uuid
    base_uuid = uuid.uuid5(uuid.NAMESPACE_DNS, d.getVar("SPDX_UUID_NAMESPACE"))
    prefix = d.getVar("SPDX_NAMESPACE_PREFIX")
    doc_uuid = uuid.uuid5(base_uuid, doc.name)
    return "%s/%s-%s" % (prefix, doc.name, doc_uuid)
61
def create_annotation(d, comment):
    """
    Return a new SPDX annotation of type "OTHER" carrying comment.

    The annotator is "Tool: <SPDX_TOOL_NAME> - <SPDX_TOOL_VERSION>" and the
    annotation date is the current UTC time.
    """
    from datetime import datetime, timezone

    stamp = datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    note = oe.spdx.SPDXAnnotation()
    note.annotationDate = stamp
    note.annotationType = "OTHER"
    tool_name = d.getVar("SPDX_TOOL_NAME")
    tool_version = d.getVar("SPDX_TOOL_VERSION")
    note.annotator = "Tool: %s - %s" % (tool_name, tool_version)
    note.comment = comment
    return note
72
def recipe_spdx_is_native(d, recipe):
    """
    Tell whether an SPDX recipe package carries this tool's "isNative" mark.

    Returns True when any annotation of recipe has type "OTHER", was written
    by "Tool: <SPDX_TOOL_NAME> - <SPDX_TOOL_VERSION>" and has the comment
    "isNative"; False otherwise.
    """
    for note in recipe.annotations:
        if note.annotationType != "OTHER":
            continue
        expected_annotator = "Tool: %s - %s" % (d.getVar("SPDX_TOOL_NAME"), d.getVar("SPDX_TOOL_VERSION"))
        if note.annotator == expected_annotator and note.comment == "isNative":
            return True
    return False
Andrew Geissler5199d832021-09-24 16:47:35 -050077
def is_work_shared_spdx(d):
    """
    Return a truthy value when the recipe inherits the kernel class or its
    WORKDIR path contains 'work-shared'.
    """
    inherits_kernel = bb.data.inherits_class('kernel', d)
    if inherits_kernel:
        return inherits_kernel
    return 'work-shared' in d.getVar('WORKDIR')
Andrew Geissler5199d832021-09-24 16:47:35 -050080
python() {
    import json

    # Load the SPDX license list once and cache it in the datastore; skip the
    # work when SPDX_LICENSE_DATA is already populated.
    if not d.getVar("SPDX_LICENSE_DATA"):
        with open(d.getVar("SPDX_LICENSES"), "r") as src:
            spdx_data = json.load(src)
            # Index the license array by licenseId for direct lookups.
            by_id = {}
            for entry in spdx_data["licenses"]:
                by_id[entry["licenseId"]] = entry
            spdx_data["licenses"] = by_id
            d.setVar("SPDX_LICENSE_DATA", spdx_data)
}
92
93def convert_license_to_spdx(lic, document, d, existing={}):
94 from pathlib import Path
95 import oe.spdx
96
Andrew Geissler5199d832021-09-24 16:47:35 -050097 license_data = d.getVar("SPDX_LICENSE_DATA")
98 extracted = {}
99
100 def add_extracted_license(ident, name):
101 nonlocal document
102
103 if name in extracted:
104 return
105
106 extracted_info = oe.spdx.SPDXExtractedLicensingInfo()
107 extracted_info.name = name
108 extracted_info.licenseId = ident
109 extracted_info.extractedText = None
110
111 if name == "PD":
112 # Special-case this.
113 extracted_info.extractedText = "Software released to the public domain"
Andrew Geissler9aee5002022-03-30 16:27:02 +0000114 else:
115 # Seach for the license in COMMON_LICENSE_DIR and LICENSE_PATH
Andrew Geissler595f6302022-01-24 19:11:47 +0000116 for directory in [d.getVar('COMMON_LICENSE_DIR')] + (d.getVar('LICENSE_PATH') or '').split():
Andrew Geissler5199d832021-09-24 16:47:35 -0500117 try:
118 with (Path(directory) / name).open(errors="replace") as f:
119 extracted_info.extractedText = f.read()
120 break
121 except FileNotFoundError:
122 pass
123 if extracted_info.extractedText is None:
Andrew Geissler9aee5002022-03-30 16:27:02 +0000124 # If it's not SPDX or PD, then NO_GENERIC_LICENSE must be set
125 filename = d.getVarFlag('NO_GENERIC_LICENSE', name)
126 if filename:
127 filename = d.expand("${S}/" + filename)
128 with open(filename, errors="replace") as f:
129 extracted_info.extractedText = f.read()
130 else:
131 bb.error("Cannot find any text for license %s" % name)
Andrew Geissler5199d832021-09-24 16:47:35 -0500132
133 extracted[name] = extracted_info
134 document.hasExtractedLicensingInfos.append(extracted_info)
135
136 def convert(l):
137 if l == "(" or l == ")":
138 return l
139
140 if l == "&":
141 return "AND"
142
143 if l == "|":
144 return "OR"
145
146 if l == "CLOSED":
147 return "NONE"
148
149 spdx_license = d.getVarFlag("SPDXLICENSEMAP", l) or l
150 if spdx_license in license_data["licenses"]:
151 return spdx_license
152
153 try:
154 spdx_license = existing[l]
155 except KeyError:
156 spdx_license = "LicenseRef-" + l
157 add_extracted_license(spdx_license, l)
158
159 return spdx_license
160
161 lic_split = lic.replace("(", " ( ").replace(")", " ) ").split()
162
163 return ' '.join(convert(l) for l in lic_split)
164
def process_sources(d):
    """
    Decide whether the sources of the current recipe should be scanned.

    Returns False for glibc-locale* (checked against the first alternative
    PROVIDES name when PN is listed in ASSUME_PROVIDED), for libtool-cross,
    libgcc-initial and shadow-sysroot, and for recipes whose BPN is gcc or
    libgcc; True otherwise.
    """
    recipe_pn = d.getVar('PN')
    pn = recipe_pn
    if recipe_pn in (d.getVar("ASSUME_PROVIDED") or "").split():
        # Use the first provided name that differs from PN, if any.
        pn = next((p for p in d.getVar("PROVIDES").split() if p != recipe_pn), recipe_pn)

    # glibc-locale: do_fetch, do_unpack and do_patch tasks have been deleted,
    # so avoid archiving source here.
    if pn.startswith('glibc-locale'):
        return False

    # These are matched against the recipe's own PN.
    if recipe_pn in ("libtool-cross", "libgcc-initial", "shadow-sysroot"):
        return False

    # We just archive gcc-source for all the gcc related recipes
    if d.getVar('BPN') in ['gcc', 'libgcc']:
        bb.debug(1, 'spdx: There is bug in scan of %s is, do nothing' % pn)
        return False

    return True
191
192
def add_package_files(d, doc, spdx_pkg, topdir, get_spdxid, get_types, *, archive=None, ignore_dirs=(), ignore_top_level_dirs=()):
    """
    Add every regular file below topdir to an SPDX document and package.

    d: datastore; SOURCE_DATE_EPOCH is read from it.
    doc: SPDX document receiving the files and CONTAINS relationships.
    spdx_pkg: SPDX package that owns the files; its hasFiles list and
        package verification code are updated.
    topdir: directory to walk.
    get_spdxid: callable(file_counter) -> SPDXID for the file.
    get_types: callable(filepath) -> iterable of SPDX file types.
    archive: optional open tarfile; when given, each file is also added to
        it with root ownership and an mtime clamped to SOURCE_DATE_EPOCH.
    ignore_dirs: directory names skipped at every level.
    ignore_top_level_dirs: directory names skipped directly below topdir.

    Symlinks and non-regular files are skipped. Returns the list of
    SPDXFile objects that were created.
    """
    from pathlib import Path
    import oe.spdx
    import hashlib

    # Normalise to an int or None: an unset *or empty* value must not reach
    # the mtime comparison below as a string.
    epoch_value = d.getVar("SOURCE_DATE_EPOCH")
    source_date_epoch = int(epoch_value) if epoch_value else None

    sha1s = []
    spdx_files = []
    always_skipped = set(ignore_dirs)
    top_level_skipped = always_skipped | set(ignore_top_level_dirs)

    file_counter = 1
    for subdir, dirs, files in os.walk(topdir):
        # Prune in place so os.walk() does not descend into ignored
        # directories. The loop variable is deliberately not named "d" so it
        # cannot be confused with the datastore.
        skipped = top_level_skipped if subdir == str(topdir) else always_skipped
        dirs[:] = [name for name in dirs if name not in skipped]

        # Visit entries in a stable order so that the SPDXIDs derived from
        # file_counter do not depend on filesystem enumeration order.
        dirs.sort()
        files.sort()

        for file in files:
            filepath = Path(subdir) / file
            filename = str(filepath.relative_to(topdir))

            if filepath.is_file() and not filepath.is_symlink():
                spdx_file = oe.spdx.SPDXFile()
                spdx_file.SPDXID = get_spdxid(file_counter)
                for t in get_types(filepath):
                    spdx_file.fileTypes.append(t)
                spdx_file.fileName = filename

                if archive is not None:
                    with filepath.open("rb") as f:
                        info = archive.gettarinfo(fileobj=f)
                        info.name = filename
                        info.uid = 0
                        info.gid = 0
                        info.uname = "root"
                        info.gname = "root"

                        if source_date_epoch is not None and info.mtime > source_date_epoch:
                            info.mtime = source_date_epoch

                        archive.addfile(info, f)

                sha1 = bb.utils.sha1_file(filepath)
                sha1s.append(sha1)
                spdx_file.checksums.append(oe.spdx.SPDXChecksum(
                        algorithm="SHA1",
                        checksumValue=sha1,
                    ))
                spdx_file.checksums.append(oe.spdx.SPDXChecksum(
                        algorithm="SHA256",
                        checksumValue=bb.utils.sha256_file(filepath),
                    ))

                if "SOURCE" in spdx_file.fileTypes:
                    extracted_lics = extract_licenses(filepath)
                    if extracted_lics:
                        spdx_file.licenseInfoInFiles = extracted_lics

                doc.files.append(spdx_file)
                doc.add_relationship(spdx_pkg, "CONTAINS", spdx_file)
                spdx_pkg.hasFiles.append(spdx_file.SPDXID)

                spdx_files.append(spdx_file)

                file_counter += 1

    # The verification code is the SHA1 over the sorted per-file SHA1 digests.
    sha1s.sort()
    verifier = hashlib.sha1()
    for v in sha1s:
        verifier.update(v.encode("utf-8"))
    spdx_pkg.packageVerificationCode.packageVerificationCodeValue = verifier.hexdigest()

    return spdx_files
267
268
def add_package_sources_from_debug(d, package_doc, spdx_package, package, package_files, sources):
    """
    Link packaged files to the source files they were generated from.

    For every file of package that has "debugsrc" entries in its extended
    package data, each debug source path is looked up under PKGD, the target
    and native sysroots and STAGING_KERNEL_DIR. When the located file's
    SHA256 is present in sources, a GENERATED_FROM relationship to that
    source file is added to package_doc (adding an external document
    reference when needed); otherwise the relationship points at
    "NOASSERTION".

    package_files is the list of SPDX files already created for the package;
    sources maps SHA256 digests to dependency source entries. spdx_package is
    accepted for interface compatibility and is not used.
    """
    from pathlib import Path
    import oe.packagedata
    import oe.spdx

    kernel_src_prefix = "/usr/src/kernel/"

    debug_search_paths = [
        Path(d.getVar('PKGD')),
        Path(d.getVar('STAGING_DIR_TARGET')),
        Path(d.getVar('STAGING_DIR_NATIVE')),
        Path(d.getVar('STAGING_KERNEL_DIR')),
    ]

    pkg_data = oe.packagedata.read_subpkgdata_extended(package, d)

    if pkg_data is None:
        return

    for file_path, file_data in pkg_data["files_info"].items():
        if "debugsrc" not in file_data:
            continue

        for pkg_file in package_files:
            if file_path.lstrip("/") == pkg_file.fileName.lstrip("/"):
                break
        else:
            # bb.fatal() aborts the task, so nothing follows it.
            bb.fatal("No package file found for %s" % str(file_path))

        for debugsrc in file_data["debugsrc"]:
            # Kernel debug sources are recorded relative to /usr/src/kernel/.
            # Always produce a relative path: joining an absolute path onto
            # the search directory would discard the search directory.
            if debugsrc.startswith(kernel_src_prefix):
                relative_src = debugsrc[len(kernel_src_prefix):]
            else:
                relative_src = debugsrc.lstrip("/")

            ref_id = "NOASSERTION"
            for search in debug_search_paths:
                debugsrc_path = search / relative_src
                if not debugsrc_path.exists():
                    continue

                file_sha256 = bb.utils.sha256_file(debugsrc_path)

                if file_sha256 in sources:
                    source_file = sources[file_sha256]

                    doc_ref = package_doc.find_external_document_ref(source_file.doc.documentNamespace)
                    if doc_ref is None:
                        doc_ref = oe.spdx.SPDXExternalDocumentRef()
                        doc_ref.externalDocumentId = "DocumentRef-dependency-" + source_file.doc.name
                        doc_ref.spdxDocument = source_file.doc.documentNamespace
                        doc_ref.checksum.algorithm = "SHA1"
                        doc_ref.checksum.checksumValue = source_file.doc_sha1
                        package_doc.externalDocumentRefs.append(doc_ref)

                    ref_id = "%s:%s" % (doc_ref.externalDocumentId, source_file.file.SPDXID)
                else:
                    bb.debug(1, "Debug source %s with SHA256 %s not found in any dependency" % (str(debugsrc_path), file_sha256))
                # Only the first search path that contains the file is used.
                break
            else:
                bb.debug(1, "Debug source %s not found" % debugsrc)

            package_doc.add_relationship(pkg_file, "GENERATED_FROM", ref_id, comment=debugsrc)
330
def collect_dep_recipes(d, doc, spdx_recipe):
    """
    Reference the SPDX documents of all recipes this recipe depends on.

    The dependencies are the recipes (other than PN) that have a
    do_create_spdx entry in BB_TASKDEPDATA. For each one whose recipe
    document contains a package named after it, an external document
    reference and a BUILD_DEPENDENCY_OF relationship to spdx_recipe are
    added to doc.

    Returns the list of oe.sbom.DepRecipe entries that were collected.
    """
    from pathlib import Path
    import oe.sbom
    import oe.spdx

    recipes_dir = Path(d.getVar("DEPLOY_DIR_SPDX")) / "recipes"
    taskdepdata = d.getVar("BB_TASKDEPDATA", False)

    dep_names = set()
    for entry in taskdepdata.values():
        if entry[1] == "do_create_spdx" and entry[0] != d.getVar("PN"):
            dep_names.add(entry[0])

    collected = []
    for dep_pn in sorted(dep_names):
        dep_doc_path = recipes_dir / ("recipe-%s.spdx.json" % dep_pn)
        dep_doc, dep_sha1 = oe.sbom.read_doc(dep_doc_path)

        # Find the package describing the recipe itself; skip the dependency
        # when its document has none.
        dep_pkg = None
        found = False
        for candidate in dep_doc.packages:
            if candidate.name == dep_pn:
                dep_pkg = candidate
                found = True
                break
        if not found:
            continue

        collected.append(oe.sbom.DepRecipe(dep_doc, dep_sha1, dep_pkg))

        ref = oe.spdx.SPDXExternalDocumentRef()
        ref.externalDocumentId = "DocumentRef-dependency-" + dep_doc.name
        ref.spdxDocument = dep_doc.documentNamespace
        ref.checksum.algorithm = "SHA1"
        ref.checksum.checksumValue = dep_sha1
        doc.externalDocumentRefs.append(ref)

        dep_spdxid = "%s:%s" % (ref.externalDocumentId, dep_pkg.SPDXID)
        doc.add_relationship(dep_spdxid, "BUILD_DEPENDENCY_OF", spdx_recipe)

    return collected
373
# Keep BB_TASKDEPDATA out of the variable dependencies tracked for
# collect_dep_recipes.
collect_dep_recipes[vardepsexclude] += "BB_TASKDEPDATA"
375
376
def collect_dep_sources(d, dep_recipes):
    """
    Index the source files of the given dependency recipes by SHA256.

    Returns a dict mapping each SHA256 checksum value to an
    oe.sbom.DepSource for a file that belongs to the dependency's recipe
    package and has the "SOURCE" file type. Recipes marked as native are
    ignored.
    """
    import oe.sbom

    by_sha256 = {}
    for dep in dep_recipes:
        # Don't collect sources from native recipes as they
        # match non-native sources also.
        if recipe_spdx_is_native(d, dep.recipe):
            continue

        owned_ids = set(dep.recipe.hasFiles)
        for spdx_file in dep.doc.files:
            if spdx_file.SPDXID not in owned_ids:
                continue
            if "SOURCE" not in spdx_file.fileTypes:
                continue

            # Only the first SHA256 checksum of a file is indexed.
            for checksum in spdx_file.checksums:
                if checksum.algorithm != "SHA256":
                    continue
                by_sha256[checksum.checksumValue] = oe.sbom.DepSource(dep.doc, dep.doc_sha1, dep.recipe, spdx_file)
                break

    return by_sha256
399
400
# Task: write the SPDX document for the recipe ("recipe-<PN>") and, for
# non-native recipes, one SPDX document per packaged output package.
python do_create_spdx() {
    from datetime import datetime, timezone
    import oe.sbom
    import oe.spdx
    import uuid
    from pathlib import Path
    from contextlib import contextmanager
    import oe.cve_check

    # Context manager yielding a zstd-compressed streaming tarfile at `name`
    # when `guard` is true, and None otherwise, so callers can use a single
    # "with" block whether or not archiving is enabled.
    @contextmanager
    def optional_tarfile(name, guard, mode="w"):
        import tarfile
        import bb.compress.zstd

        num_threads = int(d.getVar("BB_NUMBER_THREADS"))

        if guard:
            name.parent.mkdir(parents=True, exist_ok=True)
            with bb.compress.zstd.open(name, mode=mode + "b", num_threads=num_threads) as f:
                with tarfile.open(fileobj=f, mode=mode + "|") as tf:
                    yield tf
        else:
            yield None


    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
    spdx_workdir = Path(d.getVar("SPDXWORK"))
    # Feature switches are enabled only by the exact value "1".
    include_packaged = d.getVar("SPDX_INCLUDE_PACKAGED") == "1"
    include_sources = d.getVar("SPDX_INCLUDE_SOURCES") == "1"
    archive_sources = d.getVar("SPDX_ARCHIVE_SOURCES") == "1"
    archive_packaged = d.getVar("SPDX_ARCHIVE_PACKAGED") == "1"

    # One timestamp shared by every document written by this task run.
    creation_time = datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # --- Recipe document -------------------------------------------------
    doc = oe.spdx.SPDXDocument()

    doc.name = "recipe-" + d.getVar("PN")
    doc.documentNamespace = get_doc_namespace(d, doc)
    doc.creationInfo.created = creation_time
    doc.creationInfo.comment = "This document was created by analyzing recipe files during the build."
    doc.creationInfo.licenseListVersion = d.getVar("SPDX_LICENSE_DATA")["licenseListVersion"]
    doc.creationInfo.creators.append("Tool: OpenEmbedded Core create-spdx.bbclass")
    doc.creationInfo.creators.append("Organization: %s" % d.getVar("SPDX_ORG"))
    doc.creationInfo.creators.append("Person: N/A ()")

    recipe = oe.spdx.SPDXPackage()
    recipe.name = d.getVar("PN")
    recipe.versionInfo = d.getVar("PV")
    recipe.SPDXID = oe.sbom.get_recipe_spdxid(d)
    recipe.packageSupplier = d.getVar("SPDX_SUPPLIER")
    # Mark native/cross recipes so recipe_spdx_is_native() can recognise them.
    if bb.data.inherits_class("native", d) or bb.data.inherits_class("cross", d):
        recipe.annotations.append(create_annotation(d, "isNative"))

    # The download location is the first SRC_URI entry that is not a local
    # file:// entry; "NOASSERTION" when there is none.
    for s in d.getVar('SRC_URI').split():
        if not s.startswith("file://"):
            recipe.downloadLocation = s
            break
    else:
        recipe.downloadLocation = "NOASSERTION"

    homepage = d.getVar("HOMEPAGE")
    if homepage:
        recipe.homepage = homepage

    license = d.getVar("LICENSE")
    if license:
        recipe.licenseDeclared = convert_license_to_spdx(license, doc, d)

    summary = d.getVar("SUMMARY")
    if summary:
        recipe.summary = summary

    description = d.getVar("DESCRIPTION")
    if description:
        recipe.description = description

    # Some CVEs may be patched during the build process without incrementing the version number,
    # so querying for CVEs based on the CPE id can lead to false positives. To account for this,
    # save the CVEs fixed by patches to source information field in the SPDX.
    patched_cves = oe.cve_check.get_patched_cves(d)
    patched_cves = list(patched_cves)
    patched_cves = ' '.join(patched_cves)
    if patched_cves:
        recipe.sourceInfo = "CVEs fixed: " + patched_cves

    # Record each CPE id as a SECURITY external reference on the recipe.
    cpe_ids = oe.cve_check.get_cpe_ids(d.getVar("CVE_PRODUCT"), d.getVar("CVE_VERSION"))
    if cpe_ids:
        for cpe_id in cpe_ids:
            cpe = oe.spdx.SPDXExternalReference()
            cpe.referenceCategory = "SECURITY"
            cpe.referenceType = "http://spdx.org/rdf/references/cpe23Type"
            cpe.referenceLocator = cpe_id
            recipe.externalRefs.append(cpe)

    doc.packages.append(recipe)
    doc.add_relationship(doc, "DESCRIBES", recipe)

    # Optionally unpack/patch the sources into SPDXWORK and add every file to
    # the recipe document (and to a tar archive when SPDX_ARCHIVE_SOURCES is
    # enabled).
    if process_sources(d) and include_sources:
        recipe_archive = deploy_dir_spdx / "recipes" / (doc.name + ".tar.zst")
        with optional_tarfile(recipe_archive, archive_sources) as archive:
            spdx_get_src(d)

            add_package_files(
                d,
                doc,
                recipe,
                spdx_workdir,
                lambda file_counter: "SPDXRef-SourceFile-%s-%d" % (d.getVar("PN"), file_counter),
                lambda filepath: ["SOURCE"],
                ignore_dirs=[".git"],
                ignore_top_level_dirs=["temp"],
                archive=archive,
            )

            if archive is not None:
                recipe.packageFileName = str(recipe_archive.name)

    dep_recipes = collect_dep_recipes(d, doc, recipe)

    # The recipe document must be written before the package documents so
    # that its SHA1 can be embedded in their external document reference.
    doc_sha1 = oe.sbom.write_doc(d, doc, "recipes")
    dep_recipes.append(oe.sbom.DepRecipe(doc, doc_sha1, recipe))

    recipe_ref = oe.spdx.SPDXExternalDocumentRef()
    recipe_ref.externalDocumentId = "DocumentRef-recipe-" + recipe.name
    recipe_ref.spdxDocument = doc.documentNamespace
    recipe_ref.checksum.algorithm = "SHA1"
    recipe_ref.checksum.checksumValue = doc_sha1

    sources = collect_dep_sources(d, dep_recipes)
    # Licenses already extracted into the recipe document, keyed by name and
    # addressed through the recipe document reference, so package documents
    # can reuse them instead of extracting the text again.
    found_licenses = {license.name:recipe_ref.externalDocumentId + ":" + license.licenseId for license in doc.hasExtractedLicensingInfos}

    # --- Package documents (non-native recipes only) ----------------------
    if not recipe_spdx_is_native(d, recipe):
        bb.build.exec_func("read_subpackage_metadata", d)

        pkgdest = Path(d.getVar("PKGDEST"))
        for package in d.getVar("PACKAGES").split():
            if not oe.packagedata.packaged(package, d):
                continue

            package_doc = oe.spdx.SPDXDocument()
            # Use the renamed package name (PKG:<package>) when there is one.
            pkg_name = d.getVar("PKG:%s" % package) or package
            package_doc.name = pkg_name
            package_doc.documentNamespace = get_doc_namespace(d, package_doc)
            package_doc.creationInfo.created = creation_time
            package_doc.creationInfo.comment = "This document was created by analyzing packages created during the build."
            package_doc.creationInfo.licenseListVersion = d.getVar("SPDX_LICENSE_DATA")["licenseListVersion"]
            package_doc.creationInfo.creators.append("Tool: OpenEmbedded Core create-spdx.bbclass")
            package_doc.creationInfo.creators.append("Organization: %s" % d.getVar("SPDX_ORG"))
            package_doc.creationInfo.creators.append("Person: N/A ()")
            package_doc.externalDocumentRefs.append(recipe_ref)

            # A per-package LICENSE takes precedence over the recipe LICENSE.
            package_license = d.getVar("LICENSE:%s" % package) or d.getVar("LICENSE")

            spdx_package = oe.spdx.SPDXPackage()

            spdx_package.SPDXID = oe.sbom.get_package_spdxid(pkg_name)
            spdx_package.name = pkg_name
            spdx_package.versionInfo = d.getVar("PV")
            spdx_package.licenseDeclared = convert_license_to_spdx(package_license, package_doc, d, found_licenses)
            spdx_package.packageSupplier = d.getVar("SPDX_SUPPLIER")

            package_doc.packages.append(spdx_package)

            package_doc.add_relationship(spdx_package, "GENERATED_FROM", "%s:%s" % (recipe_ref.externalDocumentId, recipe.SPDXID))
            package_doc.add_relationship(package_doc, "DESCRIBES", spdx_package)

            package_archive = deploy_dir_spdx / "packages" / (package_doc.name + ".tar.zst")
            with optional_tarfile(package_archive, archive_packaged) as archive:
                package_files = add_package_files(
                    d,
                    package_doc,
                    spdx_package,
                    pkgdest / package,
                    lambda file_counter: oe.sbom.get_packaged_file_spdxid(pkg_name, file_counter),
                    lambda filepath: ["BINARY"],
                    archive=archive,
                )

                if archive is not None:
                    spdx_package.packageFileName = str(package_archive.name)

            add_package_sources_from_debug(d, package_doc, spdx_package, package, package_files, sources)

            oe.sbom.write_doc(d, package_doc, "packages")
}
# NOTE: depending on do_unpack is a hack that is necessary to get its dependencies for archiving the source
addtask do_create_spdx after do_package do_packagedata do_unpack before do_populate_sdk do_build do_rm_work

# do_create_spdx is an sstate task: SPDXDEPLOY is its input directory and
# DEPLOY_DIR_SPDX its output directory.
SSTATETASKS += "do_create_spdx"
do_create_spdx[sstate-inputdirs] = "${SPDXDEPLOY}"
do_create_spdx[sstate-outputdirs] = "${DEPLOY_DIR_SPDX}"

# Setscene variant of do_create_spdx; delegates to sstate_setscene().
python do_create_spdx_setscene () {
    sstate_setscene(d)
}
addtask do_create_spdx_setscene

do_create_spdx[dirs] = "${SPDXWORK}"
do_create_spdx[cleandirs] = "${SPDXDEPLOY} ${SPDXWORK}"
do_create_spdx[depends] += "${PATCHDEPENDENCY}"
# Run do_create_spdx of the recipe's build-time dependencies first;
# collect_dep_recipes() reads the documents they produce.
do_create_spdx[deptask] = "do_create_spdx"
602
def collect_package_providers(d):
    """
    Map every runtime-provided name to the package that provides it.

    The recipes considered are all recipes named in BB_TASKDEPDATA plus the
    current recipe (PN), which is processed last. For each of their
    packages, the package name itself and every name in its RPROVIDES are
    mapped to that package; when several packages provide the same name the
    one processed last wins.

    Returns a dict of provided name -> package name.
    """
    # Import the module that is actually used instead of relying on another
    # import having loaded it as a side effect.
    import oe.packagedata

    providers = {}

    this_pn = d.getVar("PN")
    taskdepdata = d.getVar("BB_TASKDEPDATA", False)
    deps = sorted(set(
        dep[0] for dep in taskdepdata.values() if dep[0] != this_pn
    ))
    deps.append(this_pn)

    for dep_pn in deps:
        recipe_data = oe.packagedata.read_pkgdata(dep_pn, d)

        for pkg in recipe_data.get("PACKAGES", "").split():

            pkg_data = oe.packagedata.read_subpkgdata_dict(pkg, d)
            # Only the provided names matter here, not their version
            # constraints.
            rprovides = set(bb.utils.explode_dep_versions2(pkg_data.get("RPROVIDES", "")))
            rprovides.add(pkg)

            for r in rprovides:
                providers[r] = pkg

    return providers
632
# Keep BB_TASKDEPDATA out of the variable dependencies tracked for
# collect_package_providers.
collect_package_providers[vardepsexclude] += "BB_TASKDEPDATA"
634
# Task: for every packaged output package of a non-native recipe, write a
# "runtime-<pkg>" SPDX document that amends the package document with
# RUNTIME_DEPENDENCY_OF relationships derived from RDEPENDS.
python do_create_runtime_spdx() {
    from datetime import datetime, timezone
    import oe.sbom
    import oe.spdx
    import oe.packagedata
    from pathlib import Path

    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
    spdx_deploy = Path(d.getVar("SPDXRUNTIMEDEPLOY"))
    is_native = bb.data.inherits_class("native", d) or bb.data.inherits_class("cross", d)

    creation_time = datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Map of runtime-provided name -> providing package.
    providers = collect_package_providers(d)

    if not is_native:
        bb.build.exec_func("read_subpackage_metadata", d)

        # Cache of provider package -> (SPDX package, external document ref),
        # shared across all packages of this recipe so each dependency
        # document is read only once.
        dep_package_cache = {}

        pkgdest = Path(d.getVar("PKGDEST"))
        for package in d.getVar("PACKAGES").split():
            # Work on a copy with the package name added to OVERRIDES so that
            # package-specific values (e.g. RDEPENDS) are the ones read below.
            localdata = bb.data.createCopy(d)
            pkg_name = d.getVar("PKG:%s" % package) or package
            localdata.setVar("PKG", pkg_name)
            localdata.setVar('OVERRIDES', d.getVar("OVERRIDES", False) + ":" + package)

            if not oe.packagedata.packaged(package, localdata):
                continue

            # Load the package document written by do_create_spdx.
            pkg_spdx_path = deploy_dir_spdx / "packages" / (pkg_name + ".spdx.json")

            package_doc, package_doc_sha1 = oe.sbom.read_doc(pkg_spdx_path)

            for p in package_doc.packages:
                if p.name == pkg_name:
                    spdx_package = p
                    break
            else:
                bb.fatal("Package '%s' not found in %s" % (pkg_name, pkg_spdx_path))

            runtime_doc = oe.spdx.SPDXDocument()
            runtime_doc.name = "runtime-" + pkg_name
            runtime_doc.documentNamespace = get_doc_namespace(localdata, runtime_doc)
            runtime_doc.creationInfo.created = creation_time
            runtime_doc.creationInfo.comment = "This document was created by analyzing package runtime dependencies."
            runtime_doc.creationInfo.licenseListVersion = d.getVar("SPDX_LICENSE_DATA")["licenseListVersion"]
            runtime_doc.creationInfo.creators.append("Tool: OpenEmbedded Core create-spdx.bbclass")
            runtime_doc.creationInfo.creators.append("Organization: %s" % d.getVar("SPDX_ORG"))
            runtime_doc.creationInfo.creators.append("Person: N/A ()")

            package_ref = oe.spdx.SPDXExternalDocumentRef()
            package_ref.externalDocumentId = "DocumentRef-package-" + package
            package_ref.spdxDocument = package_doc.documentNamespace
            package_ref.checksum.algorithm = "SHA1"
            package_ref.checksum.checksumValue = package_doc_sha1

            runtime_doc.externalDocumentRefs.append(package_ref)

            runtime_doc.add_relationship(
                runtime_doc.SPDXID,
                "AMENDS",
                "%s:%s" % (package_ref.externalDocumentId, package_doc.SPDXID)
            )

            deps = bb.utils.explode_dep_versions2(localdata.getVar("RDEPENDS") or "")
            seen_deps = set()
            for dep, _ in deps.items():
                if dep in seen_deps:
                    continue

                # Dependencies without a known provider are skipped.
                if dep not in providers:
                    continue

                # From here on "dep" is the providing package, not the
                # RDEPENDS name it was looked up by.
                dep = providers[dep]

                if not oe.packagedata.packaged(dep, localdata):
                    continue

                dep_pkg_data = oe.packagedata.read_subpkgdata_dict(dep, d)
                dep_pkg = dep_pkg_data["PKG"]

                if dep in dep_package_cache:
                    (dep_spdx_package, dep_package_ref) = dep_package_cache[dep]
                else:
                    dep_path = deploy_dir_spdx / "packages" / ("%s.spdx.json" % dep_pkg)

                    spdx_dep_doc, spdx_dep_sha1 = oe.sbom.read_doc(dep_path)

                    for pkg in spdx_dep_doc.packages:
                        if pkg.name == dep_pkg:
                            dep_spdx_package = pkg
                            break
                    else:
                        bb.fatal("Package '%s' not found in %s" % (dep_pkg, dep_path))

                    dep_package_ref = oe.spdx.SPDXExternalDocumentRef()
                    dep_package_ref.externalDocumentId = "DocumentRef-runtime-dependency-" + spdx_dep_doc.name
                    dep_package_ref.spdxDocument = spdx_dep_doc.documentNamespace
                    dep_package_ref.checksum.algorithm = "SHA1"
                    dep_package_ref.checksum.checksumValue = spdx_dep_sha1

                    dep_package_cache[dep] = (dep_spdx_package, dep_package_ref)

                runtime_doc.externalDocumentRefs.append(dep_package_ref)

                runtime_doc.add_relationship(
                    "%s:%s" % (dep_package_ref.externalDocumentId, dep_spdx_package.SPDXID),
                    "RUNTIME_DEPENDENCY_OF",
                    "%s:%s" % (package_ref.externalDocumentId, spdx_package.SPDXID)
                )
                # NOTE(review): the provider name is recorded here, while the
                # check at the top of the loop tests the RDEPENDS name.
                seen_deps.add(dep)

            oe.sbom.write_doc(d, runtime_doc, "runtime", spdx_deploy)
}
750
addtask do_create_runtime_spdx after do_create_spdx before do_build do_rm_work
# do_create_runtime_spdx is an sstate task: SPDXRUNTIMEDEPLOY is its input
# directory and DEPLOY_DIR_SPDX its output directory.
SSTATETASKS += "do_create_runtime_spdx"
do_create_runtime_spdx[sstate-inputdirs] = "${SPDXRUNTIMEDEPLOY}"
do_create_runtime_spdx[sstate-outputdirs] = "${DEPLOY_DIR_SPDX}"

# Setscene variant of do_create_runtime_spdx; delegates to sstate_setscene().
python do_create_runtime_spdx_setscene () {
    sstate_setscene(d)
}
addtask do_create_runtime_spdx_setscene

do_create_runtime_spdx[dirs] = "${SPDXRUNTIMEDEPLOY}"
do_create_runtime_spdx[cleandirs] = "${SPDXRUNTIMEDEPLOY}"
# The package documents of runtime dependencies are read by this task, so
# their do_create_spdx must run first.
do_create_runtime_spdx[rdeptask] = "do_create_spdx"
764
def spdx_get_src(d):
    """
    Save the patched source of the recipe in SPDXWORK.

    For ordinary recipes WORKDIR is temporarily pointed at SPDXWORK so that
    do_unpack and do_patch run there. For work-shared recipes (see
    is_work_shared_spdx) do_unpack runs in place and, for kernel recipes, the
    shared kernel source is copied into "<SPDXWORK>/<PN>-<PV>-<PR>" without
    its .git directory; do_patch is not run again for them.

    WORKDIR is always restored to its original value before returning.
    """
    import shutil
    spdx_workdir = d.getVar('SPDXWORK')
    spdx_sysroot_native = d.getVar('STAGING_DIR_NATIVE')

    workdir = d.getVar("WORKDIR")

    try:
        # The kernel class functions require it to be on work-shared, so we dont change WORKDIR
        if not is_work_shared_spdx(d):
            # Change the WORKDIR to make do_unpack do_patch run in another dir.
            d.setVar('WORKDIR', spdx_workdir)
            # Restore the original path to recipe's native sysroot (it's relative to WORKDIR).
            d.setVar('STAGING_DIR_NATIVE', spdx_sysroot_native)

            # The changed 'WORKDIR' also caused 'B' changed, create dir 'B' for the
            # possibly requiring of the following tasks (such as some recipes's
            # do_patch required 'B' existed).
            bb.utils.mkdirhier(d.getVar('B'))

        bb.build.exec_func('do_unpack', d)
        # Copy source of kernel to spdx_workdir
        if is_work_shared_spdx(d):
            d.setVar('WORKDIR', spdx_workdir)
            d.setVar('STAGING_DIR_NATIVE', spdx_sysroot_native)
            src_dir = spdx_workdir + "/" + d.getVar('PN')+ "-" + d.getVar('PV') + "-" + d.getVar('PR')
            bb.utils.mkdirhier(src_dir)
            if bb.data.inherits_class('kernel',d):
                share_src = d.getVar('STAGING_KERNEL_DIR')
                cmd_copy_share = "cp -rf " + share_src + "/* " + src_dir + "/"
                # Close the pipe once the output is read so the child process
                # is reaped instead of being left to garbage collection.
                with os.popen(cmd_copy_share) as copy_pipe:
                    cmd_copy_kernel_result = copy_pipe.read()
                bb.note("cmd_copy_kernel_result = " + cmd_copy_kernel_result)

            git_path = src_dir + "/.git"
            if os.path.exists(git_path):
                # The module is "shutil"; the previous "shutils" spelling
                # raised NameError whenever a .git directory was present.
                shutil.rmtree(git_path)

        # Make sure gcc and kernel sources are patched only once
        if not (d.getVar('SRC_URI') == "" or is_work_shared_spdx(d)):
            bb.build.exec_func('do_patch', d)

        # Some userland has no source.
        if not os.path.exists( spdx_workdir ):
            bb.utils.mkdirhier(spdx_workdir)
    finally:
        d.setVar("WORKDIR", workdir)
815
# Image builds: require the SPDX tasks of everything do_rootfs recursively
# depends on, then combine the documents right after the rootfs
# post-uninstall step.
do_rootfs[recrdeptask] += "do_create_spdx do_create_runtime_spdx"

ROOTFS_POSTUNINSTALL_COMMAND =+ "image_combine_spdx ; "

# SDK builds: same requirement for do_populate_sdk, with one combine step for
# the host part and one for the target part of the SDK.
do_populate_sdk[recrdeptask] += "do_create_spdx do_create_runtime_spdx"
POPULATE_SDK_POST_HOST_COMMAND:append:task-populate-sdk = " sdk_host_combine_spdx; "
POPULATE_SDK_POST_TARGET_COMMAND:append:task-populate-sdk = " sdk_target_combine_spdx; "
823
# Combine the SPDX documents of all packages installed in the image into the
# image-level SPDX outputs and create the IMAGE_LINK_NAME symlinks for them.
python image_combine_spdx() {
    import os
    import oe.sbom
    from pathlib import Path
    from oe.rootfs import image_list_installed_packages

    image_name = d.getVar("IMAGE_NAME")
    image_link_name = d.getVar("IMAGE_LINK_NAME")
    imgdeploydir = Path(d.getVar("IMGDEPLOYDIR"))
    img_spdxid = oe.sbom.get_image_spdxid(image_name)
    packages = image_list_installed_packages(d)

    combine_spdx(d, image_name, imgdeploydir, img_spdxid, packages)

    def make_image_link(target_path, suffix):
        # Nothing to do when no link name is configured.
        if not image_link_name:
            return
        link = imgdeploydir / (image_link_name + suffix)
        # When IMAGE_LINK_NAME equals IMAGE_NAME the link would be the file
        # itself and symlink_to() would fail, so skip it.
        if link != target_path:
            link.symlink_to(os.path.relpath(target_path, link.parent))

    # Same three outputs and order as before, now through a single helper.
    for suffix in (".spdx.json", ".spdx.tar.zst", ".spdx.index.json"):
        make_image_link(imgdeploydir / (image_name + suffix), suffix)
}
853
python sdk_host_combine_spdx() {
    # Hook for POPULATE_SDK_POST_HOST_COMMAND: run sdk_combine_spdx for the
    # "host" SDK type.
    sdk_combine_spdx(d, sdk_type="host")
}
857
python sdk_target_combine_spdx() {
    # Hook for POPULATE_SDK_POST_TARGET_COMMAND: run sdk_combine_spdx for the
    # "target" SDK type.
    sdk_combine_spdx(d, sdk_type="target")
}
861
def sdk_combine_spdx(d, sdk_type):
    """
    Write the combined SPDX output for one part of the SDK.

    The output is named "<SDK_NAME>-<sdk_type>" and is written to
    SDKDEPLOYDIR. The installed package list is requested from
    sdk_list_installed_packages() with its second argument set to True
    only when sdk_type is "target".
    """
    from pathlib import Path
    import oe.sbom
    from oe.sdk import sdk_list_installed_packages

    name = "-".join((d.getVar("SDK_NAME"), sdk_type))
    wants_target = (sdk_type == "target")

    combine_spdx(
        d,
        name,
        Path(d.getVar("SDKDEPLOYDIR")),
        oe.sbom.get_sdk_spdxid(name),
        sdk_list_installed_packages(d, wants_target),
    )
872
def combine_spdx(d, rootfs_name, rootfs_deploydir, rootfs_spdxid, packages):
    """
    Write the top-level SPDX document, archive and index for a rootfs or SDK.

    Three files named after rootfs_name are created in rootfs_deploydir:

      <rootfs_name>.spdx.json        top-level document; it holds one package
                                     describing the rootfs plus external
                                     references to the package and runtime
                                     documents of every installed package
      <rootfs_name>.spdx.tar.zst     zstd compressed tar with the top-level
                                     document, every document reachable from
                                     it through externalDocumentRefs, and an
                                     index.json
      <rootfs_name>.spdx.index.json  standalone copy of that index

    Arguments:
      d                 the datastore
      rootfs_name       document name and base name of the output files
      rootfs_deploydir  pathlib.Path of the directory to write into
      rootfs_spdxid     SPDXID assigned to the package describing the rootfs
      packages          mapping keyed by installed package name (only the
                        keys are used)

    bb.fatal() is called when the SPDX document of an installed package does
    not contain a package with the expected name.
    """
    import oe.spdx
    import oe.sbom
    import io
    import json
    from datetime import timezone, datetime
    from pathlib import Path
    import tarfile
    import bb.compress.zstd

    def external_doc_ref(ref_doc, ref_sha1):
        # Build the external document reference for an already read SPDX
        # document and its SHA1 checksum.
        ref = oe.spdx.SPDXExternalDocumentRef()
        ref.externalDocumentId = "DocumentRef-%s" % ref_doc.name
        ref.spdxDocument = ref_doc.documentNamespace
        ref.checksum.algorithm = "SHA1"
        ref.checksum.checksumValue = ref_sha1
        return ref

    creation_time = datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
    source_date_epoch = d.getVar("SOURCE_DATE_EPOCH")

    doc = oe.spdx.SPDXDocument()
    doc.name = rootfs_name
    doc.documentNamespace = get_doc_namespace(d, doc)
    doc.creationInfo.created = creation_time
    doc.creationInfo.comment = "This document was created by analyzing the source of the Yocto recipe during the build."
    doc.creationInfo.licenseListVersion = d.getVar("SPDX_LICENSE_DATA")["licenseListVersion"]
    doc.creationInfo.creators.append("Tool: OpenEmbedded Core create-spdx.bbclass")
    doc.creationInfo.creators.append("Organization: %s" % d.getVar("SPDX_ORG"))
    doc.creationInfo.creators.append("Person: N/A ()")

    image = oe.spdx.SPDXPackage()
    image.name = d.getVar("PN")
    image.versionInfo = d.getVar("PV")
    image.SPDXID = rootfs_spdxid
    image.packageSupplier = d.getVar("SPDX_SUPPLIER")

    doc.packages.append(image)

    for name in sorted(packages.keys()):
        pkg_spdx_path = deploy_dir_spdx / "packages" / (name + ".spdx.json")
        pkg_doc, pkg_doc_sha1 = oe.sbom.read_doc(pkg_spdx_path)

        # Reference the package entry matching the installed package name;
        # a document without such an entry is a fatal error.
        for p in pkg_doc.packages:
            if p.name == name:
                pkg_ref = external_doc_ref(pkg_doc, pkg_doc_sha1)

                doc.externalDocumentRefs.append(pkg_ref)
                doc.add_relationship(image, "CONTAINS", "%s:%s" % (pkg_ref.externalDocumentId, p.SPDXID))
                break
        else:
            bb.fatal("Unable to find package with name '%s' in SPDX file %s" % (name, pkg_spdx_path))

        runtime_spdx_path = deploy_dir_spdx / "runtime" / ("runtime-" + name + ".spdx.json")
        runtime_doc, runtime_doc_sha1 = oe.sbom.read_doc(runtime_spdx_path)

        runtime_ref = external_doc_ref(runtime_doc, runtime_doc_sha1)

        # "OTHER" isn't ideal here, but I can't find a relationship that makes sense
        doc.externalDocumentRefs.append(runtime_ref)
        doc.add_relationship(
            image,
            "OTHER",
            "%s:%s" % (runtime_ref.externalDocumentId, runtime_doc.SPDXID),
            comment="Runtime dependencies for %s" % name
        )

    image_spdx_path = rootfs_deploydir / (rootfs_name + ".spdx.json")

    with image_spdx_path.open("wb") as f:
        doc.to_json(f, sort_keys=True)

    num_threads = int(d.getVar("BB_NUMBER_THREADS"))

    # Holds both the paths and the document namespaces already handled, so a
    # document is archived once even if it is reachable through several
    # references or several paths.
    visited_docs = set()

    index = {"documents": []}

    # Upper bound for the mtime of archived documents (None: leave as is).
    mtime_limit = int(source_date_epoch) if source_date_epoch is not None else None

    spdx_tar_path = rootfs_deploydir / (rootfs_name + ".spdx.tar.zst")
    with bb.compress.zstd.open(spdx_tar_path, "w", num_threads=num_threads) as zstd_file:
        with tarfile.open(fileobj=zstd_file, mode="w|") as tar:
            def collect_spdx_document(path):
                # Add the document at path to the archive and the index, then
                # recurse into every document it references.
                if path in visited_docs:
                    return

                visited_docs.add(path)

                with path.open("rb") as doc_file:
                    doc, sha1 = oe.sbom.read_doc(doc_file)
                    # read_doc() consumed the file; rewind so the tar member
                    # gets the complete content.
                    doc_file.seek(0)

                    if doc.documentNamespace in visited_docs:
                        return

                    bb.note("Adding SPDX document %s" % path)
                    visited_docs.add(doc.documentNamespace)
                    info = tar.gettarinfo(fileobj=doc_file)

                    # Fixed name and ownership rather than whatever the file
                    # on disk happens to carry.
                    info.name = doc.name + ".spdx.json"
                    info.uid = 0
                    info.gid = 0
                    info.uname = "root"
                    info.gname = "root"

                    if mtime_limit is not None and info.mtime > mtime_limit:
                        info.mtime = mtime_limit

                    tar.addfile(info, doc_file)

                    index["documents"].append({
                        "filename": info.name,
                        "documentNamespace": doc.documentNamespace,
                        "sha1": sha1,
                    })

                # Referenced documents are looked up by namespace, with "/"
                # replaced by "_" to form the file name.
                for ref in doc.externalDocumentRefs:
                    ref_path = deploy_dir_spdx / "by-namespace" / ref.spdxDocument.replace("/", "_")
                    collect_spdx_document(ref_path)

            collect_spdx_document(image_spdx_path)

            index["documents"].sort(key=lambda x: x["filename"])

            index_str = io.BytesIO(json.dumps(index, sort_keys=True).encode("utf-8"))

            info = tarfile.TarInfo()
            info.name = "index.json"
            info.size = len(index_str.getvalue())
            info.uid = 0
            info.gid = 0
            info.uname = "root"
            info.gname = "root"

            tar.addfile(info, fileobj=index_str)

    spdx_index_path = rootfs_deploydir / (rootfs_name + ".spdx.index.json")
    with spdx_index_path.open("w") as f:
        json.dump(index, f, sort_keys=True)