blob: 37b6b569a1890f47dd60cc688fb17c102e3eae4e [file] [log] [blame]
Andrew Geissler5199d832021-09-24 16:47:35 -05001#
2# SPDX-License-Identifier: GPL-2.0-only
3#
4
5DEPLOY_DIR_SPDX ??= "${DEPLOY_DIR}/spdx/${MACHINE}"
6
7# The product name that the CVE database uses. Defaults to BPN, but may need to
# be overridden per recipe (for example tiff.bb sets CVE_PRODUCT=libtiff).
9CVE_PRODUCT ??= "${BPN}"
10CVE_VERSION ??= "${PV}"
11
12SPDXDIR ??= "${WORKDIR}/spdx"
13SPDXDEPLOY = "${SPDXDIR}/deploy"
14SPDXWORK = "${SPDXDIR}/work"
15
Patrick Williams93c203f2021-10-06 16:15:23 -050016SPDX_TOOL_NAME ??= "oe-spdx-creator"
17SPDX_TOOL_VERSION ??= "1.0"
18
Andrew Geissler5199d832021-09-24 16:47:35 -050019SPDXRUNTIMEDEPLOY = "${SPDXDIR}/runtime-deploy"
20
21SPDX_INCLUDE_SOURCES ??= "0"
22SPDX_INCLUDE_PACKAGED ??= "0"
23SPDX_ARCHIVE_SOURCES ??= "0"
24SPDX_ARCHIVE_PACKAGED ??= "0"
25
26SPDX_UUID_NAMESPACE ??= "sbom.openembedded.org"
27SPDX_NAMESPACE_PREFIX ??= "http://spdx.org/spdxdoc"
28
29SPDX_LICENSES ??= "${COREBASE}/meta/files/spdx-licenses.json"
30
Andrew Geissler595f6302022-01-24 19:11:47 +000031SPDX_ORG ??= "OpenEmbedded ()"
Andrew Geissler7e0e3c02022-02-25 20:34:39 +000032SPDX_SUPPLIER ??= "Organization: ${SPDX_ORG}"
33SPDX_SUPPLIER[doc] = "The SPDX PackageSupplier field for SPDX packages created from \
    this recipe. For SPDX documents created using this class during the build, this \
35 is the contact information for the person or organization who is doing the \
36 build."
Andrew Geissler595f6302022-01-24 19:11:47 +000037
def extract_licenses(filename):
    """
    Look for SPDX-License-Identifier tags near the start of a file.

    Only the first 15000 bytes of the file are examined. Returns the list of
    license expressions found (decoded as ASCII), or None when no tag is
    present or when reading fails; a failure is reported with bb.warn().
    """
    import re

    tag_pattern = re.compile(rb'^\W*SPDX-License-Identifier:\s*([ \w\d.()+-]+?)(?:\s+\W*)?$', re.MULTILINE)

    try:
        with open(filename, 'rb') as src:
            limit = min(15000, os.stat(filename).st_size)
            head = src.read(limit)
            matches = tag_pattern.findall(head)
            if matches:
                return [match.decode('ascii') for match in matches]
    except Exception as e:
        bb.warn(f"Exception reading {filename}: {e}")
    return None
54
def get_doc_namespace(d, doc):
    """
    Build the document namespace URI for an SPDX document.

    The result is "<SPDX_NAMESPACE_PREFIX>/<doc.name>-<uuid>", where the UUID
    is a version 5 UUID of doc.name inside a namespace that is itself derived
    from SPDX_UUID_NAMESPACE.
    """
    import uuid

    base_namespace = uuid.uuid5(uuid.NAMESPACE_DNS, d.getVar("SPDX_UUID_NAMESPACE"))
    prefix = d.getVar("SPDX_NAMESPACE_PREFIX")
    doc_uuid = uuid.uuid5(base_namespace, doc.name)
    return f"{prefix}/{doc.name}-{doc_uuid}"
59
def create_annotation(d, comment):
    """
    Create an SPDX annotation of type "OTHER" carrying the given comment.

    The annotator is "Tool: <SPDX_TOOL_NAME> - <SPDX_TOOL_VERSION>" and the
    annotation date is the current UTC time.
    """
    from datetime import datetime, timezone

    tool_name = d.getVar("SPDX_TOOL_NAME")
    tool_version = d.getVar("SPDX_TOOL_VERSION")
    now_utc = datetime.now(tz=timezone.utc)

    result = oe.spdx.SPDXAnnotation()
    result.annotationDate = now_utc.strftime("%Y-%m-%dT%H:%M:%SZ")
    result.annotationType = "OTHER"
    result.annotator = "Tool: %s - %s" % (tool_name, tool_version)
    result.comment = comment
    return result
70
def recipe_spdx_is_native(d, recipe):
    """
    Return True if the SPDX recipe package carries this tool's "isNative"
    annotation (type "OTHER", annotator built from SPDX_TOOL_NAME and
    SPDX_TOOL_VERSION), False otherwise.
    """
    expected_annotator = "Tool: %s - %s" % (d.getVar("SPDX_TOOL_NAME"), d.getVar("SPDX_TOOL_VERSION"))

    for annotation in recipe.annotations:
        if annotation.annotationType != "OTHER":
            continue
        if annotation.annotator != expected_annotator:
            continue
        if annotation.comment == "isNative":
            return True
    return False
Andrew Geissler5199d832021-09-24 16:47:35 -050075
def is_work_shared_spdx(d):
    """
    Tell whether the recipe inherits the kernel class or has a WORKDIR whose
    path contains 'work-shared'.
    """
    return (
        bb.data.inherits_class('kernel', d)
        or ('work-shared' in d.getVar('WORKDIR'))
    )
Andrew Geissler5199d832021-09-24 16:47:35 -050078
python() {
    import json

    # Nothing to do when the license data is already in the datastore
    if not d.getVar("SPDX_LICENSE_DATA"):
        with open(d.getVar("SPDX_LICENSES"), "r") as license_file:
            license_data = json.load(license_file)

        # Re-key the license array by licenseId so lookups are by identifier
        licenses_by_id = {}
        for entry in license_data["licenses"]:
            licenses_by_id[entry["licenseId"]] = entry
        license_data["licenses"] = licenses_by_id

        d.setVar("SPDX_LICENSE_DATA", license_data)
}
90
def convert_license_to_spdx(lic, document, d, existing=None):
    """
    Convert an OpenEmbedded LICENSE expression into an SPDX license expression.

    lic is the license string; "&", "|" and parentheses are treated as
    operators whether or not they are surrounded by whitespace. document is
    the SPDX document that receives an extracted licensing info entry for
    every license that is neither in the SPDX license list nor in existing.
    d is the datastore. existing optionally maps license names to identifiers
    that are reused instead of creating a new "LicenseRef-" entry.

    Returns the converted expression as a single space-separated string.
    """
    from pathlib import Path
    import oe.spdx

    if existing is None:
        existing = {}

    license_data = d.getVar("SPDX_LICENSE_DATA")
    extracted = {}

    # Tokens with a fixed translation
    fixed_tokens = {
        "(": "(",
        ")": ")",
        "&": "AND",
        "|": "OR",
        "CLOSED": "NONE",
    }

    def add_extracted_license(ident, name):
        # Each license name is only added to the document once per call
        if name in extracted:
            return

        extracted_info = oe.spdx.SPDXExtractedLicensingInfo()
        extracted_info.name = name
        extracted_info.licenseId = ident
        extracted_info.extractedText = None

        if name == "PD":
            # Special-case this.
            extracted_info.extractedText = "Software released to the public domain"
        else:
            # Search for the license in COMMON_LICENSE_DIR and LICENSE_PATH
            for directory in [d.getVar('COMMON_LICENSE_DIR')] + (d.getVar('LICENSE_PATH') or '').split():
                try:
                    with (Path(directory) / name).open(errors="replace") as f:
                        extracted_info.extractedText = f.read()
                        break
                except FileNotFoundError:
                    pass
            if extracted_info.extractedText is None:
                # If it's not SPDX or PD, then NO_GENERIC_LICENSE must be set
                filename = d.getVarFlag('NO_GENERIC_LICENSE', name)
                if filename:
                    filename = d.expand("${S}/" + filename)
                    with open(filename, errors="replace") as f:
                        extracted_info.extractedText = f.read()
                else:
                    bb.error("Cannot find any text for license %s" % name)

        extracted[name] = extracted_info
        document.hasExtractedLicensingInfos.append(extracted_info)

    def convert(token):
        if token in fixed_tokens:
            return fixed_tokens[token]

        spdx_license = d.getVarFlag("SPDXLICENSEMAP", token) or token
        if spdx_license in license_data["licenses"]:
            return spdx_license

        if token in existing:
            return existing[token]

        spdx_license = "LicenseRef-" + token
        add_extracted_license(spdx_license, token)
        return spdx_license

    # Pad every operator with spaces so that "A&B" or "(A|B)" tokenizes the
    # same way as "A & B" or "( A | B )".
    padded = lic
    for operator in "()&|":
        padded = padded.replace(operator, " %s " % operator)

    return ' '.join(convert(token) for token in padded.split())
162
def process_sources(d):
    """
    Decide whether the sources of the current recipe should be processed.

    Returns False for the recipes that are special-cased below and True for
    everything else.
    """
    pn = d.getVar('PN')
    assume_provided = (d.getVar("ASSUME_PROVIDED") or "").split()
    if pn in assume_provided:
        # Use the first PROVIDES entry that differs from PN, if there is one
        pn = next((p for p in d.getVar("PROVIDES").split() if p != pn), pn)

    # glibc-locale: do_fetch, do_unpack and do_patch tasks have been deleted,
    # so avoid archiving source here.
    if pn.startswith('glibc-locale'):
        return False

    # These checks deliberately use the real PN, not the substituted one
    for skipped in ("libtool-cross", "libgcc-initial", "shadow-sysroot"):
        if d.getVar('PN') == skipped:
            return False

    # We just archive gcc-source for all the gcc related recipes
    if d.getVar('BPN') in ['gcc', 'libgcc']:
        bb.debug(1, 'spdx: There is bug in scan of %s is, do nothing' % pn)
        return False

    return True
189
190
def add_package_files(d, doc, spdx_pkg, topdir, get_spdxid, get_types, *, archive=None, ignore_dirs=(), ignore_top_level_dirs=()):
    """
    Add every regular file below topdir to an SPDX document and package.

    d is the datastore, doc the SPDX document and spdx_pkg the SPDX package
    that will contain the files. get_spdxid(counter) returns the SPDXID for
    the file with the given 1-based counter and get_types(path) returns the
    file types for a path. When archive is an open tarfile, each file is also
    added to it with root ownership and an mtime clamped to
    SOURCE_DATE_EPOCH. Directories named in ignore_dirs are skipped at any
    depth; those in ignore_top_level_dirs only directly below topdir.

    Symlinks and non-regular files are ignored. The package verification code
    of spdx_pkg is set from the sorted SHA1 sums of the added files.

    Returns the list of SPDX file objects that were created.
    """
    from pathlib import Path
    import oe.spdx
    import hashlib

    # An unset or empty SOURCE_DATE_EPOCH means timestamps are not clamped
    epoch_value = d.getVar("SOURCE_DATE_EPOCH")
    source_date_epoch = int(epoch_value) if epoch_value else None

    sha1s = []
    spdx_files = []

    file_counter = 1
    for subdir, dirs, files in os.walk(topdir):
        # Prune in place so that os.walk() does not descend into ignored
        # directories
        dirs[:] = [name for name in dirs if name not in ignore_dirs]
        if subdir == str(topdir):
            dirs[:] = [name for name in dirs if name not in ignore_top_level_dirs]

        # Visit entries in a fixed order so the file counter (and therefore
        # the SPDXID of each file) does not depend on directory listing order
        dirs.sort()

        for file in sorted(files):
            filepath = Path(subdir) / file
            filename = str(filepath.relative_to(topdir))

            if not filepath.is_file() or filepath.is_symlink():
                continue

            spdx_file = oe.spdx.SPDXFile()
            spdx_file.SPDXID = get_spdxid(file_counter)
            for t in get_types(filepath):
                spdx_file.fileTypes.append(t)
            spdx_file.fileName = filename

            if archive is not None:
                with filepath.open("rb") as f:
                    info = archive.gettarinfo(fileobj=f)
                    info.name = filename
                    info.uid = 0
                    info.gid = 0
                    info.uname = "root"
                    info.gname = "root"

                    if source_date_epoch is not None and info.mtime > source_date_epoch:
                        info.mtime = source_date_epoch

                    archive.addfile(info, f)

            sha1 = bb.utils.sha1_file(filepath)
            sha1s.append(sha1)
            spdx_file.checksums.append(oe.spdx.SPDXChecksum(
                    algorithm="SHA1",
                    checksumValue=sha1,
                ))
            spdx_file.checksums.append(oe.spdx.SPDXChecksum(
                    algorithm="SHA256",
                    checksumValue=bb.utils.sha256_file(filepath),
                ))

            if "SOURCE" in spdx_file.fileTypes:
                extracted_lics = extract_licenses(filepath)
                if extracted_lics:
                    spdx_file.licenseInfoInFiles = extracted_lics

            doc.files.append(spdx_file)
            doc.add_relationship(spdx_pkg, "CONTAINS", spdx_file)
            spdx_pkg.hasFiles.append(spdx_file.SPDXID)

            spdx_files.append(spdx_file)

            file_counter += 1

    # The verification code is the SHA1 of the concatenated, sorted file SHA1s
    sha1s.sort()
    verifier = hashlib.sha1()
    for v in sha1s:
        verifier.update(v.encode("utf-8"))
    spdx_pkg.packageVerificationCode.packageVerificationCodeValue = verifier.hexdigest()

    return spdx_files
265
266
def add_package_sources_from_debug(d, package_doc, spdx_package, package, package_files, sources):
    """
    Link packaged files to the source files they were generated from.

    For every file of the package that has "debugsrc" entries in its extended
    package data, each debug source is looked up below PKGD, the target and
    native sysroots and the kernel staging directory. The first existing copy
    is hashed and matched by SHA256 against sources (a mapping of SHA256 to
    dependency source entries). A GENERATED_FROM relationship is added to
    package_doc for every debug source; it points to "NOASSERTION" when the
    source is not found or not known.

    package_files is the list of SPDX files already created for the package.
    spdx_package is accepted for interface compatibility and is not used.
    """
    from pathlib import Path
    import oe.packagedata
    import oe.spdx

    kernel_src_prefix = "/usr/src/kernel/"

    debug_search_paths = [
        Path(d.getVar('PKGD')),
        Path(d.getVar('STAGING_DIR_TARGET')),
        Path(d.getVar('STAGING_DIR_NATIVE')),
        Path(d.getVar('STAGING_KERNEL_DIR')),
    ]

    pkg_data = oe.packagedata.read_subpkgdata_extended(package, d)

    if pkg_data is None:
        return

    # Index the package files once; setdefault keeps the first file for a
    # given name, like a linear search would
    files_by_name = {}
    for candidate in package_files:
        files_by_name.setdefault(candidate.fileName.lstrip("/"), candidate)

    for file_path, file_data in pkg_data["files_info"].items():
        if "debugsrc" not in file_data:
            continue

        pkg_file = files_by_name.get(file_path.lstrip("/"))
        if pkg_file is None:
            bb.fatal("No package file found for %s" % str(file_path))
            # Only reached if bb.fatal() returns
            continue

        for debugsrc in file_data["debugsrc"]:
            # Always work with a relative path: joining an absolute path onto
            # a search directory would discard the search directory.
            if debugsrc.startswith(kernel_src_prefix):
                relative_src = debugsrc[len(kernel_src_prefix):].lstrip("/")
            else:
                relative_src = debugsrc.lstrip("/")

            ref_id = "NOASSERTION"
            for search in debug_search_paths:
                debugsrc_path = search / relative_src
                if not debugsrc_path.exists():
                    continue

                file_sha256 = bb.utils.sha256_file(debugsrc_path)

                if file_sha256 in sources:
                    source_file = sources[file_sha256]

                    doc_ref = package_doc.find_external_document_ref(source_file.doc.documentNamespace)
                    if doc_ref is None:
                        doc_ref = oe.spdx.SPDXExternalDocumentRef()
                        doc_ref.externalDocumentId = "DocumentRef-dependency-" + source_file.doc.name
                        doc_ref.spdxDocument = source_file.doc.documentNamespace
                        doc_ref.checksum.algorithm = "SHA1"
                        doc_ref.checksum.checksumValue = source_file.doc_sha1
                        package_doc.externalDocumentRefs.append(doc_ref)

                    ref_id = "%s:%s" % (doc_ref.externalDocumentId, source_file.file.SPDXID)
                else:
                    bb.debug(1, "Debug source %s with SHA256 %s not found in any dependency" % (str(debugsrc_path), file_sha256))
                # Only the first existing copy of the source is considered
                break
            else:
                bb.debug(1, "Debug source %s not found" % debugsrc)

            package_doc.add_relationship(pkg_file, "GENERATED_FROM", ref_id, comment=debugsrc)
328
def collect_dep_recipes(d, doc, spdx_recipe):
    """
    Record the recipes this recipe depends on through do_create_spdx.

    For every other recipe that appears in BB_TASKDEPDATA with the
    do_create_spdx task, its recipe SPDX document is read from
    DEPLOY_DIR_SPDX, referenced from doc as an external document and linked
    to spdx_recipe with a BUILD_DEPENDENCY_OF relationship. Documents that do
    not contain a package named after the dependency are skipped.

    Returns the list of oe.sbom.DepRecipe entries that were collected.
    """
    from pathlib import Path
    import oe.sbom
    import oe.spdx

    recipes_dir = Path(d.getVar("DEPLOY_DIR_SPDX")) / "recipes"
    this_pn = d.getVar("PN")
    taskdepdata = d.getVar("BB_TASKDEPDATA", False)

    dep_names = set()
    for entry in taskdepdata.values():
        if entry[1] == "do_create_spdx" and entry[0] != this_pn:
            dep_names.add(entry[0])

    collected = []
    for dep_pn in sorted(dep_names):
        dep_doc, dep_sha1 = oe.sbom.read_doc(recipes_dir / ("recipe-%s.spdx.json" % dep_pn))

        dep_pkg = next((pkg for pkg in dep_doc.packages if pkg.name == dep_pn), None)
        if dep_pkg is None:
            continue

        collected.append(oe.sbom.DepRecipe(dep_doc, dep_sha1, dep_pkg))

        ref = oe.spdx.SPDXExternalDocumentRef()
        ref.externalDocumentId = "DocumentRef-dependency-" + dep_doc.name
        ref.spdxDocument = dep_doc.documentNamespace
        ref.checksum.algorithm = "SHA1"
        ref.checksum.checksumValue = dep_sha1
        doc.externalDocumentRefs.append(ref)

        dep_spdxid = "%s:%s" % (ref.externalDocumentId, dep_pkg.SPDXID)
        doc.add_relationship(dep_spdxid, "BUILD_DEPENDENCY_OF", spdx_recipe)

    return collected
371
372collect_dep_recipes[vardepsexclude] += "BB_TASKDEPDATA"
373
374
def collect_dep_sources(d, dep_recipes):
    """
    Build a mapping from SHA256 checksum to oe.sbom.DepSource for the source
    files that belong to the given dependency recipes.
    """
    import oe.sbom

    by_sha256 = {}
    for dep in dep_recipes:
        # Don't collect sources from native recipes as they
        # match non-native sources also.
        if recipe_spdx_is_native(d, dep.recipe):
            continue

        owned_ids = set(dep.recipe.hasFiles)
        for spdx_file in dep.doc.files:
            if spdx_file.SPDXID not in owned_ids or "SOURCE" not in spdx_file.fileTypes:
                continue

            # Only the first SHA256 checksum of a file is used
            for checksum in spdx_file.checksums:
                if checksum.algorithm != "SHA256":
                    continue
                by_sha256[checksum.checksumValue] = oe.sbom.DepSource(dep.doc, dep.doc_sha1, dep.recipe, spdx_file)
                break

    return by_sha256
397
398
python do_create_spdx() {
    # Create the SPDX document for this recipe ("recipe-<PN>") and, for
    # non-native recipes, one SPDX document per package that was produced.
    # The recipe document is written with oe.sbom.write_doc() under "recipes"
    # and the package documents under "packages".
    from datetime import datetime, timezone
    import oe.sbom
    import oe.spdx
    import uuid
    from pathlib import Path
    from contextlib import contextmanager
    import oe.cve_check

    # Yields an open zstd-compressed tar stream at `name` when `guard` is
    # true, otherwise yields None so callers can use one code path.
    @contextmanager
    def optional_tarfile(name, guard, mode="w"):
        import tarfile
        import bb.compress.zstd

        num_threads = int(d.getVar("BB_NUMBER_THREADS"))

        if guard:
            name.parent.mkdir(parents=True, exist_ok=True)
            with bb.compress.zstd.open(name, mode=mode + "b", num_threads=num_threads) as f:
                with tarfile.open(fileobj=f, mode=mode + "|") as tf:
                    yield tf
        else:
            yield None


    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
    spdx_workdir = Path(d.getVar("SPDXWORK"))
    # NOTE(review): include_packaged is assigned but not read in this function
    include_packaged = d.getVar("SPDX_INCLUDE_PACKAGED") == "1"
    include_sources = d.getVar("SPDX_INCLUDE_SOURCES") == "1"
    archive_sources = d.getVar("SPDX_ARCHIVE_SOURCES") == "1"
    archive_packaged = d.getVar("SPDX_ARCHIVE_PACKAGED") == "1"

    # One timestamp is shared by the recipe document and all package documents
    creation_time = datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    doc = oe.spdx.SPDXDocument()

    doc.name = "recipe-" + d.getVar("PN")
    doc.documentNamespace = get_doc_namespace(d, doc)
    doc.creationInfo.created = creation_time
    doc.creationInfo.comment = "This document was created by analyzing recipe files during the build."
    doc.creationInfo.licenseListVersion = d.getVar("SPDX_LICENSE_DATA")["licenseListVersion"]
    doc.creationInfo.creators.append("Tool: OpenEmbedded Core create-spdx.bbclass")
    doc.creationInfo.creators.append("Organization: %s" % d.getVar("SPDX_ORG"))
    doc.creationInfo.creators.append("Person: N/A ()")

    recipe = oe.spdx.SPDXPackage()
    recipe.name = d.getVar("PN")
    recipe.versionInfo = d.getVar("PV")
    recipe.SPDXID = oe.sbom.get_recipe_spdxid(d)
    recipe.packageSupplier = d.getVar("SPDX_SUPPLIER")
    # Native and cross recipes are marked so that recipe_spdx_is_native()
    # can recognise them later
    if bb.data.inherits_class("native", d) or bb.data.inherits_class("cross", d):
        recipe.annotations.append(create_annotation(d, "isNative"))

    # The download location is the first SRC_URI entry that is not a local
    # file:// entry
    for s in d.getVar('SRC_URI').split():
        if not s.startswith("file://"):
            recipe.downloadLocation = s
            break
    else:
        recipe.downloadLocation = "NOASSERTION"

    homepage = d.getVar("HOMEPAGE")
    if homepage:
        recipe.homepage = homepage

    license = d.getVar("LICENSE")
    if license:
        recipe.licenseDeclared = convert_license_to_spdx(license, doc, d)

    summary = d.getVar("SUMMARY")
    if summary:
        recipe.summary = summary

    description = d.getVar("DESCRIPTION")
    if description:
        recipe.description = description

    # Some CVEs may be patched during the build process without incrementing the version number,
    # so querying for CVEs based on the CPE id can lead to false positives. To account for this,
    # save the CVEs fixed by patches to source information field in the SPDX.
    # NOTE(review): if get_patched_cves() returns an unordered collection, the
    # order of the IDs in sourceInfo may differ between builds - confirm
    patched_cves = oe.cve_check.get_patched_cves(d)
    patched_cves = list(patched_cves)
    patched_cves = ' '.join(patched_cves)
    if patched_cves:
        recipe.sourceInfo = "CVEs fixed: " + patched_cves

    cpe_ids = oe.cve_check.get_cpe_ids(d.getVar("CVE_PRODUCT"), d.getVar("CVE_VERSION"))
    if cpe_ids:
        for cpe_id in cpe_ids:
            cpe = oe.spdx.SPDXExternalReference()
            cpe.referenceCategory = "SECURITY"
            cpe.referenceType = "http://spdx.org/rdf/references/cpe23Type"
            cpe.referenceLocator = cpe_id
            recipe.externalRefs.append(cpe)

    doc.packages.append(recipe)
    doc.add_relationship(doc, "DESCRIBES", recipe)

    # Source files are only added when SPDX_INCLUDE_SOURCES is "1" and
    # process_sources() does not exclude the recipe
    if process_sources(d) and include_sources:
        recipe_archive = deploy_dir_spdx / "recipes" / (doc.name + ".tar.zst")
        with optional_tarfile(recipe_archive, archive_sources) as archive:
            spdx_get_src(d)

            add_package_files(
                d,
                doc,
                recipe,
                spdx_workdir,
                lambda file_counter: "SPDXRef-SourceFile-%s-%d" % (d.getVar("PN"), file_counter),
                lambda filepath: ["SOURCE"],
                ignore_dirs=[".git"],
                ignore_top_level_dirs=["temp"],
                archive=archive,
            )

            if archive is not None:
                recipe.packageFileName = str(recipe_archive.name)

    dep_recipes = collect_dep_recipes(d, doc, recipe)

    # The recipe document is written here; its SHA1 is needed for the
    # external document references created below
    doc_sha1 = oe.sbom.write_doc(d, doc, "recipes")
    dep_recipes.append(oe.sbom.DepRecipe(doc, doc_sha1, recipe))

    recipe_ref = oe.spdx.SPDXExternalDocumentRef()
    recipe_ref.externalDocumentId = "DocumentRef-recipe-" + recipe.name
    recipe_ref.spdxDocument = doc.documentNamespace
    recipe_ref.checksum.algorithm = "SHA1"
    recipe_ref.checksum.checksumValue = doc_sha1

    sources = collect_dep_sources(d, dep_recipes)
    # Licenses already extracted into the recipe document are referenced from
    # the package documents instead of being extracted again
    found_licenses = {license.name:recipe_ref.externalDocumentId + ":" + license.licenseId for license in doc.hasExtractedLicensingInfos}

    if not recipe_spdx_is_native(d, recipe):
        bb.build.exec_func("read_subpackage_metadata", d)

        pkgdest = Path(d.getVar("PKGDEST"))
        for package in d.getVar("PACKAGES").split():
            # NOTE(review): oe.packagedata is used here but is not imported by
            # this function - presumably it is already loaded elsewhere; confirm
            if not oe.packagedata.packaged(package, d):
                continue

            package_doc = oe.spdx.SPDXDocument()
            pkg_name = d.getVar("PKG:%s" % package) or package
            package_doc.name = pkg_name
            package_doc.documentNamespace = get_doc_namespace(d, package_doc)
            package_doc.creationInfo.created = creation_time
            package_doc.creationInfo.comment = "This document was created by analyzing packages created during the build."
            package_doc.creationInfo.licenseListVersion = d.getVar("SPDX_LICENSE_DATA")["licenseListVersion"]
            package_doc.creationInfo.creators.append("Tool: OpenEmbedded Core create-spdx.bbclass")
            package_doc.creationInfo.creators.append("Organization: %s" % d.getVar("SPDX_ORG"))
            package_doc.creationInfo.creators.append("Person: N/A ()")
            package_doc.externalDocumentRefs.append(recipe_ref)

            # A package-specific LICENSE takes precedence over the recipe one
            package_license = d.getVar("LICENSE:%s" % package) or d.getVar("LICENSE")

            spdx_package = oe.spdx.SPDXPackage()

            spdx_package.SPDXID = oe.sbom.get_package_spdxid(pkg_name)
            spdx_package.name = pkg_name
            spdx_package.versionInfo = d.getVar("PV")
            spdx_package.licenseDeclared = convert_license_to_spdx(package_license, package_doc, d, found_licenses)
            spdx_package.packageSupplier = d.getVar("SPDX_SUPPLIER")

            package_doc.packages.append(spdx_package)

            package_doc.add_relationship(spdx_package, "GENERATED_FROM", "%s:%s" % (recipe_ref.externalDocumentId, recipe.SPDXID))
            package_doc.add_relationship(package_doc, "DESCRIBES", spdx_package)

            package_archive = deploy_dir_spdx / "packages" / (package_doc.name + ".tar.zst")
            with optional_tarfile(package_archive, archive_packaged) as archive:
                package_files = add_package_files(
                    d,
                    package_doc,
                    spdx_package,
                    pkgdest / package,
                    lambda file_counter: oe.sbom.get_packaged_file_spdxid(pkg_name, file_counter),
                    lambda filepath: ["BINARY"],
                    archive=archive,
                )

                if archive is not None:
                    spdx_package.packageFileName = str(package_archive.name)

                add_package_sources_from_debug(d, package_doc, spdx_package, package, package_files, sources)

            oe.sbom.write_doc(d, package_doc, "packages")
}
# NOTE: depending on do_unpack is a hack that is necessary to get its dependencies for archiving the source
Andrew Geissler7e0e3c02022-02-25 20:34:39 +0000585addtask do_create_spdx after do_package do_packagedata do_unpack before do_populate_sdk do_build do_rm_work
Andrew Geissler5199d832021-09-24 16:47:35 -0500586
587SSTATETASKS += "do_create_spdx"
588do_create_spdx[sstate-inputdirs] = "${SPDXDEPLOY}"
589do_create_spdx[sstate-outputdirs] = "${DEPLOY_DIR_SPDX}"
590
python do_create_spdx_setscene () {
    # Setscene variant of do_create_spdx; hands off to sstate_setscene(),
    # which is defined outside this file.
    sstate_setscene(d)
}
594addtask do_create_spdx_setscene
595
Andrew Geissler9aee5002022-03-30 16:27:02 +0000596do_create_spdx[dirs] = "${SPDXWORK}"
Andrew Geissler5199d832021-09-24 16:47:35 -0500597do_create_spdx[cleandirs] = "${SPDXDEPLOY} ${SPDXWORK}"
598do_create_spdx[depends] += "${PATCHDEPENDENCY}"
599do_create_spdx[deptask] = "do_create_spdx"
600
def collect_package_providers(d):
    """
    Map every runtime-provided name to the package that provides it.

    The packages of this recipe and of every other recipe listed in
    BB_TASKDEPDATA are examined. Each package provides its own name plus all
    names in its RPROVIDES. This recipe is processed last, so its packages
    win when a name is provided more than once.

    Returns a dictionary of provided name to package name.
    """
    import oe.packagedata

    pn = d.getVar("PN")
    taskdepdata = d.getVar("BB_TASKDEPDATA", False)
    deps = sorted(set(dep[0] for dep in taskdepdata.values() if dep[0] != pn))
    deps.append(pn)

    providers = {}
    for dep_pn in deps:
        recipe_data = oe.packagedata.read_pkgdata(dep_pn, d)

        for pkg in recipe_data.get("PACKAGES", "").split():
            pkg_data = oe.packagedata.read_subpkgdata_dict(pkg, d)

            # Only the names are needed, not the version constraints
            rprovides = set(bb.utils.explode_dep_versions2(pkg_data.get("RPROVIDES", "")))
            rprovides.add(pkg)

            for r in rprovides:
                providers[r] = pkg

    return providers
630
631collect_package_providers[vardepsexclude] += "BB_TASKDEPDATA"
632
python do_create_runtime_spdx() {
    # Create one "runtime-<package>" SPDX document per package of this
    # recipe. Each document amends the package document written by
    # do_create_spdx and adds a RUNTIME_DEPENDENCY_OF relationship for every
    # RDEPENDS entry that resolves to a known, packaged provider.
    # Nothing is written for native and cross recipes.
    from datetime import datetime, timezone
    import oe.sbom
    import oe.spdx
    import oe.packagedata
    from pathlib import Path

    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
    spdx_deploy = Path(d.getVar("SPDXRUNTIMEDEPLOY"))
    is_native = bb.data.inherits_class("native", d) or bb.data.inherits_class("cross", d)

    creation_time = datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    providers = collect_package_providers(d)

    if not is_native:
        bb.build.exec_func("read_subpackage_metadata", d)

        # Dependency package documents are read once and reused for every
        # package of this recipe
        dep_package_cache = {}

        for package in d.getVar("PACKAGES").split():
            # Evaluate variables with the package name as an override so that
            # package-specific values (e.g. RDEPENDS) are picked up
            localdata = bb.data.createCopy(d)
            pkg_name = d.getVar("PKG:%s" % package) or package
            localdata.setVar("PKG", pkg_name)
            localdata.setVar('OVERRIDES', d.getVar("OVERRIDES", False) + ":" + package)

            if not oe.packagedata.packaged(package, localdata):
                continue

            pkg_spdx_path = deploy_dir_spdx / "packages" / (pkg_name + ".spdx.json")

            package_doc, package_doc_sha1 = oe.sbom.read_doc(pkg_spdx_path)

            for p in package_doc.packages:
                if p.name == pkg_name:
                    spdx_package = p
                    break
            else:
                bb.fatal("Package '%s' not found in %s" % (pkg_name, pkg_spdx_path))

            runtime_doc = oe.spdx.SPDXDocument()
            runtime_doc.name = "runtime-" + pkg_name
            runtime_doc.documentNamespace = get_doc_namespace(localdata, runtime_doc)
            runtime_doc.creationInfo.created = creation_time
            runtime_doc.creationInfo.comment = "This document was created by analyzing package runtime dependencies."
            runtime_doc.creationInfo.licenseListVersion = d.getVar("SPDX_LICENSE_DATA")["licenseListVersion"]
            runtime_doc.creationInfo.creators.append("Tool: OpenEmbedded Core create-spdx.bbclass")
            runtime_doc.creationInfo.creators.append("Organization: %s" % d.getVar("SPDX_ORG"))
            runtime_doc.creationInfo.creators.append("Person: N/A ()")

            package_ref = oe.spdx.SPDXExternalDocumentRef()
            package_ref.externalDocumentId = "DocumentRef-package-" + package
            package_ref.spdxDocument = package_doc.documentNamespace
            package_ref.checksum.algorithm = "SHA1"
            package_ref.checksum.checksumValue = package_doc_sha1

            runtime_doc.externalDocumentRefs.append(package_ref)

            runtime_doc.add_relationship(
                runtime_doc.SPDXID,
                "AMENDS",
                "%s:%s" % (package_ref.externalDocumentId, package_doc.SPDXID)
            )

            deps = bb.utils.explode_dep_versions2(localdata.getVar("RDEPENDS") or "")
            seen_deps = set()
            for dep in deps:
                if dep not in providers:
                    continue

                dep = providers[dep]

                # De-duplicate on the resolved provider: several RDEPENDS
                # names may resolve to the same package, which must only be
                # referenced once per runtime document.
                if dep in seen_deps:
                    continue

                if not oe.packagedata.packaged(dep, localdata):
                    continue

                dep_pkg_data = oe.packagedata.read_subpkgdata_dict(dep, d)
                dep_pkg = dep_pkg_data["PKG"]

                if dep in dep_package_cache:
                    (dep_spdx_package, dep_package_ref) = dep_package_cache[dep]
                else:
                    dep_path = deploy_dir_spdx / "packages" / ("%s.spdx.json" % dep_pkg)

                    spdx_dep_doc, spdx_dep_sha1 = oe.sbom.read_doc(dep_path)

                    for pkg in spdx_dep_doc.packages:
                        if pkg.name == dep_pkg:
                            dep_spdx_package = pkg
                            break
                    else:
                        bb.fatal("Package '%s' not found in %s" % (dep_pkg, dep_path))

                    dep_package_ref = oe.spdx.SPDXExternalDocumentRef()
                    dep_package_ref.externalDocumentId = "DocumentRef-runtime-dependency-" + spdx_dep_doc.name
                    dep_package_ref.spdxDocument = spdx_dep_doc.documentNamespace
                    dep_package_ref.checksum.algorithm = "SHA1"
                    dep_package_ref.checksum.checksumValue = spdx_dep_sha1

                    dep_package_cache[dep] = (dep_spdx_package, dep_package_ref)

                runtime_doc.externalDocumentRefs.append(dep_package_ref)

                runtime_doc.add_relationship(
                    "%s:%s" % (dep_package_ref.externalDocumentId, dep_spdx_package.SPDXID),
                    "RUNTIME_DEPENDENCY_OF",
                    "%s:%s" % (package_ref.externalDocumentId, spdx_package.SPDXID)
                )
                seen_deps.add(dep)

            oe.sbom.write_doc(d, runtime_doc, "runtime", spdx_deploy)
}
748
749addtask do_create_runtime_spdx after do_create_spdx before do_build do_rm_work
750SSTATETASKS += "do_create_runtime_spdx"
751do_create_runtime_spdx[sstate-inputdirs] = "${SPDXRUNTIMEDEPLOY}"
752do_create_runtime_spdx[sstate-outputdirs] = "${DEPLOY_DIR_SPDX}"
753
python do_create_runtime_spdx_setscene () {
    # Setscene variant of do_create_runtime_spdx; hands off to
    # sstate_setscene(), which is defined outside this file.
    sstate_setscene(d)
}
757addtask do_create_runtime_spdx_setscene
758
759do_create_runtime_spdx[dirs] = "${SPDXRUNTIMEDEPLOY}"
760do_create_runtime_spdx[cleandirs] = "${SPDXRUNTIMEDEPLOY}"
761do_create_runtime_spdx[rdeptask] = "do_create_spdx"
762
def spdx_get_src(d):
    """
    Save the patched source of the recipe in SPDXWORK.

    For ordinary recipes WORKDIR is temporarily pointed at SPDXWORK and
    do_unpack / do_patch are run there. For recipes detected by
    is_work_shared_spdx() the already prepared shared source tree is copied
    into "<SPDXWORK>/<PN>-<PV>-<PR>" instead and its .git directory is
    removed. WORKDIR is always restored before returning.
    """
    import shutil
    spdx_workdir = d.getVar('SPDXWORK')
    spdx_sysroot_native = d.getVar('STAGING_DIR_NATIVE')

    workdir = d.getVar("WORKDIR")

    try:
        # The kernel class functions require it to be on work-shared, so we dont change WORKDIR
        if not is_work_shared_spdx(d):
            # Change the WORKDIR to make do_unpack do_patch run in another dir.
            d.setVar('WORKDIR', spdx_workdir)
            # Restore the original path to recipe's native sysroot (it's relative to WORKDIR).
            d.setVar('STAGING_DIR_NATIVE', spdx_sysroot_native)

            # The changed 'WORKDIR' also caused 'B' changed, create dir 'B' for the
            # possibly requiring of the following tasks (such as some recipes's
            # do_patch required 'B' existed).
            bb.utils.mkdirhier(d.getVar('B'))

            bb.build.exec_func('do_unpack', d)
        # Copy source of kernel to spdx_workdir
        if is_work_shared_spdx(d):
            d.setVar('WORKDIR', spdx_workdir)
            d.setVar('STAGING_DIR_NATIVE', spdx_sysroot_native)
            src_dir = spdx_workdir + "/" + d.getVar('PN')+ "-" + d.getVar('PV') + "-" + d.getVar('PR')
            bb.utils.mkdirhier(src_dir)
            if bb.data.inherits_class('kernel',d):
                share_src = d.getVar('STAGING_KERNEL_DIR')
            else:
                # NOTE(review): share_src must be defined for work-shared
                # recipes that are not kernels; the original WORKDIR is
                # assumed to hold their shared sources - confirm
                share_src = workdir
            # NOTE(review): this is a shell command built from variable
            # values; it needs the shell for the "/*" glob and breaks on
            # paths containing spaces or shell metacharacters
            cmd_copy_share = "cp -rf " + share_src + "/* " + src_dir + "/"
            cmd_copy_kernel_result = os.popen(cmd_copy_share).read()
            bb.note("cmd_copy_kernel_result = " + cmd_copy_kernel_result)

            git_path = src_dir + "/.git"
            if os.path.exists(git_path):
                shutil.rmtree(git_path)

        # Make sure gcc and kernel sources are patched only once
        if not (d.getVar('SRC_URI') == "" or is_work_shared_spdx(d)):
            bb.build.exec_func('do_patch', d)

        # Some userland has no source.
        if not os.path.exists( spdx_workdir ):
            bb.utils.mkdirhier(spdx_workdir)
    finally:
        d.setVar("WORKDIR", workdir)
813
814do_rootfs[recrdeptask] += "do_create_spdx do_create_runtime_spdx"
815
816ROOTFS_POSTUNINSTALL_COMMAND =+ "image_combine_spdx ; "
Andrew Geissler7e0e3c02022-02-25 20:34:39 +0000817
818do_populate_sdk[recrdeptask] += "do_create_spdx do_create_runtime_spdx"
819POPULATE_SDK_POST_HOST_COMMAND:append:task-populate-sdk = " sdk_host_combine_spdx; "
820POPULATE_SDK_POST_TARGET_COMMAND:append:task-populate-sdk = " sdk_target_combine_spdx; "
821
python image_combine_spdx() {
    # Combine the SPDX documents of all packages installed in the image and
    # create IMAGE_LINK_NAME symlinks for the files named after IMAGE_NAME.
    import os
    import oe.sbom
    from pathlib import Path
    from oe.rootfs import image_list_installed_packages

    image_name = d.getVar("IMAGE_NAME")
    image_link_name = d.getVar("IMAGE_LINK_NAME")
    imgdeploydir = Path(d.getVar("IMGDEPLOYDIR"))
    img_spdxid = oe.sbom.get_image_spdxid(image_name)
    packages = image_list_installed_packages(d)

    combine_spdx(d, image_name, imgdeploydir, img_spdxid, packages)

    def make_image_link(target_path, suffix):
        # No link is wanted when IMAGE_LINK_NAME is empty
        if not image_link_name:
            return
        link = imgdeploydir / (image_link_name + suffix)
        # Skip the link when it would have the same path as its target
        if link == target_path:
            return
        link.symlink_to(os.path.relpath(target_path, link.parent))

    for suffix in (".spdx.json", ".spdx.tar.zst", ".spdx.index.json"):
        make_image_link(imgdeploydir / (image_name + suffix), suffix)
}
849
python sdk_host_combine_spdx() {
    # Entry point for the SDK host post command: combine the SPDX documents
    # for the "host" SDK type.
    sdk_combine_spdx(d, sdk_type="host")
}
853
python sdk_target_combine_spdx() {
    # Entry point for the SDK target post command: combine the SPDX documents
    # for the "target" SDK type.
    sdk_combine_spdx(d, sdk_type="target")
}
857
def sdk_combine_spdx(d, sdk_type):
    """
    Produce the combined SPDX artifacts for one half of an SDK.

    d is the BitBake datastore; sdk_type is "host" or "target". The artifacts
    are named "<SDK_NAME>-<sdk_type>" and written to SDKDEPLOYDIR by
    combine_spdx(). The installed package list is queried for the target
    sysroot when sdk_type is "target", and for the host otherwise.
    """
    from pathlib import Path

    import oe.sbom
    from oe.sdk import sdk_list_installed_packages

    combined_name = "-".join((d.getVar("SDK_NAME"), sdk_type))
    wants_target = sdk_type == "target"

    combine_spdx(
        d,
        combined_name,
        Path(d.getVar("SDKDEPLOYDIR")),
        oe.sbom.get_sdk_spdxid(combined_name),
        sdk_list_installed_packages(d, wants_target),
    )
868
def combine_spdx(d, rootfs_name, rootfs_deploydir, rootfs_spdxid, packages):
    """
    Create the top-level SPDX document for a rootfs and bundle it with every
    SPDX document it references.

    d                 BitBake datastore
    rootfs_name       document name, also the prefix of all output files
    rootfs_deploydir  pathlib.Path of the directory receiving the outputs
    rootfs_spdxid     SPDXID assigned to the package describing the rootfs
    packages          mapping whose keys are the installed package names

    Outputs written to rootfs_deploydir:
      <rootfs_name>.spdx.json        the top-level document
      <rootfs_name>.spdx.tar.zst     zstd-compressed tar holding the top-level
                                     document, every document reachable
                                     through externalDocumentRefs, and an
                                     index.json listing them
      <rootfs_name>.spdx.index.json  the same index as a standalone file

    bb.fatal() is called when a package's SPDX document does not contain a
    package entry with the expected name.
    """
    import io
    import json
    import os
    import tarfile
    from datetime import timezone, datetime
    from pathlib import Path

    import bb.compress.zstd
    import oe.sbom
    import oe.spdx

    created = datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    spdx_deploy = Path(d.getVar("DEPLOY_DIR_SPDX"))
    epoch_var = d.getVar("SOURCE_DATE_EPOCH")

    top_doc = oe.spdx.SPDXDocument()
    top_doc.name = rootfs_name
    # The namespace is derived from the document, so the name must be set first.
    top_doc.documentNamespace = get_doc_namespace(d, top_doc)

    creation = top_doc.creationInfo
    creation.created = created
    creation.comment = "This document was created by analyzing the source of the Yocto recipe during the build."
    creation.licenseListVersion = d.getVar("SPDX_LICENSE_DATA")["licenseListVersion"]
    for creator in (
        "Tool: OpenEmbedded Core create-spdx.bbclass",
        "Organization: %s" % d.getVar("SPDX_ORG"),
        "Person: N/A ()",
    ):
        creation.creators.append(creator)

    # The package standing for the rootfs itself; everything installed hangs
    # off it through relationships.
    rootfs_pkg = oe.spdx.SPDXPackage()
    rootfs_pkg.name = d.getVar("PN")
    rootfs_pkg.versionInfo = d.getVar("PV")
    rootfs_pkg.SPDXID = rootfs_spdxid
    rootfs_pkg.packageSupplier = d.getVar("SPDX_SUPPLIER")
    top_doc.packages.append(rootfs_pkg)

    def reference(ext_doc, ext_sha1):
        # Register ext_doc as an external document of top_doc and hand back
        # the reference so the caller can build relationship targets from it.
        ref = oe.spdx.SPDXExternalDocumentRef()
        ref.externalDocumentId = "DocumentRef-%s" % ext_doc.name
        ref.spdxDocument = ext_doc.documentNamespace
        ref.checksum.algorithm = "SHA1"
        ref.checksum.checksumValue = ext_sha1
        top_doc.externalDocumentRefs.append(ref)
        return ref

    for pkg_name in sorted(packages.keys()):
        pkg_path = spdx_deploy / "packages" / (pkg_name + ".spdx.json")
        pkg_doc, pkg_sha1 = oe.sbom.read_doc(pkg_path)

        # Only the entry carrying the package's own name is linked.
        found = next((p for p in pkg_doc.packages if p.name == pkg_name), None)
        if found is None:
            bb.fatal("Unable to find package with name '%s' in SPDX file %s" % (pkg_name, pkg_path))

        pkg_ref = reference(pkg_doc, pkg_sha1)
        top_doc.add_relationship(
            rootfs_pkg,
            "CONTAINS",
            "%s:%s" % (pkg_ref.externalDocumentId, found.SPDXID),
        )

        rt_path = spdx_deploy / "runtime" / ("runtime-" + pkg_name + ".spdx.json")
        rt_doc, rt_sha1 = oe.sbom.read_doc(rt_path)
        rt_ref = reference(rt_doc, rt_sha1)

        # "OTHER" isn't ideal here, but I can't find a relationship that makes sense
        top_doc.add_relationship(
            rootfs_pkg,
            "OTHER",
            "%s:%s" % (rt_ref.externalDocumentId, rt_doc.SPDXID),
            comment="Runtime dependencies for %s" % pkg_name
        )

    top_doc_path = rootfs_deploydir / (rootfs_name + ".spdx.json")
    with top_doc_path.open("wb") as out:
        top_doc.to_json(out, sort_keys=True)

    threads = int(d.getVar("BB_NUMBER_THREADS"))

    # Holds both file paths and document namespaces already handled, so a
    # document reachable by several routes is archived once.
    seen = set()

    index = {"documents": []}
    entries = index["documents"]

    def archive_document(tar, path):
        # Add the document at path to tar, record it in the index, then
        # recurse depth-first into the documents it references.
        if path in seen:
            return
        seen.add(path)

        with path.open("rb") as src:
            ext_doc, ext_sha1 = oe.sbom.read_doc(src)
            # read_doc consumed the stream; rewind so tar stores the full file.
            src.seek(0)

            namespace = ext_doc.documentNamespace
            if namespace in seen:
                return

            bb.note("Adding SPDX document %s" % path)
            seen.add(namespace)

            member = tar.gettarinfo(fileobj=src)
            member.name = ext_doc.name + ".spdx.json"
            member.uid = 0
            member.gid = 0
            member.uname = "root"
            member.gname = "root"

            # Clamp timestamps newer than SOURCE_DATE_EPOCH when it is set.
            if epoch_var is not None:
                epoch = int(epoch_var)
                if member.mtime > epoch:
                    member.mtime = epoch

            tar.addfile(member, src)

            entries.append({
                "filename": member.name,
                "documentNamespace": namespace,
                "sha1": ext_sha1,
            })

        for ext in ext_doc.externalDocumentRefs:
            # Referenced documents are looked up by namespace, with "/"
            # flattened to "_" to form the file name.
            archive_document(tar, spdx_deploy / "by-namespace" / ext.spdxDocument.replace("/", "_"))

    tar_path = rootfs_deploydir / (rootfs_name + ".spdx.tar.zst")
    with bb.compress.zstd.open(tar_path, "w", num_threads=threads) as zf, \
            tarfile.open(fileobj=zf, mode="w|") as tar:
        archive_document(tar, top_doc_path)

        entries.sort(key=lambda entry: entry["filename"])

        index_bytes = json.dumps(index, sort_keys=True).encode("utf-8")

        index_member = tarfile.TarInfo()
        index_member.name = "index.json"
        index_member.size = len(index_bytes)
        index_member.uid = 0
        index_member.gid = 0
        index_member.uname = "root"
        index_member.gname = "root"

        tar.addfile(index_member, fileobj=io.BytesIO(index_bytes))

    index_path = rootfs_deploydir / (rootfs_name + ".spdx.index.json")
    with index_path.open("w") as out:
        json.dump(index, out, sort_keys=True)