blob: fb78e274a8edf308d0f16eeb6d1c3ee220055774 [file] [log] [blame]
# This class integrates real-time license scanning, generation of SPDX standard
# output and verifying license info during the building process.
# It is a combination of efforts from the OE-Core, SPDX and Fossology projects.
#
# For more information on FOSSology:
#   http://www.fossology.org
#
# For more information on FOSSologySPDX commandline:
#   https://github.com/spdx-tools/fossology-spdx/wiki/Fossology-SPDX-Web-API
#
# For more information on SPDX:
#   http://www.spdx.org
#

# SPDX file will be output to the path which is defined as [SPDX_MANIFEST_DIR]
# in ./meta/conf/licenses.conf.
17
# Per-recipe directory where do_spdx keeps its JSON cache file
# (<PN><PV>.spdx) holding the results of the previous scan.
SPDXSSTATEDIR = "${WORKDIR}/spdx_sstate_dir"

# If ${S} isn't actually the top-level source directory, set SPDX_S to point at
# the real top-level directory.
SPDX_S ?= "${S}"
23
python do_spdx () {
    # Task: produce the SPDX manifest for this recipe.
    #
    #  1. Compute the verification code of the source tree (SPDX_S).
    #  2. If the cache file holds the same verification code, reuse the
    #     cached file/package/license data.
    #  3. Otherwise stage the files that are not cached, upload them to the
    #     FOSSology server with wget, merge the answer and refresh the cache.
    #  4. Write the manifest to SPDX_MANIFEST_DIR/<PN>.spdx.
    #
    # The temporary staging directory and tarball are removed on every exit
    # path once scanning has started, so a failed run cannot leak stale files
    # into the tarball of the next run.
    import os
    import shutil

    info = {}
    info['workdir'] = d.getVar('WORKDIR')
    info['sourcedir'] = d.getVar('SPDX_S')
    info['pn'] = d.getVar('PN')
    info['pv'] = d.getVar('PV')
    info['spdx_version'] = d.getVar('SPDX_VERSION')
    info['data_license'] = d.getVar('DATA_LICENSE')

    sstatedir = d.getVar('SPDXSSTATEDIR')
    sstatefile = os.path.join(sstatedir, info['pn'] + info['pv'] + ".spdx")

    manifest_dir = d.getVar('SPDX_MANIFEST_DIR')
    info['outfile'] = os.path.join(manifest_dir, info['pn'] + ".spdx")

    info['spdx_temp_dir'] = d.getVar('SPDX_TEMP_DIR')
    info['tar_file'] = os.path.join(info['workdir'], info['pn'] + ".tar.gz")

    # Make sure important dirs exist
    try:
        bb.utils.mkdirhier(manifest_dir)
        bb.utils.mkdirhier(sstatedir)
        bb.utils.mkdirhier(info['spdx_temp_dir'])
    except OSError as e:
        bb.error("SPDX: Could not set up required directories: " + str(e))
        return

    try:
        ## get everything from cache. use it to decide if
        ## something needs to be rerun
        cur_ver_code = get_ver_code(info['sourcedir'])

        cached_spdx = None
        if os.path.exists(sstatefile):
            ## cache for this package exists. read it in
            cached_spdx = get_cached_spdx(sstatefile)
            required = ('PackageVerificationCode', 'Files', 'Package', 'Licenses')
            # get_cached_spdx() returns None for invalid JSON; also guard
            # against a cache that lacks the keys used below.
            if not isinstance(cached_spdx, dict) \
                    or any(key not in cached_spdx for key in required):
                bb.warn("SPDX: Ignoring unusable cache file: " + sstatefile)
                cached_spdx = None

        cache_cur = (cached_spdx is not None
                     and cached_spdx['PackageVerificationCode'] == cur_ver_code)

        if cache_cur:
            bb.warn("SPDX: Verification code for " + info['pn']
                    + " is same as cache's. do nothing")
            spdx_file_info = cached_spdx['Files']
            foss_package_info = cached_spdx['Package']
            foss_license_info = cached_spdx['Licenses']
        else:
            if cached_spdx is not None:
                ## stale cache: only files whose checksum is unknown get staged
                local_file_info = setup_foss_scan(info, True, cached_spdx['Files'])
            else:
                local_file_info = setup_foss_scan(info, False, None)

            ## setup fossology command
            foss_server = d.getVar('FOSS_SERVER')
            foss_flags = d.getVar('FOSS_WGET_FLAGS')
            foss_full_spdx = d.getVar('FOSS_FULL_SPDX') == "true"
            foss_command = "wget %s --post-file=%s %s" \
                % (foss_flags, info['tar_file'], foss_server)

            foss_result = run_fossology(foss_command, foss_full_spdx)
            if foss_result is None:
                bb.error("SPDX: Could not communicate with FOSSology server. Command was: " + foss_command)
                return

            (foss_package_info, foss_file_info, foss_license_info) = foss_result
            spdx_file_info = create_spdx_doc(local_file_info, foss_file_info)
            ## write to cache
            write_cached_spdx(sstatefile, cur_ver_code, foss_package_info,
                              spdx_file_info, foss_license_info)

        ## Get document and package level information
        spdx_header_info = get_header_info(info, cur_ver_code, foss_package_info)

        ## CREATE MANIFEST
        create_manifest(info, spdx_header_info, spdx_file_info, foss_license_info)
    finally:
        ## clean up the temp stuff
        shutil.rmtree(info['spdx_temp_dir'], ignore_errors=True)
        if os.path.exists(info['tar_file']):
            remove_file(info['tar_file'])
}
addtask spdx after do_patch before do_configure
106
def create_manifest(info, header, files, licenses):
    """
    Write the SPDX manifest to info['outfile'] as UTF-8 text.

    info     -- dict; only the 'outfile' path is read here.
    header   -- preformatted header text; written first, followed by a newline.
    files    -- dict of per-file tag dicts. Each must contain 'FileName',
                which is emitted first, followed by the remaining tags.
    licenses -- dict mapping a license id to a dict of license tags.

    Each file record and each license record is terminated by an empty line.
    Tag values are concatenated to the tag name, so they must be strings.
    """
    import codecs
    with codecs.open(info['outfile'], mode='w', encoding='utf-8') as f:
        # Write header
        f.write(header + '\n')

        # Write file data. items()/values() are used because
        # dict.iteritems() does not exist on Python 3.
        for block in files.values():
            f.write("FileName: " + block['FileName'] + '\n')
            for key, value in block.items():
                if key != 'FileName':
                    f.write(key + ": " + value + '\n')
            f.write('\n')

        # Write license data
        for license_id, block in licenses.items():
            f.write("LicenseID: " + license_id + '\n')
            for key, value in block.items():
                f.write(key + ": " + value + '\n')
            f.write('\n')
127
def get_cached_spdx(sstatefile):
    """
    Read the JSON document stored in sstatefile (UTF-8).

    Returns the decoded object, or None when the content is not valid JSON.
    """
    import codecs
    import json
    with codecs.open(sstatefile, mode='r', encoding='utf-8') as cache:
        try:
            return json.load(cache)
        except ValueError:
            # Unparsable cache content is reported to the caller as None.
            return None
138
def write_cached_spdx(sstatefile, ver_code, package_info, files, license_info):
    """
    Serialise the scan results to sstatefile as a single JSON document.

    The document has four top-level keys: 'PackageVerificationCode',
    'Files', 'Package' and 'Licenses', taken from ver_code, files,
    package_info and license_info respectively.
    """
    import codecs
    import json
    cache_doc = {
        'PackageVerificationCode': ver_code,
        'Files': files,
        'Package': package_info,
        'Licenses': license_info,
    }
    with codecs.open(sstatefile, mode='w', encoding='utf-8') as cache:
        cache.write(json.dumps(cache_doc))
152
def setup_foss_scan(info, cache, cached_files):
    """
    Stage the files that need scanning and pack them into info['tar_file'].

    Every file below info['sourcedir'] is hashed. A file whose checksum is
    already present in cached_files (only consulted when cache is true) keeps
    its cached record; any other file is copied into info['spdx_temp_dir']
    under the same relative path so that it becomes part of the tarball.

    Returns a dict keyed by checksum. Files with identical content share a
    key, so the record of the last one visited wins.
    """
    import os
    import shutil
    import tarfile
    file_info = {}

    for f_dir, f in list_files(info['sourcedir']):
        full_path = os.path.join(f_dir, f)
        abs_path = os.path.join(info['sourcedir'], full_path)

        checksum = hash_file(abs_path)
        if checksum is None:
            bb.warn("SPDX: Could not get checksum for file: " + f)
            continue

        ## retain cache information if it exists
        if cache and checksum in cached_files:
            file_info[checksum] = cached_files[checksum]
            continue

        ## have the file included in what's sent to the FOSSology server
        file_info[checksum] = {'FileName': full_path}
        dest_dir = os.path.join(info['spdx_temp_dir'], f_dir)
        dest_path = os.path.join(info['spdx_temp_dir'], full_path)

        # The two steps are guarded separately: on Python 3 shutil.Error and
        # IOError are both OSError, so a single try block with an OSError
        # handler would report every copy failure as a mkdirhier failure.
        try:
            bb.utils.mkdirhier(dest_dir)
        except OSError as e:
            bb.warn("SPDX: mkdirhier failed: " + str(e))
            continue
        try:
            shutil.copyfile(abs_path, dest_path)
        except OSError as e:
            bb.warn("SPDX: copyfile failed: " + str(e))

    with tarfile.open(info['tar_file'], "w:gz") as tar:
        tar.add(info['spdx_temp_dir'], arcname=os.path.basename(info['spdx_temp_dir']))

    return file_info
190
def remove_file(file_name):
    """Delete file_name; an OSError from os.remove is deliberately ignored."""
    try:
        os.remove(file_name)
    except OSError:
        # Best effort only: nothing to do when the file cannot be removed.
        return
196
def list_files(dir):
    """
    Walk dir recursively and yield a (relative_directory, file_name) pair
    for every file found. The directory part is relative to dir, so files
    directly inside dir are reported with '.'.
    """
    for current, _subdirs, names in os.walk(dir):
        relative = os.path.relpath(current, dir)
        for name in names:
            yield relative, name
203
def hash_file(file_name):
    """
    Return the SHA-1 checksum of file_name as computed by
    bb.utils.sha1_file().
    """
    # NOTE(review): callers test the result against None; whether
    # sha1_file() can return None is not visible here - confirm.
    import bb.utils
    return bb.utils.sha1_file(file_name)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500207
def hash_string(data):
    """Return the hexadecimal SHA-1 digest of the text data, encoded as UTF-8."""
    import hashlib
    return hashlib.sha1(data.encode('utf-8')).hexdigest()
213
def run_fossology(foss_command, full_spdx):
    """
    Run foss_command and parse the SPDX tag/value text it prints.

    foss_command -- command line as one string; it is split on whitespace
                    and executed without a shell, with stderr merged into
                    the captured output.
    full_spdx    -- when true, the package level license and copyright
                    fields are parsed from the output; otherwise they are
                    set to NOASSERTION defaults.

    Returns a tuple (package_info, file_info, license_info), where file_info
    is keyed by SHA1 checksum and license_info by license id. Returns None
    when the command exits with a non-zero status or cannot be started at
    all (e.g. the executable is missing).

    Raises IndexError when an expected field is missing from the output.
    """
    import re
    import subprocess

    try:
        foss_output = subprocess.check_output(foss_command.split(),
                                              stderr=subprocess.STDOUT).decode('utf-8')
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing or non-executable program, which would
        # otherwise escape as an unhandled exception instead of None.
        return None

    foss_output = foss_output.replace('\r', '')

    # Package info
    package_info = {}
    if full_spdx:
        # All mandatory, only one occurrence
        package_info['PackageCopyrightText'] = re.findall(r'PackageCopyrightText: (.*?</text>)', foss_output, re.S)[0]
        package_info['PackageLicenseDeclared'] = re.findall(r'PackageLicenseDeclared: (.*)', foss_output)[0]
        package_info['PackageLicenseConcluded'] = re.findall(r'PackageLicenseConcluded: (.*)', foss_output)[0]
        # These may be more than one
        package_info['PackageLicenseInfoFromFiles'] = re.findall(r'PackageLicenseInfoFromFiles: (.*)', foss_output)
    else:
        DEFAULT = "NOASSERTION"
        package_info['PackageCopyrightText'] = "<text>" + DEFAULT + "</text>"
        package_info['PackageLicenseDeclared'] = DEFAULT
        package_info['PackageLicenseConcluded'] = DEFAULT
        package_info['PackageLicenseInfoFromFiles'] = []

    # File info
    file_info = {}
    fields = ['FileName', 'FileType', 'LicenseConcluded', 'LicenseInfoInFile']
    # FileName is also in PackageFileName, so we match on FileType as well.
    for rec in re.findall(r'FileName:.*?FileType:.*?</text>', foss_output, re.S):
        chksum = re.findall(r'FileChecksum: SHA1: (.*)\n', rec)[0]
        record = {}
        record['FileCopyrightText'] = re.findall(r'FileCopyrightText: (.*?</text>)', rec, re.S)[0]
        for field in fields:
            record[field] = re.findall(field + r': (.*)', rec)[0]
        file_info[chksum] = record

    # Licenses
    license_info = {}
    for lic in re.findall(r'LicenseID:.*?LicenseName:.*?\n', foss_output, re.S):
        license_id = re.findall(r'LicenseID: (.*)\n', lic)[0]
        license_info[license_id] = {
            'ExtractedText': re.findall(r'ExtractedText: (.*?</text>)', lic, re.S)[0],
            'LicenseName': re.findall(r'LicenseName: (.*)', lic)[0],
        }

    return (package_info, file_info, license_info)
267
def create_spdx_doc(file_info, scanned_files):
    """
    Merge the scanner results into the local file records.

    file_info     -- dict keyed by checksum holding the local records; it is
                     updated in place.
    scanned_files -- dict keyed by checksum holding the scanner records, each
                     with 'FileName', 'FileType', 'LicenseInfoInFile',
                     'LicenseConcluded' and 'FileCopyrightText'.

    A scanned checksum that has no local record is reported with a warning
    and otherwise ignored. Returns file_info.
    """
    import json
    ## push foss changes back into cache
    # items() is used because dict.iteritems() does not exist on Python 3.
    for chksum, lic_info in scanned_files.items():
        if chksum in file_info:
            entry = file_info[chksum]
            entry['FileType'] = lic_info['FileType']
            entry['FileChecksum: SHA1'] = chksum
            entry['LicenseInfoInFile'] = lic_info['LicenseInfoInFile']
            entry['LicenseConcluded'] = lic_info['LicenseConcluded']
            entry['FileCopyrightText'] = lic_info['FileCopyrightText']
        else:
            bb.warn("SPDX: " + lic_info['FileName'] + " : " + chksum
                    + " : is not in the local file info: "
                    + json.dumps(lic_info, indent=1))
    return file_info
283
def get_ver_code(dirname):
    """
    Compute the package verification code for the tree rooted at dirname.

    Every file below dirname is hashed with hash_file(); the lower-cased
    checksums are sorted, concatenated and hashed again with hash_string().
    Files that cannot be hashed are skipped with a warning.

    Sorting makes the result independent of the order in which os.walk()
    returns directory entries, which is not stable across filesystems; an
    unsorted list could flag an unchanged tree as modified.
    """
    chksums = []
    for f_dir, f in list_files(dirname):
        path = os.path.join(dirname, f_dir, f)
        checksum = hash_file(path)
        if checksum is not None:
            chksums.append(checksum.lower())
        else:
            bb.warn("SPDX: Could not hash file: " + path)
    ver_code_string = ''.join(sorted(chksums))
    return hash_string(ver_code_string)
296
def get_header_info(info, spdx_verification_code, package_info):
    """
    Put together the header SPDX information.
    Eventually this needs to become a lot less
    of a hardcoded thing.

    info                   -- dict providing 'tar_file', 'spdx_version',
                              'data_license', 'pn' and 'pv'.
    spdx_verification_code -- value written as PackageVerificationCode.
    package_info           -- dict providing 'PackageCopyrightText',
                              'PackageLicenseDeclared',
                              'PackageLicenseConcluded' and the list
                              'PackageLicenseInfoFromFiles'.

    Returns the header as one newline-joined string ending with the
    "## File Information" section heading.
    """
    from datetime import datetime, timezone
    import os
    head = []
    DEFAULT = "NOASSERTION"

    package_checksum = hash_file(info['tar_file'])
    if package_checksum is None:
        package_checksum = DEFAULT

    ## document level information
    head.append("## SPDX Document Information")
    head.append("SPDXVersion: " + info['spdx_version'])
    head.append("DataLicense: " + info['data_license'])
    head.append("DocumentComment: <text>SPDX for "
                + info['pn'] + " version " + info['pv'] + "</text>")
    head.append("")

    ## Creator information
    ## The trailing 'Z' denotes UTC, so the timestamp is taken in UTC
    ## rather than in local time.
    now = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
    head.append("## Creation Information")
    ## Tools are supposed to have a version, but FOSSology+SPDX provides none.
    head.append("Creator: Tool: FOSSology+SPDX")
    head.append("Created: " + now)
    head.append("CreatorComment: <text>UNO</text>")
    head.append("")

    ## package level information
    head.append("## Package Information")
    head.append("PackageName: " + info['pn'])
    head.append("PackageVersion: " + info['pv'])
    head.append("PackageFileName: " + os.path.basename(info['tar_file']))
    head.append("PackageSupplier: Person:" + DEFAULT)
    head.append("PackageDownloadLocation: " + DEFAULT)
    head.append("PackageSummary: <text></text>")
    head.append("PackageOriginator: Person:" + DEFAULT)
    head.append("PackageChecksum: SHA1: " + package_checksum)
    head.append("PackageVerificationCode: " + spdx_verification_code)
    head.append("PackageDescription: <text>" + info['pn']
                + " version " + info['pv'] + "</text>")
    head.append("")
    head.append("PackageCopyrightText: "
                + package_info['PackageCopyrightText'])
    head.append("")
    head.append("PackageLicenseDeclared: "
                + package_info['PackageLicenseDeclared'])
    head.append("PackageLicenseConcluded: "
                + package_info['PackageLicenseConcluded'])

    for licref in package_info['PackageLicenseInfoFromFiles']:
        head.append("PackageLicenseInfoFromFiles: " + licref)
    head.append("")

    ## header for file level
    head.append("## File Information")
    head.append("")

    return '\n'.join(head)
360 return '\n'.join(head)