Brad Bishop | c342db3 | 2019-05-15 21:57:59 -0400 | [diff] [blame] | 1 | # |
Patrick Williams | 92b42cb | 2022-09-03 06:53:57 -0500 | [diff] [blame] | 2 | # Copyright OpenEmbedded Contributors |
| 3 | # |
Brad Bishop | c342db3 | 2019-05-15 21:57:59 -0400 | [diff] [blame] | 4 | # SPDX-License-Identifier: GPL-2.0-only |
| 5 | # |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 6 | """Code for parsing OpenEmbedded license strings""" |
| 7 | |
| 8 | import ast |
| 9 | import re |
| 10 | from fnmatch import fnmatchcase as fnmatch |
| 11 | |
def license_ok(license, dont_want_licenses):
    """Tell whether a license is acceptable.

    Returns False when `license` matches (case-sensitive, shell-style
    wildcard match) any pattern in `dont_want_licenses`, True otherwise.
    """
    return not any(fnmatch(license, unwanted) for unwanted in dont_want_licenses)
| 18 | |
def obsolete_license_list():
    """Return a new list of the license names this module treats as obsolete."""
    return [
        # AGPL spellings
        "AGPL-3", "AGPL-3+", "AGPLv3", "AGPLv3+",
        "AGPLv3.0", "AGPLv3.0+", "AGPL-3.0", "AGPL-3.0+",
        "BSD-0-Clause",
        # GPL version 1 spellings
        "GPL-1", "GPL-1+", "GPLv1", "GPLv1+",
        "GPLv1.0", "GPLv1.0+", "GPL-1.0", "GPL-1.0+",
        # GPL version 2 spellings
        "GPL-2", "GPL-2+", "GPLv2", "GPLv2+",
        "GPLv2.0", "GPLv2.0+", "GPL-2.0", "GPL-2.0+",
        # GPL version 3 spellings
        "GPL-3", "GPL-3+", "GPLv3", "GPLv3+",
        "GPLv3.0", "GPLv3.0+", "GPL-3.0", "GPL-3.0+",
        # LGPL spellings
        "LGPLv2", "LGPLv2+", "LGPLv2.0", "LGPLv2.0+", "LGPL-2.0", "LGPL-2.0+",
        "LGPL2.1", "LGPL2.1+", "LGPLv2.1", "LGPLv2.1+", "LGPL-2.1", "LGPL-2.1+",
        "LGPLv3", "LGPLv3+", "LGPL-3.0", "LGPL-3.0+",
        # MPL spellings
        "MPL-1", "MPLv1", "MPLv1.1", "MPLv2",
        # Everything else
        "MIT-X", "MIT-style", "openssl", "PSF", "PSFv2", "Python-2",
        "Apachev2", "Apache-2", "Artisticv1", "Artistic-1",
        "AFL-2", "AFL-1", "AFLv2", "AFLv1",
        "CDDLv1", "CDDL-1", "EPLv1.0", "FreeType", "Nauman",
        "tcl", "vim", "SGIv1",
    ]
| 28 | |
class LicenseError(Exception):
    """Base class for the license-parsing errors defined in this module."""
| 31 | |
class LicenseSyntaxError(LicenseError):
    """Error raised when a license string fails to parse.

    Attributes:
        licensestr: the license string that could not be parsed.
        exc: the underlying exception describing the failure.
    """
    def __init__(self, licensestr, exc):
        self.licensestr = licensestr
        self.exc = exc
        # Forward the constructor arguments to the base class so that
        # ``args`` is populated. With empty ``args`` the exception cannot be
        # reconstructed by pickle (it re-calls the class with ``*args``) and
        # repr() carries no information.
        LicenseError.__init__(self, licensestr, exc)

    def __str__(self):
        return "error in '%s': %s" % (self.licensestr, self.exc)
| 40 | |
class InvalidLicense(LicenseError):
    """Error raised for a license token containing invalid characters.

    Attributes:
        license: the offending token.
    """
    def __init__(self, license):
        self.license = license
        # Forward the argument to the base class so that ``args`` is
        # populated. With empty ``args`` the exception cannot be
        # reconstructed by pickle (it re-calls the class with ``*args``) and
        # repr() carries no information.
        LicenseError.__init__(self, license)

    def __str__(self):
        return "invalid characters in license '%s'" % self.license
| 48 | |
# Characters treated as operators/separators inside a license expression.
license_operator_chars = '&|() '
# Matches exactly one operator character; the capture group makes
# license_operator.split() keep the operators in its result.
license_operator = re.compile(r'([' + license_operator_chars + r'])')
# Matches a token made only of the characters allowed in a license name.
license_pattern = re.compile(r'[a-zA-Z0-9.+_\-]+$')
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 52 | |
class LicenseVisitor(ast.NodeVisitor):
    """Get elements based on OpenEmbedded license strings"""
    def get_elements(self, licensestr):
        """Tokenise a license string into elements that Python can parse.

        The string is split on the operator characters and whitespace-only
        pieces are dropped. Each license name is wrapped in double quotes so
        that it parses as a string literal, and an explicit '&' is inserted
        between two license names that directly follow each other.

        Raises InvalidLicense for a token that is neither a valid license
        name nor an operator.
        """
        elements = [x for x in license_operator.split(licensestr) if x.strip()]
        new_elements = []
        for pos, element in enumerate(elements):
            if license_pattern.match(element):
                # Two adjacent license names are joined with an implicit AND.
                if pos > 0 and license_pattern.match(elements[pos-1]):
                    new_elements.append('&')
                element = '"' + element + '"'
            elif not license_operator.match(element):
                raise InvalidLicense(element)
            new_elements.append(element)

        return new_elements

    def visit_elements(self, elements):
        """Syntax tree visitor which can accept elements previously generated with
        OpenEmbedded license string"""
        self.visit(ast.parse(' '.join(elements)))

    def visit_string(self, licensestr):
        """Syntax tree visitor which can accept OpenEmbedded license strings"""
        self.visit_elements(self.get_elements(licensestr))
| 77 | |
class FlattenVisitor(LicenseVisitor):
    """Flatten a parsed license tree into a plain list of licenses.

    For every OR ('|') node the caller-supplied `choose_licenses` callable
    decides which of the two alternatives is kept.
    """
    def __init__(self, choose_licenses):
        super().__init__()
        self.licenses = []
        self.choose_licenses = choose_licenses

    def visit_Str(self, node):
        # String node as represented by older Python versions.
        self.licenses.append(node.s)

    def visit_Constant(self, node):
        self.licenses.append(node.value)

    def visit_BinOp(self, node):
        if not isinstance(node.op, ast.BitOr):
            # Any other operator: keep walking both operands normally.
            self.generic_visit(node)
            return

        # Flatten each side of the OR independently, then let the caller's
        # callable pick which side's licenses to keep.
        alternatives = []
        for operand in (node.left, node.right):
            branch = FlattenVisitor(self.choose_licenses)
            branch.visit(operand)
            alternatives.append(branch.licenses)

        self.licenses.extend(self.choose_licenses(*alternatives))
| 104 | |
def flattened_licenses(licensestr, choose_licenses):
    """Flatten a license string into a flat list of licenses.

    `choose_licenses` is called with the two alternatives of every OR and
    returns the one to keep. A SyntaxError raised while processing the
    string is re-raised as LicenseSyntaxError.
    """
    visitor = FlattenVisitor(choose_licenses)
    try:
        visitor.visit_string(licensestr)
    except SyntaxError as exc:
        raise LicenseSyntaxError(licensestr, exc)
    else:
        return visitor.licenses
| 113 | |
def is_included(licensestr, include_licenses=None, exclude_licenses=None):
    """Check a license string against include and exclude pattern lists.

    An empty or missing `include_licenses` means "include everything"
    (['*']); an empty or missing `exclude_licenses` means "exclude nothing".

    Returns a tuple (state, licenses). When any selected license matches an
    exclude pattern the result is (False, <excluded licenses>); otherwise it
    is (True, <licenses matching the include patterns>).
    """
    if not include_licenses:
        include_licenses = ['*']

    if not exclude_licenses:
        exclude_licenses = []

    def matches_any(license, patterns):
        return any(fnmatch(license, pattern) for pattern in patterns)

    def weight(candidates):
        # The factor 1000 is arbitrary, just expected to be much larger than
        # the number of licenses actually specified. That way the weight is
        # negative if the candidates contain an excluded license, while the
        # candidates with the most included licenses still rank highest.
        wanted = sum(1 for lic in candidates
                     if matches_any(lic, include_licenses))
        has_excluded = any(matches_any(lic, exclude_licenses)
                           for lic in candidates)
        return wanted - (1000 if has_excluded else 0)

    def choose_licenses(alpha, beta):
        """Pick the 'best' side of an OR; ties go to the left side."""
        alpha_weight = weight(alpha)
        beta_weight = weight(beta)
        return alpha if alpha_weight >= beta_weight else beta

    licenses = flattened_licenses(licensestr, choose_licenses)
    excluded = [lic for lic in licenses if matches_any(lic, exclude_licenses)]
    if excluded:
        return False, excluded

    included = [lic for lic in licenses if matches_any(lic, include_licenses)]
    return True, included
| 159 | |
class ManifestVisitor(LicenseVisitor):
    """Walk license tree (parsed from a string) removing the incompatible
    licenses specified

    While walking, `licensestr` is rebuilt from the licenses that are kept
    and `licenses` collects those licenses in visiting order.
    """
    def __init__(self, dont_want_licenses, canonical_license, d):
        self._dont_want_licenses = dont_want_licenses
        self._canonical_license = canonical_license
        self._d = d
        # Operators and opening brackets seen since the last kept license.
        self._operators = []

        self.licenses = []
        self.licensestr = ''

        LicenseVisitor.__init__(self)

    def visit(self, node):
        """Process one AST node, then continue into its children."""
        # String literals are ast.Constant nodes on current Python versions.
        # ast.Str is deprecated since 3.8 and removed in 3.14, so legacy
        # nodes are recognised by class name instead of touching ast.Str.
        lic = None
        if isinstance(node, ast.Constant):
            if isinstance(node.value, str):
                lic = node.value
        elif type(node).__name__ == 'Str':
            lic = node.s

        if lic is not None:
            if license_ok(self._canonical_license(self._d, lic),
                          self._dont_want_licenses):
                if self._operators:
                    # Collapse runs of pending binary operators so only the
                    # most recent one of each run survives; brackets are
                    # always kept.
                    ops = []
                    for op in self._operators:
                        if op == '[':
                            ops.append(op)
                        elif op == ']':
                            ops.append(op)
                        else:
                            if not ops:
                                ops.append(op)
                            elif ops[-1] in ['[', ']']:
                                ops.append(op)
                            else:
                                ops[-1] = op

                    for op in ops:
                        if op == '[' or op == ']':
                            self.licensestr += op
                        elif self.licenses:
                            # A binary operator is only emitted once there is
                            # a previously kept license to join with.
                            self.licensestr += ' ' + op + ' '

                    self._operators = []

                self.licensestr += lic
                self.licenses.append(lic)
        elif isinstance(node, ast.BitAnd):
            self._operators.append("&")
        elif isinstance(node, ast.BitOr):
            self._operators.append("|")
        elif isinstance(node, ast.List):
            self._operators.append("[")
        elif isinstance(node, ast.Load):
            self.licensestr += "]"

        self.generic_visit(node)
| 215 | |
def manifest_licenses(licensestr, dont_want_licenses, canonical_license, d):
    """Filter a license string against `dont_want_licenses`.

    Returns a tuple (filtered license string, list of licenses kept).
    A SyntaxError raised while processing the string is re-raised as
    LicenseSyntaxError.
    """
    visitor = ManifestVisitor(dont_want_licenses, canonical_license, d)

    # Parentheses are swapped for square brackets so that the ast module
    # yields List and Load nodes, which the visitor relies on.
    to_brackets = {'(': '[', ')': ']'}

    try:
        tokens = [to_brackets.get(token, token)
                  for token in visitor.get_elements(licensestr)]
        visitor.visit_elements(tokens)
    except SyntaxError as exc:
        raise LicenseSyntaxError(licensestr, exc)

    # Turn the square brackets back into parentheses for the output.
    restored = visitor.licensestr.replace('[', '(')
    visitor.licensestr = restored.replace(']', ')')

    return (visitor.licensestr, visitor.licenses)
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 236 | |
class ListVisitor(LicenseVisitor):
    """Collect the set of distinct licenses named in a license string."""
    def __init__(self):
        # A set, so that repeated licenses are recorded once.
        self.licenses = set()

    def visit_Constant(self, node):
        self.licenses.add(node.value)

    def visit_Str(self, node):
        # String node as represented by older Python versions.
        self.licenses.add(node.s)
| 247 | |
def list_licenses(licensestr):
    """Return the set of all licenses mentioned in a license string.

    Binary operators are not applied or taken into account in any way.
    A SyntaxError raised while processing the string is re-raised as
    LicenseSyntaxError.
    """
    collector = ListVisitor()
    try:
        collector.visit_string(licensestr)
    except SyntaxError as exc:
        raise LicenseSyntaxError(licensestr, exc)
    else:
        return collector.licenses
Andrew Geissler | 9aee500 | 2022-03-30 16:27:02 +0000 | [diff] [blame] | 257 | |
def apply_pkg_license_exception(pkg, bad_licenses, exceptions):
    """Drop the bad licenses that have an exception for this package.

    A license is dropped when the entry "<pkg>:<license>" is present in
    `exceptions`; the remaining licenses are returned in their original
    order.
    """
    remaining = []
    for lic in bad_licenses:
        entry = pkg + ':' + lic
        if entry in exceptions:
            continue
        remaining.append(lic)
    return remaining