Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 1 | """ |
| 2 | BitBake 'Fetch' implementations |
| 3 | |
| 4 | Classes for obtaining upstream sources for the |
| 5 | BitBake build tools. |
| 6 | |
| 7 | """ |
| 8 | |
| 9 | # Copyright (C) 2003, 2004 Chris Larson |
| 10 | # |
Brad Bishop | c342db3 | 2019-05-15 21:57:59 -0400 | [diff] [blame] | 11 | # SPDX-License-Identifier: GPL-2.0-only |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 12 | # |
| 13 | # Based on functions from the base bb module, Copyright 2003 Holger Schurig |
| 14 | |
Andrew Geissler | 82c905d | 2020-04-13 13:39:40 -0500 | [diff] [blame] | 15 | import shlex |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 16 | import re |
| 17 | import tempfile |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 18 | import os |
Brad Bishop | d7bf8c1 | 2018-02-25 22:55:05 -0500 | [diff] [blame] | 19 | import errno |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 20 | import bb |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 21 | import bb.progress |
Brad Bishop | 1932369 | 2019-04-05 15:28:33 -0400 | [diff] [blame] | 22 | import socket |
| 23 | import http.client |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 24 | import urllib.request, urllib.parse, urllib.error |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 25 | from bb.fetch2 import FetchMethod |
| 26 | from bb.fetch2 import FetchError |
| 27 | from bb.fetch2 import logger |
| 28 | from bb.fetch2 import runfetchcmd |
| 29 | from bs4 import BeautifulSoup |
Patrick Williams | d8c66bc | 2016-06-20 12:57:21 -0500 | [diff] [blame] | 30 | from bs4 import SoupStrainer |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 31 | |
class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
    """
    Turn wget's dot-style progress output into progress updates.

    Note: this only works when wget is run with --progress=dot and with
    verbose output (-v, or at least without -q/-nv).
    """
    def __init__(self, d):
        super().__init__(d)
        # Report 0% straight away so that the progress bar gets displayed
        self._fire_progress(0)

    def writeline(self, line):
        """
        Look for "<percent>% <rate>" pairs in line.

        When at least one pair is found the last one is passed to
        self.update() and False is returned; otherwise True is returned.
        """
        matches = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line)
        if not matches:
            return True

        percent, speed = matches[-1]
        self.update(int(percent), speed + '/s')
        return False
| 51 | |
| 52 | |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 53 | class Wget(FetchMethod): |
Patrick Williams | 0ca19cc | 2021-08-16 14:03:13 -0500 | [diff] [blame] | 54 | """Class to fetch urls via 'wget'""" |
Andrew Geissler | d1e8949 | 2021-02-12 15:35:20 -0600 | [diff] [blame] | 55 | |
| 56 | # CDNs like CloudFlare may do a 'browser integrity test' which can fail |
| 57 | # with the standard wget/urllib User-Agent, so pretend to be a modern |
| 58 | # browser. |
| 59 | user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0" |
| 60 | |
def check_certs(self, d):
    """
    Tell whether SSL certificates should be verified.

    Verification is enabled unless BB_CHECK_SSL_CERTS is set to "0"; an
    unset or empty variable counts as enabled.
    """
    setting = d.getVar("BB_CHECK_SSL_CERTS")
    if not setting:
        return True
    return setting != "0"
| 66 | |
def supports(self, ud, d):
    """
    Return True when the url type is one of the schemes fetched with
    wget: http, https, ftp or ftps.
    """
    wget_schemes = ('http', 'https', 'ftp', 'ftps')
    return ud.type in wget_schemes
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 72 | |
def recommends_checksum(self, urldata):
    """Always True: this fetcher recommends a checksum for every url."""
    return True
| 75 | |
def urldata_init(self, ud, d):
    """
    Fill in the wget specific parts of the url data.

    Sets ud.basename (the 'downloadfilename' parameter if given, otherwise
    the last component of the url path) and ud.localfile (the unquoted,
    expanded basename, falling back to host + path with "/" replaced by
    "."). Also stores the wget command prefix in self.basecmd.

    Raises bb.fetch2.ParameterError for protocol=git.
    """
    if ud.parm.get('protocol') == 'git':
        raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)

    ud.basename = ud.parm['downloadfilename'] if 'downloadfilename' in ud.parm else os.path.basename(ud.path)

    localfile = d.expand(urllib.parse.unquote(ud.basename))
    if not localfile:
        # No usable file name in the url: derive one from host and path
        flattened = urllib.parse.unquote(ud.host + ud.path).replace("/", ".")
        localfile = d.expand(flattened)
    ud.localfile = localfile

    wget_cmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp"
    if not self.check_certs(d):
        wget_cmd += " --no-check-certificate"
    self.basecmd = wget_cmd
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 94 | |
def _runwget(self, ud, d, command, quiet, workdir=None):
    """
    Run a wget command line with progress reporting.

    The options required by WgetProgressHandler (--progress=dot -v) are
    appended to command before it is handed to runfetchcmd().
    """
    handler = WgetProgressHandler(d)

    logger.debug2("Fetching %s using command '%s'" % (ud.url, command))
    bb.fetch2.check_network_access(d, command, ud.url)

    full_command = command + ' --progress=dot -v'
    runfetchcmd(full_command, d, quiet, log=handler, workdir=workdir)
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 102 | |
def download(self, ud, d):
    """
    Fetch the url with wget.

    The data is written to "<DL_DIR>/<ud.localfile>.tmp". That temporary
    file is sanity checked and checksum-verified, and only then renamed to
    its final name, so a broken download never replaces a file that is
    already in place.

    Returns True on success. Raises FetchError if wget reports success but
    leaves no file, or an empty file, behind.
    """

    fetchcmd = self.basecmd

    localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile) + ".tmp"
    bb.utils.mkdirhier(os.path.dirname(localpath))
    fetchcmd += " -O %s" % shlex.quote(localpath)

    if ud.user and ud.pswd:
        fetchcmd += " --auth-no-challenge"
        if ud.parm.get("redirectauth", "1") == "1":
            # An undocumented feature of wget is that if the
            # username/password are specified on the URI, wget will only
            # send the Authorization header to the first host and not to
            # any hosts that it is redirected to.  With the increasing
            # usage of temporary AWS URLs, this difference now matters as
            # AWS will reject any request that has authentication both in
            # the query parameters (from the redirect) and in the
            # Authorization header.
            #
            # The credentials end up on a shell command line, so they are
            # quoted to survive spaces and shell metacharacters.
            fetchcmd += " --user=%s --password=%s" % (shlex.quote(ud.user), shlex.quote(ud.pswd))

    uri = ud.url.split(";")[0]
    if os.path.exists(ud.localpath):
        # file exists, but we didn't complete it.. trying again..
        fetchcmd += d.expand(" -c -P ${DL_DIR} %s" % shlex.quote(uri))
    else:
        fetchcmd += d.expand(" -P ${DL_DIR} %s" % shlex.quote(uri))

    self._runwget(ud, d, fetchcmd, False)

    # Sanity check since wget can pretend it succeeded when it didn't.
    # Also, this used to happen if sourceforge sent us to the mirror page.
    # The checks are made on the temporary file, before anything is moved
    # into its final position.
    if not os.path.exists(localpath):
        raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, localpath), uri)

    if os.path.getsize(localpath) == 0:
        os.remove(localpath)
        raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)

    # Try and verify any checksum now, meaning if it isn't correct, we don't remove the
    # original file, which might be a race (imagine two recipes referencing the same
    # source, one with an incorrect checksum)
    bb.fetch2.verify_checksum(ud, d, localpath=localpath, fatal_nochecksum=False)

    # Remove the ".tmp" and move the file into position atomically
    # Our lock prevents multiple writers but mirroring code may grab incomplete files
    os.rename(localpath, localpath[:-4])

    return True
| 153 | |
def checkstatus(self, fetch, ud, d, try_again=True):
    """
    Check that the url can be reached by sending it an HTTP HEAD request.

    fetch.connection_cache, when set, is used to share sockets between
    requests instead of opening a new connection every time. Credentials
    given in the url, and a matching netrc entry if there is one, are sent
    as Basic authentication.

    Returns True if the request succeeded. On urllib.error.URLError or
    ConnectionResetError the check is repeated once (while try_again is
    set); if that fails as well, False is returned.
    """
    class HTTPConnectionCache(http.client.HTTPConnection):
        # connect() is only overridden when a connection cache is in use
        if fetch.connection_cache:
            def connect(self):
                """Connect to the host and port specified in __init__."""

                sock = fetch.connection_cache.get_connection(self.host, self.port)
                if sock:
                    self.sock = sock
                else:
                    self.sock = socket.create_connection((self.host, self.port),
                                                         self.timeout, self.source_address)
                    fetch.connection_cache.add_connection(self.host, self.port, self.sock)

                if self._tunnel_host:
                    self._tunnel()

    class CacheHTTPHandler(urllib.request.HTTPHandler):
        def http_open(self, req):
            return self.do_open(HTTPConnectionCache, req)

        def do_open(self, http_class, req):
            """Return an addinfourl object for the request, using http_class.

            http_class must implement the HTTPConnection API from httplib.
            The addinfourl return value is a file-like object.  It also
            has methods and attributes including:
                - info(): return a mimetools.Message object for the headers
                - geturl(): return the original request URL
                - code: HTTP status code
            """
            host = req.host
            if not host:
                raise urllib.error.URLError('no host given')

            h = http_class(host, timeout=req.timeout)  # will parse host:port
            h.set_debuglevel(self._debuglevel)

            # Unredirected headers win over the regular ones
            headers = dict(req.unredirected_hdrs)
            headers.update({k: v for k, v in req.headers.items()
                            if k not in headers})

            # We want to make an HTTP/1.1 request, but the addinfourl
            # class isn't prepared to deal with a persistent connection.
            # It will try to read all remaining data from the socket,
            # which will block while the server waits for the next request.
            # So make sure the connection gets closed after the (only)
            # request.

            # Don't close connection when connection_cache is enabled,
            if fetch.connection_cache is None:
                headers["Connection"] = "close"
            else:
                headers["Connection"] = "Keep-Alive"  # Works for HTTP/1.0

            headers = {name.title(): val for name, val in headers.items()}

            if req._tunnel_host:
                tunnel_headers = {}
                proxy_auth_hdr = "Proxy-Authorization"
                if proxy_auth_hdr in headers:
                    tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                    # Proxy-Authorization should not be sent to origin
                    # server.
                    del headers[proxy_auth_hdr]
                h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

            try:
                h.request(req.get_method(), req.selector, req.data, headers)
            except socket.error as err:  # XXX what error?
                # Don't close connection when cache is enabled.
                # Instead, try to detect connections that are no longer
                # usable (for example, closed unexpectedly) and remove
                # them from the cache.
                if fetch.connection_cache is None:
                    h.close()
                elif err.errno == errno.EBADF:
                    # This happens when the server closes the connection despite the Keep-Alive.
                    # Apparently urllib then uses the file descriptor, expecting it to be
                    # connected, when in reality the connection is already gone.
                    # We let the request fail and expect it to be
                    # tried once more ("try_again" in check_status()),
                    # with the dead connection removed from the cache.
                    # If it still fails, we give up, which can happen for bad
                    # HTTP proxy settings.
                    fetch.connection_cache.remove_connection(h.host, h.port)
                raise urllib.error.URLError(err)
            else:
                r = h.getresponse()

            # Pick apart the HTTPResponse object to get the addinfourl
            # object initialized properly.

            # Wrap the HTTPResponse object in socket's file object adapter
            # for Windows.  That adapter calls recv(), so delegate recv()
            # to read().  This weird wrapping allows the returned object to
            # have readline() and readlines() methods.

            # XXX It might be better to extract the read buffering code
            # out of socket._fileobject() and into a base class.
            r.recv = r.read

            # no data, just have to read
            r.read()

            class fp_dummy(object):
                def read(self):
                    return ""

                def readline(self):
                    return ""

                def close(self):
                    pass
                closed = False

            resp = urllib.response.addinfourl(fp_dummy(), r.msg, req.get_full_url())
            resp.code = r.status
            resp.msg = r.reason

            # Close connection when server request it.
            if fetch.connection_cache is not None:
                if 'Connection' in r.msg and r.msg['Connection'] == 'close':
                    fetch.connection_cache.remove_connection(h.host, h.port)

            return resp

    class HTTPMethodFallback(urllib.request.BaseHandler):
        """
        Fallback to GET if HEAD is not allowed (405 HTTP error)
        """
        def http_error_405(self, req, fp, code, msg, headers):
            fp.read()
            fp.close()

            if req.get_method() != 'GET':
                newheaders = {k: v for k, v in req.headers.items()
                              if k.lower() not in ("content-length", "content-type")}
                return self.parent.open(urllib.request.Request(req.get_full_url(),
                                                               headers=newheaders,
                                                               origin_req_host=req.origin_req_host,
                                                               unverifiable=True))

            # HTTPError expects the URL as its first argument, not the
            # Request object.
            raise urllib.error.HTTPError(req.get_full_url(), code, msg, headers, None)

        # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
        # Forbidden when they actually mean 405 Method Not Allowed.
        http_error_403 = http_error_405

    class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
        """
        urllib2.HTTPRedirectHandler resets the method to GET on redirect,
        when we want to follow redirects using the original method.
        """
        def redirect_request(self, req, fp, code, msg, headers, newurl):
            newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
            newreq.get_method = req.get_method
            return newreq

    # We need to update the environment here as both the proxy and HTTPS
    # handlers need variables set. The proxy needs http_proxy and friends to
    # be set, and HTTPSHandler ends up calling into openssl to load the
    # certificates. In buildtools configurations this will be looking at the
    # wrong place for certificates by default: we set SSL_CERT_FILE to the
    # right location in the buildtools environment script but as BitBake
    # prunes the environment this is lost. When binaries are executed
    # runfetchcmd ensures these values are in the environment, but this is
    # pure Python so we need to update the environment.
    #
    # Avoid tramping the environment too much by using bb.utils.environment
    # to scope the changes to the build_opener request, which is when the
    # environment lookups happen.
    newenv = bb.fetch2.get_fetcher_environment(d)

    with bb.utils.environment(**newenv):
        import ssl

        if self.check_certs(d):
            context = ssl.create_default_context()
        else:
            context = ssl._create_unverified_context()

        handlers = [FixedHTTPRedirectHandler,
                    HTTPMethodFallback,
                    urllib.request.ProxyHandler(),
                    CacheHTTPHandler(),
                    urllib.request.HTTPSHandler(context=context)]
        opener = urllib.request.build_opener(*handlers)

        try:
            uri_base = ud.url.split(";")[0]
            uri = "{}://{}{}".format(urllib.parse.urlparse(uri_base).scheme, ud.host, ud.path)
            r = urllib.request.Request(uri)
            r.get_method = lambda: "HEAD"
            # Some servers (FusionForge, as used on Alioth) require that the
            # optional Accept header is set.
            r.add_header("Accept", "*/*")
            r.add_header("User-Agent", self.user_agent)

            def add_basic_auth(login_str, request):
                '''Adds Basic auth to http request, pass in login:password as string'''
                import base64
                encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
                request.add_header("Authorization", "Basic %s" % encodeuser)

            if ud.user and ud.pswd:
                add_basic_auth(ud.user + ':' + ud.pswd, r)

            # A netrc entry for the host is applied last, so it replaces
            # an Authorization header set from the url credentials.
            try:
                import netrc
                auth_data = netrc.netrc().authenticators(urllib.parse.urlparse(uri).hostname)
                if auth_data:
                    login, _, password = auth_data
                    add_basic_auth("%s:%s" % (login, password), r)
            except (FileNotFoundError, netrc.NetrcParseError):
                pass

            # Only the status matters; the response is closed right away
            with opener.open(r, timeout=30):
                pass
        except (urllib.error.URLError, ConnectionResetError) as e:
            if try_again:
                logger.debug2("checkstatus: trying again")
                return self.checkstatus(fetch, ud, d, False)
            # debug for now to avoid spamming the logs in e.g. remote sstate searches
            logger.debug2("checkstatus() urlopen failed: %s" % e)
            return False

    return True
| 390 | |
| 391 | def _parse_path(self, regex, s): |
| 392 | """ |
| 393 | Find and group name, version and archive type in the given string s |
| 394 | """ |
| 395 | |
| 396 | m = regex.search(s) |
| 397 | if m: |
| 398 | pname = '' |
| 399 | pver = '' |
| 400 | ptype = '' |
| 401 | |
| 402 | mdict = m.groupdict() |
| 403 | if 'name' in mdict.keys(): |
| 404 | pname = mdict['name'] |
| 405 | if 'pver' in mdict.keys(): |
| 406 | pver = mdict['pver'] |
| 407 | if 'type' in mdict.keys(): |
| 408 | ptype = mdict['type'] |
| 409 | |
| 410 | bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype)) |
| 411 | |
| 412 | return (pname, pver, ptype) |
| 413 | |
| 414 | return None |
| 415 | |
| 416 | def _modelate_version(self, version): |
| 417 | if version[0] in ['.', '-']: |
| 418 | if version[1].isdigit(): |
| 419 | version = version[1] + version[0] + version[2:len(version)] |
| 420 | else: |
| 421 | version = version[1:len(version)] |
| 422 | |
| 423 | version = re.sub('-', '.', version) |
| 424 | version = re.sub('_', '.', version) |
| 425 | version = re.sub('(rc)+', '.1000.', version) |
| 426 | version = re.sub('(beta)+', '.100.', version) |
| 427 | version = re.sub('(alpha)+', '.10.', version) |
| 428 | if version[0] == 'v': |
| 429 | version = version[1:len(version)] |
| 430 | return version |
| 431 | |
| 432 | def _vercmp(self, old, new): |
| 433 | """ |
| 434 | Check whether 'new' is newer than 'old' version. We use existing vercmp() for the |
| 435 | purpose. PE is cleared in comparison as it's not for build, and PR is cleared too |
| 436 | for simplicity as it's somehow difficult to get from various upstream format |
| 437 | """ |
| 438 | |
| 439 | (oldpn, oldpv, oldsuffix) = old |
| 440 | (newpn, newpv, newsuffix) = new |
| 441 | |
Brad Bishop | 1932369 | 2019-04-05 15:28:33 -0400 | [diff] [blame] | 442 | # Check for a new suffix type that we have never heard of before |
| 443 | if newsuffix: |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 444 | m = self.suffix_regex_comp.search(newsuffix) |
| 445 | if not m: |
| 446 | bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix)) |
| 447 | return False |
| 448 | |
Brad Bishop | 1932369 | 2019-04-05 15:28:33 -0400 | [diff] [blame] | 449 | # Not our package so ignore it |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 450 | if oldpn != newpn: |
| 451 | return False |
| 452 | |
| 453 | oldpv = self._modelate_version(oldpv) |
| 454 | newpv = self._modelate_version(newpv) |
| 455 | |
| 456 | return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, "")) |
| 457 | |
| 458 | def _fetch_index(self, uri, ud, d): |
| 459 | """ |
| 460 | Run fetch checkstatus to get directory information |
| 461 | """ |
| 462 | f = tempfile.NamedTemporaryFile() |
Brad Bishop | d7bf8c1 | 2018-02-25 22:55:05 -0500 | [diff] [blame] | 463 | with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f: |
Brad Bishop | d7bf8c1 | 2018-02-25 22:55:05 -0500 | [diff] [blame] | 464 | fetchcmd = self.basecmd |
Andrew Geissler | d1e8949 | 2021-02-12 15:35:20 -0600 | [diff] [blame] | 465 | fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'" |
Brad Bishop | d7bf8c1 | 2018-02-25 22:55:05 -0500 | [diff] [blame] | 466 | try: |
| 467 | self._runwget(ud, d, fetchcmd, True, workdir=workdir) |
| 468 | fetchresult = f.read() |
| 469 | except bb.fetch2.BBFetchException: |
| 470 | fetchresult = "" |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 471 | |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 472 | return fetchresult |
| 473 | |
def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
    """
    Look for the newest version of a package among the links on a page.

    The page at url is fetched, every <a href=...> link is matched against
    package_regex and the highest version according to _vercmp() wins.

    Returns that version with "_" replaced by ".", or "" when the page is
    empty or none of the links match.
    """
    bb.debug(3, "VersionURL: %s" % (url))
    soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
    if not soup:
        bb.debug(3, "*** %s NO SOUP" % (url))
        return ""

    latest = ['', '', '']
    found = False

    for link in soup.find_all('a', href=True):
        bb.debug(3, "line['href'] = '%s'" % (link['href']))
        bb.debug(3, "line = '%s'" % (str(link)))

        # Try the link target first and the complete tag second
        candidate = self._parse_path(package_regex, link['href']) \
            or self._parse_path(package_regex, str(link))
        if not candidate:
            continue

        bb.debug(3, "Upstream version found: %s" % candidate[1])
        if not found:
            latest = candidate
            found = True
        elif self._vercmp(latest, candidate) < 0:
            latest = candidate

    pupver = re.sub('_', '.', latest[1])

    bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
             (package, pupver or "N/A", current_version[1]))

    return pupver if found else ""
| 513 | |
def _check_latest_version_by_dir(self, dirver, package, package_regex, current_version, ud, d):
    """
    Scan the versioned directories upstream to find the latest version.

    The part of the url path in front of dirver is fetched as a directory
    index. Every linked directory whose version is not older than the
    newest one seen so far is searched with _check_latest_version().

    Returns the last version found that way, or "" if there was none.
    """
    dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])*(\d+))")

    current = dirver_regex.search(dirver)
    version_dir = ['', current.group('ver') if current else dirver, '']
    latest = ''

    dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
                                   ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
    bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))

    soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
    if not soup:
        return latest

    for link in soup.find_all('a', href=True):
        found = dirver_regex.search(link['href'].strip("/"))
        if not found:
            continue

        sver = found.group('ver')

        # When a prefix is part of the version directory, only the last
        # path component of it belongs to the directory name, so any
        # preceding directories are dropped.
        #
        # Example: pfx = '/dir1/dir2/v' and version = '2.5' gives v2.5.
        spfx = found.group('pfx').split('/')[-1]

        version_dir_new = ['', sver, '']
        if self._vercmp(version_dir, version_dir_new) <= 0:
            dirver_new = spfx + sver
            # Only the first occurrence of the old directory is replaced
            path = ud.path.replace(dirver, dirver_new, 1).split(package)[0]
            uri = bb.fetch.encodeurl([ud.type, ud.host, path,
                                      ud.user, ud.pswd, {}])

            pupver = self._check_latest_version(uri, package, package_regex,
                                                current_version, ud, d)
            if pupver:
                latest = pupver

            version_dir = version_dir_new

    return latest
| 565 | |
| 566 | def _init_regexes(self, package, ud, d): |
| 567 | """ |
| 568 | Match as many patterns as possible such as: |
| 569 | gnome-common-2.20.0.tar.gz (most common format) |
| 570 | gtk+-2.90.1.tar.gz |
| 571 | xf86-input-synaptics-12.6.9.tar.gz |
| 572 | dri2proto-2.3.tar.gz |
| 573 | blktool_4.orig.tar.gz |
| 574 | libid3tag-0.15.1b.tar.gz |
| 575 | unzip552.tar.gz |
| 576 | icu4c-3_6-src.tgz |
| 577 | genext2fs_1.3.orig.tar.gz |
| 578 | gst-fluendo-mp3 |
| 579 | """ |
| 580 | # match most patterns which uses "-" as separator to version digits |
Brad Bishop | 1932369 | 2019-04-05 15:28:33 -0400 | [diff] [blame] | 581 | pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]" |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 582 | # a loose pattern such as for unzip552.tar.gz |
Brad Bishop | 1932369 | 2019-04-05 15:28:33 -0400 | [diff] [blame] | 583 | pn_prefix2 = r"[a-zA-Z]+" |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 584 | # a loose pattern such as for 80325-quicky-0.4.tar.gz |
Brad Bishop | 1932369 | 2019-04-05 15:28:33 -0400 | [diff] [blame] | 585 | pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+" |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 586 | # Save the Package Name (pn) Regex for use later |
Brad Bishop | 1932369 | 2019-04-05 15:28:33 -0400 | [diff] [blame] | 587 | pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3) |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 588 | |
| 589 | # match version |
Brad Bishop | 1932369 | 2019-04-05 15:28:33 -0400 | [diff] [blame] | 590 | pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)" |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 591 | |
| 592 | # match arch |
| 593 | parch_regex = "-source|_all_" |
| 594 | |
| 595 | # src.rpm extension was added only for rpm package. Can be removed if the rpm |
| 596 | # packaged will always be considered as having to be manually upgraded |
Andrew Geissler | 595f630 | 2022-01-24 19:11:47 +0000 | [diff] [blame] | 597 | psuffix_regex = r"(tar\.\w+|tgz|zip|xz|rpm|bz2|orig\.tar\.\w+|src\.tar\.\w+|src\.tgz|svnr\d+\.tar\.\w+|stable\.tar\.\w+|src\.rpm)" |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 598 | |
| 599 | # match name, version and archive type of a package |
Brad Bishop | 1932369 | 2019-04-05 15:28:33 -0400 | [diff] [blame] | 600 | package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)" |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 601 | % (pn_regex, pver_regex, parch_regex, psuffix_regex)) |
| 602 | self.suffix_regex_comp = re.compile(psuffix_regex) |
| 603 | |
| 604 | # compile regex, can be specific by package or generic regex |
Brad Bishop | 6e60e8b | 2018-02-01 10:27:11 -0500 | [diff] [blame] | 605 | pn_regex = d.getVar('UPSTREAM_CHECK_REGEX') |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 606 | if pn_regex: |
| 607 | package_custom_regex_comp = re.compile(pn_regex) |
| 608 | else: |
| 609 | version = self._parse_path(package_regex_comp, package) |
| 610 | if version: |
| 611 | package_custom_regex_comp = re.compile( |
Brad Bishop | 1932369 | 2019-04-05 15:28:33 -0400 | [diff] [blame] | 612 | r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" % |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 613 | (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex)) |
| 614 | else: |
| 615 | package_custom_regex_comp = None |
| 616 | |
| 617 | return package_custom_regex_comp |
| 618 | |
def latest_versionstring(self, ud, d):
    """
    Work out the latest upstream version by manipulating the source URL.

    Returns a 2-tuple whose second element is always the empty string.
    The first element is:
      - PV with '_' and '-' replaced by '.', when the file name taken from
        ud.path contains no digits at all;
      - '' when self._init_regexes() cannot produce a pattern for the
        file name (a warning is logged);
      - otherwise whatever self._check_latest_version_by_dir() or
        self._check_latest_version() reports.
    """
    package = ud.path.split("/")[-1]
    current_version = ['', d.getVar('PV'), '']

    # The package file name may carry no version at all (e.g. spectrum-fw);
    # then simply report PV with its separators normalised to dots.
    if re.search(r"\d+", package) is None:
        normalised = current_version[1]
        for separator in ('_', '-'):
            normalised = re.sub(separator, '.', normalised)
        return (normalised, '')

    package_regex = self._init_regexes(package, ud, d)
    if package_regex is None:
        bb.warn("latest_versionstring: package %s don't match pattern" % (package))
        return ('', '')
    bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))

    check_uri = d.getVar("UPSTREAM_CHECK_URI")
    if not check_uri:
        # No explicit URI to check: derive one from the directory that
        # holds the package file.
        base_path = ud.path.split(package)[0]

        # Look for version-like directory names inside the path, like:
        # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
        dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
        dir_matches = dirver_regex.findall(base_path)
        if dir_matches:
            pn = d.getVar('PN')
            # findall() yields one tuple per match; index 0 is the dirver group
            dirver = dir_matches[-1][0]

            # A directory that contains PN is not treated as a version
            # directory; anything else is scanned directory by directory.
            if not re.compile(r"%s\d?" % (re.escape(pn))).search(dirver):
                newest = self._check_latest_version_by_dir(
                    dirver, package, package_regex, current_version, ud, d)
                return (newest, '')

        check_uri = bb.fetch.encodeurl(
            [ud.type, ud.host, base_path, ud.user, ud.pswd, {}])

    newest = self._check_latest_version(
        check_uri, package, package_regex, current_version, ud, d)
    return (newest, '')