"""
BitBake 'Fetch' implementations

Classes for obtaining upstream sources for the
BitBake build tools.

"""

# Copyright (C) 2003, 2004 Chris Larson
#
# SPDX-License-Identifier: GPL-2.0-only
#
# Based on functions from the base bb module, Copyright 2003 Holger Schurig

import shlex
import re
import tempfile
import os
import errno
import bb
import bb.progress
import socket
import http.client
import urllib.request, urllib.parse, urllib.error
from bb.fetch2 import FetchMethod
from bb.fetch2 import FetchError
from bb.fetch2 import logger
from bb.fetch2 import runfetchcmd
from bb.utils import export_proxies
from bs4 import BeautifulSoup
from bs4 import SoupStrainer

class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
    """
    Extract progress information from wget output.
    Note: relies on --progress=dot (with -v or without -q/-nv) being
    specified on the wget command line.
    """
    def __init__(self, d):
        super(WgetProgressHandler, self).__init__(d)
        # Send an initial progress event so the bar gets shown
        self._fire_progress(0)

    def writeline(self, line):
        percs = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line)
        if percs:
            progress = int(percs[-1][0])
            rate = percs[-1][1] + '/s'
            self.update(progress, rate)
            return False
        return True
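
    # For illustration, an assumed line of wget --progress=dot output that the
    # regex above would match; the trailing "50%" and rate "1.10M" become
    # progress=50 and rate="1.10M/s":
    #
    #   1950K .......... .......... .......... .......... ..........  50% 1.10M 2s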


class Wget(FetchMethod):
    """Class to fetch urls via 'wget'"""

    # CDNs like CloudFlare may do a 'browser integrity test' which can fail
    # with the standard wget/urllib User-Agent, so pretend to be a modern
    # browser.
    user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"

    def check_certs(self, d):
        """
        Should certificates be checked?
        """
        return (d.getVar("BB_CHECK_SSL_CERTS") or "1") != "0"

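    # Illustrative configuration: certificate checking can be switched off
    # from e.g. local.conf (an assumed snippet) with:
    #
    #   BB_CHECK_SSL_CERTS = "0"
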
    def supports(self, ud, d):
        """
        Check to see if a given url can be fetched with wget.
        """
        return ud.type in ['http', 'https', 'ftp', 'ftps']

    def recommends_checksum(self, urldata):
        return True

    def urldata_init(self, ud, d):
        if 'protocol' in ud.parm:
            if ud.parm['protocol'] == 'git':
                raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)

        if 'downloadfilename' in ud.parm:
            ud.basename = ud.parm['downloadfilename']
        else:
            ud.basename = os.path.basename(ud.path)

        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
        if not ud.localfile:
            ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))

        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp"

        if not self.check_certs(d):
            self.basecmd += " --no-check-certificate"

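    # A hedged example of the downloadfilename parameter handled above (the
    # URL and filename are made up): the fetched file is saved under the
    # given name instead of the URL's basename:
    #
    #   SRC_URI = "https://example.com/archive/v1.0.tar.gz;downloadfilename=foo-1.0.tar.gz"
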
    def _runwget(self, ud, d, command, quiet, workdir=None):

        progresshandler = WgetProgressHandler(d)

        logger.debug2("Fetching %s using command '%s'" % (ud.url, command))
        bb.fetch2.check_network_access(d, command, ud.url)
        runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)

    def download(self, ud, d):
        """Fetch urls"""

        fetchcmd = self.basecmd

        if 'downloadfilename' in ud.parm:
            localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile)
            bb.utils.mkdirhier(os.path.dirname(localpath))
            fetchcmd += " -O %s" % shlex.quote(localpath)

        if ud.user and ud.pswd:
            fetchcmd += " --auth-no-challenge"
            if ud.parm.get("redirectauth", "1") == "1":
                # An undocumented feature of wget is that if the
                # username/password are specified on the URI, wget will only
                # send the Authorization header to the first host and not to
                # any hosts that it is redirected to. With the increasing
                # usage of temporary AWS URLs, this difference now matters as
                # AWS will reject any request that has authentication both in
                # the query parameters (from the redirect) and in the
                # Authorization header.
                fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd)

        uri = ud.url.split(";")[0]
        if os.path.exists(ud.localpath):
            # file exists, but we didn't complete it, so try again
            fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
        else:
            fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)

        self._runwget(ud, d, fetchcmd, False)

        # Sanity check since wget can pretend it succeeded when it didn't
        # Also, this used to happen if sourceforge sent us to the mirror page
        if not os.path.exists(ud.localpath):
            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)

        if os.path.getsize(ud.localpath) == 0:
            os.remove(ud.localpath)
            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)

        return True

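    # For illustration, a typical assembled command line for an
    # unauthenticated fresh fetch (the URL is made up; _runwget() appends
    # the progress options) would be roughly:
    #
    #   /usr/bin/env wget -t 2 -T 30 --passive-ftp -P ${DL_DIR} \
    #       'https://example.com/foo-1.0.tar.gz' --progress=dot -v
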
    def checkstatus(self, fetch, ud, d, try_again=True):
        class HTTPConnectionCache(http.client.HTTPConnection):
            if fetch.connection_cache:
                def connect(self):
                    """Connect to the host and port specified in __init__."""

                    sock = fetch.connection_cache.get_connection(self.host, self.port)
                    if sock:
                        self.sock = sock
                    else:
                        self.sock = socket.create_connection((self.host, self.port),
                                    self.timeout, self.source_address)
                        fetch.connection_cache.add_connection(self.host, self.port, self.sock)

                    if self._tunnel_host:
                        self._tunnel()

        class CacheHTTPHandler(urllib.request.HTTPHandler):
            def http_open(self, req):
                return self.do_open(HTTPConnectionCache, req)

            def do_open(self, http_class, req):
                """Return an addinfourl object for the request, using http_class.

                http_class must implement the HTTPConnection API from httplib.
                The addinfourl return value is a file-like object. It also
                has methods and attributes including:
                    - info(): return a mimetools.Message object for the headers
                    - geturl(): return the original request URL
                    - code: HTTP status code
                """
                host = req.host
                if not host:
                    raise urllib.error.URLError('no host given')

                h = http_class(host, timeout=req.timeout) # will parse host:port
                h.set_debuglevel(self._debuglevel)

                headers = dict(req.unredirected_hdrs)
                headers.update(dict((k, v) for k, v in list(req.headers.items())
                            if k not in headers))

                # We want to make an HTTP/1.1 request, but the addinfourl
                # class isn't prepared to deal with a persistent connection.
                # It will try to read all remaining data from the socket,
                # which will block while the server waits for the next request.
                # So make sure the connection gets closed after the (only)
                # request.

                # Don't close the connection when connection_cache is enabled.
                if fetch.connection_cache is None:
                    headers["Connection"] = "close"
                else:
                    headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0

                headers = dict(
                    (name.title(), val) for name, val in list(headers.items()))

                if req._tunnel_host:
                    tunnel_headers = {}
                    proxy_auth_hdr = "Proxy-Authorization"
                    if proxy_auth_hdr in headers:
                        tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                        # Proxy-Authorization should not be sent to origin
                        # server.
                        del headers[proxy_auth_hdr]
                    h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

                try:
                    h.request(req.get_method(), req.selector, req.data, headers)
                except socket.error as err: # XXX what error?
                    # Don't close the connection when the cache is enabled.
                    # Instead, try to detect connections that are no longer
                    # usable (for example, closed unexpectedly) and remove
                    # them from the cache.
                    if fetch.connection_cache is None:
                        h.close()
                    elif isinstance(err, OSError) and err.errno == errno.EBADF:
                        # This happens when the server closes the connection despite the Keep-Alive.
                        # Apparently urllib then uses the file descriptor, expecting it to be
                        # connected, when in reality the connection is already gone.
                        # We let the request fail and expect it to be
                        # tried once more ("try_again" in checkstatus()),
                        # with the dead connection removed from the cache.
                        # If it still fails, we give up, which can happen for bad
                        # HTTP proxy settings.
                        fetch.connection_cache.remove_connection(h.host, h.port)
                    raise urllib.error.URLError(err)
                else:
                    r = h.getresponse()

                # Pick apart the HTTPResponse object to get the addinfourl
                # object initialized properly.

                # Wrap the HTTPResponse object in socket's file object adapter
                # for Windows. That adapter calls recv(), so delegate recv()
                # to read(). This weird wrapping allows the returned object to
                # have readline() and readlines() methods.

                # XXX It might be better to extract the read buffering code
                # out of socket._fileobject() and into a base class.
                r.recv = r.read

                # no data, just have to read
                r.read()
                class fp_dummy(object):
                    def read(self):
                        return ""
                    def readline(self):
                        return ""
                    def close(self):
                        pass
                    closed = False

                resp = urllib.response.addinfourl(fp_dummy(), r.msg, req.get_full_url())
                resp.code = r.status
                resp.msg = r.reason

                # Close the connection when the server requests it.
                if fetch.connection_cache is not None:
                    if 'Connection' in r.msg and r.msg['Connection'] == 'close':
                        fetch.connection_cache.remove_connection(h.host, h.port)

                return resp

        class HTTPMethodFallback(urllib.request.BaseHandler):
            """
            Fallback to GET if HEAD is not allowed (405 HTTP error)
            """
            def http_error_405(self, req, fp, code, msg, headers):
                fp.read()
                fp.close()

                if req.get_method() != 'GET':
                    newheaders = dict((k, v) for k, v in list(req.headers.items())
                                      if k.lower() not in ("content-length", "content-type"))
                    return self.parent.open(urllib.request.Request(req.get_full_url(),
                                                                   headers=newheaders,
                                                                   origin_req_host=req.origin_req_host,
                                                                   unverifiable=True))

                raise urllib.request.HTTPError(req, code, msg, headers, None)

            # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
            # Forbidden when they actually mean 405 Method Not Allowed.
            http_error_403 = http_error_405


        class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
            """
            urllib2.HTTPRedirectHandler resets the method to GET on redirect,
            while we want to follow redirects using the original method.
            """
            def redirect_request(self, req, fp, code, msg, headers, newurl):
                newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
                newreq.get_method = req.get_method
                return newreq

        # We need to update the environment here as both the proxy and HTTPS
        # handlers need variables set. The proxy needs http_proxy and friends to
        # be set, and HTTPSHandler ends up calling into openssl to load the
        # certificates. In buildtools configurations this will be looking at the
        # wrong place for certificates by default: we set SSL_CERT_FILE to the
        # right location in the buildtools environment script but as BitBake
        # prunes the environment this is lost. When binaries are executed
        # runfetchcmd ensures these values are in the environment, but this is
        # pure Python so we need to update the environment ourselves.
        #
        # Avoid trampling the environment too much by using bb.utils.environment
        # to scope the changes to the build_opener request, which is when the
        # environment lookups happen.
        newenv = bb.fetch2.get_fetcher_environment(d)

        with bb.utils.environment(**newenv):
            import ssl

            if self.check_certs(d):
                context = ssl.create_default_context()
            else:
                context = ssl._create_unverified_context()

            handlers = [FixedHTTPRedirectHandler,
                        HTTPMethodFallback,
                        urllib.request.ProxyHandler(),
                        CacheHTTPHandler(),
                        urllib.request.HTTPSHandler(context=context)]
            opener = urllib.request.build_opener(*handlers)

            try:
                uri = ud.url.split(";")[0]
                r = urllib.request.Request(uri)
                r.get_method = lambda: "HEAD"
                # Some servers (FusionForge, as used on Alioth) require that the
                # optional Accept header is set.
                r.add_header("Accept", "*/*")
                r.add_header("User-Agent", self.user_agent)
                def add_basic_auth(login_str, request):
                    '''Adds Basic auth to an http request; pass in login:password as a string'''
                    import base64
                    encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
                    authheader = "Basic %s" % encodeuser
                    request.add_header("Authorization", authheader)

                if ud.user and ud.pswd:
                    add_basic_auth(ud.user + ':' + ud.pswd, r)

                try:
                    import netrc
                    n = netrc.netrc()
                    login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname)
                    add_basic_auth("%s:%s" % (login, password), r)
                except (TypeError, ImportError, IOError, netrc.NetrcParseError):
                    pass
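                # For illustration, a ~/.netrc entry that the lookup above
                # would consume has the standard form (hostname and
                # credentials here are placeholders):
                #
                #   machine example.com login myuser password mypass
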
                with opener.open(r, timeout=30) as response:
                    pass
            except urllib.error.URLError as e:
                if try_again:
                    logger.debug2("checkstatus: trying again")
                    return self.checkstatus(fetch, ud, d, False)
                else:
                    # debug for now to avoid spamming the logs in e.g. remote sstate searches
                    logger.debug2("checkstatus() urlopen failed: %s" % e)
                    return False
            except ConnectionResetError as e:
                if try_again:
                    logger.debug2("checkstatus: trying again")
                    return self.checkstatus(fetch, ud, d, False)
                else:
                    # debug for now to avoid spamming the logs in e.g. remote sstate searches
                    logger.debug2("checkstatus() urlopen failed: %s" % e)
                    return False

            return True

    def _parse_path(self, regex, s):
        """
        Find and group name, version and archive type in the given string s
        """

        m = regex.search(s)
        if m:
            pname = ''
            pver = ''
            ptype = ''

            mdict = m.groupdict()
            if 'name' in mdict.keys():
                pname = mdict['name']
            if 'pver' in mdict.keys():
                pver = mdict['pver']
            if 'type' in mdict.keys():
                ptype = mdict['type']

            bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype))

            return (pname, pver, ptype)

        return None

    def _modelate_version(self, version):
        if version[0] in ['.', '-']:
            if version[1].isdigit():
                version = version[1] + version[0] + version[2:len(version)]
            else:
                version = version[1:len(version)]

        version = re.sub('-', '.', version)
        version = re.sub('_', '.', version)
        version = re.sub('(rc)+', '.1000.', version)
        version = re.sub('(beta)+', '.100.', version)
        version = re.sub('(alpha)+', '.10.', version)
        if version[0] == 'v':
            version = version[1:len(version)]
        return version
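    # Worked examples of the normalisation above (illustrative inputs):
    #   "v1.2.3"  -> "1.2.3"
    #   "1.2_4"   -> "1.2.4"
    #   "1.2-rc1" -> "1.2..1000.1"
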
    def _vercmp(self, old, new):
        """
        Check whether 'new' is newer than the 'old' version. We use the existing
        vercmp() for this purpose, clearing PE as it isn't relevant here and
        clearing PR too for simplicity, as it is difficult to extract from the
        various upstream formats.
        """

        (oldpn, oldpv, oldsuffix) = old
        (newpn, newpv, newsuffix) = new

        # Check for a new suffix type that we have never heard of before
        if newsuffix:
            m = self.suffix_regex_comp.search(newsuffix)
            if not m:
                bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
                return False

        # Not our package so ignore it
        if oldpn != newpn:
            return False

        oldpv = self._modelate_version(oldpv)
        newpv = self._modelate_version(newpv)

        return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, ""))
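    # Illustrative call with made-up tuples: a negative result means 'new' is
    # newer, which is how _check_latest_version() interprets it:
    #
    #   self._vercmp(('nano-', '2.9.8', ''), ('nano-', '4.0', ''))  # < 0
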
    def _fetch_index(self, uri, ud, d):
        """
        Run fetch checkstatus to get directory information
        """
        with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
            fetchcmd = self.basecmd
            fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'"
            try:
                self._runwget(ud, d, fetchcmd, True, workdir=workdir)
                fetchresult = f.read()
            except bb.fetch2.BBFetchException:
                fetchresult = ""

        return fetchresult

    def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
        """
        Return the latest version of a package inside a given directory path
        If error or no version, return ""
        """
        valid = 0
        version = ['', '', '']

        bb.debug(3, "VersionURL: %s" % (url))
        soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            bb.debug(3, "*** %s NO SOUP" % (url))
            return ""

        for line in soup.find_all('a', href=True):
            bb.debug(3, "line['href'] = '%s'" % (line['href']))
            bb.debug(3, "line = '%s'" % (str(line)))

            newver = self._parse_path(package_regex, line['href'])
            if not newver:
                newver = self._parse_path(package_regex, str(line))

            if newver:
                bb.debug(3, "Upstream version found: %s" % newver[1])
                if valid == 0:
                    version = newver
                    valid = 1
                elif self._vercmp(version, newver) < 0:
                    version = newver

        pupver = re.sub('_', '.', version[1])

        bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
                 (package, pupver or "N/A", current_version[1]))

        if valid:
            return pupver

        return ""

    def _check_latest_version_by_dir(self, dirver, package, package_regex, current_version, ud, d):
        """
        Scan every directory in order to get the upstream version.
        """
        version_dir = ['', '', '']
        version = ['', '', '']

        dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])*(\d+))")
        s = dirver_regex.search(dirver)
        if s:
            version_dir[1] = s.group('ver')
        else:
            version_dir[1] = dirver

        dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
                ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
        bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))

        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            return version[1]

        for line in soup.find_all('a', href=True):
            s = dirver_regex.search(line['href'].strip("/"))
            if s:
                sver = s.group('ver')

                # When the prefix is part of the version directory we need to
                # make sure that only the version directory is used, so strip
                # off any preceding directories.
                #
                # Example: pfx = '/dir1/dir2/v' and version = '2.5'; the
                # expected result is 'v2.5'.
                spfx = s.group('pfx').split('/')[-1]

                version_dir_new = ['', sver, '']
                if self._vercmp(version_dir, version_dir_new) <= 0:
                    dirver_new = spfx + sver
                    path = ud.path.replace(dirver, dirver_new, True) \
                        .split(package)[0]
                    uri = bb.fetch.encodeurl([ud.type, ud.host, path,
                            ud.user, ud.pswd, {}])

                    pupver = self._check_latest_version(uri,
                            package, package_regex, current_version, ud, d)
                    if pupver:
                        version[1] = pupver

                    version_dir = version_dir_new

        return version[1]

    def _init_regexes(self, package, ud, d):
        """
        Match as many patterns as possible such as:
            gnome-common-2.20.0.tar.gz (most common format)
            gtk+-2.90.1.tar.gz
            xf86-input-synaptics-12.6.9.tar.gz
            dri2proto-2.3.tar.gz
            blktool_4.orig.tar.gz
            libid3tag-0.15.1b.tar.gz
            unzip552.tar.gz
            icu4c-3_6-src.tgz
            genext2fs_1.3.orig.tar.gz
            gst-fluendo-mp3
        """
        # match most patterns, which use "-" as the separator before the version digits
        pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
        # a loose pattern such as for unzip552.tar.gz
        pn_prefix2 = r"[a-zA-Z]+"
        # a loose pattern such as for 80325-quicky-0.4.tar.gz
        pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+"
        # Save the Package Name (pn) Regex for use later
        pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)

        # match version
        pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"

        # match arch
        parch_regex = "-source|_all_"

        # The src.rpm extension was added only for the rpm package type. It can
        # be removed if rpm packages will always be considered as having to be
        # manually upgraded
        psuffix_regex = r"(tar\.\w+|tgz|zip|xz|rpm|bz2|orig\.tar\.\w+|src\.tar\.\w+|src\.tgz|svnr\d+\.tar\.\w+|stable\.tar\.\w+|src\.rpm)"

        # match name, version and archive type of a package
        package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
                                                    % (pn_regex, pver_regex, parch_regex, psuffix_regex))
        self.suffix_regex_comp = re.compile(psuffix_regex)

        # compile the regex, which can be a package-specific or a generic regex
        pn_regex = d.getVar('UPSTREAM_CHECK_REGEX')
        if pn_regex:
            package_custom_regex_comp = re.compile(pn_regex)
        else:
            version = self._parse_path(package_regex_comp, package)
            if version:
                package_custom_regex_comp = re.compile(
                    r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
                    (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))
            else:
                package_custom_regex_comp = None

        return package_custom_regex_comp

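    # A hedged example of overriding the derived pattern from a recipe; the
    # variable names are real, but the values are made up. The custom regex is
    # expected to provide a 'pver' named group, which _parse_path() extracts:
    #
    #   UPSTREAM_CHECK_URI = "https://example.com/releases/"
    #   UPSTREAM_CHECK_REGEX = r"foo-(?P<pver>\d+(\.\d+)+)\.tar\.gz"
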
    def latest_versionstring(self, ud, d):
        """
        Manipulate the URL and try to obtain the latest package version

        sanity check to ensure same name and type.
        """
        package = ud.path.split("/")[-1]
        current_version = ['', d.getVar('PV'), '']

        # possible to have no version in pkg name, such as spectrum-fw
        if not re.search(r"\d+", package):
            current_version[1] = re.sub('_', '.', current_version[1])
            current_version[1] = re.sub('-', '.', current_version[1])
            return (current_version[1], '')

        package_regex = self._init_regexes(package, ud, d)
        if package_regex is None:
            bb.warn("latest_versionstring: package %s doesn't match pattern" % (package))
            return ('', '')
        bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))

        uri = ""
        regex_uri = d.getVar("UPSTREAM_CHECK_URI")
        if not regex_uri:
            path = ud.path.split(package)[0]

            # search for version matches in folders inside the path, like:
            # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
            dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
            m = dirver_regex.search(path)
            if m:
                pn = d.getVar('PN')
                dirver = m.group('dirver')

                dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
                if not dirver_pn_regex.search(dirver):
                    return (self._check_latest_version_by_dir(dirver,
                        package, package_regex, current_version, ud, d), '')

            uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])
        else:
            uri = regex_uri

        return (self._check_latest_version(uri, package, package_regex,
                current_version, ud, d), '')