"""
BitBake 'Fetch' implementations

Classes for obtaining upstream sources for the
BitBake build tools.

"""

# Copyright (C) 2003, 2004 Chris Larson
#
# SPDX-License-Identifier: GPL-2.0-only
#
# Based on functions from the base bb module, Copyright 2003 Holger Schurig

import shlex
import re
import tempfile
import os
import errno
import bb
import bb.progress
import socket
import http.client
import urllib.request, urllib.parse, urllib.error
from bb.fetch2 import FetchMethod
from bb.fetch2 import FetchError
from bb.fetch2 import logger
from bb.fetch2 import runfetchcmd
from bb.utils import export_proxies
from bs4 import BeautifulSoup
from bs4 import SoupStrainer

class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
    """
    Extract progress information from wget output.
    Note: relies on --progress=dot (with -v or without -q/-nv) being
    specified on the wget command line.
    """
    def __init__(self, d):
        super(WgetProgressHandler, self).__init__(d)
        # Send an initial progress event so the bar gets shown
        self._fire_progress(0)

    def writeline(self, line):
        percs = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line)
        if percs:
            progress = int(percs[-1][0])
            rate = percs[-1][1] + '/s'
            self.update(progress, rate)
            return False
        return True
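
    # Illustrative sketch (not part of the handler): wget's dot-style
    # progress lines look like
    #     31900K .......... .......... .......... .......... ..........  98% 3.35M 0s
    # and re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line) on such a line yields
    # [('98', '3.35M')], so writeline() reports progress 98 at rate '3.35M/s'.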


class Wget(FetchMethod):
    """Class to fetch urls via 'wget'"""

    # CDNs like CloudFlare may do a 'browser integrity test' which can fail
    # with the standard wget/urllib User-Agent, so pretend to be a modern
    # browser.
    user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"

    def check_certs(self, d):
        """
        Should certificates be checked?
        """
        return (d.getVar("BB_CHECK_SSL_CERTS") or "1") != "0"
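
    # For example, a build can opt out of certificate checking by setting
    #   BB_CHECK_SSL_CERTS = "0"
    # in its configuration; any other value (or leaving the variable unset)
    # keeps verification enabled.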

    def supports(self, ud, d):
        """
        Check to see if a given url can be fetched with wget.
        """
        return ud.type in ['http', 'https', 'ftp', 'ftps']

    def recommends_checksum(self, urldata):
        return True

    def urldata_init(self, ud, d):
        if 'protocol' in ud.parm:
            if ud.parm['protocol'] == 'git':
                raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)

        if 'downloadfilename' in ud.parm:
            ud.basename = ud.parm['downloadfilename']
        else:
            ud.basename = os.path.basename(ud.path)

        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
        if not ud.localfile:
            ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))
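
        # Illustration (hypothetical URL): with
        #   SRC_URI = "https://example.com/downloads/v1.2;downloadfilename=foo-1.2.tar.gz"
        # the file is stored as foo-1.2.tar.gz in DL_DIR rather than under the
        # unhelpful URL basename "v1.2".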

        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp"

        if not self.check_certs(d):
            self.basecmd += " --no-check-certificate"

    def _runwget(self, ud, d, command, quiet, workdir=None):

        progresshandler = WgetProgressHandler(d)

        logger.debug2("Fetching %s using command '%s'" % (ud.url, command))
        bb.fetch2.check_network_access(d, command, ud.url)
        runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)
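
    # With the default FETCHCMD_wget above, the command _runwget() ends up
    # executing looks roughly like:
    #   /usr/bin/env wget -t 2 -T 30 --passive-ftp ... '<uri>' --progress=dot -v
    # with each output line piped through WgetProgressHandler.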

    def download(self, ud, d):
        """Fetch urls"""

        fetchcmd = self.basecmd

        localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile) + ".tmp"
        bb.utils.mkdirhier(os.path.dirname(localpath))
        fetchcmd += " -O %s" % shlex.quote(localpath)

        if ud.user and ud.pswd:
            fetchcmd += " --auth-no-challenge"
            if ud.parm.get("redirectauth", "1") == "1":
                # An undocumented feature of wget is that if the
                # username/password are specified on the URI, wget will only
                # send the Authorization header to the first host and not to
                # any hosts that it is redirected to. With the increasing
                # usage of temporary AWS URLs, this difference now matters as
                # AWS will reject any request that has authentication both in
                # the query parameters (from the redirect) and in the
                # Authorization header.
                fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd)
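
        # A recipe can keep the credentials off the wget command line with,
        # e.g. (hypothetical URL):
        #   SRC_URI = "https://user:pass@example.com/file.tar.gz;redirectauth=0"
        # in which case wget falls back to its default behaviour of only
        # authenticating against the first (pre-redirect) host.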

        uri = ud.url.split(";")[0]
        if os.path.exists(ud.localpath):
            # The file already exists but wasn't completed; try to resume it.
            fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
        else:
            fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)

        self._runwget(ud, d, fetchcmd, False)

        # Remove the ".tmp" and move the file into position atomically
        # Our lock prevents multiple writers but mirroring code may grab incomplete files
        os.rename(localpath, localpath[:-4])

        # Sanity check since wget can pretend it succeeded when it didn't
        # Also, this used to happen if sourceforge sent us to the mirror page
        if not os.path.exists(ud.localpath):
            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)

        if os.path.getsize(ud.localpath) == 0:
            os.remove(ud.localpath)
            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)

        return True

    def checkstatus(self, fetch, ud, d, try_again=True):
        class HTTPConnectionCache(http.client.HTTPConnection):
            if fetch.connection_cache:
                def connect(self):
                    """Connect to the host and port specified in __init__."""

                    sock = fetch.connection_cache.get_connection(self.host, self.port)
                    if sock:
                        self.sock = sock
                    else:
                        self.sock = socket.create_connection((self.host, self.port),
                                    self.timeout, self.source_address)
                        fetch.connection_cache.add_connection(self.host, self.port, self.sock)

                    if self._tunnel_host:
                        self._tunnel()

        class CacheHTTPHandler(urllib.request.HTTPHandler):
            def http_open(self, req):
                return self.do_open(HTTPConnectionCache, req)

            def do_open(self, http_class, req):
                """Return an addinfourl object for the request, using http_class.

                http_class must implement the HTTPConnection API from httplib.
                The addinfourl return value is a file-like object. It also
                has methods and attributes including:
                    - info(): return a mimetools.Message object for the headers
                    - geturl(): return the original request URL
                    - code: HTTP status code
                """
                host = req.host
                if not host:
                    raise urllib.error.URLError('no host given')

                h = http_class(host, timeout=req.timeout) # will parse host:port
                h.set_debuglevel(self._debuglevel)

                headers = dict(req.unredirected_hdrs)
                headers.update(dict((k, v) for k, v in list(req.headers.items())
                            if k not in headers))

                # We want to make an HTTP/1.1 request, but the addinfourl
                # class isn't prepared to deal with a persistent connection.
                # It will try to read all remaining data from the socket,
                # which will block while the server waits for the next request.
                # So make sure the connection gets closed after the (only)
                # request.

                # Don't close connection when connection_cache is enabled,
                if fetch.connection_cache is None:
                    headers["Connection"] = "close"
                else:
                    headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0

                headers = dict(
                    (name.title(), val) for name, val in list(headers.items()))

                if req._tunnel_host:
                    tunnel_headers = {}
                    proxy_auth_hdr = "Proxy-Authorization"
                    if proxy_auth_hdr in headers:
                        tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                        # Proxy-Authorization should not be sent to origin
                        # server.
                        del headers[proxy_auth_hdr]
                    h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

                try:
                    h.request(req.get_method(), req.selector, req.data, headers)
                except socket.error as err: # XXX what error?
                    # Don't close connection when cache is enabled.
                    # Instead, try to detect connections that are no longer
                    # usable (for example, closed unexpectedly) and remove
                    # them from the cache.
                    if fetch.connection_cache is None:
                        h.close()
                    elif isinstance(err, OSError) and err.errno == errno.EBADF:
                        # This happens when the server closes the connection despite the Keep-Alive.
                        # Apparently urllib then uses the file descriptor, expecting it to be
                        # connected, when in reality the connection is already gone.
                        # We let the request fail and expect it to be
                        # tried once more ("try_again" in checkstatus()),
                        # with the dead connection removed from the cache.
                        # If it still fails, we give up, which can happen for bad
                        # HTTP proxy settings.
                        fetch.connection_cache.remove_connection(h.host, h.port)
                    raise urllib.error.URLError(err)
                else:
                    r = h.getresponse()

                # Pick apart the HTTPResponse object to get the addinfourl
                # object initialized properly.

                # Wrap the HTTPResponse object in socket's file object adapter
                # for Windows. That adapter calls recv(), so delegate recv()
                # to read(). This weird wrapping allows the returned object to
                # have readline() and readlines() methods.

                # XXX It might be better to extract the read buffering code
                # out of socket._fileobject() and into a base class.
                r.recv = r.read

                # no data, just have to read
                r.read()
                class fp_dummy(object):
                    def read(self):
                        return ""
                    def readline(self):
                        return ""
                    def close(self):
                        pass
                    closed = False

                resp = urllib.response.addinfourl(fp_dummy(), r.msg, req.get_full_url())
                resp.code = r.status
                resp.msg = r.reason

                # Close the connection when the server requests it.
                if fetch.connection_cache is not None:
                    if 'Connection' in r.msg and r.msg['Connection'] == 'close':
                        fetch.connection_cache.remove_connection(h.host, h.port)

                return resp

        class HTTPMethodFallback(urllib.request.BaseHandler):
            """
            Fallback to GET if HEAD is not allowed (405 HTTP error)
            """
            def http_error_405(self, req, fp, code, msg, headers):
                fp.read()
                fp.close()

                if req.get_method() != 'GET':
                    newheaders = dict((k, v) for k, v in list(req.headers.items())
                                    if k.lower() not in ("content-length", "content-type"))
                    return self.parent.open(urllib.request.Request(req.get_full_url(),
                                            headers=newheaders,
                                            origin_req_host=req.origin_req_host,
                                            unverifiable=True))

                raise urllib.request.HTTPError(req, code, msg, headers, None)

            # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
            # Forbidden when they actually mean 405 Method Not Allowed.
            http_error_403 = http_error_405


        class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
            """
            urllib2.HTTPRedirectHandler resets the method to GET on redirect,
            when we want to follow redirects using the original method.
            """
            def redirect_request(self, req, fp, code, msg, headers, newurl):
                newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
                newreq.get_method = req.get_method
                return newreq
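
        # Sketch of the problem FixedHTTPRedirectHandler solves: the code
        # below issues HEAD requests, but the stock redirect handler would turn
        #   r = urllib.request.Request(uri)
        #   r.get_method = lambda: "HEAD"
        # back into a GET on a 301/302, downloading the whole file just to
        # probe for existence. Carrying req.get_method onto the new request
        # keeps the method as HEAD across redirects.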

        # We need to update the environment here as both the proxy and HTTPS
        # handlers need variables set. The proxy needs http_proxy and friends to
        # be set, and HTTPSHandler ends up calling into openssl to load the
        # certificates. In buildtools configurations this will be looking at the
        # wrong place for certificates by default: we set SSL_CERT_FILE to the
        # right location in the buildtools environment script but as BitBake
        # prunes the environment this is lost. When binaries are executed
        # runfetchcmd ensures these values are in the environment, but this is
        # pure Python so we need to update the environment.
        #
        # Avoid trampling the environment too much by using bb.utils.environment
        # to scope the changes to the build_opener request, which is when the
        # environment lookups happen.
        newenv = bb.fetch2.get_fetcher_environment(d)

        with bb.utils.environment(**newenv):
            import ssl

            if self.check_certs(d):
                context = ssl.create_default_context()
            else:
                context = ssl._create_unverified_context()

            handlers = [FixedHTTPRedirectHandler,
                        HTTPMethodFallback,
                        urllib.request.ProxyHandler(),
                        CacheHTTPHandler(),
                        urllib.request.HTTPSHandler(context=context)]
            opener = urllib.request.build_opener(*handlers)

            try:
                uri = ud.url.split(";")[0]
                r = urllib.request.Request(uri)
                r.get_method = lambda: "HEAD"
                # Some servers (FusionForge, as used on Alioth) require that the
                # optional Accept header is set.
                r.add_header("Accept", "*/*")
                r.add_header("User-Agent", self.user_agent)
                def add_basic_auth(login_str, request):
                    '''Adds Basic auth to http request, pass in login:password as string'''
                    import base64
                    encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
                    authheader = "Basic %s" % encodeuser
                    request.add_header("Authorization", authheader)

                if ud.user and ud.pswd:
                    add_basic_auth(ud.user + ':' + ud.pswd, r)

                try:
                    import netrc
                    n = netrc.netrc()
                    login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname)
                    add_basic_auth("%s:%s" % (login, password), r)
                except (TypeError, ImportError, IOError, netrc.NetrcParseError):
                    pass

                with opener.open(r, timeout=30) as response:
                    pass
            except urllib.error.URLError as e:
                if try_again:
                    logger.debug2("checkstatus: trying again")
                    return self.checkstatus(fetch, ud, d, False)
                else:
                    # debug for now to avoid spamming the logs in e.g. remote sstate searches
                    logger.debug2("checkstatus() urlopen failed: %s" % e)
                    return False
            except ConnectionResetError as e:
                if try_again:
                    logger.debug2("checkstatus: trying again")
                    return self.checkstatus(fetch, ud, d, False)
                else:
                    # debug for now to avoid spamming the logs in e.g. remote sstate searches
                    logger.debug2("checkstatus() urlopen failed: %s" % e)
                    return False

        return True

    def _parse_path(self, regex, s):
        """
        Find and group name, version and archive type in the given string s
        """

        m = regex.search(s)
        if m:
            pname = ''
            pver = ''
            ptype = ''

            mdict = m.groupdict()
            if 'name' in mdict.keys():
                pname = mdict['name']
            if 'pver' in mdict.keys():
                pver = mdict['pver']
            if 'type' in mdict.keys():
                ptype = mdict['type']

            bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype))

            return (pname, pver, ptype)

        return None
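
    # For example, with the generic regex built by _init_regexes() below, a
    # link such as "gnome-common-2.20.0.tar.gz" parses to
    #   ("gnome-common-", "2.20.0", "tar.gz")
    # i.e. (name, version, archive type).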

    def _modelate_version(self, version):
        if version[0] in ['.', '-']:
            if version[1].isdigit():
                version = version[1] + version[0] + version[2:len(version)]
            else:
                version = version[1:len(version)]

        version = re.sub('-', '.', version)
        version = re.sub('_', '.', version)
        version = re.sub('(rc)+', '.1000.', version)
        version = re.sub('(beta)+', '.100.', version)
        version = re.sub('(alpha)+', '.10.', version)
        if version[0] == 'v':
            version = version[1:len(version)]
        return version
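
    # Illustrative examples of the normalisation above:
    #   _modelate_version("v2.1rc3")  -> "2.1.1000.3"
    #   _modelate_version("1.2beta1") -> "1.2.100.1"
    # so rc/beta/alpha pre-releases get numeric weights (1000 > 100 > 10) and
    # compare consistently under bb.utils.vercmp().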

    def _vercmp(self, old, new):
        """
        Check whether 'new' is newer than 'old'. We reuse the existing
        bb.utils.vercmp() for the comparison. PE is cleared as it is not
        relevant to the check, and PR is cleared too, for simplicity, as it
        is hard to recover from the various upstream naming formats.
        """

        (oldpn, oldpv, oldsuffix) = old
        (newpn, newpv, newsuffix) = new

        # Check for a new suffix type that we have never heard of before
        if newsuffix:
            m = self.suffix_regex_comp.search(newsuffix)
            if not m:
                bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
                return False

        # Not our package so ignore it
        if oldpn != newpn:
            return False

        oldpv = self._modelate_version(oldpv)
        newpv = self._modelate_version(newpv)

        return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, ""))

    def _fetch_index(self, uri, ud, d):
        """
        Run fetch checkstatus to get directory information
        """
        with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
            fetchcmd = self.basecmd
            fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'"
            try:
                self._runwget(ud, d, fetchcmd, True, workdir=workdir)
                fetchresult = f.read()
            except bb.fetch2.BBFetchException:
                fetchresult = ""

        return fetchresult

    def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
        """
        Return the latest version of a package inside a given directory path
        If error or no version, return ""
        """
        valid = 0
        version = ['', '', '']

        bb.debug(3, "VersionURL: %s" % (url))
        soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            bb.debug(3, "*** %s NO SOUP" % (url))
            return ""

        for line in soup.find_all('a', href=True):
            bb.debug(3, "line['href'] = '%s'" % (line['href']))
            bb.debug(3, "line = '%s'" % (str(line)))

            newver = self._parse_path(package_regex, line['href'])
            if not newver:
                newver = self._parse_path(package_regex, str(line))

            if newver:
                bb.debug(3, "Upstream version found: %s" % newver[1])
                if valid == 0:
                    version = newver
                    valid = 1
                elif self._vercmp(version, newver) < 0:
                    version = newver

        pupver = re.sub('_', '.', version[1])

        bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
                (package, pupver or "N/A", current_version[1]))

        if valid:
            return pupver

        return ""

    def _check_latest_version_by_dir(self, dirver, package, package_regex, current_version, ud, d):
        """
        Scan every directory in order to get upstream version.
        """
        version_dir = ['', '', '']
        version = ['', '', '']

        dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])*(\d+))")
        s = dirver_regex.search(dirver)
        if s:
            version_dir[1] = s.group('ver')
        else:
            version_dir[1] = dirver

        dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
                ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
        bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))

        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            return version[1]

        for line in soup.find_all('a', href=True):
            s = dirver_regex.search(line['href'].strip("/"))
            if s:
                sver = s.group('ver')

                # When a prefix is part of the version directory we need to
                # make sure that only the version directory itself is used, so
                # strip any leading path components from the prefix.
                #
                # Example: pfx = '/dir1/dir2/v' and version = '2.5'; the
                # expected result is 'v2.5'.
                spfx = s.group('pfx').split('/')[-1]

                version_dir_new = ['', sver, '']
                if self._vercmp(version_dir, version_dir_new) <= 0:
                    dirver_new = spfx + sver
                    path = ud.path.replace(dirver, dirver_new, True) \
                                .split(package)[0]
                    uri = bb.fetch.encodeurl([ud.type, ud.host, path,
                            ud.user, ud.pswd, {}])

                    pupver = self._check_latest_version(uri,
                            package, package_regex, current_version, ud, d)
                    if pupver:
                        version[1] = pupver

                    version_dir = version_dir_new

        return version[1]
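
    # Sketch of the directory walk above: for a URL such as
    #   http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
    # dirver is "5.7"; the parent index is fetched, every href matching
    # dirver_regex (e.g. "5.8/", "v6.0/") is version-compared, and the newest
    # directory is then searched for the package tarball itself.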

    def _init_regexes(self, package, ud, d):
        """
        Match as many patterns as possible such as:
                gnome-common-2.20.0.tar.gz (most common format)
                gtk+-2.90.1.tar.gz
                xf86-input-synaptics-12.6.9.tar.gz
                dri2proto-2.3.tar.gz
                blktool_4.orig.tar.gz
                libid3tag-0.15.1b.tar.gz
                unzip552.tar.gz
                icu4c-3_6-src.tgz
                genext2fs_1.3.orig.tar.gz
                gst-fluendo-mp3
        """
        # match most patterns which use "-" as the separator before version digits
        pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
        # a loose pattern such as for unzip552.tar.gz
        pn_prefix2 = r"[a-zA-Z]+"
        # a loose pattern such as for 80325-quicky-0.4.tar.gz
        pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+"
        # Save the Package Name (pn) Regex for use later
        pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)

        # match version
        pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"

        # match arch
        parch_regex = "-source|_all_"

        # The src.rpm extension was added only for rpm packages. It can be
        # removed if rpm packages will always be considered as having to be
        # manually upgraded.
        psuffix_regex = r"(tar\.\w+|tgz|zip|xz|rpm|bz2|orig\.tar\.\w+|src\.tar\.\w+|src\.tgz|svnr\d+\.tar\.\w+|stable\.tar\.\w+|src\.rpm)"

        # match name, version and archive type of a package
        package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
                % (pn_regex, pver_regex, parch_regex, psuffix_regex))
        self.suffix_regex_comp = re.compile(psuffix_regex)

        # compile regex, can be specific by package or generic regex
        pn_regex = d.getVar('UPSTREAM_CHECK_REGEX')
        if pn_regex:
            package_custom_regex_comp = re.compile(pn_regex)
        else:
            version = self._parse_path(package_regex_comp, package)
            if version:
                package_custom_regex_comp = re.compile(
                    r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
                    (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))
            else:
                package_custom_regex_comp = None

        return package_custom_regex_comp
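
    # A recipe can bypass the generated pattern entirely, e.g. (hypothetical
    # recipe snippet):
    #   UPSTREAM_CHECK_URI = "https://ftp.gnu.org/gnu/hello/"
    #   UPSTREAM_CHECK_REGEX = "hello-(?P<pver>\d+(\.\d+)+)\.tar\.gz"
    # where the named group "pver" is what _parse_path() extracts.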

    def latest_versionstring(self, ud, d):
        """
        Manipulate the URL and try to obtain the latest package version.

        Sanity check to ensure the name and type match.
        """
        package = ud.path.split("/")[-1]
        current_version = ['', d.getVar('PV'), '']

        # It is possible to have no version in the package name, such as spectrum-fw
        if not re.search(r"\d+", package):
            current_version[1] = re.sub('_', '.', current_version[1])
            current_version[1] = re.sub('-', '.', current_version[1])
            return (current_version[1], '')

        package_regex = self._init_regexes(package, ud, d)
        if package_regex is None:
            bb.warn("latest_versionstring: package %s doesn't match pattern" % (package))
            return ('', '')
        bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))

        uri = ""
        regex_uri = d.getVar("UPSTREAM_CHECK_URI")
        if not regex_uri:
            path = ud.path.split(package)[0]

            # search for version matches on folders inside the path, like:
            # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
            dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
            m = dirver_regex.search(path)
            if m:
                pn = d.getVar('PN')
                dirver = m.group('dirver')

                dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
                if not dirver_pn_regex.search(dirver):
                    return (self._check_latest_version_by_dir(dirver,
                        package, package_regex, current_version, ud, d), '')

            uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])
        else:
            uri = regex_uri

        return (self._check_latest_version(uri, package, package_regex,
                current_version, ud, d), '')