"""
BitBake 'Fetch' implementations

Classes for obtaining upstream sources for the
BitBake build tools.

"""

# Copyright (C) 2003, 2004 Chris Larson
#
# SPDX-License-Identifier: GPL-2.0-only
#
# Based on functions from the base bb module, Copyright 2003 Holger Schurig
Andrew Geissler82c905d2020-04-13 13:39:40 -050015import shlex
Patrick Williamsc124f4f2015-09-15 14:41:29 -050016import re
17import tempfile
Patrick Williamsc124f4f2015-09-15 14:41:29 -050018import os
Brad Bishopd7bf8c12018-02-25 22:55:05 -050019import errno
Patrick Williamsc124f4f2015-09-15 14:41:29 -050020import bb
Patrick Williamsc0f7c042017-02-23 20:41:17 -060021import bb.progress
Brad Bishop19323692019-04-05 15:28:33 -040022import socket
23import http.client
Patrick Williamsc0f7c042017-02-23 20:41:17 -060024import urllib.request, urllib.parse, urllib.error
Patrick Williamsc124f4f2015-09-15 14:41:29 -050025from bb.fetch2 import FetchMethod
26from bb.fetch2 import FetchError
27from bb.fetch2 import logger
28from bb.fetch2 import runfetchcmd
29from bs4 import BeautifulSoup
Patrick Williamsd8c66bc2016-06-20 12:57:21 -050030from bs4 import SoupStrainer
Patrick Williamsc124f4f2015-09-15 14:41:29 -050031
class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
    """
    Parse progress information out of wget's output.

    Note: relies on --progress=dot (with -v or without -q/-nv) being
    specified on the wget command line, otherwise no progress lines
    are produced for us to parse.
    """
    def __init__(self, d):
        super(WgetProgressHandler, self).__init__(d)
        # Fire an initial 0% event so the progress bar appears immediately
        self._fire_progress(0)

    def writeline(self, line):
        # wget's dot-style progress lines look like "... 42% 1.2M ..."
        matches = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line)
        if not matches:
            # Not a progress line - pass it through to the log unchanged
            return True
        percentage, rate_prefix = matches[-1]
        self.update(int(percentage), rate_prefix + '/s')
        return False
51
52
class Wget(FetchMethod):
    """Class to fetch urls via 'wget'"""

    # CDNs like CloudFlare may do a 'browser integrity test' which can fail
    # with the standard wget/urllib User-Agent, so pretend to be a modern
    # browser.
    user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"

    def check_certs(self, d):
        """
        Should certificates be checked?
        """
        return (d.getVar("BB_CHECK_SSL_CERTS") or "1") != "0"

    def supports(self, ud, d):
        """
        Check to see if a given url can be fetched with wget.
        """
        return ud.type in ['http', 'https', 'ftp', 'ftps']

    def recommends_checksum(self, urldata):
        """Checksums should always be verified for remote archives."""
        return True

    def urldata_init(self, ud, d):
        """
        Initialize URL data: work out the local filename and the base
        wget command line for this URL.
        """
        if 'protocol' in ud.parm:
            if ud.parm['protocol'] == 'git':
                raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)

        if 'downloadfilename' in ud.parm:
            ud.basename = ud.parm['downloadfilename']
        else:
            ud.basename = os.path.basename(ud.path)

        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
        if not ud.localfile:
            # No basename (e.g. URL ends in "/"): derive a name from host+path
            ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))

        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30"

        if ud.type == 'ftp' or ud.type == 'ftps':
            self.basecmd += " --passive-ftp"

        if not self.check_certs(d):
            self.basecmd += " --no-check-certificate"

    def _runwget(self, ud, d, command, quiet, workdir=None):
        """
        Run the given wget command, reporting progress via
        WgetProgressHandler and enforcing network access policy.
        """
        progresshandler = WgetProgressHandler(d)

        logger.debug2("Fetching %s using command '%s'" % (ud.url, command))
        bb.fetch2.check_network_access(d, command, ud.url)
        # --progress=dot -v is required for WgetProgressHandler to parse output
        runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)

    def download(self, ud, d):
        """Fetch urls"""

        fetchcmd = self.basecmd

        # Download to a ".tmp" file first so partial downloads never appear
        # under the final name (mirroring code may grab files at any time).
        localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile) + ".tmp"
        bb.utils.mkdirhier(os.path.dirname(localpath))
        fetchcmd += " -O %s" % shlex.quote(localpath)

        if ud.user and ud.pswd:
            fetchcmd += " --auth-no-challenge"
            if ud.parm.get("redirectauth", "1") == "1":
                # An undocumented feature of wget is that if the
                # username/password are specified on the URI, wget will only
                # send the Authorization header to the first host and not to
                # any hosts that it is redirected to. With the increasing
                # usage of temporary AWS URLs, this difference now matters as
                # AWS will reject any request that has authentication both in
                # the query parameters (from the redirect) and in the
                # Authorization header.
                fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd)

        uri = ud.url.split(";")[0]
        if os.path.exists(ud.localpath):
            # file exists, but we didnt complete it.. trying again..
            fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
        else:
            fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)

        self._runwget(ud, d, fetchcmd, False)

        # Try and verify any checksum now, meaning if it isn't correct, we don't remove the
        # original file, which might be a race (imagine two recipes referencing the same
        # source, one with an incorrect checksum)
        bb.fetch2.verify_checksum(ud, d, localpath=localpath, fatal_nochecksum=False)

        # Remove the ".tmp" and move the file into position atomically
        # Our lock prevents multiple writers but mirroring code may grab incomplete files
        os.rename(localpath, localpath[:-4])

        # Sanity check since wget can pretend it succeed when it didn't
        # Also, this used to happen if sourceforge sent us to the mirror page
        if not os.path.exists(ud.localpath):
            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)

        if os.path.getsize(ud.localpath) == 0:
            os.remove(ud.localpath)
            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)

        return True

    def checkstatus(self, fetch, ud, d, try_again=True):
        """
        Check whether the URL exists (HEAD request, falling back to GET on
        servers that reject HEAD) without downloading the body. Retries
        once on failure when try_again is set. Returns True/False.
        """
        class HTTPConnectionCache(http.client.HTTPConnection):
            if fetch.connection_cache:
                def connect(self):
                    """Connect to the host and port specified in __init__."""

                    sock = fetch.connection_cache.get_connection(self.host, self.port)
                    if sock:
                        self.sock = sock
                    else:
                        self.sock = socket.create_connection((self.host, self.port),
                                    self.timeout, self.source_address)
                        fetch.connection_cache.add_connection(self.host, self.port, self.sock)

                    if self._tunnel_host:
                        self._tunnel()

        class CacheHTTPHandler(urllib.request.HTTPHandler):
            def http_open(self, req):
                return self.do_open(HTTPConnectionCache, req)

            def do_open(self, http_class, req):
                """Return an addinfourl object for the request, using http_class.

                http_class must implement the HTTPConnection API from httplib.
                The addinfourl return value is a file-like object.  It also
                has methods and attributes including:
                    - info(): return a mimetools.Message object for the headers
                    - geturl(): return the original request URL
                    - code: HTTP status code
                """
                host = req.host
                if not host:
                    raise urllib.error.URLError('no host given')

                h = http_class(host, timeout=req.timeout) # will parse host:port
                h.set_debuglevel(self._debuglevel)

                headers = dict(req.unredirected_hdrs)
                headers.update(dict((k, v) for k, v in list(req.headers.items())
                            if k not in headers))

                # We want to make an HTTP/1.1 request, but the addinfourl
                # class isn't prepared to deal with a persistent connection.
                # It will try to read all remaining data from the socket,
                # which will block while the server waits for the next request.
                # So make sure the connection gets closed after the (only)
                # request.

                # Don't close connection when connection_cache is enabled,
                if fetch.connection_cache is None:
                    headers["Connection"] = "close"
                else:
                    headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0

                headers = dict(
                    (name.title(), val) for name, val in list(headers.items()))

                if req._tunnel_host:
                    tunnel_headers = {}
                    proxy_auth_hdr = "Proxy-Authorization"
                    if proxy_auth_hdr in headers:
                        tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                        # Proxy-Authorization should not be sent to origin
                        # server.
                        del headers[proxy_auth_hdr]
                    h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

                try:
                    h.request(req.get_method(), req.selector, req.data, headers)
                except socket.error as err: # XXX what error?
                    # Don't close connection when cache is enabled.
                    # Instead, try to detect connections that are no longer
                    # usable (for example, closed unexpectedly) and remove
                    # them from the cache.
                    if fetch.connection_cache is None:
                        h.close()
                    elif isinstance(err, OSError) and err.errno == errno.EBADF:
                        # This happens when the server closes the connection despite the Keep-Alive.
                        # Apparently urllib then uses the file descriptor, expecting it to be
                        # connected, when in reality the connection is already gone.
                        # We let the request fail and expect it to be
                        # tried once more ("try_again" in check_status()),
                        # with the dead connection removed from the cache.
                        # If it still fails, we give up, which can happen for bad
                        # HTTP proxy settings.
                        fetch.connection_cache.remove_connection(h.host, h.port)
                    raise urllib.error.URLError(err)
                else:
                    r = h.getresponse()

                # Pick apart the HTTPResponse object to get the addinfourl
                # object initialized properly.

                # Wrap the HTTPResponse object in socket's file object adapter
                # for Windows.  That adapter calls recv(), so delegate recv()
                # to read().  This weird wrapping allows the returned object to
                # have readline() and readlines() methods.

                # XXX It might be better to extract the read buffering code
                # out of socket._fileobject() and into a base class.
                r.recv = r.read

                # no data, just have to read
                r.read()
                class fp_dummy(object):
                    def read(self):
                        return ""
                    def readline(self):
                        return ""
                    def close(self):
                        pass
                    closed = False

                resp = urllib.response.addinfourl(fp_dummy(), r.msg, req.get_full_url())
                resp.code = r.status
                resp.msg = r.reason

                # Close connection when server request it.
                if fetch.connection_cache is not None:
                    if 'Connection' in r.msg and r.msg['Connection'] == 'close':
                        fetch.connection_cache.remove_connection(h.host, h.port)

                return resp

        class HTTPMethodFallback(urllib.request.BaseHandler):
            """
            Fallback to GET if HEAD is not allowed (405 HTTP error)
            """
            def http_error_405(self, req, fp, code, msg, headers):
                fp.read()
                fp.close()

                if req.get_method() != 'GET':
                    # Retry as a GET, dropping body-describing headers that no
                    # longer apply to the new request.
                    newheaders = dict((k, v) for k, v in list(req.headers.items())
                                      if k.lower() not in ("content-length", "content-type"))
                    return self.parent.open(urllib.request.Request(req.get_full_url(),
                                                       headers=newheaders,
                                                       origin_req_host=req.origin_req_host,
                                                       unverifiable=True))

                raise urllib.request.HTTPError(req, code, msg, headers, None)

            # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
            # Forbidden when they actually mean 405 Method Not Allowed.
            http_error_403 = http_error_405


        class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
            """
            urllib2.HTTPRedirectHandler resets the method to GET on redirect,
            when we want to follow redirects using the original method.
            """
            def redirect_request(self, req, fp, code, msg, headers, newurl):
                newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
                newreq.get_method = req.get_method
                return newreq

        # We need to update the environment here as both the proxy and HTTPS
        # handlers need variables set. The proxy needs http_proxy and friends to
        # be set, and HTTPSHandler ends up calling into openssl to load the
        # certificates. In buildtools configurations this will be looking at the
        # wrong place for certificates by default: we set SSL_CERT_FILE to the
        # right location in the buildtools environment script but as BitBake
        # prunes prunes the environment this is lost. When binaries are executed
        # runfetchcmd ensures these values are in the environment, but this is
        # pure Python so we need to update the environment.
        #
        # Avoid tramping the environment too much by using bb.utils.environment
        # to scope the changes to the build_opener request, which is when the
        # environment lookups happen.
        newenv = bb.fetch2.get_fetcher_environment(d)

        with bb.utils.environment(**newenv):
            import ssl

            if self.check_certs(d):
                context = ssl.create_default_context()
            else:
                context = ssl._create_unverified_context()

            handlers = [FixedHTTPRedirectHandler,
                        HTTPMethodFallback,
                        urllib.request.ProxyHandler(),
                        CacheHTTPHandler(),
                        urllib.request.HTTPSHandler(context=context)]
            opener = urllib.request.build_opener(*handlers)

            try:
                # Strip BitBake URL parameters (";name=value") before probing
                uri_base = ud.url.split(";")[0]
                uri = "{}://{}{}".format(urllib.parse.urlparse(uri_base).scheme, ud.host, ud.path)
                r = urllib.request.Request(uri)
                r.get_method = lambda: "HEAD"
                # Some servers (FusionForge, as used on Alioth) require that the
                # optional Accept header is set.
                r.add_header("Accept", "*/*")
                r.add_header("User-Agent", self.user_agent)
                def add_basic_auth(login_str, request):
                    '''Adds Basic auth to http request, pass in login:password as string'''
                    import base64
                    encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
                    authheader = "Basic %s" % encodeuser
                    r.add_header("Authorization", authheader)

                if ud.user and ud.pswd:
                    add_basic_auth(ud.user + ':' + ud.pswd, r)

                try:
                    # Fall back to ~/.netrc credentials if none on the URL
                    import netrc
                    auth_data = netrc.netrc().authenticators(urllib.parse.urlparse(uri).hostname)
                    if auth_data:
                        login, _, password = auth_data
                        add_basic_auth("%s:%s" % (login, password), r)
                except (FileNotFoundError, netrc.NetrcParseError):
                    pass

                with opener.open(r, timeout=30) as response:
                    pass
            except (urllib.error.URLError, ConnectionResetError, TimeoutError) as e:
                if try_again:
                    logger.debug2("checkstatus: trying again")
                    return self.checkstatus(fetch, ud, d, False)
                else:
                    # debug for now to avoid spamming the logs in e.g. remote sstate searches
                    logger.debug2("checkstatus() urlopen failed for %s: %s" % (uri,e))
                    return False

        return True

    def _parse_path(self, regex, s):
        """
        Find and group name, version and archive type in the given string s
        """

        m = regex.search(s)
        if m:
            pname = ''
            pver = ''
            ptype = ''

            mdict = m.groupdict()
            if 'name' in mdict.keys():
                pname = mdict['name']
            if 'pver' in mdict.keys():
                pver = mdict['pver']
            if 'type' in mdict.keys():
                ptype = mdict['type']

            bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype))

            return (pname, pver, ptype)

        return None

    def _modelate_version(self, version):
        """
        Normalize a version string so bb.utils.vercmp() orders it sensibly:
        separators become dots and pre-release tags (rc/beta/alpha) become
        numeric components that sort below the final release.
        """
        if version[0] in ['.', '-']:
            if version[1].isdigit():
                version = version[1] + version[0] + version[2:len(version)]
            else:
                version = version[1:len(version)]

        version = re.sub('-', '.', version)
        version = re.sub('_', '.', version)
        version = re.sub('(rc)+', '.1000.', version)
        version = re.sub('(beta)+', '.100.', version)
        version = re.sub('(alpha)+', '.10.', version)
        if version[0] == 'v':
            version = version[1:len(version)]
        return version

    def _vercmp(self, old, new):
        """
        Check whether 'new' is newer than 'old' version. We use existing vercmp() for the
        purpose. PE is cleared in comparison as it's not for build, and PR is cleared too
        for simplicity as it's somehow difficult to get from various upstream format
        """

        (oldpn, oldpv, oldsuffix) = old
        (newpn, newpv, newsuffix) = new

        # Check for a new suffix type that we have never heard of before
        if newsuffix:
            m = self.suffix_regex_comp.search(newsuffix)
            if not m:
                bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
                return False

        # Not our package so ignore it
        if oldpn != newpn:
            return False

        oldpv = self._modelate_version(oldpv)
        newpv = self._modelate_version(newpv)

        return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, ""))

    def _fetch_index(self, uri, ud, d):
        """
        Run fetch checkstatus to get directory information
        """
        # Fix: a stray "f = tempfile.NamedTemporaryFile()" used to precede
        # this with-statement; it created an unused temporary file that was
        # immediately shadowed and only cleaned up at GC. Removed.
        with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
            fetchcmd = self.basecmd
            fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'"
            try:
                self._runwget(ud, d, fetchcmd, True, workdir=workdir)
                fetchresult = f.read()
            except bb.fetch2.BBFetchException:
                # Treat a failed index fetch as an empty listing
                fetchresult = ""

        return fetchresult

    def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
        """
        Return the latest version of a package inside a given directory path
        If error or no version, return ""
        """
        valid = 0
        version = ['', '', '']

        bb.debug(3, "VersionURL: %s" % (url))
        soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            bb.debug(3, "*** %s NO SOUP" % (url))
            return ""

        for line in soup.find_all('a', href=True):
            bb.debug(3, "line['href'] = '%s'" % (line['href']))
            bb.debug(3, "line = '%s'" % (str(line)))

            newver = self._parse_path(package_regex, line['href'])
            if not newver:
                newver = self._parse_path(package_regex, str(line))

            if newver:
                bb.debug(3, "Upstream version found: %s" % newver[1])
                if valid == 0:
                    version = newver
                    valid = 1
                elif self._vercmp(version, newver) < 0:
                    version = newver

        pupver = re.sub('_', '.', version[1])

        bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
                (package, pupver or "N/A", current_version[1]))

        if valid:
            return pupver

        return ""

    def _check_latest_version_by_dir(self, dirver, package, package_regex, current_version, ud, d):
        """
        Scan every directory in order to get upstream version.
        """
        version_dir = ['', '', '']
        version = ['', '', '']

        dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])*(\d+))")
        s = dirver_regex.search(dirver)
        if s:
            version_dir[1] = s.group('ver')
        else:
            version_dir[1] = dirver

        dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
                ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
        bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))

        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            return version[1]

        for line in soup.find_all('a', href=True):
            s = dirver_regex.search(line['href'].strip("/"))
            if s:
                sver = s.group('ver')

                # When prefix is part of the version directory it need to
                # ensure that only version directory is used so remove previous
                # directories if exists.
                #
                # Example: pfx = '/dir1/dir2/v' and version = '2.5' the expected
                # result is v2.5.
                spfx = s.group('pfx').split('/')[-1]

                version_dir_new = ['', sver, '']
                if self._vercmp(version_dir, version_dir_new) <= 0:
                    dirver_new = spfx + sver
                    path = ud.path.replace(dirver, dirver_new, True) \
                        .split(package)[0]
                    uri = bb.fetch.encodeurl([ud.type, ud.host, path,
                        ud.user, ud.pswd, {}])

                    pupver = self._check_latest_version(uri,
                            package, package_regex, current_version, ud, d)
                    if pupver:
                        version[1] = pupver

                    version_dir = version_dir_new

        return version[1]

    def _init_regexes(self, package, ud, d):
        """
        Match as many patterns as possible such as:
                gnome-common-2.20.0.tar.gz (most common format)
                gtk+-2.90.1.tar.gz
                xf86-input-synaptics-12.6.9.tar.gz
                dri2proto-2.3.tar.gz
                blktool_4.orig.tar.gz
                libid3tag-0.15.1b.tar.gz
                unzip552.tar.gz
                icu4c-3_6-src.tgz
                genext2fs_1.3.orig.tar.gz
                gst-fluendo-mp3
        """
        # match most patterns which uses "-" as separator to version digits
        pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
        # a loose pattern such as for unzip552.tar.gz
        pn_prefix2 = r"[a-zA-Z]+"
        # a loose pattern such as for 80325-quicky-0.4.tar.gz
        pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+"
        # Save the Package Name (pn) Regex for use later
        pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)

        # match version
        pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"

        # match arch
        parch_regex = "-source|_all_"

        # src.rpm extension was added only for rpm package. Can be removed if the rpm
        # packaged will always be considered as having to be manually upgraded
        psuffix_regex = r"(tar\.\w+|tgz|zip|xz|rpm|bz2|orig\.tar\.\w+|src\.tar\.\w+|src\.tgz|svnr\d+\.tar\.\w+|stable\.tar\.\w+|src\.rpm)"

        # match name, version and archive type of a package
        package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
                                                    % (pn_regex, pver_regex, parch_regex, psuffix_regex))
        self.suffix_regex_comp = re.compile(psuffix_regex)

        # compile regex, can be specific by package or generic regex
        pn_regex = d.getVar('UPSTREAM_CHECK_REGEX')
        if pn_regex:
            package_custom_regex_comp = re.compile(pn_regex)
        else:
            version = self._parse_path(package_regex_comp, package)
            if version:
                package_custom_regex_comp = re.compile(
                    r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
                    (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))
            else:
                package_custom_regex_comp = None

        return package_custom_regex_comp

    def latest_versionstring(self, ud, d):
        """
        Manipulate the URL and try to obtain the latest package version

        sanity check to ensure same name and type.
        """
        package = ud.path.split("/")[-1]
        current_version = ['', d.getVar('PV'), '']

        """possible to have no version in pkg name, such as spectrum-fw"""
        if not re.search(r"\d+", package):
            current_version[1] = re.sub('_', '.', current_version[1])
            current_version[1] = re.sub('-', '.', current_version[1])
            return (current_version[1], '')

        package_regex = self._init_regexes(package, ud, d)
        if package_regex is None:
            bb.warn("latest_versionstring: package %s don't match pattern" % (package))
            return ('', '')
        bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))

        uri = ""
        regex_uri = d.getVar("UPSTREAM_CHECK_URI")
        if not regex_uri:
            path = ud.path.split(package)[0]

            # search for version matches on folders inside the path, like:
            # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
            dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
            m = dirver_regex.findall(path)
            if m:
                pn = d.getVar('PN')
                dirver = m[-1][0]

                dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
                if not dirver_pn_regex.search(dirver):
                    return (self._check_latest_version_by_dir(dirver,
                        package, package_regex, current_version, ud, d), '')

            uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])
        else:
            uri = regex_uri

        return (self._check_latest_version(uri, package, package_regex,
                current_version, ud, d), '')