blob: dc025800e6595ea9d3effd4acd2dc15508bccc6f [file] [log] [blame]
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001"""
2BitBake 'Fetch' implementations
3
4Classes for obtaining upstream sources for the
5BitBake build tools.
6
7"""
8
9# Copyright (C) 2003, 2004 Chris Larson
10#
Brad Bishopc342db32019-05-15 21:57:59 -040011# SPDX-License-Identifier: GPL-2.0-only
Patrick Williamsc124f4f2015-09-15 14:41:29 -050012#
13# Based on functions from the base bb module, Copyright 2003 Holger Schurig
14
Andrew Geissler82c905d2020-04-13 13:39:40 -050015import shlex
Patrick Williamsc124f4f2015-09-15 14:41:29 -050016import re
17import tempfile
Patrick Williamsc124f4f2015-09-15 14:41:29 -050018import os
Brad Bishopd7bf8c12018-02-25 22:55:05 -050019import errno
Patrick Williamsc124f4f2015-09-15 14:41:29 -050020import bb
Patrick Williamsc0f7c042017-02-23 20:41:17 -060021import bb.progress
Brad Bishop19323692019-04-05 15:28:33 -040022import socket
23import http.client
Patrick Williamsc0f7c042017-02-23 20:41:17 -060024import urllib.request, urllib.parse, urllib.error
Patrick Williamsc124f4f2015-09-15 14:41:29 -050025from bb.fetch2 import FetchMethod
26from bb.fetch2 import FetchError
27from bb.fetch2 import logger
28from bb.fetch2 import runfetchcmd
29from bs4 import BeautifulSoup
Patrick Williamsd8c66bc2016-06-20 12:57:21 -050030from bs4 import SoupStrainer
Patrick Williamsc124f4f2015-09-15 14:41:29 -050031
class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
    """
    Extract progress information from wget output.

    Note: relies on --progress=dot (with -v or without -q/-nv) being
    specified on the wget command line (see _runwget below).
    """
    def __init__(self, d):
        super(WgetProgressHandler, self).__init__(d)
        # Send an initial progress event so the bar gets shown
        self._fire_progress(0)

    def writeline(self, line):
        # wget's dot output looks like " 1050K .......... 52% 1.2M 3s";
        # capture all "<percent>% <rate><unit>" pairs and use the last one.
        percs = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line)
        if percs:
            progress = int(percs[-1][0])
            rate = percs[-1][1] + '/s'
            self.update(progress, rate)
            # Returning False suppresses the progress line from the log
            return False
        return True
51
52
class Wget(FetchMethod):
    """Class to fetch urls via 'wget'"""

    # CDNs like CloudFlare may do a 'browser integrity test' which can fail
    # with the standard wget/urllib User-Agent, so pretend to be a modern
    # browser.
    user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"

    def check_certs(self, d):
        """
        Should certificates be checked?

        Returns True unless BB_CHECK_SSL_CERTS is explicitly set to "0";
        an unset variable defaults to "1" (checking enabled).
        """
        return (d.getVar("BB_CHECK_SSL_CERTS") or "1") != "0"
66
    def supports(self, ud, d):
        """
        Check to see if a given url can be fetched with wget.
        """
        return ud.type in ['http', 'https', 'ftp', 'ftps']

    def recommends_checksum(self, urldata):
        """Checksums are always recommended for wget-fetched (remote) files."""
        return True
75
    def urldata_init(self, ud, d):
        """
        Initialise per-URL data: derive the local file name from the URL
        (or the downloadfilename parameter) and build the base wget command.
        """
        if 'protocol' in ud.parm:
            if ud.parm['protocol'] == 'git':
                raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)

        if 'downloadfilename' in ud.parm:
            ud.basename = ud.parm['downloadfilename']
        else:
            ud.basename = os.path.basename(ud.path)

        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
        if not ud.localfile:
            # URL ends in "/" (no basename): fall back to host+path with
            # slashes flattened to dots so a valid filename results.
            ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))

        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp"

        if not self.check_certs(d):
            self.basecmd += " --no-check-certificate"
Patrick Williamsc124f4f2015-09-15 14:41:29 -050094
    def _runwget(self, ud, d, command, quiet, workdir=None):
        """
        Execute a wget command with progress reporting and network-access
        policy checking. Appends --progress=dot -v, which WgetProgressHandler
        relies on to parse progress output.
        """
        progresshandler = WgetProgressHandler(d)

        logger.debug2("Fetching %s using command '%s'" % (ud.url, command))
        bb.fetch2.check_network_access(d, command, ud.url)
        runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500102
    def download(self, ud, d):
        """
        Fetch urls.

        Downloads to "<localpath>.tmp" first, verifies any checksum against
        the temporary file, then renames it into place so other readers
        never see a partial download.
        """

        fetchcmd = self.basecmd

        localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile) + ".tmp"
        bb.utils.mkdirhier(os.path.dirname(localpath))
        fetchcmd += " -O %s" % shlex.quote(localpath)

        if ud.user and ud.pswd:
            fetchcmd += " --auth-no-challenge"
            if ud.parm.get("redirectauth", "1") == "1":
                # An undocumented feature of wget is that if the
                # username/password are specified on the URI, wget will only
                # send the Authorization header to the first host and not to
                # any hosts that it is redirected to. With the increasing
                # usage of temporary AWS URLs, this difference now matters as
                # AWS will reject any request that has authentication both in
                # the query parameters (from the redirect) and in the
                # Authorization header.
                fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd)

        uri = ud.url.split(";")[0]
        if os.path.exists(ud.localpath):
            # file exists, but we didn't complete it.. trying again..
            fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
        else:
            fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)

        self._runwget(ud, d, fetchcmd, False)

        # Try and verify any checksum now, meaning if it isn't correct, we don't remove the
        # original file, which might be a race (imagine two recipes referencing the same
        # source, one with an incorrect checksum)
        bb.fetch2.verify_checksum(ud, d, localpath=localpath, fatal_nochecksum=False)

        # Remove the ".tmp" and move the file into position atomically
        # Our lock prevents multiple writers but mirroring code may grab incomplete files
        os.rename(localpath, localpath[:-4])

        # Sanity check since wget can pretend it succeeded when it didn't
        # Also, this used to happen if sourceforge sent us to the mirror page
        if not os.path.exists(ud.localpath):
            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)

        if os.path.getsize(ud.localpath) == 0:
            os.remove(ud.localpath)
            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)

        return True
153
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600154 def checkstatus(self, fetch, ud, d, try_again=True):
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600155 class HTTPConnectionCache(http.client.HTTPConnection):
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500156 if fetch.connection_cache:
157 def connect(self):
158 """Connect to the host and port specified in __init__."""
159
160 sock = fetch.connection_cache.get_connection(self.host, self.port)
161 if sock:
162 self.sock = sock
163 else:
164 self.sock = socket.create_connection((self.host, self.port),
165 self.timeout, self.source_address)
166 fetch.connection_cache.add_connection(self.host, self.port, self.sock)
167
168 if self._tunnel_host:
169 self._tunnel()
170
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600171 class CacheHTTPHandler(urllib.request.HTTPHandler):
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500172 def http_open(self, req):
173 return self.do_open(HTTPConnectionCache, req)
174
175 def do_open(self, http_class, req):
176 """Return an addinfourl object for the request, using http_class.
177
178 http_class must implement the HTTPConnection API from httplib.
179 The addinfourl return value is a file-like object. It also
180 has methods and attributes including:
181 - info(): return a mimetools.Message object for the headers
182 - geturl(): return the original request URL
183 - code: HTTP status code
184 """
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600185 host = req.host
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500186 if not host:
Brad Bishop19323692019-04-05 15:28:33 -0400187 raise urllib.error.URLError('no host given')
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500188
189 h = http_class(host, timeout=req.timeout) # will parse host:port
190 h.set_debuglevel(self._debuglevel)
191
192 headers = dict(req.unredirected_hdrs)
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600193 headers.update(dict((k, v) for k, v in list(req.headers.items())
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500194 if k not in headers))
195
196 # We want to make an HTTP/1.1 request, but the addinfourl
197 # class isn't prepared to deal with a persistent connection.
198 # It will try to read all remaining data from the socket,
199 # which will block while the server waits for the next request.
200 # So make sure the connection gets closed after the (only)
201 # request.
202
203 # Don't close connection when connection_cache is enabled,
Brad Bishop19323692019-04-05 15:28:33 -0400204 if fetch.connection_cache is None:
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500205 headers["Connection"] = "close"
206 else:
207 headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0
208
209 headers = dict(
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600210 (name.title(), val) for name, val in list(headers.items()))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500211
212 if req._tunnel_host:
213 tunnel_headers = {}
214 proxy_auth_hdr = "Proxy-Authorization"
215 if proxy_auth_hdr in headers:
216 tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
217 # Proxy-Authorization should not be sent to origin
218 # server.
219 del headers[proxy_auth_hdr]
220 h.set_tunnel(req._tunnel_host, headers=tunnel_headers)
221
222 try:
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600223 h.request(req.get_method(), req.selector, req.data, headers)
224 except socket.error as err: # XXX what error?
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500225 # Don't close connection when cache is enabled.
Brad Bishopd7bf8c12018-02-25 22:55:05 -0500226 # Instead, try to detect connections that are no longer
227 # usable (for example, closed unexpectedly) and remove
228 # them from the cache.
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500229 if fetch.connection_cache is None:
230 h.close()
Brad Bishopd7bf8c12018-02-25 22:55:05 -0500231 elif isinstance(err, OSError) and err.errno == errno.EBADF:
232 # This happens when the server closes the connection despite the Keep-Alive.
233 # Apparently urllib then uses the file descriptor, expecting it to be
234 # connected, when in reality the connection is already gone.
235 # We let the request fail and expect it to be
236 # tried once more ("try_again" in check_status()),
237 # with the dead connection removed from the cache.
Andrew Geissler7e0e3c02022-02-25 20:34:39 +0000238 # If it still fails, we give up, which can happen for bad
Brad Bishopd7bf8c12018-02-25 22:55:05 -0500239 # HTTP proxy settings.
240 fetch.connection_cache.remove_connection(h.host, h.port)
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600241 raise urllib.error.URLError(err)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500242 else:
Andrew Geisslerc9f78652020-09-18 14:11:35 -0500243 r = h.getresponse()
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500244
245 # Pick apart the HTTPResponse object to get the addinfourl
246 # object initialized properly.
247
248 # Wrap the HTTPResponse object in socket's file object adapter
249 # for Windows. That adapter calls recv(), so delegate recv()
250 # to read(). This weird wrapping allows the returned object to
251 # have readline() and readlines() methods.
252
253 # XXX It might be better to extract the read buffering code
254 # out of socket._fileobject() and into a base class.
255 r.recv = r.read
256
257 # no data, just have to read
258 r.read()
259 class fp_dummy(object):
260 def read(self):
261 return ""
262 def readline(self):
263 return ""
264 def close(self):
265 pass
Brad Bishop316dfdd2018-06-25 12:45:53 -0400266 closed = False
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500267
Brad Bishop19323692019-04-05 15:28:33 -0400268 resp = urllib.response.addinfourl(fp_dummy(), r.msg, req.get_full_url())
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500269 resp.code = r.status
270 resp.msg = r.reason
271
272 # Close connection when server request it.
273 if fetch.connection_cache is not None:
274 if 'Connection' in r.msg and r.msg['Connection'] == 'close':
275 fetch.connection_cache.remove_connection(h.host, h.port)
276
277 return resp
278
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600279 class HTTPMethodFallback(urllib.request.BaseHandler):
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500280 """
281 Fallback to GET if HEAD is not allowed (405 HTTP error)
282 """
283 def http_error_405(self, req, fp, code, msg, headers):
284 fp.read()
285 fp.close()
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500286
Brad Bishop08902b02019-08-20 09:16:51 -0400287 if req.get_method() != 'GET':
288 newheaders = dict((k, v) for k, v in list(req.headers.items())
289 if k.lower() not in ("content-length", "content-type"))
290 return self.parent.open(urllib.request.Request(req.get_full_url(),
291 headers=newheaders,
292 origin_req_host=req.origin_req_host,
293 unverifiable=True))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500294
Brad Bishop08902b02019-08-20 09:16:51 -0400295 raise urllib.request.HTTPError(req, code, msg, headers, None)
Brad Bishop19323692019-04-05 15:28:33 -0400296
297 # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
298 # Forbidden when they actually mean 405 Method Not Allowed.
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500299 http_error_403 = http_error_405
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500300
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500301
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600302 class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500303 """
304 urllib2.HTTPRedirectHandler resets the method to GET on redirect,
305 when we want to follow redirects using the original method.
306 """
307 def redirect_request(self, req, fp, code, msg, headers, newurl):
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600308 newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
Brad Bishop19323692019-04-05 15:28:33 -0400309 newreq.get_method = req.get_method
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500310 return newreq
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500311
Patrick Williams0ca19cc2021-08-16 14:03:13 -0500312 # We need to update the environment here as both the proxy and HTTPS
313 # handlers need variables set. The proxy needs http_proxy and friends to
314 # be set, and HTTPSHandler ends up calling into openssl to load the
315 # certificates. In buildtools configurations this will be looking at the
316 # wrong place for certificates by default: we set SSL_CERT_FILE to the
317 # right location in the buildtools environment script but as BitBake
318 # prunes prunes the environment this is lost. When binaries are executed
319 # runfetchcmd ensures these values are in the environment, but this is
320 # pure Python so we need to update the environment.
321 #
322 # Avoid tramping the environment too much by using bb.utils.environment
323 # to scope the changes to the build_opener request, which is when the
324 # environment lookups happen.
Andrew Geissler7e0e3c02022-02-25 20:34:39 +0000325 newenv = bb.fetch2.get_fetcher_environment(d)
Patrick Williams0ca19cc2021-08-16 14:03:13 -0500326
327 with bb.utils.environment(**newenv):
328 import ssl
329
330 if self.check_certs(d):
331 context = ssl.create_default_context()
332 else:
333 context = ssl._create_unverified_context()
334
335 handlers = [FixedHTTPRedirectHandler,
336 HTTPMethodFallback,
337 urllib.request.ProxyHandler(),
338 CacheHTTPHandler(),
339 urllib.request.HTTPSHandler(context=context)]
340 opener = urllib.request.build_opener(*handlers)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500341
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500342 try:
Andrew Geissler517393d2023-01-13 08:55:19 -0600343 uri_base = ud.url.split(";")[0]
344 uri = "{}://{}{}".format(urllib.parse.urlparse(uri_base).scheme, ud.host, ud.path)
Andrew Geisslerd159c7f2021-09-02 21:05:58 -0500345 r = urllib.request.Request(uri)
346 r.get_method = lambda: "HEAD"
347 # Some servers (FusionForge, as used on Alioth) require that the
348 # optional Accept header is set.
349 r.add_header("Accept", "*/*")
350 r.add_header("User-Agent", self.user_agent)
351 def add_basic_auth(login_str, request):
352 '''Adds Basic auth to http request, pass in login:password as string'''
353 import base64
354 encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
355 authheader = "Basic %s" % encodeuser
356 r.add_header("Authorization", authheader)
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500357
Andrew Geisslerd159c7f2021-09-02 21:05:58 -0500358 if ud.user and ud.pswd:
359 add_basic_auth(ud.user + ':' + ud.pswd, r)
360
361 try:
362 import netrc
Andrew Geissler6aa7eec2023-03-03 12:41:14 -0600363 auth_data = netrc.netrc().authenticators(urllib.parse.urlparse(uri).hostname)
364 if auth_data:
365 login, _, password = auth_data
366 add_basic_auth("%s:%s" % (login, password), r)
367 except (FileNotFoundError, netrc.NetrcParseError):
Andrew Geisslerd159c7f2021-09-02 21:05:58 -0500368 pass
369
Andrew Geissler595f6302022-01-24 19:11:47 +0000370 with opener.open(r, timeout=30) as response:
Andrew Geisslerd159c7f2021-09-02 21:05:58 -0500371 pass
Andrew Geisslerfc113ea2023-03-31 09:59:46 -0500372 except (urllib.error.URLError, ConnectionResetError, TimeoutError) as e:
Andrew Geisslerd159c7f2021-09-02 21:05:58 -0500373 if try_again:
374 logger.debug2("checkstatus: trying again")
375 return self.checkstatus(fetch, ud, d, False)
376 else:
377 # debug for now to avoid spamming the logs in e.g. remote sstate searches
Patrick Williams705982a2024-01-12 09:51:57 -0600378 logger.debug2("checkstatus() urlopen failed for %s: %s" % (uri,e))
Andrew Geisslerd159c7f2021-09-02 21:05:58 -0500379 return False
380
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500381 return True
382
383 def _parse_path(self, regex, s):
384 """
385 Find and group name, version and archive type in the given string s
386 """
387
388 m = regex.search(s)
389 if m:
390 pname = ''
391 pver = ''
392 ptype = ''
393
394 mdict = m.groupdict()
395 if 'name' in mdict.keys():
396 pname = mdict['name']
397 if 'pver' in mdict.keys():
398 pver = mdict['pver']
399 if 'type' in mdict.keys():
400 ptype = mdict['type']
401
402 bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype))
403
404 return (pname, pver, ptype)
405
406 return None
407
408 def _modelate_version(self, version):
409 if version[0] in ['.', '-']:
410 if version[1].isdigit():
411 version = version[1] + version[0] + version[2:len(version)]
412 else:
413 version = version[1:len(version)]
414
415 version = re.sub('-', '.', version)
416 version = re.sub('_', '.', version)
417 version = re.sub('(rc)+', '.1000.', version)
418 version = re.sub('(beta)+', '.100.', version)
419 version = re.sub('(alpha)+', '.10.', version)
420 if version[0] == 'v':
421 version = version[1:len(version)]
422 return version
423
    def _vercmp(self, old, new):
        """
        Check whether 'new' is newer than 'old' version. We use existing vercmp() for the
        purpose. PE is cleared in comparison as it's not for build, and PR is cleared too
        for simplicity as it's somehow difficult to get from various upstream format

        Both 'old' and 'new' are (name, version, suffix) tuples as produced
        by _parse_path(). Returns a negative/zero/positive vercmp result,
        or False when the packages are not comparable.
        """

        (oldpn, oldpv, oldsuffix) = old
        (newpn, newpv, newsuffix) = new

        # Check for a new suffix type that we have never heard of before
        # (suffix_regex_comp is built in _init_regexes)
        if newsuffix:
            m = self.suffix_regex_comp.search(newsuffix)
            if not m:
                bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
                return False

        # Not our package so ignore it
        if oldpn != newpn:
            return False

        oldpv = self._modelate_version(oldpv)
        newpv = self._modelate_version(newpv)

        # Compare with empty epoch/revision fields; only the PV part matters here.
        return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, ""))
449
450 def _fetch_index(self, uri, ud, d):
451 """
452 Run fetch checkstatus to get directory information
453 """
454 f = tempfile.NamedTemporaryFile()
Brad Bishopd7bf8c12018-02-25 22:55:05 -0500455 with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
Brad Bishopd7bf8c12018-02-25 22:55:05 -0500456 fetchcmd = self.basecmd
Andrew Geisslerd1e89492021-02-12 15:35:20 -0600457 fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'"
Brad Bishopd7bf8c12018-02-25 22:55:05 -0500458 try:
459 self._runwget(ud, d, fetchcmd, True, workdir=workdir)
460 fetchresult = f.read()
461 except bb.fetch2.BBFetchException:
462 fetchresult = ""
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500463
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500464 return fetchresult
465
    def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
        """
        Return the latest version of a package inside a given directory path
        If error or no version, return ""
        """
        valid = 0
        version = ['', '', '']

        bb.debug(3, "VersionURL: %s" % (url))
        # Only parse anchor tags; the index page may be large.
        soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            bb.debug(3, "*** %s NO SOUP" % (url))
            return ""

        for line in soup.find_all('a', href=True):
            bb.debug(3, "line['href'] = '%s'" % (line['href']))
            bb.debug(3, "line = '%s'" % (str(line)))

            # Try the href first; fall back to the full tag text.
            newver = self._parse_path(package_regex, line['href'])
            if not newver:
                newver = self._parse_path(package_regex, str(line))

            if newver:
                bb.debug(3, "Upstream version found: %s" % newver[1])
                if valid == 0:
                    version = newver
                    valid = 1
                elif self._vercmp(version, newver) < 0:
                    version = newver

        pupver = re.sub('_', '.', version[1])

        bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
                (package, pupver or "N/A", current_version[1]))

        if valid:
            return pupver

        return ""
505
    def _check_latest_version_by_dir(self, dirver, package, package_regex, current_version, ud, d):
        """
        Scan every directory in order to get upstream version.
        """
        version_dir = ['', '', '']
        version = ['', '', '']

        # Split a directory name into a non-digit prefix and a dotted version.
        dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])*(\d+))")
        s = dirver_regex.search(dirver)
        if s:
            version_dir[1] = s.group('ver')
        else:
            version_dir[1] = dirver

        # Index of the parent directory that contains the version directories.
        dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
                ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
        bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))

        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            return version[1]

        for line in soup.find_all('a', href=True):
            s = dirver_regex.search(line['href'].strip("/"))
            if s:
                sver = s.group('ver')

                # When prefix is part of the version directory it need to
                # ensure that only version directory is used so remove previous
                # directories if exists.
                #
                # Example: pfx = '/dir1/dir2/v' and version = '2.5' the expected
                # result is v2.5.
                spfx = s.group('pfx').split('/')[-1]

                version_dir_new = ['', sver, '']
                if self._vercmp(version_dir, version_dir_new) <= 0:
                    dirver_new = spfx + sver
                    # replace() count argument: True == 1, i.e. first occurrence only.
                    path = ud.path.replace(dirver, dirver_new, True) \
                        .split(package)[0]
                    uri = bb.fetch.encodeurl([ud.type, ud.host, path,
                        ud.user, ud.pswd, {}])

                    pupver = self._check_latest_version(uri,
                            package, package_regex, current_version, ud, d)
                    if pupver:
                        version[1] = pupver

                    version_dir = version_dir_new

        return version[1]
557
    def _init_regexes(self, package, ud, d):
        """
        Match as many patterns as possible such as:
                gnome-common-2.20.0.tar.gz (most common format)
                gtk+-2.90.1.tar.gz
                xf86-input-synaptics-12.6.9.tar.gz
                dri2proto-2.3.tar.gz
                blktool_4.orig.tar.gz
                libid3tag-0.15.1b.tar.gz
                unzip552.tar.gz
                icu4c-3_6-src.tgz
                genext2fs_1.3.orig.tar.gz
                gst-fluendo-mp3

        Returns a compiled regex with named groups (name, pver, arch, type),
        or None when the package name cannot be parsed and no
        UPSTREAM_CHECK_REGEX was provided.
        """
        # match most patterns which uses "-" as separator to version digits
        pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
        # a loose pattern such as for unzip552.tar.gz
        pn_prefix2 = r"[a-zA-Z]+"
        # a loose pattern such as for 80325-quicky-0.4.tar.gz
        pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+"
        # Save the Package Name (pn) Regex for use later
        pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)

        # match version
        pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"

        # match arch
        parch_regex = "-source|_all_"

        # src.rpm extension was added only for rpm package. Can be removed if the rpm
        # packaged will always be considered as having to be manually upgraded
        psuffix_regex = r"(tar\.\w+|tgz|zip|xz|rpm|bz2|orig\.tar\.\w+|src\.tar\.\w+|src\.tgz|svnr\d+\.tar\.\w+|stable\.tar\.\w+|src\.rpm)"

        # match name, version and archive type of a package
        package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
                                        % (pn_regex, pver_regex, parch_regex, psuffix_regex))
        # Kept on self for suffix sanity checking in _vercmp().
        self.suffix_regex_comp = re.compile(psuffix_regex)

        # compile regex, can be specific by package or generic regex
        pn_regex = d.getVar('UPSTREAM_CHECK_REGEX')
        if pn_regex:
            package_custom_regex_comp = re.compile(pn_regex)
        else:
            version = self._parse_path(package_regex_comp, package)
            if version:
                # Anchor the name to exactly this package to avoid matching
                # similarly-named siblings in the same directory.
                package_custom_regex_comp = re.compile(
                    r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
                    (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))
            else:
                package_custom_regex_comp = None

        return package_custom_regex_comp
610
    def latest_versionstring(self, ud, d):
        """
        Manipulate the URL and try to obtain the latest package version

        sanity check to ensure same name and type.

        Returns a (version, revision) tuple; revision is always '' for wget.
        """
        package = ud.path.split("/")[-1]
        current_version = ['', d.getVar('PV'), '']

        """possible to have no version in pkg name, such as spectrum-fw"""
        if not re.search(r"\d+", package):
            current_version[1] = re.sub('_', '.', current_version[1])
            current_version[1] = re.sub('-', '.', current_version[1])
            return (current_version[1], '')

        package_regex = self._init_regexes(package, ud, d)
        if package_regex is None:
            bb.warn("latest_versionstring: package %s don't match pattern" % (package))
            return ('', '')
        bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))

        uri = ""
        regex_uri = d.getVar("UPSTREAM_CHECK_URI")
        if not regex_uri:
            path = ud.path.split(package)[0]

            # search for version matches on folders inside the path, like:
            # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
            dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
            m = dirver_regex.findall(path)
            if m:
                pn = d.getVar('PN')
                # Use the last (deepest) version-looking directory component.
                dirver = m[-1][0]

                # Only treat it as a version directory when it is not just
                # the package name itself (e.g. "python3/").
                dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
                if not dirver_pn_regex.search(dirver):
                    return (self._check_latest_version_by_dir(dirver,
                        package, package_regex, current_version, ud, d), '')

            uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])
        else:
            uri = regex_uri

        return (self._check_latest_version(uri, package, package_regex,
                current_version, ud, d), '')