"""
BitBake 'Fetch' implementations

Classes for obtaining upstream sources for the
BitBake build tools.

"""

# Copyright (C) 2003, 2004 Chris Larson
#
# SPDX-License-Identifier: GPL-2.0-only
#
# Based on functions from the base bb module, Copyright 2003 Holger Schurig

import errno
import http.client
import os
import re
import shlex
import socket
import tempfile
import urllib.request, urllib.parse, urllib.error

from bs4 import BeautifulSoup
from bs4 import SoupStrainer

import bb
import bb.progress
from bb.fetch2 import FetchMethod
from bb.fetch2 import FetchError
from bb.fetch2 import logger
from bb.fetch2 import runfetchcmd
from bb.utils import export_proxies
Patrick Williamsc124f4f2015-09-15 14:41:29 -050032
class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
    """
    Extract progress information from wget output.

    Note: relies on --progress=dot (with -v or without -q/-nv) being
    specified on the wget command line.
    """
    def __init__(self, d):
        super(WgetProgressHandler, self).__init__(d)
        # Fire an initial 0% event so the progress bar is displayed
        # before wget prints its first dot line.
        self._fire_progress(0)

    def writeline(self, line):
        # wget's dot-progress lines look like: "....... 45%  1.2M 3s"
        matches = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line)
        if not matches:
            # Not a progress line; let it through to the log.
            return True
        percentage, rate = matches[-1]
        self.update(int(percentage), rate + '/s')
        # Suppress the raw dot line from the log output.
        return False
53
class Wget(FetchMethod):
    """Class to fetch urls via 'wget'"""

    # CDNs like CloudFlare may do a 'browser integrity test' which can fail
    # with the standard wget/urllib User-Agent, so pretend to be a modern
    # browser.
    user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"

    def check_certs(self, d):
        """
        Should certificates be checked?
        """
        return (d.getVar("BB_CHECK_SSL_CERTS") or "1") != "0"

    def supports(self, ud, d):
        """
        Check to see if a given url can be fetched with wget.
        """
        return ud.type in ['http', 'https', 'ftp', 'ftps']

    def recommends_checksum(self, urldata):
        # Plain file downloads should always carry checksums in the recipe.
        return True

    def urldata_init(self, ud, d):
        """Set up ud.localfile and the base wget command line for this URL."""
        if 'protocol' in ud.parm:
            if ud.parm['protocol'] == 'git':
                raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)

        if 'downloadfilename' in ud.parm:
            ud.basename = ud.parm['downloadfilename']
        else:
            ud.basename = os.path.basename(ud.path)

        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
        if not ud.localfile:
            # No usable basename (e.g. trailing slash); derive a name from
            # the host and path instead.
            ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))

        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp"

        if not self.check_certs(d):
            self.basecmd += " --no-check-certificate"

    def _runwget(self, ud, d, command, quiet, workdir=None):
        """Execute a wget command with progress reporting and network-access checks."""
        progresshandler = WgetProgressHandler(d)

        logger.debug2("Fetching %s using command '%s'" % (ud.url, command))
        bb.fetch2.check_network_access(d, command, ud.url)
        runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)

    def download(self, ud, d):
        """Fetch urls"""

        fetchcmd = self.basecmd

        # Download into a ".tmp" file first so a partial download is never
        # visible at the final location.
        localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile) + ".tmp"
        bb.utils.mkdirhier(os.path.dirname(localpath))
        fetchcmd += " -O %s" % shlex.quote(localpath)

        if ud.user and ud.pswd:
            fetchcmd += " --auth-no-challenge"
            if ud.parm.get("redirectauth", "1") == "1":
                # An undocumented feature of wget is that if the
                # username/password are specified on the URI, wget will only
                # send the Authorization header to the first host and not to
                # any hosts that it is redirected to. With the increasing
                # usage of temporary AWS URLs, this difference now matters as
                # AWS will reject any request that has authentication both in
                # the query parameters (from the redirect) and in the
                # Authorization header.
                fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd)

        uri = ud.url.split(";")[0]
        if os.path.exists(ud.localpath):
            # File exists but we didn't complete it; try to resume (-c)
            fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
        else:
            fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)

        self._runwget(ud, d, fetchcmd, False)

        # Try and verify any checksum now, meaning if it isn't correct, we don't remove the
        # original file, which might be a race (imagine two recipes referencing the same
        # source, one with an incorrect checksum)
        bb.fetch2.verify_checksum(ud, d, localpath=localpath, fatal_nochecksum=False)

        # Remove the ".tmp" and move the file into position atomically
        # Our lock prevents multiple writers but mirroring code may grab incomplete files
        os.rename(localpath, localpath[:-4])

        # Sanity check since wget can pretend it succeed when it didn't
        # Also, this used to happen if sourceforge sent us to the mirror page
        if not os.path.exists(ud.localpath):
            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)

        if os.path.getsize(ud.localpath) == 0:
            os.remove(ud.localpath)
            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)

        return True

    def checkstatus(self, fetch, ud, d, try_again=True):
        """
        Check that the URL is reachable by issuing a HEAD request (with GET
        fallback for servers that reject HEAD). Returns True on success,
        False on failure; retries once when try_again is set.
        """
        class HTTPConnectionCache(http.client.HTTPConnection):
            if fetch.connection_cache:
                def connect(self):
                    """Connect to the host and port specified in __init__."""

                    sock = fetch.connection_cache.get_connection(self.host, self.port)
                    if sock:
                        self.sock = sock
                    else:
                        self.sock = socket.create_connection((self.host, self.port),
                                    self.timeout, self.source_address)
                        fetch.connection_cache.add_connection(self.host, self.port, self.sock)

                    if self._tunnel_host:
                        self._tunnel()

        class CacheHTTPHandler(urllib.request.HTTPHandler):
            def http_open(self, req):
                return self.do_open(HTTPConnectionCache, req)

            def do_open(self, http_class, req):
                """Return an addinfourl object for the request, using http_class.

                http_class must implement the HTTPConnection API from httplib.
                The addinfourl return value is a file-like object. It also
                has methods and attributes including:
                    - info(): return a mimetools.Message object for the headers
                    - geturl(): return the original request URL
                    - code: HTTP status code
                """
                host = req.host
                if not host:
                    raise urllib.error.URLError('no host given')

                h = http_class(host, timeout=req.timeout) # will parse host:port
                h.set_debuglevel(self._debuglevel)

                headers = dict(req.unredirected_hdrs)
                headers.update(dict((k, v) for k, v in list(req.headers.items())
                            if k not in headers))

                # We want to make an HTTP/1.1 request, but the addinfourl
                # class isn't prepared to deal with a persistent connection.
                # It will try to read all remaining data from the socket,
                # which will block while the server waits for the next request.
                # So make sure the connection gets closed after the (only)
                # request.

                # Don't close connection when connection_cache is enabled,
                if fetch.connection_cache is None:
                    headers["Connection"] = "close"
                else:
                    headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0

                headers = dict(
                    (name.title(), val) for name, val in list(headers.items()))

                if req._tunnel_host:
                    tunnel_headers = {}
                    proxy_auth_hdr = "Proxy-Authorization"
                    if proxy_auth_hdr in headers:
                        tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                        # Proxy-Authorization should not be sent to origin
                        # server.
                        del headers[proxy_auth_hdr]
                    h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

                try:
                    h.request(req.get_method(), req.selector, req.data, headers)
                except socket.error as err: # XXX what error?
                    # Don't close connection when cache is enabled.
                    # Instead, try to detect connections that are no longer
                    # usable (for example, closed unexpectedly) and remove
                    # them from the cache.
                    if fetch.connection_cache is None:
                        h.close()
                    elif isinstance(err, OSError) and err.errno == errno.EBADF:
                        # This happens when the server closes the connection despite the Keep-Alive.
                        # Apparently urllib then uses the file descriptor, expecting it to be
                        # connected, when in reality the connection is already gone.
                        # We let the request fail and expect it to be
                        # tried once more ("try_again" in check_status()),
                        # with the dead connection removed from the cache.
                        # If it still fails, we give up, which can happen for bad
                        # HTTP proxy settings.
                        fetch.connection_cache.remove_connection(h.host, h.port)
                    raise urllib.error.URLError(err)
                else:
                    r = h.getresponse()

                # Pick apart the HTTPResponse object to get the addinfourl
                # object initialized properly.

                # Wrap the HTTPResponse object in socket's file object adapter
                # for Windows. That adapter calls recv(), so delegate recv()
                # to read(). This weird wrapping allows the returned object to
                # have readline() and readlines() methods.

                # XXX It might be better to extract the read buffering code
                # out of socket._fileobject() and into a base class.
                r.recv = r.read

                # no data, just have to read
                r.read()
                class fp_dummy(object):
                    def read(self):
                        return ""
                    def readline(self):
                        return ""
                    def close(self):
                        pass
                    closed = False

                resp = urllib.response.addinfourl(fp_dummy(), r.msg, req.get_full_url())
                resp.code = r.status
                resp.msg = r.reason

                # Close connection when server request it.
                if fetch.connection_cache is not None:
                    if 'Connection' in r.msg and r.msg['Connection'] == 'close':
                        fetch.connection_cache.remove_connection(h.host, h.port)

                return resp

        class HTTPMethodFallback(urllib.request.BaseHandler):
            """
            Fallback to GET if HEAD is not allowed (405 HTTP error)
            """
            def http_error_405(self, req, fp, code, msg, headers):
                fp.read()
                fp.close()

                if req.get_method() != 'GET':
                    newheaders = dict((k, v) for k, v in list(req.headers.items())
                                      if k.lower() not in ("content-length", "content-type"))
                    return self.parent.open(urllib.request.Request(req.get_full_url(),
                                                      headers=newheaders,
                                                      origin_req_host=req.origin_req_host,
                                                      unverifiable=True))

                raise urllib.request.HTTPError(req, code, msg, headers, None)

            # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
            # Forbidden when they actually mean 405 Method Not Allowed.
            http_error_403 = http_error_405


        class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
            """
            urllib2.HTTPRedirectHandler resets the method to GET on redirect,
            when we want to follow redirects using the original method.
            """
            def redirect_request(self, req, fp, code, msg, headers, newurl):
                newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
                newreq.get_method = req.get_method
                return newreq

        # We need to update the environment here as both the proxy and HTTPS
        # handlers need variables set. The proxy needs http_proxy and friends to
        # be set, and HTTPSHandler ends up calling into openssl to load the
        # certificates. In buildtools configurations this will be looking at the
        # wrong place for certificates by default: we set SSL_CERT_FILE to the
        # right location in the buildtools environment script but as BitBake
        # prunes the environment this is lost. When binaries are executed
        # runfetchcmd ensures these values are in the environment, but this is
        # pure Python so we need to update the environment.
        #
        # Avoid tramping the environment too much by using bb.utils.environment
        # to scope the changes to the build_opener request, which is when the
        # environment lookups happen.
        newenv = bb.fetch2.get_fetcher_environment(d)

        with bb.utils.environment(**newenv):
            import ssl

            if self.check_certs(d):
                context = ssl.create_default_context()
            else:
                context = ssl._create_unverified_context()

            handlers = [FixedHTTPRedirectHandler,
                        HTTPMethodFallback,
                        urllib.request.ProxyHandler(),
                        CacheHTTPHandler(),
                        urllib.request.HTTPSHandler(context=context)]
            opener = urllib.request.build_opener(*handlers)

            try:
                # Strip any ";name=value" fetcher parameters before probing.
                uri_base = ud.url.split(";")[0]
                uri = "{}://{}{}".format(urllib.parse.urlparse(uri_base).scheme, ud.host, ud.path)
                r = urllib.request.Request(uri)
                r.get_method = lambda: "HEAD"
                # Some servers (FusionForge, as used on Alioth) require that the
                # optional Accept header is set.
                r.add_header("Accept", "*/*")
                r.add_header("User-Agent", self.user_agent)
                def add_basic_auth(login_str, request):
                    '''Adds Basic auth to http request, pass in login:password as string'''
                    import base64
                    encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
                    authheader = "Basic %s" % encodeuser
                    r.add_header("Authorization", authheader)

                if ud.user and ud.pswd:
                    add_basic_auth(ud.user + ':' + ud.pswd, r)

                try:
                    import netrc
                    n = netrc.netrc()
                    login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname)
                    add_basic_auth("%s:%s" % (login, password), r)
                except (TypeError, ImportError, IOError, netrc.NetrcParseError):
                    # No netrc entry (or no netrc at all) - proceed without it
                    pass

                with opener.open(r, timeout=30) as response:
                    pass
            except (urllib.error.URLError, ConnectionResetError) as e:
                # Both error types get identical retry handling (these were
                # previously two duplicated except blocks).
                if try_again:
                    logger.debug2("checkstatus: trying again")
                    return self.checkstatus(fetch, ud, d, False)
                else:
                    # debug for now to avoid spamming the logs in e.g. remote sstate searches
                    logger.debug2("checkstatus() urlopen failed: %s" % e)
                    return False

        return True

    def _parse_path(self, regex, s):
        """
        Find and group name, version and archive type in the given string s
        """

        m = regex.search(s)
        if m:
            pname = ''
            pver = ''
            ptype = ''

            mdict = m.groupdict()
            if 'name' in mdict.keys():
                pname = mdict['name']
            if 'pver' in mdict.keys():
                pver = mdict['pver']
            if 'type' in mdict.keys():
                ptype = mdict['type']

            bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype))

            return (pname, pver, ptype)

        return None

    def _modelate_version(self, version):
        # Normalise a version string so vercmp() orders it sensibly:
        # separators become dots and rc/beta/alpha markers are mapped to
        # numeric components (rc > beta > alpha).
        if version[0] in ['.', '-']:
            if version[1].isdigit():
                version = version[1] + version[0] + version[2:len(version)]
            else:
                version = version[1:len(version)]

        version = re.sub('-', '.', version)
        version = re.sub('_', '.', version)
        version = re.sub('(rc)+', '.1000.', version)
        version = re.sub('(beta)+', '.100.', version)
        version = re.sub('(alpha)+', '.10.', version)
        if version[0] == 'v':
            version = version[1:len(version)]
        return version

    def _vercmp(self, old, new):
        """
        Check whether 'new' is newer than 'old' version. We use existing vercmp() for the
        purpose. PE is cleared in comparison as it's not for build, and PR is cleared too
        for simplicity as it's somehow difficult to get from various upstream format
        """

        (oldpn, oldpv, oldsuffix) = old
        (newpn, newpv, newsuffix) = new

        # Check for a new suffix type that we have never heard of before
        if newsuffix:
            m = self.suffix_regex_comp.search(newsuffix)
            if not m:
                bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
                return False

        # Not our package so ignore it
        if oldpn != newpn:
            return False

        oldpv = self._modelate_version(oldpv)
        newpv = self._modelate_version(newpv)

        return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, ""))

    def _fetch_index(self, uri, ud, d):
        """
        Run fetch checkstatus to get directory information
        """
        # Note: a stray "f = tempfile.NamedTemporaryFile()" used to sit here;
        # it was immediately shadowed by the "with" below and leaked an open
        # file descriptor and temp file on every call, so it was removed.
        with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
            fetchcmd = self.basecmd
            fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'"
            try:
                self._runwget(ud, d, fetchcmd, True, workdir=workdir)
                fetchresult = f.read()
            except bb.fetch2.BBFetchException:
                # Best-effort: an unreachable index simply yields no listing
                fetchresult = ""

        return fetchresult

    def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
        """
        Return the latest version of a package inside a given directory path
        If error or no version, return ""
        """
        valid = 0
        version = ['', '', '']

        bb.debug(3, "VersionURL: %s" % (url))
        soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            bb.debug(3, "*** %s NO SOUP" % (url))
            return ""

        for line in soup.find_all('a', href=True):
            bb.debug(3, "line['href'] = '%s'" % (line['href']))
            bb.debug(3, "line = '%s'" % (str(line)))

            newver = self._parse_path(package_regex, line['href'])
            if not newver:
                newver = self._parse_path(package_regex, str(line))

            if newver:
                bb.debug(3, "Upstream version found: %s" % newver[1])
                if valid == 0:
                    version = newver
                    valid = 1
                elif self._vercmp(version, newver) < 0:
                    version = newver

        pupver = re.sub('_', '.', version[1])

        bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
                 (package, pupver or "N/A", current_version[1]))

        if valid:
            return pupver

        return ""

    def _check_latest_version_by_dir(self, dirver, package, package_regex, current_version, ud, d):
        """
        Scan every directory in order to get upstream version.
        """
        version_dir = ['', '', '']
        version = ['', '', '']

        dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])*(\d+))")
        s = dirver_regex.search(dirver)
        if s:
            version_dir[1] = s.group('ver')
        else:
            version_dir[1] = dirver

        dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
                ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
        bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))

        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            return version[1]

        for line in soup.find_all('a', href=True):
            s = dirver_regex.search(line['href'].strip("/"))
            if s:
                sver = s.group('ver')

                # When prefix is part of the version directory it need to
                # ensure that only version directory is used so remove previous
                # directories if exists.
                #
                # Example: pfx = '/dir1/dir2/v' and version = '2.5' the expected
                # result is v2.5.
                spfx = s.group('pfx').split('/')[-1]

                version_dir_new = ['', sver, '']
                if self._vercmp(version_dir, version_dir_new) <= 0:
                    dirver_new = spfx + sver
                    path = ud.path.replace(dirver, dirver_new, True) \
                        .split(package)[0]
                    uri = bb.fetch.encodeurl([ud.type, ud.host, path,
                        ud.user, ud.pswd, {}])

                    pupver = self._check_latest_version(uri,
                            package, package_regex, current_version, ud, d)
                    if pupver:
                        version[1] = pupver

                    version_dir = version_dir_new

        return version[1]

    def _init_regexes(self, package, ud, d):
        """
        Match as many patterns as possible such as:
                gnome-common-2.20.0.tar.gz (most common format)
                gtk+-2.90.1.tar.gz
                xf86-input-synaptics-12.6.9.tar.gz
                dri2proto-2.3.tar.gz
                blktool_4.orig.tar.gz
                libid3tag-0.15.1b.tar.gz
                unzip552.tar.gz
                icu4c-3_6-src.tgz
                genext2fs_1.3.orig.tar.gz
                gst-fluendo-mp3
        """
        # match most patterns which uses "-" as separator to version digits
        pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
        # a loose pattern such as for unzip552.tar.gz
        pn_prefix2 = r"[a-zA-Z]+"
        # a loose pattern such as for 80325-quicky-0.4.tar.gz
        pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+"
        # Save the Package Name (pn) Regex for use later
        pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)

        # match version
        pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"

        # match arch
        parch_regex = "-source|_all_"

        # src.rpm extension was added only for rpm package. Can be removed if the rpm
        # packaged will always be considered as having to be manually upgraded
        psuffix_regex = r"(tar\.\w+|tgz|zip|xz|rpm|bz2|orig\.tar\.\w+|src\.tar\.\w+|src\.tgz|svnr\d+\.tar\.\w+|stable\.tar\.\w+|src\.rpm)"

        # match name, version and archive type of a package
        package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
                                                    % (pn_regex, pver_regex, parch_regex, psuffix_regex))
        self.suffix_regex_comp = re.compile(psuffix_regex)

        # compile regex, can be specific by package or generic regex
        pn_regex = d.getVar('UPSTREAM_CHECK_REGEX')
        if pn_regex:
            package_custom_regex_comp = re.compile(pn_regex)
        else:
            version = self._parse_path(package_regex_comp, package)
            if version:
                package_custom_regex_comp = re.compile(
                    r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
                    (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))
            else:
                package_custom_regex_comp = None

        return package_custom_regex_comp

    def latest_versionstring(self, ud, d):
        """
        Manipulate the URL and try to obtain the latest package version

        sanity check to ensure same name and type.
        """
        package = ud.path.split("/")[-1]
        current_version = ['', d.getVar('PV'), '']

        """possible to have no version in pkg name, such as spectrum-fw"""
        if not re.search(r"\d+", package):
            current_version[1] = re.sub('_', '.', current_version[1])
            current_version[1] = re.sub('-', '.', current_version[1])
            return (current_version[1], '')

        package_regex = self._init_regexes(package, ud, d)
        if package_regex is None:
            bb.warn("latest_versionstring: package %s don't match pattern" % (package))
            return ('', '')
        bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))

        uri = ""
        regex_uri = d.getVar("UPSTREAM_CHECK_URI")
        if not regex_uri:
            path = ud.path.split(package)[0]

            # search for version matches on folders inside the path, like:
            # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
            dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
            m = dirver_regex.findall(path)
            if m:
                pn = d.getVar('PN')
                dirver = m[-1][0]

                dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
                if not dirver_pn_regex.search(dirver):
                    return (self._check_latest_version_by_dir(dirver,
                        package, package_regex, current_version, ud, d), '')

            uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])
        else:
            uri = regex_uri

        return (self._check_latest_version(uri, package, package_regex,
                current_version, ud, d), '')
663 current_version, ud, d), '')