"""
BitBake 'Fetch' implementations

Classes for obtaining upstream sources for the
BitBake build tools.

"""

# Copyright (C) 2003, 2004 Chris Larson
#
# SPDX-License-Identifier: GPL-2.0-only
#
# Based on functions from the base bb module, Copyright 2003 Holger Schurig

import shlex
import re
import tempfile
import os
import errno
import bb
import bb.progress
import socket
import http.client
import urllib.request, urllib.parse, urllib.error
from bb.fetch2 import FetchMethod
from bb.fetch2 import FetchError
from bb.fetch2 import logger
from bb.fetch2 import runfetchcmd
from bb.utils import export_proxies
from bs4 import BeautifulSoup
from bs4 import SoupStrainer

class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
    """
    Extract progress information from wget output.
    Note: relies on --progress=dot (with -v or without -q/-nv) being
    specified on the wget command line.
    """
    def __init__(self, d):
        super(WgetProgressHandler, self).__init__(d)
        # Send an initial progress event so the bar gets shown
        self._fire_progress(0)

    def writeline(self, line):
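        # wget's dot progress output looks roughly like (illustrative sample):
        #   1550K .......... .......... .......... ..........  76% 1.25M 0s
        # The findall() below captures [('76', '1.25M')] from such a line;
        # the last match supplies the percentage and rate passed to update().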
        percs = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line)
        if percs:
            progress = int(percs[-1][0])
            rate = percs[-1][1] + '/s'
            self.update(progress, rate)
            return False
        return True


class Wget(FetchMethod):
    """Class to fetch urls via 'wget'"""

    # CDNs like CloudFlare may do a 'browser integrity test' which can fail
    # with the standard wget/urllib User-Agent, so pretend to be a modern
    # browser.
    user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"

    def check_certs(self, d):
        """
        Should certificates be checked?
        """
        return (d.getVar("BB_CHECK_SSL_CERTS") or "1") != "0"

    def supports(self, ud, d):
        """
        Check to see if a given url can be fetched with wget.
        """
        return ud.type in ['http', 'https', 'ftp']

    def recommends_checksum(self, urldata):
        return True

    def urldata_init(self, ud, d):
        if 'protocol' in ud.parm:
            if ud.parm['protocol'] == 'git':
                raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)

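        # An illustrative SRC_URI using this parameter (hypothetical URL):
        #   https://example.com/download.php?id=42;downloadfilename=foo-1.0.tar.gz
        # saves the fetched file as foo-1.0.tar.gz instead of the URL basename.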
        if 'downloadfilename' in ud.parm:
            ud.basename = ud.parm['downloadfilename']
        else:
            ud.basename = os.path.basename(ud.path)

        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
        if not ud.localfile:
            ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))

        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp"

        if not self.check_certs(d):
            self.basecmd += " --no-check-certificate"

    def _runwget(self, ud, d, command, quiet, workdir=None):

        progresshandler = WgetProgressHandler(d)

        logger.debug2("Fetching %s using command '%s'" % (ud.url, command))
        bb.fetch2.check_network_access(d, command, ud.url)
        runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)

    def download(self, ud, d):
        """Fetch urls"""

        fetchcmd = self.basecmd

        if 'downloadfilename' in ud.parm:
            localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile)
            bb.utils.mkdirhier(os.path.dirname(localpath))
            fetchcmd += " -O %s" % shlex.quote(localpath)

        if ud.user and ud.pswd:
            fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd)

        uri = ud.url.split(";")[0]
        if os.path.exists(ud.localpath):
            # The file exists, but we didn't complete it, so try to resume
            fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
        else:
            fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)
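        # For a simple case, the assembled command looks like (illustrative):
        #   /usr/bin/env wget -t 2 -T 30 --passive-ftp -P ${DL_DIR} 'https://example.com/foo-1.0.tar.gz'
        # with ' --progress=dot -v' appended by _runwget() below.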

        self._runwget(ud, d, fetchcmd, False)

        # Sanity check since wget can pretend it succeeded when it didn't
        # Also, this used to happen if sourceforge sent us to the mirror page
        if not os.path.exists(ud.localpath):
            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)

        if os.path.getsize(ud.localpath) == 0:
            os.remove(ud.localpath)
            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)

        return True

    def checkstatus(self, fetch, ud, d, try_again=True):
        class HTTPConnectionCache(http.client.HTTPConnection):
            if fetch.connection_cache:
                def connect(self):
                    """Connect to the host and port specified in __init__."""

                    sock = fetch.connection_cache.get_connection(self.host, self.port)
                    if sock:
                        self.sock = sock
                    else:
                        self.sock = socket.create_connection((self.host, self.port),
                                    self.timeout, self.source_address)
                        fetch.connection_cache.add_connection(self.host, self.port, self.sock)

                    if self._tunnel_host:
                        self._tunnel()

        class CacheHTTPHandler(urllib.request.HTTPHandler):
            def http_open(self, req):
                return self.do_open(HTTPConnectionCache, req)

            def do_open(self, http_class, req):
                """Return an addinfourl object for the request, using http_class.

                http_class must implement the HTTPConnection API from httplib.
                The addinfourl return value is a file-like object. It also
                has methods and attributes including:
                    - info(): return a mimetools.Message object for the headers
                    - geturl(): return the original request URL
                    - code: HTTP status code
                """
                host = req.host
                if not host:
                    raise urllib.error.URLError('no host given')

                h = http_class(host, timeout=req.timeout) # will parse host:port
                h.set_debuglevel(self._debuglevel)

                headers = dict(req.unredirected_hdrs)
                headers.update(dict((k, v) for k, v in list(req.headers.items())
                            if k not in headers))

                # We want to make an HTTP/1.1 request, but the addinfourl
                # class isn't prepared to deal with a persistent connection.
                # It will try to read all remaining data from the socket,
                # which will block while the server waits for the next request.
                # So make sure the connection gets closed after the (only)
                # request.

                # Don't close connection when connection_cache is enabled,
                if fetch.connection_cache is None:
                    headers["Connection"] = "close"
                else:
                    headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0

                headers = dict(
                    (name.title(), val) for name, val in list(headers.items()))

                if req._tunnel_host:
                    tunnel_headers = {}
                    proxy_auth_hdr = "Proxy-Authorization"
                    if proxy_auth_hdr in headers:
                        tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                        # Proxy-Authorization should not be sent to origin
                        # server.
                        del headers[proxy_auth_hdr]
                    h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

                try:
                    h.request(req.get_method(), req.selector, req.data, headers)
                except socket.error as err: # XXX what error?
                    # Don't close connection when cache is enabled.
                    # Instead, try to detect connections that are no longer
                    # usable (for example, closed unexpectedly) and remove
                    # them from the cache.
                    if fetch.connection_cache is None:
                        h.close()
                    elif isinstance(err, OSError) and err.errno == errno.EBADF:
                        # This happens when the server closes the connection despite the Keep-Alive.
                        # Apparently urllib then uses the file descriptor, expecting it to be
                        # connected, when in reality the connection is already gone.
                        # We let the request fail and expect it to be
                        # tried once more ("try_again" in checkstatus()),
                        # with the dead connection removed from the cache.
                        # If it still fails, we give up, which can happen for bad
                        # HTTP proxy settings.
                        fetch.connection_cache.remove_connection(h.host, h.port)
                    raise urllib.error.URLError(err)
                else:
                    r = h.getresponse()

                # Pick apart the HTTPResponse object to get the addinfourl
                # object initialized properly.

                # Wrap the HTTPResponse object in socket's file object adapter
                # for Windows. That adapter calls recv(), so delegate recv()
                # to read(). This weird wrapping allows the returned object to
                # have readline() and readlines() methods.

                # XXX It might be better to extract the read buffering code
                # out of socket._fileobject() and into a base class.
                r.recv = r.read

                # no data, just have to read
                r.read()
                class fp_dummy(object):
                    def read(self):
                        return ""
                    def readline(self):
                        return ""
                    def close(self):
                        pass
                    closed = False

                resp = urllib.response.addinfourl(fp_dummy(), r.msg, req.get_full_url())
                resp.code = r.status
                resp.msg = r.reason

                # Close connection when the server requests it.
                if fetch.connection_cache is not None:
                    if 'Connection' in r.msg and r.msg['Connection'] == 'close':
                        fetch.connection_cache.remove_connection(h.host, h.port)

                return resp

        class HTTPMethodFallback(urllib.request.BaseHandler):
            """
            Fallback to GET if HEAD is not allowed (405 HTTP error)
            """
            def http_error_405(self, req, fp, code, msg, headers):
                fp.read()
                fp.close()

                if req.get_method() != 'GET':
                    newheaders = dict((k, v) for k, v in list(req.headers.items())
                                      if k.lower() not in ("content-length", "content-type"))
                    return self.parent.open(urllib.request.Request(req.get_full_url(),
                                                                   headers=newheaders,
                                                                   origin_req_host=req.origin_req_host,
                                                                   unverifiable=True))

                raise urllib.request.HTTPError(req, code, msg, headers, None)

            # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
            # Forbidden when they actually mean 405 Method Not Allowed.
            http_error_403 = http_error_405


        class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
            """
            urllib2.HTTPRedirectHandler resets the method to GET on redirect,
            but we want to follow redirects using the original method.
            """
            def redirect_request(self, req, fp, code, msg, headers, newurl):
                newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
                newreq.get_method = req.get_method
                return newreq

        # We need to update the environment here as both the proxy and HTTPS
        # handlers need variables set. The proxy needs http_proxy and friends to
        # be set, and HTTPSHandler ends up calling into openssl to load the
        # certificates. In buildtools configurations this will be looking at the
        # wrong place for certificates by default: we set SSL_CERT_FILE to the
        # right location in the buildtools environment script but as BitBake
        # prunes the environment this is lost. When binaries are executed
        # runfetchcmd ensures these values are in the environment, but this is
        # pure Python so we need to update the environment.
        #
        # Avoid trampling the environment too much by using bb.utils.environment
        # to scope the changes to the build_opener request, which is when the
        # environment lookups happen.
        newenv = {}
        for name in bb.fetch2.FETCH_EXPORT_VARS:
            value = d.getVar(name)
            if not value:
                origenv = d.getVar("BB_ORIGENV")
                if origenv:
                    value = origenv.getVar(name)
            if value:
                newenv[name] = value

        with bb.utils.environment(**newenv):
            import ssl

            if self.check_certs(d):
                context = ssl.create_default_context()
            else:
                context = ssl._create_unverified_context()

            handlers = [FixedHTTPRedirectHandler,
                        HTTPMethodFallback,
                        urllib.request.ProxyHandler(),
                        CacheHTTPHandler(),
                        urllib.request.HTTPSHandler(context=context)]
            opener = urllib.request.build_opener(*handlers)
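            # build_opener() chains these handlers together with urllib's
            # defaults, so the HEAD probe below gets proxy support, method-
            # preserving redirects, GET fallback and (optionally) cached
            # connections from a single opener.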

            try:
                uri = ud.url.split(";")[0]
                r = urllib.request.Request(uri)
                r.get_method = lambda: "HEAD"
                # Some servers (FusionForge, as used on Alioth) require that the
                # optional Accept header is set.
                r.add_header("Accept", "*/*")
                r.add_header("User-Agent", self.user_agent)
                def add_basic_auth(login_str, request):
                    '''Adds Basic auth to http request, pass in login:password as string'''
                    import base64
                    encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
                    authheader = "Basic %s" % encodeuser
                    request.add_header("Authorization", authheader)
347
Brad Bishop19323692019-04-05 15:28:33 -0400348 if ud.user and ud.pswd:
349 add_basic_auth(ud.user + ':' + ud.pswd, r)
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500350
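                # Fall back to credentials from ~/.netrc when available;
                # missing files or absent host entries are silently ignored.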
                try:
                    import netrc
                    n = netrc.netrc()
                    login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname)
                    add_basic_auth("%s:%s" % (login, password), r)
                except (TypeError, ImportError, IOError, netrc.NetrcParseError):
                    pass

                with opener.open(r) as response:
                    pass
            except urllib.error.URLError as e:
                if try_again:
                    logger.debug2("checkstatus: trying again")
                    return self.checkstatus(fetch, ud, d, False)
                else:
                    # debug for now to avoid spamming the logs in e.g. remote sstate searches
                    logger.debug2("checkstatus() urlopen failed: %s" % e)
                    return False
            except ConnectionResetError as e:
                if try_again:
                    logger.debug2("checkstatus: trying again")
                    return self.checkstatus(fetch, ud, d, False)
                else:
                    # debug for now to avoid spamming the logs in e.g. remote sstate searches
                    logger.debug2("checkstatus() urlopen failed: %s" % e)
                    return False
        return True

    def _parse_path(self, regex, s):
        """
        Find and group name, version and archive type in the given string s
        """

        m = regex.search(s)
        if m:
            pname = ''
            pver = ''
            ptype = ''

            mdict = m.groupdict()
            if 'name' in mdict.keys():
                pname = mdict['name']
            if 'pver' in mdict.keys():
                pver = mdict['pver']
            if 'type' in mdict.keys():
                ptype = mdict['type']

            bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype))

            return (pname, pver, ptype)

        return None

    def _modelate_version(self, version):
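        # Normalize a version string for bb.utils.vercmp(): separators "-"
        # and "_" become ".", a leading "v" is dropped, and the markers
        # "rc"/"beta"/"alpha" map to ".1000."/".100."/".10." so that, e.g.
        # (illustrative) "v1.2_rc3" becomes "1.2..1000.3".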
        if version[0] in ['.', '-']:
            if version[1].isdigit():
                version = version[1] + version[0] + version[2:len(version)]
            else:
                version = version[1:len(version)]

        version = re.sub('-', '.', version)
        version = re.sub('_', '.', version)
        version = re.sub('(rc)+', '.1000.', version)
        version = re.sub('(beta)+', '.100.', version)
        version = re.sub('(alpha)+', '.10.', version)
        if version[0] == 'v':
            version = version[1:len(version)]
        return version

    def _vercmp(self, old, new):
        """
        Check whether 'new' is newer than the 'old' version. We use the
        existing vercmp() for this purpose. PE is cleared in the comparison
        as it's not relevant for the build, and PR is cleared too for
        simplicity, as it is difficult to extract from the various upstream
        naming formats.
        """

        (oldpn, oldpv, oldsuffix) = old
        (newpn, newpv, newsuffix) = new

        # Check for a new suffix type that we have never heard of before
        if newsuffix:
            m = self.suffix_regex_comp.search(newsuffix)
            if not m:
                bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
                return False

        # Not our package so ignore it
        if oldpn != newpn:
            return False

        oldpv = self._modelate_version(oldpv)
        newpv = self._modelate_version(newpv)

        return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, ""))

    def _fetch_index(self, uri, ud, d):
        """
        Run fetch checkstatus to get directory information
        """
        with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
            fetchcmd = self.basecmd
            fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'"
            try:
                self._runwget(ud, d, fetchcmd, True, workdir=workdir)
                fetchresult = f.read()
            except bb.fetch2.BBFetchException:
                fetchresult = ""

        return fetchresult

    def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
        """
        Return the latest version of a package inside a given directory path
        If error or no version, return ""
        """
        valid = 0
        version = ['', '', '']

        bb.debug(3, "VersionURL: %s" % (url))
        soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            bb.debug(3, "*** %s NO SOUP" % (url))
            return ""

        for line in soup.find_all('a', href=True):
            bb.debug(3, "line['href'] = '%s'" % (line['href']))
            bb.debug(3, "line = '%s'" % (str(line)))

            newver = self._parse_path(package_regex, line['href'])
            if not newver:
                newver = self._parse_path(package_regex, str(line))

            if newver:
                bb.debug(3, "Upstream version found: %s" % newver[1])
                if valid == 0:
                    version = newver
                    valid = 1
                elif self._vercmp(version, newver) < 0:
                    version = newver

        pupver = re.sub('_', '.', version[1])

        bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
                (package, pupver or "N/A", current_version[1]))

        if valid:
            return pupver

        return ""

    def _check_latest_version_by_dir(self, dirver, package, package_regex, current_version, ud, d):
        """
        Scan every directory in order to get upstream version.
        """
        version_dir = ['', '', '']
        version = ['', '', '']

        dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])*(\d+))")
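        # e.g. (illustrative) dirver "v2.5" yields pfx="v", ver="2.5",
        # while a bare "2.5" yields pfx="" and ver="2.5".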
        s = dirver_regex.search(dirver)
        if s:
            version_dir[1] = s.group('ver')
        else:
            version_dir[1] = dirver

        dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
                ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
        bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))

        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            return version[1]

        for line in soup.find_all('a', href=True):
            s = dirver_regex.search(line['href'].strip("/"))
            if s:
                sver = s.group('ver')

                # When the prefix is part of the version directory we need to
                # ensure that only the version directory itself is used, so
                # strip any leading directories.
                #
                # Example: with pfx = '/dir1/dir2/v' and version = '2.5' the
                # expected result is 'v2.5'.
                spfx = s.group('pfx').split('/')[-1]

                version_dir_new = ['', sver, '']
                if self._vercmp(version_dir, version_dir_new) <= 0:
                    dirver_new = spfx + sver
                    path = ud.path.replace(dirver, dirver_new, True) \
                                .split(package)[0]
                    uri = bb.fetch.encodeurl([ud.type, ud.host, path,
                            ud.user, ud.pswd, {}])

                    pupver = self._check_latest_version(uri,
                            package, package_regex, current_version, ud, d)
                    if pupver:
                        version[1] = pupver

                    version_dir = version_dir_new

        return version[1]

    def _init_regexes(self, package, ud, d):
        """
        Match as many patterns as possible such as:
            gnome-common-2.20.0.tar.gz (most common format)
            gtk+-2.90.1.tar.gz
            xf86-input-synaptics-12.6.9.tar.gz
            dri2proto-2.3.tar.gz
            blktool_4.orig.tar.gz
            libid3tag-0.15.1b.tar.gz
            unzip552.tar.gz
            icu4c-3_6-src.tgz
            genext2fs_1.3.orig.tar.gz
            gst-fluendo-mp3
        """
        # match most patterns which use "-" as the separator before the version digits
        pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
        # a loose pattern such as for unzip552.tar.gz
        pn_prefix2 = r"[a-zA-Z]+"
        # a loose pattern such as for 80325-quicky-0.4.tar.gz
        pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+"
        # Save the Package Name (pn) Regex for use later
        pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)

        # match version
        pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"

        # match arch
        parch_regex = "-source|_all_"

        # The src.rpm extension was added only for the rpm package. It can be
        # removed if rpm packages will always be considered as having to be
        # manually upgraded
        psuffix_regex = r"(tar\.gz|tgz|tar\.bz2|zip|xz|tar\.lz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)"

        # match name, version and archive type of a package
        package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
                                        % (pn_regex, pver_regex, parch_regex, psuffix_regex))
        self.suffix_regex_comp = re.compile(psuffix_regex)
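        # For example (illustrative), "gnome-common-2.20.0.tar.gz" is grouped
        # by package_regex_comp as name="gnome-common-", pver="2.20.0",
        # type="tar.gz".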

        # compile the regex; it can be package-specific or the generic one
        pn_regex = d.getVar('UPSTREAM_CHECK_REGEX')
        if pn_regex:
            package_custom_regex_comp = re.compile(pn_regex)
        else:
            version = self._parse_path(package_regex_comp, package)
            if version:
                package_custom_regex_comp = re.compile(
                    r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
                    (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))
            else:
                package_custom_regex_comp = None

        return package_custom_regex_comp

    def latest_versionstring(self, ud, d):
        """
        Manipulate the URL and try to obtain the latest package version.

        Sanity check to ensure the same name and type.
        """
        package = ud.path.split("/")[-1]
        current_version = ['', d.getVar('PV'), '']

        # It's possible to have no version in the package name, such as spectrum-fw
        if not re.search(r"\d+", package):
            current_version[1] = re.sub('_', '.', current_version[1])
            current_version[1] = re.sub('-', '.', current_version[1])
            return (current_version[1], '')

        package_regex = self._init_regexes(package, ud, d)
        if package_regex is None:
            bb.warn("latest_versionstring: package %s doesn't match pattern" % (package))
            return ('', '')
        bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))

        uri = ""
        regex_uri = d.getVar("UPSTREAM_CHECK_URI")
        if not regex_uri:
            path = ud.path.split(package)[0]

            # search for version matches on folders inside the path, like:
            # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
            dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
            m = dirver_regex.search(path)
            if m:
                pn = d.getVar('PN')
                dirver = m.group('dirver')

                dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
                if not dirver_pn_regex.search(dirver):
                    return (self._check_latest_version_by_dir(dirver,
                        package, package_regex, current_version, ud, d), '')

            uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])
        else:
            uri = regex_uri

        return (self._check_latest_version(uri, package, package_regex,
                current_version, ud, d), '')