"""
BitBake 'Fetch' implementations

Classes for obtaining upstream sources for the
BitBake build tools.

"""

# Copyright (C) 2003, 2004 Chris Larson
#
# SPDX-License-Identifier: GPL-2.0-only
#
# Based on functions from the base bb module, Copyright 2003 Holger Schurig

import shlex
import re
import tempfile
import os
import errno
import bb
import bb.progress
import socket
import http.client
import urllib.request, urllib.parse, urllib.error
from bb.fetch2 import FetchMethod
from bb.fetch2 import FetchError
from bb.fetch2 import logger
from bb.fetch2 import runfetchcmd
from bb.utils import export_proxies
from bs4 import BeautifulSoup
from bs4 import SoupStrainer

class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
    """
    Extract progress information from wget output.
    Note: relies on --progress=dot (with -v or without -q/-nv) being
    specified on the wget command line.
    """
    def __init__(self, d):
        super(WgetProgressHandler, self).__init__(d)
        # Send an initial progress event so the bar gets shown
        self._fire_progress(0)

    def writeline(self, line):
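        # With --progress=dot, wget emits lines such as (illustrative):
        #   "  1550K .......... .......... ..........  45% 1.21M 12s"
        # so capture the last "<percent>% <rate>" pair on the line.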
        percs = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line)
        if percs:
            progress = int(percs[-1][0])
            rate = percs[-1][1] + '/s'
            self.update(progress, rate)
            return False
        return True


class Wget(FetchMethod):
    """Class to fetch urls via 'wget'"""

    # CDNs like CloudFlare may do a 'browser integrity test' which can fail
    # with the standard wget/urllib User-Agent, so pretend to be a modern
    # browser.
    user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"

    def supports(self, ud, d):
        """
        Check to see if a given url can be fetched with wget.
        """
        return ud.type in ['http', 'https', 'ftp']

    def recommends_checksum(self, urldata):
        return True

    def urldata_init(self, ud, d):
        if 'protocol' in ud.parm:
            if ud.parm['protocol'] == 'git':
                raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)

        if 'downloadfilename' in ud.parm:
            ud.basename = ud.parm['downloadfilename']
        else:
            ud.basename = os.path.basename(ud.path)

        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
        if not ud.localfile:
            ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))

        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate"

    def _runwget(self, ud, d, command, quiet, workdir=None):

        progresshandler = WgetProgressHandler(d)

        logger.debug2("Fetching %s using command '%s'" % (ud.url, command))
        bb.fetch2.check_network_access(d, command, ud.url)
        runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)

    def download(self, ud, d):
        """Fetch urls"""

        fetchcmd = self.basecmd

        if 'downloadfilename' in ud.parm:
            localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile)
            bb.utils.mkdirhier(os.path.dirname(localpath))
            fetchcmd += " -O %s" % shlex.quote(localpath)

        if ud.user and ud.pswd:
            fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd)

        uri = ud.url.split(";")[0]
        if os.path.exists(ud.localpath):
            # The file exists, but we didn't complete it; resume the download
            fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
        else:
            fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)

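        # At this point fetchcmd looks roughly like (illustrative, with the
        # default FETCHCMD_wget and no extra parameters):
        #   /usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate \
        #       -P ${DL_DIR} 'https://example.com/foo-1.0.tar.gz'
        # _runwget() appends ' --progress=dot -v' before executing it.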
        self._runwget(ud, d, fetchcmd, False)

        # Sanity check since wget can pretend it succeeded when it didn't.
        # Also, this used to happen if sourceforge sent us to the mirror page.
        if not os.path.exists(ud.localpath):
            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)

        if os.path.getsize(ud.localpath) == 0:
            os.remove(ud.localpath)
            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)

        return True

    def checkstatus(self, fetch, ud, d, try_again=True):
        class HTTPConnectionCache(http.client.HTTPConnection):
            if fetch.connection_cache:
                def connect(self):
                    """Connect to the host and port specified in __init__."""

                    sock = fetch.connection_cache.get_connection(self.host, self.port)
                    if sock:
                        self.sock = sock
                    else:
                        self.sock = socket.create_connection((self.host, self.port),
                                    self.timeout, self.source_address)
                        fetch.connection_cache.add_connection(self.host, self.port, self.sock)

                    if self._tunnel_host:
                        self._tunnel()

        class CacheHTTPHandler(urllib.request.HTTPHandler):
            def http_open(self, req):
                return self.do_open(HTTPConnectionCache, req)

            def do_open(self, http_class, req):
                """Return an addinfourl object for the request, using http_class.

                http_class must implement the HTTPConnection API from httplib.
                The addinfourl return value is a file-like object. It also
                has methods and attributes including:
                    - info(): return a mimetools.Message object for the headers
                    - geturl(): return the original request URL
                    - code: HTTP status code
                """
                host = req.host
                if not host:
                    raise urllib.error.URLError('no host given')

                h = http_class(host, timeout=req.timeout) # will parse host:port
                h.set_debuglevel(self._debuglevel)

                headers = dict(req.unredirected_hdrs)
                headers.update(dict((k, v) for k, v in list(req.headers.items())
                            if k not in headers))

                # We want to make an HTTP/1.1 request, but the addinfourl
                # class isn't prepared to deal with a persistent connection.
                # It will try to read all remaining data from the socket,
                # which will block while the server waits for the next request.
                # So make sure the connection gets closed after the (only)
                # request.

                # Don't close the connection when connection_cache is enabled.
                if fetch.connection_cache is None:
                    headers["Connection"] = "close"
                else:
                    headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0

                headers = dict(
                    (name.title(), val) for name, val in list(headers.items()))

                if req._tunnel_host:
                    tunnel_headers = {}
                    proxy_auth_hdr = "Proxy-Authorization"
                    if proxy_auth_hdr in headers:
                        tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                        # Proxy-Authorization should not be sent to origin
                        # server.
                        del headers[proxy_auth_hdr]
                    h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

                try:
                    h.request(req.get_method(), req.selector, req.data, headers)
                except socket.error as err: # XXX what error?
                    # Don't close the connection when the cache is enabled.
                    # Instead, try to detect connections that are no longer
                    # usable (for example, closed unexpectedly) and remove
                    # them from the cache.
                    if fetch.connection_cache is None:
                        h.close()
                    elif isinstance(err, OSError) and err.errno == errno.EBADF:
                        # This happens when the server closes the connection despite the Keep-Alive.
                        # Apparently urllib then uses the file descriptor, expecting it to be
                        # connected, when in reality the connection is already gone.
                        # We let the request fail and expect it to be
                        # tried once more ("try_again" in check_status()),
                        # with the dead connection removed from the cache.
                        # If it still fails, we give up, which can happen for bad
                        # HTTP proxy settings.
                        fetch.connection_cache.remove_connection(h.host, h.port)
                    raise urllib.error.URLError(err)
                else:
                    r = h.getresponse()

                # Pick apart the HTTPResponse object to get the addinfourl
                # object initialized properly.

                # Wrap the HTTPResponse object in socket's file object adapter
                # for Windows. That adapter calls recv(), so delegate recv()
                # to read(). This weird wrapping allows the returned object to
                # have readline() and readlines() methods.

                # XXX It might be better to extract the read buffering code
                # out of socket._fileobject() and into a base class.
                r.recv = r.read

                # no data, just have to read
                r.read()
                class fp_dummy(object):
                    def read(self):
                        return ""
                    def readline(self):
                        return ""
                    def close(self):
                        pass
                    closed = False

                resp = urllib.response.addinfourl(fp_dummy(), r.msg, req.get_full_url())
                resp.code = r.status
                resp.msg = r.reason

                # Close the connection when the server requests it.
                if fetch.connection_cache is not None:
                    if 'Connection' in r.msg and r.msg['Connection'] == 'close':
                        fetch.connection_cache.remove_connection(h.host, h.port)

                return resp

        class HTTPMethodFallback(urllib.request.BaseHandler):
            """
            Fallback to GET if HEAD is not allowed (405 HTTP error)
            """
            def http_error_405(self, req, fp, code, msg, headers):
                fp.read()
                fp.close()

                if req.get_method() != 'GET':
                    newheaders = dict((k, v) for k, v in list(req.headers.items())
                                      if k.lower() not in ("content-length", "content-type"))
                    return self.parent.open(urllib.request.Request(req.get_full_url(),
                                                                   headers=newheaders,
                                                                   origin_req_host=req.origin_req_host,
                                                                   unverifiable=True))

                raise urllib.request.HTTPError(req, code, msg, headers, None)

            # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
            # Forbidden when they actually mean 405 Method Not Allowed.
            http_error_403 = http_error_405


        class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
            """
            urllib2.HTTPRedirectHandler resets the method to GET on redirect,
            when we want to follow redirects using the original method.
            """
            def redirect_request(self, req, fp, code, msg, headers, newurl):
                newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
                newreq.get_method = req.get_method
                return newreq

        exported_proxies = export_proxies(d)

        handlers = [FixedHTTPRedirectHandler, HTTPMethodFallback]
        if exported_proxies:
            handlers.append(urllib.request.ProxyHandler())
        handlers.append(CacheHTTPHandler())
        # Since Python 2.7.9 ssl cert validation is enabled by default
        # (see PEP-0476); this causes verification errors on some https
        # servers, so disable it by default.
        import ssl
        if hasattr(ssl, '_create_unverified_context'):
            handlers.append(urllib.request.HTTPSHandler(context=ssl._create_unverified_context()))
        opener = urllib.request.build_opener(*handlers)

        try:
            uri = ud.url.split(";")[0]
            r = urllib.request.Request(uri)
            r.get_method = lambda: "HEAD"
            # Some servers (FusionForge, as used on Alioth) require that the
            # optional Accept header is set.
            r.add_header("Accept", "*/*")
            r.add_header("User-Agent", self.user_agent)
            def add_basic_auth(login_str, request):
                '''Adds Basic auth to http request, pass in login:password as string'''
                import base64
                encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
                authheader = "Basic %s" % encodeuser
                # Use the passed-in request rather than the enclosing 'r' so
                # the helper works for any request object it is given.
                request.add_header("Authorization", authheader)

            if ud.user and ud.pswd:
                add_basic_auth(ud.user + ':' + ud.pswd, r)

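            # Also honour credentials from ~/.netrc when present; a typical
            # entry looks like (illustrative):
            #   machine example.com login anonymous password secret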
            try:
                import netrc
                n = netrc.netrc()
                login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname)
                add_basic_auth("%s:%s" % (login, password), r)
            except (TypeError, ImportError, IOError, netrc.NetrcParseError):
                pass

            with opener.open(r) as response:
                pass
        except urllib.error.URLError as e:
            if try_again:
                logger.debug2("checkstatus: trying again")
                return self.checkstatus(fetch, ud, d, False)
            else:
                # debug for now to avoid spamming the logs in e.g. remote sstate searches
                logger.debug2("checkstatus() urlopen failed: %s" % e)
                return False
        except ConnectionResetError as e:
            if try_again:
                logger.debug2("checkstatus: trying again")
                return self.checkstatus(fetch, ud, d, False)
            else:
                # debug for now to avoid spamming the logs in e.g. remote sstate searches
                logger.debug2("checkstatus() urlopen failed: %s" % e)
                return False
        return True

    def _parse_path(self, regex, s):
        """
        Find and group name, version and archive type in the given string s
        """

        m = regex.search(s)
        if m:
            pname = ''
            pver = ''
            ptype = ''

            mdict = m.groupdict()
            if 'name' in mdict.keys():
                pname = mdict['name']
            if 'pver' in mdict.keys():
                pver = mdict['pver']
            if 'type' in mdict.keys():
                ptype = mdict['type']

            bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype))

            return (pname, pver, ptype)

        return None

    def _modelate_version(self, version):
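        # Normalize a version string for bb.utils.vercmp(): separators become
        # '.', pre-release tags map to numeric components (alpha -> .10.,
        # beta -> .100., rc -> .1000., so alpha < beta < rc), and a leading
        # 'v' is dropped; e.g. "v2.5" -> "2.5" (illustrative).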
        if version[0] in ['.', '-']:
            if version[1].isdigit():
                version = version[1] + version[0] + version[2:len(version)]
            else:
                version = version[1:len(version)]

        version = re.sub('-', '.', version)
        version = re.sub('_', '.', version)
        version = re.sub('(rc)+', '.1000.', version)
        version = re.sub('(beta)+', '.100.', version)
        version = re.sub('(alpha)+', '.10.', version)
        if version[0] == 'v':
            version = version[1:len(version)]
        return version

    def _vercmp(self, old, new):
        """
        Check whether the 'new' version is newer than the 'old' one. We use the
        existing vercmp() for the purpose. PE is cleared in the comparison as
        it's not used for the build, and PR is cleared too for simplicity, as it
        is difficult to extract from the various upstream formats.
        """

        (oldpn, oldpv, oldsuffix) = old
        (newpn, newpv, newsuffix) = new

        # Check for a new suffix type that we have never heard of before
        if newsuffix:
            m = self.suffix_regex_comp.search(newsuffix)
            if not m:
                bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
                return False

        # Not our package so ignore it
        if oldpn != newpn:
            return False

        oldpv = self._modelate_version(oldpv)
        newpv = self._modelate_version(newpv)

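        # bb.utils.vercmp() conventions apply: the result is negative when
        # 'new' is newer than 'old', e.g. (illustrative)
        #   self._vercmp(('foo-', '1.2', 'tar.gz'), ('foo-', '1.3', 'tar.gz')) < 0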
        return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, ""))

    def _fetch_index(self, uri, ud, d):
        """
        Run fetch checkstatus to get directory information
        """
        with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
            fetchcmd = self.basecmd
            fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'"
            try:
                self._runwget(ud, d, fetchcmd, True, workdir=workdir)
                fetchresult = f.read()
            except bb.fetch2.BBFetchException:
                fetchresult = ""

        return fetchresult

    def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
        """
        Return the latest version of a package inside a given directory path
        If error or no version, return ""
        """
        valid = 0
        version = ['', '', '']

        bb.debug(3, "VersionURL: %s" % (url))
        soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            bb.debug(3, "*** %s NO SOUP" % (url))
            return ""

        for line in soup.find_all('a', href=True):
            bb.debug(3, "line['href'] = '%s'" % (line['href']))
            bb.debug(3, "line = '%s'" % (str(line)))

            newver = self._parse_path(package_regex, line['href'])
            if not newver:
                newver = self._parse_path(package_regex, str(line))

            if newver:
                bb.debug(3, "Upstream version found: %s" % newver[1])
                if valid == 0:
                    version = newver
                    valid = 1
                elif self._vercmp(version, newver) < 0:
                    version = newver

        pupver = re.sub('_', '.', version[1])

        bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
                 (package, pupver or "N/A", current_version[1]))

        if valid:
            return pupver

        return ""

    def _check_latest_version_by_dir(self, dirver, package, package_regex, current_version, ud, d):
        """
        Scan every version directory in order to get the upstream version.
        """
        version_dir = ['', '', '']
        version = ['', '', '']

        dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])*(\d+))")
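        # Illustrative: for dirver "v2.5" this yields pfx "v" and ver "2.5".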
        s = dirver_regex.search(dirver)
        if s:
            version_dir[1] = s.group('ver')
        else:
            version_dir[1] = dirver

        dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
                ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
        bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))

        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            return version[1]

        for line in soup.find_all('a', href=True):
            s = dirver_regex.search(line['href'].strip("/"))
            if s:
                sver = s.group('ver')

                # When the prefix is part of the version directory, make sure
                # that only the version directory itself is used, so strip off
                # any preceding directories.
                #
                # Example: with pfx = '/dir1/dir2/v' and version = '2.5', the
                # expected result is 'v2.5'.
                spfx = s.group('pfx').split('/')[-1]

                version_dir_new = ['', sver, '']
                if self._vercmp(version_dir, version_dir_new) <= 0:
                    dirver_new = spfx + sver
                    path = ud.path.replace(dirver, dirver_new, True) \
                        .split(package)[0]
                    uri = bb.fetch.encodeurl([ud.type, ud.host, path,
                        ud.user, ud.pswd, {}])

                    pupver = self._check_latest_version(uri,
                            package, package_regex, current_version, ud, d)
                    if pupver:
                        version[1] = pupver

                    version_dir = version_dir_new

        return version[1]

    def _init_regexes(self, package, ud, d):
        """
        Match as many patterns as possible such as:
                gnome-common-2.20.0.tar.gz (most common format)
                gtk+-2.90.1.tar.gz
                xf86-input-synaptics-12.6.9.tar.gz
                dri2proto-2.3.tar.gz
                blktool_4.orig.tar.gz
                libid3tag-0.15.1b.tar.gz
                unzip552.tar.gz
                icu4c-3_6-src.tgz
                genext2fs_1.3.orig.tar.gz
                gst-fluendo-mp3
        """
        # match most patterns which use "-" as the separator before the version digits
        pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
        # a loose pattern such as for unzip552.tar.gz
        pn_prefix2 = r"[a-zA-Z]+"
        # a loose pattern such as for 80325-quicky-0.4.tar.gz
        pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+"
        # Save the Package Name (pn) Regex for use later
        pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)

        # match version
        pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"

        # match arch
        parch_regex = "-source|_all_"

        # The src.rpm extension was added only for rpm packages. It can be
        # removed if rpm packages are always considered as having to be
        # manually upgraded.
        psuffix_regex = r"(tar\.gz|tgz|tar\.bz2|zip|xz|tar\.lz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)"

        # match name, version and archive type of a package
        package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
                                                % (pn_regex, pver_regex, parch_regex, psuffix_regex))
        self.suffix_regex_comp = re.compile(psuffix_regex)
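        # Illustrative: matched against "gnome-common-2.20.0.tar.gz", the
        # compiled pattern captures name "gnome-common-", pver "2.20.0" and
        # type "tar.gz".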

        # Compile the regex; it can be package-specific (UPSTREAM_CHECK_REGEX)
        # or the generic one built above.
        pn_regex = d.getVar('UPSTREAM_CHECK_REGEX')
        if pn_regex:
            package_custom_regex_comp = re.compile(pn_regex)
        else:
            version = self._parse_path(package_regex_comp, package)
            if version:
                package_custom_regex_comp = re.compile(
                    r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
                    (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))
            else:
                package_custom_regex_comp = None

        return package_custom_regex_comp

    def latest_versionstring(self, ud, d):
        """
        Manipulate the URL and try to obtain the latest package version.

        Sanity check to ensure the name and type match.
        """
        package = ud.path.split("/")[-1]
        current_version = ['', d.getVar('PV'), '']

        # It is possible to have no version in the pkg name, such as spectrum-fw
        if not re.search(r"\d+", package):
            current_version[1] = re.sub('_', '.', current_version[1])
            current_version[1] = re.sub('-', '.', current_version[1])
            return (current_version[1], '')

        package_regex = self._init_regexes(package, ud, d)
        if package_regex is None:
            bb.warn("latest_versionstring: package %s doesn't match pattern" % (package))
            return ('', '')
        bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))

        uri = ""
        regex_uri = d.getVar("UPSTREAM_CHECK_URI")
        if not regex_uri:
            path = ud.path.split(package)[0]

            # search for version matches in folders inside the path, like:
            # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
            dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
            m = dirver_regex.search(path)
            if m:
                pn = d.getVar('PN')
                dirver = m.group('dirver')

                dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
                if not dirver_pn_regex.search(dirver):
                    return (self._check_latest_version_by_dir(dirver,
                        package, package_regex, current_version, ud, d), '')

            uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])
        else:
            uri = regex_uri

        return (self._check_latest_version(uri, package, package_regex,
                current_version, ud, d), '')
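
# Minimal usage sketch (illustrative): this fetcher is normally driven through
# the generic bb.fetch2 API with a configured datastore 'd', e.g.
#   fetcher = bb.fetch2.Fetch(["https://example.com/foo-1.0.tar.gz"], d)
#   fetcher.download()
# which dispatches to Wget.download() for http/https/ftp URLs.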