"""
BitBake 'Fetch' implementations

Classes for obtaining upstream sources for the
BitBake build tools.

"""

# Copyright (C) 2003, 2004  Chris Larson
#
# SPDX-License-Identifier: GPL-2.0-only
#
# Based on functions from the base bb module, Copyright 2003 Holger Schurig
import shlex
import re
import tempfile
import os
import errno
import bb
import bb.progress
import socket
import http.client
import urllib.request, urllib.parse, urllib.error
from bb.fetch2 import FetchMethod
from bb.fetch2 import FetchError
from bb.fetch2 import logger
from bb.fetch2 import runfetchcmd
from bb.utils import export_proxies
from bs4 import BeautifulSoup
from bs4 import SoupStrainer

class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
    """
    Extract progress information from wget output.
    Note: relies on --progress=dot (with -v or without -q/-nv) being
    specified on the wget command line.
    """
    def __init__(self, d):
        super(WgetProgressHandler, self).__init__(d)
        # Send an initial progress event so the bar gets shown
        self._fire_progress(0)

    def writeline(self, line):
        percs = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line)
        if percs:
            progress = int(percs[-1][0])
            rate = percs[-1][1] + '/s'
            self.update(progress, rate)
            return False
        return True

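# Illustration only: a representative --progress=dot line that writeline()
# above consumes (column layout per wget's dot style):
#
#    1450K .......... .......... .......... .......... ..........  2% 1.71M 27s
#
# The regex picks up the last "<percent>% <rate>" pair, so this reports
# progress=2 and rate='1.71M/s', and the line is filtered out of the log.
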
class Wget(FetchMethod):
    """Class to fetch urls via 'wget'"""
    def supports(self, ud, d):
        """
        Check to see if a given url can be fetched with wget.
        """
        return ud.type in ['http', 'https', 'ftp']

    def recommends_checksum(self, urldata):
        return True

    def urldata_init(self, ud, d):
        if 'protocol' in ud.parm:
            if ud.parm['protocol'] == 'git':
                raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)

        if 'downloadfilename' in ud.parm:
            ud.basename = ud.parm['downloadfilename']
        else:
            ud.basename = os.path.basename(ud.path)

        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
        if not ud.localfile:
            ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))

        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate"

    def _runwget(self, ud, d, command, quiet, workdir=None):
        progresshandler = WgetProgressHandler(d)

        logger.debug(2, "Fetching %s using command '%s'" % (ud.url, command))
        bb.fetch2.check_network_access(d, command, ud.url)
        runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)

    def download(self, ud, d):
        """Fetch urls"""

        fetchcmd = self.basecmd

        if 'downloadfilename' in ud.parm:
            localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile)
            bb.utils.mkdirhier(os.path.dirname(localpath))
            fetchcmd += " -O %s" % shlex.quote(localpath)

        if ud.user and ud.pswd:
            fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd)

        uri = ud.url.split(";")[0]
        if os.path.exists(ud.localpath):
            # The file exists, but we didn't complete it... trying again
            fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
        else:
            fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)

        self._runwget(ud, d, fetchcmd, False)

        # Sanity check since wget can pretend it succeeded when it didn't
        # Also, this used to happen if sourceforge sent us to the mirror page
        if not os.path.exists(ud.localpath):
            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)

        if os.path.getsize(ud.localpath) == 0:
            os.remove(ud.localpath)
            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)

        return True

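    # Illustration only: for a hypothetical SRC_URI of
    # "https://example.com/foo-1.0.tar.gz" with no extra parameters and the
    # default FETCHCMD_wget, download() ends up running roughly:
    #
    #   /usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate \
    #       -P <DL_DIR> 'https://example.com/foo-1.0.tar.gz' --progress=dot -v
    #
    # with -c added when a partial download already exists in DL_DIR.
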
    def checkstatus(self, fetch, ud, d, try_again=True):
        class HTTPConnectionCache(http.client.HTTPConnection):
            if fetch.connection_cache:
                def connect(self):
                    """Connect to the host and port specified in __init__."""

                    sock = fetch.connection_cache.get_connection(self.host, self.port)
                    if sock:
                        self.sock = sock
                    else:
                        self.sock = socket.create_connection((self.host, self.port),
                                    self.timeout, self.source_address)
                        fetch.connection_cache.add_connection(self.host, self.port, self.sock)

                    if self._tunnel_host:
                        self._tunnel()

        class CacheHTTPHandler(urllib.request.HTTPHandler):
            def http_open(self, req):
                return self.do_open(HTTPConnectionCache, req)

            def do_open(self, http_class, req):
                """Return an addinfourl object for the request, using http_class.

                http_class must implement the HTTPConnection API from httplib.
                The addinfourl return value is a file-like object. It also
                has methods and attributes including:
                    - info(): return a mimetools.Message object for the headers
                    - geturl(): return the original request URL
                    - code: HTTP status code
                """
                host = req.host
                if not host:
                    raise urllib.error.URLError('no host given')

                h = http_class(host, timeout=req.timeout) # will parse host:port
                h.set_debuglevel(self._debuglevel)

                headers = dict(req.unredirected_hdrs)
                headers.update(dict((k, v) for k, v in list(req.headers.items())
                               if k not in headers))

                # We want to make an HTTP/1.1 request, but the addinfourl
                # class isn't prepared to deal with a persistent connection.
                # It will try to read all remaining data from the socket,
                # which will block while the server waits for the next request.
                # So make sure the connection gets closed after the (only)
                # request.

                # Don't close the connection when connection_cache is enabled
                if fetch.connection_cache is None:
                    headers["Connection"] = "close"
                else:
                    headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0

                headers = dict(
                    (name.title(), val) for name, val in list(headers.items()))

                if req._tunnel_host:
                    tunnel_headers = {}
                    proxy_auth_hdr = "Proxy-Authorization"
                    if proxy_auth_hdr in headers:
                        tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                        # Proxy-Authorization should not be sent to origin
                        # server.
                        del headers[proxy_auth_hdr]
                    h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

                try:
                    h.request(req.get_method(), req.selector, req.data, headers)
                except socket.error as err: # XXX what error?
                    # Don't close the connection when the cache is enabled.
                    # Instead, try to detect connections that are no longer
                    # usable (for example, closed unexpectedly) and remove
                    # them from the cache.
                    if fetch.connection_cache is None:
                        h.close()
                    elif isinstance(err, OSError) and err.errno == errno.EBADF:
                        # This happens when the server closes the connection despite the Keep-Alive.
                        # Apparently urllib then uses the file descriptor, expecting it to be
                        # connected, when in reality the connection is already gone.
                        # We let the request fail and expect it to be
                        # tried once more ("try_again" in checkstatus()),
                        # with the dead connection removed from the cache.
                        # If it still fails, we give up, which can happen for bad
                        # HTTP proxy settings.
                        fetch.connection_cache.remove_connection(h.host, h.port)
                    raise urllib.error.URLError(err)
                else:
                    try:
                        r = h.getresponse(buffering=True)
                    except TypeError: # buffering kw not supported
                        r = h.getresponse()

                # Pick apart the HTTPResponse object to get the addinfourl
                # object initialized properly.

                # Wrap the HTTPResponse object in socket's file object adapter
                # for Windows. That adapter calls recv(), so delegate recv()
                # to read(). This weird wrapping allows the returned object to
                # have readline() and readlines() methods.

                # XXX It might be better to extract the read buffering code
                # out of socket._fileobject() and into a base class.
                r.recv = r.read

                # no data, just have to read
                r.read()
                class fp_dummy(object):
                    def read(self):
                        return ""
                    def readline(self):
                        return ""
                    def close(self):
                        pass
                    closed = False

                resp = urllib.response.addinfourl(fp_dummy(), r.msg, req.get_full_url())
                resp.code = r.status
                resp.msg = r.reason

                # Close the connection when the server requests it.
                if fetch.connection_cache is not None:
                    if 'Connection' in r.msg and r.msg['Connection'] == 'close':
                        fetch.connection_cache.remove_connection(h.host, h.port)

                return resp

        class HTTPMethodFallback(urllib.request.BaseHandler):
            """
            Fallback to GET if HEAD is not allowed (405 HTTP error)
            """
            def http_error_405(self, req, fp, code, msg, headers):
                fp.read()
                fp.close()

                if req.get_method() != 'GET':
                    newheaders = dict((k, v) for k, v in list(req.headers.items())
                                      if k.lower() not in ("content-length", "content-type"))
                    return self.parent.open(urllib.request.Request(req.get_full_url(),
                                                                   headers=newheaders,
                                                                   origin_req_host=req.origin_req_host,
                                                                   unverifiable=True))

                raise urllib.request.HTTPError(req, code, msg, headers, None)

            # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
            # Forbidden when they actually mean 405 Method Not Allowed.
            http_error_403 = http_error_405

        class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
            """
            urllib.request.HTTPRedirectHandler resets the method to GET on
            redirect, when we want to follow redirects using the original
            method.
            """
            def redirect_request(self, req, fp, code, msg, headers, newurl):
                newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
                newreq.get_method = req.get_method
                return newreq
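
        # Note (illustration): with the stock HTTPRedirectHandler, the HEAD
        # request issued below would be reissued as GET after a 301/302,
        # pulling down the body that checkstatus() is trying to avoid
        # transferring; FixedHTTPRedirectHandler keeps the original method
        # across redirects.
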
        exported_proxies = export_proxies(d)

        handlers = [FixedHTTPRedirectHandler, HTTPMethodFallback]
        if exported_proxies:
            handlers.append(urllib.request.ProxyHandler())
        handlers.append(CacheHTTPHandler())
        # Since Python 2.7.9 ssl cert validation is enabled by default
        # (see PEP-0476); this causes verification errors on some https
        # servers, so disable it by default.
        import ssl
        if hasattr(ssl, '_create_unverified_context'):
            handlers.append(urllib.request.HTTPSHandler(context=ssl._create_unverified_context()))
        opener = urllib.request.build_opener(*handlers)

        try:
            uri = ud.url.split(";")[0]
            r = urllib.request.Request(uri)
            r.get_method = lambda: "HEAD"
            # Some servers (FusionForge, as used on Alioth) require that the
            # optional Accept header is set.
            r.add_header("Accept", "*/*")
            r.add_header("User-Agent", "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.12) Gecko/20101027 Ubuntu/9.10 (karmic) Firefox/3.6.12")
            def add_basic_auth(login_str, request):
                '''Adds Basic auth to the http request, pass in login:password as a string'''
                import base64
                encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
                authheader = "Basic %s" % encodeuser
                request.add_header("Authorization", authheader)

            if ud.user and ud.pswd:
                add_basic_auth(ud.user + ':' + ud.pswd, r)

            import netrc
            try:
                n = netrc.netrc()
                login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname)
                add_basic_auth("%s:%s" % (login, password), r)
            except (TypeError, IOError, netrc.NetrcParseError):
                # No netrc file, no entry for this host, or a malformed file
                pass

            with opener.open(r) as response:
                pass
        except urllib.error.URLError as e:
            if try_again:
                logger.debug(2, "checkstatus: trying again")
                return self.checkstatus(fetch, ud, d, False)
            else:
                # debug for now to avoid spamming the logs in e.g. remote sstate searches
                logger.debug(2, "checkstatus() urlopen failed: %s" % e)
                return False
        return True

    def _parse_path(self, regex, s):
        """
        Find and group name, version and archive type in the given string s
        """

        m = regex.search(s)
        if m:
            pname = ''
            pver = ''
            ptype = ''

            mdict = m.groupdict()
            if 'name' in mdict:
                pname = mdict['name']
            if 'pver' in mdict:
                pver = mdict['pver']
            if 'type' in mdict:
                ptype = mdict['type']

            bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype))

            return (pname, pver, ptype)

        return None

    def _modelate_version(self, version):
        if version[0] in ['.', '-']:
            if version[1].isdigit():
                version = version[1] + version[0] + version[2:]
            else:
                version = version[1:]

        version = re.sub('-', '.', version)
        version = re.sub('_', '.', version)
        version = re.sub('(rc)+', '.1000.', version)
        version = re.sub('(beta)+', '.100.', version)
        version = re.sub('(alpha)+', '.10.', version)
        if version[0] == 'v':
            version = version[1:]
        return version

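    # Illustration only, values derived from the substitutions above:
    #   _modelate_version("v1.0")      -> "1.0"
    #   _modelate_version("1.0-rc2")   -> "1.0..1000.2"
    #   _modelate_version("1.2_beta1") -> "1.2..100.1"
    # Mapping rc/beta/alpha to .1000./.100./.10. makes bb.utils.vercmp() rank
    # alpha < beta < rc when pre-release versions are compared to each other.
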
    def _vercmp(self, old, new):
        """
        Check whether 'new' is newer than the 'old' version. We use the
        existing vercmp() for this. PE is cleared in the comparison as it's
        not used for the build, and PR is cleared too for simplicity, as it's
        difficult to recover from the various upstream formats.
        """

        (oldpn, oldpv, oldsuffix) = old
        (newpn, newpv, newsuffix) = new

        # Check for a new suffix type that we have never heard of before
        if newsuffix:
            m = self.suffix_regex_comp.search(newsuffix)
            if not m:
                bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
                return False

        # Not our package so ignore it
        if oldpn != newpn:
            return False

        oldpv = self._modelate_version(oldpv)
        newpv = self._modelate_version(newpv)

        return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, ""))

    def _fetch_index(self, uri, ud, d):
        """
        Run fetch checkstatus to get directory information
        """
        with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
            agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.12) Gecko/20101027 Ubuntu/9.10 (karmic) Firefox/3.6.12"
            fetchcmd = self.basecmd
            fetchcmd += " -O " + f.name + " --user-agent='" + agent + "' '" + uri + "'"
            try:
                self._runwget(ud, d, fetchcmd, True, workdir=workdir)
                fetchresult = f.read()
            except bb.fetch2.BBFetchException:
                fetchresult = ""

        return fetchresult

    def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
        """
        Return the latest version of a package inside a given directory path.
        On error, or if no version is found, return "".
        """
        valid = 0
        version = ['', '', '']

        bb.debug(3, "VersionURL: %s" % (url))
        soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            bb.debug(3, "*** %s NO SOUP" % (url))
            return ""

        for line in soup.find_all('a', href=True):
            bb.debug(3, "line['href'] = '%s'" % (line['href']))
            bb.debug(3, "line = '%s'" % (str(line)))

            newver = self._parse_path(package_regex, line['href'])
            if not newver:
                newver = self._parse_path(package_regex, str(line))

            if newver:
                bb.debug(3, "Upstream version found: %s" % newver[1])
                if valid == 0:
                    version = newver
                    valid = 1
                elif self._vercmp(version, newver) < 0:
                    version = newver

        pupver = re.sub('_', '.', version[1])

        bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
                (package, pupver or "N/A", current_version[1]))

        if valid:
            return pupver

        return ""

    def _check_latest_version_by_dir(self, dirver, package, package_regex, current_version, ud, d):
        """
        Scan every directory in order to get the upstream version.
        """
        version_dir = ['', '', '']
        version = ['', '', '']

        dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])+(\d+))")
        s = dirver_regex.search(dirver)
        if s:
            version_dir[1] = s.group('ver')
        else:
            version_dir[1] = dirver

        dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
                ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
        bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))

        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            return version[1]

        for line in soup.find_all('a', href=True):
            s = dirver_regex.search(line['href'].strip("/"))
            if s:
                sver = s.group('ver')

                # When the prefix is part of the version directory we need to
                # ensure that only the version directory is used, so remove
                # any preceding directories.
                #
                # Example: pfx = '/dir1/dir2/v' and version = '2.5'; the
                # expected result is 'v2.5'.
                spfx = s.group('pfx').split('/')[-1]

                version_dir_new = ['', sver, '']
                if self._vercmp(version_dir, version_dir_new) <= 0:
                    dirver_new = spfx + sver
                    path = ud.path.replace(dirver, dirver_new, 1) \
                                  .split(package)[0]
                    uri = bb.fetch.encodeurl([ud.type, ud.host, path,
                            ud.user, ud.pswd, {}])

                    pupver = self._check_latest_version(uri,
                            package, package_regex, current_version, ud, d)
                    if pupver:
                        version[1] = pupver

                    version_dir = version_dir_new

        return version[1]

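    # Illustration only, with a hypothetical layout: for
    # http://download.example.org/sources/foo/5.7/foo-5.7.1.tar.gz and
    # dirver "5.7", the method above lists .../sources/foo/, matches sibling
    # directories such as "5.8/" or "v6.0/" (pfx "v", ver "6.0"), and runs
    # _check_latest_version() inside the newest-looking directory.
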
    def _init_regexes(self, package, ud, d):
        """
        Match as many patterns as possible such as:
                gnome-common-2.20.0.tar.gz (most common format)
                gtk+-2.90.1.tar.gz
                xf86-input-synaptics-12.6.9.tar.gz
                dri2proto-2.3.tar.gz
                blktool_4.orig.tar.gz
                libid3tag-0.15.1b.tar.gz
                unzip552.tar.gz
                icu4c-3_6-src.tgz
                genext2fs_1.3.orig.tar.gz
                gst-fluendo-mp3
        """
        # match most patterns, which use "-" as the separator before the version digits
        pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
        # a loose pattern such as for unzip552.tar.gz
        pn_prefix2 = r"[a-zA-Z]+"
        # a loose pattern such as for 80325-quicky-0.4.tar.gz
        pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+"
        # Save the Package Name (pn) Regex for use later
        pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)

        # match version
        pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"

        # match arch
        parch_regex = "-source|_all_"

        # The src.rpm extension was added only for rpm packages. It can be
        # removed if rpm packages will always be considered as having to be
        # manually upgraded
        psuffix_regex = r"(tar\.gz|tgz|tar\.bz2|zip|xz|tar\.lz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)"

        # match name, version and archive type of a package
        package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
                                                    % (pn_regex, pver_regex, parch_regex, psuffix_regex))
        self.suffix_regex_comp = re.compile(psuffix_regex)

        # compile the regex; it can be a package-specific regex or the generic one
        pn_regex = d.getVar('UPSTREAM_CHECK_REGEX')
        if pn_regex:
            package_custom_regex_comp = re.compile(pn_regex)
        else:
            version = self._parse_path(package_regex_comp, package)
            if version:
                package_custom_regex_comp = re.compile(
                    r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
                    (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))
            else:
                package_custom_regex_comp = None

        return package_custom_regex_comp

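    # Illustration only: for the package "gnome-common-2.20.0.tar.gz" the
    # generic regex built above yields groups roughly like
    #   name='gnome-common-', pver='2.20.0', type='tar.gz'
    # and the returned package-specific regex then pins the name part to
    # re.escape('gnome-common-').
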
    def latest_versionstring(self, ud, d):
        """
        Manipulate the URL and try to obtain the latest package version.

        Sanity check to ensure the same name and type.
        """
        package = ud.path.split("/")[-1]
        current_version = ['', d.getVar('PV'), '']

        # possible to have no version in the pkg name, such as spectrum-fw
        if not re.search(r"\d+", package):
            current_version[1] = re.sub('_', '.', current_version[1])
            current_version[1] = re.sub('-', '.', current_version[1])
            return (current_version[1], '')

        package_regex = self._init_regexes(package, ud, d)
        if package_regex is None:
            bb.warn("latest_versionstring: package %s doesn't match pattern" % (package))
            return ('', '')
        bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))

        uri = ""
        regex_uri = d.getVar("UPSTREAM_CHECK_URI")
        if not regex_uri:
            path = ud.path.split(package)[0]

            # search for version matches in folders inside the path, like:
            # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
            dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
            m = dirver_regex.search(path)
            if m:
                pn = d.getVar('PN')
                dirver = m.group('dirver')

                dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
                if not dirver_pn_regex.search(dirver):
                    return (self._check_latest_version_by_dir(dirver,
                            package, package_regex, current_version, ud, d), '')

            uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])
        else:
            uri = regex_uri

        return (self._check_latest_version(uri, package, package_regex,
                current_version, ud, d), '')
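
# Illustration only (recipe-side configuration, not part of this module):
# recipes can steer the upstream version check performed above with, e.g.:
#
#   UPSTREAM_CHECK_URI = "https://ftp.example.org/releases/foo/"
#   UPSTREAM_CHECK_REGEX = "foo-(?P<pver>\d+(\.\d+)+)\.tar\.gz"
#
# (hypothetical values). UPSTREAM_CHECK_REGEX should define a 'pver' named
# group, which is what _parse_path() extracts as the candidate version.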