"""
BitBake 'Fetch' implementations

Classes for obtaining upstream sources for the
BitBake build tools.

"""

# Copyright (C) 2003, 2004 Chris Larson
#
# SPDX-License-Identifier: GPL-2.0-only
#
# Based on functions from the base bb module, Copyright 2003 Holger Schurig

import shlex
import re
import tempfile
import os
import errno
import bb
import bb.progress
import socket
import http.client
import urllib.request, urllib.parse, urllib.error
from bb.fetch2 import FetchMethod
from bb.fetch2 import FetchError
from bb.fetch2 import logger
from bb.fetch2 import runfetchcmd
from bb.utils import export_proxies
from bs4 import BeautifulSoup
from bs4 import SoupStrainer

class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
    """
    Extract progress information from wget output.
    Note: relies on --progress=dot (with -v or without -q/-nv) being
    specified on the wget command line.
    """
    def __init__(self, d):
        super(WgetProgressHandler, self).__init__(d)
        # Send an initial progress event so the bar gets shown
        self._fire_progress(0)

    def writeline(self, line):
        percs = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line)
        if percs:
            progress = int(percs[-1][0])
            rate = percs[-1][1] + '/s'
            self.update(progress, rate)
            return False
        return True


class Wget(FetchMethod):
    """Class to fetch urls via 'wget'"""
    def supports(self, ud, d):
        """
        Check to see if a given url can be fetched with wget.
        """
        return ud.type in ['http', 'https', 'ftp']

    def recommends_checksum(self, urldata):
        return True

    def urldata_init(self, ud, d):
        if 'protocol' in ud.parm:
            if ud.parm['protocol'] == 'git':
                raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)

        if 'downloadfilename' in ud.parm:
            ud.basename = ud.parm['downloadfilename']
        else:
            ud.basename = os.path.basename(ud.path)

        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
        if not ud.localfile:
            ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))

        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate"

    def _runwget(self, ud, d, command, quiet, workdir=None):

        progresshandler = WgetProgressHandler(d)

        logger.debug(2, "Fetching %s using command '%s'" % (ud.url, command))
        bb.fetch2.check_network_access(d, command, ud.url)
        runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)

    def download(self, ud, d):
        """Fetch urls"""

        fetchcmd = self.basecmd

        if 'downloadfilename' in ud.parm:
            localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile)
            bb.utils.mkdirhier(os.path.dirname(localpath))
            fetchcmd += " -O %s" % shlex.quote(localpath)

        if ud.user and ud.pswd:
            fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd)

        uri = ud.url.split(";")[0]
        if os.path.exists(ud.localpath):
            # The file exists, but we didn't complete it... try again.
            fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
        else:
            fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)

        self._runwget(ud, d, fetchcmd, False)

        # Sanity check since wget can pretend it succeeded when it didn't
        # Also, this used to happen if sourceforge sent us to the mirror page
        if not os.path.exists(ud.localpath):
            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)

        if os.path.getsize(ud.localpath) == 0:
            os.remove(ud.localpath)
            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)

        return True

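    # checkstatus() probes a URL without downloading it. Rather than spawning
    # wget, it issues an HTTP HEAD request through urllib (falling back to GET
    # for servers that reject HEAD), optionally reusing sockets from the
    # fetcher's connection cache via the handler classes below.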
    def checkstatus(self, fetch, ud, d, try_again=True):
        class HTTPConnectionCache(http.client.HTTPConnection):
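            # Note: the class-level 'if' below means connect() is only
            # overridden when this fetch has a connection cache; otherwise the
            # stock http.client.HTTPConnection.connect() is used.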
            if fetch.connection_cache:
                def connect(self):
                    """Connect to the host and port specified in __init__."""

                    sock = fetch.connection_cache.get_connection(self.host, self.port)
                    if sock:
                        self.sock = sock
                    else:
                        self.sock = socket.create_connection((self.host, self.port),
                                    self.timeout, self.source_address)
                        fetch.connection_cache.add_connection(self.host, self.port, self.sock)

                    if self._tunnel_host:
                        self._tunnel()

        class CacheHTTPHandler(urllib.request.HTTPHandler):
            def http_open(self, req):
                return self.do_open(HTTPConnectionCache, req)

            def do_open(self, http_class, req):
                """Return an addinfourl object for the request, using http_class.

                http_class must implement the HTTPConnection API from httplib.
                The addinfourl return value is a file-like object. It also
                has methods and attributes including:
                    - info(): return a mimetools.Message object for the headers
                    - geturl(): return the original request URL
                    - code: HTTP status code
                """
                host = req.host
                if not host:
                    raise urllib.error.URLError('no host given')

                h = http_class(host, timeout=req.timeout) # will parse host:port
                h.set_debuglevel(self._debuglevel)

                headers = dict(req.unredirected_hdrs)
                headers.update(dict((k, v) for k, v in list(req.headers.items())
                            if k not in headers))

                # We want to make an HTTP/1.1 request, but the addinfourl
                # class isn't prepared to deal with a persistent connection.
                # It will try to read all remaining data from the socket,
                # which will block while the server waits for the next request.
                # So make sure the connection gets closed after the (only)
                # request.

                # Don't close connection when connection_cache is enabled,
                if fetch.connection_cache is None:
                    headers["Connection"] = "close"
                else:
                    headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0

                headers = dict(
                    (name.title(), val) for name, val in list(headers.items()))

                if req._tunnel_host:
                    tunnel_headers = {}
                    proxy_auth_hdr = "Proxy-Authorization"
                    if proxy_auth_hdr in headers:
                        tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                        # Proxy-Authorization should not be sent to origin
                        # server.
                        del headers[proxy_auth_hdr]
                    h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

                try:
                    h.request(req.get_method(), req.selector, req.data, headers)
                except socket.error as err: # XXX what error?
                    # Don't close connection when cache is enabled.
                    # Instead, try to detect connections that are no longer
                    # usable (for example, closed unexpectedly) and remove
                    # them from the cache.
                    if fetch.connection_cache is None:
                        h.close()
                    elif isinstance(err, OSError) and err.errno == errno.EBADF:
                        # This happens when the server closes the connection despite the Keep-Alive.
                        # Apparently urllib then uses the file descriptor, expecting it to be
                        # connected, when in reality the connection is already gone.
                        # We let the request fail and expect it to be
                        # tried once more ("try_again" in check_status()),
                        # with the dead connection removed from the cache.
                        # If it still fails, we give up, which can happen for bad
                        # HTTP proxy settings.
                        fetch.connection_cache.remove_connection(h.host, h.port)
                    raise urllib.error.URLError(err)
                else:
                    r = h.getresponse()

                # Pick apart the HTTPResponse object to get the addinfourl
                # object initialized properly.

                # Wrap the HTTPResponse object in socket's file object adapter
                # for Windows. That adapter calls recv(), so delegate recv()
                # to read(). This weird wrapping allows the returned object to
                # have readline() and readlines() methods.

                # XXX It might be better to extract the read buffering code
                # out of socket._fileobject() and into a base class.
                r.recv = r.read

                # no data, just have to read
                r.read()
                class fp_dummy(object):
                    def read(self):
                        return ""
                    def readline(self):
                        return ""
                    def close(self):
                        pass
                    closed = False

                resp = urllib.response.addinfourl(fp_dummy(), r.msg, req.get_full_url())
                resp.code = r.status
                resp.msg = r.reason

                # Close the connection when the server requests it.
                if fetch.connection_cache is not None:
                    if 'Connection' in r.msg and r.msg['Connection'] == 'close':
                        fetch.connection_cache.remove_connection(h.host, h.port)

                return resp

        class HTTPMethodFallback(urllib.request.BaseHandler):
            """
            Fallback to GET if HEAD is not allowed (405 HTTP error)
            """
            def http_error_405(self, req, fp, code, msg, headers):
                fp.read()
                fp.close()

                if req.get_method() != 'GET':
                    newheaders = dict((k, v) for k, v in list(req.headers.items())
                                      if k.lower() not in ("content-length", "content-type"))
                    return self.parent.open(urllib.request.Request(req.get_full_url(),
                                                                   headers=newheaders,
                                                                   origin_req_host=req.origin_req_host,
                                                                   unverifiable=True))

                raise urllib.request.HTTPError(req, code, msg, headers, None)

            # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
            # Forbidden when they actually mean 405 Method Not Allowed.
            http_error_403 = http_error_405


        class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
            """
            urllib2.HTTPRedirectHandler resets the method to GET on redirect,
            when we want to follow redirects using the original method.
            """
            def redirect_request(self, req, fp, code, msg, headers, newurl):
                newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
                newreq.get_method = req.get_method
                return newreq

        exported_proxies = export_proxies(d)

        handlers = [FixedHTTPRedirectHandler, HTTPMethodFallback]
        if exported_proxies:
            handlers.append(urllib.request.ProxyHandler())
        handlers.append(CacheHTTPHandler())
        # Since Python 2.7.9 ssl cert validation is enabled by default
        # (see PEP-0476); this causes verification errors on some https
        # servers, so disable it by default.
        import ssl
        if hasattr(ssl, '_create_unverified_context'):
            handlers.append(urllib.request.HTTPSHandler(context=ssl._create_unverified_context()))
        opener = urllib.request.build_opener(*handlers)

        try:
            uri = ud.url.split(";")[0]
            r = urllib.request.Request(uri)
            r.get_method = lambda: "HEAD"
            # Some servers (FusionForge, as used on Alioth) require that the
            # optional Accept header is set.
            r.add_header("Accept", "*/*")
            r.add_header("User-Agent", "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.12) Gecko/20101027 Ubuntu/9.10 (karmic) Firefox/3.6.12")
            def add_basic_auth(login_str, request):
                '''Adds Basic auth to the request; pass in login:password as a string'''
                import base64
                encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
                authheader = "Basic %s" % encodeuser
                request.add_header("Authorization", authheader)

            if ud.user and ud.pswd:
                add_basic_auth(ud.user + ':' + ud.pswd, r)

            import netrc
            try:
                n = netrc.netrc()
                login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname)
                add_basic_auth("%s:%s" % (login, password), r)
            except (TypeError, IOError, netrc.NetrcParseError):
                pass

            with opener.open(r) as response:
                pass
        except urllib.error.URLError as e:
            if try_again:
                logger.debug(2, "checkstatus: trying again")
                return self.checkstatus(fetch, ud, d, False)
            else:
                # debug for now to avoid spamming the logs in e.g. remote sstate searches
                logger.debug(2, "checkstatus() urlopen failed: %s" % e)
                return False
        return True

    def _parse_path(self, regex, s):
        """
        Find and group name, version and archive type in the given string s
        """

        m = regex.search(s)
        if m:
            pname = ''
            pver = ''
            ptype = ''

            mdict = m.groupdict()
            if 'name' in mdict.keys():
                pname = mdict['name']
            if 'pver' in mdict.keys():
                pver = mdict['pver']
            if 'type' in mdict.keys():
                ptype = mdict['type']

            bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype))

            return (pname, pver, ptype)

        return None

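    # _modelate_version() normalises a version string so bb.utils.vercmp()
    # can compare it: separators become dots and pre-release tags become
    # numeric components. Illustrative examples: "1.2rc3" -> "1.2.1000.3",
    # "v1_0-beta2" -> "1.0..100.2".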
    def _modelate_version(self, version):
        if version[0] in ['.', '-']:
            if version[1].isdigit():
                version = version[1] + version[0] + version[2:len(version)]
            else:
                version = version[1:len(version)]

        version = re.sub('-', '.', version)
        version = re.sub('_', '.', version)
        version = re.sub('(rc)+', '.1000.', version)
        version = re.sub('(beta)+', '.100.', version)
        version = re.sub('(alpha)+', '.10.', version)
        if version[0] == 'v':
            version = version[1:len(version)]
        return version

    def _vercmp(self, old, new):
        """
        Check whether 'new' is newer than 'old'. We use the existing vercmp()
        for this; the result is < 0 when 'new' is the newer version. PE is
        cleared in the comparison as it does not apply to upstream versions,
        and PR is cleared too for simplicity, as it is difficult to extract
        from the various upstream formats.
        """

        (oldpn, oldpv, oldsuffix) = old
        (newpn, newpv, newsuffix) = new

        # Check for a new suffix type that we have never heard of before
        if newsuffix:
            m = self.suffix_regex_comp.search(newsuffix)
            if not m:
                bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
                return False

        # Not our package so ignore it
        if oldpn != newpn:
            return False

        oldpv = self._modelate_version(oldpv)
        newpv = self._modelate_version(newpv)

        return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, ""))

    def _fetch_index(self, uri, ud, d):
        """
        Run fetch checkstatus to get directory information
        """
        with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
            agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.12) Gecko/20101027 Ubuntu/9.10 (karmic) Firefox/3.6.12"
            fetchcmd = self.basecmd
            fetchcmd += " -O " + f.name + " --user-agent='" + agent + "' '" + uri + "'"
            try:
                self._runwget(ud, d, fetchcmd, True, workdir=workdir)
                fetchresult = f.read()
            except bb.fetch2.BBFetchException:
                fetchresult = ""

        return fetchresult

    def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
        """
        Return the latest version of a package found at the given directory URL.
        On error, or if no version is found, return "".
        """
        valid = 0
        version = ['', '', '']

        bb.debug(3, "VersionURL: %s" % (url))
        soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            bb.debug(3, "*** %s NO SOUP" % (url))
            return ""

        for line in soup.find_all('a', href=True):
            bb.debug(3, "line['href'] = '%s'" % (line['href']))
            bb.debug(3, "line = '%s'" % (str(line)))

            newver = self._parse_path(package_regex, line['href'])
            if not newver:
                newver = self._parse_path(package_regex, str(line))

            if newver:
                bb.debug(3, "Upstream version found: %s" % newver[1])
                if valid == 0:
                    version = newver
                    valid = 1
                elif self._vercmp(version, newver) < 0:
                    version = newver

        pupver = re.sub('_', '.', version[1])

        bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
                 (package, pupver or "N/A", current_version[1]))

        if valid:
            return pupver

        return ""

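    # Illustrative example: for a recipe fetching the hypothetical URL
    #   http://download.example.org/sources/foo/1.2/foo-1.2.3.tar.gz
    # _check_latest_version_by_dir() lists the parent directory, and for each
    # newer-looking version directory linked there (1.3/, 2.0/, ...) runs
    # _check_latest_version() inside it, keeping the newest version found.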
    def _check_latest_version_by_dir(self, dirver, package, package_regex, current_version, ud, d):
        """
        Scan every sibling version directory to find the upstream version.
        """
        version_dir = ['', '', '']
        version = ['', '', '']

        dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])+(\d+))")
        s = dirver_regex.search(dirver)
        if s:
            version_dir[1] = s.group('ver')
        else:
            version_dir[1] = dirver

        dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
                ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
        bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))

        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            return version[1]

        for line in soup.find_all('a', href=True):
            s = dirver_regex.search(line['href'].strip("/"))
            if s:
                sver = s.group('ver')

                # When the prefix is part of the version directory we need
                # to ensure that only the version directory is used, so
                # remove any preceding directories if present.
                #
                # Example: pfx = '/dir1/dir2/v' and version = '2.5'; the
                # expected result is v2.5.
                spfx = s.group('pfx').split('/')[-1]

                version_dir_new = ['', sver, '']
                if self._vercmp(version_dir, version_dir_new) <= 0:
                    dirver_new = spfx + sver
                    path = ud.path.replace(dirver, dirver_new, True) \
                        .split(package)[0]
                    uri = bb.fetch.encodeurl([ud.type, ud.host, path,
                            ud.user, ud.pswd, {}])

                    pupver = self._check_latest_version(uri,
                            package, package_regex, current_version, ud, d)
                    if pupver:
                        version[1] = pupver

                    version_dir = version_dir_new

        return version[1]

    def _init_regexes(self, package, ud, d):
        """
        Match as many patterns as possible such as:
            gnome-common-2.20.0.tar.gz (most common format)
            gtk+-2.90.1.tar.gz
            xf86-input-synaptics-12.6.9.tar.gz
            dri2proto-2.3.tar.gz
            blktool_4.orig.tar.gz
            libid3tag-0.15.1b.tar.gz
            unzip552.tar.gz
            icu4c-3_6-src.tgz
            genext2fs_1.3.orig.tar.gz
            gst-fluendo-mp3
        """
        # match most patterns, which use "-" as the separator before the version digits
        pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
        # a loose pattern such as for unzip552.tar.gz
        pn_prefix2 = r"[a-zA-Z]+"
        # a loose pattern such as for 80325-quicky-0.4.tar.gz
        pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+"
        # Save the Package Name (pn) Regex for use later
        pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)

        # match version
        pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"

        # match arch
        parch_regex = "-source|_all_"

        # The src.rpm extension was added only for rpm packages. It can be removed
        # if rpm packages will always be considered as having to be manually upgraded
        psuffix_regex = r"(tar\.gz|tgz|tar\.bz2|zip|xz|tar\.lz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)"

        # match name, version and archive type of a package
        package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
                                        % (pn_regex, pver_regex, parch_regex, psuffix_regex))
        self.suffix_regex_comp = re.compile(psuffix_regex)

        # compile regex, can be specific by package or generic regex
        pn_regex = d.getVar('UPSTREAM_CHECK_REGEX')
        if pn_regex:
            package_custom_regex_comp = re.compile(pn_regex)
        else:
            version = self._parse_path(package_regex_comp, package)
            if version:
                package_custom_regex_comp = re.compile(
                    r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
                    (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))
            else:
                package_custom_regex_comp = None

        return package_custom_regex_comp

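    # Recipes can steer the upstream version check with UPSTREAM_CHECK_URI
    # and UPSTREAM_CHECK_REGEX, e.g. (hypothetical values):
    #   UPSTREAM_CHECK_URI = "https://example.com/releases/"
    #   UPSTREAM_CHECK_REGEX = "foo-(?P<pver>\d+(\.\d+)+)\.tar\.gz"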
    def latest_versionstring(self, ud, d):
        """
        Manipulate the URL and try to obtain the latest package version.

        Sanity checks ensure the name and type match the current package.
        """
        package = ud.path.split("/")[-1]
        current_version = ['', d.getVar('PV'), '']

        # It's possible to have no version in the package name, e.g. spectrum-fw
        if not re.search(r"\d+", package):
            current_version[1] = re.sub('_', '.', current_version[1])
            current_version[1] = re.sub('-', '.', current_version[1])
            return (current_version[1], '')

        package_regex = self._init_regexes(package, ud, d)
        if package_regex is None:
            bb.warn("latest_versionstring: package %s doesn't match pattern" % (package))
            return ('', '')
        bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))

        uri = ""
        regex_uri = d.getVar("UPSTREAM_CHECK_URI")
        if not regex_uri:
            path = ud.path.split(package)[0]

            # search for version matches on folders inside the path, like:
            # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
            dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
            m = dirver_regex.search(path)
            if m:
                pn = d.getVar('PN')
                dirver = m.group('dirver')

                dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
                if not dirver_pn_regex.search(dirver):
                    return (self._check_latest_version_by_dir(dirver,
                        package, package_regex, current_version, ud, d), '')

            uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])
        else:
            uri = regex_uri

        return (self._check_latest_version(uri, package, package_regex,
                current_version, ud, d), '')