blob: 78a49676fe03bba72be3311c0a75c6f22cc922c9 [file] [log] [blame]
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001"""
2BitBake 'Fetch' implementations
3
4Classes for obtaining upstream sources for the
5BitBake build tools.
6
7"""
8
9# Copyright (C) 2003, 2004 Chris Larson
10#
Brad Bishopc342db32019-05-15 21:57:59 -040011# SPDX-License-Identifier: GPL-2.0-only
Patrick Williamsc124f4f2015-09-15 14:41:29 -050012#
13# Based on functions from the base bb module, Copyright 2003 Holger Schurig
14
Andrew Geissler82c905d2020-04-13 13:39:40 -050015import shlex
Patrick Williamsc124f4f2015-09-15 14:41:29 -050016import re
17import tempfile
Patrick Williamsc124f4f2015-09-15 14:41:29 -050018import os
Brad Bishopd7bf8c12018-02-25 22:55:05 -050019import errno
Patrick Williamsc124f4f2015-09-15 14:41:29 -050020import bb
Patrick Williamsc0f7c042017-02-23 20:41:17 -060021import bb.progress
Brad Bishop19323692019-04-05 15:28:33 -040022import socket
23import http.client
Patrick Williamsc0f7c042017-02-23 20:41:17 -060024import urllib.request, urllib.parse, urllib.error
Patrick Williamsc124f4f2015-09-15 14:41:29 -050025from bb.fetch2 import FetchMethod
26from bb.fetch2 import FetchError
27from bb.fetch2 import logger
28from bb.fetch2 import runfetchcmd
Patrick Williamsd8c66bc2016-06-20 12:57:21 -050029from bb.utils import export_proxies
Patrick Williamsc124f4f2015-09-15 14:41:29 -050030from bs4 import BeautifulSoup
Patrick Williamsd8c66bc2016-06-20 12:57:21 -050031from bs4 import SoupStrainer
Patrick Williamsc124f4f2015-09-15 14:41:29 -050032
class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
    """
    Parse wget's output lines and feed progress updates to BitBake's UI.

    Only works when wget runs with --progress=dot (and with -v, or at
    least without -q/-nv) so percentage figures appear on each line.
    """
    def __init__(self, d):
        super(WgetProgressHandler, self).__init__(d)
        # Emit 0% straight away so the progress bar is shown immediately
        self._fire_progress(0)

    def writeline(self, line):
        # A dot-progress line can contain several "NN% RATE" pairs; the
        # last pair reflects the most recent state.
        matches = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line)
        if not matches:
            # Not a progress line - pass it through to the log
            return True
        percentage, speed = matches[-1]
        self.update(int(percentage), speed + '/s')
        return False
52
53
class Wget(FetchMethod):
    """Class to fetch urls via 'wget'"""

    # CDNs like CloudFlare may do a 'browser integrity test' which can fail
    # with the standard wget/urllib User-Agent, so pretend to be a modern
    # browser.
    user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"

    def supports(self, ud, d):
        """
        Check to see if a given url can be fetched with wget.
        """
        return ud.type in ['http', 'https', 'ftp']

    def recommends_checksum(self, urldata):
        # Plain file downloads should always carry checksums in the recipe.
        return True
70
71 def urldata_init(self, ud, d):
72 if 'protocol' in ud.parm:
73 if ud.parm['protocol'] == 'git':
74 raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)
75
76 if 'downloadfilename' in ud.parm:
77 ud.basename = ud.parm['downloadfilename']
78 else:
79 ud.basename = os.path.basename(ud.path)
80
Brad Bishop6e60e8b2018-02-01 10:27:11 -050081 ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
Patrick Williamsd8c66bc2016-06-20 12:57:21 -050082 if not ud.localfile:
Brad Bishop6e60e8b2018-02-01 10:27:11 -050083 ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))
Patrick Williamsc124f4f2015-09-15 14:41:29 -050084
Brad Bishop6e60e8b2018-02-01 10:27:11 -050085 self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate"
Patrick Williamsc124f4f2015-09-15 14:41:29 -050086
    def _runwget(self, ud, d, command, quiet, workdir=None):
        """
        Run the given wget command after a network-access check.

        --progress=dot -v is appended so that WgetProgressHandler can parse
        percentage figures from the output and drive the progress bar.
        """
        progresshandler = WgetProgressHandler(d)

        logger.debug2("Fetching %s using command '%s'" % (ud.url, command))
        bb.fetch2.check_network_access(d, command, ud.url)
        runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)
Patrick Williamsc124f4f2015-09-15 14:41:29 -050094
95 def download(self, ud, d):
96 """Fetch urls"""
97
98 fetchcmd = self.basecmd
99
100 if 'downloadfilename' in ud.parm:
Andrew Geissler82c905d2020-04-13 13:39:40 -0500101 localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile)
102 bb.utils.mkdirhier(os.path.dirname(localpath))
103 fetchcmd += " -O %s" % shlex.quote(localpath)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500104
Brad Bishop37a0e4d2017-12-04 01:01:44 -0500105 if ud.user and ud.pswd:
106 fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd)
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600107
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500108 uri = ud.url.split(";")[0]
109 if os.path.exists(ud.localpath):
110 # file exists, but we didnt complete it.. trying again..
111 fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
112 else:
113 fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)
114
115 self._runwget(ud, d, fetchcmd, False)
116
117 # Sanity check since wget can pretend it succeed when it didn't
118 # Also, this used to happen if sourceforge sent us to the mirror page
119 if not os.path.exists(ud.localpath):
120 raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)
121
122 if os.path.getsize(ud.localpath) == 0:
123 os.remove(ud.localpath)
124 raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)
125
126 return True
127
    def checkstatus(self, fetch, ud, d, try_again=True):
        """
        Check whether ud.url is reachable without downloading the body, by
        issuing an HTTP HEAD request (falling back to GET on 405/403).

        Returns True when the server answers successfully, False otherwise.
        On URLError the check is retried exactly once (try_again=True on
        the first call, False on the retry).

        The nested handler classes close over 'fetch' so they can share
        fetch.connection_cache (when enabled) across calls.
        """
        class HTTPConnectionCache(http.client.HTTPConnection):
            # Only override connect() when a connection cache exists;
            # otherwise this behaves exactly like HTTPConnection.
            if fetch.connection_cache:
                def connect(self):
                    """Connect to the host and port specified in __init__."""

                    # Reuse a cached socket if one is available for this
                    # host:port, else create and cache a new one.
                    sock = fetch.connection_cache.get_connection(self.host, self.port)
                    if sock:
                        self.sock = sock
                    else:
                        self.sock = socket.create_connection((self.host, self.port),
                                                             self.timeout, self.source_address)
                        fetch.connection_cache.add_connection(self.host, self.port, self.sock)

                    if self._tunnel_host:
                        self._tunnel()

        class CacheHTTPHandler(urllib.request.HTTPHandler):
            def http_open(self, req):
                return self.do_open(HTTPConnectionCache, req)

            def do_open(self, http_class, req):
                """Return an addinfourl object for the request, using http_class.

                http_class must implement the HTTPConnection API from httplib.
                The addinfourl return value is a file-like object.  It also
                has methods and attributes including:
                    - info(): return a mimetools.Message object for the headers
                    - geturl(): return the original request URL
                    - code: HTTP status code
                """
                host = req.host
                if not host:
                    raise urllib.error.URLError('no host given')

                h = http_class(host, timeout=req.timeout) # will parse host:port
                h.set_debuglevel(self._debuglevel)

                headers = dict(req.unredirected_hdrs)
                headers.update(dict((k, v) for k, v in list(req.headers.items())
                            if k not in headers))

                # We want to make an HTTP/1.1 request, but the addinfourl
                # class isn't prepared to deal with a persistent connection.
                # It will try to read all remaining data from the socket,
                # which will block while the server waits for the next request.
                # So make sure the connection gets closed after the (only)
                # request.

                # Don't close connection when connection_cache is enabled,
                if fetch.connection_cache is None:
                    headers["Connection"] = "close"
                else:
                    headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0

                # HTTP header names are case-insensitive; normalise to
                # Title-Case so the checks below are consistent.
                headers = dict(
                    (name.title(), val) for name, val in list(headers.items()))

                if req._tunnel_host:
                    tunnel_headers = {}
                    proxy_auth_hdr = "Proxy-Authorization"
                    if proxy_auth_hdr in headers:
                        tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                        # Proxy-Authorization should not be sent to origin
                        # server.
                        del headers[proxy_auth_hdr]
                    h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

                try:
                    h.request(req.get_method(), req.selector, req.data, headers)
                except socket.error as err: # XXX what error?
                    # Don't close connection when cache is enabled.
                    # Instead, try to detect connections that are no longer
                    # usable (for example, closed unexpectedly) and remove
                    # them from the cache.
                    if fetch.connection_cache is None:
                        h.close()
                    elif isinstance(err, OSError) and err.errno == errno.EBADF:
                        # This happens when the server closes the connection despite the Keep-Alive.
                        # Apparently urllib then uses the file descriptor, expecting it to be
                        # connected, when in reality the connection is already gone.
                        # We let the request fail and expect it to be
                        # tried once more ("try_again" in check_status()),
                        # with the dead connection removed from the cache.
                        # If it still fails, we give up, which can happend for bad
                        # HTTP proxy settings.
                        fetch.connection_cache.remove_connection(h.host, h.port)
                    raise urllib.error.URLError(err)
                else:
                    r = h.getresponse()

                # Pick apart the HTTPResponse object to get the addinfourl
                # object initialized properly.

                # Wrap the HTTPResponse object in socket's file object adapter
                # for Windows.  That adapter calls recv(), so delegate recv()
                # to read().  This weird wrapping allows the returned object to
                # have readline() and readlines() methods.

                # XXX It might be better to extract the read buffering code
                # out of socket._fileobject() and into a base class.
                r.recv = r.read

                # no data, just have to read
                r.read()
                class fp_dummy(object):
                    # Stand-in file object: the body has already been
                    # consumed above, so reads always return empty.
                    def read(self):
                        return ""
                    def readline(self):
                        return ""
                    def close(self):
                        pass
                    closed = False

                resp = urllib.response.addinfourl(fp_dummy(), r.msg, req.get_full_url())
                resp.code = r.status
                resp.msg = r.reason

                # Close connection when server request it.
                if fetch.connection_cache is not None:
                    if 'Connection' in r.msg and r.msg['Connection'] == 'close':
                        fetch.connection_cache.remove_connection(h.host, h.port)

                return resp

        class HTTPMethodFallback(urllib.request.BaseHandler):
            """
            Fallback to GET if HEAD is not allowed (405 HTTP error)
            """
            def http_error_405(self, req, fp, code, msg, headers):
                fp.read()
                fp.close()

                if req.get_method() != 'GET':
                    # Retry the same URL as a GET, dropping body-describing
                    # headers that no longer apply.
                    newheaders = dict((k, v) for k, v in list(req.headers.items())
                                      if k.lower() not in ("content-length", "content-type"))
                    return self.parent.open(urllib.request.Request(req.get_full_url(),
                                                                   headers=newheaders,
                                                                   origin_req_host=req.origin_req_host,
                                                                   unverifiable=True))

                raise urllib.request.HTTPError(req, code, msg, headers, None)

            # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
            # Forbidden when they actually mean 405 Method Not Allowed.
            http_error_403 = http_error_405


        class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
            """
            urllib2.HTTPRedirectHandler resets the method to GET on redirect,
            when we want to follow redirects using the original method.
            """
            def redirect_request(self, req, fp, code, msg, headers, newurl):
                newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
                # Preserve the original method (HEAD) across the redirect
                newreq.get_method = req.get_method
                return newreq

        exported_proxies = export_proxies(d)

        handlers = [FixedHTTPRedirectHandler, HTTPMethodFallback]
        if exported_proxies:
            handlers.append(urllib.request.ProxyHandler())
        handlers.append(CacheHTTPHandler())
        # Since Python 2.7.9 ssl cert validation is enabled by default
        # see PEP-0476, this causes verification errors on some https servers
        # so disable by default.
        import ssl
        if hasattr(ssl, '_create_unverified_context'):
            handlers.append(urllib.request.HTTPSHandler(context=ssl._create_unverified_context()))
        opener = urllib.request.build_opener(*handlers)

        try:
            uri = ud.url.split(";")[0]
            r = urllib.request.Request(uri)
            r.get_method = lambda: "HEAD"
            # Some servers (FusionForge, as used on Alioth) require that the
            # optional Accept header is set.
            r.add_header("Accept", "*/*")
            r.add_header("User-Agent", self.user_agent)
            def add_basic_auth(login_str, request):
                '''Adds Basic auth to http request, pass in login:password as string'''
                import base64
                encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
                authheader = "Basic %s" % encodeuser
                r.add_header("Authorization", authheader)

            if ud.user and ud.pswd:
                add_basic_auth(ud.user + ':' + ud.pswd, r)

            try:
                # Also pick up credentials for this host from ~/.netrc,
                # best-effort: any parse/lookup failure is ignored.
                import netrc
                n = netrc.netrc()
                login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname)
                add_basic_auth("%s:%s" % (login, password), r)
            except (TypeError, ImportError, IOError, netrc.NetrcParseError):
                pass

            with opener.open(r) as response:
                pass
        except urllib.error.URLError as e:
            if try_again:
                logger.debug2("checkstatus: trying again")
                return self.checkstatus(fetch, ud, d, False)
            else:
                # debug for now to avoid spamming the logs in e.g. remote sstate searches
                logger.debug2("checkstatus() urlopen failed: %s" % e)
                return False
        return True
336
337 def _parse_path(self, regex, s):
338 """
339 Find and group name, version and archive type in the given string s
340 """
341
342 m = regex.search(s)
343 if m:
344 pname = ''
345 pver = ''
346 ptype = ''
347
348 mdict = m.groupdict()
349 if 'name' in mdict.keys():
350 pname = mdict['name']
351 if 'pver' in mdict.keys():
352 pver = mdict['pver']
353 if 'type' in mdict.keys():
354 ptype = mdict['type']
355
356 bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype))
357
358 return (pname, pver, ptype)
359
360 return None
361
362 def _modelate_version(self, version):
363 if version[0] in ['.', '-']:
364 if version[1].isdigit():
365 version = version[1] + version[0] + version[2:len(version)]
366 else:
367 version = version[1:len(version)]
368
369 version = re.sub('-', '.', version)
370 version = re.sub('_', '.', version)
371 version = re.sub('(rc)+', '.1000.', version)
372 version = re.sub('(beta)+', '.100.', version)
373 version = re.sub('(alpha)+', '.10.', version)
374 if version[0] == 'v':
375 version = version[1:len(version)]
376 return version
377
378 def _vercmp(self, old, new):
379 """
380 Check whether 'new' is newer than 'old' version. We use existing vercmp() for the
381 purpose. PE is cleared in comparison as it's not for build, and PR is cleared too
382 for simplicity as it's somehow difficult to get from various upstream format
383 """
384
385 (oldpn, oldpv, oldsuffix) = old
386 (newpn, newpv, newsuffix) = new
387
Brad Bishop19323692019-04-05 15:28:33 -0400388 # Check for a new suffix type that we have never heard of before
389 if newsuffix:
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500390 m = self.suffix_regex_comp.search(newsuffix)
391 if not m:
392 bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
393 return False
394
Brad Bishop19323692019-04-05 15:28:33 -0400395 # Not our package so ignore it
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500396 if oldpn != newpn:
397 return False
398
399 oldpv = self._modelate_version(oldpv)
400 newpv = self._modelate_version(newpv)
401
402 return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, ""))
403
404 def _fetch_index(self, uri, ud, d):
405 """
406 Run fetch checkstatus to get directory information
407 """
408 f = tempfile.NamedTemporaryFile()
Brad Bishopd7bf8c12018-02-25 22:55:05 -0500409 with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
Brad Bishopd7bf8c12018-02-25 22:55:05 -0500410 fetchcmd = self.basecmd
Andrew Geisslerd1e89492021-02-12 15:35:20 -0600411 fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'"
Brad Bishopd7bf8c12018-02-25 22:55:05 -0500412 try:
413 self._runwget(ud, d, fetchcmd, True, workdir=workdir)
414 fetchresult = f.read()
415 except bb.fetch2.BBFetchException:
416 fetchresult = ""
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500417
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500418 return fetchresult
419
420 def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
421 """
422 Return the latest version of a package inside a given directory path
423 If error or no version, return ""
424 """
425 valid = 0
426 version = ['', '', '']
427
428 bb.debug(3, "VersionURL: %s" % (url))
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500429 soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500430 if not soup:
431 bb.debug(3, "*** %s NO SOUP" % (url))
432 return ""
433
434 for line in soup.find_all('a', href=True):
435 bb.debug(3, "line['href'] = '%s'" % (line['href']))
436 bb.debug(3, "line = '%s'" % (str(line)))
437
438 newver = self._parse_path(package_regex, line['href'])
439 if not newver:
440 newver = self._parse_path(package_regex, str(line))
441
442 if newver:
443 bb.debug(3, "Upstream version found: %s" % newver[1])
444 if valid == 0:
445 version = newver
446 valid = 1
447 elif self._vercmp(version, newver) < 0:
448 version = newver
449
450 pupver = re.sub('_', '.', version[1])
451
452 bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
453 (package, pupver or "N/A", current_version[1]))
454
455 if valid:
456 return pupver
457
458 return ""
459
    def _check_latest_version_by_dir(self, dirver, package, package_regex, current_version, ud, d):
        """
        Scan every directory in order to get upstream version.

        'dirver' is the version-looking directory component of ud.path
        (e.g. "5.7" or "v2.5"); the parent directory is listed and each
        sibling version directory is checked with _check_latest_version().
        Returns the best version string found, or '' on failure.
        """
        version_dir = ['', '', '']
        version = ['', '', '']

        # Split a directory name like "v2.5" into prefix ('v') and the
        # numeric version part ('2.5').
        dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])+(\d+))")
        s = dirver_regex.search(dirver)
        if s:
            version_dir[1] = s.group('ver')
        else:
            version_dir[1] = dirver

        # List the directory that contains the version directories
        dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
                ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
        bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))

        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            return version[1]

        for line in soup.find_all('a', href=True):
            s = dirver_regex.search(line['href'].strip("/"))
            if s:
                sver = s.group('ver')

                # When prefix is part of the version directory it need to
                # ensure that only version directory is used so remove previous
                # directories if exists.
                #
                # Example: pfx = '/dir1/dir2/v' and version = '2.5' the expected
                # result is v2.5.
                spfx = s.group('pfx').split('/')[-1]

                version_dir_new = ['', sver, '']
                # Only descend into directories at least as new as the best
                # version directory seen so far
                if self._vercmp(version_dir, version_dir_new) <= 0:
                    dirver_new = spfx + sver
                    # str.replace() count of True == 1: replace only the
                    # first occurrence of dirver in the path
                    path = ud.path.replace(dirver, dirver_new, True) \
                        .split(package)[0]
                    uri = bb.fetch.encodeurl([ud.type, ud.host, path,
                        ud.user, ud.pswd, {}])

                    pupver = self._check_latest_version(uri,
                            package, package_regex, current_version, ud, d)
                    if pupver:
                        version[1] = pupver

                    version_dir = version_dir_new

        return version[1]
511
    def _init_regexes(self, package, ud, d):
        """
        Match as many patterns as possible such as:
            gnome-common-2.20.0.tar.gz (most common format)
            gtk+-2.90.1.tar.gz
            xf86-input-synaptics-12.6.9.tar.gz
            dri2proto-2.3.tar.gz
            blktool_4.orig.tar.gz
            libid3tag-0.15.1b.tar.gz
            unzip552.tar.gz
            icu4c-3_6-src.tgz
            genext2fs_1.3.orig.tar.gz
            gst-fluendo-mp3

        Returns a compiled regex with named groups 'name', 'pver', 'arch'
        and 'type' suitable for _parse_path(), or None when no usable
        pattern could be derived for 'package'.  Also compiles and stores
        self.suffix_regex_comp for use by _vercmp().
        """
        # match most patterns which uses "-" as separator to version digits
        pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
        # a loose pattern such as for unzip552.tar.gz
        pn_prefix2 = r"[a-zA-Z]+"
        # a loose pattern such as for 80325-quicky-0.4.tar.gz
        pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+"
        # Save the Package Name (pn) Regex for use later
        pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)

        # match version
        pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"

        # match arch
        parch_regex = "-source|_all_"

        # src.rpm extension was added only for rpm package. Can be removed if the rpm
        # packaged will always be considered as having to be manually upgraded
        psuffix_regex = r"(tar\.gz|tgz|tar\.bz2|zip|xz|tar\.lz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)"

        # match name, version and archive type of a package
        package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
                                        % (pn_regex, pver_regex, parch_regex, psuffix_regex))
        self.suffix_regex_comp = re.compile(psuffix_regex)

        # compile regex, can be specific by package or generic regex
        pn_regex = d.getVar('UPSTREAM_CHECK_REGEX')
        if pn_regex:
            # Recipe supplied its own pattern via UPSTREAM_CHECK_REGEX
            package_custom_regex_comp = re.compile(pn_regex)
        else:
            # Derive a package-specific pattern by parsing the current
            # package name with the generic regex first
            version = self._parse_path(package_regex_comp, package)
            if version:
                package_custom_regex_comp = re.compile(
                    r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
                    (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))
            else:
                package_custom_regex_comp = None

        return package_custom_regex_comp
564
    def latest_versionstring(self, ud, d):
        """
        Manipulate the URL and try to obtain the latest package version

        sanity check to ensure same name and type.

        Returns a (version, revision) tuple; revision is always '' for
        this fetcher.  Returns ('', '') when no pattern could be derived
        for the package.
        """
        package = ud.path.split("/")[-1]
        current_version = ['', d.getVar('PV'), '']

        """possible to have no version in pkg name, such as spectrum-fw"""
        if not re.search(r"\d+", package):
            # No digits at all in the file name: just normalise PV and
            # report it back as-is.
            current_version[1] = re.sub('_', '.', current_version[1])
            current_version[1] = re.sub('-', '.', current_version[1])
            return (current_version[1], '')

        package_regex = self._init_regexes(package, ud, d)
        if package_regex is None:
            bb.warn("latest_versionstring: package %s don't match pattern" % (package))
            return ('', '')
        bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))

        uri = ""
        # UPSTREAM_CHECK_URI lets the recipe point at an explicit index page
        regex_uri = d.getVar("UPSTREAM_CHECK_URI")
        if not regex_uri:
            path = ud.path.split(package)[0]

            # search for version matches on folders inside the path, like:
            # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
            dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
            m = dirver_regex.search(path)
            if m:
                pn = d.getVar('PN')
                dirver = m.group('dirver')

                # Only scan sibling version directories when the matched
                # component isn't just the package name itself
                dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
                if not dirver_pn_regex.search(dirver):
                    return (self._check_latest_version_by_dir(dirver,
                        package, package_regex, current_version, ud, d), '')

            uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])
        else:
            uri = regex_uri

        return (self._check_latest_version(uri, package, package_regex,
                current_version, ud, d), '')