blob: 3bb3e3bb0c7104038ceddb6488b1c8b2c2aa9992 [file] [log] [blame]
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001# ex:ts=4:sw=4:sts=4:et
2# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
3"""
4BitBake 'Fetch' implementations
5
6Classes for obtaining upstream sources for the
7BitBake build tools.
8
9"""
10
11# Copyright (C) 2003, 2004 Chris Larson
12#
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License version 2 as
15# published by the Free Software Foundation.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License along
23# with this program; if not, write to the Free Software Foundation, Inc.,
24# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25#
26# Based on functions from the base bb module, Copyright 2003 Holger Schurig
27
28import re
29import tempfile
30import subprocess
31import os
32import logging
Brad Bishopd7bf8c12018-02-25 22:55:05 -050033import errno
Patrick Williamsc124f4f2015-09-15 14:41:29 -050034import bb
Patrick Williamsc0f7c042017-02-23 20:41:17 -060035import bb.progress
Brad Bishop19323692019-04-05 15:28:33 -040036import socket
37import http.client
Patrick Williamsc0f7c042017-02-23 20:41:17 -060038import urllib.request, urllib.parse, urllib.error
Patrick Williamsc124f4f2015-09-15 14:41:29 -050039from bb.fetch2 import FetchMethod
40from bb.fetch2 import FetchError
41from bb.fetch2 import logger
42from bb.fetch2 import runfetchcmd
Brad Bishop19323692019-04-05 15:28:33 -040043from bb.fetch2 import FetchConnectionCache
Patrick Williamsd8c66bc2016-06-20 12:57:21 -050044from bb.utils import export_proxies
Patrick Williamsc124f4f2015-09-15 14:41:29 -050045from bs4 import BeautifulSoup
Patrick Williamsd8c66bc2016-06-20 12:57:21 -050046from bs4 import SoupStrainer
Patrick Williamsc124f4f2015-09-15 14:41:29 -050047
class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
    """
    Extract progress information from wget output.
    Note: relies on --progress=dot (with -v or without -q/-nv) being
    specified on the wget command line.
    """
    def __init__(self, d):
        super(WgetProgressHandler, self).__init__(d)
        # Emit 0% straight away so the progress bar is shown immediately.
        self._fire_progress(0)

    def writeline(self, line):
        # wget's dot-style progress lines look like "50% 1.2M"; take the
        # last match on the line as the current state.
        matches = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line)
        if not matches:
            # Not a progress line: let it through to the log.
            return True
        last_pct, last_rate = matches[-1]
        self.update(int(last_pct), last_rate + '/s')
        # Progress lines are consumed rather than logged.
        return False
67
68
Patrick Williamsc124f4f2015-09-15 14:41:29 -050069class Wget(FetchMethod):
70 """Class to fetch urls via 'wget'"""
71 def supports(self, ud, d):
72 """
73 Check to see if a given url can be fetched with wget.
74 """
75 return ud.type in ['http', 'https', 'ftp']
76
77 def recommends_checksum(self, urldata):
78 return True
79
80 def urldata_init(self, ud, d):
81 if 'protocol' in ud.parm:
82 if ud.parm['protocol'] == 'git':
83 raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)
84
85 if 'downloadfilename' in ud.parm:
86 ud.basename = ud.parm['downloadfilename']
87 else:
88 ud.basename = os.path.basename(ud.path)
89
Brad Bishop6e60e8b2018-02-01 10:27:11 -050090 ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
Patrick Williamsd8c66bc2016-06-20 12:57:21 -050091 if not ud.localfile:
Brad Bishop6e60e8b2018-02-01 10:27:11 -050092 ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))
Patrick Williamsc124f4f2015-09-15 14:41:29 -050093
Brad Bishop6e60e8b2018-02-01 10:27:11 -050094 self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate"
Patrick Williamsc124f4f2015-09-15 14:41:29 -050095
Brad Bishopd7bf8c12018-02-25 22:55:05 -050096 def _runwget(self, ud, d, command, quiet, workdir=None):
Patrick Williamsc124f4f2015-09-15 14:41:29 -050097
Patrick Williamsc0f7c042017-02-23 20:41:17 -060098 progresshandler = WgetProgressHandler(d)
99
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500100 logger.debug(2, "Fetching %s using command '%s'" % (ud.url, command))
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500101 bb.fetch2.check_network_access(d, command, ud.url)
Brad Bishopd7bf8c12018-02-25 22:55:05 -0500102 runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500103
104 def download(self, ud, d):
105 """Fetch urls"""
106
107 fetchcmd = self.basecmd
108
109 if 'downloadfilename' in ud.parm:
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500110 dldir = d.getVar("DL_DIR")
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500111 bb.utils.mkdirhier(os.path.dirname(dldir + os.sep + ud.localfile))
112 fetchcmd += " -O " + dldir + os.sep + ud.localfile
113
Brad Bishop37a0e4d2017-12-04 01:01:44 -0500114 if ud.user and ud.pswd:
115 fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd)
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600116
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500117 uri = ud.url.split(";")[0]
118 if os.path.exists(ud.localpath):
119 # file exists, but we didnt complete it.. trying again..
120 fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
121 else:
122 fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)
123
124 self._runwget(ud, d, fetchcmd, False)
125
126 # Sanity check since wget can pretend it succeed when it didn't
127 # Also, this used to happen if sourceforge sent us to the mirror page
128 if not os.path.exists(ud.localpath):
129 raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)
130
131 if os.path.getsize(ud.localpath) == 0:
132 os.remove(ud.localpath)
133 raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)
134
135 return True
136
    def checkstatus(self, fetch, ud, d, try_again=True):
        """
        Check that a URL exists by issuing a HEAD request, without
        downloading the body.

        Builds a urllib opener with a redirect handler that preserves HEAD,
        a GET fallback for servers that reject HEAD, an optional proxy
        handler, and a connection-caching HTTP handler driven by
        fetch.connection_cache.  Returns True if the URL is reachable,
        False otherwise.  A failed check is retried exactly once
        (try_again), which also gives a dead cached connection a chance to
        be evicted and re-established.
        """
        class HTTPConnectionCache(http.client.HTTPConnection):
            # connect() is only overridden when a connection cache exists;
            # otherwise the stock HTTPConnection behavior is used.
            if fetch.connection_cache:
                def connect(self):
                    """Connect to the host and port specified in __init__."""

                    # Reuse a cached socket for this host:port when available.
                    sock = fetch.connection_cache.get_connection(self.host, self.port)
                    if sock:
                        self.sock = sock
                    else:
                        self.sock = socket.create_connection((self.host, self.port),
                                    self.timeout, self.source_address)
                        fetch.connection_cache.add_connection(self.host, self.port, self.sock)

                    if self._tunnel_host:
                        self._tunnel()

        class CacheHTTPHandler(urllib.request.HTTPHandler):
            def http_open(self, req):
                return self.do_open(HTTPConnectionCache, req)

            def do_open(self, http_class, req):
                """Return an addinfourl object for the request, using http_class.

                http_class must implement the HTTPConnection API from httplib.
                The addinfourl return value is a file-like object.  It also
                has methods and attributes including:
                    - info(): return a mimetools.Message object for the headers
                    - geturl(): return the original request URL
                    - code: HTTP status code
                """
                host = req.host
                if not host:
                    raise urllib.error.URLError('no host given')

                h = http_class(host, timeout=req.timeout) # will parse host:port
                h.set_debuglevel(self._debuglevel)

                headers = dict(req.unredirected_hdrs)
                headers.update(dict((k, v) for k, v in list(req.headers.items())
                            if k not in headers))

                # We want to make an HTTP/1.1 request, but the addinfourl
                # class isn't prepared to deal with a persistent connection.
                # It will try to read all remaining data from the socket,
                # which will block while the server waits for the next request.
                # So make sure the connection gets closed after the (only)
                # request.

                # Don't close connection when connection_cache is enabled,
                if fetch.connection_cache is None:
                    headers["Connection"] = "close"
                else:
                    headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0

                # Normalize header names to Title-Case before sending.
                headers = dict(
                    (name.title(), val) for name, val in list(headers.items()))

                if req._tunnel_host:
                    tunnel_headers = {}
                    proxy_auth_hdr = "Proxy-Authorization"
                    if proxy_auth_hdr in headers:
                        tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                        # Proxy-Authorization should not be sent to origin
                        # server.
                        del headers[proxy_auth_hdr]
                    h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

                try:
                    h.request(req.get_method(), req.selector, req.data, headers)
                except socket.error as err: # XXX what error?
                    # Don't close connection when cache is enabled.
                    # Instead, try to detect connections that are no longer
                    # usable (for example, closed unexpectedly) and remove
                    # them from the cache.
                    if fetch.connection_cache is None:
                        h.close()
                    elif isinstance(err, OSError) and err.errno == errno.EBADF:
                        # This happens when the server closes the connection despite the Keep-Alive.
                        # Apparently urllib then uses the file descriptor, expecting it to be
                        # connected, when in reality the connection is already gone.
                        # We let the request fail and expect it to be
                        # tried once more ("try_again" in check_status()),
                        # with the dead connection removed from the cache.
                        # If it still fails, we give up, which can happen for bad
                        # HTTP proxy settings.
                        fetch.connection_cache.remove_connection(h.host, h.port)
                    raise urllib.error.URLError(err)
                else:
                    try:
                        r = h.getresponse(buffering=True)
                    except TypeError: # buffering kw not supported
                        r = h.getresponse()

                # Pick apart the HTTPResponse object to get the addinfourl
                # object initialized properly.

                # Wrap the HTTPResponse object in socket's file object adapter
                # for Windows.  That adapter calls recv(), so delegate recv()
                # to read().  This weird wrapping allows the returned object to
                # have readline() and readlines() methods.

                # XXX It might be better to extract the read buffering code
                # out of socket._fileobject() and into a base class.
                r.recv = r.read

                # no data, just have to read
                r.read()
                class fp_dummy(object):
                    # Minimal file-like stand-in: the response body has
                    # already been drained above, so callers get "".
                    def read(self):
                        return ""
                    def readline(self):
                        return ""
                    def close(self):
                        pass
                    closed = False

                resp = urllib.response.addinfourl(fp_dummy(), r.msg, req.get_full_url())
                resp.code = r.status
                resp.msg = r.reason

                # Close connection when server request it.
                if fetch.connection_cache is not None:
                    if 'Connection' in r.msg and r.msg['Connection'] == 'close':
                        fetch.connection_cache.remove_connection(h.host, h.port)

                return resp

        class HTTPMethodFallback(urllib.request.BaseHandler):
            """
            Fallback to GET if HEAD is not allowed (405 HTTP error)
            """
            def http_error_405(self, req, fp, code, msg, headers):
                fp.read()
                fp.close()

                # Retry as a plain GET, dropping body-describing headers.
                newheaders = dict((k, v) for k, v in list(req.headers.items())
                                  if k.lower() not in ("content-length", "content-type"))
                return self.parent.open(urllib.request.Request(req.get_full_url(),
                                                    headers=newheaders,
                                                    origin_req_host=req.origin_req_host,
                                                    unverifiable=True))


            # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
            # Forbidden when they actually mean 405 Method Not Allowed.
            http_error_403 = http_error_405


        class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
            """
            urllib2.HTTPRedirectHandler resets the method to GET on redirect,
            when we want to follow redirects using the original method.
            """
            def redirect_request(self, req, fp, code, msg, headers, newurl):
                newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
                # Keep using the original method (HEAD) after the redirect.
                newreq.get_method = req.get_method
                return newreq

        exported_proxies = export_proxies(d)

        handlers = [FixedHTTPRedirectHandler, HTTPMethodFallback]
        if exported_proxies:
            handlers.append(urllib.request.ProxyHandler())
        handlers.append(CacheHTTPHandler())
        # Since Python 2.7.9 ssl cert validation is enabled by default
        # see PEP-0476, this causes verification errors on some https servers
        # so disable by default.
        import ssl
        if hasattr(ssl, '_create_unverified_context'):
            handlers.append(urllib.request.HTTPSHandler(context=ssl._create_unverified_context()))
        opener = urllib.request.build_opener(*handlers)

        try:
            uri = ud.url.split(";")[0]
            r = urllib.request.Request(uri)
            r.get_method = lambda: "HEAD"
            # Some servers (FusionForge, as used on Alioth) require that the
            # optional Accept header is set.
            r.add_header("Accept", "*/*")
            def add_basic_auth(login_str, request):
                '''Adds Basic auth to http request, pass in login:password as string'''
                import base64
                encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
                authheader = "Basic %s" % encodeuser
                r.add_header("Authorization", authheader)

            if ud.user and ud.pswd:
                add_basic_auth(ud.user + ':' + ud.pswd, r)

            try:
                import netrc
                n = netrc.netrc()
                login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname)
                add_basic_auth("%s:%s" % (login, password), r)
            except (TypeError, ImportError, IOError, netrc.NetrcParseError):
                # TypeError covers authenticators() returning None (no entry
                # for this host).
                # NOTE(review): if the netrc import itself failed, the
                # reference to netrc.NetrcParseError in this except clause
                # would raise NameError rather than be caught -- confirm.
                pass

            with opener.open(r) as response:
                pass
        except urllib.error.URLError as e:
            if try_again:
                logger.debug(2, "checkstatus: trying again")
                return self.checkstatus(fetch, ud, d, False)
            else:
                # debug for now to avoid spamming the logs in e.g. remote sstate searches
                logger.debug(2, "checkstatus() urlopen failed: %s" % e)
                return False
        return True
345
346 def _parse_path(self, regex, s):
347 """
348 Find and group name, version and archive type in the given string s
349 """
350
351 m = regex.search(s)
352 if m:
353 pname = ''
354 pver = ''
355 ptype = ''
356
357 mdict = m.groupdict()
358 if 'name' in mdict.keys():
359 pname = mdict['name']
360 if 'pver' in mdict.keys():
361 pver = mdict['pver']
362 if 'type' in mdict.keys():
363 ptype = mdict['type']
364
365 bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype))
366
367 return (pname, pver, ptype)
368
369 return None
370
371 def _modelate_version(self, version):
372 if version[0] in ['.', '-']:
373 if version[1].isdigit():
374 version = version[1] + version[0] + version[2:len(version)]
375 else:
376 version = version[1:len(version)]
377
378 version = re.sub('-', '.', version)
379 version = re.sub('_', '.', version)
380 version = re.sub('(rc)+', '.1000.', version)
381 version = re.sub('(beta)+', '.100.', version)
382 version = re.sub('(alpha)+', '.10.', version)
383 if version[0] == 'v':
384 version = version[1:len(version)]
385 return version
386
387 def _vercmp(self, old, new):
388 """
389 Check whether 'new' is newer than 'old' version. We use existing vercmp() for the
390 purpose. PE is cleared in comparison as it's not for build, and PR is cleared too
391 for simplicity as it's somehow difficult to get from various upstream format
392 """
393
394 (oldpn, oldpv, oldsuffix) = old
395 (newpn, newpv, newsuffix) = new
396
Brad Bishop19323692019-04-05 15:28:33 -0400397 # Check for a new suffix type that we have never heard of before
398 if newsuffix:
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500399 m = self.suffix_regex_comp.search(newsuffix)
400 if not m:
401 bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
402 return False
403
Brad Bishop19323692019-04-05 15:28:33 -0400404 # Not our package so ignore it
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500405 if oldpn != newpn:
406 return False
407
408 oldpv = self._modelate_version(oldpv)
409 newpv = self._modelate_version(newpv)
410
411 return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, ""))
412
413 def _fetch_index(self, uri, ud, d):
414 """
415 Run fetch checkstatus to get directory information
416 """
417 f = tempfile.NamedTemporaryFile()
Brad Bishopd7bf8c12018-02-25 22:55:05 -0500418 with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
419 agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.12) Gecko/20101027 Ubuntu/9.10 (karmic) Firefox/3.6.12"
420 fetchcmd = self.basecmd
421 fetchcmd += " -O " + f.name + " --user-agent='" + agent + "' '" + uri + "'"
422 try:
423 self._runwget(ud, d, fetchcmd, True, workdir=workdir)
424 fetchresult = f.read()
425 except bb.fetch2.BBFetchException:
426 fetchresult = ""
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500427
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500428 return fetchresult
429
430 def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
431 """
432 Return the latest version of a package inside a given directory path
433 If error or no version, return ""
434 """
435 valid = 0
436 version = ['', '', '']
437
438 bb.debug(3, "VersionURL: %s" % (url))
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500439 soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500440 if not soup:
441 bb.debug(3, "*** %s NO SOUP" % (url))
442 return ""
443
444 for line in soup.find_all('a', href=True):
445 bb.debug(3, "line['href'] = '%s'" % (line['href']))
446 bb.debug(3, "line = '%s'" % (str(line)))
447
448 newver = self._parse_path(package_regex, line['href'])
449 if not newver:
450 newver = self._parse_path(package_regex, str(line))
451
452 if newver:
453 bb.debug(3, "Upstream version found: %s" % newver[1])
454 if valid == 0:
455 version = newver
456 valid = 1
457 elif self._vercmp(version, newver) < 0:
458 version = newver
459
460 pupver = re.sub('_', '.', version[1])
461
462 bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
463 (package, pupver or "N/A", current_version[1]))
464
465 if valid:
466 return pupver
467
468 return ""
469
    def _check_latest_version_by_dir(self, dirver, package, package_regex, current_version, ud, d):
        """
        Scan every directory in order to get upstream version.

        Used when the URL path contains a version directory (e.g.
        .../sources/pkg/5.7/pkg-5.7.tar.gz): lists the parent directory,
        and for every sibling directory whose version is >= the current
        one, checks it for a newer package tarball.  Returns the best
        version string found, or "" if none.
        """
        version_dir = ['', '', '']
        version = ['', '', '']

        # Splits a directory name into a non-digit prefix and the dotted
        # version part, e.g. "v2.5" -> pfx="v", ver="2.5".
        dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])+(\d+))")
        s = dirver_regex.search(dirver)
        if s:
            version_dir[1] = s.group('ver')
        else:
            # No recognizable version in the directory name; compare the
            # raw name instead.
            version_dir[1] = dirver

        # URL of the parent directory that contains the version directories.
        dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
                ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
        bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))

        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            return version[1]

        for line in soup.find_all('a', href=True):
            s = dirver_regex.search(line['href'].strip("/"))
            if s:
                sver = s.group('ver')

                # When prefix is part of the version directory it need to
                # ensure that only version directory is used so remove previous
                # directories if exists.
                #
                # Example: pfx = '/dir1/dir2/v' and version = '2.5' the expected
                # result is v2.5.
                spfx = s.group('pfx').split('/')[-1]

                version_dir_new = ['', sver, '']
                # Only look inside directories at least as new as the best
                # one seen so far.
                if self._vercmp(version_dir, version_dir_new) <= 0:
                    dirver_new = spfx + sver
                    # Substitute the candidate directory into the original
                    # path (count=True acts as count=1: first occurrence
                    # only) and strip everything from the package name on.
                    path = ud.path.replace(dirver, dirver_new, True) \
                        .split(package)[0]
                    uri = bb.fetch.encodeurl([ud.type, ud.host, path,
                        ud.user, ud.pswd, {}])

                    pupver = self._check_latest_version(uri,
                            package, package_regex, current_version, ud, d)
                    if pupver:
                        version[1] = pupver

                    version_dir = version_dir_new

        return version[1]
521
    def _init_regexes(self, package, ud, d):
        """
        Match as many patterns as possible such as:
            gnome-common-2.20.0.tar.gz (most common format)
            gtk+-2.90.1.tar.gz
            xf86-input-synaptics-12.6.9.tar.gz
            dri2proto-2.3.tar.gz
            blktool_4.orig.tar.gz
            libid3tag-0.15.1b.tar.gz
            unzip552.tar.gz
            icu4c-3_6-src.tgz
            genext2fs_1.3.orig.tar.gz
            gst-fluendo-mp3

        Returns a compiled regex (with 'name', 'pver', 'arch', 'type'
        groups) tailored to *package*, the user-supplied
        UPSTREAM_CHECK_REGEX if set, or None when the package name does
        not match any known pattern.  Also compiles
        self.suffix_regex_comp as a side effect for use by _vercmp().
        """
        # match most patterns which uses "-" as separator to version digits
        pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
        # a loose pattern such as for unzip552.tar.gz
        pn_prefix2 = r"[a-zA-Z]+"
        # a loose pattern such as for 80325-quicky-0.4.tar.gz
        pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+"
        # Save the Package Name (pn) Regex for use later
        pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)

        # match version
        pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"

        # match arch
        parch_regex = "-source|_all_"

        # src.rpm extension was added only for rpm package. Can be removed if the rpm
        # packaged will always be considered as having to be manually upgraded
        psuffix_regex = r"(tar\.gz|tgz|tar\.bz2|zip|xz|tar\.lz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)"

        # match name, version and archive type of a package
        package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
                                                    % (pn_regex, pver_regex, parch_regex, psuffix_regex))
        self.suffix_regex_comp = re.compile(psuffix_regex)

        # compile regex, can be specific by package or generic regex
        pn_regex = d.getVar('UPSTREAM_CHECK_REGEX')
        if pn_regex:
            package_custom_regex_comp = re.compile(pn_regex)
        else:
            # Anchor the generic regex to this package's exact name so other
            # packages in the same index are not matched.
            version = self._parse_path(package_regex_comp, package)
            if version:
                package_custom_regex_comp = re.compile(
                    r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
                    (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))
            else:
                package_custom_regex_comp = None

        return package_custom_regex_comp
574
    def latest_versionstring(self, ud, d):
        """
        Manipulate the URL and try to obtain the latest package version

        sanity check to ensure same name and type.

        Returns a (version, revision) tuple; revision is always '' for
        wget-fetched sources.  On failure returns ('', '').
        """
        package = ud.path.split("/")[-1]
        current_version = ['', d.getVar('PV'), '']

        """possible to have no version in pkg name, such as spectrum-fw"""
        if not re.search(r"\d+", package):
            # Unversioned package file: report the (normalised) current PV
            # since there is nothing to compare against upstream.
            current_version[1] = re.sub('_', '.', current_version[1])
            current_version[1] = re.sub('-', '.', current_version[1])
            return (current_version[1], '')

        package_regex = self._init_regexes(package, ud, d)
        if package_regex is None:
            bb.warn("latest_versionstring: package %s don't match pattern" % (package))
            return ('', '')
        bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))

        uri = ""
        # UPSTREAM_CHECK_URI overrides any directory-scanning heuristics.
        regex_uri = d.getVar("UPSTREAM_CHECK_URI")
        if not regex_uri:
            path = ud.path.split(package)[0]

            # search for version matches on folders inside the path, like:
            # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
            dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
            m = dirver_regex.search(path)
            if m:
                pn = d.getVar('PN')
                dirver = m.group('dirver')

                # Only treat the directory as a version directory if it is
                # not simply the package name with a digit (e.g. "gtk2").
                dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
                if not dirver_pn_regex.search(dirver):
                    return (self._check_latest_version_by_dir(dirver,
                        package, package_regex, current_version, ud, d), '')

            uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])
        else:
            uri = regex_uri

        return (self._check_latest_version(uri, package, package_regex,
                current_version, ud, d), '')