blob: 0f71ee4eacf79ffa79c9ded29921a78befccf222 [file] [log] [blame]
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001"""
2BitBake 'Fetch' implementations
3
4Classes for obtaining upstream sources for the
5BitBake build tools.
6
7"""
8
9# Copyright (C) 2003, 2004 Chris Larson
10#
Brad Bishopc342db32019-05-15 21:57:59 -040011# SPDX-License-Identifier: GPL-2.0-only
Patrick Williamsc124f4f2015-09-15 14:41:29 -050012#
13# Based on functions from the base bb module, Copyright 2003 Holger Schurig
14
15import re
16import tempfile
17import subprocess
18import os
19import logging
Brad Bishopd7bf8c12018-02-25 22:55:05 -050020import errno
Patrick Williamsc124f4f2015-09-15 14:41:29 -050021import bb
Patrick Williamsc0f7c042017-02-23 20:41:17 -060022import bb.progress
Brad Bishop19323692019-04-05 15:28:33 -040023import socket
24import http.client
Patrick Williamsc0f7c042017-02-23 20:41:17 -060025import urllib.request, urllib.parse, urllib.error
Patrick Williamsc124f4f2015-09-15 14:41:29 -050026from bb.fetch2 import FetchMethod
27from bb.fetch2 import FetchError
28from bb.fetch2 import logger
29from bb.fetch2 import runfetchcmd
Brad Bishop19323692019-04-05 15:28:33 -040030from bb.fetch2 import FetchConnectionCache
Patrick Williamsd8c66bc2016-06-20 12:57:21 -050031from bb.utils import export_proxies
Patrick Williamsc124f4f2015-09-15 14:41:29 -050032from bs4 import BeautifulSoup
Patrick Williamsd8c66bc2016-06-20 12:57:21 -050033from bs4 import SoupStrainer
Patrick Williamsc124f4f2015-09-15 14:41:29 -050034
class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
    """
    Parse progress information out of wget's output lines.

    Note: relies on --progress=dot (with -v or without -q/-nv) being
    specified on the wget command line.
    """
    def __init__(self, d):
        super(WgetProgressHandler, self).__init__(d)
        # Fire a 0% event immediately so the progress bar appears right away
        self._fire_progress(0)

    def writeline(self, line):
        # Grab the last "<percent>% <rate>" pair on the line, e.g. "12% 1.23M".
        matches = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line)
        if not matches:
            # Not a progress line; let it through to the log.
            return True
        last_percent, last_rate = matches[-1]
        self.update(int(last_percent), last_rate + '/s')
        return False
54
55
class Wget(FetchMethod):
    """Class to fetch urls via 'wget'"""

    def supports(self, ud, d):
        """
        Check to see if a given url can be fetched with wget.
        """
        supported_schemes = ['http', 'https', 'ftp']
        return ud.type in supported_schemes

    def recommends_checksum(self, urldata):
        # Remote archives should have checksums recorded in the recipe.
        return True
66
67 def urldata_init(self, ud, d):
68 if 'protocol' in ud.parm:
69 if ud.parm['protocol'] == 'git':
70 raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)
71
72 if 'downloadfilename' in ud.parm:
73 ud.basename = ud.parm['downloadfilename']
74 else:
75 ud.basename = os.path.basename(ud.path)
76
Brad Bishop6e60e8b2018-02-01 10:27:11 -050077 ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
Patrick Williamsd8c66bc2016-06-20 12:57:21 -050078 if not ud.localfile:
Brad Bishop6e60e8b2018-02-01 10:27:11 -050079 ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))
Patrick Williamsc124f4f2015-09-15 14:41:29 -050080
Brad Bishop6e60e8b2018-02-01 10:27:11 -050081 self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate"
Patrick Williamsc124f4f2015-09-15 14:41:29 -050082
Brad Bishopd7bf8c12018-02-25 22:55:05 -050083 def _runwget(self, ud, d, command, quiet, workdir=None):
Patrick Williamsc124f4f2015-09-15 14:41:29 -050084
Patrick Williamsc0f7c042017-02-23 20:41:17 -060085 progresshandler = WgetProgressHandler(d)
86
Patrick Williamsc124f4f2015-09-15 14:41:29 -050087 logger.debug(2, "Fetching %s using command '%s'" % (ud.url, command))
Brad Bishop6e60e8b2018-02-01 10:27:11 -050088 bb.fetch2.check_network_access(d, command, ud.url)
Brad Bishopd7bf8c12018-02-25 22:55:05 -050089 runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)
Patrick Williamsc124f4f2015-09-15 14:41:29 -050090
91 def download(self, ud, d):
92 """Fetch urls"""
93
94 fetchcmd = self.basecmd
95
96 if 'downloadfilename' in ud.parm:
Brad Bishop6e60e8b2018-02-01 10:27:11 -050097 dldir = d.getVar("DL_DIR")
Patrick Williamsc124f4f2015-09-15 14:41:29 -050098 bb.utils.mkdirhier(os.path.dirname(dldir + os.sep + ud.localfile))
99 fetchcmd += " -O " + dldir + os.sep + ud.localfile
100
Brad Bishop37a0e4d2017-12-04 01:01:44 -0500101 if ud.user and ud.pswd:
102 fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd)
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600103
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500104 uri = ud.url.split(";")[0]
105 if os.path.exists(ud.localpath):
106 # file exists, but we didnt complete it.. trying again..
107 fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
108 else:
109 fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)
110
111 self._runwget(ud, d, fetchcmd, False)
112
113 # Sanity check since wget can pretend it succeed when it didn't
114 # Also, this used to happen if sourceforge sent us to the mirror page
115 if not os.path.exists(ud.localpath):
116 raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)
117
118 if os.path.getsize(ud.localpath) == 0:
119 os.remove(ud.localpath)
120 raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)
121
122 return True
123
    def checkstatus(self, fetch, ud, d, try_again=True):
        """
        Probe ud.url with an HTTP HEAD request (no body download) to see
        whether it is reachable.

        Handlers are installed so that 405/403 responses are retried with
        GET, redirects keep the original method, and sockets are reused via
        the fetcher's connection cache when one is enabled.  On URLError the
        check is retried once (controlled by try_again) before returning
        False; returns True when the request succeeds.
        """
        # HTTPConnection variant whose sockets come from / go back to the
        # fetcher-wide connection cache.  connect() is only overridden when a
        # cache is active; otherwise the stock implementation is used.
        class HTTPConnectionCache(http.client.HTTPConnection):
            if fetch.connection_cache:
                def connect(self):
                    """Connect to the host and port specified in __init__."""

                    sock = fetch.connection_cache.get_connection(self.host, self.port)
                    if sock:
                        self.sock = sock
                    else:
                        # No cached socket: open one and remember it for reuse.
                        self.sock = socket.create_connection((self.host, self.port),
                                    self.timeout, self.source_address)
                        fetch.connection_cache.add_connection(self.host, self.port, self.sock)

                    if self._tunnel_host:
                        self._tunnel()

        # HTTP handler that routes opens through HTTPConnectionCache above.
        class CacheHTTPHandler(urllib.request.HTTPHandler):
            def http_open(self, req):
                return self.do_open(HTTPConnectionCache, req)

            def do_open(self, http_class, req):
                """Return an addinfourl object for the request, using http_class.

                http_class must implement the HTTPConnection API from httplib.
                The addinfourl return value is a file-like object.  It also
                has methods and attributes including:
                    - info(): return a mimetools.Message object for the headers
                    - geturl(): return the original request URL
                    - code: HTTP status code
                """
                host = req.host
                if not host:
                    raise urllib.error.URLError('no host given')

                h = http_class(host, timeout=req.timeout) # will parse host:port
                h.set_debuglevel(self._debuglevel)

                # Merge normal headers over the unredirected ones (the latter win).
                headers = dict(req.unredirected_hdrs)
                headers.update(dict((k, v) for k, v in list(req.headers.items())
                            if k not in headers))

                # We want to make an HTTP/1.1 request, but the addinfourl
                # class isn't prepared to deal with a persistent connection.
                # It will try to read all remaining data from the socket,
                # which will block while the server waits for the next request.
                # So make sure the connection gets closed after the (only)
                # request.

                # Don't close connection when connection_cache is enabled,
                if fetch.connection_cache is None:
                    headers["Connection"] = "close"
                else:
                    headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0

                # Normalise header capitalisation (e.g. "content-type" -> "Content-Type").
                headers = dict(
                    (name.title(), val) for name, val in list(headers.items()))

                if req._tunnel_host:
                    tunnel_headers = {}
                    proxy_auth_hdr = "Proxy-Authorization"
                    if proxy_auth_hdr in headers:
                        tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                        # Proxy-Authorization should not be sent to origin
                        # server.
                        del headers[proxy_auth_hdr]
                    h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

                try:
                    h.request(req.get_method(), req.selector, req.data, headers)
                except socket.error as err: # XXX what error?
                    # Don't close connection when cache is enabled.
                    # Instead, try to detect connections that are no longer
                    # usable (for example, closed unexpectedly) and remove
                    # them from the cache.
                    if fetch.connection_cache is None:
                        h.close()
                    elif isinstance(err, OSError) and err.errno == errno.EBADF:
                        # This happens when the server closes the connection despite the Keep-Alive.
                        # Apparently urllib then uses the file descriptor, expecting it to be
                        # connected, when in reality the connection is already gone.
                        # We let the request fail and expect it to be
                        # tried once more ("try_again" in check_status()),
                        # with the dead connection removed from the cache.
                        # If it still fails, we give up, which can happen for bad
                        # HTTP proxy settings.
                        fetch.connection_cache.remove_connection(h.host, h.port)
                    raise urllib.error.URLError(err)
                else:
                    # The buffering keyword only existed on Python 2; on
                    # Python 3 the TypeError fallback path is taken.
                    try:
                        r = h.getresponse(buffering=True)
                    except TypeError: # buffering kw not supported
                        r = h.getresponse()

                # Pick apart the HTTPResponse object to get the addinfourl
                # object initialized properly.

                # Wrap the HTTPResponse object in socket's file object adapter
                # for Windows.  That adapter calls recv(), so delegate recv()
                # to read().  This weird wrapping allows the returned object to
                # have readline() and readlines() methods.

                # XXX It might be better to extract the read buffering code
                # out of socket._fileobject() and into a base class.
                r.recv = r.read

                # no data, just have to read
                r.read()
                # Minimal file-like stand-in: the real socket file must not be
                # handed to addinfourl, or closing the response would close a
                # cached (reusable) connection.
                class fp_dummy(object):
                    def read(self):
                        return ""
                    def readline(self):
                        return ""
                    def close(self):
                        pass
                    closed = False

                resp = urllib.response.addinfourl(fp_dummy(), r.msg, req.get_full_url())
                resp.code = r.status
                resp.msg = r.reason

                # Close connection when server requests it.
                if fetch.connection_cache is not None:
                    if 'Connection' in r.msg and r.msg['Connection'] == 'close':
                        fetch.connection_cache.remove_connection(h.host, h.port)

                return resp

        class HTTPMethodFallback(urllib.request.BaseHandler):
            """
            Fallback to GET if HEAD is not allowed (405 HTTP error)
            """
            def http_error_405(self, req, fp, code, msg, headers):
                # Drain and close the failed response, then reissue the
                # request without a method override (i.e. as a plain GET).
                fp.read()
                fp.close()

                newheaders = dict((k, v) for k, v in list(req.headers.items())
                                  if k.lower() not in ("content-length", "content-type"))
                return self.parent.open(urllib.request.Request(req.get_full_url(),
                                                               headers=newheaders,
                                                               origin_req_host=req.origin_req_host,
                                                               unverifiable=True))


            # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
            # Forbidden when they actually mean 405 Method Not Allowed.
            http_error_403 = http_error_405


        class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
            """
            urllib2.HTTPRedirectHandler resets the method to GET on redirect,
            when we want to follow redirects using the original method.
            """
            def redirect_request(self, req, fp, code, msg, headers, newurl):
                newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
                # Carry the original method (HEAD) over to the redirected request.
                newreq.get_method = req.get_method
                return newreq
        # Export http_proxy/https_proxy etc. into the environment; returns
        # whether any proxies were actually configured.
        exported_proxies = export_proxies(d)

        handlers = [FixedHTTPRedirectHandler, HTTPMethodFallback]
        if exported_proxies:
            handlers.append(urllib.request.ProxyHandler())
        handlers.append(CacheHTTPHandler())
        # Since Python 2.7.9 ssl cert validation is enabled by default
        # see PEP-0476, this causes verification errors on some https servers
        # so disable by default.
        import ssl
        if hasattr(ssl, '_create_unverified_context'):
            handlers.append(urllib.request.HTTPSHandler(context=ssl._create_unverified_context()))
        opener = urllib.request.build_opener(*handlers)

        try:
            uri = ud.url.split(";")[0]
            r = urllib.request.Request(uri)
            r.get_method = lambda: "HEAD"
            # Some servers (FusionForge, as used on Alioth) require that the
            # optional Accept header is set.
            r.add_header("Accept", "*/*")
            def add_basic_auth(login_str, request):
                '''Adds Basic auth to http request, pass in login:password as string'''
                import base64
                encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
                authheader = "Basic %s" % encodeuser
                r.add_header("Authorization", authheader)

            if ud.user and ud.pswd:
                add_basic_auth(ud.user + ':' + ud.pswd, r)

            try:
                # netrc.authenticators() returns None when the host has no
                # entry, making the tuple unpack raise the TypeError we catch.
                # NOTE(review): if "import netrc" itself failed, evaluating
                # netrc.NetrcParseError in the except clause would raise
                # NameError instead of being caught - confirm/clean up.
                import netrc
                n = netrc.netrc()
                login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname)
                add_basic_auth("%s:%s" % (login, password), r)
            except (TypeError, ImportError, IOError, netrc.NetrcParseError):
                pass

            with opener.open(r) as response:
                pass
        except urllib.error.URLError as e:
            if try_again:
                logger.debug(2, "checkstatus: trying again")
                return self.checkstatus(fetch, ud, d, False)
            else:
                # debug for now to avoid spamming the logs in e.g. remote sstate searches
                logger.debug(2, "checkstatus() urlopen failed: %s" % e)
                return False
        return True
332
333 def _parse_path(self, regex, s):
334 """
335 Find and group name, version and archive type in the given string s
336 """
337
338 m = regex.search(s)
339 if m:
340 pname = ''
341 pver = ''
342 ptype = ''
343
344 mdict = m.groupdict()
345 if 'name' in mdict.keys():
346 pname = mdict['name']
347 if 'pver' in mdict.keys():
348 pver = mdict['pver']
349 if 'type' in mdict.keys():
350 ptype = mdict['type']
351
352 bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype))
353
354 return (pname, pver, ptype)
355
356 return None
357
358 def _modelate_version(self, version):
359 if version[0] in ['.', '-']:
360 if version[1].isdigit():
361 version = version[1] + version[0] + version[2:len(version)]
362 else:
363 version = version[1:len(version)]
364
365 version = re.sub('-', '.', version)
366 version = re.sub('_', '.', version)
367 version = re.sub('(rc)+', '.1000.', version)
368 version = re.sub('(beta)+', '.100.', version)
369 version = re.sub('(alpha)+', '.10.', version)
370 if version[0] == 'v':
371 version = version[1:len(version)]
372 return version
373
374 def _vercmp(self, old, new):
375 """
376 Check whether 'new' is newer than 'old' version. We use existing vercmp() for the
377 purpose. PE is cleared in comparison as it's not for build, and PR is cleared too
378 for simplicity as it's somehow difficult to get from various upstream format
379 """
380
381 (oldpn, oldpv, oldsuffix) = old
382 (newpn, newpv, newsuffix) = new
383
Brad Bishop19323692019-04-05 15:28:33 -0400384 # Check for a new suffix type that we have never heard of before
385 if newsuffix:
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500386 m = self.suffix_regex_comp.search(newsuffix)
387 if not m:
388 bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
389 return False
390
Brad Bishop19323692019-04-05 15:28:33 -0400391 # Not our package so ignore it
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500392 if oldpn != newpn:
393 return False
394
395 oldpv = self._modelate_version(oldpv)
396 newpv = self._modelate_version(newpv)
397
398 return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, ""))
399
400 def _fetch_index(self, uri, ud, d):
401 """
402 Run fetch checkstatus to get directory information
403 """
404 f = tempfile.NamedTemporaryFile()
Brad Bishopd7bf8c12018-02-25 22:55:05 -0500405 with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
406 agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.12) Gecko/20101027 Ubuntu/9.10 (karmic) Firefox/3.6.12"
407 fetchcmd = self.basecmd
408 fetchcmd += " -O " + f.name + " --user-agent='" + agent + "' '" + uri + "'"
409 try:
410 self._runwget(ud, d, fetchcmd, True, workdir=workdir)
411 fetchresult = f.read()
412 except bb.fetch2.BBFetchException:
413 fetchresult = ""
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500414
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500415 return fetchresult
416
417 def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
418 """
419 Return the latest version of a package inside a given directory path
420 If error or no version, return ""
421 """
422 valid = 0
423 version = ['', '', '']
424
425 bb.debug(3, "VersionURL: %s" % (url))
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500426 soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500427 if not soup:
428 bb.debug(3, "*** %s NO SOUP" % (url))
429 return ""
430
431 for line in soup.find_all('a', href=True):
432 bb.debug(3, "line['href'] = '%s'" % (line['href']))
433 bb.debug(3, "line = '%s'" % (str(line)))
434
435 newver = self._parse_path(package_regex, line['href'])
436 if not newver:
437 newver = self._parse_path(package_regex, str(line))
438
439 if newver:
440 bb.debug(3, "Upstream version found: %s" % newver[1])
441 if valid == 0:
442 version = newver
443 valid = 1
444 elif self._vercmp(version, newver) < 0:
445 version = newver
446
447 pupver = re.sub('_', '.', version[1])
448
449 bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
450 (package, pupver or "N/A", current_version[1]))
451
452 if valid:
453 return pupver
454
455 return ""
456
Brad Bishop19323692019-04-05 15:28:33 -0400457 def _check_latest_version_by_dir(self, dirver, package, package_regex, current_version, ud, d):
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500458 """
Brad Bishop19323692019-04-05 15:28:33 -0400459 Scan every directory in order to get upstream version.
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500460 """
461 version_dir = ['', '', '']
462 version = ['', '', '']
463
Brad Bishop19323692019-04-05 15:28:33 -0400464 dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])+(\d+))")
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500465 s = dirver_regex.search(dirver)
466 if s:
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500467 version_dir[1] = s.group('ver')
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500468 else:
469 version_dir[1] = dirver
470
471 dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
472 ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
473 bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))
474
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500475 soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500476 if not soup:
477 return version[1]
478
479 for line in soup.find_all('a', href=True):
480 s = dirver_regex.search(line['href'].strip("/"))
481 if s:
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500482 sver = s.group('ver')
483
484 # When prefix is part of the version directory it need to
485 # ensure that only version directory is used so remove previous
486 # directories if exists.
487 #
488 # Example: pfx = '/dir1/dir2/v' and version = '2.5' the expected
489 # result is v2.5.
490 spfx = s.group('pfx').split('/')[-1]
491
492 version_dir_new = ['', sver, '']
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500493 if self._vercmp(version_dir, version_dir_new) <= 0:
Patrick Williamsd8c66bc2016-06-20 12:57:21 -0500494 dirver_new = spfx + sver
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500495 path = ud.path.replace(dirver, dirver_new, True) \
496 .split(package)[0]
497 uri = bb.fetch.encodeurl([ud.type, ud.host, path,
498 ud.user, ud.pswd, {}])
499
500 pupver = self._check_latest_version(uri,
501 package, package_regex, current_version, ud, d)
502 if pupver:
503 version[1] = pupver
504
505 version_dir = version_dir_new
506
507 return version[1]
508
    def _init_regexes(self, package, ud, d):
        """
        Build and return the compiled regex used to pull *package*'s name,
        version and archive type out of directory listings, or None when no
        usable pattern can be derived.

        Match as many patterns as possible such as:
                gnome-common-2.20.0.tar.gz (most common format)
                gtk+-2.90.1.tar.gz
                xf86-input-synaptics-12.6.9.tar.gz
                dri2proto-2.3.tar.gz
                blktool_4.orig.tar.gz
                libid3tag-0.15.1b.tar.gz
                unzip552.tar.gz
                icu4c-3_6-src.tgz
                genext2fs_1.3.orig.tar.gz
                gst-fluendo-mp3
        """
        # match most patterns which uses "-" as separator to version digits
        pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
        # a loose pattern such as for unzip552.tar.gz
        pn_prefix2 = r"[a-zA-Z]+"
        # a loose pattern such as for 80325-quicky-0.4.tar.gz
        pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+"
        # Save the Package Name (pn) Regex for use later
        pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)

        # match version
        pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"

        # match arch
        parch_regex = "-source|_all_"

        # src.rpm extension was added only for rpm package. Can be removed if the rpm
        # packaged will always be considered as having to be manually upgraded
        psuffix_regex = r"(tar\.gz|tgz|tar\.bz2|zip|xz|tar\.lz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)"

        # match name, version and archive type of a package
        package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
                                                    % (pn_regex, pver_regex, parch_regex, psuffix_regex))
        # Stored on self for _vercmp(), which validates candidate suffixes with it.
        self.suffix_regex_comp = re.compile(psuffix_regex)

        # compile regex, can be specific by package or generic regex
        # (a recipe-supplied UPSTREAM_CHECK_REGEX wins outright)
        pn_regex = d.getVar('UPSTREAM_CHECK_REGEX')
        if pn_regex:
            package_custom_regex_comp = re.compile(pn_regex)
        else:
            # Otherwise anchor the generic pattern on the exact package name
            # parsed out of this package's own filename.
            version = self._parse_path(package_regex_comp, package)
            if version:
                package_custom_regex_comp = re.compile(
                    r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
                    (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))
            else:
                package_custom_regex_comp = None

        return package_custom_regex_comp
561
562 def latest_versionstring(self, ud, d):
563 """
564 Manipulate the URL and try to obtain the latest package version
565
566 sanity check to ensure same name and type.
567 """
568 package = ud.path.split("/")[-1]
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500569 current_version = ['', d.getVar('PV'), '']
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500570
571 """possible to have no version in pkg name, such as spectrum-fw"""
Brad Bishop19323692019-04-05 15:28:33 -0400572 if not re.search(r"\d+", package):
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500573 current_version[1] = re.sub('_', '.', current_version[1])
574 current_version[1] = re.sub('-', '.', current_version[1])
575 return (current_version[1], '')
576
577 package_regex = self._init_regexes(package, ud, d)
578 if package_regex is None:
579 bb.warn("latest_versionstring: package %s don't match pattern" % (package))
580 return ('', '')
581 bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))
582
583 uri = ""
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500584 regex_uri = d.getVar("UPSTREAM_CHECK_URI")
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500585 if not regex_uri:
586 path = ud.path.split(package)[0]
587
588 # search for version matches on folders inside the path, like:
589 # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
Brad Bishop19323692019-04-05 15:28:33 -0400590 dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500591 m = dirver_regex.search(path)
592 if m:
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500593 pn = d.getVar('PN')
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500594 dirver = m.group('dirver')
595
Brad Bishop19323692019-04-05 15:28:33 -0400596 dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500597 if not dirver_pn_regex.search(dirver):
598 return (self._check_latest_version_by_dir(dirver,
599 package, package_regex, current_version, ud, d), '')
600
601 uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])
602 else:
603 uri = regex_uri
604
605 return (self._check_latest_version(uri, package, package_regex,
606 current_version, ud, d), '')