"""
BitBake 'Fetch' implementations

Classes for obtaining upstream sources for the
BitBake build tools.

"""

# Copyright (C) 2003, 2004 Chris Larson
#
# SPDX-License-Identifier: GPL-2.0-only
#
# Based on functions from the base bb module, Copyright 2003 Holger Schurig

import re
import tempfile
import subprocess
import os
import logging
import errno
import bb
import bb.progress
import socket
import http.client
import urllib.request, urllib.parse, urllib.error
from bb.fetch2 import FetchMethod
from bb.fetch2 import FetchError
from bb.fetch2 import logger
from bb.fetch2 import runfetchcmd
from bb.fetch2 import FetchConnectionCache
from bb.utils import export_proxies
from bs4 import BeautifulSoup
from bs4 import SoupStrainer
class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
    """
    Extract progress information from wget output.
    Note: relies on --progress=dot (with -v or without -q/-nv) being
    specified on the wget command line.
    """
    def __init__(self, d):
        super(WgetProgressHandler, self).__init__(d)
        # Send an initial progress event so the bar gets shown
        self._fire_progress(0)

    def writeline(self, line):
        percs = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line)
        if percs:
            progress = int(percs[-1][0])
            rate = percs[-1][1] + '/s'
            self.update(progress, rate)
            return False
        return True
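
        # Illustrative sample (assumption, not from the original source): with
        # --progress=dot, wget prints lines such as
        #    50K .......... .......... .......... .......... ..........  2% 1.21M 4s
        # writeline() takes the last "<percent>% <amount-with-unit-letter>"
        # match, so the line above is reported as progress=2, rate '1.21M/s'.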


class Wget(FetchMethod):
    """Class to fetch urls via 'wget'"""
    def supports(self, ud, d):
        """
        Check to see if a given url can be fetched with wget.
        """
        return ud.type in ['http', 'https', 'ftp']

    def recommends_checksum(self, urldata):
        return True

    def urldata_init(self, ud, d):
        if 'protocol' in ud.parm:
            if ud.parm['protocol'] == 'git':
                raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)

        if 'downloadfilename' in ud.parm:
            ud.basename = ud.parm['downloadfilename']
        else:
            ud.basename = os.path.basename(ud.path)

        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
        if not ud.localfile:
            ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))

        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate"
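
        # Illustrative example (assumption, not from the original source):
        #   SRC_URI = "https://example.com/releases/foo-1.0.tar.gz;downloadfilename=foo.tar.gz"
        # yields ud.basename = "foo.tar.gz", so the file is stored under that
        # name rather than the URL's basename "foo-1.0.tar.gz".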

    def _runwget(self, ud, d, command, quiet, workdir=None):

        progresshandler = WgetProgressHandler(d)

        logger.debug(2, "Fetching %s using command '%s'" % (ud.url, command))
        bb.fetch2.check_network_access(d, command, ud.url)
        runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)

    def download(self, ud, d):
        """Fetch urls"""

        fetchcmd = self.basecmd

        if 'downloadfilename' in ud.parm:
            dldir = d.getVar("DL_DIR")
            bb.utils.mkdirhier(os.path.dirname(dldir + os.sep + ud.localfile))
            fetchcmd += " -O " + dldir + os.sep + ud.localfile

        if ud.user and ud.pswd:
            fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd)

        uri = ud.url.split(";")[0]
        if os.path.exists(ud.localpath):
            # file exists, but we didn't complete it... trying again
            fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
        else:
            fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)
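
        # Illustrative result (assumption, not from the original source): for
        # https://example.com/foo-1.0.tar.gz with no local file present, the
        # assembled command is roughly
        #   /usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate \
        #       -P ${DL_DIR} 'https://example.com/foo-1.0.tar.gz'
        # with --progress=dot -v appended by _runwget() below.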

        self._runwget(ud, d, fetchcmd, False)

        # Sanity check since wget can pretend it succeeded when it didn't
        # Also, this used to happen if sourceforge sent us to the mirror page
        if not os.path.exists(ud.localpath):
            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)

        if os.path.getsize(ud.localpath) == 0:
            os.remove(ud.localpath)
            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)

        return True

    def checkstatus(self, fetch, ud, d, try_again=True):
        class HTTPConnectionCache(http.client.HTTPConnection):
            if fetch.connection_cache:
                def connect(self):
                    """Connect to the host and port specified in __init__."""

                    sock = fetch.connection_cache.get_connection(self.host, self.port)
                    if sock:
                        self.sock = sock
                    else:
                        self.sock = socket.create_connection((self.host, self.port),
                                    self.timeout, self.source_address)
                        fetch.connection_cache.add_connection(self.host, self.port, self.sock)

                    if self._tunnel_host:
                        self._tunnel()

        class CacheHTTPHandler(urllib.request.HTTPHandler):
            def http_open(self, req):
                return self.do_open(HTTPConnectionCache, req)

            def do_open(self, http_class, req):
                """Return an addinfourl object for the request, using http_class.

                http_class must implement the HTTPConnection API from httplib.
                The addinfourl return value is a file-like object. It also
                has methods and attributes including:
                    - info(): return the headers (an email.message.Message instance)
                    - geturl(): return the original request URL
                    - code: HTTP status code
                """
                host = req.host
                if not host:
                    raise urllib.error.URLError('no host given')

                h = http_class(host, timeout=req.timeout) # will parse host:port
                h.set_debuglevel(self._debuglevel)

                headers = dict(req.unredirected_hdrs)
                headers.update(dict((k, v) for k, v in list(req.headers.items())
                            if k not in headers))

                # We want to make an HTTP/1.1 request, but the addinfourl
                # class isn't prepared to deal with a persistent connection.
                # It will try to read all remaining data from the socket,
                # which will block while the server waits for the next request.
                # So make sure the connection gets closed after the (only)
                # request.

                # Don't close connection when connection_cache is enabled,
                if fetch.connection_cache is None:
                    headers["Connection"] = "close"
                else:
                    headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0

                headers = dict(
                    (name.title(), val) for name, val in list(headers.items()))

                if req._tunnel_host:
                    tunnel_headers = {}
                    proxy_auth_hdr = "Proxy-Authorization"
                    if proxy_auth_hdr in headers:
                        tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                        # Proxy-Authorization should not be sent to origin
                        # server.
                        del headers[proxy_auth_hdr]
                    h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

                try:
                    h.request(req.get_method(), req.selector, req.data, headers)
                except socket.error as err: # XXX what error?
                    # Don't close connection when cache is enabled.
                    # Instead, try to detect connections that are no longer
                    # usable (for example, closed unexpectedly) and remove
                    # them from the cache.
                    if fetch.connection_cache is None:
                        h.close()
                    elif isinstance(err, OSError) and err.errno == errno.EBADF:
                        # This happens when the server closes the connection despite the Keep-Alive.
                        # Apparently urllib then uses the file descriptor, expecting it to be
                        # connected, when in reality the connection is already gone.
                        # We let the request fail and expect it to be
                        # tried once more ("try_again" in check_status()),
                        # with the dead connection removed from the cache.
                        # If it still fails, we give up, which can happen for bad
                        # HTTP proxy settings.
                        fetch.connection_cache.remove_connection(h.host, h.port)
                    raise urllib.error.URLError(err)
                else:
                    try:
                        r = h.getresponse(buffering=True)
                    except TypeError: # buffering kw not supported
                        r = h.getresponse()

                # Pick apart the HTTPResponse object to get the addinfourl
                # object initialized properly.

                # Wrap the HTTPResponse object in socket's file object adapter
                # for Windows. That adapter calls recv(), so delegate recv()
                # to read(). This weird wrapping allows the returned object to
                # have readline() and readlines() methods.

                # XXX It might be better to extract the read buffering code
                # out of socket._fileobject() and into a base class.
                r.recv = r.read

                # no data, just have to read
                r.read()
                class fp_dummy(object):
                    def read(self):
                        return ""
                    def readline(self):
                        return ""
                    def close(self):
                        pass
                    closed = False

                resp = urllib.response.addinfourl(fp_dummy(), r.msg, req.get_full_url())
                resp.code = r.status
                resp.msg = r.reason

                # Close the connection when the server requests it.
                if fetch.connection_cache is not None:
                    if 'Connection' in r.msg and r.msg['Connection'] == 'close':
                        fetch.connection_cache.remove_connection(h.host, h.port)

                return resp

        class HTTPMethodFallback(urllib.request.BaseHandler):
            """
            Fallback to GET if HEAD is not allowed (405 HTTP error)
            """
            def http_error_405(self, req, fp, code, msg, headers):
                fp.read()
                fp.close()

                if req.get_method() != 'GET':
                    newheaders = dict((k, v) for k, v in list(req.headers.items())
                                      if k.lower() not in ("content-length", "content-type"))
                    return self.parent.open(urllib.request.Request(req.get_full_url(),
                                                      headers=newheaders,
                                                      origin_req_host=req.origin_req_host,
                                                      unverifiable=True))

                raise urllib.error.HTTPError(req, code, msg, headers, None)

            # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
            # Forbidden when they actually mean 405 Method Not Allowed.
            http_error_403 = http_error_405


        class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
            """
            urllib.request.HTTPRedirectHandler resets the method to GET on redirect,
            when we want to follow redirects using the original method.
            """
            def redirect_request(self, req, fp, code, msg, headers, newurl):
                newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
                newreq.get_method = req.get_method
                return newreq

        exported_proxies = export_proxies(d)

        handlers = [FixedHTTPRedirectHandler, HTTPMethodFallback]
        if exported_proxies:
            handlers.append(urllib.request.ProxyHandler())
        handlers.append(CacheHTTPHandler())
        # Since Python 2.7.9 ssl cert validation is enabled by default
        # see PEP-0476, this causes verification errors on some https servers
        # so disable by default.
        import ssl
        if hasattr(ssl, '_create_unverified_context'):
            handlers.append(urllib.request.HTTPSHandler(context=ssl._create_unverified_context()))
        opener = urllib.request.build_opener(*handlers)

        try:
            uri = ud.url.split(";")[0]
            r = urllib.request.Request(uri)
            r.get_method = lambda: "HEAD"
            # Some servers (FusionForge, as used on Alioth) require that the
            # optional Accept header is set.
            r.add_header("Accept", "*/*")
            def add_basic_auth(login_str, request):
                '''Adds Basic auth to http request, pass in login:password as string'''
                import base64
                encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
                authheader = "Basic %s" % encodeuser
                r.add_header("Authorization", authheader)

            if ud.user and ud.pswd:
                add_basic_auth(ud.user + ':' + ud.pswd, r)

            try:
                import netrc
                n = netrc.netrc()
                login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname)
                add_basic_auth("%s:%s" % (login, password), r)
            except (TypeError, ImportError, IOError, netrc.NetrcParseError):
                pass
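
            # Illustrative ~/.netrc entry (assumption, not from this file):
            #   machine example.com login anonymous password guest
            # netrc.authenticators() returns (login, account, password) for a
            # matching machine, or None, which triggers the TypeError above.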

            with opener.open(r) as response:
                pass
        except urllib.error.URLError as e:
            if try_again:
                logger.debug(2, "checkstatus: trying again")
                return self.checkstatus(fetch, ud, d, False)
            else:
                # debug for now to avoid spamming the logs in e.g. remote sstate searches
                logger.debug(2, "checkstatus() urlopen failed: %s" % e)
                return False
        return True

    def _parse_path(self, regex, s):
        """
        Find and group name, version and archive type in the given string s
        """

        m = regex.search(s)
        if m:
            pname = ''
            pver = ''
            ptype = ''

            mdict = m.groupdict()
            if 'name' in mdict.keys():
                pname = mdict['name']
            if 'pver' in mdict.keys():
                pver = mdict['pver']
            if 'type' in mdict.keys():
                ptype = mdict['type']

            bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype))

            return (pname, pver, ptype)

        return None
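
        # Illustrative example (assumption, not from the original source):
        # with the generic regex built in _init_regexes(), a string like
        # "gnome-common-2.20.0.tar.gz" parses to roughly
        # ('gnome-common-', '2.20.0', 'tar.gz').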

    def _modelate_version(self, version):
        if version[0] in ['.', '-']:
            if version[1].isdigit():
                version = version[1] + version[0] + version[2:len(version)]
            else:
                version = version[1:len(version)]

        version = re.sub('-', '.', version)
        version = re.sub('_', '.', version)
        version = re.sub('(rc)+', '.1000.', version)
        version = re.sub('(beta)+', '.100.', version)
        version = re.sub('(alpha)+', '.10.', version)
        if version[0] == 'v':
            version = version[1:len(version)]
        return version
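
        # Worked example (illustrative, not from the original source):
        # "v1.0rc2" -> "1.0.1000.2": pre-release tags become numerically
        # comparable by bb.utils.vercmp() (.1000. for rc, .100. for beta,
        # .10. for alpha).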

    def _vercmp(self, old, new):
        """
        Check whether 'new' is newer than 'old'. We use the existing vercmp() for
        the purpose. PE is cleared in the comparison as it's not used for the build,
        and PR is cleared too for simplicity, as it is difficult to extract from the
        various upstream version formats.
        """

        (oldpn, oldpv, oldsuffix) = old
        (newpn, newpv, newsuffix) = new

        # Check for a new suffix type that we have never heard of before
        if newsuffix:
            m = self.suffix_regex_comp.search(newsuffix)
            if not m:
                bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
                return False

        # Not our package so ignore it
        if oldpn != newpn:
            return False

        oldpv = self._modelate_version(oldpv)
        newpv = self._modelate_version(newpv)

        return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, ""))
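
        # Illustrative call (assumption, not from the original source):
        # _vercmp(('foo-', '1.0', 'tar.gz'), ('foo-', '1.1', 'tar.gz')) is
        # negative, i.e. a result < 0 means 'new' is newer than 'old', which
        # is how the callers below test it.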

    def _fetch_index(self, uri, ud, d):
        """
        Run fetch checkstatus to get directory information
        """
        with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
            agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.12) Gecko/20101027 Ubuntu/9.10 (karmic) Firefox/3.6.12"
            fetchcmd = self.basecmd
            fetchcmd += " -O " + f.name + " --user-agent='" + agent + "' '" + uri + "'"
            try:
                self._runwget(ud, d, fetchcmd, True, workdir=workdir)
                fetchresult = f.read()
            except bb.fetch2.BBFetchException:
                fetchresult = ""

        return fetchresult

    def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
        """
        Return the latest version of a package inside a given directory path
        If error or no version, return ""
        """
        valid = 0
        version = ['', '', '']

        bb.debug(3, "VersionURL: %s" % (url))
        soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            bb.debug(3, "*** %s NO SOUP" % (url))
            return ""

        for line in soup.find_all('a', href=True):
            bb.debug(3, "line['href'] = '%s'" % (line['href']))
            bb.debug(3, "line = '%s'" % (str(line)))

            newver = self._parse_path(package_regex, line['href'])
            if not newver:
                newver = self._parse_path(package_regex, str(line))

            if newver:
                bb.debug(3, "Upstream version found: %s" % newver[1])
                if valid == 0:
                    version = newver
                    valid = 1
                elif self._vercmp(version, newver) < 0:
                    version = newver

        pupver = re.sub('_', '.', version[1])

        bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
                 (package, pupver or "N/A", current_version[1]))

        if valid:
            return pupver

        return ""

    def _check_latest_version_by_dir(self, dirver, package, package_regex, current_version, ud, d):
        """
        Scan every directory in order to get upstream version.
        """
        version_dir = ['', '', '']
        version = ['', '', '']

        dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])+(\d+))")
        s = dirver_regex.search(dirver)
        if s:
            version_dir[1] = s.group('ver')
        else:
            version_dir[1] = dirver

        dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
                ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
        bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))

        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            return version[1]

        for line in soup.find_all('a', href=True):
            s = dirver_regex.search(line['href'].strip("/"))
            if s:
                sver = s.group('ver')

                # When a prefix is part of the version directory, make sure
                # that only the version directory itself is used, so strip
                # any preceding directories.
                #
                # Example: with pfx = '/dir1/dir2/v' and version = '2.5',
                # the expected result is 'v2.5'.
                spfx = s.group('pfx').split('/')[-1]

                version_dir_new = ['', sver, '']
                if self._vercmp(version_dir, version_dir_new) <= 0:
                    dirver_new = spfx + sver
                    path = ud.path.replace(dirver, dirver_new, True) \
                        .split(package)[0]
                    uri = bb.fetch.encodeurl([ud.type, ud.host, path,
                        ud.user, ud.pswd, {}])

                    pupver = self._check_latest_version(uri,
                            package, package_regex, current_version, ud, d)
                    if pupver:
                        version[1] = pupver

                    version_dir = version_dir_new

        return version[1]
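
        # Illustrative walk (assumption, not from the original source): for
        #   http://download.gnome.org/sources/foo/5.7/foo-5.7.2.tar.gz
        # dirver is "5.7"; the parent listing .../sources/foo/ is fetched and
        # every version-looking subdirectory (5.8/, 5.9/, ...) is searched via
        # _check_latest_version() for the newest matching tarball.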

    def _init_regexes(self, package, ud, d):
        """
        Match as many patterns as possible such as:
                gnome-common-2.20.0.tar.gz (most common format)
                gtk+-2.90.1.tar.gz
                xf86-input-synaptics-12.6.9.tar.gz
                dri2proto-2.3.tar.gz
                blktool_4.orig.tar.gz
                libid3tag-0.15.1b.tar.gz
                unzip552.tar.gz
                icu4c-3_6-src.tgz
                genext2fs_1.3.orig.tar.gz
                gst-fluendo-mp3
        """
525 # match most patterns which uses "-" as separator to version digits
Brad Bishop19323692019-04-05 15:28:33 -0400526 pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500527 # a loose pattern such as for unzip552.tar.gz
Brad Bishop19323692019-04-05 15:28:33 -0400528 pn_prefix2 = r"[a-zA-Z]+"
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500529 # a loose pattern such as for 80325-quicky-0.4.tar.gz
Brad Bishop19323692019-04-05 15:28:33 -0400530 pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+"
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500531 # Save the Package Name (pn) Regex for use later
Brad Bishop19323692019-04-05 15:28:33 -0400532 pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500533
534 # match version
Brad Bishop19323692019-04-05 15:28:33 -0400535 pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500536
537 # match arch
538 parch_regex = "-source|_all_"
539
540 # src.rpm extension was added only for rpm package. Can be removed if the rpm
541 # packaged will always be considered as having to be manually upgraded
Brad Bishop19323692019-04-05 15:28:33 -0400542 psuffix_regex = r"(tar\.gz|tgz|tar\.bz2|zip|xz|tar\.lz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)"
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500543
544 # match name, version and archive type of a package
Brad Bishop19323692019-04-05 15:28:33 -0400545 package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500546 % (pn_regex, pver_regex, parch_regex, psuffix_regex))
547 self.suffix_regex_comp = re.compile(psuffix_regex)
548
549 # compile regex, can be specific by package or generic regex
Brad Bishop6e60e8b2018-02-01 10:27:11 -0500550 pn_regex = d.getVar('UPSTREAM_CHECK_REGEX')
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500551 if pn_regex:
552 package_custom_regex_comp = re.compile(pn_regex)
553 else:
554 version = self._parse_path(package_regex_comp, package)
555 if version:
556 package_custom_regex_comp = re.compile(
Brad Bishop19323692019-04-05 15:28:33 -0400557 r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
Patrick Williamsc124f4f2015-09-15 14:41:29 -0500558 (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))
559 else:
560 package_custom_regex_comp = None
561
562 return package_custom_regex_comp
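
        # Illustrative recipe setting (assumption, not from this file):
        #   UPSTREAM_CHECK_REGEX = r"foo-(?P<pver>\d+(\.\d+)+)\.tar\.gz"
        # A custom regex should provide at least the 'pver' named group, since
        # _parse_path() and _check_latest_version() read the version from it.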

    def latest_versionstring(self, ud, d):
        """
        Manipulate the URL and try to obtain the latest package version

        Sanity checks ensure the name and archive type match.
        """
        package = ud.path.split("/")[-1]
        current_version = ['', d.getVar('PV'), '']

        # It's possible to have no version in the package name, e.g. spectrum-fw
        if not re.search(r"\d+", package):
            current_version[1] = re.sub('_', '.', current_version[1])
            current_version[1] = re.sub('-', '.', current_version[1])
            return (current_version[1], '')

        package_regex = self._init_regexes(package, ud, d)
        if package_regex is None:
            bb.warn("latest_versionstring: package %s doesn't match pattern" % (package))
            return ('', '')
        bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))

        uri = ""
        regex_uri = d.getVar("UPSTREAM_CHECK_URI")
        if not regex_uri:
            path = ud.path.split(package)[0]

            # search for version matches on folders inside the path, like:
            # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
            dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
            m = dirver_regex.search(path)
            if m:
                pn = d.getVar('PN')
                dirver = m.group('dirver')

                dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
                if not dirver_pn_regex.search(dirver):
                    return (self._check_latest_version_by_dir(dirver,
                        package, package_regex, current_version, ud, d), '')

            uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])
        else:
            uri = regex_uri

        return (self._check_latest_version(uri, package, package_regex,
                current_version, ud, d), '')
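
        # Illustrative recipe setting (assumption, not from this file):
        #   UPSTREAM_CHECK_URI = "https://example.com/releases/"
        # latest_versionstring() then scrapes that page with the package regex
        # and returns e.g. ('1.2.3', '') for the newest matching archive.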