# ex:ts=4:sw=4:sts=4:et
# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
"""
BitBake 'Fetch' implementations

Classes for obtaining upstream sources for the
BitBake build tools.

"""

# Copyright (C) 2003, 2004  Chris Larson
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Based on functions from the base bb module, Copyright 2003 Holger Schurig
import re
import tempfile
import subprocess
import os
import logging
import errno
import bb
import bb.progress
import urllib.request, urllib.parse, urllib.error
from bb.fetch2 import FetchMethod
from bb.fetch2 import FetchError
from bb.fetch2 import logger
from bb.fetch2 import runfetchcmd
from bb.utils import export_proxies
from bs4 import BeautifulSoup
from bs4 import SoupStrainer

class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
    """
    Extract progress information from wget output.
    Note: relies on --progress=dot (with -v or without -q/-nv) being
    specified on the wget command line.
    """
    def __init__(self, d):
        super(WgetProgressHandler, self).__init__(d)
        # Send an initial progress event so the bar gets shown
        self._fire_progress(0)

    def writeline(self, line):
        percs = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line)
        if percs:
            progress = int(percs[-1][0])
            rate = percs[-1][1] + '/s'
            self.update(progress, rate)
            return False
        return True

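# Illustrative example for WgetProgressHandler.writeline() (not part of the
# original module): on a typical wget --progress=dot line, the regex picks up
# the last percentage/rate pair, e.g.:
#
#   >>> re.findall(r'(\d+)%\s+([\d.]+[A-Z])', " 51200K .......... 48% 1.42M 12s")
#   [('48', '1.42M')]
#
# so update() is called with progress=48 and rate='1.42M/s'.
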
class Wget(FetchMethod):
    """Class to fetch urls via 'wget'"""
    def supports(self, ud, d):
        """
        Check to see if a given url can be fetched with wget.
        """
        return ud.type in ['http', 'https', 'ftp']

    def recommends_checksum(self, urldata):
        return True

    def urldata_init(self, ud, d):
        if 'protocol' in ud.parm:
            if ud.parm['protocol'] == 'git':
                raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)

        if 'downloadfilename' in ud.parm:
            ud.basename = ud.parm['downloadfilename']
        else:
            ud.basename = os.path.basename(ud.path)

        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
        if not ud.localfile:
            ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))

        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate"

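    # Illustrative example (hypothetical SRC_URI, not part of the original
    # module): for
    #   https://example.com/releases/foo-1.0.tar.gz;downloadfilename=foo.tar.gz
    # urldata_init() sets ud.basename = "foo.tar.gz" and hence
    # ud.localfile = "foo.tar.gz"; without the parameter, the basename of the
    # URL path, "foo-1.0.tar.gz", is used instead.
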
    def _runwget(self, ud, d, command, quiet, workdir=None):

        progresshandler = WgetProgressHandler(d)

        logger.debug(2, "Fetching %s using command '%s'" % (ud.url, command))
        bb.fetch2.check_network_access(d, command, ud.url)
        runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)

    def download(self, ud, d):
        """Fetch urls"""

        fetchcmd = self.basecmd

        if 'downloadfilename' in ud.parm:
            dldir = d.getVar("DL_DIR")
            bb.utils.mkdirhier(os.path.dirname(dldir + os.sep + ud.localfile))
            fetchcmd += " -O " + dldir + os.sep + ud.localfile

        if ud.user and ud.pswd:
            fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd)

        uri = ud.url.split(";")[0]
        if os.path.exists(ud.localpath):
            # The file exists, but we didn't complete it; try again
            fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
        else:
            fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)

        self._runwget(ud, d, fetchcmd, False)

        # Sanity check since wget can pretend it succeeded when it didn't.
        # Also, this used to happen if sourceforge sent us to the mirror page.
        if not os.path.exists(ud.localpath):
            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)

        if os.path.getsize(ud.localpath) == 0:
            os.remove(ud.localpath)
            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)

        return True

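    # Illustrative sketch (hypothetical URL, not part of the original module):
    # for a fresh download of https://example.com/foo-1.0.tar.gz the command
    # built above expands to roughly:
    #
    #   /usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate \
    #       -P ${DL_DIR} 'https://example.com/foo-1.0.tar.gz' --progress=dot -v
    #
    # with "-c" inserted before "-P" when a partial ${DL_DIR}/foo-1.0.tar.gz
    # already exists.
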
    def checkstatus(self, fetch, ud, d, try_again=True):
        import urllib.request, urllib.error, urllib.parse, socket, http.client
        from urllib.response import addinfourl
        from bb.fetch2 import FetchConnectionCache

        class HTTPConnectionCache(http.client.HTTPConnection):
            if fetch.connection_cache:
                def connect(self):
                    """Connect to the host and port specified in __init__."""

                    sock = fetch.connection_cache.get_connection(self.host, self.port)
                    if sock:
                        self.sock = sock
                    else:
                        self.sock = socket.create_connection((self.host, self.port),
                                    self.timeout, self.source_address)
                        fetch.connection_cache.add_connection(self.host, self.port, self.sock)

                    if self._tunnel_host:
                        self._tunnel()

        class CacheHTTPHandler(urllib.request.HTTPHandler):
            def http_open(self, req):
                return self.do_open(HTTPConnectionCache, req)

            def do_open(self, http_class, req):
                """Return an addinfourl object for the request, using http_class.

                http_class must implement the HTTPConnection API from http.client.
                The addinfourl return value is a file-like object. It also
                has methods and attributes including:
                    - info(): return the headers of the response
                    - geturl(): return the original request URL
                    - code: HTTP status code
                """
                host = req.host
                if not host:
                    raise urllib.error.URLError('no host given')

                h = http_class(host, timeout=req.timeout) # will parse host:port
                h.set_debuglevel(self._debuglevel)

                headers = dict(req.unredirected_hdrs)
                headers.update(dict((k, v) for k, v in list(req.headers.items())
                            if k not in headers))

                # We want to make an HTTP/1.1 request, but the addinfourl
                # class isn't prepared to deal with a persistent connection.
                # It will try to read all remaining data from the socket,
                # which will block while the server waits for the next request.
                # So make sure the connection gets closed after the (only)
                # request.

                # Don't close connection when connection_cache is enabled,
                if fetch.connection_cache is None:
                    headers["Connection"] = "close"
                else:
                    headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0

                headers = dict(
                    (name.title(), val) for name, val in list(headers.items()))

                if req._tunnel_host:
                    tunnel_headers = {}
                    proxy_auth_hdr = "Proxy-Authorization"
                    if proxy_auth_hdr in headers:
                        tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                        # Proxy-Authorization should not be sent to origin
                        # server.
                        del headers[proxy_auth_hdr]
                    h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

                try:
                    h.request(req.get_method(), req.selector, req.data, headers)
                except socket.error as err: # XXX what error?
                    # Don't close connection when cache is enabled.
                    # Instead, try to detect connections that are no longer
                    # usable (for example, closed unexpectedly) and remove
                    # them from the cache.
                    if fetch.connection_cache is None:
                        h.close()
                    elif isinstance(err, OSError) and err.errno == errno.EBADF:
                        # This happens when the server closes the connection despite the Keep-Alive.
                        # Apparently urllib then uses the file descriptor, expecting it to be
                        # connected, when in reality the connection is already gone.
                        # We let the request fail and expect it to be
                        # tried once more ("try_again" in checkstatus()),
                        # with the dead connection removed from the cache.
                        # If it still fails, we give up, which can happen for bad
                        # HTTP proxy settings.
                        fetch.connection_cache.remove_connection(h.host, h.port)
                    raise urllib.error.URLError(err)
                else:
                    try:
                        r = h.getresponse(buffering=True)
                    except TypeError: # buffering kw not supported
                        r = h.getresponse()

                # Pick apart the HTTPResponse object to get the addinfourl
                # object initialized properly.

                # Wrap the HTTPResponse object in socket's file object adapter
                # for Windows.  That adapter calls recv(), so delegate recv()
                # to read().  This weird wrapping allows the returned object to
                # have readline() and readlines() methods.

                # XXX It might be better to extract the read buffering code
                # out of socket._fileobject() and into a base class.
                r.recv = r.read

                # no data, just have to read
                r.read()
                class fp_dummy(object):
                    def read(self):
                        return ""
                    def readline(self):
                        return ""
                    def close(self):
                        pass

                resp = addinfourl(fp_dummy(), r.msg, req.get_full_url())
                resp.code = r.status
                resp.msg = r.reason

                # Close connection when the server requests it.
                if fetch.connection_cache is not None:
                    if 'Connection' in r.msg and r.msg['Connection'] == 'close':
                        fetch.connection_cache.remove_connection(h.host, h.port)

                return resp

        class HTTPMethodFallback(urllib.request.BaseHandler):
            """
            Fallback to GET if HEAD is not allowed (405 HTTP error)
            """
            def http_error_405(self, req, fp, code, msg, headers):
                fp.read()
                fp.close()

                newheaders = dict((k, v) for k, v in list(req.headers.items())
                                  if k.lower() not in ("content-length", "content-type"))
                return self.parent.open(urllib.request.Request(req.get_full_url(),
                                                               headers=newheaders,
                                                               origin_req_host=req.origin_req_host,
                                                               unverifiable=True))

            """
            Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
            Forbidden when they actually mean 405 Method Not Allowed.
            """
            http_error_403 = http_error_405


        class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
            """
            urllib.request.HTTPRedirectHandler resets the method to GET on
            redirect, when we want to follow redirects using the original
            method.
            """
            def redirect_request(self, req, fp, code, msg, headers, newurl):
                newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
                newreq.get_method = lambda: req.get_method()
                return newreq

        exported_proxies = export_proxies(d)

        handlers = [FixedHTTPRedirectHandler, HTTPMethodFallback]
        if exported_proxies:
            handlers.append(urllib.request.ProxyHandler())
        handlers.append(CacheHTTPHandler())
        # XXX: Since Python 2.7.9 ssl cert validation is enabled by default
        # see PEP-0476, this causes verification errors on some https servers
        # so disable by default.
        import ssl
        if hasattr(ssl, '_create_unverified_context'):
            handlers.append(urllib.request.HTTPSHandler(context=ssl._create_unverified_context()))
        opener = urllib.request.build_opener(*handlers)

        try:
            uri = ud.url.split(";")[0]
            r = urllib.request.Request(uri)
            r.get_method = lambda: "HEAD"
            # Some servers (FusionForge, as used on Alioth) require that the
            # optional Accept header is set.
            r.add_header("Accept", "*/*")
            def add_basic_auth(login_str, request):
                '''Adds Basic auth to http request, pass in login:password as string'''
                import base64
                encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
                authheader = "Basic %s" % encodeuser
                request.add_header("Authorization", authheader)

            if ud.user:
                add_basic_auth(ud.user, r)

            try:
                import netrc, urllib.parse
                n = netrc.netrc()
                login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname)
                add_basic_auth("%s:%s" % (login, password), r)
            except (TypeError, ImportError, IOError, netrc.NetrcParseError):
                pass

            opener.open(r)
        except urllib.error.URLError as e:
            if try_again:
                logger.debug(2, "checkstatus: trying again")
                return self.checkstatus(fetch, ud, d, False)
            else:
                # debug for now to avoid spamming the logs in e.g. remote sstate searches
                logger.debug(2, "checkstatus() urlopen failed: %s" % e)
                return False
        return True

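    # Illustrative usage (hypothetical URL, not part of the original module):
    # checkstatus() probes a URL with a HEAD request (falling back to GET on
    # 405/403 via HTTPMethodFallback) and returns True/False; it is normally
    # reached through the bb.fetch2.Fetch front end, e.g.:
    #
    #   >>> fetcher = bb.fetch2.Fetch(["https://example.com/foo-1.0.tar.gz"], d)
    #   >>> fetcher.checkstatus()  # raises FetchError if the probe fails
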
    def _parse_path(self, regex, s):
        """
        Find and group name, version and archive type in the given string s
        """

        m = regex.search(s)
        if m:
            pname = ''
            pver = ''
            ptype = ''

            mdict = m.groupdict()
            if 'name' in mdict.keys():
                pname = mdict['name']
            if 'pver' in mdict.keys():
                pver = mdict['pver']
            if 'type' in mdict.keys():
                ptype = mdict['type']

            bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype))

            return (pname, pver, ptype)

        return None

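    # Illustrative example (not part of the original module): with the generic
    # regex built by _init_regexes(), a typical tarball name splits roughly as
    #
    #   >>> self._parse_path(regex, "gnome-common-2.20.0.tar.gz")
    #   ('gnome-common-', '2.20.0', 'tar.gz')
    #
    # i.e. the trailing separator stays attached to the name group.
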
    def _modelate_version(self, version):
        if version[0] in ['.', '-']:
            if version[1].isdigit():
                version = version[1] + version[0] + version[2:len(version)]
            else:
                version = version[1:len(version)]

        version = re.sub('-', '.', version)
        version = re.sub('_', '.', version)
        version = re.sub('(rc)+', '.1000.', version)
        version = re.sub('(beta)+', '.100.', version)
        version = re.sub('(alpha)+', '.10.', version)
        if version[0] == 'v':
            version = version[1:len(version)]
        return version

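    # Illustrative examples (not part of the original module): the rewrites
    # above map pre-release tags onto comparable numeric components, e.g.:
    #
    #   >>> self._modelate_version("2.0rc1")
    #   '2.0.1000.1'
    #   >>> self._modelate_version("1.2beta3")
    #   '1.2.100.3'
    #   >>> self._modelate_version("v1.4")
    #   '1.4'
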
    def _vercmp(self, old, new):
        """
        Check whether 'new' is newer than the 'old' version. We use the
        existing vercmp() for the purpose. PE is cleared in the comparison as
        it's not used for the build, and PR is cleared too for simplicity, as
        it is difficult to extract from the various upstream formats.
        """

        (oldpn, oldpv, oldsuffix) = old
        (newpn, newpv, newsuffix) = new

        # Check for a new suffix type that we have never heard of before
        if (newsuffix):
            m = self.suffix_regex_comp.search(newsuffix)
            if not m:
                bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
                return False

        # Not our package so ignore it
        if oldpn != newpn:
            return False

        oldpv = self._modelate_version(oldpv)
        newpv = self._modelate_version(newpv)

        return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, ""))

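    # Illustrative example (not part of the original module): the tuples come
    # from _parse_path(), and a negative result means 'new' is newer, which is
    # how the callers below use it:
    #
    #   >>> self._vercmp(('foo-', '1.2', 'tar.gz'), ('foo-', '1.10', 'tar.gz')) < 0
    #   True
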
    def _fetch_index(self, uri, ud, d):
        """
        Run wget to fetch the given uri and return the resulting directory
        listing, or an empty string on failure
        """
        with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
            agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.12) Gecko/20101027 Ubuntu/9.10 (karmic) Firefox/3.6.12"
            fetchcmd = self.basecmd
            fetchcmd += " -O " + f.name + " --user-agent='" + agent + "' '" + uri + "'"
            try:
                self._runwget(ud, d, fetchcmd, True, workdir=workdir)
                fetchresult = f.read()
            except bb.fetch2.BBFetchException:
                fetchresult = ""

        return fetchresult

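    # Illustrative note (hypothetical listing, not part of the original
    # module): the returned string is the raw HTML of a directory index, which
    # the callers below feed to BeautifulSoup; e.g. an Apache listing contains
    # anchors such as:
    #
    #   <a href="foo-1.0.tar.gz">foo-1.0.tar.gz</a>
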
    def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
        """
        Return the latest version of a package inside a given directory path
        If error or no version, return ""
        """
        valid = 0
        version = ['', '', '']

        bb.debug(3, "VersionURL: %s" % (url))
        soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            bb.debug(3, "*** %s NO SOUP" % (url))
            return ""

        for line in soup.find_all('a', href=True):
            bb.debug(3, "line['href'] = '%s'" % (line['href']))
            bb.debug(3, "line = '%s'" % (str(line)))

            newver = self._parse_path(package_regex, line['href'])
            if not newver:
                newver = self._parse_path(package_regex, str(line))

            if newver:
                bb.debug(3, "Upstream version found: %s" % newver[1])
                if valid == 0:
                    version = newver
                    valid = 1
                elif self._vercmp(version, newver) < 0:
                    version = newver

        pupver = re.sub('_', '.', version[1])

        bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
                 (package, pupver or "N/A", current_version[1]))

        if valid:
            return pupver

        return ""

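    # Illustrative example (hypothetical listing, not part of the original
    # module): if the page at `url` links foo-1.0.tar.gz and foo-1.2.tar.gz,
    # then for package "foo-1.0.tar.gz":
    #
    #   >>> self._check_latest_version(url, "foo-1.0.tar.gz", regex,
    #   ...                            ('', '1.0', ''), ud, d)
    #   '1.2'
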
    def _check_latest_version_by_dir(self, dirver, package, package_regex,
            current_version, ud, d):
        """
        Scan every directory in order to get upstream version.
        """
        version_dir = ['', '', '']
        version = ['', '', '']

        dirver_regex = re.compile("(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])+(\d+))")
        s = dirver_regex.search(dirver)
        if s:
            version_dir[1] = s.group('ver')
        else:
            version_dir[1] = dirver

        dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
                ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
        bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))

        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            return version[1]

        for line in soup.find_all('a', href=True):
            s = dirver_regex.search(line['href'].strip("/"))
            if s:
                sver = s.group('ver')

                # When the prefix is part of the version directory we need to
                # make sure that only the version directory is used, so strip
                # off any preceding directories.
                #
                # Example: pfx = '/dir1/dir2/v' and version = '2.5', the
                # expected result is v2.5.
                spfx = s.group('pfx').split('/')[-1]

                version_dir_new = ['', sver, '']
                if self._vercmp(version_dir, version_dir_new) <= 0:
                    dirver_new = spfx + sver
                    path = ud.path.replace(dirver, dirver_new, True) \
                        .split(package)[0]
                    uri = bb.fetch.encodeurl([ud.type, ud.host, path,
                        ud.user, ud.pswd, {}])

                    pupver = self._check_latest_version(uri,
                            package, package_regex, current_version, ud, d)
                    if pupver:
                        version[1] = pupver

                    version_dir = version_dir_new

        return version[1]

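    # Illustrative example (hypothetical layout, not part of the original
    # module): for ud.path = "/sources/foo/5.7/foo-5.7.1.tar.gz" and
    # dirver = "5.7", the listing of /sources/foo/ is scanned; if it also
    # contains "5.8/", the search continues in /sources/foo/5.8/ and the
    # newest foo-5.8.x version found there is returned.
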
    def _init_regexes(self, package, ud, d):
        """
        Match as many patterns as possible such as:
                gnome-common-2.20.0.tar.gz (most common format)
                gtk+-2.90.1.tar.gz
                xf86-input-synaptics-12.6.9.tar.gz
                dri2proto-2.3.tar.gz
                blktool_4.orig.tar.gz
                libid3tag-0.15.1b.tar.gz
                unzip552.tar.gz
                icu4c-3_6-src.tgz
                genext2fs_1.3.orig.tar.gz
                gst-fluendo-mp3
        """
        # match most patterns which use "-" as separator to version digits
        pn_prefix1 = "[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
        # a loose pattern such as for unzip552.tar.gz
        pn_prefix2 = "[a-zA-Z]+"
        # a loose pattern such as for 80325-quicky-0.4.tar.gz
        pn_prefix3 = "[0-9]+[-]?[a-zA-Z]+"
        # Save the Package Name (pn) Regex for use later
        pn_regex = "(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)

        # match version
        pver_regex = "(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"

        # match arch
        parch_regex = "-source|_all_"

        # src.rpm extension was added only for rpm packages. Can be removed
        # if rpm packages will always be considered as having to be manually
        # upgraded
        psuffix_regex = "(tar\.gz|tgz|tar\.bz2|zip|xz|tar\.lz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)"

        # match name, version and archive type of a package
        package_regex_comp = re.compile("(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
                                        % (pn_regex, pver_regex, parch_regex, psuffix_regex))
        self.suffix_regex_comp = re.compile(psuffix_regex)

        # compile regex, can be specific by package or generic regex
        pn_regex = d.getVar('UPSTREAM_CHECK_REGEX')
        if pn_regex:
            package_custom_regex_comp = re.compile(pn_regex)
        else:
            version = self._parse_path(package_regex_comp, package)
            if version:
                package_custom_regex_comp = re.compile(
                    "(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
                    (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))
            else:
                package_custom_regex_comp = None

        return package_custom_regex_comp

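    # Illustrative example (hypothetical recipe snippet, not part of the
    # original module): a recipe can bypass the generated pattern entirely,
    # as long as the custom regex provides the named groups _parse_path()
    # looks for:
    #
    #   UPSTREAM_CHECK_REGEX = "foo-(?P<pver>\d+(\.\d+)+)\.tar\.gz"
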
    def latest_versionstring(self, ud, d):
        """
        Manipulate the URL and try to obtain the latest package version

        sanity check to ensure same name and type.
        """
        package = ud.path.split("/")[-1]
        current_version = ['', d.getVar('PV'), '']

        """possible to have no version in pkg name, such as spectrum-fw"""
        if not re.search("\d+", package):
            current_version[1] = re.sub('_', '.', current_version[1])
            current_version[1] = re.sub('-', '.', current_version[1])
            return (current_version[1], '')

        package_regex = self._init_regexes(package, ud, d)
        if package_regex is None:
            bb.warn("latest_versionstring: package %s doesn't match pattern" % (package))
            return ('', '')
        bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))

        uri = ""
        regex_uri = d.getVar("UPSTREAM_CHECK_URI")
        if not regex_uri:
            path = ud.path.split(package)[0]

            # search for version matches on folders inside the path, like:
            # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
            dirver_regex = re.compile("(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
            m = dirver_regex.search(path)
            if m:
                pn = d.getVar('PN')
                dirver = m.group('dirver')

                dirver_pn_regex = re.compile("%s\d?" % (re.escape(pn)))
                if not dirver_pn_regex.search(dirver):
                    return (self._check_latest_version_by_dir(dirver,
                        package, package_regex, current_version, ud, d), '')

            uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])
        else:
            uri = regex_uri

        return (self._check_latest_version(uri, package, package_regex,
                current_version, ud, d), '')
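
# Illustrative usage (hypothetical recipe values, not part of the original
# module): latest_versionstring() drives upstream-version checks; when the
# download URL is not browsable, a recipe can point the scan at an explicit
# index page instead:
#
#   UPSTREAM_CHECK_URI = "https://example.com/releases/"
#
# in which case that page is fetched and scanned directly, bypassing the
# directory walk derived from SRC_URI.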