# ex:ts=4:sw=4:sts=4:et
# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
"""
BitBake 'Fetch' implementations

Classes for obtaining upstream sources for the
BitBake build tools.

"""

# Copyright (C) 2003, 2004 Chris Larson
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Based on functions from the base bb module, Copyright 2003 Holger Schurig

import re
import tempfile
import subprocess
import os
import logging
import bb
import urllib
from bb import data
from bb.fetch2 import FetchMethod
from bb.fetch2 import FetchError
from bb.fetch2 import logger
from bb.fetch2 import runfetchcmd
from bb.utils import export_proxies
from bs4 import BeautifulSoup
from bs4 import SoupStrainer

class Wget(FetchMethod):
    """Class to fetch urls via 'wget'"""
    def supports(self, ud, d):
        """
        Check to see if a given url can be fetched with wget.
        """
        return ud.type in ['http', 'https', 'ftp']

    def recommends_checksum(self, urldata):
        return True

    def urldata_init(self, ud, d):
        if 'protocol' in ud.parm:
            if ud.parm['protocol'] == 'git':
                raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)

        if 'downloadfilename' in ud.parm:
            ud.basename = ud.parm['downloadfilename']
        else:
            ud.basename = os.path.basename(ud.path)

        ud.localfile = data.expand(urllib.unquote(ud.basename), d)
        if not ud.localfile:
            ud.localfile = data.expand(urllib.unquote(ud.host + ud.path).replace("/", "."), d)

        self.basecmd = d.getVar("FETCHCMD_wget", True) or "/usr/bin/env wget -t 2 -T 30 -nv --passive-ftp --no-check-certificate"

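    # Illustrative mapping (hypothetical URL): for
    #   https://example.com/releases/foo-1.0.tar.gz;downloadfilename=foo.tar.gz
    # ud.basename becomes "foo.tar.gz"; without the parameter it would be
    # "foo-1.0.tar.gz". The host+path fallback only applies when the URL
    # path yields an empty basename (e.g. a trailing "/").
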
    def _runwget(self, ud, d, command, quiet):

        logger.debug(2, "Fetching %s using command '%s'" % (ud.url, command))
        bb.fetch2.check_network_access(d, command)
        runfetchcmd(command, d, quiet)

    def download(self, ud, d):
        """Fetch urls"""

        fetchcmd = self.basecmd

        if 'downloadfilename' in ud.parm:
            dldir = d.getVar("DL_DIR", True)
            bb.utils.mkdirhier(os.path.dirname(dldir + os.sep + ud.localfile))
            fetchcmd += " -O " + dldir + os.sep + ud.localfile

        uri = ud.url.split(";")[0]
        if os.path.exists(ud.localpath):
            # file exists, but we didn't complete it, so try again
            fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
        else:
            fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)

        self._runwget(ud, d, fetchcmd, False)

        # Sanity check since wget can pretend it succeeded when it didn't
        # Also, this used to happen if sourceforge sent us to the mirror page
        if not os.path.exists(ud.localpath):
            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)

        if os.path.getsize(ud.localpath) == 0:
            os.remove(ud.localpath)
            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)

        return True

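    # Illustrative command line (not from a real run): with DL_DIR=/downloads
    # and a fresh fetch of http://example.com/foo-1.0.tar.gz, the command is
    # roughly:
    #   /usr/bin/env wget -t 2 -T 30 -nv --passive-ftp --no-check-certificate \
    #       -P /downloads 'http://example.com/foo-1.0.tar.gz'
    # A partially downloaded file adds "-c" to resume instead.
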
    def checkstatus(self, fetch, ud, d):
        import urllib2, socket, httplib
        from urllib import addinfourl
        from bb.fetch2 import FetchConnectionCache

        class HTTPConnectionCache(httplib.HTTPConnection):
            if fetch.connection_cache:
                def connect(self):
                    """Connect to the host and port specified in __init__."""

                    sock = fetch.connection_cache.get_connection(self.host, self.port)
                    if sock:
                        self.sock = sock
                    else:
                        self.sock = socket.create_connection((self.host, self.port),
                                    self.timeout, self.source_address)
                        fetch.connection_cache.add_connection(self.host, self.port, self.sock)

                    if self._tunnel_host:
                        self._tunnel()

        class CacheHTTPHandler(urllib2.HTTPHandler):
            def http_open(self, req):
                return self.do_open(HTTPConnectionCache, req)

            def do_open(self, http_class, req):
                """Return an addinfourl object for the request, using http_class.

                http_class must implement the HTTPConnection API from httplib.
                The addinfourl return value is a file-like object. It also
                has methods and attributes including:
                    - info(): return a mimetools.Message object for the headers
                    - geturl(): return the original request URL
                    - code: HTTP status code
                """
                host = req.get_host()
                if not host:
                    raise urllib2.URLError('no host given')

                h = http_class(host, timeout=req.timeout) # will parse host:port
                h.set_debuglevel(self._debuglevel)

                headers = dict(req.unredirected_hdrs)
                headers.update(dict((k, v) for k, v in req.headers.items()
                            if k not in headers))

                # We want to make an HTTP/1.1 request, but the addinfourl
                # class isn't prepared to deal with a persistent connection.
                # It will try to read all remaining data from the socket,
                # which will block while the server waits for the next request.
                # So make sure the connection gets closed after the (only)
                # request.

                # Don't close the connection when the connection cache is enabled.
                if fetch.connection_cache is None:
                    headers["Connection"] = "close"
                else:
                    headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0

                headers = dict(
                    (name.title(), val) for name, val in headers.items())

                if req._tunnel_host:
                    tunnel_headers = {}
                    proxy_auth_hdr = "Proxy-Authorization"
                    if proxy_auth_hdr in headers:
                        tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                        # Proxy-Authorization should not be sent to origin
                        # server.
                        del headers[proxy_auth_hdr]
                    h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

                try:
                    h.request(req.get_method(), req.get_selector(), req.data, headers)
                except socket.error, err: # XXX what error?
                    # Don't close the connection when the cache is enabled.
                    if fetch.connection_cache is None:
                        h.close()
                    raise urllib2.URLError(err)
                else:
                    try:
                        r = h.getresponse(buffering=True)
                    except TypeError: # buffering kw not supported
                        r = h.getresponse()

                # Pick apart the HTTPResponse object to get the addinfourl
                # object initialized properly.

                # Wrap the HTTPResponse object in socket's file object adapter
                # for Windows. That adapter calls recv(), so delegate recv()
                # to read(). This weird wrapping allows the returned object to
                # have readline() and readlines() methods.

                # XXX It might be better to extract the read buffering code
                # out of socket._fileobject() and into a base class.
                r.recv = r.read

                # no data, just have to read
                r.read()
                class fp_dummy(object):
                    def read(self):
                        return ""
                    def readline(self):
                        return ""
                    def close(self):
                        pass

                resp = addinfourl(fp_dummy(), r.msg, req.get_full_url())
                resp.code = r.status
                resp.msg = r.reason

                # Close the connection when the server requests it.
                if fetch.connection_cache is not None:
                    if 'Connection' in r.msg and r.msg['Connection'] == 'close':
                        fetch.connection_cache.remove_connection(h.host, h.port)

                return resp

        class HTTPMethodFallback(urllib2.BaseHandler):
            """
            Fallback to GET if HEAD is not allowed (405 HTTP error)
            """
            def http_error_405(self, req, fp, code, msg, headers):
                fp.read()
                fp.close()

                newheaders = dict((k,v) for k,v in req.headers.items()
                                  if k.lower() not in ("content-length", "content-type"))
                return self.parent.open(urllib2.Request(req.get_full_url(),
                                        headers=newheaders,
                                        origin_req_host=req.get_origin_req_host(),
                                        unverifiable=True))

            """
            Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
            Forbidden when they actually mean 405 Method Not Allowed.
            """
            http_error_403 = http_error_405

            """
            Some servers (e.g. FusionForge) return 406 Not Acceptable when they
            actually mean 405 Method Not Allowed.
            """
            http_error_406 = http_error_405

        class FixedHTTPRedirectHandler(urllib2.HTTPRedirectHandler):
            """
            urllib2.HTTPRedirectHandler resets the method to GET on redirect,
            but we want to follow redirects using the original method.
            """
            def redirect_request(self, req, fp, code, msg, headers, newurl):
                newreq = urllib2.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
                newreq.get_method = lambda: req.get_method()
                return newreq
        exported_proxies = export_proxies(d)

        handlers = [FixedHTTPRedirectHandler, HTTPMethodFallback]
        if exported_proxies:
            handlers.append(urllib2.ProxyHandler())
        handlers.append(CacheHTTPHandler())
        # XXX: Since Python 2.7.9 ssl cert validation is enabled by default
        # (see PEP 476). This causes verification errors on some https
        # servers, so disable validation by default.
        import ssl
        if hasattr(ssl, '_create_unverified_context'):
            handlers.append(urllib2.HTTPSHandler(context=ssl._create_unverified_context()))
        opener = urllib2.build_opener(*handlers)

        try:
            uri = ud.url.split(";")[0]
            r = urllib2.Request(uri)
            r.get_method = lambda: "HEAD"
            opener.open(r)
        except urllib2.URLError as e:
            # debug for now to avoid spamming the logs in e.g. remote sstate searches
            logger.debug(2, "checkstatus() urlopen failed: %s" % e)
            return False
        return True

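    # Illustrative behaviour (hypothetical URL): checkstatus() sends a HEAD
    # request for e.g. http://example.com/foo.tar.gz through the handler
    # chain above; redirects keep the original method, 403/405/406 responses
    # fall back to GET, and any URLError simply yields False.
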
    def _parse_path(self, regex, s):
        """
        Find and group name, version and archive type in the given string s
        """

        m = regex.search(s)
        if m:
            pname = ''
            pver = ''
            ptype = ''

            mdict = m.groupdict()
            if 'name' in mdict.keys():
                pname = mdict['name']
            if 'pver' in mdict.keys():
                pver = mdict['pver']
            if 'type' in mdict.keys():
                ptype = mdict['type']

            bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype))

            return (pname, pver, ptype)

        return None

    def _modelate_version(self, version):
        if version[0] in ['.', '-']:
            if version[1].isdigit():
                version = version[1] + version[0] + version[2:len(version)]
            else:
                version = version[1:len(version)]

        version = re.sub('-', '.', version)
        version = re.sub('_', '.', version)
        version = re.sub('(rc)+', '.1000.', version)
        version = re.sub('(beta)+', '.100.', version)
        version = re.sub('(alpha)+', '.10.', version)
        if version[0] == 'v':
            version = version[1:len(version)]
        return version

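    # Illustrative normalizations (worked by hand, not exhaustive):
    #   "1.0beta2" -> "1.0.100.2"   ("beta" replaced by the .100. marker)
    #   "v1.2.3"   -> "1.2.3"       (leading "v" stripped)
    # The numeric markers make pre-release suffixes comparable with
    # bb.utils.vercmp(): alpha (.10.) < beta (.100.) < rc (.1000.).
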
    def _vercmp(self, old, new):
        """
        Check whether 'new' is newer than 'old'. We reuse the existing
        vercmp() for the comparison. PE is cleared since it is not relevant
        for the build, and PR is cleared too, for simplicity, as it is hard
        to extract from the various upstream naming formats.
        """

        (oldpn, oldpv, oldsuffix) = old
        (newpn, newpv, newsuffix) = new

        """
        Check for a new suffix type that we have never heard of before
        """
        if (newsuffix):
            m = self.suffix_regex_comp.search(newsuffix)
            if not m:
                bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
                return False

        """
        Not our package so ignore it
        """
        if oldpn != newpn:
            return False

        oldpv = self._modelate_version(oldpv)
        newpv = self._modelate_version(newpv)

        return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, ""))

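    # Illustrative comparison (assuming cmp-style bb.utils.vercmp semantics,
    # negative when the first argument is older):
    #   self._vercmp(('foo-', '1.0', ''), ('foo-', '1.1', '')) < 0
    # i.e. the 'new' candidate version 1.1 is newer than 1.0.
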
    def _fetch_index(self, uri, ud, d):
        """
        Fetch the index page for uri so it can be parsed for links
        """
        f = tempfile.NamedTemporaryFile()

        agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.12) Gecko/20101027 Ubuntu/9.10 (karmic) Firefox/3.6.12"
        fetchcmd = self.basecmd
        fetchcmd += " -O " + f.name + " --user-agent='" + agent + "' '" + uri + "'"
        try:
            self._runwget(ud, d, fetchcmd, True)
            fetchresult = f.read()
        except bb.fetch2.BBFetchException:
            fetchresult = ""

        f.close()
        return fetchresult

    def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
        """
        Return the latest version of a package inside a given directory path.
        If there is an error or no version is found, return "".
        """
        valid = 0
        version = ['', '', '']

        bb.debug(3, "VersionURL: %s" % (url))
        soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            bb.debug(3, "*** %s NO SOUP" % (url))
            return ""

        for line in soup.find_all('a', href=True):
            bb.debug(3, "line['href'] = '%s'" % (line['href']))
            bb.debug(3, "line = '%s'" % (str(line)))

            newver = self._parse_path(package_regex, line['href'])
            if not newver:
                newver = self._parse_path(package_regex, str(line))

            if newver:
                bb.debug(3, "Upstream version found: %s" % newver[1])
                if valid == 0:
                    version = newver
                    valid = 1
                elif self._vercmp(version, newver) < 0:
                    version = newver

        pupver = re.sub('_', '.', version[1])

        bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
                (package, pupver or "N/A", current_version[1]))

        if valid:
            return pupver

        return ""

    def _check_latest_version_by_dir(self, dirver, package, package_regex,
            current_version, ud, d):
        """
        Scan every directory in order to get the upstream version.
        """
        version_dir = ['', '', '']
        version = ['', '', '']

        dirver_regex = re.compile("(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])+(\d+))")
        s = dirver_regex.search(dirver)
        if s:
            version_dir[1] = s.group('ver')
        else:
            version_dir[1] = dirver

        dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
                ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
        bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))

        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            return version[1]

        for line in soup.find_all('a', href=True):
            s = dirver_regex.search(line['href'].strip("/"))
            if s:
                sver = s.group('ver')

                # When the prefix is part of the version directory, make sure
                # that only the version directory is used, so strip off any
                # preceding directories.
                #
                # Example: pfx = '/dir1/dir2/v' and version = '2.5'; the
                # expected result is v2.5.
                spfx = s.group('pfx').split('/')[-1]

                version_dir_new = ['', sver, '']
                if self._vercmp(version_dir, version_dir_new) <= 0:
                    dirver_new = spfx + sver
                    path = ud.path.replace(dirver, dirver_new, True) \
                        .split(package)[0]
                    uri = bb.fetch.encodeurl([ud.type, ud.host, path,
                            ud.user, ud.pswd, {}])

                    pupver = self._check_latest_version(uri,
                            package, package_regex, current_version, ud, d)
                    if pupver:
                        version[1] = pupver

                    version_dir = version_dir_new

        return version[1]

    def _init_regexes(self, package, ud, d):
        """
        Match as many patterns as possible such as:
                gnome-common-2.20.0.tar.gz (most common format)
                gtk+-2.90.1.tar.gz
                xf86-input-synaptics-12.6.9.tar.gz
                dri2proto-2.3.tar.gz
                blktool_4.orig.tar.gz
                libid3tag-0.15.1b.tar.gz
                unzip552.tar.gz
                icu4c-3_6-src.tgz
                genext2fs_1.3.orig.tar.gz
                gst-fluendo-mp3
        """
        # match most patterns which use "-" as separator to version digits
        pn_prefix1 = "[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
        # a loose pattern such as for unzip552.tar.gz
        pn_prefix2 = "[a-zA-Z]+"
        # a loose pattern such as for 80325-quicky-0.4.tar.gz
        pn_prefix3 = "[0-9]+[-]?[a-zA-Z]+"
        # Save the Package Name (pn) Regex for use later
        pn_regex = "(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)

        # match version
        pver_regex = "(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"

        # match arch
        parch_regex = "-source|_all_"

        # The src.rpm extension was added only for rpm packages. It can be
        # removed if rpm packages will always be considered as having to be
        # manually upgraded.
        psuffix_regex = "(tar\.gz|tgz|tar\.bz2|zip|xz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)"

        # match name, version and archive type of a package
        package_regex_comp = re.compile("(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
                                                    % (pn_regex, pver_regex, parch_regex, psuffix_regex))
        self.suffix_regex_comp = re.compile(psuffix_regex)

        # compile regex, can be specific by package or generic regex
        pn_regex = d.getVar('UPSTREAM_CHECK_REGEX', True)
        if pn_regex:
            package_custom_regex_comp = re.compile(pn_regex)
        else:
            version = self._parse_path(package_regex_comp, package)
            if version:
                package_custom_regex_comp = re.compile(
                    "(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
                    (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))
            else:
                package_custom_regex_comp = None

        return package_custom_regex_comp

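    # Illustrative match (worked by hand): running _parse_path() with the
    # generic package_regex_comp against "gnome-common-2.20.0.tar.gz" yields
    # roughly ('gnome-common-', '2.20.0', 'tar.gz'), and those groups are
    # what the package-specific regex above is built from.
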
    def latest_versionstring(self, ud, d):
        """
        Manipulate the URL and try to obtain the latest package version.

        Sanity check to ensure the same name and type.
        """
        package = ud.path.split("/")[-1]
        current_version = ['', d.getVar('PV', True), '']

        """possible to have no version in pkg name, such as spectrum-fw"""
        if not re.search("\d+", package):
            current_version[1] = re.sub('_', '.', current_version[1])
            current_version[1] = re.sub('-', '.', current_version[1])
            return (current_version[1], '')

        package_regex = self._init_regexes(package, ud, d)
        if package_regex is None:
            bb.warn("latest_versionstring: package %s doesn't match pattern" % (package))
            return ('', '')
        bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))

        uri = ""
        regex_uri = d.getVar("UPSTREAM_CHECK_URI", True)
        if not regex_uri:
            path = ud.path.split(package)[0]

            # search for version matches on folders inside the path, like:
            # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
            dirver_regex = re.compile("(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
            m = dirver_regex.search(path)
            if m:
                pn = d.getVar('PN', True)
                dirver = m.group('dirver')

                dirver_pn_regex = re.compile("%s\d?" % (re.escape(pn)))
                if not dirver_pn_regex.search(dirver):
                    return (self._check_latest_version_by_dir(dirver,
                        package, package_regex, current_version, ud, d), '')

            uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])
        else:
            uri = regex_uri

        return (self._check_latest_version(uri, package, package_regex,
                current_version, ud, d), '')
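
    # Illustrative flow (hypothetical recipe): with PN=gnome-common, PV=2.20.0
    # and SRC_URI http://download.gnome.org/sources/gnome-common/2.20/gnome-common-2.20.0.tar.gz,
    # the "2.20/" path component matches dirver_regex and does not contain PN,
    # so _check_latest_version_by_dir() walks the sibling version directories
    # before scanning each index page for newer tarball links.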