# ex:ts=4:sw=4:sts=4:et
# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
"""
BitBake 'Fetch' implementations

Classes for obtaining upstream sources for the
BitBake build tools.

"""

# Copyright (C) 2003, 2004 Chris Larson
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Based on functions from the base bb module, Copyright 2003 Holger Schurig
import re
import tempfile
import subprocess
import os
import logging
import bb
import urllib
from bb import data
from bb.fetch2 import FetchMethod
from bb.fetch2 import FetchError
from bb.fetch2 import logger
from bb.fetch2 import runfetchcmd
from bs4 import BeautifulSoup

class Wget(FetchMethod):
    """Class to fetch urls via 'wget'"""
    def supports(self, ud, d):
        """
        Check to see if a given url can be fetched with wget.
        """
        return ud.type in ['http', 'https', 'ftp']

    def recommends_checksum(self, urldata):
        return True

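    # Illustrative usage (hypothetical URL): a recipe line such as
    #   SRC_URI = "http://example.com/releases/foo-1.0.tar.gz"
    # lands here because its scheme is http/https/ftp, and BitBake will
    # nag for SRC_URI checksums since recommends_checksum() is True.
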
    def urldata_init(self, ud, d):
        if 'protocol' in ud.parm:
            if ud.parm['protocol'] == 'git':
                raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)

        if 'downloadfilename' in ud.parm:
            ud.basename = ud.parm['downloadfilename']
        else:
            ud.basename = os.path.basename(ud.path)

        ud.localfile = data.expand(urllib.unquote(ud.basename), d)

        self.basecmd = d.getVar("FETCHCMD_wget", True) or "/usr/bin/env wget -t 2 -T 30 -nv --passive-ftp --no-check-certificate"

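    # Sketch of the result, assuming a hypothetical URL:
    #   http://example.com/foo-1.0.tar.gz;downloadfilename=foo.tar.gz
    # yields ud.basename == "foo.tar.gz" (the downloadfilename parameter
    # overrides the basename of the URL path), and ud.localfile is that
    # name with any %xx escapes unquoted and variables expanded.
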
    def _runwget(self, ud, d, command, quiet):

        logger.debug(2, "Fetching %s using command '%s'" % (ud.url, command))
        bb.fetch2.check_network_access(d, command)
        runfetchcmd(command, d, quiet)

    def download(self, ud, d):
        """Fetch urls"""

        fetchcmd = self.basecmd

        if 'downloadfilename' in ud.parm:
            dldir = d.getVar("DL_DIR", True)
            bb.utils.mkdirhier(os.path.dirname(dldir + os.sep + ud.localfile))
            fetchcmd += " -O " + dldir + os.sep + ud.localfile

        uri = ud.url.split(";")[0]
        if os.path.exists(ud.localpath):
            # file exists, but we didn't complete it, so try to resume it
            fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
        else:
            fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)

        self._runwget(ud, d, fetchcmd, False)

        # Sanity check since wget can pretend it succeeded when it didn't
        # Also, this used to happen if sourceforge sent us to the mirror page
        if not os.path.exists(ud.localpath):
            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)

        if os.path.getsize(ud.localpath) == 0:
            os.remove(ud.localpath)
            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)

        return True

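    # For example (hypothetical URL), a fresh download runs roughly:
    #   /usr/bin/env wget -t 2 -T 30 -nv --passive-ftp --no-check-certificate \
    #       -P ${DL_DIR} 'http://example.com/foo-1.0.tar.gz'
    # and a partially downloaded file adds "-c" so wget resumes it.
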
    def checkstatus(self, fetch, ud, d):
        import urllib2, socket, httplib
        from urllib import addinfourl
        from bb.fetch2 import FetchConnectionCache

        class HTTPConnectionCache(httplib.HTTPConnection):
            if fetch.connection_cache:
                def connect(self):
                    """Connect to the host and port specified in __init__."""

                    sock = fetch.connection_cache.get_connection(self.host, self.port)
                    if sock:
                        self.sock = sock
                    else:
                        self.sock = socket.create_connection((self.host, self.port),
                                    self.timeout, self.source_address)
                        fetch.connection_cache.add_connection(self.host, self.port, self.sock)

                    if self._tunnel_host:
                        self._tunnel()

        class CacheHTTPHandler(urllib2.HTTPHandler):
            def http_open(self, req):
                return self.do_open(HTTPConnectionCache, req)

            def do_open(self, http_class, req):
                """Return an addinfourl object for the request, using http_class.

                http_class must implement the HTTPConnection API from httplib.
                The addinfourl return value is a file-like object. It also
                has methods and attributes including:
                    - info(): return a mimetools.Message object for the headers
                    - geturl(): return the original request URL
                    - code: HTTP status code
                """
                host = req.get_host()
                if not host:
                    raise urllib2.URLError('no host given')

                h = http_class(host, timeout=req.timeout) # will parse host:port
                h.set_debuglevel(self._debuglevel)

                headers = dict(req.unredirected_hdrs)
                headers.update(dict((k, v) for k, v in req.headers.items()
                            if k not in headers))

                # We want to make an HTTP/1.1 request, but the addinfourl
                # class isn't prepared to deal with a persistent connection.
                # It will try to read all remaining data from the socket,
                # which will block while the server waits for the next request.
                # So make sure the connection gets closed after the (only)
                # request.

                # Don't close the connection when the connection cache is enabled.
                if fetch.connection_cache is None:
                    headers["Connection"] = "close"
                else:
                    headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0

                headers = dict(
                    (name.title(), val) for name, val in headers.items())

                if req._tunnel_host:
                    tunnel_headers = {}
                    proxy_auth_hdr = "Proxy-Authorization"
                    if proxy_auth_hdr in headers:
                        tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                        # Proxy-Authorization should not be sent to origin
                        # server.
                        del headers[proxy_auth_hdr]
                    h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

                try:
                    h.request(req.get_method(), req.get_selector(), req.data, headers)
                except socket.error, err: # XXX what error?
                    # Don't close the connection when the cache is enabled.
                    if fetch.connection_cache is None:
                        h.close()
                    raise urllib2.URLError(err)
                else:
                    try:
                        r = h.getresponse(buffering=True)
                    except TypeError: # buffering kw not supported
                        r = h.getresponse()

                # Pick apart the HTTPResponse object to get the addinfourl
                # object initialized properly.

                # Wrap the HTTPResponse object in socket's file object adapter
                # for Windows. That adapter calls recv(), so delegate recv()
                # to read(). This weird wrapping allows the returned object to
                # have readline() and readlines() methods.

                # XXX It might be better to extract the read buffering code
                # out of socket._fileobject() and into a base class.
                r.recv = r.read

                # no data, just have to read
                r.read()
                class fp_dummy(object):
                    def read(self):
                        return ""
                    def readline(self):
                        return ""
                    def close(self):
                        pass

                resp = addinfourl(fp_dummy(), r.msg, req.get_full_url())
                resp.code = r.status
                resp.msg = r.reason

                # Close the connection when the server requests it.
                if fetch.connection_cache is not None:
                    if 'Connection' in r.msg and r.msg['Connection'] == 'close':
                        fetch.connection_cache.remove_connection(h.host, h.port)

                return resp

        def export_proxies(d):
            variables = ['http_proxy', 'HTTP_PROXY', 'https_proxy', 'HTTPS_PROXY',
                            'ftp_proxy', 'FTP_PROXY', 'no_proxy', 'NO_PROXY']
            exported = False

            for v in variables:
                if v in os.environ.keys():
                    exported = True
                else:
                    v_proxy = d.getVar(v, True)
                    if v_proxy is not None:
                        os.environ[v] = v_proxy
                        exported = True

            return exported

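        # Note: exporting matters because urllib2's ProxyHandler (built with
        # no arguments below) discovers proxies from os.environ; proxy
        # settings that exist only in BitBake's datastore would otherwise
        # be invisible to the opener.
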
        def head_method(self):
            return "HEAD"

        exported_proxies = export_proxies(d)

        # XXX: Since Python 2.7.9 ssl cert validation is enabled by default,
        # see PEP-0476; this causes verification errors on some https servers,
        # so disable it by default.
        import ssl
        ssl_context = None
        if hasattr(ssl, '_create_unverified_context'):
            ssl_context = ssl._create_unverified_context()

        if exported_proxies and ssl_context is not None:
            opener = urllib2.build_opener(urllib2.ProxyHandler, CacheHTTPHandler,
                    urllib2.HTTPSHandler(context=ssl_context))
        elif not exported_proxies and ssl_context is not None:
            opener = urllib2.build_opener(CacheHTTPHandler,
                    urllib2.HTTPSHandler(context=ssl_context))
        elif exported_proxies and ssl_context is None:
            opener = urllib2.build_opener(urllib2.ProxyHandler, CacheHTTPHandler)
        else:
            opener = urllib2.build_opener(CacheHTTPHandler)

        urllib2.Request.get_method = head_method
        urllib2.install_opener(opener)

        uri = ud.url.split(";")[0]

        try:
            urllib2.urlopen(uri)
        except:
            return False
        return True

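    # In short: checkstatus() monkey-patches urllib2.Request to send HEAD
    # instead of GET, installs the (optionally connection-caching) opener,
    # and treats any exception from urlopen() as "URI not reachable". It
    # answers availability checks; download() above does the real fetching.
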
    def _parse_path(self, regex, s):
        """
        Find and group name, version and archive type in the given string s
        """

        m = regex.search(s)
        if m:
            pname = ''
            pver = ''
            ptype = ''

            mdict = m.groupdict()
            if 'name' in mdict.keys():
                pname = mdict['name']
            if 'pver' in mdict.keys():
                pver = mdict['pver']
            if 'type' in mdict.keys():
                ptype = mdict['type']

            bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype))

            return (pname, pver, ptype)

        return None

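    # Illustrative result with the default regex from _init_regexes():
    #   _parse_path(regex, "gnome-common-2.20.0.tar.gz")
    # returns roughly ("gnome-common-", "2.20.0", "tar.gz"), while a
    # string with no version match returns None.
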
    def _modelate_version(self, version):
        if version[0] in ['.', '-']:
            if version[1].isdigit():
                version = version[1] + version[0] + version[2:len(version)]
            else:
                version = version[1:len(version)]

        version = re.sub('-', '.', version)
        version = re.sub('_', '.', version)
        version = re.sub('(rc)+', '.1000.', version)
        version = re.sub('(beta)+', '.100.', version)
        version = re.sub('(alpha)+', '.10.', version)
        if version[0] == 'v':
            version = version[1:len(version)]
        return version

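    # For example: "v1.2_rc3" -> "1.2..1000.3" and "1.0-beta2" -> "1.0..100.2",
    # so rc/beta/alpha tags become numeric fields (rc > beta > alpha) that
    # bb.utils.vercmp() can compare.
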
    def _vercmp(self, old, new):
        """
        Check whether 'new' is newer than the 'old' version. We use the existing
        vercmp() for this. PE is cleared in the comparison as it's not relevant
        here, and PR is cleared too for simplicity, as it's difficult to extract
        from the various upstream formats.
        """

        (oldpn, oldpv, oldsuffix) = old
        (newpn, newpv, newsuffix) = new

        """
        Check for a new suffix type that we have never heard of before
        """
        if newsuffix:
            m = self.suffix_regex_comp.search(newsuffix)
            if not m:
                bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
                return False

        """
        Not our package so ignore it
        """
        if oldpn != newpn:
            return False

        oldpv = self._modelate_version(oldpv)
        newpv = self._modelate_version(newpv)

        return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, ""))

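    # The return value follows cmp() conventions (negative, zero, positive),
    # so callers test "_vercmp(old, new) < 0" to mean "new is newer". E.g.
    # ("foo-", "1.2", "") vs ("foo-", "1.10", "") is negative, since vercmp
    # treats 1.10 as newer than 1.2.
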
    def _fetch_index(self, uri, ud, d):
        """
        Run wget to fetch the index page for the given uri so it can be
        parsed for directory information
        """
        f = tempfile.NamedTemporaryFile()

        agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.12) Gecko/20101027 Ubuntu/9.10 (karmic) Firefox/3.6.12"
        fetchcmd = self.basecmd
        fetchcmd += " -O " + f.name + " --user-agent='" + agent + "' '" + uri + "'"
        try:
            self._runwget(ud, d, fetchcmd, True)
            fetchresult = f.read()
        except bb.fetch2.BBFetchException:
            fetchresult = ""

        f.close()
        return fetchresult

    def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
        """
        Return the latest version of a package inside a given directory path.
        If there is an error or no version is found, return "".
        """
        valid = 0
        version = ['', '', '']

        bb.debug(3, "VersionURL: %s" % (url))
        soup = BeautifulSoup(self._fetch_index(url, ud, d))
        if not soup:
            bb.debug(3, "*** %s NO SOUP" % (url))
            return ""

        for line in soup.find_all('a', href=True):
            bb.debug(3, "line['href'] = '%s'" % (line['href']))
            bb.debug(3, "line = '%s'" % (str(line)))

            newver = self._parse_path(package_regex, line['href'])
            if not newver:
                newver = self._parse_path(package_regex, str(line))

            if newver:
                bb.debug(3, "Upstream version found: %s" % newver[1])
                if valid == 0:
                    version = newver
                    valid = 1
                elif self._vercmp(version, newver) < 0:
                    version = newver

        pupver = re.sub('_', '.', version[1])

        bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
                (package, pupver or "N/A", current_version[1]))

        if valid:
            return pupver

        return ""

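    # Sketch of the scan, assuming a hypothetical index page: if the listing
    # contains hrefs "foo-1.2.tar.gz" and "foo-1.10.tar.gz", both parse via
    # package_regex and the highest-comparing version is kept, so "1.10"
    # would be returned (after "_" is normalised to ".").
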
    def _check_latest_version_by_dir(self, dirver, package, package_regex,
            current_version, ud, d):
        """
        Scan every versioned directory in order to get the upstream version.
        """
        version_dir = ['', '', '']
        version = ['', '', '']

        dirver_regex = re.compile("(\D*)((\d+[\.\-_])+(\d+))")
        s = dirver_regex.search(dirver)
        if s:
            version_dir[1] = s.group(2)
        else:
            version_dir[1] = dirver

        dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
                ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
        bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))

        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d))
        if not soup:
            return version[1]

        for line in soup.find_all('a', href=True):
            s = dirver_regex.search(line['href'].strip("/"))
            if s:
                version_dir_new = ['', s.group(2), '']
                if self._vercmp(version_dir, version_dir_new) <= 0:
                    dirver_new = s.group(1) + s.group(2)
                    path = ud.path.replace(dirver, dirver_new, True) \
                                .split(package)[0]
                    uri = bb.fetch.encodeurl([ud.type, ud.host, path,
                            ud.user, ud.pswd, {}])

                    pupver = self._check_latest_version(uri,
                            package, package_regex, current_version, ud, d)
                    if pupver:
                        version[1] = pupver

                    version_dir = version_dir_new

        return version[1]

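    # This covers layouts that keep the version in a directory component,
    # e.g. .../sources/${PN}/2.20/${PN}-2.20.0.tar.gz (illustrative): each
    # versioned directory in the parent index is visited and scanned with
    # _check_latest_version() above, keeping the best result.
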
    def _init_regexes(self, package, ud, d):
        """
        Match as many patterns as possible such as:
                gnome-common-2.20.0.tar.gz (most common format)
                gtk+-2.90.1.tar.gz
                xf86-input-synaptics-12.6.9.tar.gz
                dri2proto-2.3.tar.gz
                blktool_4.orig.tar.gz
                libid3tag-0.15.1b.tar.gz
                unzip552.tar.gz
                icu4c-3_6-src.tgz
                genext2fs_1.3.orig.tar.gz
                gst-fluendo-mp3
        """
        # match most patterns, which use "-" as the separator before the version digits
        pn_prefix1 = "[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
        # a loose pattern such as for unzip552.tar.gz
        pn_prefix2 = "[a-zA-Z]+"
        # a loose pattern such as for 80325-quicky-0.4.tar.gz
        pn_prefix3 = "[0-9]+[-]?[a-zA-Z]+"
        # Save the Package Name (pn) Regex for use later
        pn_regex = "(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)

        # match version
        pver_regex = "(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"

        # match arch
        parch_regex = "-source|_all_"

        # the src.rpm extension was added only for rpm packages. It can be
        # removed if rpm packages will always be considered as having to be
        # manually upgraded
        psuffix_regex = "(tar\.gz|tgz|tar\.bz2|zip|xz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)"

        # match name, version and archive type of a package
        package_regex_comp = re.compile("(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
                % (pn_regex, pver_regex, parch_regex, psuffix_regex))
        self.suffix_regex_comp = re.compile(psuffix_regex)

        # compile the regex; it can be package-specific (REGEX) or the generic one
        pn_regex = d.getVar('REGEX', True)
        if pn_regex:
            package_custom_regex_comp = re.compile(pn_regex)
        else:
            version = self._parse_path(package_regex_comp, package)
            if version:
                package_custom_regex_comp = re.compile(
                    "(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
                    (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))
            else:
                package_custom_regex_comp = None

        return package_custom_regex_comp

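    # A recipe can bypass these heuristics with its own pattern, e.g.
    # (illustrative value, not from any real recipe):
    #   REGEX = "(?P<name>foo-)(?P<pver>\d+(\.\d+)+)\.tar\.gz"
    # REGEX_URI, read in latest_versionstring() below, similarly overrides
    # which index page is fetched.
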
    def latest_versionstring(self, ud, d):
        """
        Manipulate the URL and try to obtain the latest package version

        Sanity check to ensure the same name and type.
        """
        package = ud.path.split("/")[-1]
        current_version = ['', d.getVar('PV', True), '']

        """possible to have no version in pkg name, such as spectrum-fw"""
        if not re.search("\d+", package):
            current_version[1] = re.sub('_', '.', current_version[1])
            current_version[1] = re.sub('-', '.', current_version[1])
            return (current_version[1], '')

        package_regex = self._init_regexes(package, ud, d)
        if package_regex is None:
            bb.warn("latest_versionstring: package %s doesn't match pattern" % (package))
            return ('', '')
        bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))

        uri = ""
        regex_uri = d.getVar("REGEX_URI", True)
        if not regex_uri:
            path = ud.path.split(package)[0]

            # search for version matches on folders inside the path, like:
            # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
            dirver_regex = re.compile("(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
            m = dirver_regex.search(path)
            if m:
                pn = d.getVar('PN', True)
                dirver = m.group('dirver')

                dirver_pn_regex = re.compile("%s\d?" % (re.escape(pn)))
                if not dirver_pn_regex.search(dirver):
                    return (self._check_latest_version_by_dir(dirver,
                        package, package_regex, current_version, ud, d), '')

            uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])
        else:
            uri = regex_uri

        return (self._check_latest_version(uri, package, package_regex,
                current_version, ud, d), '')
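
    # latest_versionstring() is the hook bb.fetch2 exposes for upstream
    # version checks (used by tooling such as distrodata's checkpkg); it
    # returns a (version, revision) tuple, where an empty version string
    # means no newer version could be determined.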