Andrew Geissler | 5082cc7 | 2023-09-11 08:41:39 -0400 | [diff] [blame] | 1 | """ |
| 2 | BitBake 'Fetch' implementation for Google Cloud Platform Storage. |
| 3 | |
| 4 | Class for fetching files from Google Cloud Storage using the |
| 5 | Google Cloud Storage Python Client. The GCS Python Client must |
| 6 | be correctly installed, configured and authenticated prior to use. |
| 7 | Additionally, gsutil must be installed. |
| 8 | |
| 9 | """ |
| 10 | |
| 11 | # Copyright (C) 2023, Snap Inc. |
| 12 | # |
| 13 | # Based in part on bb.fetch2.s3: |
| 14 | # Copyright (C) 2017 Andre McCurdy |
| 15 | # |
| 16 | # SPDX-License-Identifier: GPL-2.0-only |
| 17 | # |
| 18 | # Based on functions from the base bb module, Copyright 2003 Holger Schurig |
| 19 | |
| 20 | import os |
| 21 | import bb |
| 22 | import urllib.parse, urllib.error |
| 23 | from bb.fetch2 import FetchMethod |
| 24 | from bb.fetch2 import FetchError |
| 25 | from bb.fetch2 import logger |
| 26 | |
class GCP(FetchMethod):
    """
    Class to fetch urls via GCP's Python API.

    Handles 'gs' type urls. The bucket name is read from ud.host and the
    object name from ud.path. Before any API call the object is probed with
    the command stored in ud.basecmd ("gsutil stat").
    """
    def __init__(self):
        # The storage client is created on first use by get_gcp_client().
        self.gcp_client = None

    def supports(self, ud, d):
        """
        Check to see if a given url can be fetched with GCP.
        """
        return ud.type in ['gs']

    def recommends_checksum(self, urldata):
        """
        Report that urls handled by this fetcher should carry a checksum.
        """
        return True

    def urldata_init(self, ud, d):
        """
        Initialise the GCP specific fields of the url data.

        Sets ud.basename (from the 'downloadfilename' url parameter when
        present, otherwise from the last component of ud.path), ud.localfile
        (the url-unquoted, expanded basename) and ud.basecmd.
        """
        if 'downloadfilename' in ud.parm:
            ud.basename = ud.parm['downloadfilename']
        else:
            ud.basename = os.path.basename(ud.path)

        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
        ud.basecmd = "gsutil stat"

    def get_gcp_client(self):
        """
        Create the Google Cloud Storage client and store it on the instance.
        """
        # Imported here so the import only runs when a client is needed.
        from google.cloud import storage
        self.gcp_client = storage.Client(project=None)

    def download(self, ud, d):
        """
        Fetch urls using the GCP API.
        Assumes localpath was called first.

        Returns True on success. Raises FetchError when the downloaded file
        is missing or empty.
        """
        url = f"gs://{ud.host}{ud.path}"
        logger.debug2(f"Trying to download {url} to {ud.localpath}")
        if self.gcp_client is None:
            self.get_gcp_client()

        bb.fetch2.check_network_access(d, ud.basecmd, url)
        # runfetchcmd is not imported by name in this module, so it is
        # reached through bb.fetch2 like check_network_access above.
        # NOTE(review): presumably this raises when gsutil fails - confirm.
        bb.fetch2.runfetchcmd("%s %s" % (ud.basecmd, url), d)

        # Path sometimes has leading slash, so strip it
        path = ud.path.lstrip("/")
        blob = self.gcp_client.bucket(ud.host).blob(path)
        blob.download_to_filename(ud.localpath)

        # Additional sanity checks copied from the wget class (although there
        # are no known issues which mean these are required, treat the GCP API
        # tool with a little healthy suspicion).
        if not os.path.exists(ud.localpath):
            raise FetchError(f"The GCP API returned success for {url} but {ud.localpath} doesn't exist?!")

        if os.path.getsize(ud.localpath) == 0:
            os.remove(ud.localpath)
            raise FetchError(f"The downloaded file for {url} resulted in a zero size file?! Deleting and failing since this isn't right.")

        return True

    def checkstatus(self, fetch, ud, d):
        """
        Check the status of a URL.

        Returns True when the object exists, otherwise raises FetchError.
        """
        url = f"gs://{ud.host}{ud.path}"
        logger.debug2(f"Checking status of {url}")
        if self.gcp_client is None:
            self.get_gcp_client()

        bb.fetch2.check_network_access(d, ud.basecmd, url)
        # runfetchcmd is not imported by name in this module, so it is
        # reached through bb.fetch2 like check_network_access above.
        bb.fetch2.runfetchcmd("%s %s" % (ud.basecmd, url), d)

        # Path sometimes has leading slash, so strip it
        path = ud.path.lstrip("/")
        if not self.gcp_client.bucket(ud.host).blob(path).exists():
            raise FetchError(f"The GCP API reported that {url} does not exist")

        return True