Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 1 | # Local file checksum cache implementation |
| 2 | # |
| 3 | # Copyright (C) 2012 Intel Corporation |
| 4 | # |
Brad Bishop | c342db3 | 2019-05-15 21:57:59 -0400 | [diff] [blame] | 5 | # SPDX-License-Identifier: GPL-2.0-only |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 6 | # |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 7 | |
Patrick Williams | d8c66bc | 2016-06-20 12:57:21 -0500 | [diff] [blame] | 8 | import glob |
| 9 | import operator |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 10 | import os |
| 11 | import stat |
| 12 | import bb.utils |
| 13 | import logging |
Patrick Williams | 03907ee | 2022-05-01 06:28:52 -0500 | [diff] [blame] | 14 | import re |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 15 | from bb.cache import MultiProcessCache |
| 16 | |
logger = logging.getLogger("BitBake.Cache")

# Separator used to split a file-checksums string into its individual
# "path:True" / "path:False" entries. It matches a run of whitespace only
# when that whitespace immediately follows ":True" or ":False" (lookbehind),
# so whitespace elsewhere in an entry (e.g. inside a path) does not split it.
filelist_regex = re.compile(r'(?:(?<=:True)|(?<=:False))\s+')
| 20 | |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 21 | # mtime cache (non-persistent) |
| 22 | # based upon the assumption that files do not change during bitbake run |
class FileMtimeCache(object):
    """In-memory (non-persistent) cache of file modification times.

    Cached values are never refreshed implicitly, so lookups are only
    meaningful under the assumption that files do not change while the
    cache is alive; use update_mtime() or clear() to refresh explicitly.
    """

    # NOTE: this dict lives on the class, so every FileMtimeCache instance
    # reads and writes the same entries.
    cache = {}

    def cached_mtime(self, f):
        """Return the mtime of f, calling os.stat() only on a cache miss.

        Raises OSError if f is not cached yet and cannot be stat'ed.
        """
        if f in self.cache:
            return self.cache[f]
        # Tuple-style indexing yields the integer mtime (the st_mtime
        # attribute would be a float).
        mtime = os.stat(f)[stat.ST_MTIME]
        self.cache[f] = mtime
        return mtime

    def cached_mtime_noerror(self, f):
        """Like cached_mtime(), but return 0 when f cannot be stat'ed.

        A failed lookup is not recorded, so a later call stats f again.
        """
        if f in self.cache:
            return self.cache[f]
        try:
            mtime = os.stat(f)[stat.ST_MTIME]
        except OSError:
            return 0
        self.cache[f] = mtime
        return mtime

    def update_mtime(self, f):
        """Re-stat f unconditionally, store the new mtime and return it.

        Raises OSError if f cannot be stat'ed.
        """
        mtime = self.cache[f] = os.stat(f)[stat.ST_MTIME]
        return mtime

    def clear(self):
        """Drop every cached mtime."""
        self.cache.clear()
| 45 | |
| 46 | # Checksum + mtime cache (persistent) |
class FileChecksumCache(MultiProcessCache):
    """Checksum + mtime cache for local files.

    Entries map a normalised file path to an ``(mtime, checksum)`` tuple. A
    stored checksum is reused only while the file's current mtime equals the
    stored one. Storage and persistence of ``cachedata`` /
    ``cachedata_extras`` come from MultiProcessCache (not visible here).
    """

    cache_file_name = "local_file_checksum_cache.dat"
    CACHE_VERSION = 1

    def __init__(self):
        # The mtime cache is created before the base-class initialiser runs
        # (original ordering preserved).
        self.mtime_cache = FileMtimeCache()
        MultiProcessCache.__init__(self)

    def get_checksum(self, f):
        """Return the md5 checksum of file f, reusing the cached value if valid.

        The path is normalised first, so any "/./" marker is dropped from the
        cache key. A freshly computed checksum is recorded in
        ``cachedata_extras`` together with the mtime it was computed for.

        Raises OSError if f cannot be stat'ed.
        """
        f = os.path.normpath(f)
        entry = self.cachedata[0].get(f)
        cmtime = self.mtime_cache.cached_mtime(f)
        if entry:
            (mtime, hashval) = entry
            if cmtime == mtime:
                return hashval
            bb.debug(2, "file %s changed mtime, recompute checksum" % f)

        hashval = bb.utils.md5_file(f)
        self.cachedata_extras[0][f] = (cmtime, hashval)
        return hashval

    def merge_data(self, source, dest):
        """Merge the entries of source[0] into dest[0], in place.

        An entry already present in dest is replaced only when the source
        entry has a strictly newer mtime; entries missing from dest are
        copied over.
        """
        merged = dest[0]
        for path, entry in source[0].items():
            # The membership test must be made against the dict itself:
            # testing against the enclosing sequence ("path in dest") never
            # matches, which made every source entry overwrite dest.
            if path in merged:
                (smtime, _) = entry
                (dmtime, _) = merged[path]
                if smtime > dmtime:
                    merged[path] = entry
            else:
                merged[path] = entry

    def get_checksums(self, filelist, pn, localdirsexclude):
        """Get checksums for a list of files.

        filelist is a string of "path:True" / "path:False" entries separated
        by whitespace (split with filelist_regex); entries marked "False"
        are skipped. Paths containing '*' are expanded with glob, and
        directories (other than symlinked ones) are walked recursively.

        pn is only used to identify the recipe in warning messages.
        localdirsexclude holds directory names that are pruned while walking.

        Returns a list of (path, checksum) tuples sorted by checksum. Files
        whose checksum cannot be computed are reported with a warning and
        left out. Raises IndexError for an entry that has no ':' separator.
        """

        def checksum_file(f):
            # Best effort: an unreadable file is reported and skipped.
            try:
                checksum = self.get_checksum(f)
            except OSError as e:
                bb.warn("Unable to get checksum for %s SRC_URI entry %s: %s" % (pn, os.path.basename(f), e))
                return None
            return checksum

        #
        # Changing the format of file-checksums is problematic as both OE and Bitbake have
        # knowledge of them. We need to encode a new piece of data, the portion of the path
        # we care about from a checksum perspective. This means that files that change subdirectory
        # are tracked by the task hashes. To do this, we do something horrible and put a "/./" into
        # the path. The filesystem handles it but it gives us a marker to know which subsection
        # of the path to cache.
        #
        def checksum_dir(pth):
            # Handle directories recursively
            if pth == "/":
                bb.fatal("Refusing to checksum /")
            pth = pth.rstrip("/")
            marked_prefix = os.path.join(pth, ".")
            dirchecksums = []
            for root, dirs, files in os.walk(pth, topdown=True):
                # Prune excluded directories in place so os.walk does not
                # descend into them.
                dirs[:] = [d for d in dirs if d not in localdirsexclude]
                for name in files:
                    # Every walked path starts with pth; only that leading
                    # occurrence gets the "/./" marker (count=1), even if
                    # the same text appears again deeper in the path.
                    fullpth = os.path.join(root, name).replace(pth, marked_prefix, 1)
                    checksum = checksum_file(fullpth)
                    if checksum:
                        dirchecksums.append((fullpth, checksum))
            return dirchecksums

        checksums = []
        for item in filelist_regex.split(filelist):
            item = item.strip()
            if not item:
                continue
            # NOTE: the path is the text before the first ':' and the flag is
            # the text up to the next one, so paths that themselves contain
            # ':' are not representable in this format.
            fields = item.split(":")
            if fields[1] == "False":
                continue
            pth = fields[0]
            # Globs may expand to several paths; a plain path stands for itself.
            candidates = glob.glob(pth) if '*' in pth else [pth]
            for f in candidates:
                if os.path.isdir(f):
                    # Symlinked directories are skipped entirely.
                    if not os.path.islink(f):
                        checksums.extend(checksum_dir(f))
                else:
                    checksum = checksum_file(f)
                    if checksum:
                        checksums.append((f, checksum))

        checksums.sort(key=operator.itemgetter(1))
        return checksums