blob: 557793d3668feb4cb43ac7e8de79bf9700acf244 [file] [log] [blame]
Patrick Williamsc124f4f2015-09-15 14:41:29 -05001# Local file checksum cache implementation
2#
3# Copyright (C) 2012 Intel Corporation
4#
Brad Bishopc342db32019-05-15 21:57:59 -04005# SPDX-License-Identifier: GPL-2.0-only
Patrick Williamsc124f4f2015-09-15 14:41:29 -05006#
Patrick Williamsc124f4f2015-09-15 14:41:29 -05007
Patrick Williamsd8c66bc2016-06-20 12:57:21 -05008import glob
9import operator
Patrick Williamsc124f4f2015-09-15 14:41:29 -050010import os
11import stat
12import bb.utils
13import logging
Patrick Williams03907ee2022-05-01 06:28:52 -050014import re
Patrick Williamsc124f4f2015-09-15 14:41:29 -050015from bb.cache import MultiProcessCache
16
17logger = logging.getLogger("BitBake.Cache")
18
Patrick Williams03907ee2022-05-01 06:28:52 -050019filelist_regex = re.compile(r'(?:(?<=:True)|(?<=:False))\s+')
20
Patrick Williamsc124f4f2015-09-15 14:41:29 -050021# mtime cache (non-persistent)
22# based upon the assumption that files do not change during bitbake run
class FileMtimeCache(object):
    """In-memory lookup table from file path to modification time.

    Values are whatever ``os.stat(path)[stat.ST_MTIME]`` returned the first
    time the path was queried (or the last time it was explicitly refreshed
    with update_mtime()). Nothing is written to disk.
    """

    # Declared on the class and only ever mutated in place, so every
    # instance reads and writes the same dictionary.
    cache = {}

    def cached_mtime(self, f):
        """Return the remembered mtime of f, stat'ing it on first use.

        Raises OSError (from os.stat) if f is not cached yet and cannot be
        stat'ed.
        """
        known = self.cache
        if f in known:
            return known[f]
        mtime = os.stat(f)[stat.ST_MTIME]
        known[f] = mtime
        return mtime

    def cached_mtime_noerror(self, f):
        """Like cached_mtime(), but return 0 when f cannot be stat'ed.

        A failed lookup is not recorded, so a later call stats f again.
        """
        known = self.cache
        if f in known:
            return known[f]
        try:
            mtime = os.stat(f)[stat.ST_MTIME]
        except OSError:
            return 0
        known[f] = mtime
        return mtime

    def update_mtime(self, f):
        """Re-stat f unconditionally, store the fresh mtime and return it."""
        mtime = os.stat(f)[stat.ST_MTIME]
        self.cache[f] = mtime
        return mtime

    def clear(self):
        """Forget every remembered mtime."""
        self.cache.clear()
45
46# Checksum + mtime cache (persistent)
class FileChecksumCache(MultiProcessCache):
    """Cache of file checksums keyed by normalised path.

    Each entry is a ``(mtime, checksum)`` tuple; a cached checksum is only
    reused while the file's current mtime equals the recorded one. Storage
    (``cachedata`` / ``cachedata_extras``) is provided by MultiProcessCache,
    which is defined outside this file.
    """
    cache_file_name = "local_file_checksum_cache.dat"
    CACHE_VERSION = 1

    def __init__(self):
        # Set up the mtime helper before the base class initialiser runs.
        self.mtime_cache = FileMtimeCache()
        MultiProcessCache.__init__(self)

    def get_checksum(self, f):
        """Return the checksum of file f, using the cached value if still valid.

        The path is normalised first. When there is no cached entry, or its
        mtime differs from the file's current (cached) mtime, the checksum is
        recomputed with bb.utils.md5_file() and recorded in
        ``cachedata_extras``.

        Raises OSError if f cannot be stat'ed.
        """
        f = os.path.normpath(f)
        entry = self.cachedata[0].get(f)
        cmtime = self.mtime_cache.cached_mtime(f)
        if entry:
            (mtime, hashval) = entry
            if cmtime == mtime:
                return hashval
            bb.debug(2, "file %s changed mtime, recompute checksum" % f)

        hashval = bb.utils.md5_file(f)
        self.cachedata_extras[0][f] = (cmtime, hashval)
        return hashval

    def merge_data(self, source, dest):
        """Merge the entries of source[0] into dest[0], in place.

        An entry already present in dest[0] is only replaced when the source
        entry carries a strictly newer mtime; entries missing from dest[0]
        are copied over.
        """
        for h in source[0]:
            # The entries live in dest[0]; testing membership on dest itself
            # would never match a path key, so the mtime comparison below
            # would never run and the source would always win.
            if h in dest[0]:
                (smtime, _) = source[0][h]
                (dmtime, _) = dest[0][h]
                if smtime > dmtime:
                    dest[0][h] = source[0][h]
            else:
                dest[0][h] = source[0][h]

    def get_checksums(self, filelist, pn, localdirsexclude):
        """Get checksums for a list of files

        filelist is a whitespace separated string of ``<path>:<exists>``
        entries where <exists> is ``True`` or ``False``; entries flagged
        ``False`` are skipped. A path containing ``*`` is expanded with
        glob. Directories are walked recursively (symlinked directories are
        skipped), pruning any subdirectory whose name is in
        localdirsexclude. pn is only used in the warning emitted when a file
        cannot be checksummed.

        Returns a list of ``(path, checksum)`` tuples sorted by checksum.
        """

        def checksum_file(f):
            # Checksum one file; warn and return None rather than failing.
            try:
                checksum = self.get_checksum(f)
            except OSError as e:
                bb.warn("Unable to get checksum for %s SRC_URI entry %s: %s" % (pn, os.path.basename(f), e))
                return None
            return checksum

        #
        # Changing the format of file-checksums is problematic as both OE and Bitbake have
        # knowledge of them. We need to encode a new piece of data, the portion of the path
        # we care about from a checksum perspective. This means that files that change subdirectory
        # are tracked by the task hashes. To do this, we do something horrible and put a "/./" into
        # the path. The filesystem handles it but it gives us a marker to know which subsection
        # of the path to cache.
        #
        def checksum_dir(pth):
            # Handle directories recursively
            if pth == "/":
                bb.fatal("Refusing to checksum /")
            pth = pth.rstrip("/")
            marked = os.path.join(pth, ".")
            dirchecksums = []
            for root, dirs, files in os.walk(pth, topdown=True):
                # Prune excluded directories in place so os.walk skips them.
                dirs[:] = [d for d in dirs if d not in localdirsexclude]
                for name in files:
                    # root always starts with pth, so only the leading
                    # occurrence is the one to mark; replacing every
                    # occurrence would insert several "/./" markers when the
                    # same text recurs deeper in the path.
                    fullpth = os.path.join(root, name).replace(pth, marked, 1)
                    checksum = checksum_file(fullpth)
                    if checksum:
                        dirchecksums.append((fullpth, checksum))
            return dirchecksums

        checksums = []
        for entry in filelist_regex.split(filelist):
            if not entry:
                continue
            entry = entry.strip()
            if not entry:
                continue
            # The exists flag is the last ":"-separated field; split from
            # the right so a ":" inside the path does not truncate it.
            # Indexing (rather than unpacking) keeps IndexError for an entry
            # with no ":" at all.
            fields = entry.rsplit(":", 1)
            exist = fields[1]
            if exist == "False":
                continue
            pth = fields[0]
            if '*' in pth:
                # Handle globs
                for f in glob.glob(pth):
                    if os.path.isdir(f):
                        if not os.path.islink(f):
                            checksums.extend(checksum_dir(f))
                    else:
                        checksum = checksum_file(f)
                        if checksum:
                            checksums.append((f, checksum))
            elif os.path.isdir(pth):
                if not os.path.islink(pth):
                    checksums.extend(checksum_dir(pth))
            else:
                checksum = checksum_file(pth)
                if checksum:
                    checksums.append((pth, checksum))

        checksums.sort(key=operator.itemgetter(1))
        return checksums