Andrew Geissler | b7d2861 | 2020-07-24 16:15:54 -0500 | [diff] [blame] | 1 | # |
| 2 | # SPDX-License-Identifier: GPL-2.0-only |
| 3 | # |
| 4 | import os |
| 5 | import subprocess |
| 6 | import bb |
| 7 | |
Andrew Geissler | eff2747 | 2021-10-29 15:35:00 -0500 | [diff] [blame] | 8 | # For reproducible builds, this code sets the default SOURCE_DATE_EPOCH in each |
| 9 | # component's build environment. The format is number of seconds since the |
| 10 | # system epoch. |
| 11 | # |
| 12 | # Upstream components (generally) respect this environment variable, |
| 13 | # using it in place of the "current" date and time. |
| 14 | # See https://reproducible-builds.org/specs/source-date-epoch/ |
| 15 | # |
| 16 | # The default value of SOURCE_DATE_EPOCH comes from the function |
| 17 | # get_source_date_epoch_value which reads from the SDE_FILE, or if the file |
| 18 | # is not available will use the fallback of SOURCE_DATE_EPOCH_FALLBACK. |
| 19 | # |
| 20 | # The SDE_FILE is normally constructed from the function |
| 21 | # create_source_date_epoch_stamp which is typically added as a postfuncs to |
| 22 | # the do_unpack task. If a recipe does NOT have do_unpack, it should be added |
| 23 | # to a task that runs after the source is available and before the |
| 24 | # do_deploy_source_date_epoch task is executed. |
| 25 | # |
| 26 | # If a recipe wishes to override the default behavior it should set its own |
| 27 | # SOURCE_DATE_EPOCH or override the do_deploy_source_date_epoch_stamp task |
| 28 | # with recipe-specific functionality to write the appropriate |
| 29 | # SOURCE_DATE_EPOCH into the SDE_FILE. |
| 30 | # |
| 31 | # SOURCE_DATE_EPOCH is intended to be a reproducible value. This value should |
| 32 | # be reproducible for anyone who builds the same revision from the same |
| 33 | # sources. |
| 34 | # |
| 35 | # There are 5 ways the create_source_date_epoch_stamp function determines what |
| 36 | # becomes SOURCE_DATE_EPOCH: |
| 37 | # |
| 38 | # 1. Use the value from __source_date_epoch.txt file if this file exists. |
| 39 | # This file was most likely created in the previous build by one of the |
| 40 | # following methods 2,3,4. |
| 41 | # Alternatively, it can be provided by a recipe via SRC_URI. |
| 42 | # |
| 43 | # If the file does not exist: |
| 44 | # |
| 45 | # 2. If there is a git checkout, use the last git commit timestamp. |
| 46 | # Git does not preserve file timestamps on checkout. |
| 47 | # |
| 48 | # 3. Use the mtime of "known" files such as NEWS, CHANGELOG, ... |
| 49 | # This works for well-kept repositories distributed via tarball. |
| 50 | # |
| 51 | # 4. Use the modification time of the youngest file in the source tree, if |
| 52 | # there is one. |
| 53 | # This will be the newest file from the distribution tarball, if any. |
| 54 | # |
| 55 | # 5. Fall back to a fixed timestamp (SOURCE_DATE_EPOCH_FALLBACK). |
| 56 | # |
| 57 | # Once the value is determined, it is stored in the recipe's SDE_FILE. |
| 58 | |
def get_source_date_epoch_from_known_files(d, sourcedir):
    """Return the newest mtime among well-known release-note files.

    Looks directly inside ``sourcedir`` for NEWS, ChangeLog, Changelog and
    CHANGES. Returns the integer ``lstat`` mtime of the youngest one that is
    a regular file, or None when none of them is present. ``d`` is accepted
    for signature parity with the other probes and is not used here.
    """
    best_mtime = None
    best_path = None
    for name in {"NEWS", "ChangeLog", "Changelog", "CHANGES"}:
        candidate = os.path.join(sourcedir, name)
        if not os.path.isfile(candidate):
            continue
        stamp = int(os.lstat(candidate).st_mtime)
        # Several known files may coexist; keep whichever is youngest.
        if best_mtime and stamp <= best_mtime:
            continue
        best_mtime, best_path = stamp, candidate
    if best_path:
        bb.debug(1, "SOURCE_DATE_EPOCH taken from: %s" % best_path)
    return best_mtime
| 74 | |
def find_git_folder(d, sourcedir):
    """Locate the ``.git`` directory belonging to the unpacked sources.

    Candidates are tried in this order:
      1. ``WORKDIR/git/.git`` (the default git fetcher unpack path),
      2. ``sourcedir/.git``,
      3. the first ``.git`` directory met while walking WORKDIR top-down,
         skipping well-known build output directories.

    Returns the path to the ``.git`` directory, or None (after emitting a
    warning) when nothing is found.
    """
    workdir = d.getVar('WORKDIR')

    # First guess: WORKDIR/git
    # This is the default git fetcher unpack path
    gitpath = os.path.join(workdir, "git/.git")
    if os.path.isdir(gitpath):
        return gitpath

    # Second guess: ${S}
    gitpath = os.path.join(sourcedir, ".git")
    if os.path.isdir(gitpath):
        return gitpath

    # Perhaps there was a subpath or destsuffix specified.
    # Go looking in the WORKDIR
    exclude = {"build", "image", "license-destdir", "patches", "pseudo",
               "recipe-sysroot", "recipe-sysroot-native", "sysroot-destdir", "temp"}
    for root, dirs, _files in os.walk(workdir, topdown=True):
        # Prune excluded directories in place so os.walk does not descend
        # into them, and sort the remainder: the raw listing order depends on
        # the filesystem, which would make the chosen repository (and hence
        # SOURCE_DATE_EPOCH) vary between hosts when several exist.
        dirs[:] = sorted(name for name in dirs if name not in exclude)
        if '.git' in dirs:
            return os.path.join(root, ".git")

    bb.warn("Failed to find a git repository in WORKDIR: %s" % workdir)
    return None
| 99 | |
def get_source_date_epoch_from_git(d, sourcedir):
    """Return the committer timestamp of HEAD for git-fetched sources.

    Returns None when SRC_URI contains no ``git://`` or ``gitsm://`` entry
    (or is unset), when no repository can be located, or when the repository
    has no valid HEAD. Otherwise returns the ``%ct`` timestamp of the most
    recent commit as an int.

    Raises subprocess.CalledProcessError if ``git log`` itself fails.
    """
    # An unset SRC_URI simply means "no git source" rather than a crash.
    src_uri = d.getVar('SRC_URI') or ""
    if "git://" not in src_uri and "gitsm://" not in src_uri:
        return None

    gitpath = find_git_folder(d, sourcedir)
    if not gitpath:
        return None

    # Check that the repository has a valid HEAD; it may not if subdir is used
    # in SRC_URI
    p = subprocess.run(['git', '--git-dir', gitpath, 'rev-parse', 'HEAD'],
                       stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    if p.returncode != 0:
        bb.debug(1, "%s does not have a valid HEAD: %s" % (gitpath, p.stdout.decode('utf-8')))
        return None

    bb.debug(1, "git repository: %s" % gitpath)
    # Force log.showSignature off: if the user's git configuration enables
    # it, signature verification text is mixed into the output and the
    # int() conversion below would fail.
    p = subprocess.run(['git', '-c', 'log.showSignature=false', '--git-dir', gitpath,
                        'log', '-1', '--pretty=%ct'],
                       check=True, stdout=subprocess.PIPE)
    return int(p.stdout.decode('utf-8'))
| 118 | |
def get_source_date_epoch_from_youngest_file(d, sourcedir):
    """Return the mtime of the most recently modified file under ``sourcedir``.

    Files whose name starts with a dot are ignored. Returns None when
    ``sourcedir`` is WORKDIR itself or when no eligible file exists;
    otherwise returns the integer ``lstat`` mtime of the youngest file.
    """
    workdir = d.getVar('WORKDIR')
    if sourcedir == workdir:
        # These sources are almost certainly not from a tarball
        return None

    # Do it the hard way: check all files and find the youngest one...
    latest_mtime = None
    latest_path = None
    for dirpath, _subdirs, names in os.walk(sourcedir, topdown=True):
        for name in names:
            if name[0] == '.':
                continue
            candidate = os.path.join(dirpath, name)
            try:
                stamp = int(os.lstat(candidate).st_mtime)
            except ValueError:
                stamp = 0
            if latest_mtime and stamp <= latest_mtime:
                continue
            latest_mtime, latest_path = stamp, candidate

    if latest_path:
        bb.debug(1, "Newest file found: %s" % latest_path)
    return latest_mtime
| 143 | |
def fixed_source_date_epoch(d):
    """Return the last-resort timestamp for SOURCE_DATE_EPOCH.

    Yields SOURCE_DATE_EPOCH_FALLBACK converted to int when that variable
    has a non-empty value, and 0 otherwise.
    """
    bb.debug(1, "No tarball or git repo found to determine SOURCE_DATE_EPOCH")
    fallback = d.getVar('SOURCE_DATE_EPOCH_FALLBACK')
    if not fallback:
        return 0
    bb.debug(1, "Using SOURCE_DATE_EPOCH_FALLBACK")
    return int(fallback)
| 151 | |
def get_source_date_epoch(d, sourcedir):
    """Determine SOURCE_DATE_EPOCH for the sources in ``sourcedir``.

    Tries, in order, the git commit time, the known-files mtime and the
    youngest-file mtime, returning the first truthy result. When all of
    them come up empty, the fixed fallback value is returned.
    """
    probes = (
        get_source_date_epoch_from_git,
        get_source_date_epoch_from_known_files,
        get_source_date_epoch_from_youngest_file,
    )
    for probe in probes:
        epoch = probe(d, sourcedir)
        if epoch:
            return epoch
    # Last resort
    return fixed_source_date_epoch(d)
| 159 | |
def epochfile_read(epochfile, d):
    """Read SOURCE_DATE_EPOCH from ``epochfile`` and return it as a string.

    The result is cached in the datastore variable
    ``__CACHED_SOURCE_DATE_EPOCH`` as a ``(value, epochfile)`` tuple, and the
    cached value is reused for as long as the same epoch file is requested.

    If the file is missing, or its content is not a valid integer,
    SOURCE_DATE_EPOCH_FALLBACK is used instead. An unset or empty fallback
    is treated as 0, the same way fixed_source_date_epoch treats it.
    """
    cached, efile = d.getVar('__CACHED_SOURCE_DATE_EPOCH') or (None, None)
    if cached and efile == epochfile:
        return cached

    if cached:
        # A cached value exists but belongs to a different epoch file.
        bb.debug(1, "Epoch file changed from %s to %s" % (efile, epochfile))

    fallback = int(d.getVar('SOURCE_DATE_EPOCH_FALLBACK') or 0)
    source_date_epoch = fallback
    try:
        with open(epochfile, 'r') as f:
            s = f.read()
    except FileNotFoundError:
        bb.debug(1, "Cannot find %s. SOURCE_DATE_EPOCH will default to %d" % (epochfile, source_date_epoch))
    else:
        try:
            source_date_epoch = int(s)
        except ValueError:
            bb.warn("SOURCE_DATE_EPOCH value '%s' is invalid. Reverting to SOURCE_DATE_EPOCH_FALLBACK" % s)
            source_date_epoch = fallback
        bb.debug(1, "SOURCE_DATE_EPOCH: %d" % source_date_epoch)

    d.setVar('__CACHED_SOURCE_DATE_EPOCH', (str(source_date_epoch), epochfile))
    return str(source_date_epoch)
| 183 | |
def epochfile_write(source_date_epoch, epochfile, d):
    """Store ``source_date_epoch`` in ``epochfile``.

    The parent directory is created if needed. The value is first written
    to a sibling ``<epochfile>.new`` file which is then renamed over
    ``epochfile``. ``d`` is not used.
    """
    bb.debug(1, "SOURCE_DATE_EPOCH: %d" % source_date_epoch)

    target_dir = os.path.dirname(epochfile)
    bb.utils.mkdirhier(target_dir)

    staging_path = "%s.new" % epochfile
    payload = str(source_date_epoch)
    with open(staging_path, 'w') as out:
        out.write(payload)
    os.rename(staging_path, epochfile)