Brad Bishop | d7bf8c1 | 2018-02-25 22:55:05 -0500 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
Brad Bishop | c342db3 | 2019-05-15 21:57:59 -0400 | [diff] [blame] | 2 | # |
Patrick Williams | 92b42cb | 2022-09-03 06:53:57 -0500 | [diff] [blame] | 3 | # Copyright BitBake Contributors |
| 4 | # |
Brad Bishop | c342db3 | 2019-05-15 21:57:59 -0400 | [diff] [blame] | 5 | # SPDX-License-Identifier: GPL-2.0-only |
| 6 | # |
| 7 | |
Brad Bishop | d7bf8c1 | 2018-02-25 22:55:05 -0500 | [diff] [blame] | 8 | """git-make-shallow: make the current git repository shallow |
| 9 | |
| 10 | Remove the history of the specified revisions, then optionally filter the |
| 11 | available refs to those specified. |
| 12 | """ |
| 13 | |
| 14 | import argparse |
| 15 | import collections |
| 16 | import errno |
| 17 | import itertools |
| 18 | import os |
| 19 | import subprocess |
| 20 | import sys |
Andrew Geissler | 5199d83 | 2021-09-24 16:47:35 -0500 | [diff] [blame] | 21 | import warnings |
# Install the "default" action as the warning filter for this script.
warnings.simplefilter("default")

# Script version number.
version = 1.0


# Base command line prepended to every git invocation in this script; it
# always passes the safe.bareRepository=all configuration override.
git_cmd = ['git', '-c', 'safe.bareRepository=all']
| 28 | |
def main():
    """Make the current git repository shallow at the requested revisions.

    The steps run in a fixed order: undo any existing shallow state, parse
    the command line, record the history of the requested revisions, write
    the new shallow boundaries, verify that no recorded commit still
    reachable from the kept refs has parents, delete the refs that are not
    kept, and finally (with --shrink) repack, prune and fsck the repository.

    Exits with an error message if the interpreter is older than Python 3.4
    or if a commit that should have been made shallow still has parents.
    """
    if sys.version_info < (3, 4, 0):
        sys.exit('Python 3.4 or greater is required')

    git_dir = check_output(git_cmd + ['rev-parse', '--git-dir']).rstrip()
    shallow_file = os.path.join(git_dir, 'shallow')
    if os.path.exists(shallow_file):
        # Try to restore full history first; if the fetch fails, fall back
        # to simply removing the existing shallow file.
        try:
            check_output(git_cmd + ['fetch', '--unshallow'])
        except subprocess.CalledProcessError:
            try:
                os.unlink(shallow_file)
            except FileNotFoundError:
                # Already gone, nothing to remove.
                pass

    # Arguments are resolved through git only after the unshallow attempt.
    args = process_args()

    # History of the requested revisions, captured before anything is cut.
    full_history = check_output(git_cmd + ['rev-list'] + args.revisions).splitlines()

    make_shallow(shallow_file, args.revisions, args.refs)

    # Anything from the recorded history that the kept refs can still reach
    # must now be parentless, otherwise the shallowing did not take effect.
    reachable = check_output(git_cmd + ['rev-list'] + args.refs).splitlines()
    for commit in set(full_history) & set(reachable):
        parents = check_output(git_cmd + ['rev-parse', '%s^@' % commit])
        if parents:
            sys.exit('Error: %s was not made shallow' % commit)

    filter_refs(args.refs)

    if not args.shrink:
        return

    shrink_repo(git_dir)
    subprocess.check_call(git_cmd + ['fsck', '--unreachable'])
Brad Bishop | d7bf8c1 | 2018-02-25 22:55:05 -0500 | [diff] [blame] | 61 | |
| 62 | |
def process_args():
    """Parse the command line and normalise refs and revisions through git.

    Prints the help text and exits with status 2 when no arguments are given.

    Returns the argparse namespace with two attributes rewritten:
    ``refs`` holds the output lines of ``git rev-parse --symbolic-full-name``
    for the --ref values (or, when no --ref was given, every ref whose object
    type or dereferenced object type is ``commit``), minus anything ending in
    ``/HEAD``; ``revisions`` holds the output lines of ``git rev-parse`` run
    on each REVISION with ``^{}`` appended.
    """
    # TODO: add argument to automatically keep local-only refs, since they
    # can't be easily restored with a git fetch.
    parser = argparse.ArgumentParser(description='Remove the history of the specified revisions, then optionally filter the available refs to those specified.')
    parser.add_argument('--ref', '-r', metavar='REF', action='append', dest='refs', help='remove all but the specified refs (cumulative)')
    parser.add_argument('--shrink', '-s', action='store_true', help='shrink the git repository by repacking and pruning')
    parser.add_argument('revisions', metavar='REVISION', nargs='+', help='a git revision/commit')
    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit(2)

    args = parser.parse_args()

    if args.refs:
        candidate_refs = check_output(git_cmd + ['rev-parse', '--symbolic-full-name'] + args.refs).splitlines()
    else:
        def refers_to_commit(refname, objecttype, deref_objecttype):
            # Accept refs that are commits or that dereference to a commit.
            return objecttype == 'commit' or deref_objecttype == 'commit'

        candidate_refs = get_all_refs(refers_to_commit)

    args.refs = [ref for ref in candidate_refs if not ref.endswith('/HEAD')]

    peeled = [revision + '^{}' for revision in args.revisions]
    args.revisions = check_output(git_cmd + ['rev-parse'] + peeled).splitlines()
    return args
| 84 | |
| 85 | |
def check_output(cmd, input=None):
    """Run ``cmd`` and return everything it wrote to stdout as text.

    ``cmd`` is an argument list passed straight to subprocess.check_output.
    ``input``, when given, is text fed to the command's stdin.
    Raises subprocess.CalledProcessError if the command exits non-zero.
    """
    options = {'universal_newlines': True, 'input': input}
    return subprocess.check_output(cmd, **options)
| 88 | |
| 89 | |
def make_shallow(shallow_file, revisions, refs):
    """Append every history intersection of ``revisions`` to ``shallow_file``.

    Each revision produced by follow_history_intersections(revisions, refs)
    is announced on stdout and written to ``shallow_file`` on its own line.
    """
    for boundary in follow_history_intersections(revisions, refs):
        print("Processing %s" % boundary)
        # The file is reopened and closed for every entry, so the entry has
        # been written out before the generator is resumed.
        with open(shallow_file, 'a') as shallow:
            shallow.write('%s\n' % boundary)
| 96 | |
| 97 | |
def get_all_refs(ref_filter=None):
    """List the names of the refs in this repository.

    ``ref_filter``, when given, is called as
    ``ref_filter(refname, objecttype, dereferenced_objecttype)`` for each ref
    reported by ``git for-each-ref``; only refs for which it returns a true
    value are kept. Returns a list of ref names.
    """
    listing = check_output(git_cmd + ['for-each-ref', '--format=%(refname)\t%(objecttype)\t%(*objecttype)'])
    names = []
    for line in listing.splitlines():
        # Always work with exactly three fields per ref.
        fields = tuple(iter_extend(line.rsplit('\t'), 3))
        if ref_filter and not ref_filter(*fields):
            continue
        names.append(fields[0])
    return names
| 106 | |
| 107 | |
def iter_extend(iterable, length, obj=None):
    """Return an iterator over exactly ``length`` items taken from ``iterable``.

    If ``iterable`` runs out early the remainder is filled with ``obj``;
    if it is longer than ``length`` the surplus items are not produced.
    """
    padded = itertools.chain(iterable, itertools.repeat(obj))
    return itertools.islice(padded, length)
| 111 | |
| 112 | |
def filter_refs(refs):
    """Delete every ref in the repository that is not listed in ``refs``.

    Does nothing when there is no ref to delete. Otherwise the unwanted ref
    names are sent NUL-terminated to ``xargs -0 -n 1``, which runs
    ``git update-ref -d --no-deref`` once per name.
    """
    unwanted = set(get_all_refs()) - set(refs)
    if not unwanted:
        return

    delete_cmd = ['xargs', '-0', '-n', '1'] + git_cmd + ['update-ref', '-d', '--no-deref']
    nul_separated = ''.join(name + '\0' for name in unwanted)
    check_output(delete_cmd, input=nul_separated)
| 120 | |
| 121 | |
def follow_history_intersections(revisions, refs):
    """Yield each point where the history of ``revisions`` meets ``refs``.

    This is a generator driven by a work queue seeded with ``revisions``.
    Every revision is yielded once. For each parent of a yielded revision,
    the merge base of that parent with each independent ref tip is queued
    for the same treatment.

    The parents of a revision are resolved *before* it is yielded, and the
    merge-base queries run *after* the consumer has handled it. Keep that
    order: make_shallow records the yielded revision in the shallow file
    between those two steps (presumably that is why the parents have to be
    read first - confirm before reordering).
    """
    pending = collections.deque(revisions)
    visited = set()

    for commit in iter_except(pending.popleft, IndexError):
        if commit in visited:
            continue

        parents = check_output(git_cmd + ['rev-parse', '%s^@' % commit]).splitlines()

        yield commit
        visited.add(commit)

        if not parents:
            # No parents to follow from here.
            continue

        # Reduce the refs to the tips git reports as independent of each other.
        tips = check_output(git_cmd + ['merge-base', '--independent'] + sorted(refs)).splitlines()
        for parent, tip in itertools.product(parents, tips):
            print("Checking %s vs %s" % (parent, tip))
            try:
                common = check_output(git_cmd + ['merge-base', parent, tip]).rstrip()
            except subprocess.CalledProcessError:
                # git merge-base failed for this pair; nothing to queue.
                continue
            pending.append(common)
| 149 | |
| 150 | |
def iter_except(func, exception, start=None):
    """Yield the results of calling ``func`` until ``exception`` is raised.

    If ``start`` is given it is called once and its result is yielded before
    the first call to ``func``. ``exception`` (a class or tuple of classes)
    ends the iteration silently, whether it comes from ``start`` or ``func``;
    any other exception propagates.
    """
    first_calls = [] if start is None else [start]
    try:
        for call in itertools.chain(first_calls, itertools.repeat(func)):
            yield call()
    except exception:
        return
| 160 | |
| 161 | |
def shrink_repo(git_dir):
    """Repack and prune the repository in ``git_dir`` after it was made shallow.

    Expires unreachable reflog entries, repacks with ``git repack -ad``,
    removes ``objects/info/alternates`` if it exists, then prunes with
    ``git prune --expire now``. Raises subprocess.CalledProcessError if
    any git command fails.
    """
    for git_args in (['reflog', 'expire', '--expire-unreachable=now', '--all'],
                     ['repack', '-ad']):
        subprocess.check_call(git_cmd + git_args)

    alternates = os.path.join(git_dir, 'objects', 'info', 'alternates')
    try:
        os.unlink(alternates)
    except FileNotFoundError:
        # No alternates file to remove.
        pass

    subprocess.check_call(git_cmd + ['prune', '--expire', 'now'])
Brad Bishop | d7bf8c1 | 2018-02-25 22:55:05 -0500 | [diff] [blame] | 172 | |
| 173 | |
# Run the tool only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()