blob: 9de557c10e4aa175192a3885065ec76f001a0a82 [file] [log] [blame]
Brad Bishopd7bf8c12018-02-25 22:55:05 -05001#!/usr/bin/env python3
Brad Bishopc342db32019-05-15 21:57:59 -04002#
Patrick Williams92b42cb2022-09-03 06:53:57 -05003# Copyright BitBake Contributors
4#
Brad Bishopc342db32019-05-15 21:57:59 -04005# SPDX-License-Identifier: GPL-2.0-only
6#
7
Brad Bishopd7bf8c12018-02-25 22:55:05 -05008"""git-make-shallow: make the current git repository shallow
9
10Remove the history of the specified revisions, then optionally filter the
11available refs to those specified.
12"""
13
14import argparse
15import collections
16import errno
17import itertools
18import os
19import subprocess
20import sys
Andrew Geissler5199d832021-09-24 16:47:35 -050021import warnings
# Install the "default" action as the filter for all warning categories.
warnings.simplefilter("default")

# Script version number; not referenced by any code in this file.
version = 1.0


# Common prefix for every git invocation made by this script. The -c option
# sets safe.bareRepository=all for each call (presumably so the script keeps
# working inside bare repositories even when the user's configuration
# restricts them -- confirm against the git-config documentation).
git_cmd = ['git', '-c', 'safe.bareRepository=all']
28
def main():
    """Make the current repository shallow at the revisions given on the command line.

    Steps, in order:
      1. If a 'shallow' file already exists in the git directory, try to
         unshallow via fetch; if that fails, just delete the file.
      2. Parse the arguments and record the history of the requested
         revisions as it is before any shallowing.
      3. Write the new shallow entries (see make_shallow).
      4. Verify that every commit which is both in the recorded history and
         still listed from the kept refs no longer reports any parents.
      5. Drop every ref that is not being kept, and optionally shrink the
         repository and run 'git fsck --unreachable'.

    Exits via sys.exit() on an unsupported Python version or when a revision
    was not made shallow; subprocess.CalledProcessError from failing git
    commands propagates to the caller.
    """
    if sys.version_info < (3, 4, 0):
        sys.exit('Python 3.4 or greater is required')

    git_dir = check_output(git_cmd + ['rev-parse', '--git-dir']).rstrip()
    shallow_file = os.path.join(git_dir, 'shallow')

    if os.path.exists(shallow_file):
        try:
            check_output(git_cmd + ['fetch', '--unshallow'])
        except subprocess.CalledProcessError:
            # The fetch failed, so fall back to removing the shallow file by
            # hand; it having vanished in the meantime is not an error.
            try:
                os.unlink(shallow_file)
            except FileNotFoundError:
                pass

    args = process_args()
    rev_list_cmd = git_cmd + ['rev-list']

    # Snapshot of the history of the requested revisions before shallowing.
    history_before = set(check_output(rev_list_cmd + args.revisions).splitlines())

    make_shallow(shallow_file, args.revisions, args.refs)

    # Whatever rev-list reports from the kept refs now that the shallow file is written.
    history_after = set(check_output(rev_list_cmd + args.refs).splitlines())

    for rev in history_before & history_after:
        # Any output from '<rev>^@' means the commit still lists parents.
        parents_left = check_output(git_cmd + ['rev-parse', '%s^@' % rev])
        if parents_left:
            sys.exit('Error: %s was not made shallow' % rev)

    filter_refs(args.refs)

    if args.shrink:
        shrink_repo(git_dir)
        subprocess.check_call(git_cmd + ['fsck', '--unreachable'])
Brad Bishopd7bf8c12018-02-25 22:55:05 -050061
62
def process_args():
    """Parse the command line and normalise refs and revisions through git.

    Prints the help text and exits with status 2 when the script is invoked
    without any argument.

    Returns the argparse namespace with:
      refs      -- full symbolic names of the refs given with --ref, or, when
                   none were given, every ref whose object type or
                   dereferenced object type is 'commit'; refs ending in
                   '/HEAD' are always left out.
      revisions -- the output lines of 'git rev-parse <rev>^{}' for the
                   requested revisions.
      shrink    -- True when --shrink was given.
    """
    # TODO: add argument to automatically keep local-only refs, since they
    # can't be easily restored with a git fetch.
    parser = argparse.ArgumentParser(description='Remove the history of the specified revisions, then optionally filter the available refs to those specified.')
    parser.add_argument('--ref', '-r', metavar='REF', action='append', dest='refs', help='remove all but the specified refs (cumulative)')
    parser.add_argument('--shrink', '-s', action='store_true', help='shrink the git repository by repacking and pruning')
    parser.add_argument('revisions', metavar='REVISION', nargs='+', help='a git revision/commit')

    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit(2)

    args = parser.parse_args()

    if not args.refs:
        # No explicit refs: keep every ref that is, or dereferences to, a commit.
        args.refs = get_all_refs(lambda r, t, tt: 'commit' in (t, tt))
    else:
        expand_cmd = git_cmd + ['rev-parse', '--symbolic-full-name'] + args.refs
        args.refs = check_output(expand_cmd).splitlines()

    args.refs = [name for name in args.refs if not name.endswith('/HEAD')]

    peeled = ['%s^{}' % rev for rev in args.revisions]
    args.revisions = check_output(git_cmd + ['rev-parse'] + peeled).splitlines()
    return args
84
85
def check_output(cmd, input=None):
    """Run cmd and return its standard output as text.

    cmd   -- argument list handed to subprocess.check_output.
    input -- optional text passed to the command's standard input.

    Raises subprocess.CalledProcessError when the command exits non-zero.
    """
    # universal_newlines=True selects text mode for both input and output.
    return subprocess.check_output(cmd, input=input, universal_newlines=True)
88
89
def make_shallow(shallow_file, revisions, refs):
    """Append each revision produced by follow_history_intersections() to shallow_file.

    shallow_file -- path of the file the revisions are appended to, one per line.
    revisions    -- starting revisions, passed through to follow_history_intersections().
    refs         -- refs passed through to follow_history_intersections().
    """
    for commit in follow_history_intersections(revisions, refs):
        print("Processing %s" % commit)
        # The file is reopened for every entry so it is closed, and hence
        # written out, before the generator resumes and runs more git commands.
        with open(shallow_file, 'a') as handle:
            handle.write('%s\n' % commit)
96
97
def get_all_refs(ref_filter=None):
    """List the names of the refs in this repository, optionally filtered.

    ref_filter -- optional callable invoked as
                  ref_filter(refname, objecttype, dereferenced_objecttype);
                  only refs for which it returns a true value are kept.

    Returns a list of ref names.
    """
    fields = '--format=%(refname)\t%(objecttype)\t%(*objecttype)'
    listing = check_output(git_cmd + ['for-each-ref', fields])

    names = []
    for line in listing.splitlines():
        # Pad (or trim) to exactly three fields so the filter can always be
        # called with three positional arguments.
        entry = tuple(iter_extend(line.rsplit('\t'), 3))
        if ref_filter and not ref_filter(*entry):
            continue
        names.append(entry[0])
    return names
106
107
def iter_extend(iterable, length, obj=None):
    """Return an iterator over exactly `length` items taken from iterable.

    When iterable runs out early the remainder is filled with obj; when it
    is longer than `length` the surplus items are not consumed.
    """
    padding = itertools.repeat(obj)
    padded = itertools.chain(iterable, padding)
    return itertools.islice(padded, length)
111
112
def filter_refs(refs):
    """Delete every ref in the repository that is not listed in refs.

    refs -- iterable of ref names to keep.

    Does nothing when there is no ref to delete.
    """
    doomed = set(get_all_refs()) - set(refs)
    if not doomed:
        return

    # xargs runs one 'git update-ref -d --no-deref <ref>' per NUL-terminated
    # name read from standard input.
    delete_cmd = ['xargs', '-0', '-n', '1'] + git_cmd + ['update-ref', '-d', '--no-deref']
    nul_terminated = ''.join(name + '\0' for name in doomed)
    check_output(delete_cmd, input=nul_terminated)
120
121
def follow_history_intersections(revisions, refs):
    """Yield the revisions where the history of `revisions` meets `refs`.

    Works through a queue seeded with revisions. Each revision not seen
    before is yielded once. If it has parents, the merge base of every
    parent with every ref reported by 'git merge-base --independent' is
    appended to the queue; pairs for which 'git merge-base' fails are skipped.

    revisions -- iterable of starting revisions.
    refs      -- iterable of refs to intersect the history with.
    """
    pending = collections.deque(revisions)
    visited = set()

    while pending:
        rev = pending.popleft()
        if rev in visited:
            continue

        # Parents are looked up before yielding: the consumer acts on the
        # revision while this generator is suspended (make_shallow appends
        # it to the shallow file at that point).
        parents = check_output(git_cmd + ['rev-parse', '%s^@' % rev]).splitlines()

        yield rev
        visited.add(rev)

        if parents:
            independent_refs = check_output(git_cmd + ['merge-base', '--independent'] + sorted(refs)).splitlines()
            for parent, ref in itertools.product(parents, independent_refs):
                print("Checking %s vs %s" % (parent, ref))
                try:
                    base = check_output(git_cmd + ['merge-base', parent, ref]).rstrip()
                except subprocess.CalledProcessError:
                    # git merge-base failed for this pair; nothing to enqueue.
                    continue
                pending.append(base)
149
150
def iter_except(func, exception, start=None):
    """Call func over and over, yielding each result, until it raises exception.

    func      -- zero-argument callable providing the values.
    exception -- exception class (or tuple of classes) that ends the
                 iteration silently.
    start     -- optional zero-argument callable whose result is yielded
                 first; it is covered by the same exception handling.
    """
    try:
        if start is not None:
            first = start()
            yield first
        while True:
            value = func()
            yield value
    except exception:
        return
160
161
def shrink_repo(git_dir):
    """Repack and prune the repository so unreachable objects are discarded.

    git_dir -- path of the repository's git directory.

    Runs, in order: reflog expiry of unreachable entries, 'git repack -ad',
    removal of objects/info/alternates if present, and 'git prune --expire now'.
    Raises subprocess.CalledProcessError if any git command fails.
    """
    for step in (['reflog', 'expire', '--expire-unreachable=now', '--all'],
                 ['repack', '-ad']):
        subprocess.check_call(git_cmd + step)

    # Drop the alternates file once the repack has finished (presumably safe
    # because 'repack -a' has copied borrowed objects locally -- confirm
    # against the git-repack documentation). A missing file is not an error.
    try:
        os.unlink(os.path.join(git_dir, 'objects', 'info', 'alternates'))
    except FileNotFoundError:
        pass

    subprocess.check_call(git_cmd + ['prune', '--expire', 'now'])
Brad Bishopd7bf8c12018-02-25 22:55:05 -0500172
173
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()