#!/usr/bin/env python3
#
# SPDX-License-Identifier: GPL-2.0-only
#

"""git-make-shallow: make the current git repository shallow

Remove the history of the specified revisions, then optionally filter the
available refs to those specified.
"""
11
import argparse
import collections
import errno
import itertools
import os
import subprocess
import sys
19
# Version of this script.
version = 1.0
21
22
def main():
    """Make the current git repository shallow at the requested revisions.

    The steps run in this order:

    1. If the repository already has a ``shallow`` file, try
       ``git fetch --unshallow``; if that fails, delete the ``shallow`` file.
    2. Parse the command line and resolve revisions and refs (this happens
       after step 1, so the git lookups in process_args() run against the
       un-shallowed repository).
    3. Record the shallow points via make_shallow().
    4. Verify that every requested revision still reachable from the kept
       refs has no parents left, exiting with an error otherwise.
    5. Delete every ref that is not being kept.
    6. With ``--shrink``, repack/prune the repository and run ``git fsck``.
    """
    if sys.version_info < (3, 4, 0):
        sys.exit('Python 3.4 or greater is required')

    git_dir = check_output(['git', 'rev-parse', '--git-dir']).rstrip()
    shallow_file = os.path.join(git_dir, 'shallow')
    if os.path.exists(shallow_file):
        try:
            check_output(['git', 'fetch', '--unshallow'])
        except subprocess.CalledProcessError:
            # Could not fetch the missing history; fall back to dropping the
            # shallow file. It may already be gone, which is fine.
            try:
                os.unlink(shallow_file)
            except FileNotFoundError:
                pass

    args = process_args()
    # History of the requested revisions as seen *before* shallowing; used
    # below to check that the cut actually took effect.
    revs = check_output(['git', 'rev-list'] + args.revisions).splitlines()

    make_shallow(shallow_file, args.revisions, args.refs)

    ref_revs = check_output(['git', 'rev-list'] + args.refs).splitlines()
    remaining_history = set(revs) & set(ref_revs)
    for rev in remaining_history:
        # `<rev>^@` expands to all parents of <rev>; any output means the
        # commit still has visible parents.
        if check_output(['git', 'rev-parse', '{}^@'.format(rev)]):
            sys.exit('Error: %s was not made shallow' % rev)

    filter_refs(args.refs)

    if args.shrink:
        shrink_repo(git_dir)
        # NOTE(review): the fsck is assumed to belong to the --shrink branch;
        # confirm against the upstream script.
        subprocess.check_call(['git', 'fsck', '--unreachable'])
55
56
def process_args():
    """Parse the command line and resolve refs and revisions through git.

    Prints the help text and exits with status 2 when no arguments are given.

    Returns:
        The argparse namespace with:
        - ``refs``: full symbolic ref names to keep. When no ``--ref`` is
          given, every ref whose object type or dereferenced object type is
          ``commit`` is used. Refs ending in ``/HEAD`` are always dropped.
        - ``revisions``: the requested revisions resolved via
          ``git rev-parse <rev>^{}``.
        - ``shrink``: whether ``--shrink`` was passed.
    """
    # TODO: add argument to automatically keep local-only refs, since they
    # can't be easily restored with a git fetch.
    parser = argparse.ArgumentParser(
        description='Remove the history of the specified revisions, then optionally filter the available refs to those specified.')
    parser.add_argument('--ref', '-r', metavar='REF', action='append', dest='refs',
                        help='remove all but the specified refs (cumulative)')
    parser.add_argument('--shrink', '-s', action='store_true',
                        help='shrink the git repository by repacking and pruning')
    parser.add_argument('revisions', metavar='REVISION', nargs='+',
                        help='a git revision/commit')
    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit(2)

    args = parser.parse_args()

    if args.refs:
        args.refs = check_output(['git', 'rev-parse', '--symbolic-full-name'] + args.refs).splitlines()
    else:
        args.refs = get_all_refs(lambda r, t, tt: t == 'commit' or tt == 'commit')

    args.refs = [ref for ref in args.refs if not ref.endswith('/HEAD')]
    # `^{}` dereferences tags so the revisions are non-tag object names.
    args.revisions = check_output(
        ['git', 'rev-parse'] + ['%s^{}' % i for i in args.revisions]).splitlines()
    return args
78
79
def check_output(cmd, input=None):
    """Run ``cmd`` and return its standard output as text.

    Args:
        cmd: Command and arguments as a list (no shell is involved).
        input: Optional text to send to the command's standard input.

    Returns:
        The command's stdout decoded as text (universal newlines).

    Raises:
        subprocess.CalledProcessError: if the command exits non-zero.
    """
    return subprocess.check_output(cmd, universal_newlines=True, input=input)
82
83
def make_shallow(shallow_file, revisions, refs):
    """Remove the history of the specified revisions.

    Appends every revision produced by follow_history_intersections() to
    ``shallow_file``, one per line, printing a progress line for each.

    Args:
        shallow_file: Path of the repository's ``shallow`` file.
        revisions: Revisions whose history should be cut.
        refs: Refs being kept; passed through to
            follow_history_intersections().
    """
    for rev in follow_history_intersections(revisions, refs):
        print("Processing %s" % rev)
        # Reopen (and therefore flush and close) the file for every revision:
        # the generator runs further git commands when it resumes, and those
        # presumably need to see the entry just written.
        with open(shallow_file, 'a') as f:
            f.write(rev + '\n')
90
91
def get_all_refs(ref_filter=None):
    """Return all the existing refs in this repository, optionally filtered.

    Args:
        ref_filter: Optional callable invoked as
            ``ref_filter(refname, objecttype, deref_objecttype)``; only refs
            for which it returns a true value are kept. The three values come
            from ``git for-each-ref`` (``%(refname)``, ``%(objecttype)``,
            ``%(*objecttype)``); missing fields are padded with None.

    Returns:
        A list of ref names.
    """
    ref_output = check_output(
        ['git', 'for-each-ref', '--format=%(refname)\t%(objecttype)\t%(*objecttype)'])
    # Normalise every line to exactly three fields so the filter can always
    # be called with three positional arguments.
    ref_split = [tuple(iter_extend(line.rsplit('\t'), 3))
                 for line in ref_output.splitlines()]
    if ref_filter:
        ref_split = [entry for entry in ref_split if ref_filter(*entry)]
    return [entry[0] for entry in ref_split]
100
101
def iter_extend(iterable, length, obj=None):
    """Return an iterator over exactly ``length`` items from ``iterable``.

    Shorter inputs are padded with ``obj``; longer inputs are truncated.
    """
    padded = itertools.chain(iterable, itertools.repeat(obj))
    return itertools.islice(padded, length)
105
106
def filter_refs(refs):
    """Remove all but the specified refs from the git repository.

    Args:
        refs: Full ref names to keep; every other ref reported by
            get_all_refs() is deleted with ``git update-ref -d --no-deref``.
    """
    to_remove = set(get_all_refs()) - set(refs)
    if to_remove:
        # NUL-separate the names for `xargs -0`; sorting only makes the
        # deletion order deterministic.
        check_output(['xargs', '-0', '-n', '1', 'git', 'update-ref', '-d', '--no-deref'],
                     input=''.join(ref + '\0' for ref in sorted(to_remove)))
114
115
def follow_history_intersections(revisions, refs):
    """Determine all the points where the history of the specified revisions intersects the specified refs.

    This is a generator. It yields each revision to cut exactly once,
    starting with ``revisions`` themselves. After yielding a revision that
    has parents, it queues the merge base of every parent with every
    independent ref, so those points get yielded (and followed) as well.

    Args:
        revisions: Initial revisions to process.
        refs: Refs whose history is checked against each parent.

    Yields:
        Revision ids, in breadth-first order of discovery.
    """
    queue = collections.deque(revisions)
    seen = set()

    while queue:
        rev = queue.popleft()
        if rev in seen:
            continue

        # Parents are looked up before yielding: the caller is expected to
        # act on the yielded revision, which may change what git reports.
        parents = check_output(['git', 'rev-parse', '%s^@' % rev]).splitlines()

        yield rev
        seen.add(rev)

        if not parents:
            continue

        # Deliberately recomputed for every revision rather than hoisted:
        # the commit graph git sees may have changed since the last yield.
        check_refs = check_output(['git', 'merge-base', '--independent'] + sorted(refs)).splitlines()
        for parent in parents:
            for ref in check_refs:
                print("Checking %s vs %s" % (parent, ref))
                try:
                    merge_base = check_output(['git', 'merge-base', parent, ref]).rstrip()
                except subprocess.CalledProcessError:
                    # git merge-base failed (e.g. no common ancestor):
                    # nothing to follow for this pair.
                    continue
                queue.append(merge_base)
143
144
def iter_except(func, exception, start=None):
    """Call ``func`` repeatedly, yielding each result, until ``exception`` is raised.

    Args:
        func: Zero-argument callable invoked for every item.
        exception: Exception type (or tuple of types) that ends the
            iteration silently.
        start: Optional zero-argument callable whose result is yielded first;
            ``exception`` raised by it also ends the iteration.
    """
    try:
        if start is not None:
            yield start()
        while True:
            yield func()
    except exception:
        pass
154
155
def shrink_repo(git_dir):
    """Shrink the newly shallow repository, removing the unreachable objects.

    Expires unreachable reflog entries, repacks everything into a single
    pack, removes ``objects/info/alternates`` if present, and prunes loose
    unreachable objects.

    Args:
        git_dir: Path to the repository's git directory.

    Raises:
        subprocess.CalledProcessError: if any of the git commands fails.
    """
    subprocess.check_call(['git', 'reflog', 'expire', '--expire-unreachable=now', '--all'])
    subprocess.check_call(['git', 'repack', '-ad'])
    try:
        os.unlink(os.path.join(git_dir, 'objects', 'info', 'alternates'))
    except FileNotFoundError:
        # No alternates file to remove.
        pass
    subprocess.check_call(['git', 'prune', '--expire', 'now'])
166
167
# Run only when executed as a script, not when imported.
if __name__ == '__main__':
    main()