#!/usr/bin/env python3
# Source blob: 296d3a3dbdd757d09621c83b389acd46debb2f01
# (blame for line 1: Brad Bishop, d7bf8c1, 2018-02-25 22:55:05 -0500)
"""git-make-shallow: make the current git repository shallow

Remove the history of the specified revisions, then optionally filter the
available refs to those specified.
"""
7
8import argparse
9import collections
10import errno
11import itertools
12import os
13import subprocess
14import sys
15
# Version number of this script.
version = 1.0
17
18
def main():
    """Make the current git repository shallow at the requested revisions.

    The steps run in this order:

    1. If the repository already has a ``shallow`` file, run
       ``git fetch --unshallow``; when that command fails, delete the file
       instead (a file that is already gone is not an error).
    2. Parse the command line and record the output of ``git rev-list`` for
       the requested revisions.
    3. Write the new shallow file with make_shallow().
    4. Run ``git rev-list`` for the kept refs; every commit that also appears
       in the list from step 2 must no longer report any parents, otherwise
       the script exits with an error message.
    5. Delete the refs that are not kept and, when --shrink was given,
       shrink the repository and run ``git fsck --unreachable``.
    """
    if sys.version_info < (3, 4, 0):
        sys.exit('Python 3.4 or greater is required')

    git_dir = check_output(['git', 'rev-parse', '--git-dir']).rstrip()
    shallow_file = os.path.join(git_dir, 'shallow')
    if os.path.exists(shallow_file):
        try:
            check_output(['git', 'fetch', '--unshallow'])
        except subprocess.CalledProcessError:
            # Could not unshallow through a fetch: drop the shallow file
            # directly. Only "file does not exist" is tolerated here.
            try:
                os.unlink(shallow_file)
            except FileNotFoundError:
                pass

    args = process_args()

    # History of the requested revisions as seen before the shallow file is
    # rewritten.
    history_before = set(
        check_output(['git', 'rev-list'] + args.revisions).splitlines()
    )

    make_shallow(shallow_file, args.revisions, args.refs)

    # History still visible from the kept refs after the rewrite.
    history_after = set(
        check_output(['git', 'rev-list'] + args.refs).splitlines()
    )
    for commit in history_before & history_after:
        visible_parents = check_output(['git', 'rev-parse', '%s^@' % commit])
        if visible_parents:
            sys.exit('Error: %s was not made shallow' % commit)

    filter_refs(args.refs)

    if args.shrink:
        shrink_repo(git_dir)
        subprocess.check_call(['git', 'fsck', '--unreachable'])
51
52
def process_args():
    """Parse the command line and resolve refs and revisions through git.

    When the script is started without any argument, the help text is
    printed and the process exits with status 2.

    Returns the argparse namespace with these attributes rewritten:

    refs
        Output lines of ``git rev-parse --symbolic-full-name`` for the refs
        given with --ref; when none were given, every ref reported by
        get_all_refs() whose object type or dereferenced object type is
        'commit'. Names ending in '/HEAD' are dropped in both cases.
    revisions
        Output lines of ``git rev-parse`` applied to each positional
        revision with '^{}' appended.
    """
    # TODO: add argument to automatically keep local-only refs, since they
    # can't be easily restored with a git fetch.
    parser = argparse.ArgumentParser(
        description='Remove the history of the specified revisions, then optionally filter the available refs to those specified.',
    )
    parser.add_argument(
        '--ref', '-r',
        metavar='REF',
        action='append',
        dest='refs',
        help='remove all but the specified refs (cumulative)',
    )
    parser.add_argument(
        '--shrink', '-s',
        action='store_true',
        help='shrink the git repository by repacking and pruning',
    )
    parser.add_argument(
        'revisions',
        metavar='REVISION',
        nargs='+',
        help='a git revision/commit',
    )
    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit(2)

    args = parser.parse_args()

    if args.refs:
        resolved = check_output(
            ['git', 'rev-parse', '--symbolic-full-name'] + args.refs
        )
        candidates = resolved.splitlines()
    else:
        def points_at_commit(refname, objecttype, deref_objecttype):
            # Keep refs that are commits, or whose dereferenced object is one.
            return 'commit' in (objecttype, deref_objecttype)

        candidates = get_all_refs(points_at_commit)

    args.refs = [name for name in candidates if not name.endswith('/HEAD')]

    peeled = [revision + '^{}' for revision in args.revisions]
    args.revisions = check_output(['git', 'rev-parse'] + peeled).splitlines()
    return args
74
75
def check_output(cmd, input=None):
    """Run *cmd* and return everything it wrote to stdout, as text.

    cmd   -- the command and its arguments, as a list.
    input -- optional text to send to the command's standard input.

    Raises subprocess.CalledProcessError when the command exits with a
    non-zero status.
    """
    return subprocess.check_output(cmd, input=input, universal_newlines=True)
78
79
def make_shallow(shallow_file, revisions, refs):
    """Remove the history of the specified revisions.

    Every commit produced by follow_history_intersections() is announced on
    stdout and appended, one per line, to *shallow_file*.
    """
    for commit in follow_history_intersections(revisions, refs):
        print("Processing %s" % commit)
        # The file is reopened (and therefore closed again) for each commit,
        # so the entry is on disk before the generator resumes and runs its
        # next git commands -- presumably so that git already sees it.
        with open(shallow_file, 'a') as shallow:
            shallow.write('%s\n' % commit)
86
87
def get_all_refs(ref_filter=None):
    """Return the names of the refs in this repository.

    ref_filter -- optional callable taking (refname, objecttype,
                  dereferenced objecttype); only refs for which it returns
                  a true value are kept. Fields missing from a line are
                  passed as None.
    """
    fmt = '--format=%(refname)\t%(objecttype)\t%(*objecttype)'
    listing = check_output(['git', 'for-each-ref', fmt])

    names = []
    for line in listing.splitlines():
        # Normalise every line to exactly three fields.
        fields = tuple(iter_extend(line.rsplit('\t'), 3))
        if ref_filter and not ref_filter(*fields):
            continue
        names.append(fields[0])
    return names
96
97
def iter_extend(iterable, length, obj=None):
    """Return an iterator over exactly *length* items taken from *iterable*.

    A shorter iterable is padded at the end with *obj*; items beyond
    *length* are not produced.
    """
    padding = itertools.repeat(obj)
    padded = itertools.chain(iterable, padding)
    return itertools.islice(padded, length)
101
102
def filter_refs(refs):
    """Remove all but the specified refs from the git repository.

    Every ref reported by get_all_refs() that is not in *refs* is deleted
    with ``git update-ref -d --no-deref``, one invocation per ref, driven
    by ``xargs -0`` reading NUL-terminated names on stdin. Nothing is run
    when there is nothing to delete.
    """
    unwanted = set(get_all_refs()) - set(refs)
    if not unwanted:
        return

    nul_terminated = ''.join(name + '\0' for name in unwanted)
    delete_cmd = ['xargs', '-0', '-n', '1', 'git', 'update-ref', '-d', '--no-deref']
    check_output(delete_cmd, input=nul_terminated)
110
111
def follow_history_intersections(revisions, refs):
    """Determine all the points where the history of the specified revisions intersects the specified refs.

    Generator. Starting from *revisions*, each commit is yielded once; then,
    for each of its parents and each ref reported by
    ``git merge-base --independent`` over *refs*, the merge base of the
    pair is queued for the same treatment. Pairs for which
    ``git merge-base`` fails are skipped.
    """
    queue = collections.deque(revisions)
    visited = set()

    while queue:
        commit = queue.popleft()
        if commit in visited:
            continue

        # The parents are read BEFORE the commit is handed to the consumer.
        # NOTE(review): the consumer in this file appends the commit to the
        # shallow file, after which git presumably stops reporting these
        # parents -- keep this ordering.
        parent_ids = check_output(['git', 'rev-parse', '%s^@' % commit]).splitlines()

        yield commit
        visited.add(commit)

        if not parent_ids:
            continue

        # Recomputed on every pass on purpose: it runs after the consumer
        # has had a chance to change the repository state.
        independent_refs = check_output(
            ['git', 'merge-base', '--independent'] + sorted(refs)
        ).splitlines()
        for parent in parent_ids:
            for ref in independent_refs:
                print("Checking %s vs %s" % (parent, ref))
                try:
                    common = check_output(['git', 'merge-base', parent, ref]).rstrip()
                except subprocess.CalledProcessError:
                    # git could not produce a merge base for this pair.
                    continue
                queue.append(common)
139
140
def iter_except(func, exception, start=None):
    """Yield the results of calling *func* until it raises *exception*.

    func      -- callable invoked with no arguments for every item.
    exception -- exception class (or tuple of classes) that ends the
                 iteration; it is swallowed, not propagated.
    start     -- optional callable; when given, its result is yielded once
                 before the first call to *func*.
    """
    try:
        if start is not None:
            first = start()
            yield first
        while True:
            item = func()
            yield item
    except exception:
        return
150
151
def shrink_repo(git_dir):
    """Shrink the newly shallow repository, removing the unreachable objects.

    Runs, in order: ``git reflog expire --expire-unreachable=now --all``,
    ``git repack -ad``, removal of ``objects/info/alternates`` under
    *git_dir* (a missing file is fine), and ``git prune --expire now``.

    Raises subprocess.CalledProcessError if any git command fails, and
    OSError if the alternates file exists but cannot be removed.
    """
    subprocess.check_call(['git', 'reflog', 'expire', '--expire-unreachable=now', '--all'])
    subprocess.check_call(['git', 'repack', '-ad'])

    alternates = os.path.join(git_dir, 'objects', 'info', 'alternates')
    try:
        os.unlink(alternates)
    except FileNotFoundError:
        # No alternates file to remove.
        pass

    subprocess.check_call(['git', 'prune', '--expire', 'now'])
162
163
# Run main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()