| Brad Bishop | d7bf8c1 | 2018-02-25 22:55:05 -0500 | [diff] [blame] | 1 | #!/usr/bin/env python3 | 
| Brad Bishop | c342db3 | 2019-05-15 21:57:59 -0400 | [diff] [blame] | 2 | # | 
|  | 3 | # SPDX-License-Identifier: GPL-2.0-only | 
|  | 4 | # | 
|  | 5 |  | 
| Brad Bishop | d7bf8c1 | 2018-02-25 22:55:05 -0500 | [diff] [blame] | 6 | """git-make-shallow: make the current git repository shallow | 
|  | 7 |  | 
|  | 8 | Remove the history of the specified revisions, then optionally filter the | 
|  | 9 | available refs to those specified. | 
|  | 10 | """ | 
|  | 11 |  | 
|  | 12 | import argparse | 
|  | 13 | import collections | 
|  | 14 | import errno | 
|  | 15 | import itertools | 
|  | 16 | import os | 
|  | 17 | import subprocess | 
|  | 18 | import sys | 
| Andrew Geissler | 5199d83 | 2021-09-24 16:47:35 -0500 | [diff] [blame] | 19 | import warnings | 
|  | 20 | warnings.simplefilter("default") | 
| Brad Bishop | d7bf8c1 | 2018-02-25 22:55:05 -0500 | [diff] [blame] | 21 |  | 
# Version number of this script; no code visible in this file reads it.
version = 1.0
|  | 23 |  | 
|  | 24 |  | 
def main():
    """Make the current git repository shallow at the requested revisions.

    Steps, in order:

    1. Exit with a message on Python older than 3.4.
    2. Locate the git directory. If it already contains a ``shallow`` file,
       try ``git fetch --unshallow``; when that command fails, delete the
       file instead.
    3. Parse the command line, record the full history of the requested
       revisions, then write the new shallow file.
    4. Verify that every recorded commit still reachable from the kept refs
       no longer reports any parent; exit with an error otherwise.
    5. Delete every ref that is not being kept and, when ``--shrink`` was
       given, shrink the repository and run ``git fsck --unreachable``.
    """
    if sys.version_info < (3, 4, 0):
        sys.exit('Python 3.4 or greater is required')

    git_dir = check_output(['git', 'rev-parse', '--git-dir']).rstrip()
    shallow_file = os.path.join(git_dir, 'shallow')

    if os.path.exists(shallow_file):
        try:
            check_output(['git', 'fetch', '--unshallow'])
        except subprocess.CalledProcessError:
            # Could not unshallow through a fetch: drop the marker file.
            # A file that has vanished in the meantime is not an error.
            try:
                os.unlink(shallow_file)
            except FileNotFoundError:
                pass

    args = process_args()

    # History of the requested revisions, captured before it is cut off.
    history_before = check_output(['git', 'rev-list'] + args.revisions).splitlines()

    make_shallow(shallow_file, args.revisions, args.refs)

    history_after = check_output(['git', 'rev-list'] + args.refs).splitlines()
    remaining_history = set(history_before) & set(history_after)
    for commit in remaining_history:
        # Any output from `rev-parse <commit>^@` means a parent is still
        # visible, i.e. the commit did not become a shallow boundary.
        parents_listing = check_output(['git', 'rev-parse', commit + '^@'])
        if parents_listing:
            sys.exit('Error: %s was not made shallow' % commit)

    filter_refs(args.refs)

    if not args.shrink:
        return

    shrink_repo(git_dir)
    subprocess.check_call(['git', 'fsck', '--unreachable'])
|  | 57 |  | 
|  | 58 |  | 
def process_args():
    """Parse the command line and normalise refs and revisions through git.

    Prints the help text and exits with status 2 when the script is run
    without any argument.

    Returns:
        The ``argparse`` namespace with:

        * ``refs`` -- the output lines of
          ``git rev-parse --symbolic-full-name`` for the ``--ref`` values,
          or, when no ``--ref`` was given, every ref whose object type or
          peeled object type is ``commit``. Names ending in ``/HEAD`` are
          dropped in both cases.
        * ``revisions`` -- the output lines of ``git rev-parse`` run on each
          REVISION with ``^{}`` appended.
        * ``shrink`` -- True when ``--shrink``/``-s`` was given.
    """
    # TODO: add argument to automatically keep local-only refs, since they
    # can't be easily restored with a git fetch.
    parser = argparse.ArgumentParser(
        description='Remove the history of the specified revisions, then optionally filter the available refs to those specified.')
    parser.add_argument(
        '--ref', '-r', metavar='REF', action='append', dest='refs',
        help='remove all but the specified refs (cumulative)')
    parser.add_argument(
        '--shrink', '-s', action='store_true',
        help='shrink the git repository by repacking and pruning')
    parser.add_argument(
        'revisions', metavar='REVISION', nargs='+',
        help='a git revision/commit')

    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit(2)

    args = parser.parse_args()

    def points_at_commit(refname, objecttype, peeled_objecttype):
        # Keep refs that are commits or that peel to a commit.
        return 'commit' in (objecttype, peeled_objecttype)

    if args.refs:
        candidates = check_output(
            ['git', 'rev-parse', '--symbolic-full-name'] + args.refs).splitlines()
    else:
        candidates = get_all_refs(points_at_commit)

    # Presumably this excludes symbolic refs such as refs/remotes/<name>/HEAD.
    args.refs = [name for name in candidates if not name.endswith('/HEAD')]

    peeled = ['%s^{}' % revision for revision in args.revisions]
    args.revisions = check_output(['git', 'rev-parse'] + peeled).splitlines()
    return args
|  | 80 |  | 
|  | 81 |  | 
def check_output(cmd, input=None):
    """Run ``cmd`` and return its standard output as text.

    Args:
        cmd: Argument list handed to ``subprocess.check_output``.
        input: Optional text passed to the command's standard input.

    Returns:
        The captured standard output, decoded as text
        (``universal_newlines=True``).

    Raises:
        subprocess.CalledProcessError: The command exited with a non-zero
            status.
    """
    captured = subprocess.check_output(
        cmd,
        input=input,
        universal_newlines=True,
    )
    return captured
|  | 84 |  | 
|  | 85 |  | 
def make_shallow(shallow_file, revisions, refs):
    """Remove the history of the specified revisions.

    Appends every revision produced by ``follow_history_intersections`` to
    ``shallow_file``, one per line, printing a progress message for each.

    Args:
        shallow_file: Path of the repository's ``shallow`` file.
        revisions: Revisions whose history should be cut off.
        refs: Refs that will be kept.
    """
    for boundary in follow_history_intersections(revisions, refs):
        print("Processing %s" % boundary)
        # The file is reopened and closed for each entry, so the line has
        # been written out before the generator resumes and runs its next
        # git command (presumably so that git sees the new boundary).
        with open(shallow_file, 'a') as shallow:
            shallow.write('%s\n' % boundary)
|  | 92 |  | 
|  | 93 |  | 
def get_all_refs(ref_filter=None):
    """Return the names of the refs in this repository, optionally filtered.

    Args:
        ref_filter: Optional callable invoked with three positional
            arguments -- refname, object type and peeled (``*``) object
            type, as printed by ``git for-each-ref``. Missing trailing
            fields are passed as ``None``. A ref is kept only when the
            callable returns a true value.

    Returns:
        A list of ref names in the order git printed them.
    """
    listing = check_output(['git', 'for-each-ref', '--format=%(refname)\t%(objecttype)\t%(*objecttype)'])

    names = []
    for line in listing.splitlines():
        # Pad or truncate to exactly three fields so the filter can always
        # be called with three arguments.
        fields = tuple(iter_extend(line.rsplit('\t'), 3))
        if ref_filter and not ref_filter(*fields):
            continue
        names.append(fields[0])
    return names
|  | 102 |  | 
|  | 103 |  | 
def iter_extend(iterable, length, obj=None):
    """Return an iterator over exactly ``length`` items.

    Items come from ``iterable`` first. If it runs out early the remainder
    is filled with ``obj``; if it is longer, the surplus is dropped.
    """
    padded = itertools.chain(iterable, itertools.repeat(obj))
    return itertools.islice(padded, length)
|  | 107 |  | 
|  | 108 |  | 
def filter_refs(refs):
    """Remove all but the specified refs from the git repository.

    Every existing ref that is not listed in ``refs`` is deleted with
    ``git update-ref -d --no-deref``, invoked once per ref through
    ``xargs -0 -n 1``. Nothing is run when there is nothing to delete.
    """
    to_remove = set(get_all_refs()) - set(refs)
    if not to_remove:
        return

    # xargs -0 expects each item to be terminated by a NUL byte.
    nul_terminated = ''.join(name + '\0' for name in to_remove)
    check_output(
        ['xargs', '-0', '-n', '1', 'git', 'update-ref', '-d', '--no-deref'],
        input=nul_terminated,
    )
|  | 116 |  | 
|  | 117 |  | 
def follow_history_intersections(revisions, refs):
    """Yield the points where the history of ``revisions`` meets ``refs``.

    Works through a queue seeded with ``revisions``. Each revision not seen
    before is yielded once. If it has parents, the merge base of every
    parent with every independent ref tip is queued for the same treatment.

    NOTE: the parents are looked up *before* the revision is yielded, and
    the remaining git commands run *after* the consumer has handled it. In
    this file the consumer (``make_shallow``) appends the revision to the
    shallow file in between, so the statement order here matters.

    Args:
        revisions: Starting revisions.
        refs: Refs whose history is checked for intersections.

    Yields:
        Revision identifiers, each at most once.
    """
    pending = collections.deque(revisions)
    visited = set()

    for rev in iter_except(pending.popleft, IndexError):
        if rev in visited:
            continue

        parents = check_output(['git', 'rev-parse', '%s^@' % rev]).splitlines()

        yield rev
        visited.add(rev)

        if not parents:
            continue

        # Reduce the refs to the tips not reachable from one another.
        tips = check_output(['git', 'merge-base', '--independent'] + sorted(refs)).splitlines()

        # product() iterates parents in the outer position and tips in the
        # inner one, the same order as two nested loops.
        for parent, tip in itertools.product(parents, tips):
            print("Checking %s vs %s" % (parent, tip))
            try:
                base = check_output(['git', 'merge-base', parent, tip]).rstrip()
            except subprocess.CalledProcessError:
                # git reported failure for this pair: nothing to follow.
                continue
            pending.append(base)
|  | 145 |  | 
|  | 146 |  | 
def iter_except(func, exception, start=None):
    """Yield the results of calling ``func`` until ``exception`` is raised.

    Args:
        func: Zero-argument callable invoked repeatedly.
        exception: Exception class (or tuple of classes) that ends the
            iteration silently. Any other exception propagates.
        start: Optional zero-argument callable invoked once, before the
            first call to ``func``. Its result is yielded first.

    Yields:
        The result of ``start()`` if given, then each result of ``func()``.
    """
    producers = itertools.chain(
        () if start is None else (start,),
        itertools.repeat(func),
    )
    try:
        for produce in producers:
            yield produce()
    except exception:
        return
|  | 156 |  | 
|  | 157 |  | 
def shrink_repo(git_dir):
    """Shrink the newly shallow repository, removing the unreachable objects.

    Runs, in order: reflog expiry of unreachable entries, ``git repack -ad``,
    removal of ``objects/info/alternates`` under ``git_dir`` (a missing file
    is ignored), and ``git prune --expire now``.

    Args:
        git_dir: Path of the repository's git directory.

    Raises:
        subprocess.CalledProcessError: One of the git commands failed.
        OSError: The alternates file exists but could not be removed.
    """
    alternates = os.path.join(git_dir, 'objects', 'info', 'alternates')

    subprocess.check_call(['git', 'reflog', 'expire', '--expire-unreachable=now', '--all'])
    subprocess.check_call(['git', 'repack', '-ad'])

    try:
        os.unlink(alternates)
    except FileNotFoundError:
        # No alternates file to remove.
        pass

    subprocess.check_call(['git', 'prune', '--expire', 'now'])
|  | 168 |  | 
|  | 169 |  | 
# Run the tool only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()