Brad Bishop | d7bf8c1 | 2018-02-25 22:55:05 -0500 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
Brad Bishop | c342db3 | 2019-05-15 21:57:59 -0400 | [diff] [blame] | 2 | # |
| 3 | # SPDX-License-Identifier: GPL-2.0-only |
| 4 | # |
| 5 | |
Brad Bishop | d7bf8c1 | 2018-02-25 22:55:05 -0500 | [diff] [blame] | 6 | """git-make-shallow: make the current git repository shallow |
| 7 | |
| 8 | Remove the history of the specified revisions, then optionally filter the |
| 9 | available refs to those specified. |
| 10 | """ |
| 11 | |
| 12 | import argparse |
| 13 | import collections |
| 14 | import errno |
| 15 | import itertools |
| 16 | import os |
| 17 | import subprocess |
| 18 | import sys |
Andrew Geissler | 5199d83 | 2021-09-24 16:47:35 -0500 | [diff] [blame] | 19 | import warnings |
# Install the "default" warning action for all warning categories.
warnings.simplefilter("default")

# Version number of this script; it is not referenced by the code visible here.
version = 1.0
| 23 | |
| 24 | |
def main():
    """Entry point: make the current git repository shallow.

    The steps run in this order:

    1. Locate the git directory and its ``shallow`` file.
    2. If a ``shallow`` file already exists, try ``git fetch --unshallow``;
       if that command fails, delete the file instead.
    3. Parse the command line (see ``process_args``).
    4. Record the full history of the requested revisions, then write the
       new shallow points (see ``make_shallow``).
    5. Verify that every commit which is both in the recorded history and
       still reachable from the kept refs has no parents; exit with an
       error message otherwise.
    6. Delete all refs other than the kept ones, and optionally shrink the
       repository and run ``git fsck --unreachable``.
    """
    if sys.version_info < (3, 4, 0):
        sys.exit('Python 3.4 or greater is required')

    git_dir = check_output(['git', 'rev-parse', '--git-dir']).rstrip()
    shallow_file = os.path.join(git_dir, 'shallow')

    if os.path.exists(shallow_file):
        try:
            check_output(['git', 'fetch', '--unshallow'])
        except subprocess.CalledProcessError:
            # Could not unshallow via fetch; fall back to dropping the
            # shallow file. A file that vanished in the meantime is fine.
            try:
                os.unlink(shallow_file)
            except FileNotFoundError:
                pass

    args = process_args()

    # History of the requested revisions *before* anything is made shallow.
    history_before = check_output(['git', 'rev-list'] + args.revisions).splitlines()

    make_shallow(shallow_file, args.revisions, args.refs)

    # What the kept refs can still reach now that the shallow file is written.
    reachable_after = check_output(['git', 'rev-list'] + args.refs).splitlines()
    for rev in set(history_before) & set(reachable_after):
        # `<rev>^@` prints the parents of rev; any output means rev still
        # has visible parents.
        parents = check_output(['git', 'rev-parse', '{}^@'.format(rev)])
        if parents:
            sys.exit('Error: %s was not made shallow' % rev)

    filter_refs(args.refs)

    if args.shrink:
        shrink_repo(git_dir)
        subprocess.check_call(['git', 'fsck', '--unreachable'])
| 57 | |
| 58 | |
def process_args():
    """Parse the command line and normalise the refs and revisions.

    Prints the help text and exits with status 2 when the script is run
    without any arguments.

    Returns the argparse namespace with:

    * ``refs``: the refs given with ``--ref`` expanded through
      ``git rev-parse --symbolic-full-name``, or, when none were given,
      every ref whose object type or dereferenced object type is
      ``commit``. Refs ending in ``/HEAD`` are dropped in both cases.
    * ``revisions``: the output lines of ``git rev-parse`` applied to each
      positional revision with ``^{}`` appended.
    * ``shrink``: True when ``--shrink`` was passed.
    """
    # TODO: add argument to automatically keep local-only refs, since they
    # can't be easily restored with a git fetch.
    parser = argparse.ArgumentParser(
        description='Remove the history of the specified revisions, then optionally filter the available refs to those specified.',
    )
    parser.add_argument(
        '--ref', '-r',
        metavar='REF',
        action='append',
        dest='refs',
        help='remove all but the specified refs (cumulative)',
    )
    parser.add_argument(
        '--shrink', '-s',
        action='store_true',
        help='shrink the git repository by repacking and pruning',
    )
    parser.add_argument(
        'revisions',
        metavar='REVISION',
        nargs='+',
        help='a git revision/commit',
    )

    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit(2)

    args = parser.parse_args()

    if args.refs:
        candidate_refs = check_output(
            ['git', 'rev-parse', '--symbolic-full-name'] + args.refs
        ).splitlines()
    else:
        # The filter receives (refname, objecttype, dereferenced objecttype).
        candidate_refs = get_all_refs(
            lambda _name, objtype, peeled: 'commit' in (objtype, peeled)
        )
    args.refs = [ref for ref in candidate_refs if not ref.endswith('/HEAD')]

    peeled_exprs = ['%s^{}' % revision for revision in args.revisions]
    args.revisions = check_output(['git', 'rev-parse'] + peeled_exprs).splitlines()
    return args
| 80 | |
| 81 | |
def check_output(cmd, input=None):
    """Run ``cmd`` and return its standard output as text.

    ``cmd`` is an argument list passed straight to
    ``subprocess.check_output``; ``input``, if given, is text fed to the
    command's standard input. Raises ``subprocess.CalledProcessError`` when
    the command exits with a non-zero status.
    """
    return subprocess.check_output(
        cmd,
        input=input,
        universal_newlines=True,
    )
| 84 | |
| 85 | |
def make_shallow(shallow_file, revisions, refs):
    """Remove the history of the specified revisions.

    Every revision produced by ``follow_history_intersections(revisions,
    refs)`` is reported on stdout and appended, one per line, to
    ``shallow_file``.
    """
    for shallow_rev in follow_history_intersections(revisions, refs):
        print("Processing %s" % shallow_rev)
        # The file is opened and closed for each revision, so the entry is
        # written out before the generator resumes and runs more git
        # commands.
        with open(shallow_file, 'a') as out:
            out.write(shallow_rev + '\n')
| 92 | |
| 93 | |
def get_all_refs(ref_filter=None):
    """Return the names of the refs in this repository.

    Each ref is described by a 3-tuple built from the tab-separated
    ``git for-each-ref`` fields ``%(refname)``, ``%(objecttype)`` and
    ``%(*objecttype)`` (padded with None if a field is missing). When
    ``ref_filter`` is given it is called with those three values as
    positional arguments, and only refs for which it returns a true value
    are kept.
    """
    listing = check_output(['git', 'for-each-ref', '--format=%(refname)\t%(objecttype)\t%(*objecttype)'])
    records = [
        tuple(iter_extend(line.split('\t'), 3))
        for line in listing.splitlines()
    ]
    if ref_filter:
        records = [record for record in records if ref_filter(*record)]
    return [record[0] for record in records]
| 102 | |
| 103 | |
def iter_extend(iterable, length, obj=None):
    """Return an iterator over exactly ``length`` items taken from ``iterable``.

    If ``iterable`` runs out early the remainder is filled with ``obj``;
    if it is longer than ``length`` the extra items are not produced.
    """
    padded = itertools.chain(iterable, itertools.repeat(obj))
    return itertools.islice(padded, length)
| 107 | |
| 108 | |
def filter_refs(refs):
    """Remove all but the specified refs from the git repository.

    Refs returned by ``get_all_refs()`` that are not in ``refs`` are
    deleted by piping their NUL-terminated names to ``xargs``, which runs
    ``git update-ref -d --no-deref`` once per name. Nothing is run when
    there is nothing to delete.
    """
    unwanted = set(get_all_refs()) - set(refs)
    if not unwanted:
        return

    nul_terminated = ''.join(name + '\0' for name in unwanted)
    check_output(
        ['xargs', '-0', '-n', '1', 'git', 'update-ref', '-d', '--no-deref'],
        input=nul_terminated,
    )
| 116 | |
| 117 | |
def follow_history_intersections(revisions, refs):
    """Yield the points where the history of ``revisions`` meets ``refs``.

    This is a generator working through a queue seeded with ``revisions``.
    Each revision is yielded at most once. For a yielded revision that has
    parents, the independent refs are computed with
    ``git merge-base --independent`` and the merge base of every
    (parent, independent ref) pair is appended to the queue. Pairs for
    which ``git merge-base`` fails are skipped.
    """
    pending = collections.deque(revisions)
    visited = set()

    while pending:
        rev = pending.popleft()
        if rev in visited:
            continue

        # Parents are resolved *before* yielding: the consumer runs while
        # this generator is suspended (make_shallow records rev in the
        # shallow file at that point), so the order must not change.
        parents = check_output(['git', 'rev-parse', '%s^@' % rev]).splitlines()

        yield rev
        visited.add(rev)

        if not parents:
            continue

        independent_refs = check_output(
            ['git', 'merge-base', '--independent'] + sorted(refs)
        ).splitlines()
        for parent, ref in itertools.product(parents, independent_refs):
            print("Checking %s vs %s" % (parent, ref))
            try:
                common = check_output(['git', 'merge-base', parent, ref]).rstrip()
            except subprocess.CalledProcessError:
                # git merge-base exited non-zero for this pair; skip it.
                continue
            pending.append(common)
| 145 | |
| 146 | |
def iter_except(func, exception, start=None):
    """Yield the results of calling ``func`` until it raises ``exception``.

    When ``start`` is given it is called once first and its result is
    yielded before any call to ``func``. ``exception`` (a class or tuple of
    classes) ends the iteration silently, whether it comes from ``start``
    or ``func``; any other exception propagates.
    """
    try:
        if start is not None:
            yield start()
        while True:
            result = func()
            yield result
    except exception:
        return
| 156 | |
| 157 | |
def shrink_repo(git_dir):
    """Shrink the newly shallow repository, removing the unreachable objects.

    Runs, in order: ``git reflog expire --expire-unreachable=now --all``,
    ``git repack -ad``, removal of ``objects/info/alternates`` under
    ``git_dir`` (a missing file is ignored), and ``git prune --expire now``.
    Raises ``subprocess.CalledProcessError`` if any git command fails.
    """
    subprocess.check_call(['git', 'reflog', 'expire', '--expire-unreachable=now', '--all'])
    subprocess.check_call(['git', 'repack', '-ad'])

    alternates = os.path.join(git_dir, 'objects', 'info', 'alternates')
    try:
        os.unlink(alternates)
    except FileNotFoundError:
        # No alternates file to remove.
        pass

    subprocess.check_call(['git', 'prune', '--expire', 'now'])
| 168 | |
| 169 | |
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()