| Andrew Geissler | 5f35090 | 2021-07-23 13:09:54 -0400 | [diff] [blame] | 1 | # | 
|  | 2 | # SPDX-License-Identifier: GPL-2.0-only | 
|  | 3 | # | 
|  | 4 | # Helper library to implement streaming compression and decompression using an | 
|  | 5 | # external process | 
|  | 6 | # | 
|  | 7 | # This library should not be used directly by end users; a wrapper library for the | 
|  | 8 | # specific compression tool should be created | 
|  | 9 |  | 
|  | 10 | import builtins | 
|  | 11 | import io | 
|  | 12 | import os | 
|  | 13 | import subprocess | 
|  | 14 |  | 
|  | 15 |  | 
def open_wrap(
    cls, filename, mode="rb", *, encoding=None, errors=None, newline=None, **kwargs
):
    """
    Open a compressed file in binary or text mode.

    Users should not call this directly. A specific compression library can use
    this helper to provide its own "open" command.

    The filename argument can be an actual filename (a str, bytes or
    os.PathLike object), or an existing file object to read from or write to.
    A file object is handed to cls through its "fileobj" keyword argument.

    The mode argument selects binary mode (e.g. "r", "rb", "w" or "wb") or
    text mode (e.g. "rt" or "wt"). The default mode is "rb". The mode, with
    any "t" removed, is passed on to cls, which decides which modes it
    accepts.

    For binary mode, this function is equivalent to the cls constructor:
    cls(filename, mode). In this case, the encoding, errors and newline
    arguments must not be provided.

    For text mode, a cls object is created, and wrapped in an
    io.TextIOWrapper instance with the specified encoding, error handling
    behavior, and line ending(s).

    Any additional keyword arguments are forwarded to cls unchanged.

    Raises ValueError if the mode mixes "t" and "b", or if a text-only
    argument is given in binary mode, and TypeError if filename is neither a
    path nor a file object.
    """
    text_mode = "t" in mode
    if text_mode:
        if "b" in mode:
            raise ValueError("Invalid mode: %r" % (mode,))
    else:
        if encoding is not None:
            raise ValueError("Argument 'encoding' not supported in binary mode")
        if errors is not None:
            raise ValueError("Argument 'errors' not supported in binary mode")
        if newline is not None:
            raise ValueError("Argument 'newline' not supported in binary mode")

    # cls only ever deals with bytes; text handling is layered on top below
    file_mode = mode.replace("t", "")
    if isinstance(filename, (str, bytes, os.PathLike)):
        binary_file = cls(filename, file_mode, **kwargs)
    elif hasattr(filename, "read") or hasattr(filename, "write"):
        binary_file = cls(None, file_mode, fileobj=filename, **kwargs)
    else:
        raise TypeError("filename must be a str or bytes object, or a file")

    if not text_mode:
        return binary_file

    try:
        return io.TextIOWrapper(
            binary_file,
            encoding=encoding,
            errors=errors,
            newline=newline,
            write_through=True,
        )
    except Exception:
        # The wrapper could not be created (e.g. unknown encoding or illegal
        # newline). Release the already opened binary file instead of leaking
        # it, but make sure the caller still sees the original error rather
        # than one raised by the cleanup.
        try:
            binary_file.close()
        except Exception:
            pass
        raise
|  | 65 |  | 
|  | 66 |  | 
class CompressionError(OSError):
    """Error raised when the external (de)compression process fails."""
|  | 69 |  | 
|  | 70 |  | 
class PipeFile(io.RawIOBase):
    """
    Class that implements generically piping to/from a compression program

    Derived classes should add the functions get_compress() and
    get_decompress() that return the required commands. Input will be piped
    into stdin and the (de)compressed output should be written to stdout,
    e.g.:

        class FooFile(PipeFile):
            def get_decompress(self):
                return ["fooc", "--decompress", "--stdout"]

            def get_compress(self):
                return ["fooc", "--compress", "--stdout"]

    """

    READ = 0
    WRITE = 1

    def __init__(self, filename=None, mode="rb", *, stderr=None, fileobj=None):
        """
        Start the external process and connect it to the underlying file.

        filename is the path of the compressed file; it is only used when
        fileobj is None. mode must start with "r" (the file is fed to the
        get_decompress() command and its output is read from this object) or
        "w" (data written to this object is fed to the get_compress() command,
        whose output goes to the file). stderr is passed to subprocess.Popen
        unchanged. fileobj, if given, is an already open file object that is
        handed to the process as its stdin (read) or stdout (write).

        Raises ValueError for text modes and for modes that start with
        neither "r" nor "w".
        """
        if "t" in mode or "U" in mode:
            raise ValueError("Invalid mode: {!r}".format(mode))

        if "b" not in mode:
            mode += "b"

        if mode.startswith("r"):
            self.mode = self.READ
        elif mode.startswith("w"):
            self.mode = self.WRITE
        else:
            raise ValueError("Invalid mode %r" % mode)

        opened_here = fileobj is None
        if opened_here:
            fileobj = builtins.open(filename, mode)
        self.fileobj = fileobj

        try:
            if self.mode == self.READ:
                self.p = subprocess.Popen(
                    self.get_decompress(),
                    stdin=self.fileobj,
                    stdout=subprocess.PIPE,
                    stderr=stderr,
                    close_fds=True,
                )
                self.pipe = self.p.stdout
            else:
                self.p = subprocess.Popen(
                    self.get_compress(),
                    stdin=subprocess.PIPE,
                    stdout=self.fileobj,
                    stderr=stderr,
                    close_fds=True,
                )
                self.pipe = self.p.stdin
        except BaseException:
            # The process could not be started; don't leak a file that was
            # opened here. A caller supplied fileobj stays the caller's
            # responsibility.
            if opened_here:
                self.fileobj.close()
            raise

        self.__closed = False

    def _check_process(self, *, wait=True):
        """
        Raise CompressionError if the external process exited with an error.

        With wait=True (the default) this blocks until the process has
        exited. With wait=False the process is only polled, and nothing
        happens if it is still running. Once a successful exit has been
        observed the process handle is dropped, so later calls are no-ops.
        """
        if self.p is None:
            return

        returncode = self.p.wait() if wait else self.p.poll()
        if returncode is None:
            # Only possible when polling: the process is still running
            return
        if returncode:
            raise CompressionError("Process died with %d" % returncode)
        self.p = None

    def close(self):
        """
        Close the pipe, wait for the process and close the underlying file.

        Raises CompressionError if the process exited with a non-zero status.
        The object is marked closed and the underlying file is closed even if
        closing the pipe or the process check fails.
        """
        if self.closed:
            return

        try:
            try:
                # Closing the pipe is what lets the process see EOF and exit
                self.pipe.close()
            finally:
                self._check_process()
        finally:
            # Always finish the close, otherwise the file would leak and
            # every later close() would run into the same error again
            self.__closed = True
            self.fileobj.close()

    @property
    def closed(self):
        """True once close() has run."""
        return self.__closed

    def fileno(self):
        """Return the file descriptor of the pipe connected to the process."""
        return self.pipe.fileno()

    def flush(self):
        """Flush the pipe connected to the process."""
        self.pipe.flush()

    def isatty(self):
        """Return whether the pipe connected to the process is a tty."""
        return self.pipe.isatty()

    def readable(self):
        """Return True if the object was opened for reading."""
        return self.mode == self.READ

    def writable(self):
        """Return True if the object was opened for writing."""
        return self.mode == self.WRITE

    def readinto(self, b):
        """
        Read process output into the buffer b and return the byte count.

        A return value of 0 for a non-empty buffer means end of stream; in
        that case the exit status of the process is checked and
        CompressionError is raised if it failed. Raises OSError (EBADF) if the
        object was opened for writing.
        """
        if self.mode != self.READ:
            import errno

            raise OSError(
                errno.EBADF, "read() on write-only %s object" % self.__class__.__name__
            )
        size = self.pipe.readinto(b)
        # A zero-length request also yields 0 but is not EOF; waiting for the
        # process then could block while it still has output to deliver
        if size == 0 and memoryview(b).nbytes:
            self._check_process()
        return size

    def write(self, data):
        """
        Write data to the process and return the number of bytes written.

        Raises OSError (EBADF) if the object was opened for reading, and
        CompressionError if nothing was written and the process has already
        exited with an error.
        """
        if self.mode != self.WRITE:
            import errno

            raise OSError(
                errno.EBADF, "write() on read-only %s object" % self.__class__.__name__
            )
        written = self.pipe.write(data)

        if not written:
            # Only poll here: the process normally keeps running until its
            # stdin is closed, so waiting for it (e.g. after an empty write)
            # would block forever
            self._check_process(wait=False)

        return written