Brad Bishop | c342db3 | 2019-05-15 21:57:59 -0400 | [diff] [blame] | 1 | # |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 2 | # Copyright (c) 2012 Intel, Inc. |
| 3 | # |
Brad Bishop | c342db3 | 2019-05-15 21:57:59 -0400 | [diff] [blame] | 4 | # SPDX-License-Identifier: GPL-2.0-only |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 5 | # |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 6 | |
| 7 | """ |
| 8 | This module implements a way to get the block map of a file. Two methods |
| 9 | are supported - the FIEMAP ioctl and the 'SEEK_HOLE / SEEK_DATA' features of |
| 10 | the file seek syscall. The former is implemented by the 'FilemapFiemap' class, |
| 11 | the latter is implemented by the 'FilemapSeek' class. Both classes provide the |
| 12 | same API. The 'filemap' function automatically selects which class can be used |
| 13 | and returns an instance of the class. |
| 14 | """ |
| 15 | |
| 16 | # Disable the following pylint recommendations: |
| 17 | # * Too many instance attributes (R0902) |
| 18 | # pylint: disable=R0902 |
| 19 | |
Brad Bishop | 1a4b7ee | 2018-12-16 17:11:34 -0800 | [diff] [blame] | 20 | import errno |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 21 | import os |
| 22 | import struct |
| 23 | import array |
| 24 | import fcntl |
| 25 | import tempfile |
| 26 | import logging |
| 27 | |
def get_block_size(file_obj):
    """
    Return the block size, in bytes, to use for file object 'file_obj'.

    The FIGETBSZ ioctl is tried first. If it fails with 'OSError' or reports
    zero, the 'st_blksize' field returned by 'os.fstat()' is used instead. The
    result is capped at 4KiB because the logic in this module does not support
    larger blocks. Errors are indicated by the 'IOError' exception.
    """

    # FIGETBSZ is ioctl number 2: it reports the block size of the host
    # file-system for the file.
    figetbsz = 2
    try:
        binary_data = fcntl.ioctl(file_obj, figetbsz, struct.pack('I', 0))
        bsize = struct.unpack('I', binary_data)[0]
    except OSError:
        bsize = None

    # Fall back to 'os.fstat()' when the ioctl failed or reported zero
    if not bsize:
        # 'st_blksize' may be missing, and a zero value is just as unusable:
        # the block size is used as a divisor when counting blocks.
        bsize = getattr(os.fstat(file_obj.fileno()), 'st_blksize', 0)
        if not bsize:
            raise IOError("Unable to determine block size")

    # The logic in this module only supports a maximum of a 4KB block size
    max_block_size = 4 * 1024
    return min(bsize, max_block_size)
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 57 | |
class ErrorNotSupp(Exception):
    """
    Raised when neither the kernel nor the file-system provides the requested
    block-mapping feature ('FIEMAP' or 'SEEK_HOLE').
    """
| 64 | |
class Error(Exception):
    """Generic exception type for every other failure raised by this module."""
| 68 | |
| 69 | |
| 70 | class _FilemapBase(object): |
| 71 | """ |
| 72 | This is a base class for a couple of other classes in this module. This |
| 73 | class simply performs the common parts of the initialization process: opens |
| 74 | the image file, gets its size, etc. The 'log' parameter is the logger object |
| 75 | to use for printing messages. |
| 76 | """ |
| 77 | |
| 78 | def __init__(self, image, log=None): |
| 79 | """ |
| 80 | Initialize a class instance. The 'image' argument is full path to the |
| 81 | file or file object to operate on. |
| 82 | """ |
| 83 | |
| 84 | self._log = log |
| 85 | if self._log is None: |
| 86 | self._log = logging.getLogger(__name__) |
| 87 | |
| 88 | self._f_image_needs_close = False |
| 89 | |
| 90 | if hasattr(image, "fileno"): |
| 91 | self._f_image = image |
| 92 | self._image_path = image.name |
| 93 | else: |
| 94 | self._image_path = image |
| 95 | self._open_image_file() |
| 96 | |
| 97 | try: |
| 98 | self.image_size = os.fstat(self._f_image.fileno()).st_size |
| 99 | except IOError as err: |
| 100 | raise Error("cannot get information about file '%s': %s" |
| 101 | % (self._f_image.name, err)) |
| 102 | |
| 103 | try: |
| 104 | self.block_size = get_block_size(self._f_image) |
| 105 | except IOError as err: |
| 106 | raise Error("cannot get block size for '%s': %s" |
| 107 | % (self._image_path, err)) |
| 108 | |
| 109 | self.blocks_cnt = self.image_size + self.block_size - 1 |
| 110 | self.blocks_cnt //= self.block_size |
| 111 | |
| 112 | try: |
| 113 | self._f_image.flush() |
| 114 | except IOError as err: |
| 115 | raise Error("cannot flush image file '%s': %s" |
| 116 | % (self._image_path, err)) |
| 117 | |
| 118 | try: |
| 119 | os.fsync(self._f_image.fileno()), |
| 120 | except OSError as err: |
| 121 | raise Error("cannot synchronize image file '%s': %s " |
| 122 | % (self._image_path, err.strerror)) |
| 123 | |
| 124 | self._log.debug("opened image \"%s\"" % self._image_path) |
| 125 | self._log.debug("block size %d, blocks count %d, image size %d" |
| 126 | % (self.block_size, self.blocks_cnt, self.image_size)) |
| 127 | |
| 128 | def __del__(self): |
| 129 | """The class destructor which just closes the image file.""" |
| 130 | if self._f_image_needs_close: |
| 131 | self._f_image.close() |
| 132 | |
| 133 | def _open_image_file(self): |
| 134 | """Open the image file.""" |
| 135 | try: |
| 136 | self._f_image = open(self._image_path, 'rb') |
| 137 | except IOError as err: |
| 138 | raise Error("cannot open image file '%s': %s" |
| 139 | % (self._image_path, err)) |
| 140 | |
| 141 | self._f_image_needs_close = True |
| 142 | |
| 143 | def block_is_mapped(self, block): # pylint: disable=W0613,R0201 |
| 144 | """ |
| 145 | This method has has to be implemented by child classes. It returns |
| 146 | 'True' if block number 'block' of the image file is mapped and 'False' |
| 147 | otherwise. |
| 148 | """ |
| 149 | |
| 150 | raise Error("the method is not implemented") |
| 151 | |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 152 | def get_mapped_ranges(self, start, count): # pylint: disable=W0613,R0201 |
| 153 | """ |
| 154 | This method has has to be implemented by child classes. This is a |
| 155 | generator which yields ranges of mapped blocks in the file. The ranges |
| 156 | are tuples of 2 elements: [first, last], where 'first' is the first |
| 157 | mapped block and 'last' is the last mapped block. |
| 158 | |
| 159 | The ranges are yielded for the area of the file of size 'count' blocks, |
| 160 | starting from block 'start'. |
| 161 | """ |
| 162 | |
| 163 | raise Error("the method is not implemented") |
| 164 | |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 165 | |
| 166 | # The 'SEEK_HOLE' and 'SEEK_DATA' options of the file seek system call |
| 167 | _SEEK_DATA = 3 |
| 168 | _SEEK_HOLE = 4 |
| 169 | |
| 170 | def _lseek(file_obj, offset, whence): |
| 171 | """This is a helper function which invokes 'os.lseek' for file object |
| 172 | 'file_obj' and with specified 'offset' and 'whence'. The 'whence' |
| 173 | argument is supposed to be either '_SEEK_DATA' or '_SEEK_HOLE'. When |
| 174 | there is no more data or hole starting from 'offset', this function |
| 175 | returns '-1'. Otherwise the data or hole position is returned.""" |
| 176 | |
| 177 | try: |
| 178 | return os.lseek(file_obj.fileno(), offset, whence) |
| 179 | except OSError as err: |
| 180 | # The 'lseek' system call returns the ENXIO if there is no data or |
| 181 | # hole starting from the specified offset. |
Brad Bishop | 1a4b7ee | 2018-12-16 17:11:34 -0800 | [diff] [blame] | 182 | if err.errno == errno.ENXIO: |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 183 | return -1 |
Brad Bishop | 1a4b7ee | 2018-12-16 17:11:34 -0800 | [diff] [blame] | 184 | elif err.errno == errno.EINVAL: |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 185 | raise ErrorNotSupp("the kernel or file-system does not support " |
| 186 | "\"SEEK_HOLE\" and \"SEEK_DATA\"") |
| 187 | else: |
| 188 | raise |
| 189 | |
class FilemapSeek(_FilemapBase):
    """
    This class uses the 'SEEK_HOLE' and 'SEEK_DATA' to find file block mapping.
    Unfortunately, the current implementation has to create a temporary file
    in the directory where the image file resides in order to detect whether
    'SEEK_HOLE' really works.
    """

    def __init__(self, image, log=None):
        """
        Refer the '_FilemapBase' class for the documentation. Additionally
        raises 'ErrorNotSupp' if 'SEEK_HOLE' and 'SEEK_DATA' are not usable.
        """

        # Call the base class constructor first
        _FilemapBase.__init__(self, image, log)
        self._log.debug("FilemapSeek: initializing")

        self._probe_seek_hole()

    def _probe_seek_hole(self):
        """
        Check whether the system implements 'SEEK_HOLE' and 'SEEK_DATA'.
        Unfortunately, there seems to be no clean way for detecting this,
        because often the system just fakes them by just assuming that all
        files are fully mapped, so 'SEEK_HOLE' always returns EOF and
        'SEEK_DATA' always returns the requested offset.

        I could not invent a better way of detecting the fake 'SEEK_HOLE'
        implementation than just to create a temporary file in the same
        directory where the image file resides. It would be nice to change this
        to something better.

        Raises 'ErrorNotSupp' if the temporary file cannot be created or
        truncated, or if the stub implementation is detected.
        """

        directory = os.path.dirname(self._image_path)

        try:
            tmp_obj = tempfile.TemporaryFile("w+", dir=directory)
        except IOError as err:
            raise ErrorNotSupp("cannot create a temporary in \"%s\": %s"
                               % (directory, err))

        # The 'with' statement closes the temporary file on every exit path,
        # including the ones which raise an exception.
        with tmp_obj:
            # Make the file one block long without writing anything to it, so
            # it consists of a single hole.
            try:
                os.ftruncate(tmp_obj.fileno(), self.block_size)
            except OSError as err:
                raise ErrorNotSupp("cannot truncate temporary file in \"%s\": %s"
                                   % (directory, err))

            offs = _lseek(tmp_obj, 0, _SEEK_HOLE)

        if offs != 0:
            # We are dealing with the stub 'SEEK_HOLE' implementation which
            # always returns EOF.
            self._log.debug("lseek(0, SEEK_HOLE) returned %d" % offs)
            raise ErrorNotSupp("the file-system does not support "
                               "\"SEEK_HOLE\" and \"SEEK_DATA\" but only "
                               "provides a stub implementation")

    def block_is_mapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        offs = _lseek(self._f_image, block * self.block_size, _SEEK_DATA)
        if offs == -1:
            # No data at or after this block
            result = False
        else:
            # The block is mapped only if the nearest data starts inside it
            result = (offs // self.block_size == block)

        self._log.debug("FilemapSeek: block_is_mapped(%d) returns %s"
                        % (block, result))
        return result

    def _get_ranges(self, start, count, whence1, whence2):
        """
        This function implements 'get_mapped_ranges()' depending
        on what is passed in the 'whence1' and 'whence2' arguments: 'whence1'
        is used to find the beginning of each range and 'whence2' is used to
        find its end. Yields '(first, last)' tuples of block numbers.
        """

        assert whence1 != whence2
        # 'end' and 'limit' are byte offsets, not block numbers
        end = start * self.block_size
        limit = end + count * self.block_size

        while True:
            start = _lseek(self._f_image, end, whence1)
            if start == -1 or start >= limit or start == self.image_size:
                break

            end = _lseek(self._f_image, start, whence2)
            if end == -1 or end == self.image_size:
                # The range lasts till the end of the file: round it up to
                # the block boundary.
                end = self.blocks_cnt * self.block_size
            if end > limit:
                end = limit

            start_blk = start // self.block_size
            end_blk = end // self.block_size - 1
            self._log.debug("FilemapSeek: yielding range (%d, %d)"
                            % (start_blk, end_blk))
            yield (start_blk, end_blk)

    def get_mapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapSeek: get_mapped_ranges(%d, %d(%d))"
                        % (start, count, start + count - 1))
        return self._get_ranges(start, count, _SEEK_DATA, _SEEK_HOLE)
| 289 | |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 290 | |
# Below goes the FIEMAP ioctl implementation, which is not very readable
# because it deals with the rather complex FIEMAP ioctl. To understand the
# code, you need to know the FIEMAP interface, which is documented in the
# "Documentation/filesystems/fiemap.txt" file in the Linux kernel sources.

# Format string for 'struct fiemap'
_FIEMAP_FORMAT = "=QQLLLL"
# sizeof(struct fiemap)
_FIEMAP_SIZE = struct.calcsize(_FIEMAP_FORMAT)
# Format string for 'struct fiemap_extent'
_FIEMAP_EXTENT_FORMAT = "=QQQQQLLLL"
# sizeof(struct fiemap_extent)
_FIEMAP_EXTENT_SIZE = struct.calcsize(_FIEMAP_EXTENT_FORMAT)
# The FIEMAP ioctl number
_FIEMAP_IOCTL = 0xC020660B
# The FIEMAP ioctl flag which instructs the kernel to sync the file before
# reading the block map
_FIEMAP_FLAG_SYNC = 0x00000001
# Size, in bytes, of the buffer which is passed to the FIEMAP ioctl and holds
# 'struct fiemap' followed by the 'struct fiemap_extent' elements. The larger
# the buffer, the fewer times the FIEMAP ioctl has to be invoked.
_FIEMAP_BUFFER_SIZE = 256 * 1024
| 313 | |
class FilemapFiemap(_FilemapBase):
    """
    This class provides API to the FIEMAP ioctl. Namely, it allows to iterate
    over all mapped blocks of the image file.

    This class synchronizes the image file every time it invokes the FIEMAP
    ioctl in order to work-around early FIEMAP implementation kernel bugs.
    """

    def __init__(self, image, log=None):
        """
        Initialize a class instance. The 'image' argument is either the full
        path to the file or a file object to operate on. Raises 'ErrorNotSupp'
        if the FIEMAP ioctl is not supported.
        """

        # Call the base class constructor first
        _FilemapBase.__init__(self, image, log)
        self._log.debug("FilemapFiemap: initializing")

        self._buf_size = _FIEMAP_BUFFER_SIZE

        # Calculate how many 'struct fiemap_extent' elements fit the buffer
        self._buf_size -= _FIEMAP_SIZE
        self._fiemap_extent_cnt = self._buf_size // _FIEMAP_EXTENT_SIZE
        assert self._fiemap_extent_cnt > 0
        self._buf_size = self._fiemap_extent_cnt * _FIEMAP_EXTENT_SIZE
        self._buf_size += _FIEMAP_SIZE

        # Allocate a mutable buffer for the FIEMAP ioctl
        self._buf = array.array('B', [0] * self._buf_size)

        # Check if the FIEMAP ioctl is supported
        self.block_is_mapped(0)

    def _invoke_fiemap(self, block, count):
        """
        Invoke the FIEMAP ioctl for 'count' blocks of the file starting from
        block number 'block'.

        The full result of the operation is stored in 'self._buf' on exit.
        Returns the unpacked 'struct fiemap' data structure in form of a python
        tuple (just like 'struct.unpack()').

        Raises 'ErrorNotSupp' if the ioctl is not supported by the kernel or
        the file-system and 'Error' on a bad block number or any other ioctl
        failure.
        """

        # The range check is skipped for empty files so that block 0 can still
        # be used to probe for FIEMAP support.
        if self.blocks_cnt != 0 and (block < 0 or block >= self.blocks_cnt):
            raise Error("bad block number %d, should be within [0, %d]"
                        % (block, self.blocks_cnt - 1))

        # Initialize the 'struct fiemap' part of the buffer. We use the
        # '_FIEMAP_FLAG_SYNC' flag in order to make sure the file is
        # synchronized. The reason for this is that early FIEMAP
        # implementations had many bugs related to cached dirty data, and
        # synchronizing the file is a necessary work-around.
        struct.pack_into(_FIEMAP_FORMAT, self._buf, 0, block * self.block_size,
                         count * self.block_size, _FIEMAP_FLAG_SYNC, 0,
                         self._fiemap_extent_cnt, 0)

        try:
            # The last argument asks 'fcntl.ioctl' to write the result back
            # into the mutable buffer.
            fcntl.ioctl(self._f_image, _FIEMAP_IOCTL, self._buf, 1)
        except IOError as err:
            # Note, the FIEMAP ioctl is supported by the Linux kernel starting
            # from version 2.6.28 (year 2008).
            if err.errno == errno.EOPNOTSUPP:
                errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \
                         "by the file-system"
                self._log.debug(errstr)
                raise ErrorNotSupp(errstr)
            if err.errno == errno.ENOTTY:
                errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \
                         "by the kernel"
                self._log.debug(errstr)
                raise ErrorNotSupp(errstr)
            raise Error("the FIEMAP ioctl failed for '%s': %s"
                        % (self._image_path, err))

        return struct.unpack(_FIEMAP_FORMAT, self._buf[:_FIEMAP_SIZE])

    def block_is_mapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        struct_fiemap = self._invoke_fiemap(block, 1)

        # The element at index 3 of 'struct_fiemap' is the 'fm_mapped_extents'
        # field. If it contains zero, the block is not mapped, otherwise it is
        # mapped.
        result = bool(struct_fiemap[3])
        self._log.debug("FilemapFiemap: block_is_mapped(%d) returns %s"
                        % (block, result))
        return result

    def _unpack_fiemap_extent(self, index):
        """
        Unpack a 'struct fiemap_extent' structure object number 'index' from
        the internal 'self._buf' buffer.
        """

        offset = _FIEMAP_SIZE + _FIEMAP_EXTENT_SIZE * index
        return struct.unpack(_FIEMAP_EXTENT_FORMAT,
                             self._buf[offset : offset + _FIEMAP_EXTENT_SIZE])

    def _do_get_mapped_ranges(self, start, count):
        """
        Implements most the functionality for the 'get_mapped_ranges()'
        generator: invokes the FIEMAP ioctl, walks through the mapped extents
        and yields mapped block ranges. However, the ranges may be consecutive
        (e.g., (1, 100), (101, 200)) and 'get_mapped_ranges()' simply merges
        them.
        """

        block = start
        while block < start + count:
            struct_fiemap = self._invoke_fiemap(block, count)

            mapped_extents = struct_fiemap[3]
            if mapped_extents == 0:
                # No more mapped blocks
                return

            extent = 0
            while extent < mapped_extents:
                fiemap_extent = self._unpack_fiemap_extent(extent)

                # Start of the extent
                extent_start = fiemap_extent[0]
                # Starting block number of the extent
                extent_block = extent_start // self.block_size
                # Length of the extent
                extent_len = fiemap_extent[2]
                # Count of blocks in the extent
                extent_count = extent_len // self.block_size

                # Extent length and offset have to be block-aligned
                assert extent_start % self.block_size == 0
                assert extent_len % self.block_size == 0

                if extent_block > start + count - 1:
                    return

                # Clip the extent to the requested area
                first = max(extent_block, block)
                last = min(extent_block + extent_count, start + count) - 1
                yield (first, last)

                extent += 1

            # Continue right after the last extent which was reported
            block = extent_block + extent_count

    def get_mapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapFiemap: get_mapped_ranges(%d, %d(%d))"
                        % (start, count, start + count - 1))
        iterator = self._do_get_mapped_ranges(start, count)

        try:
            first_prev, last_prev = next(iterator)
        except StopIteration:
            # There are no mapped blocks in the requested area, so there is
            # nothing to yield. The exception must not escape this generator:
            # with PEP 479 semantics it would turn into a 'RuntimeError'.
            return

        for first, last in iterator:
            if last_prev == first - 1:
                # Adjacent to the previous range: merge them
                last_prev = last
            else:
                self._log.debug("FilemapFiemap: yielding range (%d, %d)"
                                % (first_prev, last_prev))
                yield (first_prev, last_prev)
                first_prev, last_prev = first, last

        self._log.debug("FilemapFiemap: yielding range (%d, %d)"
                        % (first_prev, last_prev))
        yield (first_prev, last_prev)
| 478 | |
class FilemapNobmap(_FilemapBase):
    """
    Fallback implementation for the case when neither 'SEEK_DATA/HOLE' nor
    FIEMAP is supported by the filesystem or kernel. Every block is reported
    as mapped.
    """

    def __init__(self, image, log=None):
        """Refer the '_FilemapBase' class for the documentation."""

        # The base class does all the real initialization work
        super(FilemapNobmap, self).__init__(image, log)
        self._log.debug("FilemapNobmap: initializing")

    def block_is_mapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        return True

    def get_mapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        # The whole requested area is reported as a single mapped range
        last = start + count - 1
        self._log.debug("FilemapNobmap: get_mapped_ranges(%d, %d(%d))"
                        % (start, count, last))
        yield (start, last)
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 501 | |
def filemap(image, log=None):
    """
    Create and return an instance of a Filemap class, depending on what the
    system we run on supports. 'FilemapFiemap' is tried first, then
    'FilemapSeek'. If both of them report 'ErrorNotSupp', an instance of the
    'FilemapNobmap' class is returned.
    """

    # Candidates in the order of preference
    for filemap_class in (FilemapFiemap, FilemapSeek):
        try:
            return filemap_class(image, log)
        except ErrorNotSupp:
            continue

    return FilemapNobmap(image, log)
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 519 | |
def sparse_copy(src_fname, dst_fname, skip=0, seek=0,
                length=0, api=None):
    """
    Efficiently copy sparse file to or into another file.

    Only the mapped block ranges of the source are read and written. If the
    destination already exists it is updated in place; otherwise it is created
    and truncated to its final size first.

    src_fname: path to source file
    dst_fname: path to destination file
    skip: skip N bytes at the start of src
    seek: seek N bytes from the start of dst
    length: read N bytes from src and write them to dst (0 means no limit)
    api: callable which takes 'src_fname' and returns a filemap object, e.g.
         the 'FilemapFiemap' or 'FilemapSeek' class ('filemap' by default)
    """
    if not api:
        api = filemap
    fmap = api(src_fname)
    src_file = fmap._f_image

    try:
        dst_file = open(dst_fname, 'r+b')
        created = False
    except IOError:
        dst_file = open(dst_fname, 'wb')
        created = True

    # The 'with' statement closes the destination on every exit path,
    # including the ones which raise an exception.
    with dst_file:
        if created:
            if length:
                dst_size = length + seek
            else:
                dst_size = os.path.getsize(src_fname) + seek - skip
            dst_file.truncate(dst_size)

        chunk_size = 1024 * 1024
        written = 0
        for first, last in fmap.get_mapped_ranges(0, fmap.blocks_cnt):
            start = first * fmap.block_size
            end = (last + 1) * fmap.block_size

            # The whole range lies within the skipped area
            if skip >= end:
                continue

            if start < skip < end:
                start = skip

            src_file.seek(start, os.SEEK_SET)

            # Account for the hole in front of this range: 'written' is the
            # count of source bytes processed so far, counted from 'skip'.
            written = start - skip
            if length and written >= length:
                return

            dst_file.seek(seek + start - skip, os.SEEK_SET)

            to_read = end - start
            read = 0

            while read < to_read:
                size = min(chunk_size, to_read - read)
                if length and written + size > length:
                    size = length - written
                dst_file.write(src_file.read(size))
                read += size
                written += size
                if written == length:
                    return