Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 1 | # Copyright (c) 2012 Intel, Inc. |
| 2 | # |
| 3 | # This program is free software; you can redistribute it and/or modify |
| 4 | # it under the terms of the GNU General Public License, version 2, |
| 5 | # as published by the Free Software Foundation. |
| 6 | # |
| 7 | # This program is distributed in the hope that it will be useful, but |
| 8 | # WITHOUT ANY WARRANTY; without even the implied warranty of |
| 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 10 | # General Public License for more details. |
| 11 | |
"""
This module implements a way to get the block map of a file. Two methods are
supported - the FIEMAP ioctl and the 'SEEK_HOLE / SEEK_DATA' features of the
file seek syscall. The former is implemented by the 'FilemapFiemap' class,
the latter is implemented by the 'FilemapSeek' class. Both classes provide the
same API. The 'filemap' function automatically selects which class can be used
and returns an instance of the class.
"""
| 20 | |
| 21 | # Disable the following pylint recommendations: |
| 22 | # * Too many instance attributes (R0902) |
| 23 | # pylint: disable=R0902 |
| 24 | |
import array
import errno
import fcntl
import logging
import os
import struct
import tempfile
| 31 | |
def get_block_size(file_obj):
    """
    Return the block size for file object 'file_obj'.

    The size is queried with the FIGETBSZ ioctl. If the ioctl reports a zero
    block size, the 'st_blksize' field returned by 'os.fstat()' is used
    instead.

    Errors are indicated by the 'IOError' exception, which is also raised
    when neither source provides a block size.
    """
    # Get the block size of the host file-system for the image file by calling
    # the FIGETBSZ ioctl (number 2). The ioctl fills in an unsigned int.
    binary_data = fcntl.ioctl(file_obj, 2, struct.pack('I', 0))
    bsize = struct.unpack('I', binary_data)[0]
    if bsize:
        return bsize

    # The ioctl gave no usable answer: fall back to the block size reported
    # by stat, which is not available on every platform.
    stat = os.fstat(file_obj.fileno())
    if hasattr(stat, 'st_blksize'):
        return stat.st_blksize

    raise IOError("Unable to determine block size")
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 49 | |
class ErrorNotSupp(Exception):
    """
    Raised when the 'FIEMAP' or 'SEEK_HOLE' feature is unavailable because
    the kernel or the file-system does not support it.
    """
| 56 | |
class Error(Exception):
    """Generic exception type for every other failure in this module."""
| 60 | |
| 61 | |
| 62 | class _FilemapBase(object): |
| 63 | """ |
| 64 | This is a base class for a couple of other classes in this module. This |
| 65 | class simply performs the common parts of the initialization process: opens |
| 66 | the image file, gets its size, etc. The 'log' parameter is the logger object |
| 67 | to use for printing messages. |
| 68 | """ |
| 69 | |
| 70 | def __init__(self, image, log=None): |
| 71 | """ |
| 72 | Initialize a class instance. The 'image' argument is full path to the |
| 73 | file or file object to operate on. |
| 74 | """ |
| 75 | |
| 76 | self._log = log |
| 77 | if self._log is None: |
| 78 | self._log = logging.getLogger(__name__) |
| 79 | |
| 80 | self._f_image_needs_close = False |
| 81 | |
| 82 | if hasattr(image, "fileno"): |
| 83 | self._f_image = image |
| 84 | self._image_path = image.name |
| 85 | else: |
| 86 | self._image_path = image |
| 87 | self._open_image_file() |
| 88 | |
| 89 | try: |
| 90 | self.image_size = os.fstat(self._f_image.fileno()).st_size |
| 91 | except IOError as err: |
| 92 | raise Error("cannot get information about file '%s': %s" |
| 93 | % (self._f_image.name, err)) |
| 94 | |
| 95 | try: |
| 96 | self.block_size = get_block_size(self._f_image) |
| 97 | except IOError as err: |
| 98 | raise Error("cannot get block size for '%s': %s" |
| 99 | % (self._image_path, err)) |
| 100 | |
| 101 | self.blocks_cnt = self.image_size + self.block_size - 1 |
| 102 | self.blocks_cnt //= self.block_size |
| 103 | |
| 104 | try: |
| 105 | self._f_image.flush() |
| 106 | except IOError as err: |
| 107 | raise Error("cannot flush image file '%s': %s" |
| 108 | % (self._image_path, err)) |
| 109 | |
| 110 | try: |
| 111 | os.fsync(self._f_image.fileno()), |
| 112 | except OSError as err: |
| 113 | raise Error("cannot synchronize image file '%s': %s " |
| 114 | % (self._image_path, err.strerror)) |
| 115 | |
| 116 | self._log.debug("opened image \"%s\"" % self._image_path) |
| 117 | self._log.debug("block size %d, blocks count %d, image size %d" |
| 118 | % (self.block_size, self.blocks_cnt, self.image_size)) |
| 119 | |
| 120 | def __del__(self): |
| 121 | """The class destructor which just closes the image file.""" |
| 122 | if self._f_image_needs_close: |
| 123 | self._f_image.close() |
| 124 | |
| 125 | def _open_image_file(self): |
| 126 | """Open the image file.""" |
| 127 | try: |
| 128 | self._f_image = open(self._image_path, 'rb') |
| 129 | except IOError as err: |
| 130 | raise Error("cannot open image file '%s': %s" |
| 131 | % (self._image_path, err)) |
| 132 | |
| 133 | self._f_image_needs_close = True |
| 134 | |
| 135 | def block_is_mapped(self, block): # pylint: disable=W0613,R0201 |
| 136 | """ |
| 137 | This method has has to be implemented by child classes. It returns |
| 138 | 'True' if block number 'block' of the image file is mapped and 'False' |
| 139 | otherwise. |
| 140 | """ |
| 141 | |
| 142 | raise Error("the method is not implemented") |
| 143 | |
| 144 | def block_is_unmapped(self, block): # pylint: disable=W0613,R0201 |
| 145 | """ |
| 146 | This method has has to be implemented by child classes. It returns |
| 147 | 'True' if block number 'block' of the image file is not mapped (hole) |
| 148 | and 'False' otherwise. |
| 149 | """ |
| 150 | |
| 151 | raise Error("the method is not implemented") |
| 152 | |
| 153 | def get_mapped_ranges(self, start, count): # pylint: disable=W0613,R0201 |
| 154 | """ |
| 155 | This method has has to be implemented by child classes. This is a |
| 156 | generator which yields ranges of mapped blocks in the file. The ranges |
| 157 | are tuples of 2 elements: [first, last], where 'first' is the first |
| 158 | mapped block and 'last' is the last mapped block. |
| 159 | |
| 160 | The ranges are yielded for the area of the file of size 'count' blocks, |
| 161 | starting from block 'start'. |
| 162 | """ |
| 163 | |
| 164 | raise Error("the method is not implemented") |
| 165 | |
| 166 | def get_unmapped_ranges(self, start, count): # pylint: disable=W0613,R0201 |
| 167 | """ |
| 168 | This method has has to be implemented by child classes. Just like |
| 169 | 'get_mapped_ranges()', but yields unmapped block ranges instead |
| 170 | (holes). |
| 171 | """ |
| 172 | |
| 173 | raise Error("the method is not implemented") |
| 174 | |
| 175 | |
| 176 | # The 'SEEK_HOLE' and 'SEEK_DATA' options of the file seek system call |
| 177 | _SEEK_DATA = 3 |
| 178 | _SEEK_HOLE = 4 |
| 179 | |
| 180 | def _lseek(file_obj, offset, whence): |
| 181 | """This is a helper function which invokes 'os.lseek' for file object |
| 182 | 'file_obj' and with specified 'offset' and 'whence'. The 'whence' |
| 183 | argument is supposed to be either '_SEEK_DATA' or '_SEEK_HOLE'. When |
| 184 | there is no more data or hole starting from 'offset', this function |
| 185 | returns '-1'. Otherwise the data or hole position is returned.""" |
| 186 | |
| 187 | try: |
| 188 | return os.lseek(file_obj.fileno(), offset, whence) |
| 189 | except OSError as err: |
| 190 | # The 'lseek' system call returns the ENXIO if there is no data or |
| 191 | # hole starting from the specified offset. |
| 192 | if err.errno == os.errno.ENXIO: |
| 193 | return -1 |
| 194 | elif err.errno == os.errno.EINVAL: |
| 195 | raise ErrorNotSupp("the kernel or file-system does not support " |
| 196 | "\"SEEK_HOLE\" and \"SEEK_DATA\"") |
| 197 | else: |
| 198 | raise |
| 199 | |
class FilemapSeek(_FilemapBase):
    """
    This class uses the 'SEEK_HOLE' and 'SEEK_DATA' to find file block mapping.
    Unfortunately, the current implementation has to create a temporary file
    in the directory of the image file in order to detect whether the feature
    really works, so that directory has to be writable.
    """

    def __init__(self, image, log=None):
        """
        Refer the '_FilemapBase' class for the documentation. Raises
        'ErrorNotSupp' if 'SEEK_HOLE' / 'SEEK_DATA' cannot be used.
        """

        # Call the base class constructor first
        _FilemapBase.__init__(self, image, log)
        self._log.debug("FilemapSeek: initializing")

        self._probe_seek_hole()

    def _probe_seek_hole(self):
        """
        Check whether the system implements 'SEEK_HOLE' and 'SEEK_DATA'.
        Unfortunately, there seems to be no clean way for detecting this,
        because often the system just fakes them by just assuming that all
        files are fully mapped, so 'SEEK_HOLE' always returns EOF and
        'SEEK_DATA' always returns the requested offset.

        I could not invent a better way of detecting the fake 'SEEK_HOLE'
        implementation than just to create a temporary file in the same
        directory where the image file resides. It would be nice to change this
        to something better.

        Raises 'ErrorNotSupp' if the probe file cannot be set up or if only
        the stub implementation is available.
        """

        directory = os.path.dirname(self._image_path)

        try:
            tmp_obj = tempfile.TemporaryFile("w+", dir=directory)
        except (IOError, OSError) as err:
            raise ErrorNotSupp("cannot create a temporary in \"%s\": %s"
                               % (directory, err))

        # The 'with' statement makes sure the temporary file is closed on
        # every exit path, including the 'ErrorNotSupp' ones.
        with tmp_obj:
            # Turn the empty file into a single-block hole
            try:
                os.ftruncate(tmp_obj.fileno(), self.block_size)
            except OSError as err:
                raise ErrorNotSupp("cannot truncate temporary file in \"%s\": "
                                   "%s" % (directory, err))

            offs = _lseek(tmp_obj, 0, _SEEK_HOLE)
            if offs != 0:
                # We are dealing with the stub 'SEEK_HOLE' implementation which
                # always returns EOF.
                self._log.debug("lseek(0, SEEK_HOLE) returned %d" % offs)
                raise ErrorNotSupp("the file-system does not support "
                                   "\"SEEK_HOLE\" and \"SEEK_DATA\" but only "
                                   "provides a stub implementation")

    def block_is_mapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        offs = _lseek(self._f_image, block * self.block_size, _SEEK_DATA)
        if offs == -1:
            # No data at or after this block
            result = False
        else:
            # The block is mapped only if the nearest data starts within it
            result = (offs // self.block_size == block)

        self._log.debug("FilemapSeek: block_is_mapped(%d) returns %s"
                        % (block, result))
        return result

    def block_is_unmapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        return not self.block_is_mapped(block)

    def _get_ranges(self, start, count, whence1, whence2):
        """
        This function implements 'get_mapped_ranges()' and
        'get_unmapped_ranges()' depending on what is passed in the 'whence1'
        and 'whence2' arguments: 'whence1' locates the beginning of a range
        and 'whence2' locates its end.
        """

        assert whence1 != whence2
        # From here on 'start', 'end' and 'limit' are byte offsets
        end = start * self.block_size
        limit = end + count * self.block_size

        while True:
            start = _lseek(self._f_image, end, whence1)
            if start == -1 or start >= limit or start == self.image_size:
                break

            end = _lseek(self._f_image, start, whence2)
            if end == -1 or end == self.image_size:
                # The range extends to the end of the file; round up to the
                # block boundary because the last block may be partial.
                end = self.blocks_cnt * self.block_size
            if end > limit:
                end = limit

            start_blk = start // self.block_size
            end_blk = end // self.block_size - 1
            self._log.debug("FilemapSeek: yielding range (%d, %d)"
                            % (start_blk, end_blk))
            yield (start_blk, end_blk)

    def get_mapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapSeek: get_mapped_ranges(%d,  %d(%d))"
                        % (start, count, start + count - 1))
        return self._get_ranges(start, count, _SEEK_DATA, _SEEK_HOLE)

    def get_unmapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapSeek: get_unmapped_ranges(%d,  %d(%d))"
                        % (start, count, start + count - 1))
        return self._get_ranges(start, count, _SEEK_HOLE, _SEEK_DATA)
| 310 | |
| 311 | |
| 312 | # Below goes the FIEMAP ioctl implementation, which is not very readable |
| 313 | # because it deals with the rather complex FIEMAP ioctl. To understand the |
| 314 | # code, you need to know the FIEMAP interface, which is documented in the |
| 315 | # "Documentation/filesystems/fiemap.txt" file in the Linux kernel sources. |
| 316 | |
| 317 | # Format string for 'struct fiemap' |
| 318 | _FIEMAP_FORMAT = "=QQLLLL" |
| 319 | # sizeof(struct fiemap) |
| 320 | _FIEMAP_SIZE = struct.calcsize(_FIEMAP_FORMAT) |
| 321 | # Format string for 'struct fiemap_extent' |
| 322 | _FIEMAP_EXTENT_FORMAT = "=QQQQQLLLL" |
| 323 | # sizeof(struct fiemap_extent) |
| 324 | _FIEMAP_EXTENT_SIZE = struct.calcsize(_FIEMAP_EXTENT_FORMAT) |
| 325 | # The FIEMAP ioctl number |
| 326 | _FIEMAP_IOCTL = 0xC020660B |
| 327 | # This FIEMAP ioctl flag which instructs the kernel to sync the file before |
| 328 | # reading the block map |
| 329 | _FIEMAP_FLAG_SYNC = 0x00000001 |
| 330 | # Size of the buffer for 'struct fiemap_extent' elements which will be used |
| 331 | # when invoking the FIEMAP ioctl. The larger is the buffer, the less times the |
| 332 | # FIEMAP ioctl will be invoked. |
| 333 | _FIEMAP_BUFFER_SIZE = 256 * 1024 |
| 334 | |
class FilemapFiemap(_FilemapBase):
    """
    This class provides API to the FIEMAP ioctl. Namely, it allows to iterate
    over all mapped blocks and over all holes.

    This class synchronizes the image file every time it invokes the FIEMAP
    ioctl in order to work-around early FIEMAP implementation kernel bugs.
    """

    def __init__(self, image, log=None):
        """
        Initialize a class instance. The 'image' argument is either the full
        path to the file or a file object to operate on. Raises 'ErrorNotSupp'
        if the FIEMAP ioctl is not supported.
        """

        # Call the base class constructor first
        _FilemapBase.__init__(self, image, log)
        self._log.debug("FilemapFiemap: initializing")

        self._buf_size = _FIEMAP_BUFFER_SIZE

        # Calculate how many 'struct fiemap_extent' elements fit the buffer
        self._buf_size -= _FIEMAP_SIZE
        self._fiemap_extent_cnt = self._buf_size // _FIEMAP_EXTENT_SIZE
        assert self._fiemap_extent_cnt > 0
        self._buf_size = self._fiemap_extent_cnt * _FIEMAP_EXTENT_SIZE
        self._buf_size += _FIEMAP_SIZE

        # Allocate a mutable buffer for the FIEMAP ioctl
        self._buf = array.array('B', [0] * self._buf_size)

        # Check if the FIEMAP ioctl is supported
        self.block_is_mapped(0)

    def _invoke_fiemap(self, block, count):
        """
        Invoke the FIEMAP ioctl for 'count' blocks of the file starting from
        block number 'block'.

        The full result of the operation is stored in 'self._buf' on exit.
        Returns the unpacked 'struct fiemap' data structure in form of a python
        tuple (just like 'struct.unpack()').

        Raises 'ErrorNotSupp' if the ioctl is not supported by the kernel or
        the file-system, and 'Error' for a bad block number or any other
        ioctl failure.
        """

        if self.blocks_cnt != 0 and (block < 0 or block >= self.blocks_cnt):
            raise Error("bad block number %d, should be within [0, %d]"
                        % (block, self.blocks_cnt - 1))

        # Initialize the 'struct fiemap' part of the buffer. We use the
        # '_FIEMAP_FLAG_SYNC' flag in order to make sure the file is
        # synchronized. The reason for this is that early FIEMAP
        # implementations had many bugs related to cached dirty data, and
        # synchronizing the file is a necessary work-around.
        struct.pack_into(_FIEMAP_FORMAT, self._buf, 0, block * self.block_size,
                         count * self.block_size, _FIEMAP_FLAG_SYNC, 0,
                         self._fiemap_extent_cnt, 0)

        try:
            fcntl.ioctl(self._f_image, _FIEMAP_IOCTL, self._buf, 1)
        except IOError as err:
            # Note, the FIEMAP ioctl is supported by the Linux kernel starting
            # from version 2.6.28 (year 2008).
            #
            # The error codes come from the 'errno' module; 'os.errno' was
            # never a documented API and is gone in Python 3.7 and later.
            if err.errno == errno.EOPNOTSUPP:
                errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \
                         "by the file-system"
                self._log.debug(errstr)
                raise ErrorNotSupp(errstr)
            if err.errno == errno.ENOTTY:
                errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \
                         "by the kernel"
                self._log.debug(errstr)
                raise ErrorNotSupp(errstr)
            raise Error("the FIEMAP ioctl failed for '%s': %s"
                        % (self._image_path, err))

        # Unpack the header in place instead of copying a slice of the buffer
        return struct.unpack_from(_FIEMAP_FORMAT, self._buf, 0)

    def block_is_mapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        struct_fiemap = self._invoke_fiemap(block, 1)

        # The element at index 3 of 'struct_fiemap' is the 'fm_mapped_extents'
        # field. If it contains zero, the block is not mapped, otherwise it is
        # mapped.
        result = bool(struct_fiemap[3])
        self._log.debug("FilemapFiemap: block_is_mapped(%d) returns %s"
                        % (block, result))
        return result

    def block_is_unmapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        return not self.block_is_mapped(block)

    def _unpack_fiemap_extent(self, index):
        """
        Unpack a 'struct fiemap_extent' structure object number 'index' from
        the internal 'self._buf' buffer.
        """

        offset = _FIEMAP_SIZE + _FIEMAP_EXTENT_SIZE * index
        return struct.unpack_from(_FIEMAP_EXTENT_FORMAT, self._buf, offset)

    def _do_get_mapped_ranges(self, start, count):
        """
        Implements most the functionality for the 'get_mapped_ranges()'
        generator: invokes the FIEMAP ioctl, walks through the mapped extents
        and yields mapped block ranges. However, the ranges may be consecutive
        (e.g., (1, 100), (101, 200)) and 'get_mapped_ranges()' simply merges
        them.
        """

        block = start
        while block < start + count:
            struct_fiemap = self._invoke_fiemap(block, count)

            mapped_extents = struct_fiemap[3]
            if mapped_extents == 0:
                # No more mapped blocks
                return

            extent = 0
            while extent < mapped_extents:
                fiemap_extent = self._unpack_fiemap_extent(extent)

                # Start of the extent
                extent_start = fiemap_extent[0]
                # Starting block number of the extent
                extent_block = extent_start // self.block_size
                # Length of the extent
                extent_len = fiemap_extent[2]
                # Count of blocks in the extent
                extent_count = extent_len // self.block_size

                # Extent length and offset have to be block-aligned
                assert extent_start % self.block_size == 0
                assert extent_len % self.block_size == 0

                if extent_block > start + count - 1:
                    return

                # Clip the extent to the requested area
                first = max(extent_block, block)
                last = min(extent_block + extent_count, start + count) - 1
                yield (first, last)

                extent += 1

            # Continue right after the last extent which has been processed
            block = extent_block + extent_count

    def get_mapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapFiemap: get_mapped_ranges(%d,  %d(%d))"
                        % (start, count, start + count - 1))
        iterator = self._do_get_mapped_ranges(start, count)
        try:
            first_prev, last_prev = next(iterator)
        except StopIteration:
            # There are no mapped blocks in the requested area. The exception
            # must not leak out of this generator: since PEP 479 it would be
            # turned into a 'RuntimeError'.
            return

        for first, last in iterator:
            if last_prev == first - 1:
                # Adjacent to the previous range - merge them
                last_prev = last
            else:
                self._log.debug("FilemapFiemap: yielding range (%d, %d)"
                                % (first_prev, last_prev))
                yield (first_prev, last_prev)
                first_prev, last_prev = first, last

        self._log.debug("FilemapFiemap: yielding range (%d, %d)"
                        % (first_prev, last_prev))
        yield (first_prev, last_prev)

    def get_unmapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapFiemap: get_unmapped_ranges(%d,  %d(%d))"
                        % (start, count, start + count - 1))
        hole_first = start
        for first, last in self._do_get_mapped_ranges(start, count):
            if first > hole_first:
                self._log.debug("FilemapFiemap: yielding range (%d, %d)"
                                % (hole_first, first - 1))
                yield (hole_first, first - 1)

            hole_first = last + 1

        # The tail of the requested area which follows the last mapped range
        if hole_first < start + count:
            self._log.debug("FilemapFiemap: yielding range (%d, %d)"
                            % (hole_first, start + count - 1))
            yield (hole_first, start + count - 1)
| 521 | |
def filemap(image, log=None):
    """
    Pick the block-map implementation which works on this system and return
    an instance of it for 'image'.

    'FilemapFiemap' (the FIEMAP ioctl) is tried first. If it reports
    'ErrorNotSupp', 'FilemapSeek' ('SEEK_HOLE' / 'SEEK_DATA') is tried
    instead, and whatever exception it raises is passed on to the caller.
    """

    try:
        fmap = FilemapFiemap(image, log)
    except ErrorNotSupp:
        fmap = FilemapSeek(image, log)
    return fmap
| 536 | |
def sparse_copy(src_fname, dst_fname, skip=0, seek=0,
                length=0, api=None):
    """
    Efficiently copy sparse file to or into another file.

    src_fname: path to source file
    dst_fname: path to destination file
    skip: skip N bytes at the start of src
    seek: seek N bytes from the start of dst
    length: read N bytes from src and write them to dst; 0 means copy up to
            the end of src
    api: FilemapFiemap or FilemapSeek class (or any callable with the same
         interface) used to create the block map of src; selected by
         'filemap()' when not given

    Only the mapped ranges of src are written, holes are skipped. An existing
    destination file is updated in place; a new one is created and sized to
    hold the result.
    """
    if not api:
        api = filemap
    fmap = api(src_fname)

    try:
        dst_file = open(dst_fname, 'r+b')
        created = False
    except IOError:
        dst_file = open(dst_fname, 'wb')
        created = True

    # The 'with' statement closes the destination on every exit path,
    # including exceptions raised while copying.
    with dst_file:
        if created:
            # Give the new file its final size up-front, so that trailing
            # holes are preserved.
            if length:
                dst_size = length + seek
            else:
                dst_size = os.path.getsize(src_fname) + seek - skip
            dst_file.truncate(dst_size)

        # Position in the copied stream, in bytes counted from 'skip'
        written = 0
        for first, last in fmap.get_mapped_ranges(0, fmap.blocks_cnt):
            start = first * fmap.block_size
            end = (last + 1) * fmap.block_size

            # The whole range lies in the skipped area
            if skip >= end:
                continue

            if start < skip < end:
                start = skip

            fmap._f_image.seek(start, os.SEEK_SET)

            # Jump over the hole which precedes this range
            written = start - skip
            if length and written >= length:
                return

            dst_file.seek(seek + start - skip, os.SEEK_SET)

            chunk_size = 1024 * 1024
            to_read = end - start
            read = 0

            while read < to_read:
                size = min(chunk_size, to_read - read)
                if length and written + size > length:
                    size = length - written
                dst_file.write(fmap._f_image.read(size))
                read += size
                written += size
                if length and written >= length:
                    return