Brad Bishop | c342db3 | 2019-05-15 21:57:59 -0400 | [diff] [blame^] | 1 | # |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 2 | # Copyright (c) 2012 Intel, Inc. |
| 3 | # |
Brad Bishop | c342db3 | 2019-05-15 21:57:59 -0400 | [diff] [blame^] | 4 | # SPDX-License-Identifier: GPL-2.0-only |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 5 | # |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 6 | |
| 7 | """ |
| 8 | This module implements python implements a way to get file block. Two methods |
| 9 | are supported - the FIEMAP ioctl and the 'SEEK_HOLE / SEEK_DATA' features of |
| 10 | the file seek syscall. The former is implemented by the 'FilemapFiemap' class, |
| 11 | the latter is implemented by the 'FilemapSeek' class. Both classes provide the |
| 12 | same API. The 'filemap' function automatically selects which class can be used |
| 13 | and returns an instance of the class. |
| 14 | """ |
| 15 | |
| 16 | # Disable the following pylint recommendations: |
| 17 | # * Too many instance attributes (R0902) |
| 18 | # pylint: disable=R0902 |
| 19 | |
Brad Bishop | 1a4b7ee | 2018-12-16 17:11:34 -0800 | [diff] [blame] | 20 | import errno |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 21 | import os |
| 22 | import struct |
| 23 | import array |
| 24 | import fcntl |
| 25 | import tempfile |
| 26 | import logging |
| 27 | |
| 28 | def get_block_size(file_obj): |
| 29 | """ |
| 30 | Returns block size for file object 'file_obj'. Errors are indicated by the |
| 31 | 'IOError' exception. |
| 32 | """ |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 33 | # Get the block size of the host file-system for the image file by calling |
| 34 | # the FIGETBSZ ioctl (number 2). |
Brad Bishop | d7bf8c1 | 2018-02-25 22:55:05 -0500 | [diff] [blame] | 35 | binary_data = fcntl.ioctl(file_obj, 2, struct.pack('I', 0)) |
Brad Bishop | 316dfdd | 2018-06-25 12:45:53 -0400 | [diff] [blame] | 36 | bsize = struct.unpack('I', binary_data)[0] |
| 37 | if not bsize: |
| 38 | import os |
| 39 | stat = os.fstat(file_obj.fileno()) |
| 40 | if hasattr(stat, 'st_blksize'): |
| 41 | bsize = stat.st_blksize |
| 42 | else: |
| 43 | raise IOError("Unable to determine block size") |
| 44 | return bsize |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 45 | |
| 46 | class ErrorNotSupp(Exception): |
| 47 | """ |
| 48 | An exception of this type is raised when the 'FIEMAP' or 'SEEK_HOLE' feature |
| 49 | is not supported either by the kernel or the file-system. |
| 50 | """ |
| 51 | pass |
| 52 | |
| 53 | class Error(Exception): |
| 54 | """A class for all the other exceptions raised by this module.""" |
| 55 | pass |
| 56 | |
| 57 | |
| 58 | class _FilemapBase(object): |
| 59 | """ |
| 60 | This is a base class for a couple of other classes in this module. This |
| 61 | class simply performs the common parts of the initialization process: opens |
| 62 | the image file, gets its size, etc. The 'log' parameter is the logger object |
| 63 | to use for printing messages. |
| 64 | """ |
| 65 | |
| 66 | def __init__(self, image, log=None): |
| 67 | """ |
| 68 | Initialize a class instance. The 'image' argument is full path to the |
| 69 | file or file object to operate on. |
| 70 | """ |
| 71 | |
| 72 | self._log = log |
| 73 | if self._log is None: |
| 74 | self._log = logging.getLogger(__name__) |
| 75 | |
| 76 | self._f_image_needs_close = False |
| 77 | |
| 78 | if hasattr(image, "fileno"): |
| 79 | self._f_image = image |
| 80 | self._image_path = image.name |
| 81 | else: |
| 82 | self._image_path = image |
| 83 | self._open_image_file() |
| 84 | |
| 85 | try: |
| 86 | self.image_size = os.fstat(self._f_image.fileno()).st_size |
| 87 | except IOError as err: |
| 88 | raise Error("cannot get information about file '%s': %s" |
| 89 | % (self._f_image.name, err)) |
| 90 | |
| 91 | try: |
| 92 | self.block_size = get_block_size(self._f_image) |
| 93 | except IOError as err: |
| 94 | raise Error("cannot get block size for '%s': %s" |
| 95 | % (self._image_path, err)) |
| 96 | |
| 97 | self.blocks_cnt = self.image_size + self.block_size - 1 |
| 98 | self.blocks_cnt //= self.block_size |
| 99 | |
| 100 | try: |
| 101 | self._f_image.flush() |
| 102 | except IOError as err: |
| 103 | raise Error("cannot flush image file '%s': %s" |
| 104 | % (self._image_path, err)) |
| 105 | |
| 106 | try: |
| 107 | os.fsync(self._f_image.fileno()), |
| 108 | except OSError as err: |
| 109 | raise Error("cannot synchronize image file '%s': %s " |
| 110 | % (self._image_path, err.strerror)) |
| 111 | |
| 112 | self._log.debug("opened image \"%s\"" % self._image_path) |
| 113 | self._log.debug("block size %d, blocks count %d, image size %d" |
| 114 | % (self.block_size, self.blocks_cnt, self.image_size)) |
| 115 | |
| 116 | def __del__(self): |
| 117 | """The class destructor which just closes the image file.""" |
| 118 | if self._f_image_needs_close: |
| 119 | self._f_image.close() |
| 120 | |
| 121 | def _open_image_file(self): |
| 122 | """Open the image file.""" |
| 123 | try: |
| 124 | self._f_image = open(self._image_path, 'rb') |
| 125 | except IOError as err: |
| 126 | raise Error("cannot open image file '%s': %s" |
| 127 | % (self._image_path, err)) |
| 128 | |
| 129 | self._f_image_needs_close = True |
| 130 | |
| 131 | def block_is_mapped(self, block): # pylint: disable=W0613,R0201 |
| 132 | """ |
| 133 | This method has has to be implemented by child classes. It returns |
| 134 | 'True' if block number 'block' of the image file is mapped and 'False' |
| 135 | otherwise. |
| 136 | """ |
| 137 | |
| 138 | raise Error("the method is not implemented") |
| 139 | |
| 140 | def block_is_unmapped(self, block): # pylint: disable=W0613,R0201 |
| 141 | """ |
| 142 | This method has has to be implemented by child classes. It returns |
| 143 | 'True' if block number 'block' of the image file is not mapped (hole) |
| 144 | and 'False' otherwise. |
| 145 | """ |
| 146 | |
| 147 | raise Error("the method is not implemented") |
| 148 | |
| 149 | def get_mapped_ranges(self, start, count): # pylint: disable=W0613,R0201 |
| 150 | """ |
| 151 | This method has has to be implemented by child classes. This is a |
| 152 | generator which yields ranges of mapped blocks in the file. The ranges |
| 153 | are tuples of 2 elements: [first, last], where 'first' is the first |
| 154 | mapped block and 'last' is the last mapped block. |
| 155 | |
| 156 | The ranges are yielded for the area of the file of size 'count' blocks, |
| 157 | starting from block 'start'. |
| 158 | """ |
| 159 | |
| 160 | raise Error("the method is not implemented") |
| 161 | |
| 162 | def get_unmapped_ranges(self, start, count): # pylint: disable=W0613,R0201 |
| 163 | """ |
| 164 | This method has has to be implemented by child classes. Just like |
| 165 | 'get_mapped_ranges()', but yields unmapped block ranges instead |
| 166 | (holes). |
| 167 | """ |
| 168 | |
| 169 | raise Error("the method is not implemented") |
| 170 | |
| 171 | |
| 172 | # The 'SEEK_HOLE' and 'SEEK_DATA' options of the file seek system call |
| 173 | _SEEK_DATA = 3 |
| 174 | _SEEK_HOLE = 4 |
| 175 | |
| 176 | def _lseek(file_obj, offset, whence): |
| 177 | """This is a helper function which invokes 'os.lseek' for file object |
| 178 | 'file_obj' and with specified 'offset' and 'whence'. The 'whence' |
| 179 | argument is supposed to be either '_SEEK_DATA' or '_SEEK_HOLE'. When |
| 180 | there is no more data or hole starting from 'offset', this function |
| 181 | returns '-1'. Otherwise the data or hole position is returned.""" |
| 182 | |
| 183 | try: |
| 184 | return os.lseek(file_obj.fileno(), offset, whence) |
| 185 | except OSError as err: |
| 186 | # The 'lseek' system call returns the ENXIO if there is no data or |
| 187 | # hole starting from the specified offset. |
Brad Bishop | 1a4b7ee | 2018-12-16 17:11:34 -0800 | [diff] [blame] | 188 | if err.errno == errno.ENXIO: |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 189 | return -1 |
Brad Bishop | 1a4b7ee | 2018-12-16 17:11:34 -0800 | [diff] [blame] | 190 | elif err.errno == errno.EINVAL: |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 191 | raise ErrorNotSupp("the kernel or file-system does not support " |
| 192 | "\"SEEK_HOLE\" and \"SEEK_DATA\"") |
| 193 | else: |
| 194 | raise |
| 195 | |
| 196 | class FilemapSeek(_FilemapBase): |
| 197 | """ |
| 198 | This class uses the 'SEEK_HOLE' and 'SEEK_DATA' to find file block mapping. |
| 199 | Unfortunately, the current implementation requires the caller to have write |
| 200 | access to the image file. |
| 201 | """ |
| 202 | |
| 203 | def __init__(self, image, log=None): |
| 204 | """Refer the '_FilemapBase' class for the documentation.""" |
| 205 | |
| 206 | # Call the base class constructor first |
| 207 | _FilemapBase.__init__(self, image, log) |
| 208 | self._log.debug("FilemapSeek: initializing") |
| 209 | |
| 210 | self._probe_seek_hole() |
| 211 | |
| 212 | def _probe_seek_hole(self): |
| 213 | """ |
| 214 | Check whether the system implements 'SEEK_HOLE' and 'SEEK_DATA'. |
| 215 | Unfortunately, there seems to be no clean way for detecting this, |
| 216 | because often the system just fakes them by just assuming that all |
| 217 | files are fully mapped, so 'SEEK_HOLE' always returns EOF and |
| 218 | 'SEEK_DATA' always returns the requested offset. |
| 219 | |
| 220 | I could not invent a better way of detecting the fake 'SEEK_HOLE' |
| 221 | implementation than just to create a temporary file in the same |
| 222 | directory where the image file resides. It would be nice to change this |
| 223 | to something better. |
| 224 | """ |
| 225 | |
| 226 | directory = os.path.dirname(self._image_path) |
| 227 | |
| 228 | try: |
| 229 | tmp_obj = tempfile.TemporaryFile("w+", dir=directory) |
| 230 | except IOError as err: |
Brad Bishop | d7bf8c1 | 2018-02-25 22:55:05 -0500 | [diff] [blame] | 231 | raise ErrorNotSupp("cannot create a temporary in \"%s\": %s" \ |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 232 | % (directory, err)) |
| 233 | |
| 234 | try: |
| 235 | os.ftruncate(tmp_obj.fileno(), self.block_size) |
| 236 | except OSError as err: |
| 237 | raise ErrorNotSupp("cannot truncate temporary file in \"%s\": %s" |
| 238 | % (directory, err)) |
| 239 | |
| 240 | offs = _lseek(tmp_obj, 0, _SEEK_HOLE) |
| 241 | if offs != 0: |
| 242 | # We are dealing with the stub 'SEEK_HOLE' implementation which |
| 243 | # always returns EOF. |
| 244 | self._log.debug("lseek(0, SEEK_HOLE) returned %d" % offs) |
| 245 | raise ErrorNotSupp("the file-system does not support " |
| 246 | "\"SEEK_HOLE\" and \"SEEK_DATA\" but only " |
| 247 | "provides a stub implementation") |
| 248 | |
| 249 | tmp_obj.close() |
| 250 | |
| 251 | def block_is_mapped(self, block): |
| 252 | """Refer the '_FilemapBase' class for the documentation.""" |
| 253 | offs = _lseek(self._f_image, block * self.block_size, _SEEK_DATA) |
| 254 | if offs == -1: |
| 255 | result = False |
| 256 | else: |
| 257 | result = (offs // self.block_size == block) |
| 258 | |
| 259 | self._log.debug("FilemapSeek: block_is_mapped(%d) returns %s" |
| 260 | % (block, result)) |
| 261 | return result |
| 262 | |
| 263 | def block_is_unmapped(self, block): |
| 264 | """Refer the '_FilemapBase' class for the documentation.""" |
| 265 | return not self.block_is_mapped(block) |
| 266 | |
| 267 | def _get_ranges(self, start, count, whence1, whence2): |
| 268 | """ |
| 269 | This function implements 'get_mapped_ranges()' and |
| 270 | 'get_unmapped_ranges()' depending on what is passed in the 'whence1' |
| 271 | and 'whence2' arguments. |
| 272 | """ |
| 273 | |
| 274 | assert whence1 != whence2 |
| 275 | end = start * self.block_size |
| 276 | limit = end + count * self.block_size |
| 277 | |
| 278 | while True: |
| 279 | start = _lseek(self._f_image, end, whence1) |
| 280 | if start == -1 or start >= limit or start == self.image_size: |
| 281 | break |
| 282 | |
| 283 | end = _lseek(self._f_image, start, whence2) |
| 284 | if end == -1 or end == self.image_size: |
| 285 | end = self.blocks_cnt * self.block_size |
| 286 | if end > limit: |
| 287 | end = limit |
| 288 | |
| 289 | start_blk = start // self.block_size |
| 290 | end_blk = end // self.block_size - 1 |
| 291 | self._log.debug("FilemapSeek: yielding range (%d, %d)" |
| 292 | % (start_blk, end_blk)) |
| 293 | yield (start_blk, end_blk) |
| 294 | |
| 295 | def get_mapped_ranges(self, start, count): |
| 296 | """Refer the '_FilemapBase' class for the documentation.""" |
| 297 | self._log.debug("FilemapSeek: get_mapped_ranges(%d, %d(%d))" |
| 298 | % (start, count, start + count - 1)) |
| 299 | return self._get_ranges(start, count, _SEEK_DATA, _SEEK_HOLE) |
| 300 | |
| 301 | def get_unmapped_ranges(self, start, count): |
| 302 | """Refer the '_FilemapBase' class for the documentation.""" |
| 303 | self._log.debug("FilemapSeek: get_unmapped_ranges(%d, %d(%d))" |
| 304 | % (start, count, start + count - 1)) |
| 305 | return self._get_ranges(start, count, _SEEK_HOLE, _SEEK_DATA) |
| 306 | |
| 307 | |
| 308 | # Below goes the FIEMAP ioctl implementation, which is not very readable |
| 309 | # because it deals with the rather complex FIEMAP ioctl. To understand the |
| 310 | # code, you need to know the FIEMAP interface, which is documented in the |
| 311 | # "Documentation/filesystems/fiemap.txt" file in the Linux kernel sources. |
| 312 | |
| 313 | # Format string for 'struct fiemap' |
| 314 | _FIEMAP_FORMAT = "=QQLLLL" |
| 315 | # sizeof(struct fiemap) |
| 316 | _FIEMAP_SIZE = struct.calcsize(_FIEMAP_FORMAT) |
| 317 | # Format string for 'struct fiemap_extent' |
| 318 | _FIEMAP_EXTENT_FORMAT = "=QQQQQLLLL" |
| 319 | # sizeof(struct fiemap_extent) |
| 320 | _FIEMAP_EXTENT_SIZE = struct.calcsize(_FIEMAP_EXTENT_FORMAT) |
| 321 | # The FIEMAP ioctl number |
| 322 | _FIEMAP_IOCTL = 0xC020660B |
| 323 | # This FIEMAP ioctl flag which instructs the kernel to sync the file before |
| 324 | # reading the block map |
| 325 | _FIEMAP_FLAG_SYNC = 0x00000001 |
| 326 | # Size of the buffer for 'struct fiemap_extent' elements which will be used |
| 327 | # when invoking the FIEMAP ioctl. The larger is the buffer, the less times the |
| 328 | # FIEMAP ioctl will be invoked. |
| 329 | _FIEMAP_BUFFER_SIZE = 256 * 1024 |
| 330 | |
| 331 | class FilemapFiemap(_FilemapBase): |
| 332 | """ |
| 333 | This class provides API to the FIEMAP ioctl. Namely, it allows to iterate |
| 334 | over all mapped blocks and over all holes. |
| 335 | |
| 336 | This class synchronizes the image file every time it invokes the FIEMAP |
| 337 | ioctl in order to work-around early FIEMAP implementation kernel bugs. |
| 338 | """ |
| 339 | |
| 340 | def __init__(self, image, log=None): |
| 341 | """ |
| 342 | Initialize a class instance. The 'image' argument is full the file |
| 343 | object to operate on. |
| 344 | """ |
| 345 | |
| 346 | # Call the base class constructor first |
| 347 | _FilemapBase.__init__(self, image, log) |
| 348 | self._log.debug("FilemapFiemap: initializing") |
| 349 | |
| 350 | self._buf_size = _FIEMAP_BUFFER_SIZE |
| 351 | |
| 352 | # Calculate how many 'struct fiemap_extent' elements fit the buffer |
| 353 | self._buf_size -= _FIEMAP_SIZE |
| 354 | self._fiemap_extent_cnt = self._buf_size // _FIEMAP_EXTENT_SIZE |
| 355 | assert self._fiemap_extent_cnt > 0 |
| 356 | self._buf_size = self._fiemap_extent_cnt * _FIEMAP_EXTENT_SIZE |
| 357 | self._buf_size += _FIEMAP_SIZE |
| 358 | |
| 359 | # Allocate a mutable buffer for the FIEMAP ioctl |
| 360 | self._buf = array.array('B', [0] * self._buf_size) |
| 361 | |
| 362 | # Check if the FIEMAP ioctl is supported |
| 363 | self.block_is_mapped(0) |
| 364 | |
| 365 | def _invoke_fiemap(self, block, count): |
| 366 | """ |
| 367 | Invoke the FIEMAP ioctl for 'count' blocks of the file starting from |
| 368 | block number 'block'. |
| 369 | |
| 370 | The full result of the operation is stored in 'self._buf' on exit. |
| 371 | Returns the unpacked 'struct fiemap' data structure in form of a python |
| 372 | list (just like 'struct.upack()'). |
| 373 | """ |
| 374 | |
| 375 | if self.blocks_cnt != 0 and (block < 0 or block >= self.blocks_cnt): |
| 376 | raise Error("bad block number %d, should be within [0, %d]" |
| 377 | % (block, self.blocks_cnt)) |
| 378 | |
| 379 | # Initialize the 'struct fiemap' part of the buffer. We use the |
| 380 | # '_FIEMAP_FLAG_SYNC' flag in order to make sure the file is |
| 381 | # synchronized. The reason for this is that early FIEMAP |
| 382 | # implementations had many bugs related to cached dirty data, and |
| 383 | # synchronizing the file is a necessary work-around. |
| 384 | struct.pack_into(_FIEMAP_FORMAT, self._buf, 0, block * self.block_size, |
| 385 | count * self.block_size, _FIEMAP_FLAG_SYNC, 0, |
| 386 | self._fiemap_extent_cnt, 0) |
| 387 | |
| 388 | try: |
| 389 | fcntl.ioctl(self._f_image, _FIEMAP_IOCTL, self._buf, 1) |
| 390 | except IOError as err: |
| 391 | # Note, the FIEMAP ioctl is supported by the Linux kernel starting |
| 392 | # from version 2.6.28 (year 2008). |
Brad Bishop | 1a4b7ee | 2018-12-16 17:11:34 -0800 | [diff] [blame] | 393 | if err.errno == errno.EOPNOTSUPP: |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 394 | errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \ |
| 395 | "by the file-system" |
| 396 | self._log.debug(errstr) |
| 397 | raise ErrorNotSupp(errstr) |
Brad Bishop | 1a4b7ee | 2018-12-16 17:11:34 -0800 | [diff] [blame] | 398 | if err.errno == errno.ENOTTY: |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 399 | errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \ |
| 400 | "by the kernel" |
| 401 | self._log.debug(errstr) |
| 402 | raise ErrorNotSupp(errstr) |
| 403 | raise Error("the FIEMAP ioctl failed for '%s': %s" |
| 404 | % (self._image_path, err)) |
| 405 | |
| 406 | return struct.unpack(_FIEMAP_FORMAT, self._buf[:_FIEMAP_SIZE]) |
| 407 | |
| 408 | def block_is_mapped(self, block): |
| 409 | """Refer the '_FilemapBase' class for the documentation.""" |
| 410 | struct_fiemap = self._invoke_fiemap(block, 1) |
| 411 | |
| 412 | # The 3rd element of 'struct_fiemap' is the 'fm_mapped_extents' field. |
| 413 | # If it contains zero, the block is not mapped, otherwise it is |
| 414 | # mapped. |
| 415 | result = bool(struct_fiemap[3]) |
| 416 | self._log.debug("FilemapFiemap: block_is_mapped(%d) returns %s" |
| 417 | % (block, result)) |
| 418 | return result |
| 419 | |
| 420 | def block_is_unmapped(self, block): |
| 421 | """Refer the '_FilemapBase' class for the documentation.""" |
| 422 | return not self.block_is_mapped(block) |
| 423 | |
| 424 | def _unpack_fiemap_extent(self, index): |
| 425 | """ |
| 426 | Unpack a 'struct fiemap_extent' structure object number 'index' from |
| 427 | the internal 'self._buf' buffer. |
| 428 | """ |
| 429 | |
| 430 | offset = _FIEMAP_SIZE + _FIEMAP_EXTENT_SIZE * index |
| 431 | return struct.unpack(_FIEMAP_EXTENT_FORMAT, |
| 432 | self._buf[offset : offset + _FIEMAP_EXTENT_SIZE]) |
| 433 | |
| 434 | def _do_get_mapped_ranges(self, start, count): |
| 435 | """ |
| 436 | Implements most the functionality for the 'get_mapped_ranges()' |
| 437 | generator: invokes the FIEMAP ioctl, walks through the mapped extents |
| 438 | and yields mapped block ranges. However, the ranges may be consecutive |
| 439 | (e.g., (1, 100), (100, 200)) and 'get_mapped_ranges()' simply merges |
| 440 | them. |
| 441 | """ |
| 442 | |
| 443 | block = start |
| 444 | while block < start + count: |
| 445 | struct_fiemap = self._invoke_fiemap(block, count) |
| 446 | |
| 447 | mapped_extents = struct_fiemap[3] |
| 448 | if mapped_extents == 0: |
| 449 | # No more mapped blocks |
| 450 | return |
| 451 | |
| 452 | extent = 0 |
| 453 | while extent < mapped_extents: |
| 454 | fiemap_extent = self._unpack_fiemap_extent(extent) |
| 455 | |
| 456 | # Start of the extent |
| 457 | extent_start = fiemap_extent[0] |
| 458 | # Starting block number of the extent |
| 459 | extent_block = extent_start // self.block_size |
| 460 | # Length of the extent |
| 461 | extent_len = fiemap_extent[2] |
| 462 | # Count of blocks in the extent |
| 463 | extent_count = extent_len // self.block_size |
| 464 | |
| 465 | # Extent length and offset have to be block-aligned |
| 466 | assert extent_start % self.block_size == 0 |
| 467 | assert extent_len % self.block_size == 0 |
| 468 | |
| 469 | if extent_block > start + count - 1: |
| 470 | return |
| 471 | |
| 472 | first = max(extent_block, block) |
| 473 | last = min(extent_block + extent_count, start + count) - 1 |
| 474 | yield (first, last) |
| 475 | |
| 476 | extent += 1 |
| 477 | |
| 478 | block = extent_block + extent_count |
| 479 | |
| 480 | def get_mapped_ranges(self, start, count): |
| 481 | """Refer the '_FilemapBase' class for the documentation.""" |
| 482 | self._log.debug("FilemapFiemap: get_mapped_ranges(%d, %d(%d))" |
| 483 | % (start, count, start + count - 1)) |
| 484 | iterator = self._do_get_mapped_ranges(start, count) |
| 485 | first_prev, last_prev = next(iterator) |
| 486 | |
| 487 | for first, last in iterator: |
| 488 | if last_prev == first - 1: |
| 489 | last_prev = last |
| 490 | else: |
| 491 | self._log.debug("FilemapFiemap: yielding range (%d, %d)" |
| 492 | % (first_prev, last_prev)) |
| 493 | yield (first_prev, last_prev) |
| 494 | first_prev, last_prev = first, last |
| 495 | |
| 496 | self._log.debug("FilemapFiemap: yielding range (%d, %d)" |
| 497 | % (first_prev, last_prev)) |
| 498 | yield (first_prev, last_prev) |
| 499 | |
| 500 | def get_unmapped_ranges(self, start, count): |
| 501 | """Refer the '_FilemapBase' class for the documentation.""" |
| 502 | self._log.debug("FilemapFiemap: get_unmapped_ranges(%d, %d(%d))" |
| 503 | % (start, count, start + count - 1)) |
| 504 | hole_first = start |
| 505 | for first, last in self._do_get_mapped_ranges(start, count): |
| 506 | if first > hole_first: |
| 507 | self._log.debug("FilemapFiemap: yielding range (%d, %d)" |
| 508 | % (hole_first, first - 1)) |
| 509 | yield (hole_first, first - 1) |
| 510 | |
| 511 | hole_first = last + 1 |
| 512 | |
| 513 | if hole_first < start + count: |
| 514 | self._log.debug("FilemapFiemap: yielding range (%d, %d)" |
| 515 | % (hole_first, start + count - 1)) |
| 516 | yield (hole_first, start + count - 1) |
| 517 | |
| 518 | def filemap(image, log=None): |
| 519 | """ |
| 520 | Create and return an instance of a Filemap class - 'FilemapFiemap' or |
| 521 | 'FilemapSeek', depending on what the system we run on supports. If the |
| 522 | FIEMAP ioctl is supported, an instance of the 'FilemapFiemap' class is |
| 523 | returned. Otherwise, if 'SEEK_HOLE' is supported an instance of the |
| 524 | 'FilemapSeek' class is returned. If none of these are supported, the |
| 525 | function generates an 'Error' type exception. |
| 526 | """ |
| 527 | |
| 528 | try: |
| 529 | return FilemapFiemap(image, log) |
| 530 | except ErrorNotSupp: |
| 531 | return FilemapSeek(image, log) |
| 532 | |
Brad Bishop | d7bf8c1 | 2018-02-25 22:55:05 -0500 | [diff] [blame] | 533 | def sparse_copy(src_fname, dst_fname, skip=0, seek=0, |
| 534 | length=0, api=None): |
| 535 | """ |
| 536 | Efficiently copy sparse file to or into another file. |
| 537 | |
| 538 | src_fname: path to source file |
| 539 | dst_fname: path to destination file |
| 540 | skip: skip N bytes at thestart of src |
| 541 | seek: seek N bytes from the start of dst |
| 542 | length: read N bytes from src and write them to dst |
| 543 | api: FilemapFiemap or FilemapSeek object |
| 544 | """ |
Brad Bishop | 6e60e8b | 2018-02-01 10:27:11 -0500 | [diff] [blame] | 545 | if not api: |
| 546 | api = filemap |
| 547 | fmap = api(src_fname) |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 548 | try: |
| 549 | dst_file = open(dst_fname, 'r+b') |
| 550 | except IOError: |
| 551 | dst_file = open(dst_fname, 'wb') |
Brad Bishop | d7bf8c1 | 2018-02-25 22:55:05 -0500 | [diff] [blame] | 552 | if length: |
| 553 | dst_size = length + seek |
| 554 | else: |
| 555 | dst_size = os.path.getsize(src_fname) + seek - skip |
| 556 | dst_file.truncate(dst_size) |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 557 | |
Brad Bishop | d7bf8c1 | 2018-02-25 22:55:05 -0500 | [diff] [blame] | 558 | written = 0 |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 559 | for first, last in fmap.get_mapped_ranges(0, fmap.blocks_cnt): |
| 560 | start = first * fmap.block_size |
| 561 | end = (last + 1) * fmap.block_size |
| 562 | |
Brad Bishop | d7bf8c1 | 2018-02-25 22:55:05 -0500 | [diff] [blame] | 563 | if skip >= end: |
| 564 | continue |
| 565 | |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 566 | if start < skip < end: |
Brad Bishop | d7bf8c1 | 2018-02-25 22:55:05 -0500 | [diff] [blame] | 567 | start = skip |
| 568 | |
| 569 | fmap._f_image.seek(start, os.SEEK_SET) |
| 570 | |
| 571 | written += start - skip - written |
| 572 | if length and written >= length: |
| 573 | dst_file.seek(seek + length, os.SEEK_SET) |
| 574 | dst_file.close() |
| 575 | return |
| 576 | |
| 577 | dst_file.seek(seek + start - skip, os.SEEK_SET) |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 578 | |
| 579 | chunk_size = 1024 * 1024 |
| 580 | to_read = end - start |
| 581 | read = 0 |
| 582 | |
| 583 | while read < to_read: |
| 584 | if read + chunk_size > to_read: |
| 585 | chunk_size = to_read - read |
Brad Bishop | d7bf8c1 | 2018-02-25 22:55:05 -0500 | [diff] [blame] | 586 | size = chunk_size |
| 587 | if length and written + size > length: |
| 588 | size = length - written |
| 589 | chunk = fmap._f_image.read(size) |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 590 | dst_file.write(chunk) |
Brad Bishop | d7bf8c1 | 2018-02-25 22:55:05 -0500 | [diff] [blame] | 591 | read += size |
| 592 | written += size |
| 593 | if written == length: |
| 594 | dst_file.close() |
| 595 | return |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 596 | dst_file.close() |