Brad Bishop | c342db3 | 2019-05-15 21:57:59 -0400 | [diff] [blame] | 1 | # |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 2 | # Copyright (c) 2012 Intel, Inc. |
| 3 | # |
Brad Bishop | c342db3 | 2019-05-15 21:57:59 -0400 | [diff] [blame] | 4 | # SPDX-License-Identifier: GPL-2.0-only |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 5 | # |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 6 | |
"""
This module implements a way to get the block map of a file in Python. Two
methods are supported - the FIEMAP ioctl and the 'SEEK_HOLE / SEEK_DATA'
features of the file seek syscall. The former is implemented by the
'FilemapFiemap' class, the latter is implemented by the 'FilemapSeek' class.
Both classes provide the same API. The 'filemap' function automatically selects
which class can be used and returns an instance of the class.
"""
| 15 | |
| 16 | # Disable the following pylint recommendations: |
| 17 | # * Too many instance attributes (R0902) |
| 18 | # pylint: disable=R0902 |
| 19 | |
Brad Bishop | 1a4b7ee | 2018-12-16 17:11:34 -0800 | [diff] [blame] | 20 | import errno |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 21 | import os |
| 22 | import struct |
| 23 | import array |
| 24 | import fcntl |
| 25 | import tempfile |
| 26 | import logging |
| 27 | |
def get_block_size(file_obj):
    """
    Returns block size for file object 'file_obj'. Errors are indicated by the
    'IOError' exception.

    The FIGETBSZ ioctl is tried first. If it fails with 'OSError' or reports a
    zero block size, the 'st_blksize' field of 'os.fstat()' is used instead.
    'IOError' is raised when that field is missing or zero as well, so a
    successful call never returns zero.
    """
    # Get the block size of the host file-system for the image file by calling
    # the FIGETBSZ ioctl (number 2).
    try:
        binary_data = fcntl.ioctl(file_obj, 2, struct.pack('I', 0))
        bsize = struct.unpack('I', binary_data)[0]
    except OSError:
        bsize = None

    # If the ioctl failed or reported a zero block size, fall back to
    # 'os.fstat()'.
    if not bsize:
        bsize = getattr(os.fstat(file_obj.fileno()), 'st_blksize', 0)
        if not bsize:
            # A zero block size is useless to callers (they divide by it), so
            # report it the same way as a missing 'st_blksize' field.
            raise IOError("Unable to determine block size")
    return bsize
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 50 | |
class ErrorNotSupp(Exception):
    """
    Raised when the 'FIEMAP' or 'SEEK_HOLE' feature cannot be used because the
    kernel or the file-system does not support it.
    """
| 57 | |
class Error(Exception):
    """Generic exception type for every other failure reported by this module."""
| 61 | |
| 62 | |
class _FilemapBase(object):
    """
    This is a base class for a couple of other classes in this module. This
    class simply performs the common parts of the initialization process: opens
    the image file, gets its size, etc. The 'log' parameter is the logger object
    to use for printing messages.
    """

    def __init__(self, image, log=None):
        """
        Initialize a class instance. The 'image' argument is either the full
        path to the file or a file object to operate on (anything that has a
        'fileno' attribute is treated as a file object).

        On success the instance provides 'image_size' (bytes), 'block_size'
        (bytes) and 'blocks_cnt' (image size in blocks, rounded up). The image
        file is flushed and synchronized before the constructor returns.

        Raises 'Error' if the file cannot be opened, examined, flushed or
        synchronized.
        """

        self._log = log
        if self._log is None:
            self._log = logging.getLogger(__name__)

        # Only files opened by this class are closed by the destructor
        self._f_image_needs_close = False

        if hasattr(image, "fileno"):
            self._f_image = image
            self._image_path = image.name
        else:
            self._image_path = image
            self._open_image_file()

        try:
            self.image_size = os.fstat(self._f_image.fileno()).st_size
        except IOError as err:
            raise Error("cannot get information about file '%s': %s"
                        % (self._image_path, err))

        try:
            self.block_size = get_block_size(self._f_image)
        except IOError as err:
            raise Error("cannot get block size for '%s': %s"
                        % (self._image_path, err))

        # Image size in blocks, rounded up
        self.blocks_cnt = self.image_size + self.block_size - 1
        self.blocks_cnt //= self.block_size

        try:
            self._f_image.flush()
        except IOError as err:
            raise Error("cannot flush image file '%s': %s"
                        % (self._image_path, err))

        try:
            os.fsync(self._f_image.fileno())
        except OSError as err:
            raise Error("cannot synchronize image file '%s': %s "
                        % (self._image_path, err.strerror))

        self._log.debug("opened image \"%s\"", self._image_path)
        self._log.debug("block size %d, blocks count %d, image size %d",
                        self.block_size, self.blocks_cnt, self.image_size)

    def __del__(self):
        """The class destructor which just closes the image file."""
        # The destructor also runs for instances whose constructor did not
        # run or did not complete, so do not assume the attribute exists.
        if getattr(self, "_f_image_needs_close", False):
            self._f_image.close()

    def _open_image_file(self):
        """Open the image file for reading in binary mode."""
        try:
            self._f_image = open(self._image_path, 'rb')
        except IOError as err:
            raise Error("cannot open image file '%s': %s"
                        % (self._image_path, err))

        self._f_image_needs_close = True

    def block_is_mapped(self, block): # pylint: disable=W0613,R0201
        """
        This method has to be implemented by child classes. It returns
        'True' if block number 'block' of the image file is mapped and 'False'
        otherwise.
        """

        raise Error("the method is not implemented")

    def get_mapped_ranges(self, start, count): # pylint: disable=W0613,R0201
        """
        This method has to be implemented by child classes. This is a
        generator which yields ranges of mapped blocks in the file. The ranges
        are tuples of 2 elements: [first, last], where 'first' is the first
        mapped block and 'last' is the last mapped block.

        The ranges are yielded for the area of the file of size 'count' blocks,
        starting from block 'start'.
        """

        raise Error("the method is not implemented")
| 157 | |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 158 | |
# The 'SEEK_HOLE' and 'SEEK_DATA' options of the file seek system call. The
# numeric values are platform-specific, so prefer the ones exported by the 'os'
# module and fall back to the Linux values when 'os' does not provide them.
_SEEK_DATA = getattr(os, "SEEK_DATA", 3)
_SEEK_HOLE = getattr(os, "SEEK_HOLE", 4)
| 162 | |
def _lseek(file_obj, offset, whence):
    """Helper which calls 'os.lseek' on the descriptor of 'file_obj' with the
    given 'offset' and 'whence' ('whence' is expected to be '_SEEK_DATA' or
    '_SEEK_HOLE'). Returns the position found, or '-1' when there is no more
    data or hole starting from 'offset'. Raises 'ErrorNotSupp' when the seek
    mode is rejected as invalid; any other 'OSError' is propagated."""

    try:
        return os.lseek(file_obj.fileno(), offset, whence)
    except OSError as exc:
        code = exc.errno
        if code == errno.EINVAL:
            raise ErrorNotSupp("the kernel or file-system does not support "
                               "\"SEEK_HOLE\" and \"SEEK_DATA\"")
        if code != errno.ENXIO:
            raise

    # ENXIO: the 'lseek' system call found no data or hole starting from the
    # specified offset.
    return -1
| 182 | |
class FilemapSeek(_FilemapBase):
    """
    This class uses the 'SEEK_HOLE' and 'SEEK_DATA' to find file block mapping.
    Unfortunately, the current implementation has to create a temporary file
    in the directory of the image file in order to probe for 'SEEK_HOLE'
    support, so the caller needs to be able to create files there.
    """

    def __init__(self, image, log=None):
        """
        Refer the '_FilemapBase' class for the documentation. Raises
        'ErrorNotSupp' if 'SEEK_HOLE' / 'SEEK_DATA' cannot be used.
        """

        # Call the base class constructor first
        _FilemapBase.__init__(self, image, log)
        self._log.debug("FilemapSeek: initializing")

        self._probe_seek_hole()

    def _probe_seek_hole(self):
        """
        Check whether the system implements 'SEEK_HOLE' and 'SEEK_DATA'.
        Unfortunately, there seems to be no clean way for detecting this,
        because often the system just fakes them by just assuming that all
        files are fully mapped, so 'SEEK_HOLE' always returns EOF and
        'SEEK_DATA' always returns the requested offset.

        I could not invent a better way of detecting the fake 'SEEK_HOLE'
        implementation than just to create a temporary file in the same
        directory where the image file resides. It would be nice to change this
        to something better.

        Raises 'ErrorNotSupp' if the temporary file cannot be created or
        truncated, or if only a stub implementation is detected.
        """

        directory = os.path.dirname(self._image_path)

        try:
            tmp_obj = tempfile.TemporaryFile("w+", dir=directory)
        except IOError as err:
            raise ErrorNotSupp("cannot create a temporary in \"%s\": %s"
                               % (directory, err))

        # The 'with' statement makes sure the temporary file is closed on
        # every path, including the error paths below.
        with tmp_obj:
            try:
                # One block long file which consists of a single hole
                os.ftruncate(tmp_obj.fileno(), self.block_size)
            except OSError as err:
                raise ErrorNotSupp("cannot truncate temporary file in \"%s\": %s"
                                   % (directory, err))

            offs = _lseek(tmp_obj, 0, _SEEK_HOLE)
            if offs != 0:
                # We are dealing with the stub 'SEEK_HOLE' implementation which
                # always returns EOF.
                self._log.debug("lseek(0, SEEK_HOLE) returned %d", offs)
                raise ErrorNotSupp("the file-system does not support "
                                   "\"SEEK_HOLE\" and \"SEEK_DATA\" but only "
                                   "provides a stub implementation")

    def block_is_mapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        offs = _lseek(self._f_image, block * self.block_size, _SEEK_DATA)
        if offs == -1:
            result = False
        else:
            # The block is mapped if the nearest data starts within it
            result = (offs // self.block_size == block)

        self._log.debug("FilemapSeek: block_is_mapped(%d) returns %s",
                        block, result)
        return result

    def _get_ranges(self, start, count, whence1, whence2):
        """
        This function implements 'get_mapped_ranges()' depending
        on what is passed in the 'whence1' and 'whence2' arguments: 'whence1'
        is used to find the beginning of the next range and 'whence2' is used
        to find its end. Yields '(first_block, last_block)' tuples for the
        area of 'count' blocks starting from block 'start'.
        """

        assert whence1 != whence2
        end = start * self.block_size
        limit = end + count * self.block_size

        while True:
            start = _lseek(self._f_image, end, whence1)
            if start == -1 or start >= limit or start == self.image_size:
                break

            end = _lseek(self._f_image, start, whence2)
            if end == -1 or end == self.image_size:
                # The range lasts till the end of the file, round it up to the
                # block boundary
                end = self.blocks_cnt * self.block_size
            if end > limit:
                end = limit

            start_blk = start // self.block_size
            end_blk = end // self.block_size - 1
            self._log.debug("FilemapSeek: yielding range (%d, %d)",
                            start_blk, end_blk)
            yield (start_blk, end_blk)

    def get_mapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapSeek: get_mapped_ranges(%d, %d(%d))",
                        start, count, start + count - 1)
        return self._get_ranges(start, count, _SEEK_DATA, _SEEK_HOLE)
| 282 | |
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 283 | |
| 284 | # Below goes the FIEMAP ioctl implementation, which is not very readable |
| 285 | # because it deals with the rather complex FIEMAP ioctl. To understand the |
| 286 | # code, you need to know the FIEMAP interface, which is documented in the |
| 287 | # "Documentation/filesystems/fiemap.txt" file in the Linux kernel sources. |
| 288 | |
# 'struct' module format string describing 'struct fiemap'
_FIEMAP_FORMAT = "=QQLLLL"
# Size of 'struct fiemap' in bytes
_FIEMAP_SIZE = struct.Struct(_FIEMAP_FORMAT).size
# 'struct' module format string describing 'struct fiemap_extent'
_FIEMAP_EXTENT_FORMAT = "=QQQQQLLLL"
# Size of 'struct fiemap_extent' in bytes
_FIEMAP_EXTENT_SIZE = struct.Struct(_FIEMAP_EXTENT_FORMAT).size
# Request number of the FIEMAP ioctl
_FIEMAP_IOCTL = 0xC020660B
# The FIEMAP ioctl flag which asks the kernel to sync the file before the block
# map is read
_FIEMAP_FLAG_SYNC = 0x00000001
# Size in bytes of the buffer which holds the 'struct fiemap_extent' elements
# when the FIEMAP ioctl is invoked. The larger the buffer, the fewer times the
# FIEMAP ioctl has to be invoked.
_FIEMAP_BUFFER_SIZE = 256 * 1024
| 306 | |
class FilemapFiemap(_FilemapBase):
    """
    This class provides API to the FIEMAP ioctl. Namely, it allows to check
    whether a block is mapped and to iterate over all mapped blocks.

    This class synchronizes the image file every time it invokes the FIEMAP
    ioctl in order to work-around early FIEMAP implementation kernel bugs.
    """

    def __init__(self, image, log=None):
        """
        Initialize a class instance. The 'image' argument is the full path to
        the file or the file object to operate on. Raises 'ErrorNotSupp' if
        the FIEMAP ioctl is not supported.
        """

        # Call the base class constructor first
        _FilemapBase.__init__(self, image, log)
        self._log.debug("FilemapFiemap: initializing")

        self._buf_size = _FIEMAP_BUFFER_SIZE

        # Calculate how many 'struct fiemap_extent' elements fit the buffer
        self._buf_size -= _FIEMAP_SIZE
        self._fiemap_extent_cnt = self._buf_size // _FIEMAP_EXTENT_SIZE
        assert self._fiemap_extent_cnt > 0
        self._buf_size = self._fiemap_extent_cnt * _FIEMAP_EXTENT_SIZE
        self._buf_size += _FIEMAP_SIZE

        # Allocate a mutable buffer for the FIEMAP ioctl
        self._buf = array.array('B', [0] * self._buf_size)

        # Check if the FIEMAP ioctl is supported
        self.block_is_mapped(0)

    def _invoke_fiemap(self, block, count):
        """
        Invoke the FIEMAP ioctl for 'count' blocks of the file starting from
        block number 'block'.

        The full result of the operation is stored in 'self._buf' on exit.
        Returns the unpacked 'struct fiemap' data structure in form of a python
        tuple (just like 'struct.unpack()').

        Raises 'ErrorNotSupp' if the ioctl is not supported by the kernel or
        the file-system and 'Error' on a bad block number or any other ioctl
        failure.
        """

        if self.blocks_cnt != 0 and (block < 0 or block >= self.blocks_cnt):
            raise Error("bad block number %d, should be within [0, %d]"
                        % (block, self.blocks_cnt))

        # Initialize the 'struct fiemap' part of the buffer. We use the
        # '_FIEMAP_FLAG_SYNC' flag in order to make sure the file is
        # synchronized. The reason for this is that early FIEMAP
        # implementations had many bugs related to cached dirty data, and
        # synchronizing the file is a necessary work-around.
        struct.pack_into(_FIEMAP_FORMAT, self._buf, 0, block * self.block_size,
                         count * self.block_size, _FIEMAP_FLAG_SYNC, 0,
                         self._fiemap_extent_cnt, 0)

        try:
            fcntl.ioctl(self._f_image, _FIEMAP_IOCTL, self._buf, 1)
        except IOError as err:
            # Note, the FIEMAP ioctl is supported by the Linux kernel starting
            # from version 2.6.28 (year 2008).
            if err.errno == errno.EOPNOTSUPP:
                errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \
                         "by the file-system"
                self._log.debug(errstr)
                raise ErrorNotSupp(errstr)
            if err.errno == errno.ENOTTY:
                errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \
                         "by the kernel"
                self._log.debug(errstr)
                raise ErrorNotSupp(errstr)
            raise Error("the FIEMAP ioctl failed for '%s': %s"
                        % (self._image_path, err))

        # Unpack in place, no need to copy the header out of the buffer
        return struct.unpack_from(_FIEMAP_FORMAT, self._buf, 0)

    def block_is_mapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        struct_fiemap = self._invoke_fiemap(block, 1)

        # Element number 3 of 'struct_fiemap' is the 'fm_mapped_extents' field.
        # If it contains zero, the block is not mapped, otherwise it is
        # mapped.
        result = bool(struct_fiemap[3])
        self._log.debug("FilemapFiemap: block_is_mapped(%d) returns %s",
                        block, result)
        return result

    def _unpack_fiemap_extent(self, index):
        """
        Unpack a 'struct fiemap_extent' structure object number 'index' from
        the internal 'self._buf' buffer.
        """

        offset = _FIEMAP_SIZE + _FIEMAP_EXTENT_SIZE * index
        return struct.unpack_from(_FIEMAP_EXTENT_FORMAT, self._buf, offset)

    def _do_get_mapped_ranges(self, start, count):
        """
        Implements most of the functionality for the 'get_mapped_ranges()'
        generator: invokes the FIEMAP ioctl, walks through the mapped extents
        and yields mapped block ranges. However, the ranges may be adjacent
        (e.g., (1, 100), (101, 200)) and 'get_mapped_ranges()' simply merges
        them.
        """

        block = start
        while block < start + count:
            struct_fiemap = self._invoke_fiemap(block, count)

            mapped_extents = struct_fiemap[3]
            if mapped_extents == 0:
                # No more mapped blocks
                return

            extent = 0
            while extent < mapped_extents:
                fiemap_extent = self._unpack_fiemap_extent(extent)

                # Start of the extent
                extent_start = fiemap_extent[0]
                # Starting block number of the extent
                extent_block = extent_start // self.block_size
                # Length of the extent
                extent_len = fiemap_extent[2]
                # Count of blocks in the extent
                extent_count = extent_len // self.block_size

                # Extent length and offset have to be block-aligned
                assert extent_start % self.block_size == 0
                assert extent_len % self.block_size == 0

                if extent_block > start + count - 1:
                    return

                first = max(extent_block, block)
                last = min(extent_block + extent_count, start + count) - 1
                yield (first, last)

                extent += 1

            # Continue right after the last extent which has been processed
            block = extent_block + extent_count

    def get_mapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapFiemap: get_mapped_ranges(%d, %d(%d))",
                        start, count, start + count - 1)
        iterator = self._do_get_mapped_ranges(start, count)
        try:
            first_prev, last_prev = next(iterator)
        except StopIteration:
            # There are no mapped blocks in the requested area. 'StopIteration'
            # must not leak out of a generator (it turns into 'RuntimeError'),
            # so just finish without yielding anything.
            return

        for first, last in iterator:
            if last_prev == first - 1:
                # Adjacent ranges, merge them
                last_prev = last
            else:
                self._log.debug("FilemapFiemap: yielding range (%d, %d)",
                                first_prev, last_prev)
                yield (first_prev, last_prev)
                first_prev, last_prev = first, last

        self._log.debug("FilemapFiemap: yielding range (%d, %d)",
                        first_prev, last_prev)
        yield (first_prev, last_prev)
| 471 | |
class FilemapNobmap(_FilemapBase):
    """
    Fallback implementation for the case when neither 'SEEK_DATA/HOLE' nor
    FIEMAP is supported by the filesystem or kernel. Every block is reported
    as mapped.
    """

    def __init__(self, image, log=None):
        """Refer the '_FilemapBase' class for the documentation."""

        # The base class does all the initialization work
        super(FilemapNobmap, self).__init__(image, log)
        self._log.debug("FilemapNobmap: initializing")

    def block_is_mapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        return True

    def get_mapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        last = start + count - 1
        message = "FilemapNobmap: get_mapped_ranges(%d, %d(%d))" \
                  % (start, count, last)
        self._log.debug(message)
        # The whole requested area is a single mapped range
        yield (start, last)
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 494 | |
def filemap(image, log=None):
    """
    Create and return an instance of a Filemap class for 'image', depending on
    what the system we run on supports. The classes are tried in order of
    preference: 'FilemapFiemap' (the FIEMAP ioctl), then 'FilemapSeek'
    ('SEEK_HOLE'). If both report 'ErrorNotSupp', an instance of the
    'FilemapNobmap' class is returned.
    """

    for candidate in (FilemapFiemap, FilemapSeek):
        try:
            return candidate(image, log)
        except ErrorNotSupp:
            # Not supported here, try the next implementation
            continue

    return FilemapNobmap(image, log)
Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 512 | |
def sparse_copy(src_fname, dst_fname, skip=0, seek=0,
                length=0, api=None):
    """
    Efficiently copy sparse file to or into another file. Only the mapped
    block ranges of the source are read and written.

    src_fname: path to source file
    dst_fname: path to destination file
    skip: skip N bytes at the start of src
    seek: seek N bytes from the start of dst
    length: read N bytes from src and write them to dst (0 means no limit)
    api: FilemapFiemap or FilemapSeek class (or any compatible callable) which
         is invoked with 'src_fname' to create the file map object; the
         'filemap' function is used by default

    An existing destination file is updated in place. If it cannot be opened
    for update, it is created and extended to the resulting size first. The
    destination file is closed on every exit path, including errors.
    """
    if not api:
        api = filemap
    fmap = api(src_fname)

    try:
        dst_file = open(dst_fname, 'r+b')
        created = False
    except IOError:
        dst_file = open(dst_fname, 'wb')
        created = True

    try:
        if created:
            # Give the new file its final size, so that holes of the source
            # end up as holes of the destination.
            if length:
                dst_size = length + seek
            else:
                dst_size = os.path.getsize(src_fname) + seek - skip
            dst_file.truncate(dst_size)

        written = 0
        for first, last in fmap.get_mapped_ranges(0, fmap.blocks_cnt):
            start = first * fmap.block_size
            end = (last + 1) * fmap.block_size

            # The range lies entirely within the skipped area
            if skip >= end:
                continue

            if start < skip < end:
                start = skip

            fmap._f_image.seek(start, os.SEEK_SET)

            # Holes preceding this range count as already copied
            written = start - skip
            if length and written >= length:
                dst_file.seek(seek + length, os.SEEK_SET)
                return

            dst_file.seek(seek + start - skip, os.SEEK_SET)

            chunk_size = 1024 * 1024
            to_read = end - start
            read = 0

            while read < to_read:
                if read + chunk_size > to_read:
                    chunk_size = to_read - read
                size = chunk_size
                # Do not copy more than 'length' bytes in total
                if length and written + size > length:
                    size = length - written
                chunk = fmap._f_image.read(size)
                dst_file.write(chunk)
                read += size
                written += size
                if written == length:
                    return
    finally:
        dst_file.close()