blob: 4d9da281720ce1ab12d7b98d13eb4ed16ef5b7b1 [file] [log] [blame]
Brad Bishopc342db32019-05-15 21:57:59 -04001#
Patrick Williamsc0f7c042017-02-23 20:41:17 -06002# Copyright (c) 2012 Intel, Inc.
3#
Brad Bishopc342db32019-05-15 21:57:59 -04004# SPDX-License-Identifier: GPL-2.0-only
Patrick Williamsc0f7c042017-02-23 20:41:17 -06005#
Patrick Williamsc0f7c042017-02-23 20:41:17 -06006
7"""
This module implements a way to get the block map of a file. Two methods
are supported - the FIEMAP ioctl and the 'SEEK_HOLE / SEEK_DATA' features of
the file seek syscall. The former is implemented by the 'FilemapFiemap' class,
the latter is implemented by the 'FilemapSeek' class. Both classes provide the
same API. The 'filemap' function automatically selects which class can be used
and returns an instance of the class. When neither method is supported, it
returns an instance of 'FilemapNobmap', which treats every block as mapped.
14"""
15
16# Disable the following pylint recommendations:
17# * Too many instance attributes (R0902)
18# pylint: disable=R0902
19
Brad Bishop1a4b7ee2018-12-16 17:11:34 -080020import errno
Patrick Williamsc0f7c042017-02-23 20:41:17 -060021import os
22import struct
23import array
24import fcntl
25import tempfile
26import logging
27
def get_block_size(file_obj):
    """
    Return the block size for file object 'file_obj'.

    The FIGETBSZ ioctl is tried first. If it fails with 'OSError' or reports
    a zero block size, the 'st_blksize' field returned by 'os.fstat()' is
    used instead. Errors are indicated by the 'IOError' exception, which is
    raised when neither source provides a non-zero block size.
    """
    # Get the block size of the host file-system for the image file by calling
    # the FIGETBSZ ioctl (number 2).
    try:
        binary_data = fcntl.ioctl(file_obj, 2, struct.pack('I', 0))
        bsize = struct.unpack('I', binary_data)[0]
    except OSError:
        bsize = None

    # If the ioctl raised OSError or reported a zero block size, fall back to
    # 'os.fstat()'. A missing or zero 'st_blksize' is an error too: callers
    # divide by the returned value.
    if not bsize:
        bsize = getattr(os.fstat(file_obj.fileno()), 'st_blksize', None)
        if not bsize:
            raise IOError("Unable to determine block size")
    return bsize
Patrick Williamsc0f7c042017-02-23 20:41:17 -060050
class ErrorNotSupp(Exception):
    """
    An exception of this type is raised when the 'FIEMAP' or 'SEEK_HOLE' feature
    is not supported either by the kernel or the file-system.
    """
57
class Error(Exception):
    """A class for all the other exceptions raised by this module."""
61
62
class _FilemapBase(object):
    """
    This is a base class for a couple of other classes in this module. This
    class simply performs the common parts of the initialization process: opens
    the image file, gets its size, etc.

    The constructor sets the following public attributes:
      * image_size - size of the image file in bytes
      * block_size - block size as returned by 'get_block_size()'
      * blocks_cnt - size of the image file in blocks, rounded up
    """

    def __init__(self, image, log=None):
        """
        Initialize a class instance. The 'image' argument is the full path to
        the file or a file object to operate on. The 'log' parameter is the
        logger object to use for printing messages; when it is 'None', the
        logger named after this module is used.

        Raises 'Error' if the image file cannot be opened, examined, flushed
        or synchronized.
        """

        self._log = log
        if self._log is None:
            self._log = logging.getLogger(__name__)

        # Only files opened by this class are closed by the destructor
        self._f_image_needs_close = False

        if hasattr(image, "fileno"):
            self._f_image = image
            self._image_path = image.name
        else:
            self._image_path = image
            self._open_image_file()

        try:
            self.image_size = os.fstat(self._f_image.fileno()).st_size
        except (IOError, OSError) as err:
            raise Error("cannot get information about file '%s': %s"
                        % (self._image_path, err))

        try:
            self.block_size = get_block_size(self._f_image)
        except IOError as err:
            raise Error("cannot get block size for '%s': %s"
                        % (self._image_path, err))

        # Image size in blocks, rounded up
        self.blocks_cnt = self.image_size + self.block_size - 1
        self.blocks_cnt //= self.block_size

        try:
            self._f_image.flush()
        except IOError as err:
            raise Error("cannot flush image file '%s': %s"
                        % (self._image_path, err))

        try:
            os.fsync(self._f_image.fileno())
        except OSError as err:
            raise Error("cannot synchronize image file '%s': %s "
                        % (self._image_path, err.strerror))

        self._log.debug("opened image \"%s\"", self._image_path)
        self._log.debug("block size %d, blocks count %d, image size %d",
                        self.block_size, self.blocks_cnt, self.image_size)

    def __del__(self):
        """The class destructor which just closes the image file."""
        # Use 'getattr()' because the destructor may run for an instance
        # whose constructor never set the attribute.
        if getattr(self, "_f_image_needs_close", False):
            self._f_image.close()

    def _open_image_file(self):
        """
        Open the image file for reading in binary mode. Raises 'Error' if the
        file cannot be opened.
        """
        try:
            self._f_image = open(self._image_path, 'rb')
        except IOError as err:
            raise Error("cannot open image file '%s': %s"
                        % (self._image_path, err))

        self._f_image_needs_close = True

    def block_is_mapped(self, block):  # pylint: disable=W0613,R0201
        """
        This method has to be implemented by child classes. It returns
        'True' if block number 'block' of the image file is mapped and 'False'
        otherwise.
        """

        raise Error("the method is not implemented")

    def get_mapped_ranges(self, start, count):  # pylint: disable=W0613,R0201
        """
        This method has to be implemented by child classes. This is a
        generator which yields ranges of mapped blocks in the file. The ranges
        are tuples of 2 elements: (first, last), where 'first' is the first
        mapped block and 'last' is the last mapped block.

        The ranges are yielded for the area of the file of size 'count' blocks,
        starting from block 'start'.
        """

        raise Error("the method is not implemented")
157
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600158
# The 'SEEK_DATA' and 'SEEK_HOLE' options of the file seek system call. The
# values exported by the 'os' module are preferred; 3 and 4 (the values used
# on Linux) are the fallback when the 'os' module does not provide them.
_SEEK_DATA = getattr(os, "SEEK_DATA", 3)
_SEEK_HOLE = getattr(os, "SEEK_HOLE", 4)
162
def _lseek(file_obj, offset, whence):
    """
    This is a helper function which invokes 'os.lseek' for file object
    'file_obj' and with specified 'offset' and 'whence'. The 'whence'
    argument is supposed to be either '_SEEK_DATA' or '_SEEK_HOLE'.

    Returns the data or hole position. When there is no more data or hole
    starting from 'offset', returns '-1'. Raises 'ErrorNotSupp' when the seek
    fails with EINVAL; any other 'OSError' is propagated unchanged.
    """

    try:
        return os.lseek(file_obj.fileno(), offset, whence)
    except OSError as err:
        # The 'lseek' system call returns the ENXIO if there is no data or
        # hole starting from the specified offset.
        if err.errno == errno.ENXIO:
            return -1
        if err.errno == errno.EINVAL:
            raise ErrorNotSupp("the kernel or file-system does not support "
                               "\"SEEK_HOLE\" and \"SEEK_DATA\"")
        raise
182
class FilemapSeek(_FilemapBase):
    """
    This class uses the 'SEEK_HOLE' and 'SEEK_DATA' to find file block mapping.
    Unfortunately, the current implementation probes for 'SEEK_HOLE' support
    by creating a temporary file in the directory of the image file, so the
    caller has to be able to create files there.
    """

    def __init__(self, image, log=None):
        """
        Refer the '_FilemapBase' class for the documentation. Additionally
        raises 'ErrorNotSupp' if 'SEEK_HOLE' / 'SEEK_DATA' are not usable.
        """

        # Call the base class constructor first
        _FilemapBase.__init__(self, image, log)
        self._log.debug("FilemapSeek: initializing")

        self._probe_seek_hole()

    def _probe_seek_hole(self):
        """
        Check whether the system implements 'SEEK_HOLE' and 'SEEK_DATA'.
        Unfortunately, there seems to be no clean way for detecting this,
        because often the system just fakes them by just assuming that all
        files are fully mapped, so 'SEEK_HOLE' always returns EOF and
        'SEEK_DATA' always returns the requested offset.

        I could not invent a better way of detecting the fake 'SEEK_HOLE'
        implementation than just to create a temporary file in the same
        directory where the image file resides. It would be nice to change this
        to something better.

        Raises 'ErrorNotSupp' if the temporary file cannot be created or
        truncated, or if a hole is not reported at offset 0 of it.
        """

        directory = os.path.dirname(self._image_path)

        try:
            tmp_obj = tempfile.TemporaryFile("w+", dir=directory)
        except IOError as err:
            raise ErrorNotSupp("cannot create a temporary in \"%s\": %s"
                               % (directory, err))

        # Close the temporary file on every path, including the ones which
        # raise 'ErrorNotSupp'.
        try:
            # Extend the empty file to one block without writing any data, so
            # that a real 'SEEK_HOLE' implementation finds a hole at offset 0.
            try:
                os.ftruncate(tmp_obj.fileno(), self.block_size)
            except OSError as err:
                raise ErrorNotSupp("cannot truncate temporary file in "
                                   "\"%s\": %s" % (directory, err))

            offs = _lseek(tmp_obj, 0, _SEEK_HOLE)
            if offs != 0:
                # We are dealing with the stub 'SEEK_HOLE' implementation
                # which always returns EOF.
                self._log.debug("lseek(0, SEEK_HOLE) returned %d", offs)
                raise ErrorNotSupp("the file-system does not support "
                                   "\"SEEK_HOLE\" and \"SEEK_DATA\" but only "
                                   "provides a stub implementation")
        finally:
            tmp_obj.close()

    def block_is_mapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        offs = _lseek(self._f_image, block * self.block_size, _SEEK_DATA)
        if offs == -1:
            result = False
        else:
            # The block is mapped if the nearest data starts inside it
            result = (offs // self.block_size == block)

        self._log.debug("FilemapSeek: block_is_mapped(%d) returns %s",
                        block, result)
        return result

    def _get_ranges(self, start, count, whence1, whence2):
        """
        This function implements 'get_mapped_ranges()' depending
        on what is passed in the 'whence1' and 'whence2' arguments: 'whence1'
        is used to find the beginning of each range and 'whence2' to find
        where it ends. Yields (first, last) tuples of block numbers within
        the 'count' blocks starting from block 'start'.
        """

        assert whence1 != whence2
        range_end = start * self.block_size
        limit = range_end + count * self.block_size

        while True:
            range_start = _lseek(self._f_image, range_end, whence1)
            if range_start == -1 or range_start >= limit or \
               range_start == self.image_size:
                break

            range_end = _lseek(self._f_image, range_start, whence2)
            if range_end == -1 or range_end == self.image_size:
                # The range extends to the end of the file: round the end up
                # to the block boundary.
                range_end = self.blocks_cnt * self.block_size
            if range_end > limit:
                range_end = limit

            start_blk = range_start // self.block_size
            end_blk = range_end // self.block_size - 1
            self._log.debug("FilemapSeek: yielding range (%d, %d)",
                            start_blk, end_blk)
            yield (start_blk, end_blk)

    def get_mapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapSeek: get_mapped_ranges(%d, %d(%d))",
                        start, count, start + count - 1)
        return self._get_ranges(start, count, _SEEK_DATA, _SEEK_HOLE)
282
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600283
# Below goes the FIEMAP ioctl implementation, which is not very readable
# because it deals with the rather complex FIEMAP ioctl. To understand the
# code, you need to know the FIEMAP interface, which is documented in the
# "Documentation/filesystems/fiemap.txt" file in the Linux kernel sources.

# Format string for 'struct fiemap': two 64-bit fields ('fm_start',
# 'fm_length') followed by four 32-bit fields ('fm_flags',
# 'fm_mapped_extents', 'fm_extent_count', 'fm_reserved'). The '=' prefix
# selects native byte order with standard sizes and no padding.
_FIEMAP_FORMAT = "=QQLLLL"
# sizeof(struct fiemap)
_FIEMAP_SIZE = struct.calcsize(_FIEMAP_FORMAT)
# Format string for 'struct fiemap_extent': five 64-bit fields ('fe_logical',
# 'fe_physical', 'fe_length' and two reserved ones) followed by four 32-bit
# fields ('fe_flags' and three reserved ones).
_FIEMAP_EXTENT_FORMAT = "=QQQQQLLLL"
# sizeof(struct fiemap_extent)
_FIEMAP_EXTENT_SIZE = struct.calcsize(_FIEMAP_EXTENT_FORMAT)
# The FIEMAP ioctl number
_FIEMAP_IOCTL = 0xC020660B
# This FIEMAP ioctl flag which instructs the kernel to sync the file before
# reading the block map
_FIEMAP_FLAG_SYNC = 0x00000001
# Size of the buffer for 'struct fiemap_extent' elements which will be used
# when invoking the FIEMAP ioctl. The larger the buffer, the fewer times the
# FIEMAP ioctl will be invoked.
_FIEMAP_BUFFER_SIZE = 256 * 1024
306
class FilemapFiemap(_FilemapBase):
    """
    This class provides API to the FIEMAP ioctl. Namely, it allows to check
    whether a block is mapped and to iterate over all mapped blocks.

    This class synchronizes the image file every time it invokes the FIEMAP
    ioctl in order to work-around early FIEMAP implementation kernel bugs.
    """

    def __init__(self, image, log=None):
        """
        Initialize a class instance. The 'image' argument is the full path to
        the file or a file object to operate on. The 'log' argument is the
        logger object to use for printing messages.

        Raises 'ErrorNotSupp' if the FIEMAP ioctl is not supported.
        """

        # Call the base class constructor first
        _FilemapBase.__init__(self, image, log)
        self._log.debug("FilemapFiemap: initializing")

        # Calculate how many 'struct fiemap_extent' elements fit the buffer
        # after the 'struct fiemap' header, then size the buffer to exactly
        # the header plus that many elements.
        extents_room = _FIEMAP_BUFFER_SIZE - _FIEMAP_SIZE
        self._fiemap_extent_cnt = extents_room // _FIEMAP_EXTENT_SIZE
        assert self._fiemap_extent_cnt > 0
        self._buf_size = self._fiemap_extent_cnt * _FIEMAP_EXTENT_SIZE
        self._buf_size += _FIEMAP_SIZE

        # Allocate a mutable, zero-filled buffer for the FIEMAP ioctl
        self._buf = array.array('B', [0]) * self._buf_size

        # Check if the FIEMAP ioctl is supported
        self.block_is_mapped(0)

    def _invoke_fiemap(self, block, count):
        """
        Invoke the FIEMAP ioctl for 'count' blocks of the file starting from
        block number 'block'.

        The full result of the operation is stored in 'self._buf' on exit.
        Returns the unpacked 'struct fiemap' data structure in form of a python
        tuple (just like 'struct.unpack()').

        Raises 'Error' for a bad block number or a failed ioctl, and
        'ErrorNotSupp' if the ioctl is not supported by the kernel or the
        file-system.
        """

        # An empty file has no valid block numbers at all, so the range check
        # is skipped for it.
        if self.blocks_cnt != 0 and (block < 0 or block >= self.blocks_cnt):
            raise Error("bad block number %d, should be within [0, %d]"
                        % (block, self.blocks_cnt - 1))

        # Initialize the 'struct fiemap' part of the buffer. We use the
        # '_FIEMAP_FLAG_SYNC' flag in order to make sure the file is
        # synchronized. The reason for this is that early FIEMAP
        # implementations had many bugs related to cached dirty data, and
        # synchronizing the file is a necessary work-around.
        struct.pack_into(_FIEMAP_FORMAT, self._buf, 0, block * self.block_size,
                         count * self.block_size, _FIEMAP_FLAG_SYNC, 0,
                         self._fiemap_extent_cnt, 0)

        try:
            # The last argument asks for the buffer to be updated in place
            fcntl.ioctl(self._f_image, _FIEMAP_IOCTL, self._buf, 1)
        except IOError as err:
            # Note, the FIEMAP ioctl is supported by the Linux kernel starting
            # from version 2.6.28 (year 2008).
            if err.errno == errno.EOPNOTSUPP:
                errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \
                         "by the file-system"
                self._log.debug(errstr)
                raise ErrorNotSupp(errstr)
            if err.errno == errno.ENOTTY:
                errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \
                         "by the kernel"
                self._log.debug(errstr)
                raise ErrorNotSupp(errstr)
            raise Error("the FIEMAP ioctl failed for '%s': %s"
                        % (self._image_path, err))

        return struct.unpack(_FIEMAP_FORMAT, self._buf[:_FIEMAP_SIZE])

    def block_is_mapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        struct_fiemap = self._invoke_fiemap(block, 1)

        # The element at index 3 of 'struct_fiemap' is the 'fm_mapped_extents'
        # field. If it contains zero, the block is not mapped, otherwise it is
        # mapped.
        result = bool(struct_fiemap[3])
        self._log.debug("FilemapFiemap: block_is_mapped(%d) returns %s",
                        block, result)
        return result

    def _unpack_fiemap_extent(self, index):
        """
        Unpack a 'struct fiemap_extent' structure object number 'index' from
        the internal 'self._buf' buffer.
        """

        offset = _FIEMAP_SIZE + _FIEMAP_EXTENT_SIZE * index
        return struct.unpack(_FIEMAP_EXTENT_FORMAT,
                             self._buf[offset:offset + _FIEMAP_EXTENT_SIZE])

    def _do_get_mapped_ranges(self, start, count):
        """
        Implements most the functionality for the 'get_mapped_ranges()'
        generator: invokes the FIEMAP ioctl, walks through the mapped extents
        and yields mapped block ranges. However, the ranges may be adjacent
        (e.g., (1, 100), (101, 200)) and 'get_mapped_ranges()' simply merges
        them.
        """

        block = start
        while block < start + count:
            struct_fiemap = self._invoke_fiemap(block, count)

            mapped_extents = struct_fiemap[3]
            if mapped_extents == 0:
                # No more mapped blocks
                return

            for extent in range(mapped_extents):
                fiemap_extent = self._unpack_fiemap_extent(extent)

                # Start of the extent
                extent_start = fiemap_extent[0]
                # Starting block number of the extent
                extent_block = extent_start // self.block_size
                # Length of the extent
                extent_len = fiemap_extent[2]
                # Count of blocks in the extent
                extent_count = extent_len // self.block_size

                # Extent length and offset have to be block-aligned
                assert extent_start % self.block_size == 0
                assert extent_len % self.block_size == 0

                if extent_block > start + count - 1:
                    return

                # Clip the extent to the requested area
                first = max(extent_block, block)
                last = min(extent_block + extent_count, start + count) - 1
                yield (first, last)

            # Continue from the block which follows the last extent seen
            block = extent_block + extent_count

    def get_mapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapFiemap: get_mapped_ranges(%d, %d(%d))",
                        start, count, start + count - 1)
        iterator = self._do_get_mapped_ranges(start, count)

        # Nothing is mapped in the requested area: finish without yielding.
        # 'StopIteration' must not escape from a generator body.
        try:
            first_prev, last_prev = next(iterator)
        except StopIteration:
            return

        # Merge adjacent ranges before yielding them
        for first, last in iterator:
            if last_prev == first - 1:
                last_prev = last
            else:
                self._log.debug("FilemapFiemap: yielding range (%d, %d)",
                                first_prev, last_prev)
                yield (first_prev, last_prev)
                first_prev, last_prev = first, last

        self._log.debug("FilemapFiemap: yielding range (%d, %d)",
                        first_prev, last_prev)
        yield (first_prev, last_prev)
471
class FilemapNobmap(_FilemapBase):
    """
    This class is used when both the 'SEEK_DATA/HOLE' and FIEMAP are not
    supported by the filesystem or kernel. It reports every block as mapped.
    """

    def __init__(self, image, log=None):
        """Refer the '_FilemapBase' class for the documentation."""

        # Call the base class constructor first
        _FilemapBase.__init__(self, image, log)
        self._log.debug("FilemapNobmap: initializing")

    def block_is_mapped(self, block):
        """
        Refer the '_FilemapBase' class for the documentation. Always returns
        'True', whatever 'block' is.
        """
        return True

    def get_mapped_ranges(self, start, count):
        """
        Refer the '_FilemapBase' class for the documentation. Yields the
        whole requested area as a single range.
        """
        self._log.debug("FilemapNobmap: get_mapped_ranges(%d, %d(%d))",
                        start, count, start + count - 1)
        # An empty area contains no ranges; do not yield (start, start - 1)
        if count > 0:
            yield (start, start + count - 1)
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600494
def filemap(image, log=None):
    """
    Create and return an instance of a Filemap class - 'FilemapFiemap',
    'FilemapSeek' or 'FilemapNobmap', depending on what the system we run on
    supports. If the FIEMAP ioctl is supported, an instance of the
    'FilemapFiemap' class is returned. Otherwise, if 'SEEK_HOLE' is supported
    an instance of the 'FilemapSeek' class is returned. If none of these are
    supported, an instance of the 'FilemapNobmap' class is returned.

    The 'image' and 'log' arguments are passed to the class constructor.
    Exceptions other than 'ErrorNotSupp' raised by the constructors are
    propagated to the caller.
    """

    try:
        return FilemapFiemap(image, log)
    except ErrorNotSupp:
        pass

    try:
        return FilemapSeek(image, log)
    except ErrorNotSupp:
        return FilemapNobmap(image, log)
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600512
def sparse_copy(src_fname, dst_fname, skip=0, seek=0,
                length=0, api=None):
    """
    Efficiently copy sparse file to or into another file. Only the mapped
    block ranges of the source file are read and written.

    src_fname: path to source file
    dst_fname: path to destination file; an existing file is updated in
               place, otherwise the file is created and sized to hold the
               copied data
    skip: skip N bytes at the start of src
    seek: seek N bytes from the start of dst
    length: read N bytes from src and write them to dst; 0 means copy up to
            the end of src
    api: callable which takes 'src_fname' and returns a file map object, e.g.
         the 'FilemapFiemap' or 'FilemapSeek' class; 'filemap' is used when
         it is not specified
    """
    if not api:
        api = filemap
    fmap = api(src_fname)

    created = False
    try:
        dst_file = open(dst_fname, 'r+b')
    except IOError:
        dst_file = open(dst_fname, 'wb')
        created = True

    chunk_size = 1024 * 1024
    src_file = fmap._f_image

    # The 'with' statement closes the destination file on every exit path,
    # including exceptions.
    with dst_file:
        if created:
            if length:
                dst_size = length + seek
            else:
                dst_size = os.path.getsize(src_fname) + seek - skip
            dst_file.truncate(dst_size)

        for first, last in fmap.get_mapped_ranges(0, fmap.blocks_cnt):
            start = first * fmap.block_size
            end = (last + 1) * fmap.block_size

            # The whole range lies within the skipped area
            if skip >= end:
                continue

            # The range begins within the skipped area: cut its head off
            if start < skip:
                start = skip

            src_file.seek(start, os.SEEK_SET)

            # Count of source bytes covered so far, unmapped areas included
            written = start - skip
            if length and written >= length:
                return

            dst_file.seek(seek + start - skip, os.SEEK_SET)

            to_read = end - start
            read = 0

            while read < to_read:
                size = min(chunk_size, to_read - read)
                # Do not copy more than 'length' bytes in total
                if length and written + size > length:
                    size = length - written
                chunk = src_file.read(size)
                dst_file.write(chunk)
                read += size
                written += size
                if written == length:
                    return