blob: a3919fbcad8bd06a26f0ca6e9cc8f4f1260f489c [file] [log] [blame]
#
# Copyright (c) 2012 Intel, Inc.
#
# SPDX-License-Identifier: GPL-2.0-only
#

7"""
This module implements a way to get the block map of a file in Python. Two
methods are supported - the FIEMAP ioctl and the 'SEEK_HOLE / SEEK_DATA'
features of the file seek syscall. The former is implemented by the
'FilemapFiemap' class, the latter is implemented by the 'FilemapSeek' class.
Both classes provide the same API. The 'filemap' function automatically
selects which class can be used and returns an instance of that class.
14"""
15
# Disable the following pylint recommendations:
#   * Too many instance attributes (R0902)
# pylint: disable=R0902
19
Brad Bishop1a4b7ee2018-12-16 17:11:34 -080020import errno
Patrick Williamsc0f7c042017-02-23 20:41:17 -060021import os
22import struct
23import array
24import fcntl
25import tempfile
26import logging
27
def get_block_size(file_obj):
    """
    Return the block size for file object 'file_obj'.

    The size is queried with the FIGETBSZ ioctl. When the ioctl succeeds but
    reports zero, the 'st_blksize' field returned by 'os.fstat()' is used
    instead.

    Errors are indicated by the 'IOError' exception, which is also raised when
    no non-zero block size can be found.
    """
    # Get the block size of the host file-system for the image file by calling
    # the FIGETBSZ ioctl (number 2).
    try:
        binary_data = fcntl.ioctl(file_obj, 2, struct.pack('I', 0))
    except (IOError, OSError) as err:
        # Keep the underlying reason, otherwise the caller cannot tell why
        # the ioctl failed.
        raise IOError("Unable to determine block size: %s" % err)

    bsize = struct.unpack('I', binary_data)[0]
    if bsize:
        return bsize

    # The ioctl reported zero, fall back to the preferred I/O block size from
    # the stat information. A zero or missing value is an error too, because
    # callers divide by the block size.
    bsize = getattr(os.fstat(file_obj.fileno()), 'st_blksize', 0)
    if not bsize:
        raise IOError("Unable to determine block size")
    return bsize
Patrick Williamsc0f7c042017-02-23 20:41:17 -060048
class ErrorNotSupp(Exception):
    """
    Raised when the 'FIEMAP' or 'SEEK_HOLE' feature cannot be used because
    either the kernel or the file-system does not support it.
    """
55
class Error(Exception):
    """The exception type used for all the other errors raised by this module."""
59
60
class _FilemapBase(object):
    """
    This is a base class for a couple of other classes in this module. This
    class simply performs the common parts of the initialization process: opens
    the image file, gets its size, etc. The 'log' parameter is the logger object
    to use for printing messages.

    The constructor sets the following public attributes:
      * image_size - size of the image file in bytes
      * block_size - block size reported by 'get_block_size()'
      * blocks_cnt - number of blocks in the image file, rounded up
    """

    def __init__(self, image, log=None):
        """
        Initialize a class instance. The 'image' argument is either the full
        path to the file or a file object to operate on. When 'log' is 'None',
        the logger of this module is used.

        Raises 'Error' if the file cannot be opened, examined, flushed or
        synchronized.
        """

        self._log = log
        if self._log is None:
            self._log = logging.getLogger(__name__)

        # Only files opened by this class are closed by the destructor
        self._f_image_needs_close = False

        if hasattr(image, "fileno"):
            self._f_image = image
            self._image_path = image.name
        else:
            self._image_path = image
            self._open_image_file()

        try:
            self.image_size = os.fstat(self._f_image.fileno()).st_size
        except (IOError, OSError) as err:
            raise Error("cannot get information about file '%s': %s"
                        % (self._image_path, err))

        try:
            self.block_size = get_block_size(self._f_image)
        except IOError as err:
            raise Error("cannot get block size for '%s': %s"
                        % (self._image_path, err))

        # Round up, so that a partial last block is counted too
        self.blocks_cnt = self.image_size + self.block_size - 1
        self.blocks_cnt //= self.block_size

        try:
            self._f_image.flush()
        except IOError as err:
            raise Error("cannot flush image file '%s': %s"
                        % (self._image_path, err))

        try:
            os.fsync(self._f_image.fileno())
        except OSError as err:
            raise Error("cannot synchronize image file '%s': %s "
                        % (self._image_path, err.strerror))

        self._log.debug("opened image \"%s\"" % self._image_path)
        self._log.debug("block size %d, blocks count %d, image size %d"
                        % (self.block_size, self.blocks_cnt, self.image_size))

    def __del__(self):
        """The class destructor which just closes the image file."""
        # The attribute may be missing if the instance was never fully
        # constructed, so do not assume '__init__()' has run.
        if getattr(self, "_f_image_needs_close", False):
            self._f_image_needs_close = False
            self._f_image.close()

    def _open_image_file(self):
        """
        Open the image file for reading in binary mode. Raises 'Error' if the
        file cannot be opened.
        """
        try:
            self._f_image = open(self._image_path, 'rb')
        except IOError as err:
            raise Error("cannot open image file '%s': %s"
                        % (self._image_path, err))

        self._f_image_needs_close = True

    def block_is_mapped(self, block):  # pylint: disable=W0613,R0201
        """
        This method has to be implemented by child classes. It returns
        'True' if block number 'block' of the image file is mapped and 'False'
        otherwise.
        """

        raise Error("the method is not implemented")

    def block_is_unmapped(self, block):  # pylint: disable=W0613,R0201
        """
        This method has to be implemented by child classes. It returns
        'True' if block number 'block' of the image file is not mapped (hole)
        and 'False' otherwise.
        """

        raise Error("the method is not implemented")

    def get_mapped_ranges(self, start, count):  # pylint: disable=W0613,R0201
        """
        This method has to be implemented by child classes. This is a
        generator which yields ranges of mapped blocks in the file. The ranges
        are tuples of 2 elements: [first, last], where 'first' is the first
        mapped block and 'last' is the last mapped block.

        The ranges are yielded for the area of the file of size 'count' blocks,
        starting from block 'start'.
        """

        raise Error("the method is not implemented")

    def get_unmapped_ranges(self, start, count):  # pylint: disable=W0613,R0201
        """
        This method has to be implemented by child classes. Just like
        'get_mapped_ranges()', but yields unmapped block ranges instead
        (holes).
        """

        raise Error("the method is not implemented")
173
174
# The 'SEEK_DATA' and 'SEEK_HOLE' options of the file seek system call. Use
# the values which the 'os' module exports for the running platform, and fall
# back to the previously hard-coded numbers when they are not exported.
_SEEK_DATA = getattr(os, "SEEK_DATA", 3)
_SEEK_HOLE = getattr(os, "SEEK_HOLE", 4)
178
def _lseek(file_obj, offset, whence):
    """
    Helper which runs 'os.lseek' on the descriptor of file object 'file_obj'
    using the given 'offset' and 'whence'. The 'whence' argument is expected
    to be either '_SEEK_DATA' or '_SEEK_HOLE'.

    Returns the position of the data or hole found, or '-1' when there is no
    more data or hole starting from 'offset'. Raises 'ErrorNotSupp' when the
    seek fails with EINVAL; any other 'OSError' is propagated unchanged.
    """

    try:
        return os.lseek(file_obj.fileno(), offset, whence)
    except OSError as err:
        # ENXIO is how the 'lseek' system call reports that there is no data
        # or hole starting from the specified offset.
        if err.errno == errno.ENXIO:
            return -1
        if err.errno != errno.EINVAL:
            raise
        raise ErrorNotSupp("the kernel or file-system does not support "
                           "\"SEEK_HOLE\" and \"SEEK_DATA\"")
198
class FilemapSeek(_FilemapBase):
    """
    This class uses the 'SEEK_HOLE' and 'SEEK_DATA' to find file block mapping.
    Unfortunately, the current implementation requires the caller to have write
    access to the directory of the image file, because a temporary file is
    created there to probe for 'SEEK_HOLE' support.
    """

    def __init__(self, image, log=None):
        """
        Refer the '_FilemapBase' class for the documentation. Additionally
        raises 'ErrorNotSupp' if 'SEEK_HOLE' turns out to be unusable.
        """

        # Call the base class constructor first
        _FilemapBase.__init__(self, image, log)
        self._log.debug("FilemapSeek: initializing")

        self._probe_seek_hole()

    def _probe_seek_hole(self):
        """
        Check whether the system implements 'SEEK_HOLE' and 'SEEK_DATA'.
        Unfortunately, there seems to be no clean way for detecting this,
        because often the system just fakes them by just assuming that all
        files are fully mapped, so 'SEEK_HOLE' always returns EOF and
        'SEEK_DATA' always returns the requested offset.

        I could not invent a better way of detecting the fake 'SEEK_HOLE'
        implementation than just to create a temporary file in the same
        directory where the image file resides. It would be nice to change this
        to something better.

        Raises 'ErrorNotSupp' if the temporary file cannot be created or
        truncated, or if only a stub implementation is detected.
        """

        directory = os.path.dirname(self._image_path)

        try:
            tmp_obj = tempfile.TemporaryFile("w+", dir=directory)
        except IOError as err:
            raise ErrorNotSupp("cannot create a temporary in \"%s\": %s"
                               % (directory, err))

        # Make sure the temporary file is closed on the error paths too
        try:
            # Extend the empty file to one block, which makes it a single hole
            try:
                os.ftruncate(tmp_obj.fileno(), self.block_size)
            except OSError as err:
                raise ErrorNotSupp("cannot truncate temporary file in "
                                   "\"%s\": %s" % (directory, err))

            offs = _lseek(tmp_obj, 0, _SEEK_HOLE)
            if offs != 0:
                # We are dealing with the stub 'SEEK_HOLE' implementation which
                # always returns EOF.
                self._log.debug("lseek(0, SEEK_HOLE) returned %d" % offs)
                raise ErrorNotSupp("the file-system does not support "
                                   "\"SEEK_HOLE\" and \"SEEK_DATA\" but only "
                                   "provides a stub implementation")
        finally:
            tmp_obj.close()

    def block_is_mapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        offs = _lseek(self._f_image, block * self.block_size, _SEEK_DATA)
        if offs == -1:
            # No data at or after this block
            result = False
        else:
            # The block is mapped only if the nearest data starts inside it
            result = (offs // self.block_size == block)

        self._log.debug("FilemapSeek: block_is_mapped(%d) returns %s"
                        % (block, result))
        return result

    def block_is_unmapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        return not self.block_is_mapped(block)

    def _get_ranges(self, start, count, whence1, whence2):
        """
        This function implements 'get_mapped_ranges()' and
        'get_unmapped_ranges()' depending on what is passed in the 'whence1'
        and 'whence2' arguments: 'whence1' is used to find the beginning of
        the next range and 'whence2' to find its end.

        This is a generator which yields '(first_block, last_block)' tuples
        for the area of 'count' blocks starting from block 'start'.
        """

        assert whence1 != whence2
        range_end = start * self.block_size
        limit = range_end + count * self.block_size

        while True:
            range_start = _lseek(self._f_image, range_end, whence1)
            if range_start == -1 or range_start >= limit or \
               range_start == self.image_size:
                break

            range_end = _lseek(self._f_image, range_start, whence2)
            if range_end == -1 or range_end == self.image_size:
                # The range goes up to the end of the file, round it up to
                # the block boundary
                range_end = self.blocks_cnt * self.block_size
            if range_end > limit:
                range_end = limit

            start_blk = range_start // self.block_size
            end_blk = range_end // self.block_size - 1
            self._log.debug("FilemapSeek: yielding range (%d, %d)"
                            % (start_blk, end_blk))
            yield (start_blk, end_blk)

    def get_mapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapSeek: get_mapped_ranges(%d, %d(%d))"
                        % (start, count, start + count - 1))
        return self._get_ranges(start, count, _SEEK_DATA, _SEEK_HOLE)

    def get_unmapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapSeek: get_unmapped_ranges(%d, %d(%d))"
                        % (start, count, start + count - 1))
        return self._get_ranges(start, count, _SEEK_HOLE, _SEEK_DATA)
309
310
# The rest of the module is the FIEMAP ioctl implementation. It is not very
# readable, because the FIEMAP ioctl itself is rather complex. To follow the
# code, get familiar with the FIEMAP interface first, it is documented in the
# "Documentation/filesystems/fiemap.txt" file in the Linux kernel sources.

# 'struct fiemap' layout as a 'struct' module format string
_FIEMAP_FORMAT = "=QQLLLL"
# Size of 'struct fiemap' in bytes
_FIEMAP_SIZE = struct.calcsize(_FIEMAP_FORMAT)
# 'struct fiemap_extent' layout as a 'struct' module format string
_FIEMAP_EXTENT_FORMAT = "=QQQQQLLLL"
# Size of 'struct fiemap_extent' in bytes
_FIEMAP_EXTENT_SIZE = struct.calcsize(_FIEMAP_EXTENT_FORMAT)
# Number of the FIEMAP ioctl
_FIEMAP_IOCTL = 0xC020660B
# The FIEMAP ioctl flag which asks the kernel to sync the file before the
# block map is read
_FIEMAP_FLAG_SYNC = 0x00000001
# Size of the buffer holding the 'struct fiemap_extent' elements passed to the
# FIEMAP ioctl. A larger buffer means that the FIEMAP ioctl has to be invoked
# fewer times.
_FIEMAP_BUFFER_SIZE = 256 * 1024
333
class FilemapFiemap(_FilemapBase):
    """
    This class provides API to the FIEMAP ioctl. Namely, it allows to iterate
    over all mapped blocks and over all holes.

    This class synchronizes the image file every time it invokes the FIEMAP
    ioctl in order to work-around early FIEMAP implementation kernel bugs.
    """

    def __init__(self, image, log=None):
        """
        Initialize a class instance. The 'image' argument is either the full
        path to the file or a file object to operate on.

        Raises 'ErrorNotSupp' if the FIEMAP ioctl is not supported.
        """

        # Call the base class constructor first
        _FilemapBase.__init__(self, image, log)
        self._log.debug("FilemapFiemap: initializing")

        self._buf_size = _FIEMAP_BUFFER_SIZE

        # Calculate how many 'struct fiemap_extent' elements fit the buffer
        self._buf_size -= _FIEMAP_SIZE
        self._fiemap_extent_cnt = self._buf_size // _FIEMAP_EXTENT_SIZE
        assert self._fiemap_extent_cnt > 0
        # Shrink the buffer to the header plus a whole number of extents
        self._buf_size = self._fiemap_extent_cnt * _FIEMAP_EXTENT_SIZE
        self._buf_size += _FIEMAP_SIZE

        # Allocate a mutable buffer for the FIEMAP ioctl
        self._buf = array.array('B', [0] * self._buf_size)

        # Check if the FIEMAP ioctl is supported
        self.block_is_mapped(0)

    def _invoke_fiemap(self, block, count):
        """
        Invoke the FIEMAP ioctl for 'count' blocks of the file starting from
        block number 'block'.

        The full result of the operation is stored in 'self._buf' on exit.
        Returns the unpacked 'struct fiemap' data structure in form of a python
        tuple (just like 'struct.unpack()').

        Raises 'ErrorNotSupp' if the ioctl is not supported by the file-system
        or the kernel, and 'Error' for a bad block number or any other ioctl
        failure.
        """

        if self.blocks_cnt != 0 and (block < 0 or block >= self.blocks_cnt):
            raise Error("bad block number %d, should be within [0, %d]"
                        % (block, self.blocks_cnt))

        # Initialize the 'struct fiemap' part of the buffer. We use the
        # '_FIEMAP_FLAG_SYNC' flag in order to make sure the file is
        # synchronized. The reason for this is that early FIEMAP
        # implementations had many bugs related to cached dirty data, and
        # synchronizing the file is a necessary work-around.
        struct.pack_into(_FIEMAP_FORMAT, self._buf, 0, block * self.block_size,
                         count * self.block_size, _FIEMAP_FLAG_SYNC, 0,
                         self._fiemap_extent_cnt, 0)

        try:
            # The last argument makes the ioctl update 'self._buf' in place
            fcntl.ioctl(self._f_image, _FIEMAP_IOCTL, self._buf, 1)
        except IOError as err:
            # Note, the FIEMAP ioctl is supported by the Linux kernel starting
            # from version 2.6.28 (year 2008).
            if err.errno == errno.EOPNOTSUPP:
                errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \
                         "by the file-system"
                self._log.debug(errstr)
                raise ErrorNotSupp(errstr)
            if err.errno == errno.ENOTTY:
                errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \
                         "by the kernel"
                self._log.debug(errstr)
                raise ErrorNotSupp(errstr)
            raise Error("the FIEMAP ioctl failed for '%s': %s"
                        % (self._image_path, err))

        # Unpack straight from the buffer, no need to copy a slice of it
        return struct.unpack_from(_FIEMAP_FORMAT, self._buf, 0)

    def block_is_mapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        struct_fiemap = self._invoke_fiemap(block, 1)

        # The element with index 3 of 'struct_fiemap' is the
        # 'fm_mapped_extents' field. If it contains zero, the block is not
        # mapped, otherwise it is mapped.
        result = bool(struct_fiemap[3])
        self._log.debug("FilemapFiemap: block_is_mapped(%d) returns %s"
                        % (block, result))
        return result

    def block_is_unmapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        return not self.block_is_mapped(block)

    def _unpack_fiemap_extent(self, index):
        """
        Unpack a 'struct fiemap_extent' structure object number 'index' from
        the internal 'self._buf' buffer.
        """

        # The extents follow the 'struct fiemap' header in the buffer
        offset = _FIEMAP_SIZE + _FIEMAP_EXTENT_SIZE * index
        return struct.unpack_from(_FIEMAP_EXTENT_FORMAT, self._buf, offset)

    def _do_get_mapped_ranges(self, start, count):
        """
        Implements most the functionality for the 'get_mapped_ranges()'
        generator: invokes the FIEMAP ioctl, walks through the mapped extents
        and yields mapped block ranges. However, the ranges may be adjacent
        (e.g., (1, 100), (101, 200)) and 'get_mapped_ranges()' simply merges
        them.
        """

        block = start
        while block < start + count:
            struct_fiemap = self._invoke_fiemap(block, count)

            mapped_extents = struct_fiemap[3]
            if mapped_extents == 0:
                # No more mapped blocks
                return

            for extent in range(mapped_extents):
                fiemap_extent = self._unpack_fiemap_extent(extent)

                # Start of the extent
                extent_start = fiemap_extent[0]
                # Starting block number of the extent
                extent_block = extent_start // self.block_size
                # Length of the extent
                extent_len = fiemap_extent[2]
                # Count of blocks in the extent
                extent_count = extent_len // self.block_size

                # Extent length and offset have to be block-aligned
                assert extent_start % self.block_size == 0
                assert extent_len % self.block_size == 0

                if extent_block > start + count - 1:
                    # The extent lies entirely beyond the requested area
                    return

                # Clip the extent to the requested area
                first = max(extent_block, block)
                last = min(extent_block + extent_count, start + count) - 1
                yield (first, last)

            # Continue from the block which follows the last extent seen
            block = extent_block + extent_count

    def get_mapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapFiemap: get_mapped_ranges(%d, %d(%d))"
                        % (start, count, start + count - 1))
        iterator = self._do_get_mapped_ranges(start, count)
        try:
            first_prev, last_prev = next(iterator)
        except StopIteration:
            # Nothing is mapped in the requested area. 'StopIteration' must
            # not leak out of a generator, it would turn into 'RuntimeError'.
            return

        for first, last in iterator:
            if last_prev == first - 1:
                # Adjacent to the previous range, merge them
                last_prev = last
            else:
                self._log.debug("FilemapFiemap: yielding range (%d, %d)"
                                % (first_prev, last_prev))
                yield (first_prev, last_prev)
                first_prev, last_prev = first, last

        self._log.debug("FilemapFiemap: yielding range (%d, %d)"
                        % (first_prev, last_prev))
        yield (first_prev, last_prev)

    def get_unmapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapFiemap: get_unmapped_ranges(%d, %d(%d))"
                        % (start, count, start + count - 1))
        hole_first = start
        for first, last in self._do_get_mapped_ranges(start, count):
            if first > hole_first:
                # There is a gap between the previous mapped range and this one
                self._log.debug("FilemapFiemap: yielding range (%d, %d)"
                                % (hole_first, first - 1))
                yield (hole_first, first - 1)

            hole_first = last + 1

        if hole_first < start + count:
            # The area ends with a hole
            self._log.debug("FilemapFiemap: yielding range (%d, %d)"
                            % (hole_first, start + count - 1))
            yield (hole_first, start + count - 1)
520
def filemap(image, log=None):
    """
    Build a Filemap object for 'image' using whatever the running system
    supports. The 'FilemapFiemap' class (FIEMAP ioctl) is tried first. If it
    raises 'ErrorNotSupp', an instance of the 'FilemapSeek' class
    ('SEEK_HOLE') is created instead, and any exception raised while creating
    it, including 'ErrorNotSupp', is passed on to the caller.

    The 'image' and 'log' arguments are handed over to the class constructor.
    """

    try:
        mapper = FilemapFiemap(image, log)
    except ErrorNotSupp:
        mapper = FilemapSeek(image, log)
    return mapper
535
def sparse_copy(src_fname, dst_fname, skip=0, seek=0,
                length=0, api=None):
    """
    Efficiently copy sparse file to or into another file. Only the mapped
    blocks of the source are read and written, holes are skipped.

    src_fname: path to source file
    dst_fname: path to destination file. An existing file is updated in
               place, a missing one is created and sized up front
    skip: skip N bytes at the start of src
    seek: seek N bytes from the start of dst
    length: read N bytes from src and write them to dst, 0 means everything
    api: FilemapFiemap or FilemapSeek class (or a compatible callable) which
         is called with 'src_fname', 'filemap' is used by default
    """
    if not api:
        api = filemap
    fmap = api(src_fname)

    try:
        dst_file = open(dst_fname, 'r+b')
        created = False
    except IOError:
        dst_file = open(dst_fname, 'wb')
        created = True

    # Close the destination on every exit path, including errors
    try:
        if created:
            if length:
                dst_size = length + seek
            else:
                dst_size = os.path.getsize(src_fname) + seek - skip
            dst_file.truncate(dst_size)

        src_file = fmap._f_image
        written = 0
        for first, last in fmap.get_mapped_ranges(0, fmap.blocks_cnt):
            start = first * fmap.block_size
            end = (last + 1) * fmap.block_size

            # The whole range lies within the skipped part of the source
            if skip >= end:
                continue

            # The range is only partially skipped
            if start < skip < end:
                start = skip

            src_file.seek(start, os.SEEK_SET)

            # Count the hole in front of this range as already copied
            written = start - skip
            if length and written >= length:
                return

            dst_file.seek(seek + start - skip, os.SEEK_SET)

            chunk_size = 1024 * 1024
            to_read = end - start
            read = 0

            while read < to_read:
                if read + chunk_size > to_read:
                    chunk_size = to_read - read
                size = chunk_size
                # Do not copy more than 'length' bytes in total
                if length and written + size > length:
                    size = length - written
                chunk = src_file.read(size)
                dst_file.write(chunk)
                read += size
                written += size
                if written == length:
                    return
    finally:
        dst_file.close()
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600599 dst_file.close()