blob: 77e32b9addbda4259eeec55127a05e200d4b776c [file] [log] [blame]
Patrick Williamsc0f7c042017-02-23 20:41:17 -06001# Copyright (c) 2012 Intel, Inc.
2#
3# This program is free software; you can redistribute it and/or modify
4# it under the terms of the GNU General Public License, version 2,
5# as published by the Free Software Foundation.
6#
7# This program is distributed in the hope that it will be useful, but
8# WITHOUT ANY WARRANTY; without even the implied warranty of
9# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10# General Public License for more details.
11
12"""
This module implements a way to get the block map of a file in python, i.e.,
to find out which blocks of the file are mapped and which are holes. Two
methods are supported - the FIEMAP ioctl and the 'SEEK_HOLE / SEEK_DATA'
features of the file seek syscall. The former is implemented by the
'FilemapFiemap' class, the latter is implemented by the 'FilemapSeek' class.
Both classes provide the same API. The 'filemap' function automatically
selects which class can be used and returns an instance of the class.
19"""
20
21# Disable the following pylint recommendations:
22# * Too many instance attributes (R0902)
23# pylint: disable=R0902
24
import array
import errno
import fcntl
import logging
import os
import struct
import tempfile
31
def get_block_size(file_obj):
    """
    Return the block size reported for the file object 'file_obj'.

    The value is obtained with the FIGETBSZ ioctl. A failing ioctl is
    reported to the caller with the 'IOError' exception.
    """
    # FIGETBSZ has the ioctl request number 2. The kernel answers by filling
    # in a single native 'unsigned int', so an equally sized zeroed buffer is
    # passed in and the reply is decoded with the same format.
    figetbsz = 2
    request = struct.pack('I', 0)
    reply = fcntl.ioctl(file_obj, figetbsz, request)
    (block_size,) = struct.unpack('I', reply)
    return block_size
41
class ErrorNotSupp(Exception):
    """
    Raised by this module when the 'FIEMAP' or the 'SEEK_HOLE' feature is not
    supported, either by the kernel or by the file-system.
    """
48
class Error(Exception):
    """The exception type used for all the other errors of this module."""
52
53
54class _FilemapBase(object):
55 """
56 This is a base class for a couple of other classes in this module. This
57 class simply performs the common parts of the initialization process: opens
58 the image file, gets its size, etc. The 'log' parameter is the logger object
59 to use for printing messages.
60 """
61
62 def __init__(self, image, log=None):
63 """
64 Initialize a class instance. The 'image' argument is full path to the
65 file or file object to operate on.
66 """
67
68 self._log = log
69 if self._log is None:
70 self._log = logging.getLogger(__name__)
71
72 self._f_image_needs_close = False
73
74 if hasattr(image, "fileno"):
75 self._f_image = image
76 self._image_path = image.name
77 else:
78 self._image_path = image
79 self._open_image_file()
80
81 try:
82 self.image_size = os.fstat(self._f_image.fileno()).st_size
83 except IOError as err:
84 raise Error("cannot get information about file '%s': %s"
85 % (self._f_image.name, err))
86
87 try:
88 self.block_size = get_block_size(self._f_image)
89 except IOError as err:
90 raise Error("cannot get block size for '%s': %s"
91 % (self._image_path, err))
92
93 self.blocks_cnt = self.image_size + self.block_size - 1
94 self.blocks_cnt //= self.block_size
95
96 try:
97 self._f_image.flush()
98 except IOError as err:
99 raise Error("cannot flush image file '%s': %s"
100 % (self._image_path, err))
101
102 try:
103 os.fsync(self._f_image.fileno()),
104 except OSError as err:
105 raise Error("cannot synchronize image file '%s': %s "
106 % (self._image_path, err.strerror))
107
108 self._log.debug("opened image \"%s\"" % self._image_path)
109 self._log.debug("block size %d, blocks count %d, image size %d"
110 % (self.block_size, self.blocks_cnt, self.image_size))
111
112 def __del__(self):
113 """The class destructor which just closes the image file."""
114 if self._f_image_needs_close:
115 self._f_image.close()
116
117 def _open_image_file(self):
118 """Open the image file."""
119 try:
120 self._f_image = open(self._image_path, 'rb')
121 except IOError as err:
122 raise Error("cannot open image file '%s': %s"
123 % (self._image_path, err))
124
125 self._f_image_needs_close = True
126
127 def block_is_mapped(self, block): # pylint: disable=W0613,R0201
128 """
129 This method has has to be implemented by child classes. It returns
130 'True' if block number 'block' of the image file is mapped and 'False'
131 otherwise.
132 """
133
134 raise Error("the method is not implemented")
135
136 def block_is_unmapped(self, block): # pylint: disable=W0613,R0201
137 """
138 This method has has to be implemented by child classes. It returns
139 'True' if block number 'block' of the image file is not mapped (hole)
140 and 'False' otherwise.
141 """
142
143 raise Error("the method is not implemented")
144
145 def get_mapped_ranges(self, start, count): # pylint: disable=W0613,R0201
146 """
147 This method has has to be implemented by child classes. This is a
148 generator which yields ranges of mapped blocks in the file. The ranges
149 are tuples of 2 elements: [first, last], where 'first' is the first
150 mapped block and 'last' is the last mapped block.
151
152 The ranges are yielded for the area of the file of size 'count' blocks,
153 starting from block 'start'.
154 """
155
156 raise Error("the method is not implemented")
157
158 def get_unmapped_ranges(self, start, count): # pylint: disable=W0613,R0201
159 """
160 This method has has to be implemented by child classes. Just like
161 'get_mapped_ranges()', but yields unmapped block ranges instead
162 (holes).
163 """
164
165 raise Error("the method is not implemented")
166
167
168# The 'SEEK_HOLE' and 'SEEK_DATA' options of the file seek system call
169_SEEK_DATA = 3
170_SEEK_HOLE = 4
171
172def _lseek(file_obj, offset, whence):
173 """This is a helper function which invokes 'os.lseek' for file object
174 'file_obj' and with specified 'offset' and 'whence'. The 'whence'
175 argument is supposed to be either '_SEEK_DATA' or '_SEEK_HOLE'. When
176 there is no more data or hole starting from 'offset', this function
177 returns '-1'. Otherwise the data or hole position is returned."""
178
179 try:
180 return os.lseek(file_obj.fileno(), offset, whence)
181 except OSError as err:
182 # The 'lseek' system call returns the ENXIO if there is no data or
183 # hole starting from the specified offset.
184 if err.errno == os.errno.ENXIO:
185 return -1
186 elif err.errno == os.errno.EINVAL:
187 raise ErrorNotSupp("the kernel or file-system does not support "
188 "\"SEEK_HOLE\" and \"SEEK_DATA\"")
189 else:
190 raise
191
class FilemapSeek(_FilemapBase):
    """
    This class uses the 'SEEK_HOLE' and 'SEEK_DATA' to find file block mapping.
    Unfortunately, the current implementation requires the caller to be able
    to create a temporary file in the directory of the image file, because
    this is how the 'SEEK_HOLE' support is probed.
    """

    def __init__(self, image, log=None):
        """
        Refer the '_FilemapBase' class for the documentation. Additionally
        raises 'ErrorNotSupp' if 'SEEK_HOLE' is found to be unsupported.
        """

        # Call the base class constructor first
        _FilemapBase.__init__(self, image, log)
        self._log.debug("FilemapSeek: initializing")

        self._probe_seek_hole()

    def _probe_seek_hole(self):
        """
        Check whether the system implements 'SEEK_HOLE' and 'SEEK_DATA'.
        Unfortunately, there seems to be no clean way for detecting this,
        because often the system just fakes them by just assuming that all
        files are fully mapped, so 'SEEK_HOLE' always returns EOF and
        'SEEK_DATA' always returns the requested offset.

        I could not invent a better way of detecting the fake 'SEEK_HOLE'
        implementation than just to create a temporary file in the same
        directory where the image file resides. It would be nice to change this
        to something better.

        Raises 'ErrorNotSupp' if the temporary file cannot be created or
        truncated, or if only the stub implementation is available.
        """

        directory = os.path.dirname(self._image_path)

        try:
            tmp_obj = tempfile.TemporaryFile("w+", dir=directory)
        except (IOError, OSError) as err:
            raise ErrorNotSupp("cannot create a temporary in \"%s\": %s"
                               % (directory, err))

        # Make sure the temporary file is closed on all paths, including the
        # error ones.
        try:
            # Extending an empty file creates a file consisting of a single
            # hole of one block.
            try:
                os.ftruncate(tmp_obj.fileno(), self.block_size)
            except OSError as err:
                raise ErrorNotSupp("cannot truncate temporary file in "
                                   "\"%s\": %s" % (directory, err))

            offs = _lseek(tmp_obj, 0, _SEEK_HOLE)
        finally:
            tmp_obj.close()

        if offs != 0:
            # We are dealing with the stub 'SEEK_HOLE' implementation which
            # always returns EOF.
            self._log.debug("lseek(0, SEEK_HOLE) returned %d" % offs)
            raise ErrorNotSupp("the file-system does not support "
                               "\"SEEK_HOLE\" and \"SEEK_DATA\" but only "
                               "provides a stub implementation")

    def block_is_mapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        offs = _lseek(self._f_image, block * self.block_size, _SEEK_DATA)
        if offs == -1:
            # No data at or after the block
            result = False
        else:
            # The block is mapped only if the closest data is in this block
            result = (offs // self.block_size == block)

        self._log.debug("FilemapSeek: block_is_mapped(%d) returns %s"
                        % (block, result))
        return result

    def block_is_unmapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        return not self.block_is_mapped(block)

    def _get_ranges(self, start, count, whence1, whence2):
        """
        This function implements 'get_mapped_ranges()' and
        'get_unmapped_ranges()' depending on what is passed in the 'whence1'
        and 'whence2' arguments: 'whence1' is used to find the beginning of
        the next range and 'whence2' is used to find its end.

        This is a generator which yields '(first_block, last_block)' tuples
        for the area of 'count' blocks starting from block 'start'.
        """

        assert whence1 != whence2
        # Both 'end' and 'limit' are byte offsets
        end = start * self.block_size
        limit = end + count * self.block_size

        while True:
            start = _lseek(self._f_image, end, whence1)
            if start == -1 or start >= limit or start == self.image_size:
                break

            end = _lseek(self._f_image, start, whence2)
            if end == -1 or end == self.image_size:
                # The range lasts till the end of file, round the end up to
                # the block boundary
                end = self.blocks_cnt * self.block_size
            if end > limit:
                end = limit

            start_blk = start // self.block_size
            end_blk = end // self.block_size - 1
            self._log.debug("FilemapSeek: yielding range (%d, %d)"
                            % (start_blk, end_blk))
            yield (start_blk, end_blk)

    def get_mapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapSeek: get_mapped_ranges(%d, %d(%d))"
                        % (start, count, start + count - 1))
        return self._get_ranges(start, count, _SEEK_DATA, _SEEK_HOLE)

    def get_unmapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapSeek: get_unmapped_ranges(%d, %d(%d))"
                        % (start, count, start + count - 1))
        return self._get_ranges(start, count, _SEEK_HOLE, _SEEK_DATA)
302
303
304# Below goes the FIEMAP ioctl implementation, which is not very readable
305# because it deals with the rather complex FIEMAP ioctl. To understand the
306# code, you need to know the FIEMAP interface, which is documented in the
307# "Documentation/filesystems/fiemap.txt" file in the Linux kernel sources.
308
309# Format string for 'struct fiemap'
310_FIEMAP_FORMAT = "=QQLLLL"
311# sizeof(struct fiemap)
312_FIEMAP_SIZE = struct.calcsize(_FIEMAP_FORMAT)
313# Format string for 'struct fiemap_extent'
314_FIEMAP_EXTENT_FORMAT = "=QQQQQLLLL"
315# sizeof(struct fiemap_extent)
316_FIEMAP_EXTENT_SIZE = struct.calcsize(_FIEMAP_EXTENT_FORMAT)
317# The FIEMAP ioctl number
318_FIEMAP_IOCTL = 0xC020660B
319# This FIEMAP ioctl flag which instructs the kernel to sync the file before
320# reading the block map
321_FIEMAP_FLAG_SYNC = 0x00000001
322# Size of the buffer for 'struct fiemap_extent' elements which will be used
323# when invoking the FIEMAP ioctl. The larger is the buffer, the less times the
324# FIEMAP ioctl will be invoked.
325_FIEMAP_BUFFER_SIZE = 256 * 1024
326
class FilemapFiemap(_FilemapBase):
    """
    This class provides API to the FIEMAP ioctl. Namely, it allows to iterate
    over all mapped blocks and over all holes.

    This class synchronizes the image file every time it invokes the FIEMAP
    ioctl in order to work-around early FIEMAP implementation kernel bugs.
    """

    def __init__(self, image, log=None):
        """
        Initialize a class instance. The 'image' argument is the full path to
        the file or the file object to operate on. Refer the '_FilemapBase'
        class for more documentation.

        Raises 'ErrorNotSupp' if the FIEMAP ioctl is not supported.
        """

        # Call the base class constructor first
        _FilemapBase.__init__(self, image, log)
        self._log.debug("FilemapFiemap: initializing")

        # Calculate how many 'struct fiemap_extent' elements fit the buffer
        # after the leading 'struct fiemap', then size the buffer exactly for
        # that many elements.
        extents_space = _FIEMAP_BUFFER_SIZE - _FIEMAP_SIZE
        self._fiemap_extent_cnt = extents_space // _FIEMAP_EXTENT_SIZE
        assert self._fiemap_extent_cnt > 0
        self._buf_size = _FIEMAP_SIZE + \
                         self._fiemap_extent_cnt * _FIEMAP_EXTENT_SIZE

        # Allocate a mutable buffer for the FIEMAP ioctl
        self._buf = array.array('B', [0] * self._buf_size)

        # Check if the FIEMAP ioctl is supported
        self.block_is_mapped(0)

    def _invoke_fiemap(self, block, count):
        """
        Invoke the FIEMAP ioctl for 'count' blocks of the file starting from
        block number 'block'.

        The full result of the operation is stored in 'self._buf' on exit.
        Returns the unpacked 'struct fiemap' data structure in form of a python
        tuple (just like 'struct.unpack()').

        Raises 'ErrorNotSupp' if the ioctl is not supported by the kernel or
        the file-system, and 'Error' for a bad block number and for all the
        other ioctl failures.
        """

        if self.blocks_cnt != 0 and (block < 0 or block >= self.blocks_cnt):
            raise Error("bad block number %d, should be within [0, %d]"
                        % (block, self.blocks_cnt - 1))

        # Initialize the 'struct fiemap' part of the buffer. We use the
        # '_FIEMAP_FLAG_SYNC' flag in order to make sure the file is
        # synchronized. The reason for this is that early FIEMAP
        # implementations had many bugs related to cached dirty data, and
        # synchronizing the file is a necessary work-around.
        struct.pack_into(_FIEMAP_FORMAT, self._buf, 0, block * self.block_size,
                         count * self.block_size, _FIEMAP_FLAG_SYNC, 0,
                         self._fiemap_extent_cnt, 0)

        try:
            fcntl.ioctl(self._f_image, _FIEMAP_IOCTL, self._buf, 1)
        except IOError as err:
            # Note, the FIEMAP ioctl is supported by the Linux kernel starting
            # from version 2.6.28 (year 2008).
            #
            # The error codes come from the 'errno' module, because the
            # undocumented 'os.errno' alias does not exist in python 3.7 and
            # newer.
            if err.errno == errno.EOPNOTSUPP:
                errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \
                         "by the file-system"
                self._log.debug(errstr)
                raise ErrorNotSupp(errstr)
            if err.errno == errno.ENOTTY:
                errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \
                         "by the kernel"
                self._log.debug(errstr)
                raise ErrorNotSupp(errstr)
            raise Error("the FIEMAP ioctl failed for '%s': %s"
                        % (self._image_path, err))

        # Unpack right from the buffer, without copying a slice of it
        return struct.unpack_from(_FIEMAP_FORMAT, self._buf, 0)

    def block_is_mapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        struct_fiemap = self._invoke_fiemap(block, 1)

        # The element number 3 of 'struct_fiemap' is the 'fm_mapped_extents'
        # field. If it contains zero, the block is not mapped, otherwise it is
        # mapped.
        result = bool(struct_fiemap[3])
        self._log.debug("FilemapFiemap: block_is_mapped(%d) returns %s"
                        % (block, result))
        return result

    def block_is_unmapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        return not self.block_is_mapped(block)

    def _unpack_fiemap_extent(self, index):
        """
        Unpack a 'struct fiemap_extent' structure object number 'index' from
        the internal 'self._buf' buffer.
        """

        offset = _FIEMAP_SIZE + _FIEMAP_EXTENT_SIZE * index
        return struct.unpack_from(_FIEMAP_EXTENT_FORMAT, self._buf, offset)

    def _do_get_mapped_ranges(self, start, count):
        """
        Implements most the functionality for the 'get_mapped_ranges()'
        generator: invokes the FIEMAP ioctl, walks through the mapped extents
        and yields mapped block ranges. However, the ranges may be adjacent
        (e.g., (1, 100), (101, 200)) and 'get_mapped_ranges()' simply merges
        them.
        """

        block = start
        while block < start + count:
            struct_fiemap = self._invoke_fiemap(block, count)

            mapped_extents = struct_fiemap[3]
            if mapped_extents == 0:
                # No more mapped blocks
                return

            for index in range(mapped_extents):
                fiemap_extent = self._unpack_fiemap_extent(index)

                # Start of the extent
                extent_start = fiemap_extent[0]
                # Starting block number of the extent
                extent_block = extent_start // self.block_size
                # Length of the extent
                extent_len = fiemap_extent[2]
                # Count of blocks in the extent
                extent_count = extent_len // self.block_size

                # Extent length and offset have to be block-aligned
                assert extent_start % self.block_size == 0
                assert extent_len % self.block_size == 0

                if extent_block > start + count - 1:
                    return

                # Clip the extent to the requested area
                first = max(extent_block, block)
                last = min(extent_block + extent_count, start + count) - 1
                yield (first, last)

            # Continue from the block which follows the last reported extent
            block = extent_block + extent_count

    def get_mapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapFiemap: get_mapped_ranges(%d, %d(%d))"
                        % (start, count, start + count - 1))
        iterator = self._do_get_mapped_ranges(start, count)

        # There may be no mapped ranges at all. 'StopIteration' must not leak
        # out of this generator, because python 3.7 and newer turn it into
        # 'RuntimeError' (PEP 479).
        try:
            first_prev, last_prev = next(iterator)
        except StopIteration:
            return

        for first, last in iterator:
            if last_prev == first - 1:
                # Adjacent ranges, merge them
                last_prev = last
            else:
                self._log.debug("FilemapFiemap: yielding range (%d, %d)"
                                % (first_prev, last_prev))
                yield (first_prev, last_prev)
                first_prev, last_prev = first, last

        self._log.debug("FilemapFiemap: yielding range (%d, %d)"
                        % (first_prev, last_prev))
        yield (first_prev, last_prev)

    def get_unmapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapFiemap: get_unmapped_ranges(%d, %d(%d))"
                        % (start, count, start + count - 1))
        hole_first = start
        for first, last in self._do_get_mapped_ranges(start, count):
            if first > hole_first:
                # There is a hole in front of this mapped range
                self._log.debug("FilemapFiemap: yielding range (%d, %d)"
                                % (hole_first, first - 1))
                yield (hole_first, first - 1)

            hole_first = last + 1

        if hole_first < start + count:
            # The rest of the area after the last mapped range is a hole
            self._log.debug("FilemapFiemap: yielding range (%d, %d)"
                            % (hole_first, start + count - 1))
            yield (hole_first, start + count - 1)
513
def filemap(image, log=None):
    """
    Create and return an instance of a Filemap class for 'image'. The
    'FilemapFiemap' class is tried first. If creating it fails with the
    'ErrorNotSupp' exception, an instance of the 'FilemapSeek' class is
    created instead. Exceptions raised while creating the 'FilemapSeek'
    instance are not handled here and propagate to the caller.
    """

    try:
        fmap = FilemapFiemap(image, log)
    except ErrorNotSupp:
        fmap = FilemapSeek(image, log)
    return fmap
528
def sparse_copy(src_fname, dst_fname, skip=0, seek=0,
                length=0, api=None):
    """
    Efficiently copy sparse file to or into another file. Only the mapped
    blocks of the source file are read and written.

    src_fname: path to source file
    dst_fname: path to destination file
    skip: skip N bytes at the start of src
    seek: seek N bytes from the start of dst
    length: read N bytes from src and write them to dst (0 means no limit)
    api: callable which takes the source path and returns a filemap object,
         e.g. the FilemapFiemap or FilemapSeek class ('filemap' by default)
    """
    if not api:
        api = filemap
    fmap = api(src_fname)

    # Write into the existing destination file if possible, otherwise create
    # a new one.
    try:
        dst_file = open(dst_fname, 'r+b')
        dst_is_new = False
    except IOError:
        dst_file = open(dst_fname, 'wb')
        dst_is_new = True

    # Make sure the destination file is closed on all paths, including the
    # error ones.
    try:
        if dst_is_new:
            # Give the new file its final size right away, the areas which
            # are not written below stay holes
            if length:
                dst_size = length + seek
            else:
                dst_size = os.path.getsize(src_fname) + seek - skip
            dst_file.truncate(dst_size)

        written = 0
        for first, last in fmap.get_mapped_ranges(0, fmap.blocks_cnt):
            start = first * fmap.block_size
            end = (last + 1) * fmap.block_size

            if skip >= end:
                # The whole range is in the skipped area
                continue

            if start < skip < end:
                start = skip

            fmap._f_image.seek(start, os.SEEK_SET)

            # The holes in front of this range count as already copied
            written = start - skip
            if length and written >= length:
                return

            dst_file.seek(seek + start - skip, os.SEEK_SET)

            chunk_size = 1024 * 1024
            to_read = end - start
            read = 0

            while read < to_read:
                if read + chunk_size > to_read:
                    chunk_size = to_read - read
                size = chunk_size
                if length and written + size > length:
                    size = length - written
                chunk = fmap._f_image.read(size)
                dst_file.write(chunk)
                read += size
                written += size
                if written == length:
                    return
    finally:
        dst_file.close()