blob: a72fa09ef5503a414dd6a366c268deda24f2cf0c [file] [log] [blame]
Patrick Williamsc0f7c042017-02-23 20:41:17 -06001# Copyright (c) 2012 Intel, Inc.
2#
3# This program is free software; you can redistribute it and/or modify
4# it under the terms of the GNU General Public License, version 2,
5# as published by the Free Software Foundation.
6#
7# This program is distributed in the hope that it will be useful, but
8# WITHOUT ANY WARRANTY; without even the implied warranty of
9# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10# General Public License for more details.
11
12"""
This module implements a way to get the block map of a file. Two methods
are supported - the FIEMAP ioctl and the 'SEEK_HOLE / SEEK_DATA' features of
the file seek syscall. The former is implemented by the 'FilemapFiemap' class,
the latter is implemented by the 'FilemapSeek' class. Both classes provide the
same API. The 'filemap' function automatically selects which class can be used
and returns an instance of the class.
19"""
20
21# Disable the following pylint recommendations:
22# * Too many instance attributes (R0902)
23# pylint: disable=R0902
24
import array
import errno
import fcntl
import logging
import os
import struct
import tempfile
31
def get_block_size(file_obj):
    """
    Return the block size for file object 'file_obj'.

    The size is queried with the FIGETBSZ ioctl. If the ioctl reports zero,
    the 'st_blksize' field returned by 'os.fstat()' is used instead.

    Errors are indicated by the 'IOError' exception: it is raised when the
    ioctl fails, or when the ioctl reports zero and 'os.fstat()' provides no
    'st_blksize' field.
    """
    # The FIGETBSZ ioctl (number 2) returns the block size of the host
    # file-system for the image file.
    figetbsz = 2
    binary_data = fcntl.ioctl(file_obj, figetbsz, struct.pack('I', 0))
    bsize = struct.unpack('I', binary_data)[0]
    if bsize:
        return bsize

    # The ioctl reported zero, fall back to what 'fstat' knows about the file.
    stat = os.fstat(file_obj.fileno())
    if not hasattr(stat, 'st_blksize'):
        raise IOError("Unable to determine block size")
    return stat.st_blksize
Patrick Williamsc0f7c042017-02-23 20:41:17 -060049
class ErrorNotSupp(Exception):
    """
    Raised when neither the kernel nor the file-system supports the requested
    feature ('FIEMAP' or 'SEEK_HOLE').
    """
56
class Error(Exception):
    """Generic exception type for every other failure reported by this module."""
60
61
class _FilemapBase(object):
    """
    This is a base class for a couple of other classes in this module. This
    class simply performs the common parts of the initialization process: opens
    the image file, gets its size, etc. The 'log' parameter is the logger object
    to use for printing messages.
    """

    def __init__(self, image, log=None):
        """
        Initialize a class instance. The 'image' argument is either the full
        path to the file or a file object to operate on. Any object which has
        the 'fileno' attribute is treated as a file object, and its 'name'
        attribute is used as the image path in messages.

        On success the following public attributes are available:
          * 'image_size' - size of the image file in bytes;
          * 'block_size' - block size as returned by 'get_block_size()';
          * 'blocks_cnt' - image size in blocks, rounded up.

        Raises 'Error' if the file cannot be opened, examined, flushed or
        synchronized.
        """

        self._log = log
        if self._log is None:
            self._log = logging.getLogger(__name__)

        # Only files opened by this class are closed by the destructor
        self._f_image_needs_close = False

        if hasattr(image, "fileno"):
            self._f_image = image
            self._image_path = image.name
        else:
            self._image_path = image
            self._open_image_file()

        # 'os.fstat()' reports failures with 'OSError', which is a type
        # distinct from 'IOError' on older Python versions, so catch both.
        try:
            self.image_size = os.fstat(self._f_image.fileno()).st_size
        except (IOError, OSError) as err:
            raise Error("cannot get information about file '%s': %s"
                        % (self._image_path, err))

        try:
            self.block_size = get_block_size(self._f_image)
        except (IOError, OSError) as err:
            raise Error("cannot get block size for '%s': %s"
                        % (self._image_path, err))

        # Round the image size up to the whole number of blocks
        self.blocks_cnt = self.image_size + self.block_size - 1
        self.blocks_cnt //= self.block_size

        try:
            self._f_image.flush()
        except (IOError, OSError) as err:
            raise Error("cannot flush image file '%s': %s"
                        % (self._image_path, err))

        try:
            os.fsync(self._f_image.fileno())
        except OSError as err:
            raise Error("cannot synchronize image file '%s': %s "
                        % (self._image_path, err.strerror))

        self._log.debug("opened image \"%s\"" % self._image_path)
        self._log.debug("block size %d, blocks count %d, image size %d"
                        % (self.block_size, self.blocks_cnt, self.image_size))

    def __del__(self):
        """The class destructor which just closes the image file."""
        # The destructor may run for an object whose '__init__()' was never
        # executed, so do not assume the attribute exists.
        if getattr(self, "_f_image_needs_close", False):
            self._f_image_needs_close = False
            self._f_image.close()

    def _open_image_file(self):
        """
        Open the image file 'self._image_path' for reading in binary mode and
        remember that it has to be closed by the destructor. Raises 'Error' if
        the file cannot be opened.
        """
        try:
            self._f_image = open(self._image_path, 'rb')
        except IOError as err:
            raise Error("cannot open image file '%s': %s"
                        % (self._image_path, err))

        self._f_image_needs_close = True

    def block_is_mapped(self, block): # pylint: disable=W0613,R0201
        """
        This method has to be implemented by child classes. It returns
        'True' if block number 'block' of the image file is mapped and 'False'
        otherwise.
        """

        raise Error("the method is not implemented")

    def block_is_unmapped(self, block): # pylint: disable=W0613,R0201
        """
        This method has to be implemented by child classes. It returns
        'True' if block number 'block' of the image file is not mapped (hole)
        and 'False' otherwise.
        """

        raise Error("the method is not implemented")

    def get_mapped_ranges(self, start, count): # pylint: disable=W0613,R0201
        """
        This method has to be implemented by child classes. This is a
        generator which yields ranges of mapped blocks in the file. The ranges
        are tuples of 2 elements: [first, last], where 'first' is the first
        mapped block and 'last' is the last mapped block.

        The ranges are yielded for the area of the file of size 'count' blocks,
        starting from block 'start'.
        """

        raise Error("the method is not implemented")

    def get_unmapped_ranges(self, start, count): # pylint: disable=W0613,R0201
        """
        This method has to be implemented by child classes. Just like
        'get_mapped_ranges()', but yields unmapped block ranges instead
        (holes).
        """

        raise Error("the method is not implemented")
174
175
# The 'SEEK_DATA' and 'SEEK_HOLE' options of the file seek system call. Use
# the values exported by the 'os' module when the interpreter provides them
# and fall back to the numbers used on Linux otherwise.
_SEEK_DATA = getattr(os, "SEEK_DATA", 3)
_SEEK_HOLE = getattr(os, "SEEK_HOLE", 4)
179
def _lseek(file_obj, offset, whence):
    """This is a helper function which invokes 'os.lseek' for file object
    'file_obj' and with specified 'offset' and 'whence'. The 'whence'
    argument is supposed to be either '_SEEK_DATA' or '_SEEK_HOLE'. When
    there is no more data or hole starting from 'offset', this function
    returns '-1'. Otherwise the data or hole position is returned.

    Raises 'ErrorNotSupp' if 'os.lseek' fails with EINVAL; any other
    'OSError' is re-raised unchanged."""

    try:
        return os.lseek(file_obj.fileno(), offset, whence)
    except OSError as err:
        # The error codes come from the 'errno' module: 'os.errno' was only an
        # undocumented alias and is not available in newer Python versions.
        #
        # The 'lseek' system call returns the ENXIO if there is no data or
        # hole starting from the specified offset.
        if err.errno == errno.ENXIO:
            return -1
        if err.errno == errno.EINVAL:
            raise ErrorNotSupp("the kernel or file-system does not support "
                               "\"SEEK_HOLE\" and \"SEEK_DATA\"")
        raise
199
class FilemapSeek(_FilemapBase):
    """
    This class uses the 'SEEK_HOLE' and 'SEEK_DATA' to find file block mapping.
    Unfortunately, the current implementation requires the caller to be able
    to create a temporary file in the directory where the image file resides
    (see '_probe_seek_hole()').
    """

    def __init__(self, image, log=None):
        """
        Refer the '_FilemapBase' class for the documentation. Additionally
        raises 'ErrorNotSupp' if 'SEEK_HOLE' does not really work for the
        directory of the image file.
        """

        # Call the base class constructor first
        _FilemapBase.__init__(self, image, log)
        self._log.debug("FilemapSeek: initializing")

        self._probe_seek_hole()

    def _probe_seek_hole(self):
        """
        Check whether the system implements 'SEEK_HOLE' and 'SEEK_DATA'.
        Unfortunately, there seems to be no clean way for detecting this,
        because often the system just fakes them by just assuming that all
        files are fully mapped, so 'SEEK_HOLE' always returns EOF and
        'SEEK_DATA' always returns the requested offset.

        I could not invent a better way of detecting the fake 'SEEK_HOLE'
        implementation than just to create a temporary file in the same
        directory where the image file resides. It would be nice to change this
        to something better.

        Raises 'ErrorNotSupp' if the temporary file cannot be created or
        truncated, or if the probe shows that 'SEEK_HOLE' is only a stub.
        """

        directory = os.path.dirname(self._image_path)

        try:
            tmp_obj = tempfile.TemporaryFile("w+", dir=directory)
        except (IOError, OSError) as err:
            raise ErrorNotSupp("cannot create a temporary in \"%s\": %s" \
                               % (directory, err))

        # Whatever the outcome of the probe is, the temporary file must not
        # stay open, so close it on the error paths too.
        try:
            # Make the temporary file one block large without writing any
            # data to it, so that a real implementation sees a hole at offset
            # zero.
            try:
                os.ftruncate(tmp_obj.fileno(), self.block_size)
            except OSError as err:
                raise ErrorNotSupp("cannot truncate temporary file in \"%s\": %s"
                                   % (directory, err))

            offs = _lseek(tmp_obj, 0, _SEEK_HOLE)
            if offs != 0:
                # We are dealing with the stub 'SEEK_HOLE' implementation which
                # always returns EOF.
                self._log.debug("lseek(0, SEEK_HOLE) returned %d" % offs)
                raise ErrorNotSupp("the file-system does not support "
                                   "\"SEEK_HOLE\" and \"SEEK_DATA\" but only "
                                   "provides a stub implementation")
        finally:
            tmp_obj.close()

    def block_is_mapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        offs = _lseek(self._f_image, block * self.block_size, _SEEK_DATA)
        if offs == -1:
            result = False
        else:
            # The block is mapped only if the nearest data starts within it
            result = (offs // self.block_size == block)

        self._log.debug("FilemapSeek: block_is_mapped(%d) returns %s"
                        % (block, result))
        return result

    def block_is_unmapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        return not self.block_is_mapped(block)

    def _get_ranges(self, start, count, whence1, whence2):
        """
        This function implements 'get_mapped_ranges()' and
        'get_unmapped_ranges()' depending on what is passed in the 'whence1'
        and 'whence2' arguments: 'whence1' is used to find where a range
        starts and 'whence2' is used to find where it ends.
        """

        assert whence1 != whence2
        end = start * self.block_size
        limit = end + count * self.block_size

        while True:
            start = _lseek(self._f_image, end, whence1)
            if start == -1 or start >= limit or start == self.image_size:
                break

            end = _lseek(self._f_image, start, whence2)
            if end == -1 or end == self.image_size:
                # The range lasts till the end of the file, round it up to the
                # block boundary.
                end = self.blocks_cnt * self.block_size
            if end > limit:
                end = limit

            start_blk = start // self.block_size
            end_blk = end // self.block_size - 1
            self._log.debug("FilemapSeek: yielding range (%d, %d)"
                            % (start_blk, end_blk))
            yield (start_blk, end_blk)

    def get_mapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapSeek: get_mapped_ranges(%d,  %d(%d))"
                        % (start, count, start + count - 1))
        return self._get_ranges(start, count, _SEEK_DATA, _SEEK_HOLE)

    def get_unmapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapSeek: get_unmapped_ranges(%d,  %d(%d))"
                        % (start, count, start + count - 1))
        return self._get_ranges(start, count, _SEEK_HOLE, _SEEK_DATA)
310
311
312# Below goes the FIEMAP ioctl implementation, which is not very readable
313# because it deals with the rather complex FIEMAP ioctl. To understand the
314# code, you need to know the FIEMAP interface, which is documented in the
315# "Documentation/filesystems/fiemap.txt" file in the Linux kernel sources.
316
# 'struct' format strings describing 'struct fiemap' and
# 'struct fiemap_extent'. The '=' prefix selects native byte order with
# standard sizes and no padding, 'Q' is a 64-bit and 'L' is a 32-bit unsigned
# integer.
_FIEMAP_FORMAT = "=QQLLLL"
_FIEMAP_EXTENT_FORMAT = "=QQQQQLLLL"
# sizeof(struct fiemap) and sizeof(struct fiemap_extent)
_FIEMAP_SIZE = struct.calcsize(_FIEMAP_FORMAT)
_FIEMAP_EXTENT_SIZE = struct.calcsize(_FIEMAP_EXTENT_FORMAT)
# The number of the FIEMAP ioctl
_FIEMAP_IOCTL = 0xC020660B
# The FIEMAP ioctl flag which tells the kernel to sync the file before the
# block map is read
_FIEMAP_FLAG_SYNC = 0x1
# How many bytes to allocate for the buffer which holds the
# 'struct fiemap_extent' elements when the FIEMAP ioctl is invoked. A larger
# buffer means that the ioctl has to be invoked fewer times.
_FIEMAP_BUFFER_SIZE = 1024 * 256
334
class FilemapFiemap(_FilemapBase):
    """
    This class provides API to the FIEMAP ioctl. Namely, it allows to iterate
    over all mapped blocks and over all holes.

    This class synchronizes the image file every time it invokes the FIEMAP
    ioctl in order to work-around early FIEMAP implementation kernel bugs.
    """

    def __init__(self, image, log=None):
        """
        Initialize a class instance. Refer the '_FilemapBase' class for the
        description of the arguments. Raises 'ErrorNotSupp' if the FIEMAP
        ioctl is not supported for the image file.
        """

        # Call the base class constructor first
        _FilemapBase.__init__(self, image, log)
        self._log.debug("FilemapFiemap: initializing")

        self._buf_size = _FIEMAP_BUFFER_SIZE

        # Calculate how many 'struct fiemap_extent' elements fit the buffer
        self._buf_size -= _FIEMAP_SIZE
        self._fiemap_extent_cnt = self._buf_size // _FIEMAP_EXTENT_SIZE
        assert self._fiemap_extent_cnt > 0
        self._buf_size = self._fiemap_extent_cnt * _FIEMAP_EXTENT_SIZE
        self._buf_size += _FIEMAP_SIZE

        # Allocate a mutable buffer for the FIEMAP ioctl
        self._buf = array.array('B', [0] * self._buf_size)

        # Check if the FIEMAP ioctl is supported
        self.block_is_mapped(0)

    def _invoke_fiemap(self, block, count):
        """
        Invoke the FIEMAP ioctl for 'count' blocks of the file starting from
        block number 'block'.

        The full result of the operation is stored in 'self._buf' on exit.
        Returns the unpacked 'struct fiemap' data structure in form of a python
        tuple (just like 'struct.unpack()').

        Raises 'ErrorNotSupp' if the ioctl is not supported by the kernel or
        the file-system, and 'Error' if 'block' is out of range or the ioctl
        fails for any other reason.
        """

        if self.blocks_cnt != 0 and (block < 0 or block >= self.blocks_cnt):
            raise Error("bad block number %d, should be within [0, %d]"
                        % (block, self.blocks_cnt - 1))

        # Initialize the 'struct fiemap' part of the buffer. We use the
        # '_FIEMAP_FLAG_SYNC' flag in order to make sure the file is
        # synchronized. The reason for this is that early FIEMAP
        # implementations had many bugs related to cached dirty data, and
        # synchronizing the file is a necessary work-around.
        struct.pack_into(_FIEMAP_FORMAT, self._buf, 0, block * self.block_size,
                         count * self.block_size, _FIEMAP_FLAG_SYNC, 0,
                         self._fiemap_extent_cnt, 0)

        try:
            fcntl.ioctl(self._f_image, _FIEMAP_IOCTL, self._buf, 1)
        except IOError as err:
            # The error codes come from the 'errno' module: 'os.errno' was
            # only an undocumented alias and is not available in newer Python
            # versions.
            #
            # Note, the FIEMAP ioctl is supported by the Linux kernel starting
            # from version 2.6.28 (year 2008).
            if err.errno == errno.EOPNOTSUPP:
                errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \
                         "by the file-system"
                self._log.debug(errstr)
                raise ErrorNotSupp(errstr)
            if err.errno == errno.ENOTTY:
                errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \
                         "by the kernel"
                self._log.debug(errstr)
                raise ErrorNotSupp(errstr)
            raise Error("the FIEMAP ioctl failed for '%s': %s"
                        % (self._image_path, err))

        return struct.unpack(_FIEMAP_FORMAT, self._buf[:_FIEMAP_SIZE])

    def block_is_mapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        struct_fiemap = self._invoke_fiemap(block, 1)

        # The element number 3 of 'struct_fiemap' is the 'fm_mapped_extents'
        # field. If it contains zero, the block is not mapped, otherwise it is
        # mapped.
        result = bool(struct_fiemap[3])
        self._log.debug("FilemapFiemap: block_is_mapped(%d) returns %s"
                        % (block, result))
        return result

    def block_is_unmapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        return not self.block_is_mapped(block)

    def _unpack_fiemap_extent(self, index):
        """
        Unpack a 'struct fiemap_extent' structure object number 'index' from
        the internal 'self._buf' buffer.
        """

        offset = _FIEMAP_SIZE + _FIEMAP_EXTENT_SIZE * index
        return struct.unpack(_FIEMAP_EXTENT_FORMAT,
                             self._buf[offset : offset + _FIEMAP_EXTENT_SIZE])

    def _do_get_mapped_ranges(self, start, count):
        """
        Implements most the functionality for the 'get_mapped_ranges()'
        generator: invokes the FIEMAP ioctl, walks through the mapped extents
        and yields mapped block ranges. However, the ranges may be consecutive
        (e.g., (1, 100), (101, 200)) and 'get_mapped_ranges()' simply merges
        them.
        """

        block = start
        while block < start + count:
            struct_fiemap = self._invoke_fiemap(block, count)

            mapped_extents = struct_fiemap[3]
            if mapped_extents == 0:
                # No more mapped blocks
                return

            extent = 0
            while extent < mapped_extents:
                fiemap_extent = self._unpack_fiemap_extent(extent)

                # Start of the extent
                extent_start = fiemap_extent[0]
                # Starting block number of the extent
                extent_block = extent_start // self.block_size
                # Length of the extent
                extent_len = fiemap_extent[2]
                # Count of blocks in the extent
                extent_count = extent_len // self.block_size

                # Extent length and offset have to be block-aligned
                assert extent_start % self.block_size == 0
                assert extent_len % self.block_size == 0

                if extent_block > start + count - 1:
                    return

                # Clip the extent to the requested area
                first = max(extent_block, block)
                last = min(extent_block + extent_count, start + count) - 1
                yield (first, last)

                extent += 1

            # Continue from the block following the last extent seen
            block = extent_block + extent_count

    def get_mapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapFiemap: get_mapped_ranges(%d,  %d(%d))"
                        % (start, count, start + count - 1))
        iterator = self._do_get_mapped_ranges(start, count)

        # When there are no mapped blocks at all, 'next()' raises
        # 'StopIteration'. It must not leak out of this generator, because
        # newer Python versions turn it into a 'RuntimeError'.
        try:
            first_prev, last_prev = next(iterator)
        except StopIteration:
            return

        for first, last in iterator:
            if last_prev == first - 1:
                # The range continues the previous one, merge them
                last_prev = last
            else:
                self._log.debug("FilemapFiemap: yielding range (%d, %d)"
                                % (first_prev, last_prev))
                yield (first_prev, last_prev)
                first_prev, last_prev = first, last

        self._log.debug("FilemapFiemap: yielding range (%d, %d)"
                        % (first_prev, last_prev))
        yield (first_prev, last_prev)

    def get_unmapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapFiemap: get_unmapped_ranges(%d,  %d(%d))"
                        % (start, count, start + count - 1))
        hole_first = start
        for first, last in self._do_get_mapped_ranges(start, count):
            if first > hole_first:
                self._log.debug("FilemapFiemap: yielding range (%d, %d)"
                                % (hole_first, first - 1))
                yield (hole_first, first - 1)

            hole_first = last + 1

        # Everything after the last mapped range is a hole too
        if hole_first < start + count:
            self._log.debug("FilemapFiemap: yielding range (%d, %d)"
                            % (hole_first, start + count - 1))
            yield (hole_first, start + count - 1)
521
def filemap(image, log=None):
    """
    Build a file map object for 'image' using the best method the system we
    run on supports. The 'FilemapFiemap' class is tried first. If it reports
    that the FIEMAP ioctl is not supported ('ErrorNotSupp'), the 'FilemapSeek'
    class is used instead. Exceptions raised while creating the 'FilemapSeek'
    instance are passed to the caller as is.

    The 'image' and 'log' arguments are handed over to the class constructor
    unchanged.
    """

    try:
        fmap = FilemapFiemap(image, log)
    except ErrorNotSupp:
        fmap = FilemapSeek(image, log)
    return fmap
536
def sparse_copy(src_fname, dst_fname, skip=0, seek=0,
                length=0, api=None):
    """
    Efficiently copy sparse file to or into another file.

    src_fname: path to source file
    dst_fname: path to destination file
    skip: skip N bytes at the start of src
    seek: seek N bytes from the start of dst
    length: read N bytes from src and write them to dst (0 means no limit)
    api: callable which takes the source path and returns a FilemapFiemap or
         FilemapSeek like object; 'filemap' is used when not specified

    Only the mapped block ranges of the source are read and written. If the
    destination file cannot be opened for update, it is created and truncated
    to the final size first. The destination file is always closed before
    returning, including when an exception is raised.
    """
    if not api:
        api = filemap
    fmap = api(src_fname)

    try:
        dst_file = open(dst_fname, 'r+b')
        dst_created = False
    except IOError:
        dst_file = open(dst_fname, 'wb')
        dst_created = True

    try:
        if dst_created:
            # Give the new file its final size up-front, the areas which are
            # not written below stay as they are.
            if length:
                dst_size = length + seek
            else:
                dst_size = os.path.getsize(src_fname) + seek - skip
            dst_file.truncate(dst_size)

        written = 0
        for first, last in fmap.get_mapped_ranges(0, fmap.blocks_cnt):
            start = first * fmap.block_size
            end = (last + 1) * fmap.block_size

            # The whole range lies within the skipped area
            if skip >= end:
                continue

            # The range is partially skipped
            if start < skip < end:
                start = skip

            fmap._f_image.seek(start, os.SEEK_SET)

            # Account for the hole which precedes this range
            written += start - skip - written
            if length and written >= length:
                dst_file.seek(seek + length, os.SEEK_SET)
                return

            dst_file.seek(seek + start - skip, os.SEEK_SET)

            chunk_size = 1024 * 1024
            to_read = end - start
            read = 0

            while read < to_read:
                if read + chunk_size > to_read:
                    chunk_size = to_read - read
                size = chunk_size
                # Do not copy more than 'length' bytes in total
                if length and written + size > length:
                    size = length - written
                chunk = fmap._f_image.read(size)
                dst_file.write(chunk)
                read += size
                written += size
                if written == length:
                    return
    finally:
        dst_file.close()