blob: abbf958b8c86320843f3774f835cf08922f0b0fd [file] [log] [blame]
Patrick Williamsc0f7c042017-02-23 20:41:17 -06001# Copyright (c) 2012 Intel, Inc.
2#
3# This program is free software; you can redistribute it and/or modify
4# it under the terms of the GNU General Public License, version 2,
5# as published by the Free Software Foundation.
6#
7# This program is distributed in the hope that it will be useful, but
8# WITHOUT ANY WARRANTY; without even the implied warranty of
9# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10# General Public License for more details.
11
12"""
This module implements a way to get the block map of a file. Two methods
are supported - the FIEMAP ioctl and the 'SEEK_HOLE / SEEK_DATA' features of
the file seek syscall. The former is implemented by the 'FilemapFiemap' class,
the latter is implemented by the 'FilemapSeek' class. Both classes provide the
same API. The 'filemap' function automatically selects which class can be used
and returns an instance of the class.
19"""
20
21# Disable the following pylint recommendations:
22# * Too many instance attributes (R0902)
23# pylint: disable=R0902
24
Brad Bishop1a4b7ee2018-12-16 17:11:34 -080025import errno
Patrick Williamsc0f7c042017-02-23 20:41:17 -060026import os
27import struct
28import array
29import fcntl
30import tempfile
31import logging
32
def get_block_size(file_obj):
    """
    Returns block size for file object 'file_obj'. Errors are indicated by the
    'IOError' exception.

    The block size is queried with the FIGETBSZ ioctl first. If the ioctl
    reports zero, the 'st_blksize' field returned by 'os.fstat()' is used
    instead. 'IOError' is raised when neither source yields a non-zero value.
    """

    # Get the block size of the host file-system for the image file by calling
    # the FIGETBSZ ioctl (number 2).
    figetbsz = 2
    binary_data = fcntl.ioctl(file_obj, figetbsz, struct.pack('I', 0))
    bsize = struct.unpack('I', binary_data)[0]
    if bsize:
        return bsize

    # Fall back to the preferred I/O block size reported by 'fstat()'. A
    # missing or zero value is treated as a failure, because callers divide by
    # the block size.
    bsize = getattr(os.fstat(file_obj.fileno()), 'st_blksize', 0)
    if not bsize:
        raise IOError("Unable to determine block size")
    return bsize
Patrick Williamsc0f7c042017-02-23 20:41:17 -060050
class ErrorNotSupp(Exception):
    """
    Raised when neither the kernel nor the file-system provides the requested
    'FIEMAP' or 'SEEK_HOLE' feature.
    """
57
class Error(Exception):
    """
    Generic exception type used for every other failure reported by this
    module.
    """
61
62
63class _FilemapBase(object):
64 """
65 This is a base class for a couple of other classes in this module. This
66 class simply performs the common parts of the initialization process: opens
67 the image file, gets its size, etc. The 'log' parameter is the logger object
68 to use for printing messages.
69 """
70
71 def __init__(self, image, log=None):
72 """
73 Initialize a class instance. The 'image' argument is full path to the
74 file or file object to operate on.
75 """
76
77 self._log = log
78 if self._log is None:
79 self._log = logging.getLogger(__name__)
80
81 self._f_image_needs_close = False
82
83 if hasattr(image, "fileno"):
84 self._f_image = image
85 self._image_path = image.name
86 else:
87 self._image_path = image
88 self._open_image_file()
89
90 try:
91 self.image_size = os.fstat(self._f_image.fileno()).st_size
92 except IOError as err:
93 raise Error("cannot get information about file '%s': %s"
94 % (self._f_image.name, err))
95
96 try:
97 self.block_size = get_block_size(self._f_image)
98 except IOError as err:
99 raise Error("cannot get block size for '%s': %s"
100 % (self._image_path, err))
101
102 self.blocks_cnt = self.image_size + self.block_size - 1
103 self.blocks_cnt //= self.block_size
104
105 try:
106 self._f_image.flush()
107 except IOError as err:
108 raise Error("cannot flush image file '%s': %s"
109 % (self._image_path, err))
110
111 try:
112 os.fsync(self._f_image.fileno()),
113 except OSError as err:
114 raise Error("cannot synchronize image file '%s': %s "
115 % (self._image_path, err.strerror))
116
117 self._log.debug("opened image \"%s\"" % self._image_path)
118 self._log.debug("block size %d, blocks count %d, image size %d"
119 % (self.block_size, self.blocks_cnt, self.image_size))
120
121 def __del__(self):
122 """The class destructor which just closes the image file."""
123 if self._f_image_needs_close:
124 self._f_image.close()
125
126 def _open_image_file(self):
127 """Open the image file."""
128 try:
129 self._f_image = open(self._image_path, 'rb')
130 except IOError as err:
131 raise Error("cannot open image file '%s': %s"
132 % (self._image_path, err))
133
134 self._f_image_needs_close = True
135
136 def block_is_mapped(self, block): # pylint: disable=W0613,R0201
137 """
138 This method has has to be implemented by child classes. It returns
139 'True' if block number 'block' of the image file is mapped and 'False'
140 otherwise.
141 """
142
143 raise Error("the method is not implemented")
144
145 def block_is_unmapped(self, block): # pylint: disable=W0613,R0201
146 """
147 This method has has to be implemented by child classes. It returns
148 'True' if block number 'block' of the image file is not mapped (hole)
149 and 'False' otherwise.
150 """
151
152 raise Error("the method is not implemented")
153
154 def get_mapped_ranges(self, start, count): # pylint: disable=W0613,R0201
155 """
156 This method has has to be implemented by child classes. This is a
157 generator which yields ranges of mapped blocks in the file. The ranges
158 are tuples of 2 elements: [first, last], where 'first' is the first
159 mapped block and 'last' is the last mapped block.
160
161 The ranges are yielded for the area of the file of size 'count' blocks,
162 starting from block 'start'.
163 """
164
165 raise Error("the method is not implemented")
166
167 def get_unmapped_ranges(self, start, count): # pylint: disable=W0613,R0201
168 """
169 This method has has to be implemented by child classes. Just like
170 'get_mapped_ranges()', but yields unmapped block ranges instead
171 (holes).
172 """
173
174 raise Error("the method is not implemented")
175
176
177# The 'SEEK_HOLE' and 'SEEK_DATA' options of the file seek system call
178_SEEK_DATA = 3
179_SEEK_HOLE = 4
180
181def _lseek(file_obj, offset, whence):
182 """This is a helper function which invokes 'os.lseek' for file object
183 'file_obj' and with specified 'offset' and 'whence'. The 'whence'
184 argument is supposed to be either '_SEEK_DATA' or '_SEEK_HOLE'. When
185 there is no more data or hole starting from 'offset', this function
186 returns '-1'. Otherwise the data or hole position is returned."""
187
188 try:
189 return os.lseek(file_obj.fileno(), offset, whence)
190 except OSError as err:
191 # The 'lseek' system call returns the ENXIO if there is no data or
192 # hole starting from the specified offset.
Brad Bishop1a4b7ee2018-12-16 17:11:34 -0800193 if err.errno == errno.ENXIO:
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600194 return -1
Brad Bishop1a4b7ee2018-12-16 17:11:34 -0800195 elif err.errno == errno.EINVAL:
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600196 raise ErrorNotSupp("the kernel or file-system does not support "
197 "\"SEEK_HOLE\" and \"SEEK_DATA\"")
198 else:
199 raise
200
class FilemapSeek(_FilemapBase):
    """
    This class uses the 'SEEK_HOLE' and 'SEEK_DATA' to find file block mapping.
    Unfortunately, the current implementation needs to create a temporary file
    in the directory of the image file in order to probe for 'SEEK_HOLE'
    support.
    """

    def __init__(self, image, log=None):
        """
        Refer the '_FilemapBase' class for the documentation. Additionally
        raises 'ErrorNotSupp' if 'SEEK_HOLE' is not usable.
        """

        # Call the base class constructor first
        _FilemapBase.__init__(self, image, log)
        self._log.debug("FilemapSeek: initializing")

        self._probe_seek_hole()

    def _probe_seek_hole(self):
        """
        Check whether the system implements 'SEEK_HOLE' and 'SEEK_DATA'.
        Unfortunately, there seems to be no clean way for detecting this,
        because often the system just fakes them by just assuming that all
        files are fully mapped, so 'SEEK_HOLE' always returns EOF and
        'SEEK_DATA' always returns the requested offset.

        The fake 'SEEK_HOLE' implementation is detected by creating a
        temporary file in the same directory where the image file resides,
        extending it to one block without writing any data, and checking that
        'SEEK_HOLE' reports a hole at offset 0.

        Raises 'ErrorNotSupp' if the probe cannot be carried out or shows that
        'SEEK_HOLE' is not really supported.
        """

        directory = os.path.dirname(self._image_path)

        try:
            tmp_obj = tempfile.TemporaryFile("w+", dir=directory)
        except (IOError, OSError) as err:
            raise ErrorNotSupp("cannot create a temporary in \"%s\": %s" \
                               % (directory, err))

        # Make sure the temporary file is closed on every path, including the
        # error ones.
        try:
            try:
                os.ftruncate(tmp_obj.fileno(), self.block_size)
            except OSError as err:
                raise ErrorNotSupp("cannot truncate temporary file in \"%s\": %s"
                                   % (directory, err))

            offs = _lseek(tmp_obj, 0, _SEEK_HOLE)
        finally:
            tmp_obj.close()

        if offs != 0:
            # We are dealing with the stub 'SEEK_HOLE' implementation which
            # always returns EOF.
            self._log.debug("lseek(0, SEEK_HOLE) returned %d" % offs)
            raise ErrorNotSupp("the file-system does not support "
                               "\"SEEK_HOLE\" and \"SEEK_DATA\" but only "
                               "provides a stub implementation")

    def block_is_mapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        offs = _lseek(self._f_image, block * self.block_size, _SEEK_DATA)
        if offs == -1:
            result = False
        else:
            # The block is mapped only if the nearest data starts within it
            result = (offs // self.block_size == block)

        self._log.debug("FilemapSeek: block_is_mapped(%d) returns %s"
                        % (block, result))
        return result

    def block_is_unmapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        return not self.block_is_mapped(block)

    def _get_ranges(self, start, count, whence1, whence2):
        """
        This function implements 'get_mapped_ranges()' and
        'get_unmapped_ranges()' depending on what is passed in the 'whence1'
        and 'whence2' arguments. 'whence1' is used for finding the beginning
        of the next range and 'whence2' is used for finding its end.
        """

        assert whence1 != whence2
        end = start * self.block_size
        limit = end + count * self.block_size

        while True:
            start = _lseek(self._f_image, end, whence1)
            if start == -1 or start >= limit or start == self.image_size:
                break

            end = _lseek(self._f_image, start, whence2)
            if end == -1 or end == self.image_size:
                # The range lasts till the end of the file, round it up to
                # the block boundary.
                end = self.blocks_cnt * self.block_size
            if end > limit:
                end = limit

            start_blk = start // self.block_size
            end_blk = end // self.block_size - 1
            self._log.debug("FilemapSeek: yielding range (%d, %d)"
                            % (start_blk, end_blk))
            yield (start_blk, end_blk)

    def get_mapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapSeek: get_mapped_ranges(%d, %d(%d))"
                        % (start, count, start + count - 1))
        return self._get_ranges(start, count, _SEEK_DATA, _SEEK_HOLE)

    def get_unmapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapSeek: get_unmapped_ranges(%d, %d(%d))"
                        % (start, count, start + count - 1))
        return self._get_ranges(start, count, _SEEK_HOLE, _SEEK_DATA)
311
312
313# Below goes the FIEMAP ioctl implementation, which is not very readable
314# because it deals with the rather complex FIEMAP ioctl. To understand the
315# code, you need to know the FIEMAP interface, which is documented in the
316# "Documentation/filesystems/fiemap.txt" file in the Linux kernel sources.
317
318# Format string for 'struct fiemap'
319_FIEMAP_FORMAT = "=QQLLLL"
320# sizeof(struct fiemap)
321_FIEMAP_SIZE = struct.calcsize(_FIEMAP_FORMAT)
322# Format string for 'struct fiemap_extent'
323_FIEMAP_EXTENT_FORMAT = "=QQQQQLLLL"
324# sizeof(struct fiemap_extent)
325_FIEMAP_EXTENT_SIZE = struct.calcsize(_FIEMAP_EXTENT_FORMAT)
326# The FIEMAP ioctl number
327_FIEMAP_IOCTL = 0xC020660B
328# This FIEMAP ioctl flag which instructs the kernel to sync the file before
329# reading the block map
330_FIEMAP_FLAG_SYNC = 0x00000001
331# Size of the buffer for 'struct fiemap_extent' elements which will be used
332# when invoking the FIEMAP ioctl. The larger is the buffer, the less times the
333# FIEMAP ioctl will be invoked.
334_FIEMAP_BUFFER_SIZE = 256 * 1024
335
class FilemapFiemap(_FilemapBase):
    """
    This class provides API to the FIEMAP ioctl. Namely, it allows to iterate
    over all mapped blocks and over all holes.

    This class synchronizes the image file every time it invokes the FIEMAP
    ioctl in order to work-around early FIEMAP implementation kernel bugs.
    """

    def __init__(self, image, log=None):
        """
        Initialize a class instance. The 'image' argument is the full path to
        the file or the file object to operate on.

        Raises 'ErrorNotSupp' if the FIEMAP ioctl is not supported.
        """

        # Call the base class constructor first
        _FilemapBase.__init__(self, image, log)
        self._log.debug("FilemapFiemap: initializing")

        self._buf_size = _FIEMAP_BUFFER_SIZE

        # Calculate how many 'struct fiemap_extent' elements fit the buffer
        self._buf_size -= _FIEMAP_SIZE
        self._fiemap_extent_cnt = self._buf_size // _FIEMAP_EXTENT_SIZE
        assert self._fiemap_extent_cnt > 0
        self._buf_size = self._fiemap_extent_cnt * _FIEMAP_EXTENT_SIZE
        self._buf_size += _FIEMAP_SIZE

        # Allocate a mutable buffer for the FIEMAP ioctl
        self._buf = array.array('B', [0] * self._buf_size)

        # Check if the FIEMAP ioctl is supported
        self.block_is_mapped(0)

    def _invoke_fiemap(self, block, count):
        """
        Invoke the FIEMAP ioctl for 'count' blocks of the file starting from
        block number 'block'.

        The full result of the operation is stored in 'self._buf' on exit.
        Returns the unpacked 'struct fiemap' data structure in form of a python
        tuple (just like 'struct.unpack()').

        Raises 'ErrorNotSupp' if the ioctl is not supported and 'Error' if
        'block' is out of range or the ioctl fails for another reason.
        """

        if self.blocks_cnt != 0 and (block < 0 or block >= self.blocks_cnt):
            # The last valid block number is 'blocks_cnt - 1'
            raise Error("bad block number %d, should be within [0, %d]"
                        % (block, self.blocks_cnt - 1))

        # Initialize the 'struct fiemap' part of the buffer. We use the
        # '_FIEMAP_FLAG_SYNC' flag in order to make sure the file is
        # synchronized. The reason for this is that early FIEMAP
        # implementations had many bugs related to cached dirty data, and
        # synchronizing the file is a necessary work-around.
        struct.pack_into(_FIEMAP_FORMAT, self._buf, 0, block * self.block_size,
                         count * self.block_size, _FIEMAP_FLAG_SYNC, 0,
                         self._fiemap_extent_cnt, 0)

        try:
            # The last argument makes the ioctl update 'self._buf' in place
            fcntl.ioctl(self._f_image, _FIEMAP_IOCTL, self._buf, 1)
        except IOError as err:
            # Note, the FIEMAP ioctl is supported by the Linux kernel starting
            # from version 2.6.28 (year 2008).
            if err.errno == errno.EOPNOTSUPP:
                errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \
                         "by the file-system"
                self._log.debug(errstr)
                raise ErrorNotSupp(errstr)
            if err.errno == errno.ENOTTY:
                errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \
                         "by the kernel"
                self._log.debug(errstr)
                raise ErrorNotSupp(errstr)
            raise Error("the FIEMAP ioctl failed for '%s': %s"
                        % (self._image_path, err))

        return struct.unpack_from(_FIEMAP_FORMAT, self._buf, 0)

    def block_is_mapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        struct_fiemap = self._invoke_fiemap(block, 1)

        # The element at index 3 of 'struct_fiemap' is the 'fm_mapped_extents'
        # field. If it contains zero, the block is not mapped, otherwise it is
        # mapped.
        result = bool(struct_fiemap[3])
        self._log.debug("FilemapFiemap: block_is_mapped(%d) returns %s"
                        % (block, result))
        return result

    def block_is_unmapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        return not self.block_is_mapped(block)

    def _unpack_fiemap_extent(self, index):
        """
        Unpack a 'struct fiemap_extent' structure object number 'index' from
        the internal 'self._buf' buffer.
        """

        offset = _FIEMAP_SIZE + _FIEMAP_EXTENT_SIZE * index
        return struct.unpack_from(_FIEMAP_EXTENT_FORMAT, self._buf, offset)

    def _do_get_mapped_ranges(self, start, count):
        """
        Implements most the functionality for the 'get_mapped_ranges()'
        generator: invokes the FIEMAP ioctl, walks through the mapped extents
        and yields mapped block ranges. However, the ranges may be consecutive
        (e.g., (1, 100), (101, 200)) and 'get_mapped_ranges()' simply merges
        them.
        """

        block = start
        while block < start + count:
            struct_fiemap = self._invoke_fiemap(block, count)

            mapped_extents = struct_fiemap[3]
            if mapped_extents == 0:
                # No more mapped blocks
                return

            extent = 0
            while extent < mapped_extents:
                fiemap_extent = self._unpack_fiemap_extent(extent)

                # Start of the extent
                extent_start = fiemap_extent[0]
                # Starting block number of the extent
                extent_block = extent_start // self.block_size
                # Length of the extent
                extent_len = fiemap_extent[2]
                # Count of blocks in the extent
                extent_count = extent_len // self.block_size

                # Extent length and offset have to be block-aligned
                assert extent_start % self.block_size == 0
                assert extent_len % self.block_size == 0

                if extent_block > start + count - 1:
                    return

                # Clip the extent to the requested area
                first = max(extent_block, block)
                last = min(extent_block + extent_count, start + count) - 1
                yield (first, last)

                extent += 1

            # Continue from the block which follows the last seen extent
            block = extent_block + extent_count

    def get_mapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapFiemap: get_mapped_ranges(%d, %d(%d))"
                        % (start, count, start + count - 1))
        iterator = self._do_get_mapped_ranges(start, count)

        # There may be no mapped blocks at all. 'StopIteration' must not leak
        # out of this generator, otherwise it is turned into 'RuntimeError'
        # by Python versions which implement PEP 479.
        try:
            first_prev, last_prev = next(iterator)
        except StopIteration:
            return

        for first, last in iterator:
            if last_prev == first - 1:
                # Merge consecutive ranges
                last_prev = last
            else:
                self._log.debug("FilemapFiemap: yielding range (%d, %d)"
                                % (first_prev, last_prev))
                yield (first_prev, last_prev)
                first_prev, last_prev = first, last

        self._log.debug("FilemapFiemap: yielding range (%d, %d)"
                        % (first_prev, last_prev))
        yield (first_prev, last_prev)

    def get_unmapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapFiemap: get_unmapped_ranges(%d, %d(%d))"
                        % (start, count, start + count - 1))
        hole_first = start
        for first, last in self._do_get_mapped_ranges(start, count):
            if first > hole_first:
                self._log.debug("FilemapFiemap: yielding range (%d, %d)"
                                % (hole_first, first - 1))
                yield (hole_first, first - 1)

            hole_first = last + 1

        # The hole between the last mapped range and the end of the area
        if hole_first < start + count:
            self._log.debug("FilemapFiemap: yielding range (%d, %d)"
                            % (hole_first, start + count - 1))
            yield (hole_first, start + count - 1)
522
def filemap(image, log=None):
    """
    Factory which picks a Filemap implementation for 'image'. A
    'FilemapFiemap' instance is tried first; if its creation fails with
    'ErrorNotSupp', a 'FilemapSeek' instance is created instead. Exceptions
    raised while creating the 'FilemapSeek' instance, including
    'ErrorNotSupp', are propagated to the caller.
    """

    try:
        fmap = FilemapFiemap(image, log)
    except ErrorNotSupp:
        fmap = FilemapSeek(image, log)
    return fmap
537
def sparse_copy(src_fname, dst_fname, skip=0, seek=0,
                length=0, api=None):
    """
    Efficiently copy sparse file to or into another file. Only the mapped
    block ranges of the source file are read and written.

    src_fname: path to source file
    dst_fname: path to destination file
    skip: skip N bytes at the start of src
    seek: seek N bytes from the start of dst
    length: read N bytes from src and write them to dst (0 means no limit)
    api: callable which takes the source path and returns a filemap object,
         e.g. the FilemapFiemap or FilemapSeek class; 'filemap' is used when
         it is not specified

    If the destination file cannot be opened for update, it is created and
    extended to the final size before the data are copied.
    """
    if not api:
        api = filemap
    fmap = api(src_fname)

    try:
        dst_file = open(dst_fname, 'r+b')
        created = False
    except IOError:
        dst_file = open(dst_fname, 'wb')
        created = True

    # Make sure the destination file is closed on every path, including the
    # early returns and the error paths.
    try:
        if created:
            if length:
                dst_size = length + seek
            else:
                dst_size = os.path.getsize(src_fname) + seek - skip
            dst_file.truncate(dst_size)

        written = 0
        for first, last in fmap.get_mapped_ranges(0, fmap.blocks_cnt):
            start = first * fmap.block_size
            end = (last + 1) * fmap.block_size

            # The whole range lies within the skipped area
            if skip >= end:
                continue

            if start < skip < end:
                start = skip

            fmap._f_image.seek(start, os.SEEK_SET)

            # Account for the hole which precedes this range: 'written' is the
            # offset of the range relative to the first copied source byte.
            written = start - skip
            if length and written >= length:
                return

            dst_file.seek(seek + start - skip, os.SEEK_SET)

            chunk_size = 1024 * 1024
            to_read = end - start
            read = 0

            while read < to_read:
                size = min(chunk_size, to_read - read)
                if length and written + size > length:
                    size = length - written
                chunk = fmap._f_image.read(size)
                dst_file.write(chunk)
                read += size
                written += size
                if written == length:
                    return
    finally:
        dst_file.close()