blob: 162603ed0a101d9bf472acb0d45a1e06856b692e [file] [log] [blame]
Patrick Williamsc0f7c042017-02-23 20:41:17 -06001# Copyright (c) 2012 Intel, Inc.
2#
3# This program is free software; you can redistribute it and/or modify
4# it under the terms of the GNU General Public License, version 2,
5# as published by the Free Software Foundation.
6#
7# This program is distributed in the hope that it will be useful, but
8# WITHOUT ANY WARRANTY; without even the implied warranty of
9# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10# General Public License for more details.
11
12"""
13This module implements python implements a way to get file block. Two methods
14are supported - the FIEMAP ioctl and the 'SEEK_HOLE / SEEK_DATA' features of
15the file seek syscall. The former is implemented by the 'FilemapFiemap' class,
16the latter is implemented by the 'FilemapSeek' class. Both classes provide the
17same API. The 'filemap' function automatically selects which class can be used
18and returns an instance of the class.
19"""
20
21# Disable the following pylint recommendations:
22# * Too many instance attributes (R0902)
23# pylint: disable=R0902
24
25import os
26import struct
27import array
28import fcntl
29import tempfile
30import logging
31
32def get_block_size(file_obj):
33 """
34 Returns block size for file object 'file_obj'. Errors are indicated by the
35 'IOError' exception.
36 """
37
38 from fcntl import ioctl
39 import struct
40
41 # Get the block size of the host file-system for the image file by calling
42 # the FIGETBSZ ioctl (number 2).
43 binary_data = ioctl(file_obj, 2, struct.pack('I', 0))
44 return struct.unpack('I', binary_data)[0]
45
46class ErrorNotSupp(Exception):
47 """
48 An exception of this type is raised when the 'FIEMAP' or 'SEEK_HOLE' feature
49 is not supported either by the kernel or the file-system.
50 """
51 pass
52
53class Error(Exception):
54 """A class for all the other exceptions raised by this module."""
55 pass
56
57
58class _FilemapBase(object):
59 """
60 This is a base class for a couple of other classes in this module. This
61 class simply performs the common parts of the initialization process: opens
62 the image file, gets its size, etc. The 'log' parameter is the logger object
63 to use for printing messages.
64 """
65
66 def __init__(self, image, log=None):
67 """
68 Initialize a class instance. The 'image' argument is full path to the
69 file or file object to operate on.
70 """
71
72 self._log = log
73 if self._log is None:
74 self._log = logging.getLogger(__name__)
75
76 self._f_image_needs_close = False
77
78 if hasattr(image, "fileno"):
79 self._f_image = image
80 self._image_path = image.name
81 else:
82 self._image_path = image
83 self._open_image_file()
84
85 try:
86 self.image_size = os.fstat(self._f_image.fileno()).st_size
87 except IOError as err:
88 raise Error("cannot get information about file '%s': %s"
89 % (self._f_image.name, err))
90
91 try:
92 self.block_size = get_block_size(self._f_image)
93 except IOError as err:
94 raise Error("cannot get block size for '%s': %s"
95 % (self._image_path, err))
96
97 self.blocks_cnt = self.image_size + self.block_size - 1
98 self.blocks_cnt //= self.block_size
99
100 try:
101 self._f_image.flush()
102 except IOError as err:
103 raise Error("cannot flush image file '%s': %s"
104 % (self._image_path, err))
105
106 try:
107 os.fsync(self._f_image.fileno()),
108 except OSError as err:
109 raise Error("cannot synchronize image file '%s': %s "
110 % (self._image_path, err.strerror))
111
112 self._log.debug("opened image \"%s\"" % self._image_path)
113 self._log.debug("block size %d, blocks count %d, image size %d"
114 % (self.block_size, self.blocks_cnt, self.image_size))
115
116 def __del__(self):
117 """The class destructor which just closes the image file."""
118 if self._f_image_needs_close:
119 self._f_image.close()
120
121 def _open_image_file(self):
122 """Open the image file."""
123 try:
124 self._f_image = open(self._image_path, 'rb')
125 except IOError as err:
126 raise Error("cannot open image file '%s': %s"
127 % (self._image_path, err))
128
129 self._f_image_needs_close = True
130
131 def block_is_mapped(self, block): # pylint: disable=W0613,R0201
132 """
133 This method has has to be implemented by child classes. It returns
134 'True' if block number 'block' of the image file is mapped and 'False'
135 otherwise.
136 """
137
138 raise Error("the method is not implemented")
139
140 def block_is_unmapped(self, block): # pylint: disable=W0613,R0201
141 """
142 This method has has to be implemented by child classes. It returns
143 'True' if block number 'block' of the image file is not mapped (hole)
144 and 'False' otherwise.
145 """
146
147 raise Error("the method is not implemented")
148
149 def get_mapped_ranges(self, start, count): # pylint: disable=W0613,R0201
150 """
151 This method has has to be implemented by child classes. This is a
152 generator which yields ranges of mapped blocks in the file. The ranges
153 are tuples of 2 elements: [first, last], where 'first' is the first
154 mapped block and 'last' is the last mapped block.
155
156 The ranges are yielded for the area of the file of size 'count' blocks,
157 starting from block 'start'.
158 """
159
160 raise Error("the method is not implemented")
161
162 def get_unmapped_ranges(self, start, count): # pylint: disable=W0613,R0201
163 """
164 This method has has to be implemented by child classes. Just like
165 'get_mapped_ranges()', but yields unmapped block ranges instead
166 (holes).
167 """
168
169 raise Error("the method is not implemented")
170
171
172# The 'SEEK_HOLE' and 'SEEK_DATA' options of the file seek system call
173_SEEK_DATA = 3
174_SEEK_HOLE = 4
175
176def _lseek(file_obj, offset, whence):
177 """This is a helper function which invokes 'os.lseek' for file object
178 'file_obj' and with specified 'offset' and 'whence'. The 'whence'
179 argument is supposed to be either '_SEEK_DATA' or '_SEEK_HOLE'. When
180 there is no more data or hole starting from 'offset', this function
181 returns '-1'. Otherwise the data or hole position is returned."""
182
183 try:
184 return os.lseek(file_obj.fileno(), offset, whence)
185 except OSError as err:
186 # The 'lseek' system call returns the ENXIO if there is no data or
187 # hole starting from the specified offset.
188 if err.errno == os.errno.ENXIO:
189 return -1
190 elif err.errno == os.errno.EINVAL:
191 raise ErrorNotSupp("the kernel or file-system does not support "
192 "\"SEEK_HOLE\" and \"SEEK_DATA\"")
193 else:
194 raise
195
196class FilemapSeek(_FilemapBase):
197 """
198 This class uses the 'SEEK_HOLE' and 'SEEK_DATA' to find file block mapping.
199 Unfortunately, the current implementation requires the caller to have write
200 access to the image file.
201 """
202
203 def __init__(self, image, log=None):
204 """Refer the '_FilemapBase' class for the documentation."""
205
206 # Call the base class constructor first
207 _FilemapBase.__init__(self, image, log)
208 self._log.debug("FilemapSeek: initializing")
209
210 self._probe_seek_hole()
211
212 def _probe_seek_hole(self):
213 """
214 Check whether the system implements 'SEEK_HOLE' and 'SEEK_DATA'.
215 Unfortunately, there seems to be no clean way for detecting this,
216 because often the system just fakes them by just assuming that all
217 files are fully mapped, so 'SEEK_HOLE' always returns EOF and
218 'SEEK_DATA' always returns the requested offset.
219
220 I could not invent a better way of detecting the fake 'SEEK_HOLE'
221 implementation than just to create a temporary file in the same
222 directory where the image file resides. It would be nice to change this
223 to something better.
224 """
225
226 directory = os.path.dirname(self._image_path)
227
228 try:
229 tmp_obj = tempfile.TemporaryFile("w+", dir=directory)
230 except IOError as err:
231 raise ErrorNotSupp("cannot create a temporary in \"%s\": %s"
232 % (directory, err))
233
234 try:
235 os.ftruncate(tmp_obj.fileno(), self.block_size)
236 except OSError as err:
237 raise ErrorNotSupp("cannot truncate temporary file in \"%s\": %s"
238 % (directory, err))
239
240 offs = _lseek(tmp_obj, 0, _SEEK_HOLE)
241 if offs != 0:
242 # We are dealing with the stub 'SEEK_HOLE' implementation which
243 # always returns EOF.
244 self._log.debug("lseek(0, SEEK_HOLE) returned %d" % offs)
245 raise ErrorNotSupp("the file-system does not support "
246 "\"SEEK_HOLE\" and \"SEEK_DATA\" but only "
247 "provides a stub implementation")
248
249 tmp_obj.close()
250
251 def block_is_mapped(self, block):
252 """Refer the '_FilemapBase' class for the documentation."""
253 offs = _lseek(self._f_image, block * self.block_size, _SEEK_DATA)
254 if offs == -1:
255 result = False
256 else:
257 result = (offs // self.block_size == block)
258
259 self._log.debug("FilemapSeek: block_is_mapped(%d) returns %s"
260 % (block, result))
261 return result
262
263 def block_is_unmapped(self, block):
264 """Refer the '_FilemapBase' class for the documentation."""
265 return not self.block_is_mapped(block)
266
267 def _get_ranges(self, start, count, whence1, whence2):
268 """
269 This function implements 'get_mapped_ranges()' and
270 'get_unmapped_ranges()' depending on what is passed in the 'whence1'
271 and 'whence2' arguments.
272 """
273
274 assert whence1 != whence2
275 end = start * self.block_size
276 limit = end + count * self.block_size
277
278 while True:
279 start = _lseek(self._f_image, end, whence1)
280 if start == -1 or start >= limit or start == self.image_size:
281 break
282
283 end = _lseek(self._f_image, start, whence2)
284 if end == -1 or end == self.image_size:
285 end = self.blocks_cnt * self.block_size
286 if end > limit:
287 end = limit
288
289 start_blk = start // self.block_size
290 end_blk = end // self.block_size - 1
291 self._log.debug("FilemapSeek: yielding range (%d, %d)"
292 % (start_blk, end_blk))
293 yield (start_blk, end_blk)
294
295 def get_mapped_ranges(self, start, count):
296 """Refer the '_FilemapBase' class for the documentation."""
297 self._log.debug("FilemapSeek: get_mapped_ranges(%d, %d(%d))"
298 % (start, count, start + count - 1))
299 return self._get_ranges(start, count, _SEEK_DATA, _SEEK_HOLE)
300
301 def get_unmapped_ranges(self, start, count):
302 """Refer the '_FilemapBase' class for the documentation."""
303 self._log.debug("FilemapSeek: get_unmapped_ranges(%d, %d(%d))"
304 % (start, count, start + count - 1))
305 return self._get_ranges(start, count, _SEEK_HOLE, _SEEK_DATA)
306
307
308# Below goes the FIEMAP ioctl implementation, which is not very readable
309# because it deals with the rather complex FIEMAP ioctl. To understand the
310# code, you need to know the FIEMAP interface, which is documented in the
311# "Documentation/filesystems/fiemap.txt" file in the Linux kernel sources.
312
313# Format string for 'struct fiemap'
314_FIEMAP_FORMAT = "=QQLLLL"
315# sizeof(struct fiemap)
316_FIEMAP_SIZE = struct.calcsize(_FIEMAP_FORMAT)
317# Format string for 'struct fiemap_extent'
318_FIEMAP_EXTENT_FORMAT = "=QQQQQLLLL"
319# sizeof(struct fiemap_extent)
320_FIEMAP_EXTENT_SIZE = struct.calcsize(_FIEMAP_EXTENT_FORMAT)
321# The FIEMAP ioctl number
322_FIEMAP_IOCTL = 0xC020660B
323# This FIEMAP ioctl flag which instructs the kernel to sync the file before
324# reading the block map
325_FIEMAP_FLAG_SYNC = 0x00000001
326# Size of the buffer for 'struct fiemap_extent' elements which will be used
327# when invoking the FIEMAP ioctl. The larger is the buffer, the less times the
328# FIEMAP ioctl will be invoked.
329_FIEMAP_BUFFER_SIZE = 256 * 1024
330
331class FilemapFiemap(_FilemapBase):
332 """
333 This class provides API to the FIEMAP ioctl. Namely, it allows to iterate
334 over all mapped blocks and over all holes.
335
336 This class synchronizes the image file every time it invokes the FIEMAP
337 ioctl in order to work-around early FIEMAP implementation kernel bugs.
338 """
339
340 def __init__(self, image, log=None):
341 """
342 Initialize a class instance. The 'image' argument is full the file
343 object to operate on.
344 """
345
346 # Call the base class constructor first
347 _FilemapBase.__init__(self, image, log)
348 self._log.debug("FilemapFiemap: initializing")
349
350 self._buf_size = _FIEMAP_BUFFER_SIZE
351
352 # Calculate how many 'struct fiemap_extent' elements fit the buffer
353 self._buf_size -= _FIEMAP_SIZE
354 self._fiemap_extent_cnt = self._buf_size // _FIEMAP_EXTENT_SIZE
355 assert self._fiemap_extent_cnt > 0
356 self._buf_size = self._fiemap_extent_cnt * _FIEMAP_EXTENT_SIZE
357 self._buf_size += _FIEMAP_SIZE
358
359 # Allocate a mutable buffer for the FIEMAP ioctl
360 self._buf = array.array('B', [0] * self._buf_size)
361
362 # Check if the FIEMAP ioctl is supported
363 self.block_is_mapped(0)
364
365 def _invoke_fiemap(self, block, count):
366 """
367 Invoke the FIEMAP ioctl for 'count' blocks of the file starting from
368 block number 'block'.
369
370 The full result of the operation is stored in 'self._buf' on exit.
371 Returns the unpacked 'struct fiemap' data structure in form of a python
372 list (just like 'struct.upack()').
373 """
374
375 if self.blocks_cnt != 0 and (block < 0 or block >= self.blocks_cnt):
376 raise Error("bad block number %d, should be within [0, %d]"
377 % (block, self.blocks_cnt))
378
379 # Initialize the 'struct fiemap' part of the buffer. We use the
380 # '_FIEMAP_FLAG_SYNC' flag in order to make sure the file is
381 # synchronized. The reason for this is that early FIEMAP
382 # implementations had many bugs related to cached dirty data, and
383 # synchronizing the file is a necessary work-around.
384 struct.pack_into(_FIEMAP_FORMAT, self._buf, 0, block * self.block_size,
385 count * self.block_size, _FIEMAP_FLAG_SYNC, 0,
386 self._fiemap_extent_cnt, 0)
387
388 try:
389 fcntl.ioctl(self._f_image, _FIEMAP_IOCTL, self._buf, 1)
390 except IOError as err:
391 # Note, the FIEMAP ioctl is supported by the Linux kernel starting
392 # from version 2.6.28 (year 2008).
393 if err.errno == os.errno.EOPNOTSUPP:
394 errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \
395 "by the file-system"
396 self._log.debug(errstr)
397 raise ErrorNotSupp(errstr)
398 if err.errno == os.errno.ENOTTY:
399 errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \
400 "by the kernel"
401 self._log.debug(errstr)
402 raise ErrorNotSupp(errstr)
403 raise Error("the FIEMAP ioctl failed for '%s': %s"
404 % (self._image_path, err))
405
406 return struct.unpack(_FIEMAP_FORMAT, self._buf[:_FIEMAP_SIZE])
407
408 def block_is_mapped(self, block):
409 """Refer the '_FilemapBase' class for the documentation."""
410 struct_fiemap = self._invoke_fiemap(block, 1)
411
412 # The 3rd element of 'struct_fiemap' is the 'fm_mapped_extents' field.
413 # If it contains zero, the block is not mapped, otherwise it is
414 # mapped.
415 result = bool(struct_fiemap[3])
416 self._log.debug("FilemapFiemap: block_is_mapped(%d) returns %s"
417 % (block, result))
418 return result
419
420 def block_is_unmapped(self, block):
421 """Refer the '_FilemapBase' class for the documentation."""
422 return not self.block_is_mapped(block)
423
424 def _unpack_fiemap_extent(self, index):
425 """
426 Unpack a 'struct fiemap_extent' structure object number 'index' from
427 the internal 'self._buf' buffer.
428 """
429
430 offset = _FIEMAP_SIZE + _FIEMAP_EXTENT_SIZE * index
431 return struct.unpack(_FIEMAP_EXTENT_FORMAT,
432 self._buf[offset : offset + _FIEMAP_EXTENT_SIZE])
433
434 def _do_get_mapped_ranges(self, start, count):
435 """
436 Implements most the functionality for the 'get_mapped_ranges()'
437 generator: invokes the FIEMAP ioctl, walks through the mapped extents
438 and yields mapped block ranges. However, the ranges may be consecutive
439 (e.g., (1, 100), (100, 200)) and 'get_mapped_ranges()' simply merges
440 them.
441 """
442
443 block = start
444 while block < start + count:
445 struct_fiemap = self._invoke_fiemap(block, count)
446
447 mapped_extents = struct_fiemap[3]
448 if mapped_extents == 0:
449 # No more mapped blocks
450 return
451
452 extent = 0
453 while extent < mapped_extents:
454 fiemap_extent = self._unpack_fiemap_extent(extent)
455
456 # Start of the extent
457 extent_start = fiemap_extent[0]
458 # Starting block number of the extent
459 extent_block = extent_start // self.block_size
460 # Length of the extent
461 extent_len = fiemap_extent[2]
462 # Count of blocks in the extent
463 extent_count = extent_len // self.block_size
464
465 # Extent length and offset have to be block-aligned
466 assert extent_start % self.block_size == 0
467 assert extent_len % self.block_size == 0
468
469 if extent_block > start + count - 1:
470 return
471
472 first = max(extent_block, block)
473 last = min(extent_block + extent_count, start + count) - 1
474 yield (first, last)
475
476 extent += 1
477
478 block = extent_block + extent_count
479
480 def get_mapped_ranges(self, start, count):
481 """Refer the '_FilemapBase' class for the documentation."""
482 self._log.debug("FilemapFiemap: get_mapped_ranges(%d, %d(%d))"
483 % (start, count, start + count - 1))
484 iterator = self._do_get_mapped_ranges(start, count)
485 first_prev, last_prev = next(iterator)
486
487 for first, last in iterator:
488 if last_prev == first - 1:
489 last_prev = last
490 else:
491 self._log.debug("FilemapFiemap: yielding range (%d, %d)"
492 % (first_prev, last_prev))
493 yield (first_prev, last_prev)
494 first_prev, last_prev = first, last
495
496 self._log.debug("FilemapFiemap: yielding range (%d, %d)"
497 % (first_prev, last_prev))
498 yield (first_prev, last_prev)
499
500 def get_unmapped_ranges(self, start, count):
501 """Refer the '_FilemapBase' class for the documentation."""
502 self._log.debug("FilemapFiemap: get_unmapped_ranges(%d, %d(%d))"
503 % (start, count, start + count - 1))
504 hole_first = start
505 for first, last in self._do_get_mapped_ranges(start, count):
506 if first > hole_first:
507 self._log.debug("FilemapFiemap: yielding range (%d, %d)"
508 % (hole_first, first - 1))
509 yield (hole_first, first - 1)
510
511 hole_first = last + 1
512
513 if hole_first < start + count:
514 self._log.debug("FilemapFiemap: yielding range (%d, %d)"
515 % (hole_first, start + count - 1))
516 yield (hole_first, start + count - 1)
517
518def filemap(image, log=None):
519 """
520 Create and return an instance of a Filemap class - 'FilemapFiemap' or
521 'FilemapSeek', depending on what the system we run on supports. If the
522 FIEMAP ioctl is supported, an instance of the 'FilemapFiemap' class is
523 returned. Otherwise, if 'SEEK_HOLE' is supported an instance of the
524 'FilemapSeek' class is returned. If none of these are supported, the
525 function generates an 'Error' type exception.
526 """
527
528 try:
529 return FilemapFiemap(image, log)
530 except ErrorNotSupp:
531 return FilemapSeek(image, log)
532
533def sparse_copy(src_fname, dst_fname, offset=0, skip=0):
534 """Efficiently copy sparse file to or into another file."""
535 fmap = filemap(src_fname)
536 try:
537 dst_file = open(dst_fname, 'r+b')
538 except IOError:
539 dst_file = open(dst_fname, 'wb')
540
541 for first, last in fmap.get_mapped_ranges(0, fmap.blocks_cnt):
542 start = first * fmap.block_size
543 end = (last + 1) * fmap.block_size
544
545 if start < skip < end:
Brad Bishop37a0e4d2017-12-04 01:01:44 -0500546 fmap._f_image.seek(skip, os.SEEK_SET)
547 else:
548 fmap._f_image.seek(start, os.SEEK_SET)
Patrick Williamsc0f7c042017-02-23 20:41:17 -0600549 dst_file.seek(offset + start, os.SEEK_SET)
550
551 chunk_size = 1024 * 1024
552 to_read = end - start
553 read = 0
554
555 while read < to_read:
556 if read + chunk_size > to_read:
557 chunk_size = to_read - read
558 chunk = fmap._f_image.read(chunk_size)
559 dst_file.write(chunk)
560 read += chunk_size
561 dst_file.close()