diff options
author | Lukas Czerner <lczerner@redhat.com> | 2011-08-19 08:50:46 -0400 |
---|---|---|
committer | Jens Axboe <jaxboe@fusionio.com> | 2011-08-19 08:50:46 -0400 |
commit | dfaa2ef68e80c378e610e3c8c536f1c239e8d3ef (patch) | |
tree | 1b827fb8b3785dfca6c5e19d7dbbb0a9d59a3d2c /drivers/block | |
parent | 548ef6cc26ca1c81f19855d57d3fb0f9a7ce3385 (diff) |
loop: add discard support for loop devices
This commit adds discard support for loop devices. Discard is usually
supported by SSD and thinly provisioned devices as a method for
reclaiming unused space. This is no different than trying to reclaim
back space which is not used by the file system on the image, but it
still occupies space on the host file system.
We can do the reclamation on file systems which support hole
punching. So when discard request gets to the loop driver we can
translate that to punch a hole to the underlying file, hence reclaim
the free space.
This is very useful for trimming down the size of the image to only what
is really used by the file system on that image. Fstrim may be used for
that purpose.
It has been tested on ext4, xfs and btrfs with the image file systems
ext4, ext3, xfs and btrfs. ext4, or ext3 image on ext4 file system has
some problems but it seems that ext4 punch hole implementation is
somewhat flawed and it is unrelated to this commit.
Also this is a very good method of validating file systems punch hole
implementation.
Note that when encryption is used, discard support is disabled, because
using it might leak some information useful to a possible attacker.
Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/loop.c | 54 |
1 files changed, 54 insertions, 0 deletions
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 76c8da78212b..936cac3c3126 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c | |||
@@ -75,6 +75,7 @@ | |||
75 | #include <linux/kthread.h> | 75 | #include <linux/kthread.h> |
76 | #include <linux/splice.h> | 76 | #include <linux/splice.h> |
77 | #include <linux/sysfs.h> | 77 | #include <linux/sysfs.h> |
78 | #include <linux/falloc.h> | ||
78 | 79 | ||
79 | #include <asm/uaccess.h> | 80 | #include <asm/uaccess.h> |
80 | 81 | ||
@@ -484,6 +485,29 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) | |||
484 | } | 485 | } |
485 | } | 486 | } |
486 | 487 | ||
488 | /* | ||
489 | * We use punch hole to reclaim the free space used by the | ||
490 | * image a.k.a. discard. However we do not support discard if | |
491 | * encryption is enabled, because it may give an attacker | ||
492 | * useful information. | ||
493 | */ | ||
494 | if (bio->bi_rw & REQ_DISCARD) { | ||
495 | struct file *file = lo->lo_backing_file; | ||
496 | int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE; | ||
497 | |||
498 | if ((!file->f_op->fallocate) || | ||
499 | lo->lo_encrypt_key_size) { | ||
500 | ret = -EOPNOTSUPP; | ||
501 | goto out; | ||
502 | } | ||
503 | ret = file->f_op->fallocate(file, mode, pos, | ||
504 | bio->bi_size); | ||
505 | if (unlikely(ret && ret != -EINVAL && | ||
506 | ret != -EOPNOTSUPP)) | ||
507 | ret = -EIO; | ||
508 | goto out; | ||
509 | } | ||
510 | |||
487 | ret = lo_send(lo, bio, pos); | 511 | ret = lo_send(lo, bio, pos); |
488 | 512 | ||
489 | if ((bio->bi_rw & REQ_FUA) && !ret) { | 513 | if ((bio->bi_rw & REQ_FUA) && !ret) { |
@@ -814,6 +838,35 @@ static void loop_sysfs_exit(struct loop_device *lo) | |||
814 | &loop_attribute_group); | 838 | &loop_attribute_group); |
815 | } | 839 | } |
816 | 840 | ||
841 | static void loop_config_discard(struct loop_device *lo) | ||
842 | { | ||
843 | struct file *file = lo->lo_backing_file; | ||
844 | struct inode *inode = file->f_mapping->host; | ||
845 | struct request_queue *q = lo->lo_queue; | ||
846 | |||
847 | /* | ||
848 | * We use punch hole to reclaim the free space used by the | ||
849 | * image a.k.a. discard. However we do not support discard if | |
850 | * encryption is enabled, because it may give an attacker | ||
851 | * useful information. | ||
852 | */ | ||
853 | if ((!file->f_op->fallocate) || | ||
854 | lo->lo_encrypt_key_size) { | ||
855 | q->limits.discard_granularity = 0; | ||
856 | q->limits.discard_alignment = 0; | ||
857 | q->limits.max_discard_sectors = 0; | ||
858 | q->limits.discard_zeroes_data = 0; | ||
859 | queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); | ||
860 | return; | ||
861 | } | ||
862 | |||
863 | q->limits.discard_granularity = inode->i_sb->s_blocksize; | ||
864 | q->limits.discard_alignment = inode->i_sb->s_blocksize; | ||
865 | q->limits.max_discard_sectors = UINT_MAX >> 9; | ||
866 | q->limits.discard_zeroes_data = 1; | ||
867 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); | ||
868 | } | ||
869 | |||
817 | static int loop_set_fd(struct loop_device *lo, fmode_t mode, | 870 | static int loop_set_fd(struct loop_device *lo, fmode_t mode, |
818 | struct block_device *bdev, unsigned int arg) | 871 | struct block_device *bdev, unsigned int arg) |
819 | { | 872 | { |
@@ -1090,6 +1143,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) | |||
1090 | if (figure_loop_size(lo)) | 1143 | if (figure_loop_size(lo)) |
1091 | return -EFBIG; | 1144 | return -EFBIG; |
1092 | } | 1145 | } |
1146 | loop_config_discard(lo); | ||
1093 | 1147 | ||
1094 | memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); | 1148 | memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); |
1095 | memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE); | 1149 | memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE); |