about summary refs log tree commit diff stats
path: root/drivers/block/loop.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/loop.c')
-rw-r--r--drivers/block/loop.c251
1 files changed, 129 insertions, 122 deletions
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 4720c7ade0ae..3d806820280e 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -76,6 +76,8 @@
76#include <linux/splice.h> 76#include <linux/splice.h>
77#include <linux/sysfs.h> 77#include <linux/sysfs.h>
78#include <linux/miscdevice.h> 78#include <linux/miscdevice.h>
79#include <linux/falloc.h>
80
79#include <asm/uaccess.h> 81#include <asm/uaccess.h>
80 82
81static DEFINE_IDR(loop_index_idr); 83static DEFINE_IDR(loop_index_idr);
@@ -203,74 +205,6 @@ lo_do_transfer(struct loop_device *lo, int cmd,
203} 205}
204 206
205/** 207/**
206 * do_lo_send_aops - helper for writing data to a loop device
207 *
208 * This is the fast version for backing filesystems which implement the address
209 * space operations write_begin and write_end.
210 */
211static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
212 loff_t pos, struct page *unused)
213{
214 	struct file *file = lo->lo_backing_file;
215 struct address_space *mapping = file->f_mapping;
216 pgoff_t index;
217 unsigned offset, bv_offs;
218 int len, ret;
219
220 mutex_lock(&mapping->host->i_mutex);
221 index = pos >> PAGE_CACHE_SHIFT;
222 offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1);
223 bv_offs = bvec->bv_offset;
224 len = bvec->bv_len;
225 while (len > 0) {
226 sector_t IV;
227 unsigned size, copied;
228 int transfer_result;
229 struct page *page;
230 void *fsdata;
231
232 IV = ((sector_t)index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9);
233 size = PAGE_CACHE_SIZE - offset;
234 if (size > len)
235 size = len;
236
237 ret = pagecache_write_begin(file, mapping, pos, size, 0,
238 &page, &fsdata);
239 if (ret)
240 goto fail;
241
242 file_update_time(file);
243
244 transfer_result = lo_do_transfer(lo, WRITE, page, offset,
245 bvec->bv_page, bv_offs, size, IV);
246 copied = size;
247 if (unlikely(transfer_result))
248 copied = 0;
249
250 ret = pagecache_write_end(file, mapping, pos, size, copied,
251 page, fsdata);
252 if (ret < 0 || ret != copied)
253 goto fail;
254
255 if (unlikely(transfer_result))
256 goto fail;
257
258 bv_offs += copied;
259 len -= copied;
260 offset = 0;
261 index++;
262 pos += copied;
263 }
264 ret = 0;
265out:
266 mutex_unlock(&mapping->host->i_mutex);
267 return ret;
268fail:
269 ret = -1;
270 goto out;
271}
272
273/**
274 * __do_lo_send_write - helper for writing data to a loop device 208 * __do_lo_send_write - helper for writing data to a loop device
275 * 209 *
276 * This helper just factors out common code between do_lo_send_direct_write() 210 * This helper just factors out common code between do_lo_send_direct_write()
@@ -297,10 +231,8 @@ static int __do_lo_send_write(struct file *file,
297/** 231/**
298 * do_lo_send_direct_write - helper for writing data to a loop device 232 * do_lo_send_direct_write - helper for writing data to a loop device
299 * 233 *
300 * This is the fast, non-transforming version for backing filesystems which do 234 * This is the fast, non-transforming version that does not need double
301 * not implement the address space operations write_begin and write_end. 235 * buffering.
302 * It uses the write file operation which should be present on all writeable
303 * filesystems.
304 */ 236 */
305static int do_lo_send_direct_write(struct loop_device *lo, 237static int do_lo_send_direct_write(struct loop_device *lo,
306 struct bio_vec *bvec, loff_t pos, struct page *page) 238 struct bio_vec *bvec, loff_t pos, struct page *page)
@@ -316,15 +248,9 @@ static int do_lo_send_direct_write(struct loop_device *lo,
316/** 248/**
317 * do_lo_send_write - helper for writing data to a loop device 249 * do_lo_send_write - helper for writing data to a loop device
318 * 250 *
319 * This is the slow, transforming version for filesystems which do not 251 * This is the slow, transforming version that needs to double buffer the
320 * implement the address space operations write_begin and write_end. It 252 * data as it cannot do the transformations in place without having direct
321 * uses the write file operation which should be present on all writeable 253 * access to the destination pages of the backing file.
322 * filesystems.
323 *
324 * Using fops->write is slower than using aops->{prepare,commit}_write in the
325 * transforming case because we need to double buffer the data as we cannot do
326 * the transformations in place as we do not have direct access to the
327 * destination pages of the backing file.
328 */ 254 */
329static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec, 255static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec,
330 loff_t pos, struct page *page) 256 loff_t pos, struct page *page)
@@ -350,17 +276,16 @@ static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos)
350 struct page *page = NULL; 276 struct page *page = NULL;
351 int i, ret = 0; 277 int i, ret = 0;
352 278
353 do_lo_send = do_lo_send_aops; 279 if (lo->transfer != transfer_none) {
354 if (!(lo->lo_flags & LO_FLAGS_USE_AOPS)) { 280 page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
281 if (unlikely(!page))
282 goto fail;
283 kmap(page);
284 do_lo_send = do_lo_send_write;
285 } else {
355 do_lo_send = do_lo_send_direct_write; 286 do_lo_send = do_lo_send_direct_write;
356 if (lo->transfer != transfer_none) {
357 page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
358 if (unlikely(!page))
359 goto fail;
360 kmap(page);
361 do_lo_send = do_lo_send_write;
362 }
363 } 287 }
288
364 bio_for_each_segment(bvec, bio, i) { 289 bio_for_each_segment(bvec, bio, i) {
365 ret = do_lo_send(lo, bvec, pos, page); 290 ret = do_lo_send(lo, bvec, pos, page);
366 if (ret < 0) 291 if (ret < 0)
@@ -484,6 +409,29 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
484 } 409 }
485 } 410 }
486 411
412 /*
413 * We use punch hole to reclaim the free space used by the
414 	 * image a.k.a. discard. However we do not support discard if
415 * encryption is enabled, because it may give an attacker
416 * useful information.
417 */
418 if (bio->bi_rw & REQ_DISCARD) {
419 struct file *file = lo->lo_backing_file;
420 int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
421
422 if ((!file->f_op->fallocate) ||
423 lo->lo_encrypt_key_size) {
424 ret = -EOPNOTSUPP;
425 goto out;
426 }
427 ret = file->f_op->fallocate(file, mode, pos,
428 bio->bi_size);
429 if (unlikely(ret && ret != -EINVAL &&
430 ret != -EOPNOTSUPP))
431 ret = -EIO;
432 goto out;
433 }
434
487 ret = lo_send(lo, bio, pos); 435 ret = lo_send(lo, bio, pos);
488 436
489 if ((bio->bi_rw & REQ_FUA) && !ret) { 437 if ((bio->bi_rw & REQ_FUA) && !ret) {
@@ -514,7 +462,7 @@ static struct bio *loop_get_bio(struct loop_device *lo)
514 return bio_list_pop(&lo->lo_bio_list); 462 return bio_list_pop(&lo->lo_bio_list);
515} 463}
516 464
517static int loop_make_request(struct request_queue *q, struct bio *old_bio) 465static void loop_make_request(struct request_queue *q, struct bio *old_bio)
518{ 466{
519 struct loop_device *lo = q->queuedata; 467 struct loop_device *lo = q->queuedata;
520 int rw = bio_rw(old_bio); 468 int rw = bio_rw(old_bio);
@@ -532,12 +480,11 @@ static int loop_make_request(struct request_queue *q, struct bio *old_bio)
532 loop_add_bio(lo, old_bio); 480 loop_add_bio(lo, old_bio);
533 wake_up(&lo->lo_event); 481 wake_up(&lo->lo_event);
534 spin_unlock_irq(&lo->lo_lock); 482 spin_unlock_irq(&lo->lo_lock);
535 return 0; 483 return;
536 484
537out: 485out:
538 spin_unlock_irq(&lo->lo_lock); 486 spin_unlock_irq(&lo->lo_lock);
539 bio_io_error(old_bio); 487 bio_io_error(old_bio);
540 return 0;
541} 488}
542 489
543struct switch_request { 490struct switch_request {
@@ -700,7 +647,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
700 goto out_putf; 647 goto out_putf;
701 648
702 fput(old_file); 649 fput(old_file);
703 if (max_part > 0) 650 if (lo->lo_flags & LO_FLAGS_PARTSCAN)
704 ioctl_by_bdev(bdev, BLKRRPART, 0); 651 ioctl_by_bdev(bdev, BLKRRPART, 0);
705 return 0; 652 return 0;
706 653
@@ -777,16 +724,25 @@ static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf)
777 return sprintf(buf, "%s\n", autoclear ? "1" : "0"); 724 return sprintf(buf, "%s\n", autoclear ? "1" : "0");
778} 725}
779 726
727static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf)
728{
729 int partscan = (lo->lo_flags & LO_FLAGS_PARTSCAN);
730
731 return sprintf(buf, "%s\n", partscan ? "1" : "0");
732}
733
780LOOP_ATTR_RO(backing_file); 734LOOP_ATTR_RO(backing_file);
781LOOP_ATTR_RO(offset); 735LOOP_ATTR_RO(offset);
782LOOP_ATTR_RO(sizelimit); 736LOOP_ATTR_RO(sizelimit);
783LOOP_ATTR_RO(autoclear); 737LOOP_ATTR_RO(autoclear);
738LOOP_ATTR_RO(partscan);
784 739
785static struct attribute *loop_attrs[] = { 740static struct attribute *loop_attrs[] = {
786 &loop_attr_backing_file.attr, 741 &loop_attr_backing_file.attr,
787 &loop_attr_offset.attr, 742 &loop_attr_offset.attr,
788 &loop_attr_sizelimit.attr, 743 &loop_attr_sizelimit.attr,
789 &loop_attr_autoclear.attr, 744 &loop_attr_autoclear.attr,
745 &loop_attr_partscan.attr,
790 NULL, 746 NULL,
791}; 747};
792 748
@@ -807,6 +763,35 @@ static void loop_sysfs_exit(struct loop_device *lo)
807 &loop_attribute_group); 763 &loop_attribute_group);
808} 764}
809 765
766static void loop_config_discard(struct loop_device *lo)
767{
768 struct file *file = lo->lo_backing_file;
769 struct inode *inode = file->f_mapping->host;
770 struct request_queue *q = lo->lo_queue;
771
772 /*
773 * We use punch hole to reclaim the free space used by the
774 	 * image a.k.a. discard. However we do not support discard if
775 * encryption is enabled, because it may give an attacker
776 * useful information.
777 */
778 if ((!file->f_op->fallocate) ||
779 lo->lo_encrypt_key_size) {
780 q->limits.discard_granularity = 0;
781 q->limits.discard_alignment = 0;
782 q->limits.max_discard_sectors = 0;
783 q->limits.discard_zeroes_data = 0;
784 queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
785 return;
786 }
787
788 q->limits.discard_granularity = inode->i_sb->s_blocksize;
789 q->limits.discard_alignment = inode->i_sb->s_blocksize;
790 q->limits.max_discard_sectors = UINT_MAX >> 9;
791 q->limits.discard_zeroes_data = 1;
792 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
793}
794
810static int loop_set_fd(struct loop_device *lo, fmode_t mode, 795static int loop_set_fd(struct loop_device *lo, fmode_t mode,
811 struct block_device *bdev, unsigned int arg) 796 struct block_device *bdev, unsigned int arg)
812{ 797{
@@ -849,35 +834,23 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
849 mapping = file->f_mapping; 834 mapping = file->f_mapping;
850 inode = mapping->host; 835 inode = mapping->host;
851 836
852 if (!(file->f_mode & FMODE_WRITE))
853 lo_flags |= LO_FLAGS_READ_ONLY;
854
855 error = -EINVAL; 837 error = -EINVAL;
856 if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) { 838 if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
857 const struct address_space_operations *aops = mapping->a_ops; 839 goto out_putf;
858
859 if (aops->write_begin)
860 lo_flags |= LO_FLAGS_USE_AOPS;
861 if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write)
862 lo_flags |= LO_FLAGS_READ_ONLY;
863 840
864 lo_blocksize = S_ISBLK(inode->i_mode) ? 841 if (!(file->f_mode & FMODE_WRITE) || !(mode & FMODE_WRITE) ||
865 inode->i_bdev->bd_block_size : PAGE_SIZE; 842 !file->f_op->write)
843 lo_flags |= LO_FLAGS_READ_ONLY;
866 844
867 error = 0; 845 lo_blocksize = S_ISBLK(inode->i_mode) ?
868 } else { 846 inode->i_bdev->bd_block_size : PAGE_SIZE;
869 goto out_putf;
870 }
871 847
848 error = -EFBIG;
872 size = get_loop_size(lo, file); 849 size = get_loop_size(lo, file);
873 850 if ((loff_t)(sector_t)size != size)
874 if ((loff_t)(sector_t)size != size) {
875 error = -EFBIG;
876 goto out_putf; 851 goto out_putf;
877 }
878 852
879 if (!(mode & FMODE_WRITE)) 853 error = 0;
880 lo_flags |= LO_FLAGS_READ_ONLY;
881 854
882 set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0); 855 set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
883 856
@@ -919,7 +892,9 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
919 } 892 }
920 lo->lo_state = Lo_bound; 893 lo->lo_state = Lo_bound;
921 wake_up_process(lo->lo_thread); 894 wake_up_process(lo->lo_thread);
922 if (max_part > 0) 895 if (part_shift)
896 lo->lo_flags |= LO_FLAGS_PARTSCAN;
897 if (lo->lo_flags & LO_FLAGS_PARTSCAN)
923 ioctl_by_bdev(bdev, BLKRRPART, 0); 898 ioctl_by_bdev(bdev, BLKRRPART, 0);
924 return 0; 899 return 0;
925 900
@@ -980,10 +955,11 @@ loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
980 return err; 955 return err;
981} 956}
982 957
983static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev) 958static int loop_clr_fd(struct loop_device *lo)
984{ 959{
985 struct file *filp = lo->lo_backing_file; 960 struct file *filp = lo->lo_backing_file;
986 gfp_t gfp = lo->old_gfp_mask; 961 gfp_t gfp = lo->old_gfp_mask;
962 struct block_device *bdev = lo->lo_device;
987 963
988 if (lo->lo_state != Lo_bound) 964 if (lo->lo_state != Lo_bound)
989 return -ENXIO; 965 return -ENXIO;
@@ -1012,7 +988,6 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
1012 lo->lo_offset = 0; 988 lo->lo_offset = 0;
1013 lo->lo_sizelimit = 0; 989 lo->lo_sizelimit = 0;
1014 lo->lo_encrypt_key_size = 0; 990 lo->lo_encrypt_key_size = 0;
1015 lo->lo_flags = 0;
1016 lo->lo_thread = NULL; 991 lo->lo_thread = NULL;
1017 memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE); 992 memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
1018 memset(lo->lo_crypt_name, 0, LO_NAME_SIZE); 993 memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
@@ -1030,8 +1005,11 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
1030 lo->lo_state = Lo_unbound; 1005 lo->lo_state = Lo_unbound;
1031 /* This is safe: open() is still holding a reference. */ 1006 /* This is safe: open() is still holding a reference. */
1032 module_put(THIS_MODULE); 1007 module_put(THIS_MODULE);
1033 if (max_part > 0 && bdev) 1008 if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev)
1034 ioctl_by_bdev(bdev, BLKRRPART, 0); 1009 ioctl_by_bdev(bdev, BLKRRPART, 0);
1010 lo->lo_flags = 0;
1011 if (!part_shift)
1012 lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
1035 mutex_unlock(&lo->lo_ctl_mutex); 1013 mutex_unlock(&lo->lo_ctl_mutex);
1036 /* 1014 /*
1037 * Need not hold lo_ctl_mutex to fput backing file. 1015 * Need not hold lo_ctl_mutex to fput backing file.
@@ -1085,6 +1063,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
1085 if (figure_loop_size(lo)) 1063 if (figure_loop_size(lo))
1086 return -EFBIG; 1064 return -EFBIG;
1087 } 1065 }
1066 loop_config_discard(lo);
1088 1067
1089 memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); 1068 memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
1090 memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE); 1069 memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
@@ -1100,6 +1079,13 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
1100 (info->lo_flags & LO_FLAGS_AUTOCLEAR)) 1079 (info->lo_flags & LO_FLAGS_AUTOCLEAR))
1101 lo->lo_flags ^= LO_FLAGS_AUTOCLEAR; 1080 lo->lo_flags ^= LO_FLAGS_AUTOCLEAR;
1102 1081
1082 if ((info->lo_flags & LO_FLAGS_PARTSCAN) &&
1083 !(lo->lo_flags & LO_FLAGS_PARTSCAN)) {
1084 lo->lo_flags |= LO_FLAGS_PARTSCAN;
1085 lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
1086 ioctl_by_bdev(lo->lo_device, BLKRRPART, 0);
1087 }
1088
1103 lo->lo_encrypt_key_size = info->lo_encrypt_key_size; 1089 lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
1104 lo->lo_init[0] = info->lo_init[0]; 1090 lo->lo_init[0] = info->lo_init[0];
1105 lo->lo_init[1] = info->lo_init[1]; 1091 lo->lo_init[1] = info->lo_init[1];
@@ -1293,7 +1279,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
1293 break; 1279 break;
1294 case LOOP_CLR_FD: 1280 case LOOP_CLR_FD:
1295 /* loop_clr_fd would have unlocked lo_ctl_mutex on success */ 1281 /* loop_clr_fd would have unlocked lo_ctl_mutex on success */
1296 err = loop_clr_fd(lo, bdev); 1282 err = loop_clr_fd(lo);
1297 if (!err) 1283 if (!err)
1298 goto out_unlocked; 1284 goto out_unlocked;
1299 break; 1285 break;
@@ -1513,7 +1499,7 @@ static int lo_release(struct gendisk *disk, fmode_t mode)
1513 * In autoclear mode, stop the loop thread 1499 * In autoclear mode, stop the loop thread
1514 * and remove configuration after last close. 1500 * and remove configuration after last close.
1515 */ 1501 */
1516 err = loop_clr_fd(lo, NULL); 1502 err = loop_clr_fd(lo);
1517 if (!err) 1503 if (!err)
1518 goto out_unlocked; 1504 goto out_unlocked;
1519 } else { 1505 } else {
@@ -1635,6 +1621,27 @@ static int loop_add(struct loop_device **l, int i)
1635 if (!disk) 1621 if (!disk)
1636 goto out_free_queue; 1622 goto out_free_queue;
1637 1623
1624 /*
1625 * Disable partition scanning by default. The in-kernel partition
1626 * scanning can be requested individually per-device during its
1627 * setup. Userspace can always add and remove partitions from all
1628 * devices. The needed partition minors are allocated from the
1629 * extended minor space, the main loop device numbers will continue
1630 * to match the loop minors, regardless of the number of partitions
1631 * used.
1632 *
1633 * If max_part is given, partition scanning is globally enabled for
1634 * all loop devices. The minors for the main loop devices will be
1635 * multiples of max_part.
1636 *
1637 * Note: Global-for-all-devices, set-only-at-init, read-only module
1638 * parameteters like 'max_loop' and 'max_part' make things needlessly
1639 * complicated, are too static, inflexible and may surprise
1640 * userspace tools. Parameters like this in general should be avoided.
1641 */
1642 if (!part_shift)
1643 disk->flags |= GENHD_FL_NO_PART_SCAN;
1644 disk->flags |= GENHD_FL_EXT_DEVT;
1638 mutex_init(&lo->lo_ctl_mutex); 1645 mutex_init(&lo->lo_ctl_mutex);
1639 lo->lo_number = i; 1646 lo->lo_number = i;
1640 lo->lo_thread = NULL; 1647 lo->lo_thread = NULL;