Diffstat (limited to 'drivers/md/dm-thin.c')
-rw-r--r--  drivers/md/dm-thin.c  181
1 file changed, 128 insertions, 53 deletions
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index fc9c848a60c9..4843801173fe 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -227,6 +227,7 @@ struct thin_c {
 	struct list_head list;
 	struct dm_dev *pool_dev;
 	struct dm_dev *origin_dev;
+	sector_t origin_size;
 	dm_thin_id dev_id;
 
 	struct pool *pool;
@@ -554,11 +555,16 @@ static void remap_and_issue(struct thin_c *tc, struct bio *bio,
 struct dm_thin_new_mapping {
 	struct list_head list;
 
-	bool quiesced:1;
-	bool prepared:1;
 	bool pass_discard:1;
 	bool definitely_not_shared:1;
 
+	/*
+	 * Track quiescing, copying and zeroing preparation actions. When this
+	 * counter hits zero the block is prepared and can be inserted into the
+	 * btree.
+	 */
+	atomic_t prepare_actions;
+
 	int err;
 	struct thin_c *tc;
 	dm_block_t virt_block;
@@ -575,43 +581,41 @@ struct dm_thin_new_mapping {
 	bio_end_io_t *saved_bi_end_io;
 };
 
-static void __maybe_add_mapping(struct dm_thin_new_mapping *m)
+static void __complete_mapping_preparation(struct dm_thin_new_mapping *m)
 {
 	struct pool *pool = m->tc->pool;
 
-	if (m->quiesced && m->prepared) {
+	if (atomic_dec_and_test(&m->prepare_actions)) {
 		list_add_tail(&m->list, &pool->prepared_mappings);
 		wake_worker(pool);
 	}
 }
 
-static void copy_complete(int read_err, unsigned long write_err, void *context)
+static void complete_mapping_preparation(struct dm_thin_new_mapping *m)
 {
 	unsigned long flags;
-	struct dm_thin_new_mapping *m = context;
 	struct pool *pool = m->tc->pool;
 
-	m->err = read_err || write_err ? -EIO : 0;
-
 	spin_lock_irqsave(&pool->lock, flags);
-	m->prepared = true;
-	__maybe_add_mapping(m);
+	__complete_mapping_preparation(m);
 	spin_unlock_irqrestore(&pool->lock, flags);
 }
 
+static void copy_complete(int read_err, unsigned long write_err, void *context)
+{
+	struct dm_thin_new_mapping *m = context;
+
+	m->err = read_err || write_err ? -EIO : 0;
+	complete_mapping_preparation(m);
+}
+
 static void overwrite_endio(struct bio *bio, int err)
 {
-	unsigned long flags;
 	struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
 	struct dm_thin_new_mapping *m = h->overwrite_mapping;
-	struct pool *pool = m->tc->pool;
 
 	m->err = err;
-
-	spin_lock_irqsave(&pool->lock, flags);
-	m->prepared = true;
-	__maybe_add_mapping(m);
-	spin_unlock_irqrestore(&pool->lock, flags);
+	complete_mapping_preparation(m);
 }
 
 /*----------------------------------------------------------------*/
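
Note (illustration, not part of the patch): the hunk above replaces the two quiesced/prepared flags with a single prepare_actions count; each outstanding preparation step (quiesce, copy, zero) holds one count and the mapping is queued for the btree only when the last step drops it. A minimal userspace sketch of that pattern, using C11 <stdatomic.h> in place of the kernel's atomic_t, with made-up names and values:

/* Userspace model of the prepare_actions counter (illustrative only). */
#include <stdatomic.h>
#include <stdio.h>

struct mapping {
	atomic_int prepare_actions;
	int err;
};

/* Called once per finished action; fires exactly once, on the last one. */
static void complete_action(struct mapping *m)
{
	if (atomic_fetch_sub(&m->prepare_actions, 1) == 1)
		printf("all actions done (err=%d): insert mapping into btree\n",
		       m->err);
}

int main(void)
{
	struct mapping m = { .err = 0 };

	/* quiesce + copy + a reference held while setup is still running */
	atomic_init(&m.prepare_actions, 3);

	complete_action(&m);	/* quiesce finished */
	complete_action(&m);	/* kcopyd copy finished */
	complete_action(&m);	/* setup drops its reference */
	return 0;
}

Counting actions this way lets additional preparation steps, such as the tail zero introduced later in this diff, be layered on without growing the set of flags.
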
@@ -821,10 +825,31 @@ static struct dm_thin_new_mapping *get_next_mapping(struct pool *pool)
 	return m;
 }
 
+static void ll_zero(struct thin_c *tc, struct dm_thin_new_mapping *m,
+		    sector_t begin, sector_t end)
+{
+	int r;
+	struct dm_io_region to;
+
+	to.bdev = tc->pool_dev->bdev;
+	to.sector = begin;
+	to.count = end - begin;
+
+	r = dm_kcopyd_zero(tc->pool->copier, 1, &to, 0, copy_complete, m);
+	if (r < 0) {
+		DMERR_LIMIT("dm_kcopyd_zero() failed");
+		copy_complete(1, 1, m);
+	}
+}
+
+/*
+ * A partial copy also needs to zero the uncopied region.
+ */
 static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
 			  struct dm_dev *origin, dm_block_t data_origin,
 			  dm_block_t data_dest,
-			  struct dm_bio_prison_cell *cell, struct bio *bio)
+			  struct dm_bio_prison_cell *cell, struct bio *bio,
+			  sector_t len)
 {
 	int r;
 	struct pool *pool = tc->pool;
@@ -835,8 +860,15 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
 	m->data_block = data_dest;
 	m->cell = cell;
 
+	/*
+	 * quiesce action + copy action + an extra reference held for the
+	 * duration of this function (we may need to inc later for a
+	 * partial zero).
+	 */
+	atomic_set(&m->prepare_actions, 3);
+
 	if (!dm_deferred_set_add_work(pool->shared_read_ds, &m->list))
-		m->quiesced = true;
+		complete_mapping_preparation(m); /* already quiesced */
 
 	/*
 	 * IO to pool_dev remaps to the pool target's data_dev.
@@ -857,20 +889,38 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
 
 		from.bdev = origin->bdev;
 		from.sector = data_origin * pool->sectors_per_block;
-		from.count = pool->sectors_per_block;
+		from.count = len;
 
 		to.bdev = tc->pool_dev->bdev;
 		to.sector = data_dest * pool->sectors_per_block;
-		to.count = pool->sectors_per_block;
+		to.count = len;
 
 		r = dm_kcopyd_copy(pool->copier, &from, 1, &to,
 				   0, copy_complete, m);
 		if (r < 0) {
-			mempool_free(m, pool->mapping_pool);
 			DMERR_LIMIT("dm_kcopyd_copy() failed");
-			cell_error(pool, cell);
+			copy_complete(1, 1, m);
+
+			/*
+			 * We allow the zero to be issued, to simplify the
+			 * error path. Otherwise we'd need to start
+			 * worrying about decrementing the prepare_actions
+			 * counter.
+			 */
+		}
+
+		/*
+		 * Do we need to zero a tail region?
+		 */
+		if (len < pool->sectors_per_block && pool->pf.zero_new_blocks) {
+			atomic_inc(&m->prepare_actions);
+			ll_zero(tc, m,
+				data_dest * pool->sectors_per_block + len,
+				(data_dest + 1) * pool->sectors_per_block);
 		}
 	}
+
+	complete_mapping_preparation(m); /* drop our ref */
 }
 
 static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block,
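
Note (illustration, not part of the patch): schedule_copy() now takes a length in sectors; when len covers less than a full data block and zeroing of new blocks is enabled, the remainder of the destination block is zeroed via ll_zero(). A small standalone C sketch of the range arithmetic, with made-up block and length values:

/* Userspace model of the partial-copy / tail-zero ranges (illustrative only). */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t sector_t;

int main(void)
{
	sector_t sectors_per_block = 128;	/* 64KiB blocks, 512B sectors */
	sector_t data_dest = 42;		/* destination data block */
	sector_t len = 100;			/* sectors actually copied */

	sector_t copy_begin = data_dest * sectors_per_block;
	sector_t copy_end   = copy_begin + len;
	sector_t block_end  = (data_dest + 1) * sectors_per_block;

	printf("copy sectors [%llu, %llu)\n",
	       (unsigned long long)copy_begin, (unsigned long long)copy_end);

	if (len < sectors_per_block)	/* same condition as the tail zero */
		printf("zero sectors [%llu, %llu)\n",
		       (unsigned long long)copy_end,
		       (unsigned long long)block_end);

	return 0;
}
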
@@ -878,15 +928,8 @@ static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block,
 			       struct dm_bio_prison_cell *cell, struct bio *bio)
 {
 	schedule_copy(tc, virt_block, tc->pool_dev,
-		      data_origin, data_dest, cell, bio);
-}
-
-static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block,
-				   dm_block_t data_dest,
-				   struct dm_bio_prison_cell *cell, struct bio *bio)
-{
-	schedule_copy(tc, virt_block, tc->origin_dev,
-		      virt_block, data_dest, cell, bio);
+		      data_origin, data_dest, cell, bio,
+		      tc->pool->sectors_per_block);
 }
 
 static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
@@ -896,8 +939,7 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
 	struct pool *pool = tc->pool;
 	struct dm_thin_new_mapping *m = get_next_mapping(pool);
 
-	m->quiesced = true;
-	m->prepared = false;
+	atomic_set(&m->prepare_actions, 1); /* no need to quiesce */
 	m->tc = tc;
 	m->virt_block = virt_block;
 	m->data_block = data_block;
@@ -919,21 +961,33 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
 		save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
 		inc_all_io_entry(pool, bio);
 		remap_and_issue(tc, bio, data_block);
-	} else {
-		int r;
-		struct dm_io_region to;
 
-		to.bdev = tc->pool_dev->bdev;
-		to.sector = data_block * pool->sectors_per_block;
-		to.count = pool->sectors_per_block;
+	} else
+		ll_zero(tc, m,
+			data_block * pool->sectors_per_block,
+			(data_block + 1) * pool->sectors_per_block);
+}
 
-		r = dm_kcopyd_zero(pool->copier, 1, &to, 0, copy_complete, m);
-		if (r < 0) {
-			mempool_free(m, pool->mapping_pool);
-			DMERR_LIMIT("dm_kcopyd_zero() failed");
-			cell_error(pool, cell);
-		}
-	}
+static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block,
+				   dm_block_t data_dest,
+				   struct dm_bio_prison_cell *cell, struct bio *bio)
+{
+	struct pool *pool = tc->pool;
+	sector_t virt_block_begin = virt_block * pool->sectors_per_block;
+	sector_t virt_block_end = (virt_block + 1) * pool->sectors_per_block;
+
+	if (virt_block_end <= tc->origin_size)
+		schedule_copy(tc, virt_block, tc->origin_dev,
+			      virt_block, data_dest, cell, bio,
+			      pool->sectors_per_block);
+
+	else if (virt_block_begin < tc->origin_size)
+		schedule_copy(tc, virt_block, tc->origin_dev,
+			      virt_block, data_dest, cell, bio,
+			      tc->origin_size - virt_block_begin);
+
+	else
+		schedule_zero(tc, virt_block, data_dest, cell, bio);
 }
 
 /*
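
Note (illustration, not part of the patch): schedule_external_copy() now compares the virtual block's sector range against tc->origin_size, so an external origin smaller than the thin device no longer has to end on a block boundary. A standalone sketch of the three cases, with illustrative numbers:

/* Userspace model of the full-copy / partial-copy / zero decision (illustrative only). */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t sector_t;

static void classify(sector_t virt_block, sector_t sectors_per_block,
		     sector_t origin_size)
{
	sector_t begin = virt_block * sectors_per_block;
	sector_t end = (virt_block + 1) * sectors_per_block;

	if (end <= origin_size)
		printf("block %llu: full copy of %llu sectors\n",
		       (unsigned long long)virt_block,
		       (unsigned long long)sectors_per_block);
	else if (begin < origin_size)
		printf("block %llu: partial copy of %llu sectors, tail zeroed\n",
		       (unsigned long long)virt_block,
		       (unsigned long long)(origin_size - begin));
	else
		printf("block %llu: beyond origin, zero the whole block\n",
		       (unsigned long long)virt_block);
}

int main(void)
{
	sector_t spb = 128, origin_size = 300;	/* origin ends mid-block 2 */

	classify(1, spb, origin_size);	/* [128, 256) -> full copy */
	classify(2, spb, origin_size);	/* [256, 384) -> copy 44, zero rest */
	classify(3, spb, origin_size);	/* [384, 512) -> zero */
	return 0;
}
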
@@ -1315,7 +1369,18 @@ static void process_bio(struct thin_c *tc, struct bio *bio)
 			inc_all_io_entry(pool, bio);
 			cell_defer_no_holder(tc, cell);
 
-			remap_to_origin_and_issue(tc, bio);
+			if (bio_end_sector(bio) <= tc->origin_size)
+				remap_to_origin_and_issue(tc, bio);
+
+			else if (bio->bi_iter.bi_sector < tc->origin_size) {
+				zero_fill_bio(bio);
+				bio->bi_iter.bi_size = (tc->origin_size - bio->bi_iter.bi_sector) << SECTOR_SHIFT;
+				remap_to_origin_and_issue(tc, bio);
+
+			} else {
+				zero_fill_bio(bio);
+				bio_endio(bio, 0);
+			}
 		} else
 			provision_block(tc, bio, block, cell);
 		break;
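
Note (illustration, not part of the patch): reads served from the external origin get the same origin_size treatment in process_bio(): a read entirely inside the origin is remapped as-is, a read straddling the origin's end is zero-filled and then trimmed so only the in-origin prefix is reissued, and a read entirely past the end is zero-filled and completed. A standalone sketch of those three cases (sector values are made up):

/* Userspace model of the read-past-origin handling (illustrative only). */
#include <stdint.h>
#include <stdio.h>

#define SECTOR_SHIFT 9

typedef uint64_t sector_t;

static void handle_read(sector_t bi_sector, uint32_t bi_size,
			sector_t origin_size)
{
	sector_t end_sector = bi_sector + (bi_size >> SECTOR_SHIFT);

	if (end_sector <= origin_size) {
		printf("sector %llu: remap whole %u-byte read to origin\n",
		       (unsigned long long)bi_sector, bi_size);
	} else if (bi_sector < origin_size) {
		uint32_t new_size = (origin_size - bi_sector) << SECTOR_SHIFT;

		printf("sector %llu: zero-fill, trim to %u bytes, remap\n",
		       (unsigned long long)bi_sector, new_size);
	} else {
		printf("sector %llu: zero-fill and complete immediately\n",
		       (unsigned long long)bi_sector);
	}
}

int main(void)
{
	sector_t origin_size = 300;

	handle_read(100, 8 << SECTOR_SHIFT, origin_size);	/* fully inside */
	handle_read(296, 8 << SECTOR_SHIFT, origin_size);	/* straddles end */
	handle_read(400, 8 << SECTOR_SHIFT, origin_size);	/* fully beyond */
	return 0;
}
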
@@ -3112,7 +3177,7 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
 	 */
 	if (io_opt_sectors < pool->sectors_per_block ||
 	    do_div(io_opt_sectors, pool->sectors_per_block)) {
-		blk_limits_io_min(limits, 0);
+		blk_limits_io_min(limits, pool->sectors_per_block << SECTOR_SHIFT);
 		blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
 	}
 
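
Note (illustration, not part of the patch): the pool_io_hints() change above advertises the pool's data block size as minimum_io_size (instead of 0) whenever the stacked optimal_io_size is smaller than, or not a multiple of, the data block size. A standalone sketch of that check, using plain modulo in place of the kernel's do_div() and made-up limits:

/* Userspace model of the io_min/io_opt override (illustrative only). */
#include <stdint.h>
#include <stdio.h>

#define SECTOR_SHIFT 9

typedef uint64_t sector_t;

static void pick_io_hints(sector_t io_opt_sectors, sector_t sectors_per_block,
			  uint32_t *io_min, uint32_t *io_opt)
{
	if (io_opt_sectors < sectors_per_block ||
	    io_opt_sectors % sectors_per_block) {
		*io_min = sectors_per_block << SECTOR_SHIFT;
		*io_opt = sectors_per_block << SECTOR_SHIFT;
	}
}

int main(void)
{
	uint32_t io_min = 512, io_opt = 512 * 24;	/* e.g. stacked from a RAID device */
	sector_t sectors_per_block = 128;		/* 64KiB data blocks */

	pick_io_hints(io_opt >> SECTOR_SHIFT, sectors_per_block,
		      &io_min, &io_opt);

	printf("io_min=%u io_opt=%u\n", io_min, io_opt);
	return 0;
}
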
@@ -3141,7 +3206,7 @@ static struct target_type pool_target = {
 	.name = "thin-pool",
 	.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
 		    DM_TARGET_IMMUTABLE,
-	.version = {1, 12, 0},
+	.version = {1, 13, 0},
 	.module = THIS_MODULE,
 	.ctr = pool_ctr,
 	.dtr = pool_dtr,
@@ -3361,8 +3426,7 @@ static int thin_endio(struct dm_target *ti, struct bio *bio, int err)
 		spin_lock_irqsave(&pool->lock, flags);
 		list_for_each_entry_safe(m, tmp, &work, list) {
 			list_del(&m->list);
-			m->quiesced = true;
-			__maybe_add_mapping(m);
+			__complete_mapping_preparation(m);
 		}
 		spin_unlock_irqrestore(&pool->lock, flags);
 	}
@@ -3401,6 +3465,16 @@ static void thin_postsuspend(struct dm_target *ti)
 	noflush_work(tc, do_noflush_stop);
 }
 
+static int thin_preresume(struct dm_target *ti)
+{
+	struct thin_c *tc = ti->private;
+
+	if (tc->origin_dev)
+		tc->origin_size = get_dev_size(tc->origin_dev->bdev);
+
+	return 0;
+}
+
 /*
  * <nr mapped sectors> <highest mapped sector>
  */
@@ -3483,12 +3557,13 @@ static int thin_iterate_devices(struct dm_target *ti,
 
 static struct target_type thin_target = {
 	.name = "thin",
-	.version = {1, 12, 0},
+	.version = {1, 13, 0},
 	.module = THIS_MODULE,
 	.ctr = thin_ctr,
 	.dtr = thin_dtr,
 	.map = thin_map,
 	.end_io = thin_endio,
+	.preresume = thin_preresume,
 	.presuspend = thin_presuspend,
 	.postsuspend = thin_postsuspend,
 	.status = thin_status,