diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-05 00:19:53 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-05 00:19:53 -0500 |
commit | e0700ce70921fbe3d1913968c663beb9df2b01a9 (patch) | |
tree | 8f8163dbdce74942a6ebe849c43c4fdcf2b52799 /drivers/md/dm.c | |
parent | ac322de6bf5416cb145b58599297b8be73cd86ac (diff) | |
parent | aad9ae4550755edc020b5c511a8b54f0104b2f47 (diff) |
Merge tag 'dm-4.4-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:
"Smaller set of DM changes for this merge. I've based these changes on
Jens' for-4.4/reservations branch because the associated DM changes
required it.
- Revert a dm-multipath change that caused a regression for
unprivileged users (e.g. kvm guests) that issued ioctls when a
multipath device had no available paths.
- Include Christoph's refactoring of DM's ioctl handling and add
support for passing through persistent reservations with DM
multipath.
- All other changes are very simple cleanups"
* tag 'dm-4.4-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
dm switch: simplify conditional in alloc_region_table()
dm delay: document that offsets are specified in sectors
dm delay: capitalize the start of a delay_ctr() error message
dm delay: Use DM_MAPIO macros instead of open-coded equivalents
dm linear: remove redundant target name from error messages
dm persistent data: eliminate unnecessary return values
dm: eliminate unused "bioset" process for each bio-based DM device
dm: convert ffs to __ffs
dm: drop NULL test before kmem_cache_destroy() and mempool_destroy()
dm: add support for passing through persistent reservations
dm: refactor ioctl handling
Revert "dm mpath: fix stalls when handling invalid ioctls"
dm: initialize non-blk-mq queue data before queue is used
Diffstat (limited to 'drivers/md/dm.c')
-rw-r--r-- | drivers/md/dm.c | 209 |
1 file changed, 182 insertions, 27 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 485760ebba76..32440ad5f684 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/ktime.h> | 24 | #include <linux/ktime.h> |
25 | #include <linux/elevator.h> /* for rq_end_sector() */ | 25 | #include <linux/elevator.h> /* for rq_end_sector() */ |
26 | #include <linux/blk-mq.h> | 26 | #include <linux/blk-mq.h> |
27 | #include <linux/pr.h> | ||
27 | 28 | ||
28 | #include <trace/events/block.h> | 29 | #include <trace/events/block.h> |
29 | 30 | ||
@@ -555,18 +556,16 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo) | |||
555 | return dm_get_geometry(md, geo); | 556 | return dm_get_geometry(md, geo); |
556 | } | 557 | } |
557 | 558 | ||
558 | static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode, | 559 | static int dm_get_live_table_for_ioctl(struct mapped_device *md, |
559 | unsigned int cmd, unsigned long arg) | 560 | struct dm_target **tgt, struct block_device **bdev, |
561 | fmode_t *mode, int *srcu_idx) | ||
560 | { | 562 | { |
561 | struct mapped_device *md = bdev->bd_disk->private_data; | ||
562 | int srcu_idx; | ||
563 | struct dm_table *map; | 563 | struct dm_table *map; |
564 | struct dm_target *tgt; | 564 | int r; |
565 | int r = -ENOTTY; | ||
566 | 565 | ||
567 | retry: | 566 | retry: |
568 | map = dm_get_live_table(md, &srcu_idx); | 567 | r = -ENOTTY; |
569 | 568 | map = dm_get_live_table(md, srcu_idx); | |
570 | if (!map || !dm_table_get_size(map)) | 569 | if (!map || !dm_table_get_size(map)) |
571 | goto out; | 570 | goto out; |
572 | 571 | ||
@@ -574,8 +573,9 @@ retry: | |||
574 | if (dm_table_get_num_targets(map) != 1) | 573 | if (dm_table_get_num_targets(map) != 1) |
575 | goto out; | 574 | goto out; |
576 | 575 | ||
577 | tgt = dm_table_get_target(map, 0); | 576 | *tgt = dm_table_get_target(map, 0); |
578 | if (!tgt->type->ioctl) | 577 | |
578 | if (!(*tgt)->type->prepare_ioctl) | ||
579 | goto out; | 579 | goto out; |
580 | 580 | ||
581 | if (dm_suspended_md(md)) { | 581 | if (dm_suspended_md(md)) { |
@@ -583,16 +583,46 @@ retry: | |||
583 | goto out; | 583 | goto out; |
584 | } | 584 | } |
585 | 585 | ||
586 | r = tgt->type->ioctl(tgt, cmd, arg); | 586 | r = (*tgt)->type->prepare_ioctl(*tgt, bdev, mode); |
587 | if (r < 0) | ||
588 | goto out; | ||
587 | 589 | ||
588 | out: | 590 | return r; |
589 | dm_put_live_table(md, srcu_idx); | ||
590 | 591 | ||
592 | out: | ||
593 | dm_put_live_table(md, *srcu_idx); | ||
591 | if (r == -ENOTCONN) { | 594 | if (r == -ENOTCONN) { |
592 | msleep(10); | 595 | msleep(10); |
593 | goto retry; | 596 | goto retry; |
594 | } | 597 | } |
598 | return r; | ||
599 | } | ||
600 | |||
601 | static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode, | ||
602 | unsigned int cmd, unsigned long arg) | ||
603 | { | ||
604 | struct mapped_device *md = bdev->bd_disk->private_data; | ||
605 | struct dm_target *tgt; | ||
606 | int srcu_idx, r; | ||
607 | |||
608 | r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx); | ||
609 | if (r < 0) | ||
610 | return r; | ||
595 | 611 | ||
612 | if (r > 0) { | ||
613 | /* | ||
614 | * Target determined this ioctl is being issued against | ||
615 | * a logical partition of the parent bdev; so extra | ||
616 | * validation is needed. | ||
617 | */ | ||
618 | r = scsi_verify_blk_ioctl(NULL, cmd); | ||
619 | if (r) | ||
620 | goto out; | ||
621 | } | ||
622 | |||
623 | r = __blkdev_driver_ioctl(bdev, mode, cmd, arg); | ||
624 | out: | ||
625 | dm_put_live_table(md, srcu_idx); | ||
596 | return r; | 626 | return r; |
597 | } | 627 | } |
598 | 628 | ||
@@ -1734,8 +1764,6 @@ static void dm_make_request(struct request_queue *q, struct bio *bio) | |||
1734 | 1764 | ||
1735 | map = dm_get_live_table(md, &srcu_idx); | 1765 | map = dm_get_live_table(md, &srcu_idx); |
1736 | 1766 | ||
1737 | blk_queue_split(q, &bio, q->bio_split); | ||
1738 | |||
1739 | generic_start_io_acct(rw, bio_sectors(bio), &dm_disk(md)->part0); | 1767 | generic_start_io_acct(rw, bio_sectors(bio), &dm_disk(md)->part0); |
1740 | 1768 | ||
1741 | /* if we're suspended, we have to queue this io for later */ | 1769 | /* if we're suspended, we have to queue this io for later */ |
@@ -2198,6 +2226,13 @@ static void dm_init_md_queue(struct mapped_device *md) | |||
2198 | * This queue is new, so no concurrency on the queue_flags. | 2226 | * This queue is new, so no concurrency on the queue_flags. |
2199 | */ | 2227 | */ |
2200 | queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue); | 2228 | queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue); |
2229 | |||
2230 | /* | ||
2231 | * Initialize data that will only be used by a non-blk-mq DM queue | ||
2232 | * - must do so here (in alloc_dev callchain) before queue is used | ||
2233 | */ | ||
2234 | md->queue->queuedata = md; | ||
2235 | md->queue->backing_dev_info.congested_data = md; | ||
2201 | } | 2236 | } |
2202 | 2237 | ||
2203 | static void dm_init_old_md_queue(struct mapped_device *md) | 2238 | static void dm_init_old_md_queue(struct mapped_device *md) |
@@ -2208,10 +2243,7 @@ static void dm_init_old_md_queue(struct mapped_device *md) | |||
2208 | /* | 2243 | /* |
2209 | * Initialize aspects of queue that aren't relevant for blk-mq | 2244 | * Initialize aspects of queue that aren't relevant for blk-mq |
2210 | */ | 2245 | */ |
2211 | md->queue->queuedata = md; | ||
2212 | md->queue->backing_dev_info.congested_fn = dm_any_congested; | 2246 | md->queue->backing_dev_info.congested_fn = dm_any_congested; |
2213 | md->queue->backing_dev_info.congested_data = md; | ||
2214 | |||
2215 | blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); | 2247 | blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); |
2216 | } | 2248 | } |
2217 | 2249 | ||
@@ -2221,10 +2253,8 @@ static void cleanup_mapped_device(struct mapped_device *md) | |||
2221 | destroy_workqueue(md->wq); | 2253 | destroy_workqueue(md->wq); |
2222 | if (md->kworker_task) | 2254 | if (md->kworker_task) |
2223 | kthread_stop(md->kworker_task); | 2255 | kthread_stop(md->kworker_task); |
2224 | if (md->io_pool) | 2256 | mempool_destroy(md->io_pool); |
2225 | mempool_destroy(md->io_pool); | 2257 | mempool_destroy(md->rq_pool); |
2226 | if (md->rq_pool) | ||
2227 | mempool_destroy(md->rq_pool); | ||
2228 | if (md->bs) | 2258 | if (md->bs) |
2229 | bioset_free(md->bs); | 2259 | bioset_free(md->bs); |
2230 | 2260 | ||
@@ -2759,6 +2789,12 @@ int dm_setup_md_queue(struct mapped_device *md) | |||
2759 | case DM_TYPE_BIO_BASED: | 2789 | case DM_TYPE_BIO_BASED: |
2760 | dm_init_old_md_queue(md); | 2790 | dm_init_old_md_queue(md); |
2761 | blk_queue_make_request(md->queue, dm_make_request); | 2791 | blk_queue_make_request(md->queue, dm_make_request); |
2792 | /* | ||
2793 | * DM handles splitting bios as needed. Free the bio_split bioset | ||
2794 | * since it won't be used (saves 1 process per bio-based DM device). | ||
2795 | */ | ||
2796 | bioset_free(md->queue->bio_split); | ||
2797 | md->queue->bio_split = NULL; | ||
2762 | break; | 2798 | break; |
2763 | } | 2799 | } |
2764 | 2800 | ||
@@ -3505,11 +3541,8 @@ void dm_free_md_mempools(struct dm_md_mempools *pools) | |||
3505 | if (!pools) | 3541 | if (!pools) |
3506 | return; | 3542 | return; |
3507 | 3543 | ||
3508 | if (pools->io_pool) | 3544 | mempool_destroy(pools->io_pool); |
3509 | mempool_destroy(pools->io_pool); | 3545 | mempool_destroy(pools->rq_pool); |
3510 | |||
3511 | if (pools->rq_pool) | ||
3512 | mempool_destroy(pools->rq_pool); | ||
3513 | 3546 | ||
3514 | if (pools->bs) | 3547 | if (pools->bs) |
3515 | bioset_free(pools->bs); | 3548 | bioset_free(pools->bs); |
@@ -3517,11 +3550,133 @@ void dm_free_md_mempools(struct dm_md_mempools *pools) | |||
3517 | kfree(pools); | 3550 | kfree(pools); |
3518 | } | 3551 | } |
3519 | 3552 | ||
3553 | static int dm_pr_register(struct block_device *bdev, u64 old_key, u64 new_key, | ||
3554 | u32 flags) | ||
3555 | { | ||
3556 | struct mapped_device *md = bdev->bd_disk->private_data; | ||
3557 | const struct pr_ops *ops; | ||
3558 | struct dm_target *tgt; | ||
3559 | fmode_t mode; | ||
3560 | int srcu_idx, r; | ||
3561 | |||
3562 | r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx); | ||
3563 | if (r < 0) | ||
3564 | return r; | ||
3565 | |||
3566 | ops = bdev->bd_disk->fops->pr_ops; | ||
3567 | if (ops && ops->pr_register) | ||
3568 | r = ops->pr_register(bdev, old_key, new_key, flags); | ||
3569 | else | ||
3570 | r = -EOPNOTSUPP; | ||
3571 | |||
3572 | dm_put_live_table(md, srcu_idx); | ||
3573 | return r; | ||
3574 | } | ||
3575 | |||
3576 | static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type, | ||
3577 | u32 flags) | ||
3578 | { | ||
3579 | struct mapped_device *md = bdev->bd_disk->private_data; | ||
3580 | const struct pr_ops *ops; | ||
3581 | struct dm_target *tgt; | ||
3582 | fmode_t mode; | ||
3583 | int srcu_idx, r; | ||
3584 | |||
3585 | r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx); | ||
3586 | if (r < 0) | ||
3587 | return r; | ||
3588 | |||
3589 | ops = bdev->bd_disk->fops->pr_ops; | ||
3590 | if (ops && ops->pr_reserve) | ||
3591 | r = ops->pr_reserve(bdev, key, type, flags); | ||
3592 | else | ||
3593 | r = -EOPNOTSUPP; | ||
3594 | |||
3595 | dm_put_live_table(md, srcu_idx); | ||
3596 | return r; | ||
3597 | } | ||
3598 | |||
3599 | static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type) | ||
3600 | { | ||
3601 | struct mapped_device *md = bdev->bd_disk->private_data; | ||
3602 | const struct pr_ops *ops; | ||
3603 | struct dm_target *tgt; | ||
3604 | fmode_t mode; | ||
3605 | int srcu_idx, r; | ||
3606 | |||
3607 | r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx); | ||
3608 | if (r < 0) | ||
3609 | return r; | ||
3610 | |||
3611 | ops = bdev->bd_disk->fops->pr_ops; | ||
3612 | if (ops && ops->pr_release) | ||
3613 | r = ops->pr_release(bdev, key, type); | ||
3614 | else | ||
3615 | r = -EOPNOTSUPP; | ||
3616 | |||
3617 | dm_put_live_table(md, srcu_idx); | ||
3618 | return r; | ||
3619 | } | ||
3620 | |||
3621 | static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key, | ||
3622 | enum pr_type type, bool abort) | ||
3623 | { | ||
3624 | struct mapped_device *md = bdev->bd_disk->private_data; | ||
3625 | const struct pr_ops *ops; | ||
3626 | struct dm_target *tgt; | ||
3627 | fmode_t mode; | ||
3628 | int srcu_idx, r; | ||
3629 | |||
3630 | r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx); | ||
3631 | if (r < 0) | ||
3632 | return r; | ||
3633 | |||
3634 | ops = bdev->bd_disk->fops->pr_ops; | ||
3635 | if (ops && ops->pr_preempt) | ||
3636 | r = ops->pr_preempt(bdev, old_key, new_key, type, abort); | ||
3637 | else | ||
3638 | r = -EOPNOTSUPP; | ||
3639 | |||
3640 | dm_put_live_table(md, srcu_idx); | ||
3641 | return r; | ||
3642 | } | ||
3643 | |||
3644 | static int dm_pr_clear(struct block_device *bdev, u64 key) | ||
3645 | { | ||
3646 | struct mapped_device *md = bdev->bd_disk->private_data; | ||
3647 | const struct pr_ops *ops; | ||
3648 | struct dm_target *tgt; | ||
3649 | fmode_t mode; | ||
3650 | int srcu_idx, r; | ||
3651 | |||
3652 | r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx); | ||
3653 | if (r < 0) | ||
3654 | return r; | ||
3655 | |||
3656 | ops = bdev->bd_disk->fops->pr_ops; | ||
3657 | if (ops && ops->pr_clear) | ||
3658 | r = ops->pr_clear(bdev, key); | ||
3659 | else | ||
3660 | r = -EOPNOTSUPP; | ||
3661 | |||
3662 | dm_put_live_table(md, srcu_idx); | ||
3663 | return r; | ||
3664 | } | ||
3665 | |||
3666 | static const struct pr_ops dm_pr_ops = { | ||
3667 | .pr_register = dm_pr_register, | ||
3668 | .pr_reserve = dm_pr_reserve, | ||
3669 | .pr_release = dm_pr_release, | ||
3670 | .pr_preempt = dm_pr_preempt, | ||
3671 | .pr_clear = dm_pr_clear, | ||
3672 | }; | ||
3673 | |||
3520 | static const struct block_device_operations dm_blk_dops = { | 3674 | static const struct block_device_operations dm_blk_dops = { |
3521 | .open = dm_blk_open, | 3675 | .open = dm_blk_open, |
3522 | .release = dm_blk_close, | 3676 | .release = dm_blk_close, |
3523 | .ioctl = dm_blk_ioctl, | 3677 | .ioctl = dm_blk_ioctl, |
3524 | .getgeo = dm_blk_getgeo, | 3678 | .getgeo = dm_blk_getgeo, |
3679 | .pr_ops = &dm_pr_ops, | ||
3525 | .owner = THIS_MODULE | 3680 | .owner = THIS_MODULE |
3526 | }; | 3681 | }; |
3527 | 3682 | ||