Diffstat (limited to 'drivers/md/dm.c')
| -rw-r--r-- | drivers/md/dm.c | 1142 |
1 file changed, 1003 insertions(+), 139 deletions(-)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 424f7b048c30..3c6d4ee8921d 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
| @@ -19,11 +19,18 @@ | |||
| 19 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
| 20 | #include <linux/idr.h> | 20 | #include <linux/idr.h> |
| 21 | #include <linux/hdreg.h> | 21 | #include <linux/hdreg.h> |
| 22 | #include <linux/blktrace_api.h> | 22 | |
| 23 | #include <trace/block.h> | 23 | #include <trace/events/block.h> |
| 24 | 24 | ||
| 25 | #define DM_MSG_PREFIX "core" | 25 | #define DM_MSG_PREFIX "core" |
| 26 | 26 | ||
| 27 | /* | ||
| 28 | * Cookies are numeric values sent with CHANGE and REMOVE | ||
| 29 | * uevents while resuming, removing or renaming the device. | ||
| 30 | */ | ||
| 31 | #define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE" | ||
| 32 | #define DM_COOKIE_LENGTH 24 | ||
| 33 | |||
| 27 | static const char *_name = DM_NAME; | 34 | static const char *_name = DM_NAME; |
| 28 | 35 | ||
| 29 | static unsigned int major = 0; | 36 | static unsigned int major = 0; |
| @@ -53,8 +60,6 @@ struct dm_target_io { | |||
| 53 | union map_info info; | 60 | union map_info info; |
| 54 | }; | 61 | }; |
| 55 | 62 | ||
| 56 | DEFINE_TRACE(block_bio_complete); | ||
| 57 | |||
| 58 | /* | 63 | /* |
| 59 | * For request-based dm. | 64 | * For request-based dm. |
| 60 | * One of these is allocated per request. | 65 | * One of these is allocated per request. |
| @@ -73,7 +78,7 @@ struct dm_rq_target_io { | |||
| 73 | */ | 78 | */ |
| 74 | struct dm_rq_clone_bio_info { | 79 | struct dm_rq_clone_bio_info { |
| 75 | struct bio *orig; | 80 | struct bio *orig; |
| 76 | struct request *rq; | 81 | struct dm_rq_target_io *tio; |
| 77 | }; | 82 | }; |
| 78 | 83 | ||
| 79 | union map_info *dm_get_mapinfo(struct bio *bio) | 84 | union map_info *dm_get_mapinfo(struct bio *bio) |
| @@ -83,6 +88,14 @@ union map_info *dm_get_mapinfo(struct bio *bio) | |||
| 83 | return NULL; | 88 | return NULL; |
| 84 | } | 89 | } |
| 85 | 90 | ||
| 91 | union map_info *dm_get_rq_mapinfo(struct request *rq) | ||
| 92 | { | ||
| 93 | if (rq && rq->end_io_data) | ||
| 94 | return &((struct dm_rq_target_io *)rq->end_io_data)->info; | ||
| 95 | return NULL; | ||
| 96 | } | ||
| 97 | EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo); | ||
| 98 | |||
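Not part of the patch, for illustration: dm_get_rq_mapinfo() lets a request-based target recover the union map_info that dm attached to a clone (the clone's end_io_data points at the dm_rq_target_io). A minimal sketch; the per-I/O context kept in info->ptr is an assumption:

    /* Illustrative sketch only -- the info->ptr usage is hypothetical. */
    static void example_release_io_context(struct request *clone)
    {
            union map_info *info = dm_get_rq_mapinfo(clone);

            if (info && info->ptr) {
                    kfree(info->ptr);       /* context stashed earlier by map_rq() */
                    info->ptr = NULL;
            }
    }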
| 86 | #define MINOR_ALLOCED ((void *)-1) | 99 | #define MINOR_ALLOCED ((void *)-1) |
| 87 | 100 | ||
| 88 | /* | 101 | /* |
| @@ -159,13 +172,31 @@ struct mapped_device { | |||
| 159 | * freeze/thaw support require holding onto a super block | 172 | * freeze/thaw support require holding onto a super block |
| 160 | */ | 173 | */ |
| 161 | struct super_block *frozen_sb; | 174 | struct super_block *frozen_sb; |
| 162 | struct block_device *suspended_bdev; | 175 | struct block_device *bdev; |
| 163 | 176 | ||
| 164 | /* forced geometry settings */ | 177 | /* forced geometry settings */ |
| 165 | struct hd_geometry geometry; | 178 | struct hd_geometry geometry; |
| 166 | 179 | ||
| 180 | /* marker of flush suspend for request-based dm */ | ||
| 181 | struct request suspend_rq; | ||
| 182 | |||
| 183 | /* For saving the address of __make_request for request based dm */ | ||
| 184 | make_request_fn *saved_make_request_fn; | ||
| 185 | |||
| 167 | /* sysfs handle */ | 186 | /* sysfs handle */ |
| 168 | struct kobject kobj; | 187 | struct kobject kobj; |
| 188 | |||
| 189 | /* zero-length barrier that will be cloned and submitted to targets */ | ||
| 190 | struct bio barrier_bio; | ||
| 191 | }; | ||
| 192 | |||
| 193 | /* | ||
| 194 | * For mempools pre-allocation at the table loading time. | ||
| 195 | */ | ||
| 196 | struct dm_md_mempools { | ||
| 197 | mempool_t *io_pool; | ||
| 198 | mempool_t *tio_pool; | ||
| 199 | struct bio_set *bs; | ||
| 169 | }; | 200 | }; |
| 170 | 201 | ||
| 171 | #define MIN_IOS 256 | 202 | #define MIN_IOS 256 |
| @@ -393,14 +424,29 @@ static void free_io(struct mapped_device *md, struct dm_io *io) | |||
| 393 | mempool_free(io, md->io_pool); | 424 | mempool_free(io, md->io_pool); |
| 394 | } | 425 | } |
| 395 | 426 | ||
| 396 | static struct dm_target_io *alloc_tio(struct mapped_device *md) | 427 | static void free_tio(struct mapped_device *md, struct dm_target_io *tio) |
| 428 | { | ||
| 429 | mempool_free(tio, md->tio_pool); | ||
| 430 | } | ||
| 431 | |||
| 432 | static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md) | ||
| 397 | { | 433 | { |
| 398 | return mempool_alloc(md->tio_pool, GFP_NOIO); | 434 | return mempool_alloc(md->tio_pool, GFP_ATOMIC); |
| 399 | } | 435 | } |
| 400 | 436 | ||
| 401 | static void free_tio(struct mapped_device *md, struct dm_target_io *tio) | 437 | static void free_rq_tio(struct dm_rq_target_io *tio) |
| 402 | { | 438 | { |
| 403 | mempool_free(tio, md->tio_pool); | 439 | mempool_free(tio, tio->md->tio_pool); |
| 440 | } | ||
| 441 | |||
| 442 | static struct dm_rq_clone_bio_info *alloc_bio_info(struct mapped_device *md) | ||
| 443 | { | ||
| 444 | return mempool_alloc(md->io_pool, GFP_ATOMIC); | ||
| 445 | } | ||
| 446 | |||
| 447 | static void free_bio_info(struct dm_rq_clone_bio_info *info) | ||
| 448 | { | ||
| 449 | mempool_free(info, info->tio->md->io_pool); | ||
| 404 | } | 450 | } |
| 405 | 451 | ||
| 406 | static void start_io_acct(struct dm_io *io) | 452 | static void start_io_acct(struct dm_io *io) |
| @@ -466,12 +512,13 @@ static void queue_io(struct mapped_device *md, struct bio *bio) | |||
| 466 | struct dm_table *dm_get_table(struct mapped_device *md) | 512 | struct dm_table *dm_get_table(struct mapped_device *md) |
| 467 | { | 513 | { |
| 468 | struct dm_table *t; | 514 | struct dm_table *t; |
| 515 | unsigned long flags; | ||
| 469 | 516 | ||
| 470 | read_lock(&md->map_lock); | 517 | read_lock_irqsave(&md->map_lock, flags); |
| 471 | t = md->map; | 518 | t = md->map; |
| 472 | if (t) | 519 | if (t) |
| 473 | dm_table_get(t); | 520 | dm_table_get(t); |
| 474 | read_unlock(&md->map_lock); | 521 | read_unlock_irqrestore(&md->map_lock, flags); |
| 475 | 522 | ||
| 476 | return t; | 523 | return t; |
| 477 | } | 524 | } |
| @@ -538,9 +585,11 @@ static void dec_pending(struct dm_io *io, int error) | |||
| 538 | * Target requested pushing back the I/O. | 585 | * Target requested pushing back the I/O. |
| 539 | */ | 586 | */ |
| 540 | spin_lock_irqsave(&md->deferred_lock, flags); | 587 | spin_lock_irqsave(&md->deferred_lock, flags); |
| 541 | if (__noflush_suspending(md)) | 588 | if (__noflush_suspending(md)) { |
| 542 | bio_list_add_head(&md->deferred, io->bio); | 589 | if (!bio_barrier(io->bio)) |
| 543 | else | 590 | bio_list_add_head(&md->deferred, |
| 591 | io->bio); | ||
| 592 | } else | ||
| 544 | /* noflush suspend was interrupted. */ | 593 | /* noflush suspend was interrupted. */ |
| 545 | io->error = -EIO; | 594 | io->error = -EIO; |
| 546 | spin_unlock_irqrestore(&md->deferred_lock, flags); | 595 | spin_unlock_irqrestore(&md->deferred_lock, flags); |
| @@ -555,7 +604,8 @@ static void dec_pending(struct dm_io *io, int error) | |||
| 555 | * a per-device variable for error reporting. | 604 | * a per-device variable for error reporting. |
| 556 | * Note that you can't touch the bio after end_io_acct | 605 | * Note that you can't touch the bio after end_io_acct |
| 557 | */ | 606 | */ |
| 558 | md->barrier_error = io_error; | 607 | if (!md->barrier_error && io_error != -EOPNOTSUPP) |
| 608 | md->barrier_error = io_error; | ||
| 559 | end_io_acct(io); | 609 | end_io_acct(io); |
| 560 | } else { | 610 | } else { |
| 561 | end_io_acct(io); | 611 | end_io_acct(io); |
| @@ -609,6 +659,262 @@ static void clone_endio(struct bio *bio, int error) | |||
| 609 | dec_pending(io, error); | 659 | dec_pending(io, error); |
| 610 | } | 660 | } |
| 611 | 661 | ||
| 662 | /* | ||
| 663 | * Partial completion handling for request-based dm | ||
| 664 | */ | ||
| 665 | static void end_clone_bio(struct bio *clone, int error) | ||
| 666 | { | ||
| 667 | struct dm_rq_clone_bio_info *info = clone->bi_private; | ||
| 668 | struct dm_rq_target_io *tio = info->tio; | ||
| 669 | struct bio *bio = info->orig; | ||
| 670 | unsigned int nr_bytes = info->orig->bi_size; | ||
| 671 | |||
| 672 | bio_put(clone); | ||
| 673 | |||
| 674 | if (tio->error) | ||
| 675 | /* | ||
| 676 | * An error has already been detected on the request. | ||
| 677 | * Once error occurred, just let clone->end_io() handle | ||
| 678 | * the remainder. | ||
| 679 | */ | ||
| 680 | return; | ||
| 681 | else if (error) { | ||
| 682 | /* | ||
| 683 | * Don't notice the error to the upper layer yet. | ||
| 684 | * The error handling decision is made by the target driver, | ||
| 685 | * when the request is completed. | ||
| 686 | */ | ||
| 687 | tio->error = error; | ||
| 688 | return; | ||
| 689 | } | ||
| 690 | |||
| 691 | /* | ||
| 692 | * I/O for the bio successfully completed. | ||
| 693 | * Notice the data completion to the upper layer. | ||
| 694 | */ | ||
| 695 | |||
| 696 | /* | ||
| 697 | * bios are processed from the head of the list. | ||
| 698 | * So the completing bio should always be rq->bio. | ||
| 699 | * If it's not, something wrong is happening. | ||
| 700 | */ | ||
| 701 | if (tio->orig->bio != bio) | ||
| 702 | DMERR("bio completion is going in the middle of the request"); | ||
| 703 | |||
| 704 | /* | ||
| 705 | * Update the original request. | ||
| 706 | * Do not use blk_end_request() here, because it may complete | ||
| 707 | * the original request before the clone, and break the ordering. | ||
| 708 | */ | ||
| 709 | blk_update_request(tio->orig, 0, nr_bytes); | ||
| 710 | } | ||
| 711 | |||
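Not part of the patch, for concreteness: if the original request carries three 4 KiB bios, the first clone bio to complete results in blk_update_request(tio->orig, 0, 4096), which retires only the first 4 KiB of the original request; the remaining bios are retired the same way as their clones complete, and dm_end_request() finishes whatever is left with blk_end_request_all(). The DMERR above only fires if a clone bio completes while it is not the head bio (rq->bio) of the original request.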
| 712 | /* | ||
| 713 | * Don't touch any member of the md after calling this function because | ||
| 714 | * the md may be freed in dm_put() at the end of this function. | ||
| 715 | * Or do dm_get() before calling this function and dm_put() later. | ||
| 716 | */ | ||
| 717 | static void rq_completed(struct mapped_device *md, int run_queue) | ||
| 718 | { | ||
| 719 | int wakeup_waiters = 0; | ||
| 720 | struct request_queue *q = md->queue; | ||
| 721 | unsigned long flags; | ||
| 722 | |||
| 723 | spin_lock_irqsave(q->queue_lock, flags); | ||
| 724 | if (!queue_in_flight(q)) | ||
| 725 | wakeup_waiters = 1; | ||
| 726 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
| 727 | |||
| 728 | /* nudge anyone waiting on suspend queue */ | ||
| 729 | if (wakeup_waiters) | ||
| 730 | wake_up(&md->wait); | ||
| 731 | |||
| 732 | if (run_queue) | ||
| 733 | blk_run_queue(q); | ||
| 734 | |||
| 735 | /* | ||
| 736 | * dm_put() must be at the end of this function. See the comment above | ||
| 737 | */ | ||
| 738 | dm_put(md); | ||
| 739 | } | ||
| 740 | |||
| 741 | static void dm_unprep_request(struct request *rq) | ||
| 742 | { | ||
| 743 | struct request *clone = rq->special; | ||
| 744 | struct dm_rq_target_io *tio = clone->end_io_data; | ||
| 745 | |||
| 746 | rq->special = NULL; | ||
| 747 | rq->cmd_flags &= ~REQ_DONTPREP; | ||
| 748 | |||
| 749 | blk_rq_unprep_clone(clone); | ||
| 750 | free_rq_tio(tio); | ||
| 751 | } | ||
| 752 | |||
| 753 | /* | ||
| 754 | * Requeue the original request of a clone. | ||
| 755 | */ | ||
| 756 | void dm_requeue_unmapped_request(struct request *clone) | ||
| 757 | { | ||
| 758 | struct dm_rq_target_io *tio = clone->end_io_data; | ||
| 759 | struct mapped_device *md = tio->md; | ||
| 760 | struct request *rq = tio->orig; | ||
| 761 | struct request_queue *q = rq->q; | ||
| 762 | unsigned long flags; | ||
| 763 | |||
| 764 | dm_unprep_request(rq); | ||
| 765 | |||
| 766 | spin_lock_irqsave(q->queue_lock, flags); | ||
| 767 | if (elv_queue_empty(q)) | ||
| 768 | blk_plug_device(q); | ||
| 769 | blk_requeue_request(q, rq); | ||
| 770 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
| 771 | |||
| 772 | rq_completed(md, 0); | ||
| 773 | } | ||
| 774 | EXPORT_SYMBOL_GPL(dm_requeue_unmapped_request); | ||
| 775 | |||
| 776 | static void __stop_queue(struct request_queue *q) | ||
| 777 | { | ||
| 778 | blk_stop_queue(q); | ||
| 779 | } | ||
| 780 | |||
| 781 | static void stop_queue(struct request_queue *q) | ||
| 782 | { | ||
| 783 | unsigned long flags; | ||
| 784 | |||
| 785 | spin_lock_irqsave(q->queue_lock, flags); | ||
| 786 | __stop_queue(q); | ||
| 787 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
| 788 | } | ||
| 789 | |||
| 790 | static void __start_queue(struct request_queue *q) | ||
| 791 | { | ||
| 792 | if (blk_queue_stopped(q)) | ||
| 793 | blk_start_queue(q); | ||
| 794 | } | ||
| 795 | |||
| 796 | static void start_queue(struct request_queue *q) | ||
| 797 | { | ||
| 798 | unsigned long flags; | ||
| 799 | |||
| 800 | spin_lock_irqsave(q->queue_lock, flags); | ||
| 801 | __start_queue(q); | ||
| 802 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
| 803 | } | ||
| 804 | |||
| 805 | /* | ||
| 806 | * Complete the clone and the original request. | ||
| 807 | * Must be called without queue lock. | ||
| 808 | */ | ||
| 809 | static void dm_end_request(struct request *clone, int error) | ||
| 810 | { | ||
| 811 | struct dm_rq_target_io *tio = clone->end_io_data; | ||
| 812 | struct mapped_device *md = tio->md; | ||
| 813 | struct request *rq = tio->orig; | ||
| 814 | |||
| 815 | if (blk_pc_request(rq)) { | ||
| 816 | rq->errors = clone->errors; | ||
| 817 | rq->resid_len = clone->resid_len; | ||
| 818 | |||
| 819 | if (rq->sense) | ||
| 820 | /* | ||
| 821 | * We are using the sense buffer of the original | ||
| 822 | * request. | ||
| 823 | * So setting the length of the sense data is enough. | ||
| 824 | */ | ||
| 825 | rq->sense_len = clone->sense_len; | ||
| 826 | } | ||
| 827 | |||
| 828 | BUG_ON(clone->bio); | ||
| 829 | free_rq_tio(tio); | ||
| 830 | |||
| 831 | blk_end_request_all(rq, error); | ||
| 832 | |||
| 833 | rq_completed(md, 1); | ||
| 834 | } | ||
| 835 | |||
| 836 | /* | ||
| 837 | * Request completion handler for request-based dm | ||
| 838 | */ | ||
| 839 | static void dm_softirq_done(struct request *rq) | ||
| 840 | { | ||
| 841 | struct request *clone = rq->completion_data; | ||
| 842 | struct dm_rq_target_io *tio = clone->end_io_data; | ||
| 843 | dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io; | ||
| 844 | int error = tio->error; | ||
| 845 | |||
| 846 | if (!(rq->cmd_flags & REQ_FAILED) && rq_end_io) | ||
| 847 | error = rq_end_io(tio->ti, clone, error, &tio->info); | ||
| 848 | |||
| 849 | if (error <= 0) | ||
| 850 | /* The target wants to complete the I/O */ | ||
| 851 | dm_end_request(clone, error); | ||
| 852 | else if (error == DM_ENDIO_INCOMPLETE) | ||
| 853 | /* The target will handle the I/O */ | ||
| 854 | return; | ||
| 855 | else if (error == DM_ENDIO_REQUEUE) | ||
| 856 | /* The target wants to requeue the I/O */ | ||
| 857 | dm_requeue_unmapped_request(clone); | ||
| 858 | else { | ||
| 859 | DMWARN("unimplemented target endio return value: %d", error); | ||
| 860 | BUG(); | ||
| 861 | } | ||
| 862 | } | ||
| 863 | |||
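Not part of the patch, for illustration: dm_softirq_done() defines the contract for a target's rq_end_io hook. Return 0 or a negative errno to complete the original request, DM_ENDIO_REQUEUE to have dm requeue it, or DM_ENDIO_INCOMPLETE to take over completion. A hedged sketch, where example_worth_retrying() is hypothetical:

    /* Illustrative only: how a target's rq_end_io can drive dm_softirq_done(). */
    static int example_rq_end_io(struct dm_target *ti, struct request *clone,
                                 int error, union map_info *map_context)
    {
            if (!error)
                    return 0;                       /* complete the original request */

            if (example_worth_retrying(ti, error))  /* hypothetical policy check */
                    return DM_ENDIO_REQUEUE;        /* dm requeues the original */

            return error;                           /* fail the original with this errno */
    }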
| 864 | /* | ||
| 865 | * Complete the clone and the original request with the error status | ||
| 866 | * through softirq context. | ||
| 867 | */ | ||
| 868 | static void dm_complete_request(struct request *clone, int error) | ||
| 869 | { | ||
| 870 | struct dm_rq_target_io *tio = clone->end_io_data; | ||
| 871 | struct request *rq = tio->orig; | ||
| 872 | |||
| 873 | tio->error = error; | ||
| 874 | rq->completion_data = clone; | ||
| 875 | blk_complete_request(rq); | ||
| 876 | } | ||
| 877 | |||
| 878 | /* | ||
| 879 | * Complete the not-mapped clone and the original request with the error status | ||
| 880 | * through softirq context. | ||
| 881 | * Target's rq_end_io() function isn't called. | ||
| 882 | * This may be used when the target's map_rq() function fails. | ||
| 883 | */ | ||
| 884 | void dm_kill_unmapped_request(struct request *clone, int error) | ||
| 885 | { | ||
| 886 | struct dm_rq_target_io *tio = clone->end_io_data; | ||
| 887 | struct request *rq = tio->orig; | ||
| 888 | |||
| 889 | rq->cmd_flags |= REQ_FAILED; | ||
| 890 | dm_complete_request(clone, error); | ||
| 891 | } | ||
| 892 | EXPORT_SYMBOL_GPL(dm_kill_unmapped_request); | ||
| 893 | |||
| 894 | /* | ||
| 895 | * Called with the queue lock held | ||
| 896 | */ | ||
| 897 | static void end_clone_request(struct request *clone, int error) | ||
| 898 | { | ||
| 899 | /* | ||
| 900 | * For just cleaning up the information of the queue in which | ||
| 901 | * the clone was dispatched. | ||
| 902 | * The clone is *NOT* freed actually here because it is alloced from | ||
| 903 | * dm own mempool and REQ_ALLOCED isn't set in clone->cmd_flags. | ||
| 904 | */ | ||
| 905 | __blk_put_request(clone->q, clone); | ||
| 906 | |||
| 907 | /* | ||
| 908 | * Actual request completion is done in a softirq context which doesn't | ||
| 909 | * hold the queue lock. Otherwise, deadlock could occur because: | ||
| 910 | * - another request may be submitted by the upper level driver | ||
| 911 | * of the stacking during the completion | ||
| 912 | * - the submission which requires queue lock may be done | ||
| 913 | * against this queue | ||
| 914 | */ | ||
| 915 | dm_complete_request(clone, error); | ||
| 916 | } | ||
| 917 | |||
| 612 | static sector_t max_io_len(struct mapped_device *md, | 918 | static sector_t max_io_len(struct mapped_device *md, |
| 613 | sector_t sector, struct dm_target *ti) | 919 | sector_t sector, struct dm_target *ti) |
| 614 | { | 920 | { |
| @@ -636,11 +942,6 @@ static void __map_bio(struct dm_target *ti, struct bio *clone, | |||
| 636 | sector_t sector; | 942 | sector_t sector; |
| 637 | struct mapped_device *md; | 943 | struct mapped_device *md; |
| 638 | 944 | ||
| 639 | /* | ||
| 640 | * Sanity checks. | ||
| 641 | */ | ||
| 642 | BUG_ON(!clone->bi_size); | ||
| 643 | |||
| 644 | clone->bi_end_io = clone_endio; | 945 | clone->bi_end_io = clone_endio; |
| 645 | clone->bi_private = tio; | 946 | clone->bi_private = tio; |
| 646 | 947 | ||
| @@ -656,8 +957,7 @@ static void __map_bio(struct dm_target *ti, struct bio *clone, | |||
| 656 | /* the bio has been remapped so dispatch it */ | 957 | /* the bio has been remapped so dispatch it */ |
| 657 | 958 | ||
| 658 | trace_block_remap(bdev_get_queue(clone->bi_bdev), clone, | 959 | trace_block_remap(bdev_get_queue(clone->bi_bdev), clone, |
| 659 | tio->io->bio->bi_bdev->bd_dev, | 960 | tio->io->bio->bi_bdev->bd_dev, sector); |
| 660 | clone->bi_sector, sector); | ||
| 661 | 961 | ||
| 662 | generic_make_request(clone); | 962 | generic_make_request(clone); |
| 663 | } else if (r < 0 || r == DM_MAPIO_REQUEUE) { | 963 | } else if (r < 0 || r == DM_MAPIO_REQUEUE) { |
| @@ -755,6 +1055,48 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector, | |||
| 755 | return clone; | 1055 | return clone; |
| 756 | } | 1056 | } |
| 757 | 1057 | ||
| 1058 | static struct dm_target_io *alloc_tio(struct clone_info *ci, | ||
| 1059 | struct dm_target *ti) | ||
| 1060 | { | ||
| 1061 | struct dm_target_io *tio = mempool_alloc(ci->md->tio_pool, GFP_NOIO); | ||
| 1062 | |||
| 1063 | tio->io = ci->io; | ||
| 1064 | tio->ti = ti; | ||
| 1065 | memset(&tio->info, 0, sizeof(tio->info)); | ||
| 1066 | |||
| 1067 | return tio; | ||
| 1068 | } | ||
| 1069 | |||
| 1070 | static void __flush_target(struct clone_info *ci, struct dm_target *ti, | ||
| 1071 | unsigned flush_nr) | ||
| 1072 | { | ||
| 1073 | struct dm_target_io *tio = alloc_tio(ci, ti); | ||
| 1074 | struct bio *clone; | ||
| 1075 | |||
| 1076 | tio->info.flush_request = flush_nr; | ||
| 1077 | |||
| 1078 | clone = bio_alloc_bioset(GFP_NOIO, 0, ci->md->bs); | ||
| 1079 | __bio_clone(clone, ci->bio); | ||
| 1080 | clone->bi_destructor = dm_bio_destructor; | ||
| 1081 | |||
| 1082 | __map_bio(ti, clone, tio); | ||
| 1083 | } | ||
| 1084 | |||
| 1085 | static int __clone_and_map_empty_barrier(struct clone_info *ci) | ||
| 1086 | { | ||
| 1087 | unsigned target_nr = 0, flush_nr; | ||
| 1088 | struct dm_target *ti; | ||
| 1089 | |||
| 1090 | while ((ti = dm_table_get_target(ci->map, target_nr++))) | ||
| 1091 | for (flush_nr = 0; flush_nr < ti->num_flush_requests; | ||
| 1092 | flush_nr++) | ||
| 1093 | __flush_target(ci, ti, flush_nr); | ||
| 1094 | |||
| 1095 | ci->sector_count = 0; | ||
| 1096 | |||
| 1097 | return 0; | ||
| 1098 | } | ||
| 1099 | |||
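Not part of the patch, for illustration: the empty-barrier fan-out above relies on each target declaring ti->num_flush_requests; __flush_target() then issues that many zero-length clones, numbering them in tio->info.flush_request. A hedged sketch of a constructor for a hypothetical target that wants one flush per argument:

    /* Illustrative only: announce one zero-length barrier clone per leg. */
    static int example_ctr(struct dm_target *ti, unsigned int argc, char **argv)
    {
            ti->num_flush_requests = argc;  /* assumption: one underlying leg per arg */

            return 0;
    }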
| 758 | static int __clone_and_map(struct clone_info *ci) | 1100 | static int __clone_and_map(struct clone_info *ci) |
| 759 | { | 1101 | { |
| 760 | struct bio *clone, *bio = ci->bio; | 1102 | struct bio *clone, *bio = ci->bio; |
| @@ -762,6 +1104,9 @@ static int __clone_and_map(struct clone_info *ci) | |||
| 762 | sector_t len = 0, max; | 1104 | sector_t len = 0, max; |
| 763 | struct dm_target_io *tio; | 1105 | struct dm_target_io *tio; |
| 764 | 1106 | ||
| 1107 | if (unlikely(bio_empty_barrier(bio))) | ||
| 1108 | return __clone_and_map_empty_barrier(ci); | ||
| 1109 | |||
| 765 | ti = dm_table_find_target(ci->map, ci->sector); | 1110 | ti = dm_table_find_target(ci->map, ci->sector); |
| 766 | if (!dm_target_is_valid(ti)) | 1111 | if (!dm_target_is_valid(ti)) |
| 767 | return -EIO; | 1112 | return -EIO; |
| @@ -771,10 +1116,7 @@ static int __clone_and_map(struct clone_info *ci) | |||
| 771 | /* | 1116 | /* |
| 772 | * Allocate a target io object. | 1117 | * Allocate a target io object. |
| 773 | */ | 1118 | */ |
| 774 | tio = alloc_tio(ci->md); | 1119 | tio = alloc_tio(ci, ti); |
| 775 | tio->io = ci->io; | ||
| 776 | tio->ti = ti; | ||
| 777 | memset(&tio->info, 0, sizeof(tio->info)); | ||
| 778 | 1120 | ||
| 779 | if (ci->sector_count <= max) { | 1121 | if (ci->sector_count <= max) { |
| 780 | /* | 1122 | /* |
| @@ -830,10 +1172,7 @@ static int __clone_and_map(struct clone_info *ci) | |||
| 830 | 1172 | ||
| 831 | max = max_io_len(ci->md, ci->sector, ti); | 1173 | max = max_io_len(ci->md, ci->sector, ti); |
| 832 | 1174 | ||
| 833 | tio = alloc_tio(ci->md); | 1175 | tio = alloc_tio(ci, ti); |
| 834 | tio->io = ci->io; | ||
| 835 | tio->ti = ti; | ||
| 836 | memset(&tio->info, 0, sizeof(tio->info)); | ||
| 837 | } | 1176 | } |
| 838 | 1177 | ||
| 839 | len = min(remaining, max); | 1178 | len = min(remaining, max); |
| @@ -868,7 +1207,8 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) | |||
| 868 | if (!bio_barrier(bio)) | 1207 | if (!bio_barrier(bio)) |
| 869 | bio_io_error(bio); | 1208 | bio_io_error(bio); |
| 870 | else | 1209 | else |
| 871 | md->barrier_error = -EIO; | 1210 | if (!md->barrier_error) |
| 1211 | md->barrier_error = -EIO; | ||
| 872 | return; | 1212 | return; |
| 873 | } | 1213 | } |
| 874 | 1214 | ||
| @@ -881,6 +1221,8 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) | |||
| 881 | ci.io->md = md; | 1221 | ci.io->md = md; |
| 882 | ci.sector = bio->bi_sector; | 1222 | ci.sector = bio->bi_sector; |
| 883 | ci.sector_count = bio_sectors(bio); | 1223 | ci.sector_count = bio_sectors(bio); |
| 1224 | if (unlikely(bio_empty_barrier(bio))) | ||
| 1225 | ci.sector_count = 1; | ||
| 884 | ci.idx = bio->bi_idx; | 1226 | ci.idx = bio->bi_idx; |
| 885 | 1227 | ||
| 886 | start_io_acct(ci.io); | 1228 | start_io_acct(ci.io); |
| @@ -928,6 +1270,16 @@ static int dm_merge_bvec(struct request_queue *q, | |||
| 928 | */ | 1270 | */ |
| 929 | if (max_size && ti->type->merge) | 1271 | if (max_size && ti->type->merge) |
| 930 | max_size = ti->type->merge(ti, bvm, biovec, max_size); | 1272 | max_size = ti->type->merge(ti, bvm, biovec, max_size); |
| 1273 | /* | ||
| 1274 | * If the target doesn't support merge method and some of the devices | ||
| 1275 | * provided their merge_bvec method (we know this by looking at | ||
| 1276 | * queue_max_hw_sectors), then we can't allow bios with multiple vector | ||
| 1277 | * entries. So always set max_size to 0, and the code below allows | ||
| 1278 | * just one page. | ||
| 1279 | */ | ||
| 1280 | else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9) | ||
| 1281 | |||
| 1282 | max_size = 0; | ||
| 931 | 1283 | ||
| 932 | out_table: | 1284 | out_table: |
| 933 | dm_table_put(map); | 1285 | dm_table_put(map); |
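Not part of the patch, for concreteness: with 4 KiB pages, PAGE_SIZE >> 9 is 8 sectors, so the new branch catches queues whose max_hw_sectors was clamped to a single page (dm-table's dm_set_device_limits() does that when an underlying device has a merge_bvec_fn the target cannot consult) and forces max_size to 0 so each bio stays within one page.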
| @@ -946,7 +1298,7 @@ out: | |||
| 946 | * The request function that just remaps the bio built up by | 1298 | * The request function that just remaps the bio built up by |
| 947 | * dm_merge_bvec. | 1299 | * dm_merge_bvec. |
| 948 | */ | 1300 | */ |
| 949 | static int dm_request(struct request_queue *q, struct bio *bio) | 1301 | static int _dm_request(struct request_queue *q, struct bio *bio) |
| 950 | { | 1302 | { |
| 951 | int rw = bio_data_dir(bio); | 1303 | int rw = bio_data_dir(bio); |
| 952 | struct mapped_device *md = q->queuedata; | 1304 | struct mapped_device *md = q->queuedata; |
| @@ -983,12 +1335,274 @@ static int dm_request(struct request_queue *q, struct bio *bio) | |||
| 983 | return 0; | 1335 | return 0; |
| 984 | } | 1336 | } |
| 985 | 1337 | ||
| 1338 | static int dm_make_request(struct request_queue *q, struct bio *bio) | ||
| 1339 | { | ||
| 1340 | struct mapped_device *md = q->queuedata; | ||
| 1341 | |||
| 1342 | if (unlikely(bio_barrier(bio))) { | ||
| 1343 | bio_endio(bio, -EOPNOTSUPP); | ||
| 1344 | return 0; | ||
| 1345 | } | ||
| 1346 | |||
| 1347 | return md->saved_make_request_fn(q, bio); /* call __make_request() */ | ||
| 1348 | } | ||
| 1349 | |||
| 1350 | static int dm_request_based(struct mapped_device *md) | ||
| 1351 | { | ||
| 1352 | return blk_queue_stackable(md->queue); | ||
| 1353 | } | ||
| 1354 | |||
| 1355 | static int dm_request(struct request_queue *q, struct bio *bio) | ||
| 1356 | { | ||
| 1357 | struct mapped_device *md = q->queuedata; | ||
| 1358 | |||
| 1359 | if (dm_request_based(md)) | ||
| 1360 | return dm_make_request(q, bio); | ||
| 1361 | |||
| 1362 | return _dm_request(q, bio); | ||
| 1363 | } | ||
| 1364 | |||
| 1365 | void dm_dispatch_request(struct request *rq) | ||
| 1366 | { | ||
| 1367 | int r; | ||
| 1368 | |||
| 1369 | if (blk_queue_io_stat(rq->q)) | ||
| 1370 | rq->cmd_flags |= REQ_IO_STAT; | ||
| 1371 | |||
| 1372 | rq->start_time = jiffies; | ||
| 1373 | r = blk_insert_cloned_request(rq->q, rq); | ||
| 1374 | if (r) | ||
| 1375 | dm_complete_request(rq, r); | ||
| 1376 | } | ||
| 1377 | EXPORT_SYMBOL_GPL(dm_dispatch_request); | ||
| 1378 | |||
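Not part of the patch, for illustration: dm_dispatch_request() is exported so a target that answered DM_MAPIO_SUBMITTED from its map_rq hook can hold on to the clone and push it out itself later. A minimal sketch; it assumes the clone's destination (clone->q, clone->rq_disk) has been set as in a normal map_rq:

    /* Illustrative only: dispatch a clone that map_rq() held back. */
    static void example_dispatch_held_clone(struct request *clone)
    {
            dm_dispatch_request(clone);     /* ends up in blk_insert_cloned_request() */
    }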
| 1379 | static void dm_rq_bio_destructor(struct bio *bio) | ||
| 1380 | { | ||
| 1381 | struct dm_rq_clone_bio_info *info = bio->bi_private; | ||
| 1382 | struct mapped_device *md = info->tio->md; | ||
| 1383 | |||
| 1384 | free_bio_info(info); | ||
| 1385 | bio_free(bio, md->bs); | ||
| 1386 | } | ||
| 1387 | |||
| 1388 | static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, | ||
| 1389 | void *data) | ||
| 1390 | { | ||
| 1391 | struct dm_rq_target_io *tio = data; | ||
| 1392 | struct mapped_device *md = tio->md; | ||
| 1393 | struct dm_rq_clone_bio_info *info = alloc_bio_info(md); | ||
| 1394 | |||
| 1395 | if (!info) | ||
| 1396 | return -ENOMEM; | ||
| 1397 | |||
| 1398 | info->orig = bio_orig; | ||
| 1399 | info->tio = tio; | ||
| 1400 | bio->bi_end_io = end_clone_bio; | ||
| 1401 | bio->bi_private = info; | ||
| 1402 | bio->bi_destructor = dm_rq_bio_destructor; | ||
| 1403 | |||
| 1404 | return 0; | ||
| 1405 | } | ||
| 1406 | |||
| 1407 | static int setup_clone(struct request *clone, struct request *rq, | ||
| 1408 | struct dm_rq_target_io *tio) | ||
| 1409 | { | ||
| 1410 | int r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC, | ||
| 1411 | dm_rq_bio_constructor, tio); | ||
| 1412 | |||
| 1413 | if (r) | ||
| 1414 | return r; | ||
| 1415 | |||
| 1416 | clone->cmd = rq->cmd; | ||
| 1417 | clone->cmd_len = rq->cmd_len; | ||
| 1418 | clone->sense = rq->sense; | ||
| 1419 | clone->buffer = rq->buffer; | ||
| 1420 | clone->end_io = end_clone_request; | ||
| 1421 | clone->end_io_data = tio; | ||
| 1422 | |||
| 1423 | return 0; | ||
| 1424 | } | ||
| 1425 | |||
| 1426 | static int dm_rq_flush_suspending(struct mapped_device *md) | ||
| 1427 | { | ||
| 1428 | return !md->suspend_rq.special; | ||
| 1429 | } | ||
| 1430 | |||
| 1431 | /* | ||
| 1432 | * Called with the queue lock held. | ||
| 1433 | */ | ||
| 1434 | static int dm_prep_fn(struct request_queue *q, struct request *rq) | ||
| 1435 | { | ||
| 1436 | struct mapped_device *md = q->queuedata; | ||
| 1437 | struct dm_rq_target_io *tio; | ||
| 1438 | struct request *clone; | ||
| 1439 | |||
| 1440 | if (unlikely(rq == &md->suspend_rq)) { | ||
| 1441 | if (dm_rq_flush_suspending(md)) | ||
| 1442 | return BLKPREP_OK; | ||
| 1443 | else | ||
| 1444 | /* The flush suspend was interrupted */ | ||
| 1445 | return BLKPREP_KILL; | ||
| 1446 | } | ||
| 1447 | |||
| 1448 | if (unlikely(rq->special)) { | ||
| 1449 | DMWARN("Already has something in rq->special."); | ||
| 1450 | return BLKPREP_KILL; | ||
| 1451 | } | ||
| 1452 | |||
| 1453 | tio = alloc_rq_tio(md); /* Only one for each original request */ | ||
| 1454 | if (!tio) | ||
| 1455 | /* -ENOMEM */ | ||
| 1456 | return BLKPREP_DEFER; | ||
| 1457 | |||
| 1458 | tio->md = md; | ||
| 1459 | tio->ti = NULL; | ||
| 1460 | tio->orig = rq; | ||
| 1461 | tio->error = 0; | ||
| 1462 | memset(&tio->info, 0, sizeof(tio->info)); | ||
| 1463 | |||
| 1464 | clone = &tio->clone; | ||
| 1465 | if (setup_clone(clone, rq, tio)) { | ||
| 1466 | /* -ENOMEM */ | ||
| 1467 | free_rq_tio(tio); | ||
| 1468 | return BLKPREP_DEFER; | ||
| 1469 | } | ||
| 1470 | |||
| 1471 | rq->special = clone; | ||
| 1472 | rq->cmd_flags |= REQ_DONTPREP; | ||
| 1473 | |||
| 1474 | return BLKPREP_OK; | ||
| 1475 | } | ||
| 1476 | |||
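Not part of the patch, for context: the return values follow the block layer's prep_rq_fn contract. BLKPREP_DEFER leaves the request on the queue so the (GFP_ATOMIC) allocations are retried later, BLKPREP_KILL fails the request, and BLKPREP_OK marks it prepared with the clone stashed in rq->special and REQ_DONTPREP set so prep is not run again.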
| 1477 | static void map_request(struct dm_target *ti, struct request *rq, | ||
| 1478 | struct mapped_device *md) | ||
| 1479 | { | ||
| 1480 | int r; | ||
| 1481 | struct request *clone = rq->special; | ||
| 1482 | struct dm_rq_target_io *tio = clone->end_io_data; | ||
| 1483 | |||
| 1484 | /* | ||
| 1485 | * Hold the md reference here for the in-flight I/O. | ||
| 1486 | * We can't rely on the reference count by device opener, | ||
| 1487 | * because the device may be closed during the request completion | ||
| 1488 | * when all bios are completed. | ||
| 1489 | * See the comment in rq_completed() too. | ||
| 1490 | */ | ||
| 1491 | dm_get(md); | ||
| 1492 | |||
| 1493 | tio->ti = ti; | ||
| 1494 | r = ti->type->map_rq(ti, clone, &tio->info); | ||
| 1495 | switch (r) { | ||
| 1496 | case DM_MAPIO_SUBMITTED: | ||
| 1497 | /* The target has taken the I/O to submit by itself later */ | ||
| 1498 | break; | ||
| 1499 | case DM_MAPIO_REMAPPED: | ||
| 1500 | /* The target has remapped the I/O so dispatch it */ | ||
| 1501 | dm_dispatch_request(clone); | ||
| 1502 | break; | ||
| 1503 | case DM_MAPIO_REQUEUE: | ||
| 1504 | /* The target wants to requeue the I/O */ | ||
| 1505 | dm_requeue_unmapped_request(clone); | ||
| 1506 | break; | ||
| 1507 | default: | ||
| 1508 | if (r > 0) { | ||
| 1509 | DMWARN("unimplemented target map return value: %d", r); | ||
| 1510 | BUG(); | ||
| 1511 | } | ||
| 1512 | |||
| 1513 | /* The target wants to complete the I/O */ | ||
| 1514 | dm_kill_unmapped_request(clone, r); | ||
| 1515 | break; | ||
| 1516 | } | ||
| 1517 | } | ||
| 1518 | |||
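Not part of the patch, for illustration: the usual shape of a map_rq hook feeding map_request() above, with example_choose_bdev() as a hypothetical path/device selector:

    static int example_map_rq(struct dm_target *ti, struct request *clone,
                              union map_info *map_context)
    {
            struct block_device *bdev = example_choose_bdev(ti);    /* hypothetical */

            if (!bdev)
                    return DM_MAPIO_REQUEUE;        /* nothing usable now, retry later */

            clone->q = bdev_get_queue(bdev);        /* redirect the clone */
            clone->rq_disk = bdev->bd_disk;

            return DM_MAPIO_REMAPPED;               /* dm_dispatch_request() sends it out */
    }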
| 1519 | /* | ||
| 1520 | * q->request_fn for request-based dm. | ||
| 1521 | * Called with the queue lock held. | ||
| 1522 | */ | ||
| 1523 | static void dm_request_fn(struct request_queue *q) | ||
| 1524 | { | ||
| 1525 | struct mapped_device *md = q->queuedata; | ||
| 1526 | struct dm_table *map = dm_get_table(md); | ||
| 1527 | struct dm_target *ti; | ||
| 1528 | struct request *rq; | ||
| 1529 | |||
| 1530 | /* | ||
| 1531 | * For noflush suspend, check blk_queue_stopped() to immediately | ||
| 1532 | * quit I/O dispatching. | ||
| 1533 | */ | ||
| 1534 | while (!blk_queue_plugged(q) && !blk_queue_stopped(q)) { | ||
| 1535 | rq = blk_peek_request(q); | ||
| 1536 | if (!rq) | ||
| 1537 | goto plug_and_out; | ||
| 1538 | |||
| 1539 | if (unlikely(rq == &md->suspend_rq)) { /* Flush suspend marker */ | ||
| 1540 | if (queue_in_flight(q)) | ||
| 1541 | /* Not quiet yet. Wait more */ | ||
| 1542 | goto plug_and_out; | ||
| 1543 | |||
| 1544 | /* This device should be quiet now */ | ||
| 1545 | __stop_queue(q); | ||
| 1546 | blk_start_request(rq); | ||
| 1547 | __blk_end_request_all(rq, 0); | ||
| 1548 | wake_up(&md->wait); | ||
| 1549 | goto out; | ||
| 1550 | } | ||
| 1551 | |||
| 1552 | ti = dm_table_find_target(map, blk_rq_pos(rq)); | ||
| 1553 | if (ti->type->busy && ti->type->busy(ti)) | ||
| 1554 | goto plug_and_out; | ||
| 1555 | |||
| 1556 | blk_start_request(rq); | ||
| 1557 | spin_unlock(q->queue_lock); | ||
| 1558 | map_request(ti, rq, md); | ||
| 1559 | spin_lock_irq(q->queue_lock); | ||
| 1560 | } | ||
| 1561 | |||
| 1562 | goto out; | ||
| 1563 | |||
| 1564 | plug_and_out: | ||
| 1565 | if (!elv_queue_empty(q)) | ||
| 1566 | /* Some requests still remain, retry later */ | ||
| 1567 | blk_plug_device(q); | ||
| 1568 | |||
| 1569 | out: | ||
| 1570 | dm_table_put(map); | ||
| 1571 | |||
| 1572 | return; | ||
| 1573 | } | ||
| 1574 | |||
| 1575 | int dm_underlying_device_busy(struct request_queue *q) | ||
| 1576 | { | ||
| 1577 | return blk_lld_busy(q); | ||
| 1578 | } | ||
| 1579 | EXPORT_SYMBOL_GPL(dm_underlying_device_busy); | ||
| 1580 | |||
| 1581 | static int dm_lld_busy(struct request_queue *q) | ||
| 1582 | { | ||
| 1583 | int r; | ||
| 1584 | struct mapped_device *md = q->queuedata; | ||
| 1585 | struct dm_table *map = dm_get_table(md); | ||
| 1586 | |||
| 1587 | if (!map || test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) | ||
| 1588 | r = 1; | ||
| 1589 | else | ||
| 1590 | r = dm_table_any_busy_target(map); | ||
| 1591 | |||
| 1592 | dm_table_put(map); | ||
| 1593 | |||
| 1594 | return r; | ||
| 1595 | } | ||
| 1596 | |||
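Not part of the patch, for illustration: dm_lld_busy() answers the block layer's query about the dm queue itself, while a target's own busy hook is what dm_request_fn() consults before dispatching; returning non-zero keeps the request on the dm queue for more merging. A hedged sketch with a hypothetical per-target context:

    struct example_ctx {                    /* hypothetical per-target context */
            struct block_device *bdev;
    };

    static int example_busy(struct dm_target *ti)
    {
            struct example_ctx *ctx = ti->private;

            return dm_underlying_device_busy(bdev_get_queue(ctx->bdev));
    }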
| 986 | static void dm_unplug_all(struct request_queue *q) | 1597 | static void dm_unplug_all(struct request_queue *q) |
| 987 | { | 1598 | { |
| 988 | struct mapped_device *md = q->queuedata; | 1599 | struct mapped_device *md = q->queuedata; |
| 989 | struct dm_table *map = dm_get_table(md); | 1600 | struct dm_table *map = dm_get_table(md); |
| 990 | 1601 | ||
| 991 | if (map) { | 1602 | if (map) { |
| 1603 | if (dm_request_based(md)) | ||
| 1604 | generic_unplug_device(q); | ||
| 1605 | |||
| 992 | dm_table_unplug_all(map); | 1606 | dm_table_unplug_all(map); |
| 993 | dm_table_put(map); | 1607 | dm_table_put(map); |
| 994 | } | 1608 | } |
| @@ -1003,7 +1617,16 @@ static int dm_any_congested(void *congested_data, int bdi_bits) | |||
| 1003 | if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { | 1617 | if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { |
| 1004 | map = dm_get_table(md); | 1618 | map = dm_get_table(md); |
| 1005 | if (map) { | 1619 | if (map) { |
| 1006 | r = dm_table_any_congested(map, bdi_bits); | 1620 | /* |
| 1621 | * Request-based dm cares about only own queue for | ||
| 1622 | * the query about congestion status of request_queue | ||
| 1623 | */ | ||
| 1624 | if (dm_request_based(md)) | ||
| 1625 | r = md->queue->backing_dev_info.state & | ||
| 1626 | bdi_bits; | ||
| 1627 | else | ||
| 1628 | r = dm_table_any_congested(map, bdi_bits); | ||
| 1629 | |||
| 1007 | dm_table_put(map); | 1630 | dm_table_put(map); |
| 1008 | } | 1631 | } |
| 1009 | } | 1632 | } |
| @@ -1126,30 +1749,32 @@ static struct mapped_device *alloc_dev(int minor) | |||
| 1126 | INIT_LIST_HEAD(&md->uevent_list); | 1749 | INIT_LIST_HEAD(&md->uevent_list); |
| 1127 | spin_lock_init(&md->uevent_lock); | 1750 | spin_lock_init(&md->uevent_lock); |
| 1128 | 1751 | ||
| 1129 | md->queue = blk_alloc_queue(GFP_KERNEL); | 1752 | md->queue = blk_init_queue(dm_request_fn, NULL); |
| 1130 | if (!md->queue) | 1753 | if (!md->queue) |
| 1131 | goto bad_queue; | 1754 | goto bad_queue; |
| 1132 | 1755 | ||
| 1756 | /* | ||
| 1757 | * Request-based dm devices cannot be stacked on top of bio-based dm | ||
| 1758 | * devices. The type of this dm device has not been decided yet, | ||
| 1759 | * although we initialized the queue using blk_init_queue(). | ||
| 1760 | * The type is decided at the first table loading time. | ||
| 1761 | * To prevent problematic device stacking, clear the queue flag | ||
| 1762 | * for request stacking support until then. | ||
| 1763 | * | ||
| 1764 | * This queue is new, so no concurrency on the queue_flags. | ||
| 1765 | */ | ||
| 1766 | queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue); | ||
| 1767 | md->saved_make_request_fn = md->queue->make_request_fn; | ||
| 1133 | md->queue->queuedata = md; | 1768 | md->queue->queuedata = md; |
| 1134 | md->queue->backing_dev_info.congested_fn = dm_any_congested; | 1769 | md->queue->backing_dev_info.congested_fn = dm_any_congested; |
| 1135 | md->queue->backing_dev_info.congested_data = md; | 1770 | md->queue->backing_dev_info.congested_data = md; |
| 1136 | blk_queue_make_request(md->queue, dm_request); | 1771 | blk_queue_make_request(md->queue, dm_request); |
| 1137 | blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN, NULL); | ||
| 1138 | blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); | 1772 | blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); |
| 1139 | md->queue->unplug_fn = dm_unplug_all; | 1773 | md->queue->unplug_fn = dm_unplug_all; |
| 1140 | blk_queue_merge_bvec(md->queue, dm_merge_bvec); | 1774 | blk_queue_merge_bvec(md->queue, dm_merge_bvec); |
| 1141 | 1775 | blk_queue_softirq_done(md->queue, dm_softirq_done); | |
| 1142 | md->io_pool = mempool_create_slab_pool(MIN_IOS, _io_cache); | 1776 | blk_queue_prep_rq(md->queue, dm_prep_fn); |
| 1143 | if (!md->io_pool) | 1777 | blk_queue_lld_busy(md->queue, dm_lld_busy); |
| 1144 | goto bad_io_pool; | ||
| 1145 | |||
| 1146 | md->tio_pool = mempool_create_slab_pool(MIN_IOS, _tio_cache); | ||
| 1147 | if (!md->tio_pool) | ||
| 1148 | goto bad_tio_pool; | ||
| 1149 | |||
| 1150 | md->bs = bioset_create(16, 0); | ||
| 1151 | if (!md->bs) | ||
| 1152 | goto bad_no_bioset; | ||
| 1153 | 1778 | ||
| 1154 | md->disk = alloc_disk(1); | 1779 | md->disk = alloc_disk(1); |
| 1155 | if (!md->disk) | 1780 | if (!md->disk) |
| @@ -1173,6 +1798,10 @@ static struct mapped_device *alloc_dev(int minor) | |||
| 1173 | if (!md->wq) | 1798 | if (!md->wq) |
| 1174 | goto bad_thread; | 1799 | goto bad_thread; |
| 1175 | 1800 | ||
| 1801 | md->bdev = bdget_disk(md->disk, 0); | ||
| 1802 | if (!md->bdev) | ||
| 1803 | goto bad_bdev; | ||
| 1804 | |||
| 1176 | /* Populate the mapping, nobody knows we exist yet */ | 1805 | /* Populate the mapping, nobody knows we exist yet */ |
| 1177 | spin_lock(&_minor_lock); | 1806 | spin_lock(&_minor_lock); |
| 1178 | old_md = idr_replace(&_minor_idr, md, minor); | 1807 | old_md = idr_replace(&_minor_idr, md, minor); |
| @@ -1182,15 +1811,11 @@ static struct mapped_device *alloc_dev(int minor) | |||
| 1182 | 1811 | ||
| 1183 | return md; | 1812 | return md; |
| 1184 | 1813 | ||
| 1814 | bad_bdev: | ||
| 1815 | destroy_workqueue(md->wq); | ||
| 1185 | bad_thread: | 1816 | bad_thread: |
| 1186 | put_disk(md->disk); | 1817 | put_disk(md->disk); |
| 1187 | bad_disk: | 1818 | bad_disk: |
| 1188 | bioset_free(md->bs); | ||
| 1189 | bad_no_bioset: | ||
| 1190 | mempool_destroy(md->tio_pool); | ||
| 1191 | bad_tio_pool: | ||
| 1192 | mempool_destroy(md->io_pool); | ||
| 1193 | bad_io_pool: | ||
| 1194 | blk_cleanup_queue(md->queue); | 1819 | blk_cleanup_queue(md->queue); |
| 1195 | bad_queue: | 1820 | bad_queue: |
| 1196 | free_minor(minor); | 1821 | free_minor(minor); |
| @@ -1207,14 +1832,15 @@ static void free_dev(struct mapped_device *md) | |||
| 1207 | { | 1832 | { |
| 1208 | int minor = MINOR(disk_devt(md->disk)); | 1833 | int minor = MINOR(disk_devt(md->disk)); |
| 1209 | 1834 | ||
| 1210 | if (md->suspended_bdev) { | 1835 | unlock_fs(md); |
| 1211 | unlock_fs(md); | 1836 | bdput(md->bdev); |
| 1212 | bdput(md->suspended_bdev); | ||
| 1213 | } | ||
| 1214 | destroy_workqueue(md->wq); | 1837 | destroy_workqueue(md->wq); |
| 1215 | mempool_destroy(md->tio_pool); | 1838 | if (md->tio_pool) |
| 1216 | mempool_destroy(md->io_pool); | 1839 | mempool_destroy(md->tio_pool); |
| 1217 | bioset_free(md->bs); | 1840 | if (md->io_pool) |
| 1841 | mempool_destroy(md->io_pool); | ||
| 1842 | if (md->bs) | ||
| 1843 | bioset_free(md->bs); | ||
| 1218 | blk_integrity_unregister(md->disk); | 1844 | blk_integrity_unregister(md->disk); |
| 1219 | del_gendisk(md->disk); | 1845 | del_gendisk(md->disk); |
| 1220 | free_minor(minor); | 1846 | free_minor(minor); |
| @@ -1229,6 +1855,29 @@ static void free_dev(struct mapped_device *md) | |||
| 1229 | kfree(md); | 1855 | kfree(md); |
| 1230 | } | 1856 | } |
| 1231 | 1857 | ||
| 1858 | static void __bind_mempools(struct mapped_device *md, struct dm_table *t) | ||
| 1859 | { | ||
| 1860 | struct dm_md_mempools *p; | ||
| 1861 | |||
| 1862 | if (md->io_pool && md->tio_pool && md->bs) | ||
| 1863 | /* the md already has necessary mempools */ | ||
| 1864 | goto out; | ||
| 1865 | |||
| 1866 | p = dm_table_get_md_mempools(t); | ||
| 1867 | BUG_ON(!p || md->io_pool || md->tio_pool || md->bs); | ||
| 1868 | |||
| 1869 | md->io_pool = p->io_pool; | ||
| 1870 | p->io_pool = NULL; | ||
| 1871 | md->tio_pool = p->tio_pool; | ||
| 1872 | p->tio_pool = NULL; | ||
| 1873 | md->bs = p->bs; | ||
| 1874 | p->bs = NULL; | ||
| 1875 | |||
| 1876 | out: | ||
| 1877 | /* mempool bind completed, now no need any mempools in the table */ | ||
| 1878 | dm_table_free_md_mempools(t); | ||
| 1879 | } | ||
| 1880 | |||
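Not part of the patch, for context: binding the mempools at table load time is what lets their contents depend on the table type; a bio-based table needs dm_io/dm_target_io objects while a request-based one needs dm_rq_target_io/dm_rq_clone_bio_info objects, and the table is the first point where that choice is known. Once a device has its pools they are kept for its lifetime, which is why free_dev() above now checks each pointer before destroying it.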
| 1232 | /* | 1881 | /* |
| 1233 | * Bind a table to the device. | 1882 | * Bind a table to the device. |
| 1234 | */ | 1883 | */ |
| @@ -1252,15 +1901,17 @@ static void __set_size(struct mapped_device *md, sector_t size) | |||
| 1252 | { | 1901 | { |
| 1253 | set_capacity(md->disk, size); | 1902 | set_capacity(md->disk, size); |
| 1254 | 1903 | ||
| 1255 | mutex_lock(&md->suspended_bdev->bd_inode->i_mutex); | 1904 | mutex_lock(&md->bdev->bd_inode->i_mutex); |
| 1256 | i_size_write(md->suspended_bdev->bd_inode, (loff_t)size << SECTOR_SHIFT); | 1905 | i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT); |
| 1257 | mutex_unlock(&md->suspended_bdev->bd_inode->i_mutex); | 1906 | mutex_unlock(&md->bdev->bd_inode->i_mutex); |
| 1258 | } | 1907 | } |
| 1259 | 1908 | ||
| 1260 | static int __bind(struct mapped_device *md, struct dm_table *t) | 1909 | static int __bind(struct mapped_device *md, struct dm_table *t, |
| 1910 | struct queue_limits *limits) | ||
| 1261 | { | 1911 | { |
| 1262 | struct request_queue *q = md->queue; | 1912 | struct request_queue *q = md->queue; |
| 1263 | sector_t size; | 1913 | sector_t size; |
| 1914 | unsigned long flags; | ||
| 1264 | 1915 | ||
| 1265 | size = dm_table_get_size(t); | 1916 | size = dm_table_get_size(t); |
| 1266 | 1917 | ||
| @@ -1270,8 +1921,7 @@ static int __bind(struct mapped_device *md, struct dm_table *t) | |||
| 1270 | if (size != get_capacity(md->disk)) | 1921 | if (size != get_capacity(md->disk)) |
| 1271 | memset(&md->geometry, 0, sizeof(md->geometry)); | 1922 | memset(&md->geometry, 0, sizeof(md->geometry)); |
| 1272 | 1923 | ||
| 1273 | if (md->suspended_bdev) | 1924 | __set_size(md, size); |
| 1274 | __set_size(md, size); | ||
| 1275 | 1925 | ||
| 1276 | if (!size) { | 1926 | if (!size) { |
| 1277 | dm_table_destroy(t); | 1927 | dm_table_destroy(t); |
| @@ -1280,10 +1930,22 @@ static int __bind(struct mapped_device *md, struct dm_table *t) | |||
| 1280 | 1930 | ||
| 1281 | dm_table_event_callback(t, event_callback, md); | 1931 | dm_table_event_callback(t, event_callback, md); |
| 1282 | 1932 | ||
| 1283 | write_lock(&md->map_lock); | 1933 | /* |
| 1934 | * The queue hasn't been stopped yet, if the old table type wasn't | ||
| 1935 | * for request-based during suspension. So stop it to prevent | ||
| 1936 | * I/O mapping before resume. | ||
| 1937 | * This must be done before setting the queue restrictions, | ||
| 1938 | * because request-based dm may be run just after the setting. | ||
| 1939 | */ | ||
| 1940 | if (dm_table_request_based(t) && !blk_queue_stopped(q)) | ||
| 1941 | stop_queue(q); | ||
| 1942 | |||
| 1943 | __bind_mempools(md, t); | ||
| 1944 | |||
| 1945 | write_lock_irqsave(&md->map_lock, flags); | ||
| 1284 | md->map = t; | 1946 | md->map = t; |
| 1285 | dm_table_set_restrictions(t, q); | 1947 | dm_table_set_restrictions(t, q, limits); |
| 1286 | write_unlock(&md->map_lock); | 1948 | write_unlock_irqrestore(&md->map_lock, flags); |
| 1287 | 1949 | ||
| 1288 | return 0; | 1950 | return 0; |
| 1289 | } | 1951 | } |
| @@ -1291,14 +1953,15 @@ static int __bind(struct mapped_device *md, struct dm_table *t) | |||
| 1291 | static void __unbind(struct mapped_device *md) | 1953 | static void __unbind(struct mapped_device *md) |
| 1292 | { | 1954 | { |
| 1293 | struct dm_table *map = md->map; | 1955 | struct dm_table *map = md->map; |
| 1956 | unsigned long flags; | ||
| 1294 | 1957 | ||
| 1295 | if (!map) | 1958 | if (!map) |
| 1296 | return; | 1959 | return; |
| 1297 | 1960 | ||
| 1298 | dm_table_event_callback(map, NULL, NULL); | 1961 | dm_table_event_callback(map, NULL, NULL); |
| 1299 | write_lock(&md->map_lock); | 1962 | write_lock_irqsave(&md->map_lock, flags); |
| 1300 | md->map = NULL; | 1963 | md->map = NULL; |
| 1301 | write_unlock(&md->map_lock); | 1964 | write_unlock_irqrestore(&md->map_lock, flags); |
| 1302 | dm_table_destroy(map); | 1965 | dm_table_destroy(map); |
| 1303 | } | 1966 | } |
| 1304 | 1967 | ||
| @@ -1402,6 +2065,8 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible) | |||
| 1402 | { | 2065 | { |
| 1403 | int r = 0; | 2066 | int r = 0; |
| 1404 | DECLARE_WAITQUEUE(wait, current); | 2067 | DECLARE_WAITQUEUE(wait, current); |
| 2068 | struct request_queue *q = md->queue; | ||
| 2069 | unsigned long flags; | ||
| 1405 | 2070 | ||
| 1406 | dm_unplug_all(md->queue); | 2071 | dm_unplug_all(md->queue); |
| 1407 | 2072 | ||
| @@ -1411,7 +2076,14 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible) | |||
| 1411 | set_current_state(interruptible); | 2076 | set_current_state(interruptible); |
| 1412 | 2077 | ||
| 1413 | smp_mb(); | 2078 | smp_mb(); |
| 1414 | if (!atomic_read(&md->pending)) | 2079 | if (dm_request_based(md)) { |
| 2080 | spin_lock_irqsave(q->queue_lock, flags); | ||
| 2081 | if (!queue_in_flight(q) && blk_queue_stopped(q)) { | ||
| 2082 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
| 2083 | break; | ||
| 2084 | } | ||
| 2085 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
| 2086 | } else if (!atomic_read(&md->pending)) | ||
| 1415 | break; | 2087 | break; |
| 1416 | 2088 | ||
| 1417 | if (interruptible == TASK_INTERRUPTIBLE && | 2089 | if (interruptible == TASK_INTERRUPTIBLE && |
| @@ -1429,34 +2101,36 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible) | |||
| 1429 | return r; | 2101 | return r; |
| 1430 | } | 2102 | } |
| 1431 | 2103 | ||
| 1432 | static int dm_flush(struct mapped_device *md) | 2104 | static void dm_flush(struct mapped_device *md) |
| 1433 | { | 2105 | { |
| 1434 | dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); | 2106 | dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); |
| 1435 | return 0; | 2107 | |
| 2108 | bio_init(&md->barrier_bio); | ||
| 2109 | md->barrier_bio.bi_bdev = md->bdev; | ||
| 2110 | md->barrier_bio.bi_rw = WRITE_BARRIER; | ||
| 2111 | __split_and_process_bio(md, &md->barrier_bio); | ||
| 2112 | |||
| 2113 | dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); | ||
| 1436 | } | 2114 | } |
| 1437 | 2115 | ||
| 1438 | static void process_barrier(struct mapped_device *md, struct bio *bio) | 2116 | static void process_barrier(struct mapped_device *md, struct bio *bio) |
| 1439 | { | 2117 | { |
| 1440 | int error = dm_flush(md); | 2118 | md->barrier_error = 0; |
| 1441 | 2119 | ||
| 1442 | if (unlikely(error)) { | 2120 | dm_flush(md); |
| 1443 | bio_endio(bio, error); | ||
| 1444 | return; | ||
| 1445 | } | ||
| 1446 | if (bio_empty_barrier(bio)) { | ||
| 1447 | bio_endio(bio, 0); | ||
| 1448 | return; | ||
| 1449 | } | ||
| 1450 | |||
| 1451 | __split_and_process_bio(md, bio); | ||
| 1452 | 2121 | ||
| 1453 | error = dm_flush(md); | 2122 | if (!bio_empty_barrier(bio)) { |
| 1454 | 2123 | __split_and_process_bio(md, bio); | |
| 1455 | if (!error && md->barrier_error) | 2124 | dm_flush(md); |
| 1456 | error = md->barrier_error; | 2125 | } |
| 1457 | 2126 | ||
| 1458 | if (md->barrier_error != DM_ENDIO_REQUEUE) | 2127 | if (md->barrier_error != DM_ENDIO_REQUEUE) |
| 1459 | bio_endio(bio, error); | 2128 | bio_endio(bio, md->barrier_error); |
| 2129 | else { | ||
| 2130 | spin_lock_irq(&md->deferred_lock); | ||
| 2131 | bio_list_add_head(&md->deferred, bio); | ||
| 2132 | spin_unlock_irq(&md->deferred_lock); | ||
| 2133 | } | ||
| 1460 | } | 2134 | } |
| 1461 | 2135 | ||
| 1462 | /* | 2136 | /* |
| @@ -1482,10 +2156,14 @@ static void dm_wq_work(struct work_struct *work) | |||
| 1482 | 2156 | ||
| 1483 | up_write(&md->io_lock); | 2157 | up_write(&md->io_lock); |
| 1484 | 2158 | ||
| 1485 | if (bio_barrier(c)) | 2159 | if (dm_request_based(md)) |
| 1486 | process_barrier(md, c); | 2160 | generic_make_request(c); |
| 1487 | else | 2161 | else { |
| 1488 | __split_and_process_bio(md, c); | 2162 | if (bio_barrier(c)) |
| 2163 | process_barrier(md, c); | ||
| 2164 | else | ||
| 2165 | __split_and_process_bio(md, c); | ||
| 2166 | } | ||
| 1489 | 2167 | ||
| 1490 | down_write(&md->io_lock); | 2168 | down_write(&md->io_lock); |
| 1491 | } | 2169 | } |
| @@ -1505,6 +2183,7 @@ static void dm_queue_flush(struct mapped_device *md) | |||
| 1505 | */ | 2183 | */ |
| 1506 | int dm_swap_table(struct mapped_device *md, struct dm_table *table) | 2184 | int dm_swap_table(struct mapped_device *md, struct dm_table *table) |
| 1507 | { | 2185 | { |
| 2186 | struct queue_limits limits; | ||
| 1508 | int r = -EINVAL; | 2187 | int r = -EINVAL; |
| 1509 | 2188 | ||
| 1510 | mutex_lock(&md->suspend_lock); | 2189 | mutex_lock(&md->suspend_lock); |
| @@ -1513,19 +2192,96 @@ int dm_swap_table(struct mapped_device *md, struct dm_table *table) | |||
| 1513 | if (!dm_suspended(md)) | 2192 | if (!dm_suspended(md)) |
| 1514 | goto out; | 2193 | goto out; |
| 1515 | 2194 | ||
| 1516 | /* without bdev, the device size cannot be changed */ | 2195 | r = dm_calculate_queue_limits(table, &limits); |
| 1517 | if (!md->suspended_bdev) | 2196 | if (r) |
| 1518 | if (get_capacity(md->disk) != dm_table_get_size(table)) | 2197 | goto out; |
| 1519 | goto out; | 2198 | |
| 2199 | /* cannot change the device type, once a table is bound */ | ||
| 2200 | if (md->map && | ||
| 2201 | (dm_table_get_type(md->map) != dm_table_get_type(table))) { | ||
| 2202 | DMWARN("can't change the device type after a table is bound"); | ||
| 2203 | goto out; | ||
| 2204 | } | ||
| 2205 | |||
| 2206 | /* | ||
| 2207 | * It is enough that blk_queue_ordered() is called only once when | ||
| 2208 | * the first bio-based table is bound. | ||
| 2209 | * | ||
| 2210 | * This setting should be moved to alloc_dev() when request-based dm | ||
| 2211 | * supports barrier. | ||
| 2212 | */ | ||
| 2213 | if (!md->map && dm_table_bio_based(table)) | ||
| 2214 | blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN, NULL); | ||
| 1520 | 2215 | ||
| 1521 | __unbind(md); | 2216 | __unbind(md); |
| 1522 | r = __bind(md, table); | 2217 | r = __bind(md, table, &limits); |
| 1523 | 2218 | ||
| 1524 | out: | 2219 | out: |
| 1525 | mutex_unlock(&md->suspend_lock); | 2220 | mutex_unlock(&md->suspend_lock); |
| 1526 | return r; | 2221 | return r; |
| 1527 | } | 2222 | } |
| 1528 | 2223 | ||
| 2224 | static void dm_rq_invalidate_suspend_marker(struct mapped_device *md) | ||
| 2225 | { | ||
| 2226 | md->suspend_rq.special = (void *)0x1; | ||
| 2227 | } | ||
| 2228 | |||
| 2229 | static void dm_rq_abort_suspend(struct mapped_device *md, int noflush) | ||
| 2230 | { | ||
| 2231 | struct request_queue *q = md->queue; | ||
| 2232 | unsigned long flags; | ||
| 2233 | |||
| 2234 | spin_lock_irqsave(q->queue_lock, flags); | ||
| 2235 | if (!noflush) | ||
| 2236 | dm_rq_invalidate_suspend_marker(md); | ||
| 2237 | __start_queue(q); | ||
| 2238 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
| 2239 | } | ||
| 2240 | |||
| 2241 | static void dm_rq_start_suspend(struct mapped_device *md, int noflush) | ||
| 2242 | { | ||
| 2243 | struct request *rq = &md->suspend_rq; | ||
| 2244 | struct request_queue *q = md->queue; | ||
| 2245 | |||
| 2246 | if (noflush) | ||
| 2247 | stop_queue(q); | ||
| 2248 | else { | ||
| 2249 | blk_rq_init(q, rq); | ||
| 2250 | blk_insert_request(q, rq, 0, NULL); | ||
| 2251 | } | ||
| 2252 | } | ||
| 2253 | |||
| 2254 | static int dm_rq_suspend_available(struct mapped_device *md, int noflush) | ||
| 2255 | { | ||
| 2256 | int r = 1; | ||
| 2257 | struct request *rq = &md->suspend_rq; | ||
| 2258 | struct request_queue *q = md->queue; | ||
| 2259 | unsigned long flags; | ||
| 2260 | |||
| 2261 | if (noflush) | ||
| 2262 | return r; | ||
| 2263 | |||
| 2264 | /* The marker must be protected by queue lock if it is in use */ | ||
| 2265 | spin_lock_irqsave(q->queue_lock, flags); | ||
| 2266 | if (unlikely(rq->ref_count)) { | ||
| 2267 | /* | ||
| 2268 | * This can happen, when the previous flush suspend was | ||
| 2269 | * interrupted, the marker is still in the queue and | ||
| 2270 | * this flush suspend has been invoked, because we don't | ||
| 2271 | * remove the marker at the time of suspend interruption. | ||
| 2272 | * We have only one marker per mapped_device, so we can't | ||
| 2273 | * start another flush suspend while it is in use. | ||
| 2274 | */ | ||
| 2275 | BUG_ON(!rq->special); /* The marker should be invalidated */ | ||
| 2276 | DMWARN("Invalidating the previous flush suspend is still in" | ||
| 2277 | " progress. Please retry later."); | ||
| 2278 | r = 0; | ||
| 2279 | } | ||
| 2280 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
| 2281 | |||
| 2282 | return r; | ||
| 2283 | } | ||
| 2284 | |||
| 1529 | /* | 2285 | /* |
| 1530 | * Functions to lock and unlock any filesystem running on the | 2286 | * Functions to lock and unlock any filesystem running on the |
| 1531 | * device. | 2287 | * device. |
| @@ -1536,7 +2292,7 @@ static int lock_fs(struct mapped_device *md) | |||
| 1536 | 2292 | ||
| 1537 | WARN_ON(md->frozen_sb); | 2293 | WARN_ON(md->frozen_sb); |
| 1538 | 2294 | ||
| 1539 | md->frozen_sb = freeze_bdev(md->suspended_bdev); | 2295 | md->frozen_sb = freeze_bdev(md->bdev); |
| 1540 | if (IS_ERR(md->frozen_sb)) { | 2296 | if (IS_ERR(md->frozen_sb)) { |
| 1541 | r = PTR_ERR(md->frozen_sb); | 2297 | r = PTR_ERR(md->frozen_sb); |
| 1542 | md->frozen_sb = NULL; | 2298 | md->frozen_sb = NULL; |
| @@ -1545,9 +2301,6 @@ static int lock_fs(struct mapped_device *md) | |||
| 1545 | 2301 | ||
| 1546 | set_bit(DMF_FROZEN, &md->flags); | 2302 | set_bit(DMF_FROZEN, &md->flags); |
| 1547 | 2303 | ||
| 1548 | /* don't bdput right now, we don't want the bdev | ||
| 1549 | * to go away while it is locked. | ||
| 1550 | */ | ||
| 1551 | return 0; | 2304 | return 0; |
| 1552 | } | 2305 | } |
| 1553 | 2306 | ||
| @@ -1556,7 +2309,7 @@ static void unlock_fs(struct mapped_device *md) | |||
| 1556 | if (!test_bit(DMF_FROZEN, &md->flags)) | 2309 | if (!test_bit(DMF_FROZEN, &md->flags)) |
| 1557 | return; | 2310 | return; |
| 1558 | 2311 | ||
| 1559 | thaw_bdev(md->suspended_bdev, md->frozen_sb); | 2312 | thaw_bdev(md->bdev, md->frozen_sb); |
| 1560 | md->frozen_sb = NULL; | 2313 | md->frozen_sb = NULL; |
| 1561 | clear_bit(DMF_FROZEN, &md->flags); | 2314 | clear_bit(DMF_FROZEN, &md->flags); |
| 1562 | } | 2315 | } |
| @@ -1568,6 +2321,53 @@ static void unlock_fs(struct mapped_device *md) | |||
| 1568 | * dm_bind_table, dm_suspend must be called to flush any in | 2321 | * dm_bind_table, dm_suspend must be called to flush any in |
| 1569 | * flight bios and ensure that any further io gets deferred. | 2322 | * flight bios and ensure that any further io gets deferred. |
| 1570 | */ | 2323 | */ |
| 2324 | /* | ||
| 2325 | * Suspend mechanism in request-based dm. | ||
| 2326 | * | ||
| 2327 | * After the suspend starts, further incoming requests are kept in | ||
| 2328 | * the request_queue and deferred. | ||
| 2329 | * Remaining requests in the request_queue at the start of suspend are flushed | ||
| 2330 | * if it is flush suspend. | ||
| 2331 | * The suspend completes when the following conditions have been satisfied, | ||
| 2332 | * so wait for it: | ||
| 2333 | * 1. q->in_flight is 0 (which means no in_flight request) | ||
| 2334 | * 2. queue has been stopped (which means no request dispatching) | ||
| 2335 | * | ||
| 2336 | * | ||
| 2337 | * Noflush suspend | ||
| 2338 | * --------------- | ||
| 2339 | * Noflush suspend doesn't need to dispatch remaining requests. | ||
| 2340 | * So stop the queue immediately. Then, wait for all in_flight requests | ||
| 2341 | * to be completed or requeued. | ||
| 2342 | * | ||
| 2343 | * To abort noflush suspend, start the queue. | ||
| 2344 | * | ||
| 2345 | * | ||
| 2346 | * Flush suspend | ||
| 2347 | * ------------- | ||
| 2348 | * Flush suspend needs to dispatch remaining requests. So stop the queue | ||
| 2349 | * after the remaining requests are completed. (Requeued requests must also | ||
| 2350 | * be re-dispatched and completed. Until then, we can't stop the queue.) | ||
| 2351 | * | ||
| 2352 | * While the remaining requests are being flushed, further incoming requests | ||
| 2353 | * are also inserted into the same queue. To distinguish which requests are | ||
| 2354 | * to be flushed, we insert a marker request into the queue when the flush | ||
| 2355 | * suspend starts, like a barrier. | ||
| 2356 | * Dispatching is blocked when the marker is found at the top of the queue. | ||
| 2357 | * The queue is stopped when all in_flight requests have completed, since | ||
| 2358 | * that means the remaining requests have been completely flushed. | ||
| 2359 | * Then the marker is removed from the queue. | ||
| 2360 | * | ||
| 2361 | * To abort flush suspend, we need to take care of the marker in addition to | ||
| 2362 | * starting the queue. | ||
| 2363 | * We don't forcibly remove the marker from the queue, since that would go | ||
| 2364 | * against block-layer convention. Instead, we mark the marker as invalidated. | ||
| 2365 | * When the invalidated marker is found at the top of the queue, it is | ||
| 2366 | * immediately removed from the queue, so it doesn't block dispatching. | ||
| 2367 | * Because we have only one marker per mapped_device, we can't start another | ||
| 2368 | * flush suspend until the invalidated marker has been removed from the queue, | ||
| 2369 | * so we fail and return -EBUSY in such a case. | ||
| 2370 | */ | ||
| 1571 | int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | 2371 | int dm_suspend(struct mapped_device *md, unsigned suspend_flags) |
| 1572 | { | 2372 | { |
| 1573 | struct dm_table *map = NULL; | 2373 | struct dm_table *map = NULL; |
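The noflush path described in the comment block above reduces to stopping and restarting dispatch around a wait for in-flight requests. A hedged sketch of that shape, using the stock block-layer primitives; the patch's dm_rq_start_suspend()/dm_rq_abort_suspend() additionally handle the flush-suspend marker, which is not shown here:

	/* Noflush case only: stop dispatch now, queued requests stay queued. */
	static void noflush_suspend_start(struct request_queue *q)
	{
		unsigned long flags;

		spin_lock_irqsave(q->queue_lock, flags);
		blk_stop_queue(q);		/* must be called with queue_lock held */
		spin_unlock_irqrestore(q->queue_lock, flags);

		/* The caller then waits until q->in_flight drops to 0. */
	}

	/* Aborting noflush suspend is simply restarting dispatch. */
	static void noflush_suspend_abort(struct request_queue *q)
	{
		unsigned long flags;

		spin_lock_irqsave(q->queue_lock, flags);
		blk_start_queue(q);
		spin_unlock_irqrestore(q->queue_lock, flags);
	}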
| @@ -1582,6 +2382,11 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
| 1582 | goto out_unlock; | 2382 | goto out_unlock; |
| 1583 | } | 2383 | } |
| 1584 | 2384 | ||
| 2385 | if (dm_request_based(md) && !dm_rq_suspend_available(md, noflush)) { | ||
| 2386 | r = -EBUSY; | ||
| 2387 | goto out_unlock; | ||
| 2388 | } | ||
| 2389 | |||
| 1585 | map = dm_get_table(md); | 2390 | map = dm_get_table(md); |
| 1586 | 2391 | ||
| 1587 | /* | 2392 | /* |
| @@ -1594,24 +2399,14 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
| 1594 | /* This does not get reverted if there's an error later. */ | 2399 | /* This does not get reverted if there's an error later. */ |
| 1595 | dm_table_presuspend_targets(map); | 2400 | dm_table_presuspend_targets(map); |
| 1596 | 2401 | ||
| 1597 | /* bdget() can stall if the pending I/Os are not flushed */ | 2402 | /* |
| 1598 | if (!noflush) { | 2403 | * Flush I/O to the device. noflush supersedes do_lockfs, |
| 1599 | md->suspended_bdev = bdget_disk(md->disk, 0); | 2404 | * because lock_fs() needs to flush I/Os. |
| 1600 | if (!md->suspended_bdev) { | 2405 | */ |
| 1601 | DMWARN("bdget failed in dm_suspend"); | 2406 | if (!noflush && do_lockfs) { |
| 1602 | r = -ENOMEM; | 2407 | r = lock_fs(md); |
| 2408 | if (r) | ||
| 1603 | goto out; | 2409 | goto out; |
| 1604 | } | ||
| 1605 | |||
| 1606 | /* | ||
| 1607 | * Flush I/O to the device. noflush supersedes do_lockfs, | ||
| 1608 | * because lock_fs() needs to flush I/Os. | ||
| 1609 | */ | ||
| 1610 | if (do_lockfs) { | ||
| 1611 | r = lock_fs(md); | ||
| 1612 | if (r) | ||
| 1613 | goto out; | ||
| 1614 | } | ||
| 1615 | } | 2410 | } |
| 1616 | 2411 | ||
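The new code drops the per-suspend bdget_disk()/bdput() dance and gates lock_fs() directly on the suspend flags. To make the decision explicit, a small sketch of how the two flag bits map to the behaviour above, assuming the DM_SUSPEND_* macros from drivers/md/dm.h (noflush and do_lockfs are computed near the top of dm_suspend(), outside this hunk):

	int noflush   = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;
	int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;

	if (!noflush && do_lockfs)	/* noflush wins: lock_fs() would flush I/O */
		r = lock_fs(md);	/* freezes the filesystem on md->bdev */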
| 1617 | /* | 2412 | /* |
| @@ -1637,6 +2432,9 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
| 1637 | 2432 | ||
| 1638 | flush_workqueue(md->wq); | 2433 | flush_workqueue(md->wq); |
| 1639 | 2434 | ||
| 2435 | if (dm_request_based(md)) | ||
| 2436 | dm_rq_start_suspend(md, noflush); | ||
| 2437 | |||
| 1640 | /* | 2438 | /* |
| 1641 | * At this point no more requests are entering target request routines. | 2439 | * At this point no more requests are entering target request routines. |
| 1642 | * We call dm_wait_for_completion to wait for all existing requests | 2440 | * We call dm_wait_for_completion to wait for all existing requests |
| @@ -1653,6 +2451,9 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
| 1653 | if (r < 0) { | 2451 | if (r < 0) { |
| 1654 | dm_queue_flush(md); | 2452 | dm_queue_flush(md); |
| 1655 | 2453 | ||
| 2454 | if (dm_request_based(md)) | ||
| 2455 | dm_rq_abort_suspend(md, noflush); | ||
| 2456 | |||
| 1656 | unlock_fs(md); | 2457 | unlock_fs(md); |
| 1657 | goto out; /* pushback list is already flushed, so skip flush */ | 2458 | goto out; /* pushback list is already flushed, so skip flush */ |
| 1658 | } | 2459 | } |
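When dm_wait_for_completion() is interrupted, the request-based abort has to undo both halves of the mechanism described earlier: restart dispatch and, for a flush suspend, invalidate the still-queued marker. A hedged sketch of that shape; invalidate_suspend_marker() is a hypothetical stand-in for the patch's marker bookkeeping in dm_rq_abort_suspend(), which is defined earlier in the file:

	static void abort_rq_suspend(struct mapped_device *md, int noflush)
	{
		struct request_queue *q = md->queue;
		unsigned long flags;

		spin_lock_irqsave(q->queue_lock, flags);
		if (!noflush)
			/* flush suspend: mark the marker invalidated (hypothetical helper) */
			invalidate_suspend_marker(&md->suspend_rq);
		blk_start_queue(q);	/* resume dispatching deferred requests */
		spin_unlock_irqrestore(q->queue_lock, flags);
	}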
| @@ -1668,11 +2469,6 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
| 1668 | set_bit(DMF_SUSPENDED, &md->flags); | 2469 | set_bit(DMF_SUSPENDED, &md->flags); |
| 1669 | 2470 | ||
| 1670 | out: | 2471 | out: |
| 1671 | if (r && md->suspended_bdev) { | ||
| 1672 | bdput(md->suspended_bdev); | ||
| 1673 | md->suspended_bdev = NULL; | ||
| 1674 | } | ||
| 1675 | |||
| 1676 | dm_table_put(map); | 2472 | dm_table_put(map); |
| 1677 | 2473 | ||
| 1678 | out_unlock: | 2474 | out_unlock: |
| @@ -1699,21 +2495,20 @@ int dm_resume(struct mapped_device *md) | |||
| 1699 | 2495 | ||
| 1700 | dm_queue_flush(md); | 2496 | dm_queue_flush(md); |
| 1701 | 2497 | ||
| 1702 | unlock_fs(md); | 2498 | /* |
| 2499 | * Flushing deferred I/Os must be done after the targets are resumed | ||
| 2500 | * so that target mapping can work correctly. | ||
| 2501 | * Request-based dm queues the deferred I/Os in its request_queue. | ||
| 2502 | */ | ||
| 2503 | if (dm_request_based(md)) | ||
| 2504 | start_queue(md->queue); | ||
| 1703 | 2505 | ||
| 1704 | if (md->suspended_bdev) { | 2506 | unlock_fs(md); |
| 1705 | bdput(md->suspended_bdev); | ||
| 1706 | md->suspended_bdev = NULL; | ||
| 1707 | } | ||
| 1708 | 2507 | ||
| 1709 | clear_bit(DMF_SUSPENDED, &md->flags); | 2508 | clear_bit(DMF_SUSPENDED, &md->flags); |
| 1710 | 2509 | ||
| 1711 | dm_table_unplug_all(map); | 2510 | dm_table_unplug_all(map); |
| 1712 | |||
| 1713 | dm_kobject_uevent(md); | ||
| 1714 | |||
| 1715 | r = 0; | 2511 | r = 0; |
| 1716 | |||
| 1717 | out: | 2512 | out: |
| 1718 | dm_table_put(map); | 2513 | dm_table_put(map); |
| 1719 | mutex_unlock(&md->suspend_lock); | 2514 | mutex_unlock(&md->suspend_lock); |
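start_queue() above is the request-based counterpart of dm_queue_flush(): the deferred requests are already sitting in md->queue, so resuming amounts to letting the queue dispatch again. The patch defines its own start_queue() earlier in the file; a minimal sketch of what such a helper amounts to:

	/* Sketch: restart dispatch on a stopped queue; queue_lock not held by the caller. */
	static void example_start_queue(struct request_queue *q)
	{
		unsigned long flags;

		spin_lock_irqsave(q->queue_lock, flags);
		if (blk_queue_stopped(q))
			blk_start_queue(q);
		spin_unlock_irqrestore(q->queue_lock, flags);
	}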
| @@ -1724,9 +2519,19 @@ out: | |||
| 1724 | /*----------------------------------------------------------------- | 2519 | /*----------------------------------------------------------------- |
| 1725 | * Event notification. | 2520 | * Event notification. |
| 1726 | *---------------------------------------------------------------*/ | 2521 | *---------------------------------------------------------------*/ |
| 1727 | void dm_kobject_uevent(struct mapped_device *md) | 2522 | void dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, |
| 1728 | { | 2523 | unsigned cookie) |
| 1729 | kobject_uevent(&disk_to_dev(md->disk)->kobj, KOBJ_CHANGE); | 2524 | { |
| 2525 | char udev_cookie[DM_COOKIE_LENGTH]; | ||
| 2526 | char *envp[] = { udev_cookie, NULL }; | ||
| 2527 | |||
| 2528 | if (!cookie) | ||
| 2529 | kobject_uevent(&disk_to_dev(md->disk)->kobj, action); | ||
| 2530 | else { | ||
| 2531 | snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u", | ||
| 2532 | DM_COOKIE_ENV_VAR_NAME, cookie); | ||
| 2533 | kobject_uevent_env(&disk_to_dev(md->disk)->kobj, action, envp); | ||
| 2534 | } | ||
| 1730 | } | 2535 | } |
| 1731 | 2536 | ||
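With the new cookie parameter, a caller that carries a userspace-supplied cookie (typically the dm ioctl path) can pass it through so udev can match the resulting CHANGE/REMOVE event; a cookie of 0 falls back to a plain uevent. An illustrative call site, not a quote of the ioctl code:

	/*
	 * Emit a CHANGE uevent carrying the cookie handed in by userspace,
	 * so udev sees DM_COOKIE=<cookie> in the event environment and the
	 * initiating tool can synchronise on it.
	 */
	static void notify_resume(struct mapped_device *md, unsigned cookie)
	{
		dm_kobject_uevent(md, KOBJ_CHANGE, cookie);
	}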
| 1732 | uint32_t dm_next_uevent_seq(struct mapped_device *md) | 2537 | uint32_t dm_next_uevent_seq(struct mapped_device *md) |
| @@ -1780,6 +2585,10 @@ struct mapped_device *dm_get_from_kobject(struct kobject *kobj) | |||
| 1780 | if (&md->kobj != kobj) | 2585 | if (&md->kobj != kobj) |
| 1781 | return NULL; | 2586 | return NULL; |
| 1782 | 2587 | ||
| 2588 | if (test_bit(DMF_FREEING, &md->flags) || | ||
| 2589 | test_bit(DMF_DELETING, &md->flags)) | ||
| 2590 | return NULL; | ||
| 2591 | |||
| 1783 | dm_get(md); | 2592 | dm_get(md); |
| 1784 | return md; | 2593 | return md; |
| 1785 | } | 2594 | } |
| @@ -1800,6 +2609,61 @@ int dm_noflush_suspending(struct dm_target *ti) | |||
| 1800 | } | 2609 | } |
| 1801 | EXPORT_SYMBOL_GPL(dm_noflush_suspending); | 2610 | EXPORT_SYMBOL_GPL(dm_noflush_suspending); |
| 1802 | 2611 | ||
| 2612 | struct dm_md_mempools *dm_alloc_md_mempools(unsigned type) | ||
| 2613 | { | ||
| 2614 | struct dm_md_mempools *pools = kmalloc(sizeof(*pools), GFP_KERNEL); | ||
| 2615 | |||
| 2616 | if (!pools) | ||
| 2617 | return NULL; | ||
| 2618 | |||
| 2619 | pools->io_pool = (type == DM_TYPE_BIO_BASED) ? | ||
| 2620 | mempool_create_slab_pool(MIN_IOS, _io_cache) : | ||
| 2621 | mempool_create_slab_pool(MIN_IOS, _rq_bio_info_cache); | ||
| 2622 | if (!pools->io_pool) | ||
| 2623 | goto free_pools_and_out; | ||
| 2624 | |||
| 2625 | pools->tio_pool = (type == DM_TYPE_BIO_BASED) ? | ||
| 2626 | mempool_create_slab_pool(MIN_IOS, _tio_cache) : | ||
| 2627 | mempool_create_slab_pool(MIN_IOS, _rq_tio_cache); | ||
| 2628 | if (!pools->tio_pool) | ||
| 2629 | goto free_io_pool_and_out; | ||
| 2630 | |||
| 2631 | pools->bs = (type == DM_TYPE_BIO_BASED) ? | ||
| 2632 | bioset_create(16, 0) : bioset_create(MIN_IOS, 0); | ||
| 2633 | if (!pools->bs) | ||
| 2634 | goto free_tio_pool_and_out; | ||
| 2635 | |||
| 2636 | return pools; | ||
| 2637 | |||
| 2638 | free_tio_pool_and_out: | ||
| 2639 | mempool_destroy(pools->tio_pool); | ||
| 2640 | |||
| 2641 | free_io_pool_and_out: | ||
| 2642 | mempool_destroy(pools->io_pool); | ||
| 2643 | |||
| 2644 | free_pools_and_out: | ||
| 2645 | kfree(pools); | ||
| 2646 | |||
| 2647 | return NULL; | ||
| 2648 | } | ||
| 2649 | |||
| 2650 | void dm_free_md_mempools(struct dm_md_mempools *pools) | ||
| 2651 | { | ||
| 2652 | if (!pools) | ||
| 2653 | return; | ||
| 2654 | |||
| 2655 | if (pools->io_pool) | ||
| 2656 | mempool_destroy(pools->io_pool); | ||
| 2657 | |||
| 2658 | if (pools->tio_pool) | ||
| 2659 | mempool_destroy(pools->tio_pool); | ||
| 2660 | |||
| 2661 | if (pools->bs) | ||
| 2662 | bioset_free(pools->bs); | ||
| 2663 | |||
| 2664 | kfree(pools); | ||
| 2665 | } | ||
| 2666 | |||
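dm_alloc_md_mempools()/dm_free_md_mempools() let the caller build a pool set matching the device type, so bio-based and request-based devices don't pay for each other's slabs. A hedged usage sketch; the md->mempools field is assumed here for illustration, and DM_TYPE_REQUEST_BASED is taken to be the counterpart of the DM_TYPE_BIO_BASED constant used above:

	/* Sketch of a caller sizing pools for the type it is about to bind. */
	static int example_setup_pools(struct mapped_device *md, unsigned type)
	{
		struct dm_md_mempools *pools;

		pools = dm_alloc_md_mempools(type);	/* DM_TYPE_BIO_BASED or DM_TYPE_REQUEST_BASED */
		if (!pools)
			return -ENOMEM;

		md->mempools = pools;			/* hypothetical field */
		return 0;
	}

	static void example_teardown_pools(struct mapped_device *md)
	{
		dm_free_md_mempools(md->mempools);	/* NULL-safe, see above */
		md->mempools = NULL;
	}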
| 1803 | static struct block_device_operations dm_blk_dops = { | 2667 | static struct block_device_operations dm_blk_dops = { |
| 1804 | .open = dm_blk_open, | 2668 | .open = dm_blk_open, |
| 1805 | .release = dm_blk_close, | 2669 | .release = dm_blk_close, |
