diff options
Diffstat (limited to 'drivers/md/dm.c')
-rw-r--r-- | drivers/md/dm.c | 1142 |
1 files changed, 1003 insertions, 139 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 424f7b048c30..3c6d4ee8921d 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
@@ -19,11 +19,18 @@ | |||
19 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
20 | #include <linux/idr.h> | 20 | #include <linux/idr.h> |
21 | #include <linux/hdreg.h> | 21 | #include <linux/hdreg.h> |
22 | #include <linux/blktrace_api.h> | 22 | |
23 | #include <trace/block.h> | 23 | #include <trace/events/block.h> |
24 | 24 | ||
25 | #define DM_MSG_PREFIX "core" | 25 | #define DM_MSG_PREFIX "core" |
26 | 26 | ||
27 | /* | ||
28 | * Cookies are numeric values sent with CHANGE and REMOVE | ||
29 | * uevents while resuming, removing or renaming the device. | ||
30 | */ | ||
31 | #define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE" | ||
32 | #define DM_COOKIE_LENGTH 24 | ||
33 | |||
27 | static const char *_name = DM_NAME; | 34 | static const char *_name = DM_NAME; |
28 | 35 | ||
29 | static unsigned int major = 0; | 36 | static unsigned int major = 0; |
@@ -53,8 +60,6 @@ struct dm_target_io { | |||
53 | union map_info info; | 60 | union map_info info; |
54 | }; | 61 | }; |
55 | 62 | ||
56 | DEFINE_TRACE(block_bio_complete); | ||
57 | |||
58 | /* | 63 | /* |
59 | * For request-based dm. | 64 | * For request-based dm. |
60 | * One of these is allocated per request. | 65 | * One of these is allocated per request. |
@@ -73,7 +78,7 @@ struct dm_rq_target_io { | |||
73 | */ | 78 | */ |
74 | struct dm_rq_clone_bio_info { | 79 | struct dm_rq_clone_bio_info { |
75 | struct bio *orig; | 80 | struct bio *orig; |
76 | struct request *rq; | 81 | struct dm_rq_target_io *tio; |
77 | }; | 82 | }; |
78 | 83 | ||
79 | union map_info *dm_get_mapinfo(struct bio *bio) | 84 | union map_info *dm_get_mapinfo(struct bio *bio) |
@@ -83,6 +88,14 @@ union map_info *dm_get_mapinfo(struct bio *bio) | |||
83 | return NULL; | 88 | return NULL; |
84 | } | 89 | } |
85 | 90 | ||
91 | union map_info *dm_get_rq_mapinfo(struct request *rq) | ||
92 | { | ||
93 | if (rq && rq->end_io_data) | ||
94 | return &((struct dm_rq_target_io *)rq->end_io_data)->info; | ||
95 | return NULL; | ||
96 | } | ||
97 | EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo); | ||
98 | |||
86 | #define MINOR_ALLOCED ((void *)-1) | 99 | #define MINOR_ALLOCED ((void *)-1) |
87 | 100 | ||
88 | /* | 101 | /* |
@@ -159,13 +172,31 @@ struct mapped_device { | |||
159 | * freeze/thaw support require holding onto a super block | 172 | * freeze/thaw support require holding onto a super block |
160 | */ | 173 | */ |
161 | struct super_block *frozen_sb; | 174 | struct super_block *frozen_sb; |
162 | struct block_device *suspended_bdev; | 175 | struct block_device *bdev; |
163 | 176 | ||
164 | /* forced geometry settings */ | 177 | /* forced geometry settings */ |
165 | struct hd_geometry geometry; | 178 | struct hd_geometry geometry; |
166 | 179 | ||
180 | /* marker of flush suspend for request-based dm */ | ||
181 | struct request suspend_rq; | ||
182 | |||
183 | /* For saving the address of __make_request for request based dm */ | ||
184 | make_request_fn *saved_make_request_fn; | ||
185 | |||
167 | /* sysfs handle */ | 186 | /* sysfs handle */ |
168 | struct kobject kobj; | 187 | struct kobject kobj; |
188 | |||
189 | /* zero-length barrier that will be cloned and submitted to targets */ | ||
190 | struct bio barrier_bio; | ||
191 | }; | ||
192 | |||
193 | /* | ||
194 | * For mempools pre-allocation at the table loading time. | ||
195 | */ | ||
196 | struct dm_md_mempools { | ||
197 | mempool_t *io_pool; | ||
198 | mempool_t *tio_pool; | ||
199 | struct bio_set *bs; | ||
169 | }; | 200 | }; |
170 | 201 | ||
171 | #define MIN_IOS 256 | 202 | #define MIN_IOS 256 |
@@ -393,14 +424,29 @@ static void free_io(struct mapped_device *md, struct dm_io *io) | |||
393 | mempool_free(io, md->io_pool); | 424 | mempool_free(io, md->io_pool); |
394 | } | 425 | } |
395 | 426 | ||
396 | static struct dm_target_io *alloc_tio(struct mapped_device *md) | 427 | static void free_tio(struct mapped_device *md, struct dm_target_io *tio) |
428 | { | ||
429 | mempool_free(tio, md->tio_pool); | ||
430 | } | ||
431 | |||
432 | static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md) | ||
397 | { | 433 | { |
398 | return mempool_alloc(md->tio_pool, GFP_NOIO); | 434 | return mempool_alloc(md->tio_pool, GFP_ATOMIC); |
399 | } | 435 | } |
400 | 436 | ||
401 | static void free_tio(struct mapped_device *md, struct dm_target_io *tio) | 437 | static void free_rq_tio(struct dm_rq_target_io *tio) |
402 | { | 438 | { |
403 | mempool_free(tio, md->tio_pool); | 439 | mempool_free(tio, tio->md->tio_pool); |
440 | } | ||
441 | |||
442 | static struct dm_rq_clone_bio_info *alloc_bio_info(struct mapped_device *md) | ||
443 | { | ||
444 | return mempool_alloc(md->io_pool, GFP_ATOMIC); | ||
445 | } | ||
446 | |||
447 | static void free_bio_info(struct dm_rq_clone_bio_info *info) | ||
448 | { | ||
449 | mempool_free(info, info->tio->md->io_pool); | ||
404 | } | 450 | } |
405 | 451 | ||
406 | static void start_io_acct(struct dm_io *io) | 452 | static void start_io_acct(struct dm_io *io) |
@@ -466,12 +512,13 @@ static void queue_io(struct mapped_device *md, struct bio *bio) | |||
466 | struct dm_table *dm_get_table(struct mapped_device *md) | 512 | struct dm_table *dm_get_table(struct mapped_device *md) |
467 | { | 513 | { |
468 | struct dm_table *t; | 514 | struct dm_table *t; |
515 | unsigned long flags; | ||
469 | 516 | ||
470 | read_lock(&md->map_lock); | 517 | read_lock_irqsave(&md->map_lock, flags); |
471 | t = md->map; | 518 | t = md->map; |
472 | if (t) | 519 | if (t) |
473 | dm_table_get(t); | 520 | dm_table_get(t); |
474 | read_unlock(&md->map_lock); | 521 | read_unlock_irqrestore(&md->map_lock, flags); |
475 | 522 | ||
476 | return t; | 523 | return t; |
477 | } | 524 | } |
@@ -538,9 +585,11 @@ static void dec_pending(struct dm_io *io, int error) | |||
538 | * Target requested pushing back the I/O. | 585 | * Target requested pushing back the I/O. |
539 | */ | 586 | */ |
540 | spin_lock_irqsave(&md->deferred_lock, flags); | 587 | spin_lock_irqsave(&md->deferred_lock, flags); |
541 | if (__noflush_suspending(md)) | 588 | if (__noflush_suspending(md)) { |
542 | bio_list_add_head(&md->deferred, io->bio); | 589 | if (!bio_barrier(io->bio)) |
543 | else | 590 | bio_list_add_head(&md->deferred, |
591 | io->bio); | ||
592 | } else | ||
544 | /* noflush suspend was interrupted. */ | 593 | /* noflush suspend was interrupted. */ |
545 | io->error = -EIO; | 594 | io->error = -EIO; |
546 | spin_unlock_irqrestore(&md->deferred_lock, flags); | 595 | spin_unlock_irqrestore(&md->deferred_lock, flags); |
@@ -555,7 +604,8 @@ static void dec_pending(struct dm_io *io, int error) | |||
555 | * a per-device variable for error reporting. | 604 | * a per-device variable for error reporting. |
556 | * Note that you can't touch the bio after end_io_acct | 605 | * Note that you can't touch the bio after end_io_acct |
557 | */ | 606 | */ |
558 | md->barrier_error = io_error; | 607 | if (!md->barrier_error && io_error != -EOPNOTSUPP) |
608 | md->barrier_error = io_error; | ||
559 | end_io_acct(io); | 609 | end_io_acct(io); |
560 | } else { | 610 | } else { |
561 | end_io_acct(io); | 611 | end_io_acct(io); |
@@ -609,6 +659,262 @@ static void clone_endio(struct bio *bio, int error) | |||
609 | dec_pending(io, error); | 659 | dec_pending(io, error); |
610 | } | 660 | } |
611 | 661 | ||
662 | /* | ||
663 | * Partial completion handling for request-based dm | ||
664 | */ | ||
665 | static void end_clone_bio(struct bio *clone, int error) | ||
666 | { | ||
667 | struct dm_rq_clone_bio_info *info = clone->bi_private; | ||
668 | struct dm_rq_target_io *tio = info->tio; | ||
669 | struct bio *bio = info->orig; | ||
670 | unsigned int nr_bytes = info->orig->bi_size; | ||
671 | |||
672 | bio_put(clone); | ||
673 | |||
674 | if (tio->error) | ||
675 | /* | ||
676 | * An error has already been detected on the request. | ||
677 | * Once error occurred, just let clone->end_io() handle | ||
678 | * the remainder. | ||
679 | */ | ||
680 | return; | ||
681 | else if (error) { | ||
682 | /* | ||
683 | * Don't notice the error to the upper layer yet. | ||
684 | * The error handling decision is made by the target driver, | ||
685 | * when the request is completed. | ||
686 | */ | ||
687 | tio->error = error; | ||
688 | return; | ||
689 | } | ||
690 | |||
691 | /* | ||
692 | * I/O for the bio successfully completed. | ||
693 | * Notice the data completion to the upper layer. | ||
694 | */ | ||
695 | |||
696 | /* | ||
697 | * bios are processed from the head of the list. | ||
698 | * So the completing bio should always be rq->bio. | ||
699 | * If it's not, something wrong is happening. | ||
700 | */ | ||
701 | if (tio->orig->bio != bio) | ||
702 | DMERR("bio completion is going in the middle of the request"); | ||
703 | |||
704 | /* | ||
705 | * Update the original request. | ||
706 | * Do not use blk_end_request() here, because it may complete | ||
707 | * the original request before the clone, and break the ordering. | ||
708 | */ | ||
709 | blk_update_request(tio->orig, 0, nr_bytes); | ||
710 | } | ||
711 | |||
712 | /* | ||
713 | * Don't touch any member of the md after calling this function because | ||
714 | * the md may be freed in dm_put() at the end of this function. | ||
715 | * Or do dm_get() before calling this function and dm_put() later. | ||
716 | */ | ||
717 | static void rq_completed(struct mapped_device *md, int run_queue) | ||
718 | { | ||
719 | int wakeup_waiters = 0; | ||
720 | struct request_queue *q = md->queue; | ||
721 | unsigned long flags; | ||
722 | |||
723 | spin_lock_irqsave(q->queue_lock, flags); | ||
724 | if (!queue_in_flight(q)) | ||
725 | wakeup_waiters = 1; | ||
726 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
727 | |||
728 | /* nudge anyone waiting on suspend queue */ | ||
729 | if (wakeup_waiters) | ||
730 | wake_up(&md->wait); | ||
731 | |||
732 | if (run_queue) | ||
733 | blk_run_queue(q); | ||
734 | |||
735 | /* | ||
736 | * dm_put() must be at the end of this function. See the comment above | ||
737 | */ | ||
738 | dm_put(md); | ||
739 | } | ||
740 | |||
741 | static void dm_unprep_request(struct request *rq) | ||
742 | { | ||
743 | struct request *clone = rq->special; | ||
744 | struct dm_rq_target_io *tio = clone->end_io_data; | ||
745 | |||
746 | rq->special = NULL; | ||
747 | rq->cmd_flags &= ~REQ_DONTPREP; | ||
748 | |||
749 | blk_rq_unprep_clone(clone); | ||
750 | free_rq_tio(tio); | ||
751 | } | ||
752 | |||
753 | /* | ||
754 | * Requeue the original request of a clone. | ||
755 | */ | ||
756 | void dm_requeue_unmapped_request(struct request *clone) | ||
757 | { | ||
758 | struct dm_rq_target_io *tio = clone->end_io_data; | ||
759 | struct mapped_device *md = tio->md; | ||
760 | struct request *rq = tio->orig; | ||
761 | struct request_queue *q = rq->q; | ||
762 | unsigned long flags; | ||
763 | |||
764 | dm_unprep_request(rq); | ||
765 | |||
766 | spin_lock_irqsave(q->queue_lock, flags); | ||
767 | if (elv_queue_empty(q)) | ||
768 | blk_plug_device(q); | ||
769 | blk_requeue_request(q, rq); | ||
770 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
771 | |||
772 | rq_completed(md, 0); | ||
773 | } | ||
774 | EXPORT_SYMBOL_GPL(dm_requeue_unmapped_request); | ||
775 | |||
776 | static void __stop_queue(struct request_queue *q) | ||
777 | { | ||
778 | blk_stop_queue(q); | ||
779 | } | ||
780 | |||
781 | static void stop_queue(struct request_queue *q) | ||
782 | { | ||
783 | unsigned long flags; | ||
784 | |||
785 | spin_lock_irqsave(q->queue_lock, flags); | ||
786 | __stop_queue(q); | ||
787 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
788 | } | ||
789 | |||
790 | static void __start_queue(struct request_queue *q) | ||
791 | { | ||
792 | if (blk_queue_stopped(q)) | ||
793 | blk_start_queue(q); | ||
794 | } | ||
795 | |||
796 | static void start_queue(struct request_queue *q) | ||
797 | { | ||
798 | unsigned long flags; | ||
799 | |||
800 | spin_lock_irqsave(q->queue_lock, flags); | ||
801 | __start_queue(q); | ||
802 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
803 | } | ||
804 | |||
805 | /* | ||
806 | * Complete the clone and the original request. | ||
807 | * Must be called without queue lock. | ||
808 | */ | ||
809 | static void dm_end_request(struct request *clone, int error) | ||
810 | { | ||
811 | struct dm_rq_target_io *tio = clone->end_io_data; | ||
812 | struct mapped_device *md = tio->md; | ||
813 | struct request *rq = tio->orig; | ||
814 | |||
815 | if (blk_pc_request(rq)) { | ||
816 | rq->errors = clone->errors; | ||
817 | rq->resid_len = clone->resid_len; | ||
818 | |||
819 | if (rq->sense) | ||
820 | /* | ||
821 | * We are using the sense buffer of the original | ||
822 | * request. | ||
823 | * So setting the length of the sense data is enough. | ||
824 | */ | ||
825 | rq->sense_len = clone->sense_len; | ||
826 | } | ||
827 | |||
828 | BUG_ON(clone->bio); | ||
829 | free_rq_tio(tio); | ||
830 | |||
831 | blk_end_request_all(rq, error); | ||
832 | |||
833 | rq_completed(md, 1); | ||
834 | } | ||
835 | |||
836 | /* | ||
837 | * Request completion handler for request-based dm | ||
838 | */ | ||
839 | static void dm_softirq_done(struct request *rq) | ||
840 | { | ||
841 | struct request *clone = rq->completion_data; | ||
842 | struct dm_rq_target_io *tio = clone->end_io_data; | ||
843 | dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io; | ||
844 | int error = tio->error; | ||
845 | |||
846 | if (!(rq->cmd_flags & REQ_FAILED) && rq_end_io) | ||
847 | error = rq_end_io(tio->ti, clone, error, &tio->info); | ||
848 | |||
849 | if (error <= 0) | ||
850 | /* The target wants to complete the I/O */ | ||
851 | dm_end_request(clone, error); | ||
852 | else if (error == DM_ENDIO_INCOMPLETE) | ||
853 | /* The target will handle the I/O */ | ||
854 | return; | ||
855 | else if (error == DM_ENDIO_REQUEUE) | ||
856 | /* The target wants to requeue the I/O */ | ||
857 | dm_requeue_unmapped_request(clone); | ||
858 | else { | ||
859 | DMWARN("unimplemented target endio return value: %d", error); | ||
860 | BUG(); | ||
861 | } | ||
862 | } | ||
863 | |||
864 | /* | ||
865 | * Complete the clone and the original request with the error status | ||
866 | * through softirq context. | ||
867 | */ | ||
868 | static void dm_complete_request(struct request *clone, int error) | ||
869 | { | ||
870 | struct dm_rq_target_io *tio = clone->end_io_data; | ||
871 | struct request *rq = tio->orig; | ||
872 | |||
873 | tio->error = error; | ||
874 | rq->completion_data = clone; | ||
875 | blk_complete_request(rq); | ||
876 | } | ||
877 | |||
878 | /* | ||
879 | * Complete the not-mapped clone and the original request with the error status | ||
880 | * through softirq context. | ||
881 | * Target's rq_end_io() function isn't called. | ||
882 | * This may be used when the target's map_rq() function fails. | ||
883 | */ | ||
884 | void dm_kill_unmapped_request(struct request *clone, int error) | ||
885 | { | ||
886 | struct dm_rq_target_io *tio = clone->end_io_data; | ||
887 | struct request *rq = tio->orig; | ||
888 | |||
889 | rq->cmd_flags |= REQ_FAILED; | ||
890 | dm_complete_request(clone, error); | ||
891 | } | ||
892 | EXPORT_SYMBOL_GPL(dm_kill_unmapped_request); | ||
893 | |||
894 | /* | ||
895 | * Called with the queue lock held | ||
896 | */ | ||
897 | static void end_clone_request(struct request *clone, int error) | ||
898 | { | ||
899 | /* | ||
900 | * For just cleaning up the information of the queue in which | ||
901 | * the clone was dispatched. | ||
902 | * The clone is *NOT* freed actually here because it is alloced from | ||
903 | * dm own mempool and REQ_ALLOCED isn't set in clone->cmd_flags. | ||
904 | */ | ||
905 | __blk_put_request(clone->q, clone); | ||
906 | |||
907 | /* | ||
908 | * Actual request completion is done in a softirq context which doesn't | ||
909 | * hold the queue lock. Otherwise, deadlock could occur because: | ||
910 | * - another request may be submitted by the upper level driver | ||
911 | * of the stacking during the completion | ||
912 | * - the submission which requires queue lock may be done | ||
913 | * against this queue | ||
914 | */ | ||
915 | dm_complete_request(clone, error); | ||
916 | } | ||
917 | |||
612 | static sector_t max_io_len(struct mapped_device *md, | 918 | static sector_t max_io_len(struct mapped_device *md, |
613 | sector_t sector, struct dm_target *ti) | 919 | sector_t sector, struct dm_target *ti) |
614 | { | 920 | { |
@@ -636,11 +942,6 @@ static void __map_bio(struct dm_target *ti, struct bio *clone, | |||
636 | sector_t sector; | 942 | sector_t sector; |
637 | struct mapped_device *md; | 943 | struct mapped_device *md; |
638 | 944 | ||
639 | /* | ||
640 | * Sanity checks. | ||
641 | */ | ||
642 | BUG_ON(!clone->bi_size); | ||
643 | |||
644 | clone->bi_end_io = clone_endio; | 945 | clone->bi_end_io = clone_endio; |
645 | clone->bi_private = tio; | 946 | clone->bi_private = tio; |
646 | 947 | ||
@@ -656,8 +957,7 @@ static void __map_bio(struct dm_target *ti, struct bio *clone, | |||
656 | /* the bio has been remapped so dispatch it */ | 957 | /* the bio has been remapped so dispatch it */ |
657 | 958 | ||
658 | trace_block_remap(bdev_get_queue(clone->bi_bdev), clone, | 959 | trace_block_remap(bdev_get_queue(clone->bi_bdev), clone, |
659 | tio->io->bio->bi_bdev->bd_dev, | 960 | tio->io->bio->bi_bdev->bd_dev, sector); |
660 | clone->bi_sector, sector); | ||
661 | 961 | ||
662 | generic_make_request(clone); | 962 | generic_make_request(clone); |
663 | } else if (r < 0 || r == DM_MAPIO_REQUEUE) { | 963 | } else if (r < 0 || r == DM_MAPIO_REQUEUE) { |
@@ -755,6 +1055,48 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector, | |||
755 | return clone; | 1055 | return clone; |
756 | } | 1056 | } |
757 | 1057 | ||
1058 | static struct dm_target_io *alloc_tio(struct clone_info *ci, | ||
1059 | struct dm_target *ti) | ||
1060 | { | ||
1061 | struct dm_target_io *tio = mempool_alloc(ci->md->tio_pool, GFP_NOIO); | ||
1062 | |||
1063 | tio->io = ci->io; | ||
1064 | tio->ti = ti; | ||
1065 | memset(&tio->info, 0, sizeof(tio->info)); | ||
1066 | |||
1067 | return tio; | ||
1068 | } | ||
1069 | |||
1070 | static void __flush_target(struct clone_info *ci, struct dm_target *ti, | ||
1071 | unsigned flush_nr) | ||
1072 | { | ||
1073 | struct dm_target_io *tio = alloc_tio(ci, ti); | ||
1074 | struct bio *clone; | ||
1075 | |||
1076 | tio->info.flush_request = flush_nr; | ||
1077 | |||
1078 | clone = bio_alloc_bioset(GFP_NOIO, 0, ci->md->bs); | ||
1079 | __bio_clone(clone, ci->bio); | ||
1080 | clone->bi_destructor = dm_bio_destructor; | ||
1081 | |||
1082 | __map_bio(ti, clone, tio); | ||
1083 | } | ||
1084 | |||
1085 | static int __clone_and_map_empty_barrier(struct clone_info *ci) | ||
1086 | { | ||
1087 | unsigned target_nr = 0, flush_nr; | ||
1088 | struct dm_target *ti; | ||
1089 | |||
1090 | while ((ti = dm_table_get_target(ci->map, target_nr++))) | ||
1091 | for (flush_nr = 0; flush_nr < ti->num_flush_requests; | ||
1092 | flush_nr++) | ||
1093 | __flush_target(ci, ti, flush_nr); | ||
1094 | |||
1095 | ci->sector_count = 0; | ||
1096 | |||
1097 | return 0; | ||
1098 | } | ||
1099 | |||
758 | static int __clone_and_map(struct clone_info *ci) | 1100 | static int __clone_and_map(struct clone_info *ci) |
759 | { | 1101 | { |
760 | struct bio *clone, *bio = ci->bio; | 1102 | struct bio *clone, *bio = ci->bio; |
@@ -762,6 +1104,9 @@ static int __clone_and_map(struct clone_info *ci) | |||
762 | sector_t len = 0, max; | 1104 | sector_t len = 0, max; |
763 | struct dm_target_io *tio; | 1105 | struct dm_target_io *tio; |
764 | 1106 | ||
1107 | if (unlikely(bio_empty_barrier(bio))) | ||
1108 | return __clone_and_map_empty_barrier(ci); | ||
1109 | |||
765 | ti = dm_table_find_target(ci->map, ci->sector); | 1110 | ti = dm_table_find_target(ci->map, ci->sector); |
766 | if (!dm_target_is_valid(ti)) | 1111 | if (!dm_target_is_valid(ti)) |
767 | return -EIO; | 1112 | return -EIO; |
@@ -771,10 +1116,7 @@ static int __clone_and_map(struct clone_info *ci) | |||
771 | /* | 1116 | /* |
772 | * Allocate a target io object. | 1117 | * Allocate a target io object. |
773 | */ | 1118 | */ |
774 | tio = alloc_tio(ci->md); | 1119 | tio = alloc_tio(ci, ti); |
775 | tio->io = ci->io; | ||
776 | tio->ti = ti; | ||
777 | memset(&tio->info, 0, sizeof(tio->info)); | ||
778 | 1120 | ||
779 | if (ci->sector_count <= max) { | 1121 | if (ci->sector_count <= max) { |
780 | /* | 1122 | /* |
@@ -830,10 +1172,7 @@ static int __clone_and_map(struct clone_info *ci) | |||
830 | 1172 | ||
831 | max = max_io_len(ci->md, ci->sector, ti); | 1173 | max = max_io_len(ci->md, ci->sector, ti); |
832 | 1174 | ||
833 | tio = alloc_tio(ci->md); | 1175 | tio = alloc_tio(ci, ti); |
834 | tio->io = ci->io; | ||
835 | tio->ti = ti; | ||
836 | memset(&tio->info, 0, sizeof(tio->info)); | ||
837 | } | 1176 | } |
838 | 1177 | ||
839 | len = min(remaining, max); | 1178 | len = min(remaining, max); |
@@ -868,7 +1207,8 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) | |||
868 | if (!bio_barrier(bio)) | 1207 | if (!bio_barrier(bio)) |
869 | bio_io_error(bio); | 1208 | bio_io_error(bio); |
870 | else | 1209 | else |
871 | md->barrier_error = -EIO; | 1210 | if (!md->barrier_error) |
1211 | md->barrier_error = -EIO; | ||
872 | return; | 1212 | return; |
873 | } | 1213 | } |
874 | 1214 | ||
@@ -881,6 +1221,8 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) | |||
881 | ci.io->md = md; | 1221 | ci.io->md = md; |
882 | ci.sector = bio->bi_sector; | 1222 | ci.sector = bio->bi_sector; |
883 | ci.sector_count = bio_sectors(bio); | 1223 | ci.sector_count = bio_sectors(bio); |
1224 | if (unlikely(bio_empty_barrier(bio))) | ||
1225 | ci.sector_count = 1; | ||
884 | ci.idx = bio->bi_idx; | 1226 | ci.idx = bio->bi_idx; |
885 | 1227 | ||
886 | start_io_acct(ci.io); | 1228 | start_io_acct(ci.io); |
@@ -928,6 +1270,16 @@ static int dm_merge_bvec(struct request_queue *q, | |||
928 | */ | 1270 | */ |
929 | if (max_size && ti->type->merge) | 1271 | if (max_size && ti->type->merge) |
930 | max_size = ti->type->merge(ti, bvm, biovec, max_size); | 1272 | max_size = ti->type->merge(ti, bvm, biovec, max_size); |
1273 | /* | ||
1274 | * If the target doesn't support merge method and some of the devices | ||
1275 | * provided their merge_bvec method (we know this by looking at | ||
1276 | * queue_max_hw_sectors), then we can't allow bios with multiple vector | ||
1277 | * entries. So always set max_size to 0, and the code below allows | ||
1278 | * just one page. | ||
1279 | */ | ||
1280 | else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9) | ||
1281 | |||
1282 | max_size = 0; | ||
931 | 1283 | ||
932 | out_table: | 1284 | out_table: |
933 | dm_table_put(map); | 1285 | dm_table_put(map); |
@@ -946,7 +1298,7 @@ out: | |||
946 | * The request function that just remaps the bio built up by | 1298 | * The request function that just remaps the bio built up by |
947 | * dm_merge_bvec. | 1299 | * dm_merge_bvec. |
948 | */ | 1300 | */ |
949 | static int dm_request(struct request_queue *q, struct bio *bio) | 1301 | static int _dm_request(struct request_queue *q, struct bio *bio) |
950 | { | 1302 | { |
951 | int rw = bio_data_dir(bio); | 1303 | int rw = bio_data_dir(bio); |
952 | struct mapped_device *md = q->queuedata; | 1304 | struct mapped_device *md = q->queuedata; |
@@ -983,12 +1335,274 @@ static int dm_request(struct request_queue *q, struct bio *bio) | |||
983 | return 0; | 1335 | return 0; |
984 | } | 1336 | } |
985 | 1337 | ||
1338 | static int dm_make_request(struct request_queue *q, struct bio *bio) | ||
1339 | { | ||
1340 | struct mapped_device *md = q->queuedata; | ||
1341 | |||
1342 | if (unlikely(bio_barrier(bio))) { | ||
1343 | bio_endio(bio, -EOPNOTSUPP); | ||
1344 | return 0; | ||
1345 | } | ||
1346 | |||
1347 | return md->saved_make_request_fn(q, bio); /* call __make_request() */ | ||
1348 | } | ||
1349 | |||
1350 | static int dm_request_based(struct mapped_device *md) | ||
1351 | { | ||
1352 | return blk_queue_stackable(md->queue); | ||
1353 | } | ||
1354 | |||
1355 | static int dm_request(struct request_queue *q, struct bio *bio) | ||
1356 | { | ||
1357 | struct mapped_device *md = q->queuedata; | ||
1358 | |||
1359 | if (dm_request_based(md)) | ||
1360 | return dm_make_request(q, bio); | ||
1361 | |||
1362 | return _dm_request(q, bio); | ||
1363 | } | ||
1364 | |||
1365 | void dm_dispatch_request(struct request *rq) | ||
1366 | { | ||
1367 | int r; | ||
1368 | |||
1369 | if (blk_queue_io_stat(rq->q)) | ||
1370 | rq->cmd_flags |= REQ_IO_STAT; | ||
1371 | |||
1372 | rq->start_time = jiffies; | ||
1373 | r = blk_insert_cloned_request(rq->q, rq); | ||
1374 | if (r) | ||
1375 | dm_complete_request(rq, r); | ||
1376 | } | ||
1377 | EXPORT_SYMBOL_GPL(dm_dispatch_request); | ||
1378 | |||
1379 | static void dm_rq_bio_destructor(struct bio *bio) | ||
1380 | { | ||
1381 | struct dm_rq_clone_bio_info *info = bio->bi_private; | ||
1382 | struct mapped_device *md = info->tio->md; | ||
1383 | |||
1384 | free_bio_info(info); | ||
1385 | bio_free(bio, md->bs); | ||
1386 | } | ||
1387 | |||
1388 | static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, | ||
1389 | void *data) | ||
1390 | { | ||
1391 | struct dm_rq_target_io *tio = data; | ||
1392 | struct mapped_device *md = tio->md; | ||
1393 | struct dm_rq_clone_bio_info *info = alloc_bio_info(md); | ||
1394 | |||
1395 | if (!info) | ||
1396 | return -ENOMEM; | ||
1397 | |||
1398 | info->orig = bio_orig; | ||
1399 | info->tio = tio; | ||
1400 | bio->bi_end_io = end_clone_bio; | ||
1401 | bio->bi_private = info; | ||
1402 | bio->bi_destructor = dm_rq_bio_destructor; | ||
1403 | |||
1404 | return 0; | ||
1405 | } | ||
1406 | |||
1407 | static int setup_clone(struct request *clone, struct request *rq, | ||
1408 | struct dm_rq_target_io *tio) | ||
1409 | { | ||
1410 | int r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC, | ||
1411 | dm_rq_bio_constructor, tio); | ||
1412 | |||
1413 | if (r) | ||
1414 | return r; | ||
1415 | |||
1416 | clone->cmd = rq->cmd; | ||
1417 | clone->cmd_len = rq->cmd_len; | ||
1418 | clone->sense = rq->sense; | ||
1419 | clone->buffer = rq->buffer; | ||
1420 | clone->end_io = end_clone_request; | ||
1421 | clone->end_io_data = tio; | ||
1422 | |||
1423 | return 0; | ||
1424 | } | ||
1425 | |||
1426 | static int dm_rq_flush_suspending(struct mapped_device *md) | ||
1427 | { | ||
1428 | return !md->suspend_rq.special; | ||
1429 | } | ||
1430 | |||
1431 | /* | ||
1432 | * Called with the queue lock held. | ||
1433 | */ | ||
1434 | static int dm_prep_fn(struct request_queue *q, struct request *rq) | ||
1435 | { | ||
1436 | struct mapped_device *md = q->queuedata; | ||
1437 | struct dm_rq_target_io *tio; | ||
1438 | struct request *clone; | ||
1439 | |||
1440 | if (unlikely(rq == &md->suspend_rq)) { | ||
1441 | if (dm_rq_flush_suspending(md)) | ||
1442 | return BLKPREP_OK; | ||
1443 | else | ||
1444 | /* The flush suspend was interrupted */ | ||
1445 | return BLKPREP_KILL; | ||
1446 | } | ||
1447 | |||
1448 | if (unlikely(rq->special)) { | ||
1449 | DMWARN("Already has something in rq->special."); | ||
1450 | return BLKPREP_KILL; | ||
1451 | } | ||
1452 | |||
1453 | tio = alloc_rq_tio(md); /* Only one for each original request */ | ||
1454 | if (!tio) | ||
1455 | /* -ENOMEM */ | ||
1456 | return BLKPREP_DEFER; | ||
1457 | |||
1458 | tio->md = md; | ||
1459 | tio->ti = NULL; | ||
1460 | tio->orig = rq; | ||
1461 | tio->error = 0; | ||
1462 | memset(&tio->info, 0, sizeof(tio->info)); | ||
1463 | |||
1464 | clone = &tio->clone; | ||
1465 | if (setup_clone(clone, rq, tio)) { | ||
1466 | /* -ENOMEM */ | ||
1467 | free_rq_tio(tio); | ||
1468 | return BLKPREP_DEFER; | ||
1469 | } | ||
1470 | |||
1471 | rq->special = clone; | ||
1472 | rq->cmd_flags |= REQ_DONTPREP; | ||
1473 | |||
1474 | return BLKPREP_OK; | ||
1475 | } | ||
1476 | |||
1477 | static void map_request(struct dm_target *ti, struct request *rq, | ||
1478 | struct mapped_device *md) | ||
1479 | { | ||
1480 | int r; | ||
1481 | struct request *clone = rq->special; | ||
1482 | struct dm_rq_target_io *tio = clone->end_io_data; | ||
1483 | |||
1484 | /* | ||
1485 | * Hold the md reference here for the in-flight I/O. | ||
1486 | * We can't rely on the reference count by device opener, | ||
1487 | * because the device may be closed during the request completion | ||
1488 | * when all bios are completed. | ||
1489 | * See the comment in rq_completed() too. | ||
1490 | */ | ||
1491 | dm_get(md); | ||
1492 | |||
1493 | tio->ti = ti; | ||
1494 | r = ti->type->map_rq(ti, clone, &tio->info); | ||
1495 | switch (r) { | ||
1496 | case DM_MAPIO_SUBMITTED: | ||
1497 | /* The target has taken the I/O to submit by itself later */ | ||
1498 | break; | ||
1499 | case DM_MAPIO_REMAPPED: | ||
1500 | /* The target has remapped the I/O so dispatch it */ | ||
1501 | dm_dispatch_request(clone); | ||
1502 | break; | ||
1503 | case DM_MAPIO_REQUEUE: | ||
1504 | /* The target wants to requeue the I/O */ | ||
1505 | dm_requeue_unmapped_request(clone); | ||
1506 | break; | ||
1507 | default: | ||
1508 | if (r > 0) { | ||
1509 | DMWARN("unimplemented target map return value: %d", r); | ||
1510 | BUG(); | ||
1511 | } | ||
1512 | |||
1513 | /* The target wants to complete the I/O */ | ||
1514 | dm_kill_unmapped_request(clone, r); | ||
1515 | break; | ||
1516 | } | ||
1517 | } | ||
1518 | |||
1519 | /* | ||
1520 | * q->request_fn for request-based dm. | ||
1521 | * Called with the queue lock held. | ||
1522 | */ | ||
1523 | static void dm_request_fn(struct request_queue *q) | ||
1524 | { | ||
1525 | struct mapped_device *md = q->queuedata; | ||
1526 | struct dm_table *map = dm_get_table(md); | ||
1527 | struct dm_target *ti; | ||
1528 | struct request *rq; | ||
1529 | |||
1530 | /* | ||
1531 | * For noflush suspend, check blk_queue_stopped() to immediately | ||
1532 | * quit I/O dispatching. | ||
1533 | */ | ||
1534 | while (!blk_queue_plugged(q) && !blk_queue_stopped(q)) { | ||
1535 | rq = blk_peek_request(q); | ||
1536 | if (!rq) | ||
1537 | goto plug_and_out; | ||
1538 | |||
1539 | if (unlikely(rq == &md->suspend_rq)) { /* Flush suspend maker */ | ||
1540 | if (queue_in_flight(q)) | ||
1541 | /* Not quiet yet. Wait more */ | ||
1542 | goto plug_and_out; | ||
1543 | |||
1544 | /* This device should be quiet now */ | ||
1545 | __stop_queue(q); | ||
1546 | blk_start_request(rq); | ||
1547 | __blk_end_request_all(rq, 0); | ||
1548 | wake_up(&md->wait); | ||
1549 | goto out; | ||
1550 | } | ||
1551 | |||
1552 | ti = dm_table_find_target(map, blk_rq_pos(rq)); | ||
1553 | if (ti->type->busy && ti->type->busy(ti)) | ||
1554 | goto plug_and_out; | ||
1555 | |||
1556 | blk_start_request(rq); | ||
1557 | spin_unlock(q->queue_lock); | ||
1558 | map_request(ti, rq, md); | ||
1559 | spin_lock_irq(q->queue_lock); | ||
1560 | } | ||
1561 | |||
1562 | goto out; | ||
1563 | |||
1564 | plug_and_out: | ||
1565 | if (!elv_queue_empty(q)) | ||
1566 | /* Some requests still remain, retry later */ | ||
1567 | blk_plug_device(q); | ||
1568 | |||
1569 | out: | ||
1570 | dm_table_put(map); | ||
1571 | |||
1572 | return; | ||
1573 | } | ||
1574 | |||
1575 | int dm_underlying_device_busy(struct request_queue *q) | ||
1576 | { | ||
1577 | return blk_lld_busy(q); | ||
1578 | } | ||
1579 | EXPORT_SYMBOL_GPL(dm_underlying_device_busy); | ||
1580 | |||
1581 | static int dm_lld_busy(struct request_queue *q) | ||
1582 | { | ||
1583 | int r; | ||
1584 | struct mapped_device *md = q->queuedata; | ||
1585 | struct dm_table *map = dm_get_table(md); | ||
1586 | |||
1587 | if (!map || test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) | ||
1588 | r = 1; | ||
1589 | else | ||
1590 | r = dm_table_any_busy_target(map); | ||
1591 | |||
1592 | dm_table_put(map); | ||
1593 | |||
1594 | return r; | ||
1595 | } | ||
1596 | |||
986 | static void dm_unplug_all(struct request_queue *q) | 1597 | static void dm_unplug_all(struct request_queue *q) |
987 | { | 1598 | { |
988 | struct mapped_device *md = q->queuedata; | 1599 | struct mapped_device *md = q->queuedata; |
989 | struct dm_table *map = dm_get_table(md); | 1600 | struct dm_table *map = dm_get_table(md); |
990 | 1601 | ||
991 | if (map) { | 1602 | if (map) { |
1603 | if (dm_request_based(md)) | ||
1604 | generic_unplug_device(q); | ||
1605 | |||
992 | dm_table_unplug_all(map); | 1606 | dm_table_unplug_all(map); |
993 | dm_table_put(map); | 1607 | dm_table_put(map); |
994 | } | 1608 | } |
@@ -1003,7 +1617,16 @@ static int dm_any_congested(void *congested_data, int bdi_bits) | |||
1003 | if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { | 1617 | if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { |
1004 | map = dm_get_table(md); | 1618 | map = dm_get_table(md); |
1005 | if (map) { | 1619 | if (map) { |
1006 | r = dm_table_any_congested(map, bdi_bits); | 1620 | /* |
1621 | * Request-based dm cares about only own queue for | ||
1622 | * the query about congestion status of request_queue | ||
1623 | */ | ||
1624 | if (dm_request_based(md)) | ||
1625 | r = md->queue->backing_dev_info.state & | ||
1626 | bdi_bits; | ||
1627 | else | ||
1628 | r = dm_table_any_congested(map, bdi_bits); | ||
1629 | |||
1007 | dm_table_put(map); | 1630 | dm_table_put(map); |
1008 | } | 1631 | } |
1009 | } | 1632 | } |
@@ -1126,30 +1749,32 @@ static struct mapped_device *alloc_dev(int minor) | |||
1126 | INIT_LIST_HEAD(&md->uevent_list); | 1749 | INIT_LIST_HEAD(&md->uevent_list); |
1127 | spin_lock_init(&md->uevent_lock); | 1750 | spin_lock_init(&md->uevent_lock); |
1128 | 1751 | ||
1129 | md->queue = blk_alloc_queue(GFP_KERNEL); | 1752 | md->queue = blk_init_queue(dm_request_fn, NULL); |
1130 | if (!md->queue) | 1753 | if (!md->queue) |
1131 | goto bad_queue; | 1754 | goto bad_queue; |
1132 | 1755 | ||
1756 | /* | ||
1757 | * Request-based dm devices cannot be stacked on top of bio-based dm | ||
1758 | * devices. The type of this dm device has not been decided yet, | ||
1759 | * although we initialized the queue using blk_init_queue(). | ||
1760 | * The type is decided at the first table loading time. | ||
1761 | * To prevent problematic device stacking, clear the queue flag | ||
1762 | * for request stacking support until then. | ||
1763 | * | ||
1764 | * This queue is new, so no concurrency on the queue_flags. | ||
1765 | */ | ||
1766 | queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue); | ||
1767 | md->saved_make_request_fn = md->queue->make_request_fn; | ||
1133 | md->queue->queuedata = md; | 1768 | md->queue->queuedata = md; |
1134 | md->queue->backing_dev_info.congested_fn = dm_any_congested; | 1769 | md->queue->backing_dev_info.congested_fn = dm_any_congested; |
1135 | md->queue->backing_dev_info.congested_data = md; | 1770 | md->queue->backing_dev_info.congested_data = md; |
1136 | blk_queue_make_request(md->queue, dm_request); | 1771 | blk_queue_make_request(md->queue, dm_request); |
1137 | blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN, NULL); | ||
1138 | blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); | 1772 | blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); |
1139 | md->queue->unplug_fn = dm_unplug_all; | 1773 | md->queue->unplug_fn = dm_unplug_all; |
1140 | blk_queue_merge_bvec(md->queue, dm_merge_bvec); | 1774 | blk_queue_merge_bvec(md->queue, dm_merge_bvec); |
1141 | 1775 | blk_queue_softirq_done(md->queue, dm_softirq_done); | |
1142 | md->io_pool = mempool_create_slab_pool(MIN_IOS, _io_cache); | 1776 | blk_queue_prep_rq(md->queue, dm_prep_fn); |
1143 | if (!md->io_pool) | 1777 | blk_queue_lld_busy(md->queue, dm_lld_busy); |
1144 | goto bad_io_pool; | ||
1145 | |||
1146 | md->tio_pool = mempool_create_slab_pool(MIN_IOS, _tio_cache); | ||
1147 | if (!md->tio_pool) | ||
1148 | goto bad_tio_pool; | ||
1149 | |||
1150 | md->bs = bioset_create(16, 0); | ||
1151 | if (!md->bs) | ||
1152 | goto bad_no_bioset; | ||
1153 | 1778 | ||
1154 | md->disk = alloc_disk(1); | 1779 | md->disk = alloc_disk(1); |
1155 | if (!md->disk) | 1780 | if (!md->disk) |
@@ -1173,6 +1798,10 @@ static struct mapped_device *alloc_dev(int minor) | |||
1173 | if (!md->wq) | 1798 | if (!md->wq) |
1174 | goto bad_thread; | 1799 | goto bad_thread; |
1175 | 1800 | ||
1801 | md->bdev = bdget_disk(md->disk, 0); | ||
1802 | if (!md->bdev) | ||
1803 | goto bad_bdev; | ||
1804 | |||
1176 | /* Populate the mapping, nobody knows we exist yet */ | 1805 | /* Populate the mapping, nobody knows we exist yet */ |
1177 | spin_lock(&_minor_lock); | 1806 | spin_lock(&_minor_lock); |
1178 | old_md = idr_replace(&_minor_idr, md, minor); | 1807 | old_md = idr_replace(&_minor_idr, md, minor); |
@@ -1182,15 +1811,11 @@ static struct mapped_device *alloc_dev(int minor) | |||
1182 | 1811 | ||
1183 | return md; | 1812 | return md; |
1184 | 1813 | ||
1814 | bad_bdev: | ||
1815 | destroy_workqueue(md->wq); | ||
1185 | bad_thread: | 1816 | bad_thread: |
1186 | put_disk(md->disk); | 1817 | put_disk(md->disk); |
1187 | bad_disk: | 1818 | bad_disk: |
1188 | bioset_free(md->bs); | ||
1189 | bad_no_bioset: | ||
1190 | mempool_destroy(md->tio_pool); | ||
1191 | bad_tio_pool: | ||
1192 | mempool_destroy(md->io_pool); | ||
1193 | bad_io_pool: | ||
1194 | blk_cleanup_queue(md->queue); | 1819 | blk_cleanup_queue(md->queue); |
1195 | bad_queue: | 1820 | bad_queue: |
1196 | free_minor(minor); | 1821 | free_minor(minor); |
@@ -1207,14 +1832,15 @@ static void free_dev(struct mapped_device *md) | |||
1207 | { | 1832 | { |
1208 | int minor = MINOR(disk_devt(md->disk)); | 1833 | int minor = MINOR(disk_devt(md->disk)); |
1209 | 1834 | ||
1210 | if (md->suspended_bdev) { | 1835 | unlock_fs(md); |
1211 | unlock_fs(md); | 1836 | bdput(md->bdev); |
1212 | bdput(md->suspended_bdev); | ||
1213 | } | ||
1214 | destroy_workqueue(md->wq); | 1837 | destroy_workqueue(md->wq); |
1215 | mempool_destroy(md->tio_pool); | 1838 | if (md->tio_pool) |
1216 | mempool_destroy(md->io_pool); | 1839 | mempool_destroy(md->tio_pool); |
1217 | bioset_free(md->bs); | 1840 | if (md->io_pool) |
1841 | mempool_destroy(md->io_pool); | ||
1842 | if (md->bs) | ||
1843 | bioset_free(md->bs); | ||
1218 | blk_integrity_unregister(md->disk); | 1844 | blk_integrity_unregister(md->disk); |
1219 | del_gendisk(md->disk); | 1845 | del_gendisk(md->disk); |
1220 | free_minor(minor); | 1846 | free_minor(minor); |
@@ -1229,6 +1855,29 @@ static void free_dev(struct mapped_device *md) | |||
1229 | kfree(md); | 1855 | kfree(md); |
1230 | } | 1856 | } |
1231 | 1857 | ||
1858 | static void __bind_mempools(struct mapped_device *md, struct dm_table *t) | ||
1859 | { | ||
1860 | struct dm_md_mempools *p; | ||
1861 | |||
1862 | if (md->io_pool && md->tio_pool && md->bs) | ||
1863 | /* the md already has necessary mempools */ | ||
1864 | goto out; | ||
1865 | |||
1866 | p = dm_table_get_md_mempools(t); | ||
1867 | BUG_ON(!p || md->io_pool || md->tio_pool || md->bs); | ||
1868 | |||
1869 | md->io_pool = p->io_pool; | ||
1870 | p->io_pool = NULL; | ||
1871 | md->tio_pool = p->tio_pool; | ||
1872 | p->tio_pool = NULL; | ||
1873 | md->bs = p->bs; | ||
1874 | p->bs = NULL; | ||
1875 | |||
1876 | out: | ||
1877 | /* mempool bind completed, now no need any mempools in the table */ | ||
1878 | dm_table_free_md_mempools(t); | ||
1879 | } | ||
1880 | |||
1232 | /* | 1881 | /* |
1233 | * Bind a table to the device. | 1882 | * Bind a table to the device. |
1234 | */ | 1883 | */ |
@@ -1252,15 +1901,17 @@ static void __set_size(struct mapped_device *md, sector_t size) | |||
1252 | { | 1901 | { |
1253 | set_capacity(md->disk, size); | 1902 | set_capacity(md->disk, size); |
1254 | 1903 | ||
1255 | mutex_lock(&md->suspended_bdev->bd_inode->i_mutex); | 1904 | mutex_lock(&md->bdev->bd_inode->i_mutex); |
1256 | i_size_write(md->suspended_bdev->bd_inode, (loff_t)size << SECTOR_SHIFT); | 1905 | i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT); |
1257 | mutex_unlock(&md->suspended_bdev->bd_inode->i_mutex); | 1906 | mutex_unlock(&md->bdev->bd_inode->i_mutex); |
1258 | } | 1907 | } |
1259 | 1908 | ||
1260 | static int __bind(struct mapped_device *md, struct dm_table *t) | 1909 | static int __bind(struct mapped_device *md, struct dm_table *t, |
1910 | struct queue_limits *limits) | ||
1261 | { | 1911 | { |
1262 | struct request_queue *q = md->queue; | 1912 | struct request_queue *q = md->queue; |
1263 | sector_t size; | 1913 | sector_t size; |
1914 | unsigned long flags; | ||
1264 | 1915 | ||
1265 | size = dm_table_get_size(t); | 1916 | size = dm_table_get_size(t); |
1266 | 1917 | ||
@@ -1270,8 +1921,7 @@ static int __bind(struct mapped_device *md, struct dm_table *t) | |||
1270 | if (size != get_capacity(md->disk)) | 1921 | if (size != get_capacity(md->disk)) |
1271 | memset(&md->geometry, 0, sizeof(md->geometry)); | 1922 | memset(&md->geometry, 0, sizeof(md->geometry)); |
1272 | 1923 | ||
1273 | if (md->suspended_bdev) | 1924 | __set_size(md, size); |
1274 | __set_size(md, size); | ||
1275 | 1925 | ||
1276 | if (!size) { | 1926 | if (!size) { |
1277 | dm_table_destroy(t); | 1927 | dm_table_destroy(t); |
@@ -1280,10 +1930,22 @@ static int __bind(struct mapped_device *md, struct dm_table *t) | |||
1280 | 1930 | ||
1281 | dm_table_event_callback(t, event_callback, md); | 1931 | dm_table_event_callback(t, event_callback, md); |
1282 | 1932 | ||
1283 | write_lock(&md->map_lock); | 1933 | /* |
1934 | * The queue hasn't been stopped yet, if the old table type wasn't | ||
1935 | * for request-based during suspension. So stop it to prevent | ||
1936 | * I/O mapping before resume. | ||
1937 | * This must be done before setting the queue restrictions, | ||
1938 | * because request-based dm may be run just after the setting. | ||
1939 | */ | ||
1940 | if (dm_table_request_based(t) && !blk_queue_stopped(q)) | ||
1941 | stop_queue(q); | ||
1942 | |||
1943 | __bind_mempools(md, t); | ||
1944 | |||
1945 | write_lock_irqsave(&md->map_lock, flags); | ||
1284 | md->map = t; | 1946 | md->map = t; |
1285 | dm_table_set_restrictions(t, q); | 1947 | dm_table_set_restrictions(t, q, limits); |
1286 | write_unlock(&md->map_lock); | 1948 | write_unlock_irqrestore(&md->map_lock, flags); |
1287 | 1949 | ||
1288 | return 0; | 1950 | return 0; |
1289 | } | 1951 | } |
@@ -1291,14 +1953,15 @@ static int __bind(struct mapped_device *md, struct dm_table *t) | |||
1291 | static void __unbind(struct mapped_device *md) | 1953 | static void __unbind(struct mapped_device *md) |
1292 | { | 1954 | { |
1293 | struct dm_table *map = md->map; | 1955 | struct dm_table *map = md->map; |
1956 | unsigned long flags; | ||
1294 | 1957 | ||
1295 | if (!map) | 1958 | if (!map) |
1296 | return; | 1959 | return; |
1297 | 1960 | ||
1298 | dm_table_event_callback(map, NULL, NULL); | 1961 | dm_table_event_callback(map, NULL, NULL); |
1299 | write_lock(&md->map_lock); | 1962 | write_lock_irqsave(&md->map_lock, flags); |
1300 | md->map = NULL; | 1963 | md->map = NULL; |
1301 | write_unlock(&md->map_lock); | 1964 | write_unlock_irqrestore(&md->map_lock, flags); |
1302 | dm_table_destroy(map); | 1965 | dm_table_destroy(map); |
1303 | } | 1966 | } |
1304 | 1967 | ||
@@ -1402,6 +2065,8 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible) | |||
1402 | { | 2065 | { |
1403 | int r = 0; | 2066 | int r = 0; |
1404 | DECLARE_WAITQUEUE(wait, current); | 2067 | DECLARE_WAITQUEUE(wait, current); |
2068 | struct request_queue *q = md->queue; | ||
2069 | unsigned long flags; | ||
1405 | 2070 | ||
1406 | dm_unplug_all(md->queue); | 2071 | dm_unplug_all(md->queue); |
1407 | 2072 | ||
@@ -1411,7 +2076,14 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible) | |||
1411 | set_current_state(interruptible); | 2076 | set_current_state(interruptible); |
1412 | 2077 | ||
1413 | smp_mb(); | 2078 | smp_mb(); |
1414 | if (!atomic_read(&md->pending)) | 2079 | if (dm_request_based(md)) { |
2080 | spin_lock_irqsave(q->queue_lock, flags); | ||
2081 | if (!queue_in_flight(q) && blk_queue_stopped(q)) { | ||
2082 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
2083 | break; | ||
2084 | } | ||
2085 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
2086 | } else if (!atomic_read(&md->pending)) | ||
1415 | break; | 2087 | break; |
1416 | 2088 | ||
1417 | if (interruptible == TASK_INTERRUPTIBLE && | 2089 | if (interruptible == TASK_INTERRUPTIBLE && |
@@ -1429,34 +2101,36 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible) | |||
1429 | return r; | 2101 | return r; |
1430 | } | 2102 | } |
1431 | 2103 | ||
1432 | static int dm_flush(struct mapped_device *md) | 2104 | static void dm_flush(struct mapped_device *md) |
1433 | { | 2105 | { |
1434 | dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); | 2106 | dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); |
1435 | return 0; | 2107 | |
2108 | bio_init(&md->barrier_bio); | ||
2109 | md->barrier_bio.bi_bdev = md->bdev; | ||
2110 | md->barrier_bio.bi_rw = WRITE_BARRIER; | ||
2111 | __split_and_process_bio(md, &md->barrier_bio); | ||
2112 | |||
2113 | dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); | ||
1436 | } | 2114 | } |
1437 | 2115 | ||
1438 | static void process_barrier(struct mapped_device *md, struct bio *bio) | 2116 | static void process_barrier(struct mapped_device *md, struct bio *bio) |
1439 | { | 2117 | { |
1440 | int error = dm_flush(md); | 2118 | md->barrier_error = 0; |
1441 | 2119 | ||
1442 | if (unlikely(error)) { | 2120 | dm_flush(md); |
1443 | bio_endio(bio, error); | ||
1444 | return; | ||
1445 | } | ||
1446 | if (bio_empty_barrier(bio)) { | ||
1447 | bio_endio(bio, 0); | ||
1448 | return; | ||
1449 | } | ||
1450 | |||
1451 | __split_and_process_bio(md, bio); | ||
1452 | 2121 | ||
1453 | error = dm_flush(md); | 2122 | if (!bio_empty_barrier(bio)) { |
1454 | 2123 | __split_and_process_bio(md, bio); | |
1455 | if (!error && md->barrier_error) | 2124 | dm_flush(md); |
1456 | error = md->barrier_error; | 2125 | } |
1457 | 2126 | ||
1458 | if (md->barrier_error != DM_ENDIO_REQUEUE) | 2127 | if (md->barrier_error != DM_ENDIO_REQUEUE) |
1459 | bio_endio(bio, error); | 2128 | bio_endio(bio, md->barrier_error); |
2129 | else { | ||
2130 | spin_lock_irq(&md->deferred_lock); | ||
2131 | bio_list_add_head(&md->deferred, bio); | ||
2132 | spin_unlock_irq(&md->deferred_lock); | ||
2133 | } | ||
1460 | } | 2134 | } |
1461 | 2135 | ||
1462 | /* | 2136 | /* |
@@ -1482,10 +2156,14 @@ static void dm_wq_work(struct work_struct *work) | |||
1482 | 2156 | ||
1483 | up_write(&md->io_lock); | 2157 | up_write(&md->io_lock); |
1484 | 2158 | ||
1485 | if (bio_barrier(c)) | 2159 | if (dm_request_based(md)) |
1486 | process_barrier(md, c); | 2160 | generic_make_request(c); |
1487 | else | 2161 | else { |
1488 | __split_and_process_bio(md, c); | 2162 | if (bio_barrier(c)) |
2163 | process_barrier(md, c); | ||
2164 | else | ||
2165 | __split_and_process_bio(md, c); | ||
2166 | } | ||
1489 | 2167 | ||
1490 | down_write(&md->io_lock); | 2168 | down_write(&md->io_lock); |
1491 | } | 2169 | } |
@@ -1505,6 +2183,7 @@ static void dm_queue_flush(struct mapped_device *md) | |||
1505 | */ | 2183 | */ |
1506 | int dm_swap_table(struct mapped_device *md, struct dm_table *table) | 2184 | int dm_swap_table(struct mapped_device *md, struct dm_table *table) |
1507 | { | 2185 | { |
2186 | struct queue_limits limits; | ||
1508 | int r = -EINVAL; | 2187 | int r = -EINVAL; |
1509 | 2188 | ||
1510 | mutex_lock(&md->suspend_lock); | 2189 | mutex_lock(&md->suspend_lock); |
@@ -1513,19 +2192,96 @@ int dm_swap_table(struct mapped_device *md, struct dm_table *table) | |||
1513 | if (!dm_suspended(md)) | 2192 | if (!dm_suspended(md)) |
1514 | goto out; | 2193 | goto out; |
1515 | 2194 | ||
1516 | /* without bdev, the device size cannot be changed */ | 2195 | r = dm_calculate_queue_limits(table, &limits); |
1517 | if (!md->suspended_bdev) | 2196 | if (r) |
1518 | if (get_capacity(md->disk) != dm_table_get_size(table)) | 2197 | goto out; |
1519 | goto out; | 2198 | |
2199 | /* cannot change the device type, once a table is bound */ | ||
2200 | if (md->map && | ||
2201 | (dm_table_get_type(md->map) != dm_table_get_type(table))) { | ||
2202 | DMWARN("can't change the device type after a table is bound"); | ||
2203 | goto out; | ||
2204 | } | ||
2205 | |||
2206 | /* | ||
2207 | * It is enought that blk_queue_ordered() is called only once when | ||
2208 | * the first bio-based table is bound. | ||
2209 | * | ||
2210 | * This setting should be moved to alloc_dev() when request-based dm | ||
2211 | * supports barrier. | ||
2212 | */ | ||
2213 | if (!md->map && dm_table_bio_based(table)) | ||
2214 | blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN, NULL); | ||
1520 | 2215 | ||
1521 | __unbind(md); | 2216 | __unbind(md); |
1522 | r = __bind(md, table); | 2217 | r = __bind(md, table, &limits); |
1523 | 2218 | ||
1524 | out: | 2219 | out: |
1525 | mutex_unlock(&md->suspend_lock); | 2220 | mutex_unlock(&md->suspend_lock); |
1526 | return r; | 2221 | return r; |
1527 | } | 2222 | } |
1528 | 2223 | ||
2224 | static void dm_rq_invalidate_suspend_marker(struct mapped_device *md) | ||
2225 | { | ||
2226 | md->suspend_rq.special = (void *)0x1; | ||
2227 | } | ||
2228 | |||
2229 | static void dm_rq_abort_suspend(struct mapped_device *md, int noflush) | ||
2230 | { | ||
2231 | struct request_queue *q = md->queue; | ||
2232 | unsigned long flags; | ||
2233 | |||
2234 | spin_lock_irqsave(q->queue_lock, flags); | ||
2235 | if (!noflush) | ||
2236 | dm_rq_invalidate_suspend_marker(md); | ||
2237 | __start_queue(q); | ||
2238 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
2239 | } | ||
2240 | |||
2241 | static void dm_rq_start_suspend(struct mapped_device *md, int noflush) | ||
2242 | { | ||
2243 | struct request *rq = &md->suspend_rq; | ||
2244 | struct request_queue *q = md->queue; | ||
2245 | |||
2246 | if (noflush) | ||
2247 | stop_queue(q); | ||
2248 | else { | ||
2249 | blk_rq_init(q, rq); | ||
2250 | blk_insert_request(q, rq, 0, NULL); | ||
2251 | } | ||
2252 | } | ||
2253 | |||
2254 | static int dm_rq_suspend_available(struct mapped_device *md, int noflush) | ||
2255 | { | ||
2256 | int r = 1; | ||
2257 | struct request *rq = &md->suspend_rq; | ||
2258 | struct request_queue *q = md->queue; | ||
2259 | unsigned long flags; | ||
2260 | |||
2261 | if (noflush) | ||
2262 | return r; | ||
2263 | |||
2264 | /* The marker must be protected by queue lock if it is in use */ | ||
2265 | spin_lock_irqsave(q->queue_lock, flags); | ||
2266 | if (unlikely(rq->ref_count)) { | ||
2267 | /* | ||
2268 | * This can happen, when the previous flush suspend was | ||
2269 | * interrupted, the marker is still in the queue and | ||
2270 | * this flush suspend has been invoked, because we don't | ||
2271 | * remove the marker at the time of suspend interruption. | ||
2272 | * We have only one marker per mapped_device, so we can't | ||
2273 | * start another flush suspend while it is in use. | ||
2274 | */ | ||
2275 | BUG_ON(!rq->special); /* The marker should be invalidated */ | ||
2276 | DMWARN("Invalidating the previous flush suspend is still in" | ||
2277 | " progress. Please retry later."); | ||
2278 | r = 0; | ||
2279 | } | ||
2280 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
2281 | |||
2282 | return r; | ||
2283 | } | ||
2284 | |||
1529 | /* | 2285 | /* |
1530 | * Functions to lock and unlock any filesystem running on the | 2286 | * Functions to lock and unlock any filesystem running on the |
1531 | * device. | 2287 | * device. |
@@ -1536,7 +2292,7 @@ static int lock_fs(struct mapped_device *md) | |||
1536 | 2292 | ||
1537 | WARN_ON(md->frozen_sb); | 2293 | WARN_ON(md->frozen_sb); |
1538 | 2294 | ||
1539 | md->frozen_sb = freeze_bdev(md->suspended_bdev); | 2295 | md->frozen_sb = freeze_bdev(md->bdev); |
1540 | if (IS_ERR(md->frozen_sb)) { | 2296 | if (IS_ERR(md->frozen_sb)) { |
1541 | r = PTR_ERR(md->frozen_sb); | 2297 | r = PTR_ERR(md->frozen_sb); |
1542 | md->frozen_sb = NULL; | 2298 | md->frozen_sb = NULL; |
@@ -1545,9 +2301,6 @@ static int lock_fs(struct mapped_device *md) | |||
1545 | 2301 | ||
1546 | set_bit(DMF_FROZEN, &md->flags); | 2302 | set_bit(DMF_FROZEN, &md->flags); |
1547 | 2303 | ||
1548 | /* don't bdput right now, we don't want the bdev | ||
1549 | * to go away while it is locked. | ||
1550 | */ | ||
1551 | return 0; | 2304 | return 0; |
1552 | } | 2305 | } |
1553 | 2306 | ||
@@ -1556,7 +2309,7 @@ static void unlock_fs(struct mapped_device *md) | |||
1556 | if (!test_bit(DMF_FROZEN, &md->flags)) | 2309 | if (!test_bit(DMF_FROZEN, &md->flags)) |
1557 | return; | 2310 | return; |
1558 | 2311 | ||
1559 | thaw_bdev(md->suspended_bdev, md->frozen_sb); | 2312 | thaw_bdev(md->bdev, md->frozen_sb); |
1560 | md->frozen_sb = NULL; | 2313 | md->frozen_sb = NULL; |
1561 | clear_bit(DMF_FROZEN, &md->flags); | 2314 | clear_bit(DMF_FROZEN, &md->flags); |
1562 | } | 2315 | } |
@@ -1568,6 +2321,53 @@ static void unlock_fs(struct mapped_device *md) | |||
1568 | * dm_bind_table, dm_suspend must be called to flush any in | 2321 | * dm_bind_table, dm_suspend must be called to flush any in |
1569 | * flight bios and ensure that any further io gets deferred. | 2322 | * flight bios and ensure that any further io gets deferred. |
1570 | */ | 2323 | */ |
2324 | /* | ||
2325 | * Suspend mechanism in request-based dm. | ||
2326 | * | ||
2327 | * After the suspend starts, further incoming requests are kept in | ||
2328 | * the request_queue and deferred. | ||
2329 | * Remaining requests in the request_queue at the start of suspend are flushed | ||
2330 | * if it is flush suspend. | ||
2331 | * The suspend completes when the following conditions have been satisfied, | ||
2332 | * so wait for it: | ||
2333 | * 1. q->in_flight is 0 (which means no in_flight request) | ||
2334 | * 2. queue has been stopped (which means no request dispatching) | ||
2335 | * | ||
2336 | * | ||
2337 | * Noflush suspend | ||
2338 | * --------------- | ||
2339 | * Noflush suspend doesn't need to dispatch remaining requests. | ||
2340 | * So stop the queue immediately. Then, wait for all in_flight requests | ||
2341 | * to be completed or requeued. | ||
2342 | * | ||
2343 | * To abort noflush suspend, start the queue. | ||
2344 | * | ||
2345 | * | ||
2346 | * Flush suspend | ||
2347 | * ------------- | ||
2348 | * Flush suspend needs to dispatch remaining requests. So stop the queue | ||
2349 | * after the remaining requests are completed. (Requeued request must be also | ||
2350 | * re-dispatched and completed. Until then, we can't stop the queue.) | ||
2351 | * | ||
2352 | * During flushing the remaining requests, further incoming requests are also | ||
2353 | * inserted to the same queue. To distinguish which requests are to be | ||
2354 | * flushed, we insert a marker request to the queue at the time of starting | ||
2355 | * flush suspend, like a barrier. | ||
2356 | * The dispatching is blocked when the marker is found on the top of the queue. | ||
2357 | * And the queue is stopped when all in_flight requests are completed, since | ||
2358 | * that means the remaining requests are completely flushed. | ||
2359 | * Then, the marker is removed from the queue. | ||
2360 | * | ||
2361 | * To abort flush suspend, we also need to take care of the marker, not only | ||
2362 | * starting the queue. | ||
2363 | * We don't remove the marker forcibly from the queue since it's against | ||
2364 | * the block-layer manner. Instead, we put a invalidated mark on the marker. | ||
2365 | * When the invalidated marker is found on the top of the queue, it is | ||
2366 | * immediately removed from the queue, so it doesn't block dispatching. | ||
2367 | * Because we have only one marker per mapped_device, we can't start another | ||
2368 | * flush suspend until the invalidated marker is removed from the queue. | ||
2369 | * So fail and return with -EBUSY in such a case. | ||
2370 | */ | ||
1571 | int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | 2371 | int dm_suspend(struct mapped_device *md, unsigned suspend_flags) |
1572 | { | 2372 | { |
1573 | struct dm_table *map = NULL; | 2373 | struct dm_table *map = NULL; |
@@ -1582,6 +2382,11 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
1582 | goto out_unlock; | 2382 | goto out_unlock; |
1583 | } | 2383 | } |
1584 | 2384 | ||
2385 | if (dm_request_based(md) && !dm_rq_suspend_available(md, noflush)) { | ||
2386 | r = -EBUSY; | ||
2387 | goto out_unlock; | ||
2388 | } | ||
2389 | |||
1585 | map = dm_get_table(md); | 2390 | map = dm_get_table(md); |
1586 | 2391 | ||
1587 | /* | 2392 | /* |
@@ -1594,24 +2399,14 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
1594 | /* This does not get reverted if there's an error later. */ | 2399 | /* This does not get reverted if there's an error later. */ |
1595 | dm_table_presuspend_targets(map); | 2400 | dm_table_presuspend_targets(map); |
1596 | 2401 | ||
1597 | /* bdget() can stall if the pending I/Os are not flushed */ | 2402 | /* |
1598 | if (!noflush) { | 2403 | * Flush I/O to the device. noflush supersedes do_lockfs, |
1599 | md->suspended_bdev = bdget_disk(md->disk, 0); | 2404 | * because lock_fs() needs to flush I/Os. |
1600 | if (!md->suspended_bdev) { | 2405 | */ |
1601 | DMWARN("bdget failed in dm_suspend"); | 2406 | if (!noflush && do_lockfs) { |
1602 | r = -ENOMEM; | 2407 | r = lock_fs(md); |
2408 | if (r) | ||
1603 | goto out; | 2409 | goto out; |
1604 | } | ||
1605 | |||
1606 | /* | ||
1607 | * Flush I/O to the device. noflush supersedes do_lockfs, | ||
1608 | * because lock_fs() needs to flush I/Os. | ||
1609 | */ | ||
1610 | if (do_lockfs) { | ||
1611 | r = lock_fs(md); | ||
1612 | if (r) | ||
1613 | goto out; | ||
1614 | } | ||
1615 | } | 2410 | } |
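The hunk above drops the old bdget_disk()/suspended_bdev handling in favour of a direct lock_fs() call. For orientation only, the sketch below shows roughly what a freeze/thaw pair of this kind does; it assumes the block-layer helpers freeze_bdev()/thaw_bdev() and the md->bdev, md->frozen_sb and DMF_FROZEN fields used elsewhere in this file, and is not quoted from the patch.

/* Sketch only - the real lock_fs()/unlock_fs() live earlier in dm.c. */
static int lock_fs_sketch(struct mapped_device *md)
{
	/* Freeze the filesystem on the dm device so no new I/O is issued. */
	md->frozen_sb = freeze_bdev(md->bdev);
	if (IS_ERR(md->frozen_sb)) {
		int r = PTR_ERR(md->frozen_sb);

		md->frozen_sb = NULL;
		return r;
	}

	set_bit(DMF_FROZEN, &md->flags);
	return 0;
}

static void unlock_fs_sketch(struct mapped_device *md)
{
	if (!test_bit(DMF_FROZEN, &md->flags))
		return;

	/* Thaw the filesystem frozen by lock_fs_sketch(). */
	thaw_bdev(md->bdev, md->frozen_sb);
	md->frozen_sb = NULL;
	clear_bit(DMF_FROZEN, &md->flags);
}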
1616 | 2411 | ||
1617 | /* | 2412 | /* |
@@ -1637,6 +2432,9 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
1637 | 2432 | ||
1638 | flush_workqueue(md->wq); | 2433 | flush_workqueue(md->wq); |
1639 | 2434 | ||
2435 | if (dm_request_based(md)) | ||
2436 | dm_rq_start_suspend(md, noflush); | ||
2437 | |||
1640 | /* | 2438 | /* |
1641 | * At this point no more requests are entering target request routines. | 2439 | * At this point no more requests are entering target request routines. |
1642 | * We call dm_wait_for_completion to wait for all existing requests | 2440 | * We call dm_wait_for_completion to wait for all existing requests |
@@ -1653,6 +2451,9 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
1653 | if (r < 0) { | 2451 | if (r < 0) { |
1654 | dm_queue_flush(md); | 2452 | dm_queue_flush(md); |
1655 | 2453 | ||
2454 | if (dm_request_based(md)) | ||
2455 | dm_rq_abort_suspend(md, noflush); | ||
2456 | |||
1656 | unlock_fs(md); | 2457 | unlock_fs(md); |
1657 | goto out; /* pushback list is already flushed, so skip flush */ | 2458 | goto out; /* pushback list is already flushed, so skip flush */ |
1658 | } | 2459 | } |
@@ -1668,11 +2469,6 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
1668 | set_bit(DMF_SUSPENDED, &md->flags); | 2469 | set_bit(DMF_SUSPENDED, &md->flags); |
1669 | 2470 | ||
1670 | out: | 2471 | out: |
1671 | if (r && md->suspended_bdev) { | ||
1672 | bdput(md->suspended_bdev); | ||
1673 | md->suspended_bdev = NULL; | ||
1674 | } | ||
1675 | |||
1676 | dm_table_put(map); | 2472 | dm_table_put(map); |
1677 | 2473 | ||
1678 | out_unlock: | 2474 | out_unlock: |
@@ -1699,21 +2495,20 @@ int dm_resume(struct mapped_device *md) | |||
1699 | 2495 | ||
1700 | dm_queue_flush(md); | 2496 | dm_queue_flush(md); |
1701 | 2497 | ||
1702 | unlock_fs(md); | 2498 | /* |
2499 | * Flushing deferred I/Os must be done after targets are resumed | ||
2500 | * so that mapping of targets can work correctly. | ||
2501 | * Request-based dm is queueing the deferred I/Os in its request_queue. | ||
2502 | */ | ||
2503 | if (dm_request_based(md)) | ||
2504 | start_queue(md->queue); | ||
1703 | 2505 | ||
1704 | if (md->suspended_bdev) { | 2506 | unlock_fs(md); |
1705 | bdput(md->suspended_bdev); | ||
1706 | md->suspended_bdev = NULL; | ||
1707 | } | ||
1708 | 2507 | ||
1709 | clear_bit(DMF_SUSPENDED, &md->flags); | 2508 | clear_bit(DMF_SUSPENDED, &md->flags); |
1710 | 2509 | ||
1711 | dm_table_unplug_all(map); | 2510 | dm_table_unplug_all(map); |
1712 | |||
1713 | dm_kobject_uevent(md); | ||
1714 | |||
1715 | r = 0; | 2511 | r = 0; |
1716 | |||
1717 | out: | 2512 | out: |
1718 | dm_table_put(map); | 2513 | dm_table_put(map); |
1719 | mutex_unlock(&md->suspend_lock); | 2514 | mutex_unlock(&md->suspend_lock); |
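dm_resume() above restarts the queue for request-based devices so that the requests deferred in the request_queue are dispatched again. The start_queue()/stop_queue() helpers themselves are added by this patch outside this hunk; a minimal sketch of what such helpers typically do, under that assumption, is:

/* Sketch of request_queue start/stop helpers - illustration only. */
static void start_queue_sketch(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	if (blk_queue_stopped(q))
		blk_start_queue(q);	/* resume dispatching to targets */
	spin_unlock_irqrestore(q->queue_lock, flags);
}

static void stop_queue_sketch(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	blk_stop_queue(q);		/* no further requests are dispatched */
	spin_unlock_irqrestore(q->queue_lock, flags);
}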
@@ -1724,9 +2519,19 @@ out: | |||
1724 | /*----------------------------------------------------------------- | 2519 | /*----------------------------------------------------------------- |
1725 | * Event notification. | 2520 | * Event notification. |
1726 | *---------------------------------------------------------------*/ | 2521 | *---------------------------------------------------------------*/ |
1727 | void dm_kobject_uevent(struct mapped_device *md) | 2522 | void dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, |
1728 | { | 2523 | unsigned cookie) |
1729 | kobject_uevent(&disk_to_dev(md->disk)->kobj, KOBJ_CHANGE); | 2524 | { |
2525 | char udev_cookie[DM_COOKIE_LENGTH]; | ||
2526 | char *envp[] = { udev_cookie, NULL }; | ||
2527 | |||
2528 | if (!cookie) | ||
2529 | kobject_uevent(&disk_to_dev(md->disk)->kobj, action); | ||
2530 | else { | ||
2531 | snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u", | ||
2532 | DM_COOKIE_ENV_VAR_NAME, cookie); | ||
2533 | kobject_uevent_env(&disk_to_dev(md->disk)->kobj, action, envp); | ||
2534 | } | ||
1730 | } | 2535 | } |
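With the cookie support above, a CHANGE or REMOVE uevent can carry the cookie as an environment variable (formatted by the snprintf() above) that udev rules can match on. A hedged usage sketch follows; the caller is illustrative and the real users of the cookie live in drivers/md/dm-ioctl.c.

/* Illustrative caller - the real users are in drivers/md/dm-ioctl.c. */
static void notify_userspace_sketch(struct mapped_device *md, unsigned cookie)
{
	/*
	 * With a non-zero cookie the uevent environment carries the
	 * cookie value; with cookie == 0 a plain uevent is emitted.
	 */
	dm_kobject_uevent(md, KOBJ_CHANGE, cookie);
}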
1731 | 2536 | ||
1732 | uint32_t dm_next_uevent_seq(struct mapped_device *md) | 2537 | uint32_t dm_next_uevent_seq(struct mapped_device *md) |
@@ -1780,6 +2585,10 @@ struct mapped_device *dm_get_from_kobject(struct kobject *kobj) | |||
1780 | if (&md->kobj != kobj) | 2585 | if (&md->kobj != kobj) |
1781 | return NULL; | 2586 | return NULL; |
1782 | 2587 | ||
2588 | if (test_bit(DMF_FREEING, &md->flags) || | ||
2589 | test_bit(DMF_DELETING, &md->flags)) | ||
2590 | return NULL; | ||
2591 | |||
1783 | dm_get(md); | 2592 | dm_get(md); |
1784 | return md; | 2593 | return md; |
1785 | } | 2594 | } |
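The DMF_FREEING/DMF_DELETING check above prevents a sysfs reader from taking a reference on a device that is already going away. Below is a sketch of how a sysfs show handler might use dm_get_from_kobject(); the handler name is illustrative and the real sysfs code lives in drivers/md/dm-sysfs.c.

/* Illustrative sysfs show handler - the real one lives in dm-sysfs.c. */
static ssize_t dm_name_show_sketch(struct kobject *kobj,
				   struct attribute *attr, char *page)
{
	struct mapped_device *md = dm_get_from_kobject(kobj);
	ssize_t len;

	if (!md)
		return -EINVAL;	/* device is being freed or deleted */

	len = sprintf(page, "%s\n", dm_device_name(md));
	dm_put(md);

	return len;
}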
@@ -1800,6 +2609,61 @@ int dm_noflush_suspending(struct dm_target *ti) | |||
1800 | } | 2609 | } |
1801 | EXPORT_SYMBOL_GPL(dm_noflush_suspending); | 2610 | EXPORT_SYMBOL_GPL(dm_noflush_suspending); |
1802 | 2611 | ||
2612 | struct dm_md_mempools *dm_alloc_md_mempools(unsigned type) | ||
2613 | { | ||
2614 | struct dm_md_mempools *pools = kmalloc(sizeof(*pools), GFP_KERNEL); | ||
2615 | |||
2616 | if (!pools) | ||
2617 | return NULL; | ||
2618 | |||
2619 | pools->io_pool = (type == DM_TYPE_BIO_BASED) ? | ||
2620 | mempool_create_slab_pool(MIN_IOS, _io_cache) : | ||
2621 | mempool_create_slab_pool(MIN_IOS, _rq_bio_info_cache); | ||
2622 | if (!pools->io_pool) | ||
2623 | goto free_pools_and_out; | ||
2624 | |||
2625 | pools->tio_pool = (type == DM_TYPE_BIO_BASED) ? | ||
2626 | mempool_create_slab_pool(MIN_IOS, _tio_cache) : | ||
2627 | mempool_create_slab_pool(MIN_IOS, _rq_tio_cache); | ||
2628 | if (!pools->tio_pool) | ||
2629 | goto free_io_pool_and_out; | ||
2630 | |||
2631 | pools->bs = (type == DM_TYPE_BIO_BASED) ? | ||
2632 | bioset_create(16, 0) : bioset_create(MIN_IOS, 0); | ||
2633 | if (!pools->bs) | ||
2634 | goto free_tio_pool_and_out; | ||
2635 | |||
2636 | return pools; | ||
2637 | |||
2638 | free_tio_pool_and_out: | ||
2639 | mempool_destroy(pools->tio_pool); | ||
2640 | |||
2641 | free_io_pool_and_out: | ||
2642 | mempool_destroy(pools->io_pool); | ||
2643 | |||
2644 | free_pools_and_out: | ||
2645 | kfree(pools); | ||
2646 | |||
2647 | return NULL; | ||
2648 | } | ||
2649 | |||
2650 | void dm_free_md_mempools(struct dm_md_mempools *pools) | ||
2651 | { | ||
2652 | if (!pools) | ||
2653 | return; | ||
2654 | |||
2655 | if (pools->io_pool) | ||
2656 | mempool_destroy(pools->io_pool); | ||
2657 | |||
2658 | if (pools->tio_pool) | ||
2659 | mempool_destroy(pools->tio_pool); | ||
2660 | |||
2661 | if (pools->bs) | ||
2662 | bioset_free(pools->bs); | ||
2663 | |||
2664 | kfree(pools); | ||
2665 | } | ||
2666 | |||
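dm_alloc_md_mempools()/dm_free_md_mempools() let the table-load path pre-allocate per-device pools sized for either bio-based or request-based operation. A hedged round-trip sketch is shown below; the helper is hypothetical, and the real user is the table-loading code in drivers/md/dm-table.c.

/* Illustrative round trip - the real user is in drivers/md/dm-table.c. */
static int pools_roundtrip_sketch(unsigned type)
{
	/* type is DM_TYPE_BIO_BASED or DM_TYPE_REQUEST_BASED (from dm.h). */
	struct dm_md_mempools *pools = dm_alloc_md_mempools(type);

	if (!pools)
		return -ENOMEM;

	/*
	 * In the real code pools->io_pool, pools->tio_pool and pools->bs
	 * are handed to the mapped_device when the table is bound;
	 * dm_free_md_mempools() releases whatever was allocated.
	 */
	dm_free_md_mempools(pools);

	return 0;
}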
1803 | static struct block_device_operations dm_blk_dops = { | 2667 | static struct block_device_operations dm_blk_dops = { |
1804 | .open = dm_blk_open, | 2668 | .open = dm_blk_open, |
1805 | .release = dm_blk_close, | 2669 | .release = dm_blk_close, |